1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
26 */
27
28#include <sys/conf.h>
29#include <sys/list.h>
30#include <sys/file.h>
31#include <sys/ddi.h>
32#include <sys/sunddi.h>
33#include <sys/modctl.h>
34#include <sys/scsi/scsi.h>
35#include <sys/scsi/impl/scsi_reset_notify.h>
36#include <sys/disp.h>
37#include <sys/byteorder.h>
38#include <sys/pathname.h>
39#include <sys/atomic.h>
40#include <sys/nvpair.h>
41#include <sys/fs/zfs.h>
42#include <sys/sdt.h>
43#include <sys/dkio.h>
44#include <sys/zfs_ioctl.h>
45
46#include <sys/stmf.h>
47#include <sys/lpif.h>
48#include <sys/stmf_ioctl.h>
49#include <sys/stmf_sbd_ioctl.h>
50
51#include "stmf_sbd.h"
52#include "sbd_impl.h"
53
/*
 * Compares the first 9 characters of `zvol' against "/dev/zvol".
 *
 * NOTE: despite its name, this macro yields the raw strncmp() result, i.e.
 * it evaluates to 0 when the path DOES start with "/dev/zvol" and to a
 * non-zero value when it does not.
 */
#define	SBD_IS_ZVOL(zvol)	(strncmp("/dev/zvol", zvol, 9))
55
56extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
57extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);
58extern void sbd_pgr_reset(sbd_lu_t *sl);
59extern int HardwareAcceleratedLocking;
60extern int HardwareAcceleratedInit;
61extern int HardwareAcceleratedMove;
62extern uint8_t sbd_unmap_enable;
63
64static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
65    void **result);
66static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
67static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
68static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
69static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
70static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
71    cred_t *credp, int *rval);
72void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);
73stmf_status_t sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg,
74    uint32_t proxy_reg_arg_len);
75stmf_status_t sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
76    uint32_t proxy_reg_arg_len);
77stmf_status_t sbd_proxy_msg(uint8_t *luid, void *proxy_arg,
78    uint32_t proxy_arg_len, uint32_t type);
79int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
80    uint32_t *err_ret);
81int sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret);
82int sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret);
83int sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
84    int no_register, sbd_lu_t **slr);
85int sbd_import_active_lu(sbd_import_lu_t *ilu, sbd_lu_t *sl, uint32_t *err_ret);
86int sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret);
87int sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret);
88int sbd_set_global_props(sbd_global_props_t *mlu, int struct_sz,
89    uint32_t *err_ret);
90int sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
91    uint32_t *err_ret);
92int sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
93    sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret);
94static char *sbd_get_zvol_name(sbd_lu_t *);
95static int sbd_get_unmap_props(sbd_unmap_props_t *sup, sbd_unmap_props_t *osup,
96    uint32_t *err_ret);
97sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
98sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
99sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
100    uint64_t off);
101sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
102    uint64_t off);
103sbd_status_t sbd_update_zfs_prop(sbd_lu_t *sl);
104int sbd_is_zvol(char *path);
105int sbd_zvolget(char *zvol_name, char **comstarprop);
106int sbd_zvolset(char *zvol_name, char *comstarprop);
107char sbd_ctoi(char c);
108void sbd_close_lu(sbd_lu_t *sl);
109
/* LDI identity; obtained in _init() and released in _fini(). */
static ldi_ident_t	sbd_zfs_ident;
/* LU provider handle; allocated and registered with STMF in _init(). */
static stmf_lu_provider_t *sbd_lp;
/* Head of the list of linked LUs (chained through sl_next). */
static sbd_lu_t		*sbd_lu_list = NULL;
/* Held while walking or modifying sbd_lu_list. */
static kmutex_t		sbd_lock;
/* devinfo node recorded by sbd_attach(). */
static dev_info_t	*sbd_dip;
/* LU count; reported to userland by SBD_IOCTL_GET_LU_LIST. */
static uint32_t		sbd_lu_count = 0;
uint8_t sbd_enable_unmap_sync = 0;

/*
 * Global property settings for the logical unit.
 *
 * The vendor, product and revision strings are space padded to fixed
 * widths (8, 16 and 4 characters); sbd_attach() overwrites them when the
 * corresponding driver properties are present.
 */
char sbd_vendor_id[]	= "SUN     ";
char sbd_product_id[]	= "COMSTAR         ";
char sbd_revision[]	= "1.0 ";
char *sbd_mgmt_url = NULL;
uint16_t sbd_mgmt_url_alloc_size = 0;
krwlock_t sbd_global_prop_lock;

/* LU provider name handed to STMF through lp_name in _init(). */
static char sbd_name[] = "sbd";
127
/*
 * Character device entry points for the sbd control node. Only open, close
 * and ioctl are backed by driver routines; every other entry point is the
 * nodev/nochpoll stub.
 */
static struct cb_ops sbd_cb_ops = {
	sbd_open,			/* open */
	sbd_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	stmf_sbd_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* chpoll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_NEW | D_MP,			/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* aread */
	nodev				/* awrite */
};
148
/*
 * Device operations vector; wires the getinfo/attach/detach routines of
 * this file and the cb_ops table above into the module linkage.
 */
static struct dev_ops sbd_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	sbd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	sbd_attach,		/* attach */
	sbd_detach,		/* detach */
	nodev,			/* reset */
	&sbd_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	NULL			/* power */
};
162
/* Description string stored in the modldrv linkage below. */
#define	SBD_NAME	"COMSTAR SBD+ "

/* Driver linkage: ties the description and dev_ops to mod_driverops. */
static struct modldrv modldrv = {
	&mod_driverops,
	SBD_NAME,
	&sbd_ops
};

/* Module linkage passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
176
177int
178_init(void)
179{
180	int ret;
181
182	ret = mod_install(&modlinkage);
183	if (ret)
184		return (ret);
185	sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
186	    0, 0);
187	sbd_lp->lp_lpif_rev = LPIF_REV_2;
188	sbd_lp->lp_instance = 0;
189	sbd_lp->lp_name = sbd_name;
190	sbd_lp->lp_cb = sbd_lp_cb;
191	sbd_lp->lp_alua_support = 1;
192	sbd_lp->lp_proxy_msg = sbd_proxy_msg;
193	sbd_zfs_ident = ldi_ident_from_anon();
194
195	if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
196		(void) mod_remove(&modlinkage);
197		stmf_free(sbd_lp);
198		return (EINVAL);
199	}
200	mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
201	rw_init(&sbd_global_prop_lock, NULL, RW_DRIVER, NULL);
202
203	if (HardwareAcceleratedLocking == 0)
204		cmn_err(CE_NOTE, "HardwareAcceleratedLocking Disabled");
205	if (HardwareAcceleratedMove == 0)
206		cmn_err(CE_NOTE, "HardwareAcceleratedMove  Disabled");
207	if (HardwareAcceleratedInit == 0)
208		cmn_err(CE_NOTE, "HardwareAcceleratedInit  Disabled");
209
210	return (0);
211}
212
213int
214_fini(void)
215{
216	int ret;
217
218	/*
219	 * If we have registered lus, then make sure they are all offline
220	 * if so then deregister them. This should drop the sbd_lu_count
221	 * to zero.
222	 */
223	if (sbd_lu_count) {
224		sbd_lu_t *slu;
225
226		/* See if all of them are offline */
227		mutex_enter(&sbd_lock);
228		for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
229			if ((slu->sl_state != STMF_STATE_OFFLINE) ||
230			    slu->sl_state_not_acked) {
231				mutex_exit(&sbd_lock);
232				return (EBUSY);
233			}
234		}
235		mutex_exit(&sbd_lock);
236
237#if 0
238		/* ok start deregistering them */
239		while (sbd_lu_list) {
240			sbd_store_t *sst = sbd_lu_list->sl_sst;
241			if (sst->sst_deregister_lu(sst) != STMF_SUCCESS)
242				return (EBUSY);
243		}
244#endif
245		return (EBUSY);
246	}
247	if (stmf_deregister_lu_provider(sbd_lp) != STMF_SUCCESS)
248		return (EBUSY);
249	ret = mod_remove(&modlinkage);
250	if (ret != 0) {
251		(void) stmf_register_lu_provider(sbd_lp);
252		return (ret);
253	}
254	stmf_free(sbd_lp);
255	mutex_destroy(&sbd_lock);
256	rw_destroy(&sbd_global_prop_lock);
257	ldi_ident_release(sbd_zfs_ident);
258	return (0);
259}
260
261int
262_info(struct modinfo *modinfop)
263{
264	return (mod_info(&modlinkage, modinfop));
265}
266
267/* ARGSUSED */
268static int
269sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
270{
271	switch (cmd) {
272	case DDI_INFO_DEVT2DEVINFO:
273		*result = sbd_dip;
274		break;
275	case DDI_INFO_DEVT2INSTANCE:
276		*result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
277		break;
278	default:
279		return (DDI_FAILURE);
280	}
281
282	return (DDI_SUCCESS);
283}
284
285static int
286sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
287{
288	char	*prop;
289
290	switch (cmd) {
291	case DDI_ATTACH:
292		sbd_dip = dip;
293
294		if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
295		    DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
296			break;
297		}
298		ddi_report_dev(dip);
299
300		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
301		    DDI_PROP_DONTPASS, "vendor-id", &prop) == DDI_SUCCESS) {
302			(void) snprintf(sbd_vendor_id, 9, "%s%8s", prop, "");
303			ddi_prop_free(prop);
304		}
305		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
306		    DDI_PROP_DONTPASS, "product-id", &prop) == DDI_SUCCESS) {
307			(void) snprintf(sbd_product_id, 17, "%s%16s", prop, "");
308			ddi_prop_free(prop);
309		}
310		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
311		    DDI_PROP_DONTPASS, "revision", &prop) == DDI_SUCCESS) {
312			(void) snprintf(sbd_revision, 5, "%s%4s", prop, "");
313			ddi_prop_free(prop);
314		}
315
316		return (DDI_SUCCESS);
317	}
318
319	return (DDI_FAILURE);
320}
321
322static int
323sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
324{
325	switch (cmd) {
326	case DDI_DETACH:
327		ddi_remove_minor_node(dip, 0);
328		return (DDI_SUCCESS);
329	}
330
331	return (DDI_FAILURE);
332}
333
334/* ARGSUSED */
335static int
336sbd_open(dev_t *devp, int flag, int otype, cred_t *credp)
337{
338	if (otype != OTYP_CHR)
339		return (EINVAL);
340	return (0);
341}
342
343/* ARGSUSED */
344static int
345sbd_close(dev_t dev, int flag, int otype, cred_t *credp)
346{
347	return (0);
348}
349
/*
 * ioctl(9E) entry point for the sbd control node.
 *
 * The caller must pass drv_priv(); otherwise EPERM is returned. The
 * request descriptor and its input/output buffers are copied in with
 * stmf_copyin_iocdata(), the buffer sizes are validated per command and
 * the request is dispatched to the matching sbd_* routine.
 *
 * Size-check convention used below: an input buffer that is too small
 * yields EFAULT, an unexpected or too small output buffer yields EINVAL.
 * Several checks accept "sizeof (struct) - 8" bytes; this presumably
 * accounts for an 8 byte variable-length tail in those structures -- TODO
 * confirm against stmf_sbd_ioctl.h.
 *
 * The descriptor and output buffer are copied back out when the command
 * succeeded, or when it failed but set iocd->stmf_error. Unknown commands
 * return ENOTTY.
 */
/* ARGSUSED */
static int
stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
    cred_t *credp, int *rval)
{
	stmf_iocdata_t		*iocd;
	void			*ibuf	= NULL;
	void			*obuf	= NULL;
	sbd_lu_t		*nsl;
	int			i;
	int			ret;

	if (drv_priv(credp) != 0) {
		return (EPERM);
	}

	ret = stmf_copyin_iocdata(data, mode, &iocd, &ibuf, &obuf);
	if (ret)
		return (ret);
	iocd->stmf_error = 0;

	switch (cmd) {
	case SBD_IOCTL_CREATE_AND_REGISTER_LU:
		if (iocd->stmf_ibuf_size <
		    (sizeof (sbd_create_and_reg_lu_t) - 8)) {
			ret = EFAULT;
			break;
		}
		/*
		 * The input buffer is echoed into the output buffer below,
		 * so the output must not be larger than the input.
		 */
		if ((iocd->stmf_obuf_size == 0) ||
		    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
			ret = EINVAL;
			break;
		}
		ret = sbd_create_register_lu((sbd_create_and_reg_lu_t *)
		    ibuf, iocd->stmf_ibuf_size, &iocd->stmf_error);
		bcopy(ibuf, obuf, iocd->stmf_obuf_size);
		break;
	case SBD_IOCTL_SET_LU_STANDBY:
		if (iocd->stmf_ibuf_size < sizeof (sbd_set_lu_standby_t)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size) {
			ret = EINVAL;
			break;
		}
		ret = sbd_set_lu_standby((sbd_set_lu_standby_t *)ibuf,
		    &iocd->stmf_error);
		break;
	case SBD_IOCTL_IMPORT_LU:
		if (iocd->stmf_ibuf_size <
		    (sizeof (sbd_import_lu_t) - 8)) {
			ret = EFAULT;
			break;
		}
		/* As for create: the input is echoed back to the caller. */
		if ((iocd->stmf_obuf_size == 0) ||
		    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
			ret = EINVAL;
			break;
		}
		ret = sbd_import_lu((sbd_import_lu_t *)ibuf,
		    iocd->stmf_ibuf_size, &iocd->stmf_error, 0, NULL);
		bcopy(ibuf, obuf, iocd->stmf_obuf_size);
		break;
	case SBD_IOCTL_DELETE_LU:
		if (iocd->stmf_ibuf_size < (sizeof (sbd_delete_lu_t) - 8)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size) {
			ret = EINVAL;
			break;
		}
		ret = sbd_delete_lu((sbd_delete_lu_t *)ibuf,
		    iocd->stmf_ibuf_size, &iocd->stmf_error);
		break;
	case SBD_IOCTL_MODIFY_LU:
		if (iocd->stmf_ibuf_size < (sizeof (sbd_modify_lu_t) - 8)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size) {
			ret = EINVAL;
			break;
		}
		ret = sbd_modify_lu((sbd_modify_lu_t *)ibuf,
		    iocd->stmf_ibuf_size, &iocd->stmf_error);
		break;
	case SBD_IOCTL_SET_GLOBAL_LU:
		if (iocd->stmf_ibuf_size < (sizeof (sbd_global_props_t) - 8)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size) {
			ret = EINVAL;
			break;
		}
		ret = sbd_set_global_props((sbd_global_props_t *)ibuf,
		    iocd->stmf_ibuf_size, &iocd->stmf_error);
		break;
	case SBD_IOCTL_GET_GLOBAL_LU:
		/* Pure query: no input buffer is accepted. */
		if (iocd->stmf_ibuf_size) {
			ret = EINVAL;
			break;
		}
		if (iocd->stmf_obuf_size < sizeof (sbd_global_props_t)) {
			ret = EINVAL;
			break;
		}
		ret = sbd_get_global_props((sbd_global_props_t *)obuf,
		    iocd->stmf_obuf_size, &iocd->stmf_error);
		break;
	case SBD_IOCTL_GET_LU_PROPS:
		if (iocd->stmf_ibuf_size < (sizeof (sbd_lu_props_t) - 8)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size < sizeof (sbd_lu_props_t)) {
			ret = EINVAL;
			break;
		}
		ret = sbd_get_lu_props((sbd_lu_props_t *)ibuf,
		    iocd->stmf_ibuf_size, (sbd_lu_props_t *)obuf,
		    iocd->stmf_obuf_size, &iocd->stmf_error);
		break;
	case SBD_IOCTL_GET_LU_LIST:
		/*
		 * Each entry is the 16 bytes found at offset 4 of the LU's
		 * sl_device_id, hence the ">> 4" / "<< 4" conversions between
		 * byte sizes and entry counts. The total number of LUs is
		 * always reported, even when the buffer holds fewer entries.
		 */
		mutex_enter(&sbd_lock);
		iocd->stmf_obuf_max_nentries = sbd_lu_count;
		iocd->stmf_obuf_nentries = min((iocd->stmf_obuf_size >> 4),
		    sbd_lu_count);
		for (nsl = sbd_lu_list, i = 0; nsl &&
		    (i < iocd->stmf_obuf_nentries); i++, nsl = nsl->sl_next) {
			bcopy(nsl->sl_device_id + 4,
			    &(((uint8_t *)obuf)[i << 4]), 16);
		}
		mutex_exit(&sbd_lock);
		ret = 0;
		iocd->stmf_error = 0;
		break;
	case SBD_IOCTL_GET_UNMAP_PROPS:
		if (iocd->stmf_ibuf_size < sizeof (sbd_unmap_props_t)) {
			ret = EFAULT;
			break;
		}
		if (iocd->stmf_obuf_size < sizeof (sbd_unmap_props_t)) {
			ret = EINVAL;
			break;
		}
		ret = sbd_get_unmap_props((sbd_unmap_props_t *)ibuf,
		    (sbd_unmap_props_t *)obuf, &iocd->stmf_error);
		break;
	default:
		ret = ENOTTY;
	}

	/*
	 * On success the copyout result becomes the return value. On failure
	 * the copyout is best effort and only done when a detailed error code
	 * was set for the caller.
	 */
	if (ret == 0) {
		ret = stmf_copyout_iocdata(data, mode, iocd, obuf);
	} else if (iocd->stmf_error) {
		(void) stmf_copyout_iocdata(data, mode, iocd, obuf);
	}
	/* Release the kernel copies of the buffers and the descriptor. */
	if (obuf) {
		kmem_free(obuf, iocd->stmf_obuf_size);
		obuf = NULL;
	}
	if (ibuf) {
		kmem_free(ibuf, iocd->stmf_ibuf_size);
		ibuf = NULL;
	}
	kmem_free(iocd, sizeof (stmf_iocdata_t));
	return (ret);
}
521
522/* ARGSUSED */
523void
524sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags)
525{
526	nvpair_t	*np;
527	char		*s;
528	sbd_import_lu_t *ilu;
529	uint32_t	ilu_sz;
530	uint32_t	struct_sz;
531	uint32_t	err_ret;
532	int		iret;
533
534	if ((cmd != STMF_PROVIDER_DATA_UPDATED) || (arg == NULL)) {
535		return;
536	}
537
538	if ((flags & (STMF_PCB_STMF_ONLINING | STMF_PCB_PREG_COMPLETE)) == 0) {
539		return;
540	}
541
542	np = NULL;
543	ilu_sz = 1024;
544	ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
545	while ((np = nvlist_next_nvpair((nvlist_t *)arg, np)) != NULL) {
546		if (nvpair_type(np) != DATA_TYPE_STRING) {
547			continue;
548		}
549		if (nvpair_value_string(np, &s) != 0) {
550			continue;
551		}
552		struct_sz = max(8, strlen(s) + 1);
553		struct_sz += sizeof (sbd_import_lu_t) - 8;
554		if (struct_sz > ilu_sz) {
555			kmem_free(ilu, ilu_sz);
556			ilu_sz = struct_sz + 32;
557			ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
558		}
559		ilu->ilu_struct_size = struct_sz;
560		(void) strcpy(ilu->ilu_meta_fname, s);
561		iret = sbd_import_lu(ilu, struct_sz, &err_ret, 0, NULL);
562		if (iret) {
563			stmf_trace(0, "sbd_lp_cb: import_lu failed, ret = %d, "
564			    "err_ret = %d", iret, err_ret);
565		} else {
566			stmf_trace(0, "Imported the LU %s", nvpair_name(np));
567		}
568	}
569
570	if (ilu) {
571		kmem_free(ilu, ilu_sz);
572		ilu = NULL;
573	}
574}
575
576sbd_status_t
577sbd_link_lu(sbd_lu_t *sl)
578{
579	sbd_lu_t *nsl;
580
581	mutex_enter(&sbd_lock);
582	mutex_enter(&sl->sl_lock);
583	ASSERT(sl->sl_trans_op != SL_OP_NONE);
584
585	if (sl->sl_flags & SL_LINKED) {
586		mutex_exit(&sbd_lock);
587		mutex_exit(&sl->sl_lock);
588		return (SBD_ALREADY);
589	}
590	for (nsl = sbd_lu_list; nsl; nsl = nsl->sl_next) {
591		if (strcmp(nsl->sl_name, sl->sl_name) == 0)
592			break;
593	}
594	if (nsl) {
595		mutex_exit(&sbd_lock);
596		mutex_exit(&sl->sl_lock);
597		return (SBD_ALREADY);
598	}
599	sl->sl_next = sbd_lu_list;
600	sbd_lu_list = sl;
601	sl->sl_flags |= SL_LINKED;
602	mutex_exit(&sbd_lock);
603	mutex_exit(&sl->sl_lock);
604	return (SBD_SUCCESS);
605}
606
607void
608sbd_unlink_lu(sbd_lu_t *sl)
609{
610	sbd_lu_t **ppnsl;
611
612	mutex_enter(&sbd_lock);
613	mutex_enter(&sl->sl_lock);
614	ASSERT(sl->sl_trans_op != SL_OP_NONE);
615
616	ASSERT(sl->sl_flags & SL_LINKED);
617	for (ppnsl = &sbd_lu_list; *ppnsl; ppnsl = &((*ppnsl)->sl_next)) {
618		if (*ppnsl == sl)
619			break;
620	}
621	ASSERT(*ppnsl);
622	*ppnsl = (*ppnsl)->sl_next;
623	sl->sl_flags &= ~SL_LINKED;
624	mutex_exit(&sbd_lock);
625	mutex_exit(&sl->sl_lock);
626}
627
628sbd_status_t
629sbd_find_and_lock_lu(uint8_t *guid, uint8_t *meta_name, uint8_t op,
630    sbd_lu_t **ppsl)
631{
632	sbd_lu_t *sl;
633	int found = 0;
634	sbd_status_t sret;
635
636	mutex_enter(&sbd_lock);
637	for (sl = sbd_lu_list; sl; sl = sl->sl_next) {
638		if (guid) {
639			found = bcmp(sl->sl_device_id + 4, guid, 16) == 0;
640		} else {
641			found = strcmp(sl->sl_name, (char *)meta_name) == 0;
642		}
643		if (found)
644			break;
645	}
646	if (!found) {
647		mutex_exit(&sbd_lock);
648		return (SBD_NOT_FOUND);
649	}
650	mutex_enter(&sl->sl_lock);
651	if (sl->sl_trans_op == SL_OP_NONE) {
652		sl->sl_trans_op = op;
653		*ppsl = sl;
654		sret = SBD_SUCCESS;
655	} else {
656		sret = SBD_BUSY;
657	}
658	mutex_exit(&sl->sl_lock);
659	mutex_exit(&sbd_lock);
660	return (sret);
661}
662
663sbd_status_t
664sbd_read_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
665{
666	uint64_t	meta_align;
667	uint64_t	starting_off;
668	uint64_t	data_off;
669	uint64_t	ending_off;
670	uint64_t	io_size;
671	uint8_t		*io_buf;
672	vnode_t		*vp;
673	sbd_status_t	ret;
674	ssize_t		resid;
675	int		vret;
676
677	ASSERT(sl->sl_flags & SL_META_OPENED);
678	if (sl->sl_flags & SL_SHARED_META) {
679		meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
680		vp = sl->sl_data_vp;
681		ASSERT(vp);
682	} else {
683		meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
684		if ((sl->sl_flags & SL_ZFS_META) == 0) {
685			vp = sl->sl_meta_vp;
686			ASSERT(vp);
687		}
688	}
689	starting_off = offset & ~(meta_align);
690	data_off = offset & meta_align;
691	ending_off = (offset + size + meta_align) & (~meta_align);
692	if (ending_off > sl->sl_meta_size_used) {
693		bzero(buf, size);
694		if (starting_off >= sl->sl_meta_size_used) {
695			return (SBD_SUCCESS);
696		}
697		ending_off = (sl->sl_meta_size_used + meta_align) &
698		    (~meta_align);
699		if (size > (ending_off - (starting_off + data_off))) {
700			size = ending_off - (starting_off + data_off);
701		}
702	}
703	io_size = ending_off - starting_off;
704	io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
705	ASSERT((starting_off + io_size) <= sl->sl_total_meta_size);
706
707	/*
708	 * Don't proceed if the device has been closed
709	 * This can occur on an access state change to standby or
710	 * a delete. The writer lock is acquired before closing the
711	 * lu. If importing, reading the metadata is valid, hence
712	 * the check on SL_OP_IMPORT_LU.
713	 */
714	rw_enter(&sl->sl_access_state_lock, RW_READER);
715	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 &&
716	    sl->sl_trans_op != SL_OP_IMPORT_LU) {
717		rw_exit(&sl->sl_access_state_lock);
718		ret = SBD_FILEIO_FAILURE;
719		goto sbd_read_meta_failure;
720	}
721	if (sl->sl_flags & SL_ZFS_META) {
722		if ((ret = sbd_read_zfs_meta(sl, io_buf, io_size,
723		    starting_off)) != SBD_SUCCESS) {
724			rw_exit(&sl->sl_access_state_lock);
725			goto sbd_read_meta_failure;
726		}
727	} else {
728		vret = vn_rdwr(UIO_READ, vp, (caddr_t)io_buf, (ssize_t)io_size,
729		    (offset_t)starting_off, UIO_SYSSPACE, FRSYNC,
730		    RLIM64_INFINITY, CRED(), &resid);
731
732		if (vret || resid) {
733			ret = SBD_FILEIO_FAILURE | vret;
734			rw_exit(&sl->sl_access_state_lock);
735			goto sbd_read_meta_failure;
736		}
737	}
738	rw_exit(&sl->sl_access_state_lock);
739
740	bcopy(io_buf + data_off, buf, size);
741	ret = SBD_SUCCESS;
742
743sbd_read_meta_failure:
744	kmem_free(io_buf, io_size);
745	return (ret);
746}
747
748sbd_status_t
749sbd_write_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
750{
751	uint64_t	meta_align;
752	uint64_t	starting_off;
753	uint64_t	data_off;
754	uint64_t	ending_off;
755	uint64_t	io_size;
756	uint8_t		*io_buf;
757	vnode_t		*vp;
758	sbd_status_t	ret;
759	ssize_t		resid;
760	int		vret;
761
762	ASSERT(sl->sl_flags & SL_META_OPENED);
763	if (sl->sl_flags & SL_SHARED_META) {
764		meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
765		vp = sl->sl_data_vp;
766		ASSERT(vp);
767	} else {
768		meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
769		if ((sl->sl_flags & SL_ZFS_META) == 0) {
770			vp = sl->sl_meta_vp;
771			ASSERT(vp);
772		}
773	}
774	starting_off = offset & ~(meta_align);
775	data_off = offset & meta_align;
776	ending_off = (offset + size + meta_align) & (~meta_align);
777	io_size = ending_off - starting_off;
778	io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
779	ret = sbd_read_meta(sl, starting_off, io_size, io_buf);
780	if (ret != SBD_SUCCESS) {
781		goto sbd_write_meta_failure;
782	}
783	bcopy(buf, io_buf + data_off, size);
784	/*
785	 * Don't proceed if the device has been closed
786	 * This can occur on an access state change to standby or
787	 * a delete. The writer lock is acquired before closing the
788	 * lu. If importing, reading the metadata is valid, hence
789	 * the check on SL_OP_IMPORT_LU.
790	 */
791	rw_enter(&sl->sl_access_state_lock, RW_READER);
792	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0 &&
793	    sl->sl_trans_op != SL_OP_IMPORT_LU) {
794		rw_exit(&sl->sl_access_state_lock);
795		ret = SBD_FILEIO_FAILURE;
796		goto sbd_write_meta_failure;
797	}
798	if (sl->sl_flags & SL_ZFS_META) {
799		if ((ret = sbd_write_zfs_meta(sl, io_buf, io_size,
800		    starting_off)) != SBD_SUCCESS) {
801			rw_exit(&sl->sl_access_state_lock);
802			goto sbd_write_meta_failure;
803		}
804	} else {
805		vret = vn_rdwr(UIO_WRITE, vp, (caddr_t)io_buf, (ssize_t)io_size,
806		    (offset_t)starting_off, UIO_SYSSPACE, FDSYNC,
807		    RLIM64_INFINITY, CRED(), &resid);
808
809		if (vret || resid) {
810			ret = SBD_FILEIO_FAILURE | vret;
811			rw_exit(&sl->sl_access_state_lock);
812			goto sbd_write_meta_failure;
813		}
814	}
815	rw_exit(&sl->sl_access_state_lock);
816
817	ret = SBD_SUCCESS;
818
819sbd_write_meta_failure:
820	kmem_free(io_buf, io_size);
821	return (ret);
822}
823
/*
 * Returns the 8-bit additive checksum (sum modulo 256) of the first `size'
 * bytes of `buf'. A `size' of zero or less yields 0.
 */
uint8_t
sbd_calc_sum(uint8_t *buf, int size)
{
	uint8_t total = 0;
	int idx;

	for (idx = 0; idx < size; idx++)
		total += buf[idx];

	return (total);
}
834
835uint8_t
836sbd_calc_section_sum(sm_section_hdr_t *sm, uint32_t sz)
837{
838	uint8_t s, o;
839
840	o = sm->sms_chksum;
841	sm->sms_chksum = 0;
842	s = sbd_calc_sum((uint8_t *)sm, sz);
843	sm->sms_chksum = o;
844
845	return (s);
846}
847
/*
 * Bounded string length: returns the number of characters before the
 * first NUL in `str', examining at most `maxlen' characters. When no NUL
 * is found within that range, `maxlen' is returned.
 */
uint32_t
sbd_strlen(char *str, uint32_t maxlen)
{
	uint32_t len = 0;

	while ((len < maxlen) && (str[len] != 0))
		len++;

	return (len);
}
859
860void
861sbd_swap_meta_start(sbd_meta_start_t *sm)
862{
863	if (sm->sm_magic == SBD_MAGIC)
864		return;
865	sm->sm_magic		= BSWAP_64(sm->sm_magic);
866	sm->sm_meta_size	= BSWAP_64(sm->sm_meta_size);
867	sm->sm_meta_size_used	= BSWAP_64(sm->sm_meta_size_used);
868	sm->sm_ver_major	= BSWAP_16(sm->sm_ver_major);
869	sm->sm_ver_minor	= BSWAP_16(sm->sm_ver_minor);
870	sm->sm_ver_subminor	= BSWAP_16(sm->sm_ver_subminor);
871}
872
873void
874sbd_swap_section_hdr(sm_section_hdr_t *sm)
875{
876	if (sm->sms_data_order == SMS_DATA_ORDER)
877		return;
878	sm->sms_offset		= BSWAP_64(sm->sms_offset);
879	sm->sms_size		= BSWAP_32(sm->sms_size);
880	sm->sms_id		= BSWAP_16(sm->sms_id);
881	sm->sms_chksum		+= SMS_DATA_ORDER - sm->sms_data_order;
882	sm->sms_data_order	= SMS_DATA_ORDER;
883}
884
885void
886sbd_swap_lu_info_1_0(sbd_lu_info_1_0_t *sli)
887{
888	sbd_swap_section_hdr(&sli->sli_sms_header);
889	if (sli->sli_data_order == SMS_DATA_ORDER)
890		return;
891	sli->sli_sms_header.sms_chksum	+= SMS_DATA_ORDER - sli->sli_data_order;
892	sli->sli_data_order		= SMS_DATA_ORDER;
893	sli->sli_total_store_size	= BSWAP_64(sli->sli_total_store_size);
894	sli->sli_total_meta_size	= BSWAP_64(sli->sli_total_meta_size);
895	sli->sli_lu_data_offset		= BSWAP_64(sli->sli_lu_data_offset);
896	sli->sli_lu_data_size		= BSWAP_64(sli->sli_lu_data_size);
897	sli->sli_flags			= BSWAP_32(sli->sli_flags);
898	sli->sli_blocksize		= BSWAP_16(sli->sli_blocksize);
899}
900
901void
902sbd_swap_lu_info_1_1(sbd_lu_info_1_1_t *sli)
903{
904	sbd_swap_section_hdr(&sli->sli_sms_header);
905	if (sli->sli_data_order == SMS_DATA_ORDER)
906		return;
907	sli->sli_sms_header.sms_chksum	+= SMS_DATA_ORDER - sli->sli_data_order;
908	sli->sli_data_order		= SMS_DATA_ORDER;
909	sli->sli_flags			= BSWAP_32(sli->sli_flags);
910	sli->sli_lu_size		= BSWAP_64(sli->sli_lu_size);
911	sli->sli_meta_fname_offset	= BSWAP_64(sli->sli_meta_fname_offset);
912	sli->sli_data_fname_offset	= BSWAP_64(sli->sli_data_fname_offset);
913	sli->sli_serial_offset		= BSWAP_64(sli->sli_serial_offset);
914	sli->sli_alias_offset		= BSWAP_64(sli->sli_alias_offset);
915	sli->sli_mgmt_url_offset	= BSWAP_64(sli->sli_mgmt_url_offset);
916}
917
918sbd_status_t
919sbd_load_section_hdr(sbd_lu_t *sl, sm_section_hdr_t *sms)
920{
921	sm_section_hdr_t	h;
922	uint64_t		st;
923	sbd_status_t		ret;
924
925	for (st = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
926	    st < sl->sl_meta_size_used; st += h.sms_size) {
927		if ((ret = sbd_read_meta(sl, st, sizeof (sm_section_hdr_t),
928		    (uint8_t *)&h)) != SBD_SUCCESS) {
929			return (ret);
930		}
931		if (h.sms_data_order != SMS_DATA_ORDER) {
932			sbd_swap_section_hdr(&h);
933		}
934		if ((h.sms_data_order != SMS_DATA_ORDER) ||
935		    (h.sms_offset != st) || (h.sms_size < sizeof (h)) ||
936		    ((st + h.sms_size) > sl->sl_meta_size_used)) {
937			return (SBD_META_CORRUPTED);
938		}
939		if (h.sms_id == sms->sms_id) {
940			bcopy(&h, sms, sizeof (h));
941			return (SBD_SUCCESS);
942		}
943	}
944
945	return (SBD_NOT_FOUND);
946}
947
948sbd_status_t
949sbd_load_meta_start(sbd_lu_t *sl)
950{
951	sbd_meta_start_t *sm;
952	sbd_status_t ret;
953
954	/* Fake meta params initially */
955	sl->sl_total_meta_size = (uint64_t)-1;
956	sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
957
958	sm = kmem_zalloc(sizeof (*sm), KM_SLEEP);
959	ret = sbd_read_meta(sl, sl->sl_meta_offset, sizeof (*sm),
960	    (uint8_t *)sm);
961	if (ret != SBD_SUCCESS) {
962		goto load_meta_start_failed;
963	}
964
965	if (sm->sm_magic != SBD_MAGIC) {
966		sbd_swap_meta_start(sm);
967	}
968
969	if ((sm->sm_magic != SBD_MAGIC) || (sbd_calc_sum((uint8_t *)sm,
970	    sizeof (*sm) - 1) != sm->sm_chksum)) {
971		ret = SBD_META_CORRUPTED;
972		goto load_meta_start_failed;
973	}
974
975	if (sm->sm_ver_major != SBD_VER_MAJOR) {
976		ret = SBD_NOT_SUPPORTED;
977		goto load_meta_start_failed;
978	}
979
980	sl->sl_total_meta_size = sm->sm_meta_size;
981	sl->sl_meta_size_used = sm->sm_meta_size_used;
982	ret = SBD_SUCCESS;
983
984load_meta_start_failed:
985	kmem_free(sm, sizeof (*sm));
986	return (ret);
987}
988
989sbd_status_t
990sbd_write_meta_start(sbd_lu_t *sl, uint64_t meta_size, uint64_t meta_size_used)
991{
992	sbd_meta_start_t *sm;
993	sbd_status_t ret;
994
995	sm = (sbd_meta_start_t *)kmem_zalloc(sizeof (sbd_meta_start_t),
996	    KM_SLEEP);
997
998	sm->sm_magic = SBD_MAGIC;
999	sm->sm_meta_size = meta_size;
1000	sm->sm_meta_size_used = meta_size_used;
1001	sm->sm_ver_major = SBD_VER_MAJOR;
1002	sm->sm_ver_minor = SBD_VER_MINOR;
1003	sm->sm_ver_subminor = SBD_VER_SUBMINOR;
1004	sm->sm_chksum = sbd_calc_sum((uint8_t *)sm, sizeof (*sm) - 1);
1005
1006	ret = sbd_write_meta(sl, sl->sl_meta_offset, sizeof (*sm),
1007	    (uint8_t *)sm);
1008	kmem_free(sm, sizeof (*sm));
1009
1010	return (ret);
1011}
1012
1013sbd_status_t
1014sbd_read_meta_section(sbd_lu_t *sl, sm_section_hdr_t **ppsms, uint16_t sms_id)
1015{
1016	sbd_status_t ret;
1017	sm_section_hdr_t sms;
1018	int alloced = 0;
1019
1020	mutex_enter(&sl->sl_metadata_lock);
1021	if (((*ppsms) == NULL) || ((*ppsms)->sms_offset == 0)) {
1022		bzero(&sms, sizeof (sm_section_hdr_t));
1023		sms.sms_id = sms_id;
1024		if ((ret = sbd_load_section_hdr(sl, &sms)) != SBD_SUCCESS) {
1025			mutex_exit(&sl->sl_metadata_lock);
1026			return (ret);
1027		} else {
1028			if ((*ppsms) == NULL) {
1029				*ppsms = (sm_section_hdr_t *)kmem_zalloc(
1030				    sms.sms_size, KM_SLEEP);
1031				alloced = 1;
1032			}
1033			bcopy(&sms, *ppsms, sizeof (sm_section_hdr_t));
1034		}
1035	}
1036
1037	ret = sbd_read_meta(sl, (*ppsms)->sms_offset, (*ppsms)->sms_size,
1038	    (uint8_t *)(*ppsms));
1039	if (ret == SBD_SUCCESS) {
1040		uint8_t s;
1041		if ((*ppsms)->sms_data_order != SMS_DATA_ORDER)
1042			sbd_swap_section_hdr(*ppsms);
1043		if ((*ppsms)->sms_id != SMS_ID_UNUSED) {
1044			s = sbd_calc_section_sum(*ppsms, (*ppsms)->sms_size);
1045			if (s != (*ppsms)->sms_chksum)
1046				ret = SBD_META_CORRUPTED;
1047		}
1048	}
1049	mutex_exit(&sl->sl_metadata_lock);
1050
1051	if ((ret != SBD_SUCCESS) && alloced)
1052		kmem_free(*ppsms, sms.sms_size);
1053	return (ret);
1054}
1055
1056sbd_status_t
1057sbd_load_section_hdr_unbuffered(sbd_lu_t *sl, sm_section_hdr_t *sms)
1058{
1059	sbd_status_t	ret;
1060
1061	/*
1062	 * Bypass buffering and re-read the meta data from permanent storage.
1063	 */
1064	if (sl->sl_flags & SL_ZFS_META) {
1065		if ((ret = sbd_open_zfs_meta(sl)) != SBD_SUCCESS) {
1066			return (ret);
1067		}
1068	}
1069	/* Re-get the meta sizes into sl */
1070	if ((ret = sbd_load_meta_start(sl)) != SBD_SUCCESS) {
1071		return (ret);
1072	}
1073	return (sbd_load_section_hdr(sl, sms));
1074}
1075
1076sbd_status_t
1077sbd_write_meta_section(sbd_lu_t *sl, sm_section_hdr_t *sms)
1078{
1079	sm_section_hdr_t t;
1080	uint64_t off, s;
1081	uint64_t unused_start;
1082	sbd_status_t ret;
1083	sbd_status_t write_meta_ret = SBD_SUCCESS;
1084	uint8_t *cb;
1085	int meta_size_changed = 0;
1086	sm_section_hdr_t sms_before_unused = {0};
1087
1088	mutex_enter(&sl->sl_metadata_lock);
1089write_meta_section_again:
1090	if (sms->sms_offset) {
1091		/*
1092		 * If the section already exists and the size is the
1093		 * same as this new data then overwrite in place. If
1094		 * the sizes are different then mark the existing as
1095		 * unused and look for free space.
1096		 */
1097		ret = sbd_read_meta(sl, sms->sms_offset, sizeof (t),
1098		    (uint8_t *)&t);
1099		if (ret != SBD_SUCCESS) {
1100			mutex_exit(&sl->sl_metadata_lock);
1101			return (ret);
1102		}
1103		if (t.sms_data_order != SMS_DATA_ORDER) {
1104			sbd_swap_section_hdr(&t);
1105		}
1106		if (t.sms_id != sms->sms_id) {
1107			mutex_exit(&sl->sl_metadata_lock);
1108			return (SBD_INVALID_ARG);
1109		}
1110		if (t.sms_size == sms->sms_size) {
1111			ret = sbd_write_meta(sl, sms->sms_offset,
1112			    sms->sms_size, (uint8_t *)sms);
1113			mutex_exit(&sl->sl_metadata_lock);
1114			return (ret);
1115		}
1116		sms_before_unused = t;
1117
1118		t.sms_id = SMS_ID_UNUSED;
1119		/*
1120		 * For unused sections we only use chksum of the header. for
1121		 * all other sections, the chksum is for the entire section.
1122		 */
1123		t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
1124		ret = sbd_write_meta(sl, t.sms_offset, sizeof (t),
1125		    (uint8_t *)&t);
1126		if (ret != SBD_SUCCESS) {
1127			mutex_exit(&sl->sl_metadata_lock);
1128			return (ret);
1129		}
1130		sms->sms_offset = 0;
1131	} else {
1132		/* Section location is unknown, search for it. */
1133		t.sms_id = sms->sms_id;
1134		t.sms_data_order = SMS_DATA_ORDER;
1135		ret = sbd_load_section_hdr(sl, &t);
1136		if (ret == SBD_SUCCESS) {
1137			sms->sms_offset = t.sms_offset;
1138			sms->sms_chksum =
1139			    sbd_calc_section_sum(sms, sms->sms_size);
1140			goto write_meta_section_again;
1141		} else if (ret != SBD_NOT_FOUND) {
1142			mutex_exit(&sl->sl_metadata_lock);
1143			return (ret);
1144		}
1145	}
1146
1147	/*
1148	 * At this point we know that section does not already exist.
1149	 * Find space large enough to hold the section or grow meta if
1150	 * possible.
1151	 */
1152	unused_start = 0;
1153	s = 0;	/* size of space found */
1154
1155	/*
1156	 * Search all sections for unused space of sufficient size.
1157	 * The first one found is taken. Contiguous unused sections
1158	 * will be combined.
1159	 */
1160	for (off = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
1161	    off < sl->sl_meta_size_used; off += t.sms_size) {
1162		ret = sbd_read_meta(sl, off, sizeof (t), (uint8_t *)&t);
1163		if (ret != SBD_SUCCESS) {
1164			mutex_exit(&sl->sl_metadata_lock);
1165			return (ret);
1166		}
1167		if (t.sms_data_order != SMS_DATA_ORDER)
1168			sbd_swap_section_hdr(&t);
1169		if (t.sms_size == 0) {
1170			mutex_exit(&sl->sl_metadata_lock);
1171			return (SBD_META_CORRUPTED);
1172		}
1173		if (t.sms_id == SMS_ID_UNUSED) {
1174			if (unused_start == 0)
1175				unused_start = off;
1176			/*
1177			 * Calculate size of the unused space, break out
1178			 * if it satisfies the requirement.
1179			 */
1180			s = t.sms_size - unused_start + off;
1181			if ((s == sms->sms_size) || (s >= (sms->sms_size +
1182			    sizeof (t)))) {
1183				break;
1184			} else {
1185				s = 0;
1186			}
1187		} else {
1188			unused_start = 0;
1189		}
1190	}
1191
1192	off = (unused_start == 0) ? sl->sl_meta_size_used : unused_start;
1193	/*
1194	 * If none found, how much room is at the end?
1195	 * See if the data can be expanded.
1196	 */
1197	if (s == 0) {
1198		s = sl->sl_total_meta_size - off;
1199		if (s >= sms->sms_size || !(sl->sl_flags & SL_SHARED_META)) {
1200			s = sms->sms_size;
1201			meta_size_changed = 1;
1202		} else {
1203			s = 0;
1204		}
1205	}
1206
1207	if (s == 0) {
1208		mutex_exit(&sl->sl_metadata_lock);
1209		return (SBD_ALLOC_FAILURE);
1210	}
1211
1212	sms->sms_offset = off;
1213	sms->sms_chksum = sbd_calc_section_sum(sms, sms->sms_size);
1214	/*
1215	 * Since we may have to write more than one section (current +
1216	 * any unused), use a combined buffer.
1217	 */
1218	cb = kmem_zalloc(s, KM_SLEEP);
1219	bcopy(sms, cb, sms->sms_size);
1220	if (s > sms->sms_size) {
1221		t.sms_offset = off + sms->sms_size;
1222		t.sms_size = s - sms->sms_size;
1223		t.sms_id = SMS_ID_UNUSED;
1224		t.sms_data_order = SMS_DATA_ORDER;
1225		t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
1226		bcopy(&t, cb + sms->sms_size, sizeof (t));
1227	}
1228	/*
1229	 * Two write events & statuses take place. Failure writing the
1230	 * meta section takes precedence, can possibly be rolled back,
1231	 * & gets reported. Else return status from writing the meta start.
1232	 */
1233	ret = SBD_SUCCESS; /* Set a default, it's not always loaded below. */
1234	if (meta_size_changed) {
1235		uint64_t old_meta_size;
1236		uint64_t old_sz_used = sl->sl_meta_size_used; /* save a copy */
1237		old_meta_size = sl->sl_total_meta_size; /* save a copy */
1238
1239		write_meta_ret = sbd_write_meta(sl, off, s, cb);
1240		if (write_meta_ret == SBD_SUCCESS) {
1241			sl->sl_meta_size_used = off + s;
1242			if (sl->sl_total_meta_size < sl->sl_meta_size_used) {
1243				uint64_t meta_align =
1244				    (((uint64_t)1) <<
1245				    sl->sl_meta_blocksize_shift) - 1;
1246				sl->sl_total_meta_size =
1247				    (sl->sl_meta_size_used + meta_align) &
1248				    (~meta_align);
1249			}
1250			ret = sbd_write_meta_start(sl, sl->sl_total_meta_size,
1251			    sl->sl_meta_size_used);
1252			if (ret != SBD_SUCCESS) {
1253				sl->sl_meta_size_used = old_sz_used;
1254				sl->sl_total_meta_size = old_meta_size;
1255			}
1256		} else {
1257			sl->sl_meta_size_used = old_sz_used;
1258			sl->sl_total_meta_size = old_meta_size;
1259		}
1260	} else {
1261		write_meta_ret = sbd_write_meta(sl, off, s, cb);
1262	}
1263	if ((write_meta_ret != SBD_SUCCESS) &&
1264	    (sms_before_unused.sms_offset != 0)) {
1265		sm_section_hdr_t new_sms;
1266		sm_section_hdr_t *unused_sms;
1267		/*
1268		 * On failure writing the meta section attempt to undo
1269		 * the change to unused.
1270		 * Re-read the meta data from permanent storage.
1271		 * The section id can't exist for undo to be possible.
1272		 * Read what should be the entire old section data and
1273		 * insure the old data's still present by validating
1274		 * against it's old checksum.
1275		 */
1276		new_sms.sms_id = sms->sms_id;
1277		new_sms.sms_data_order = SMS_DATA_ORDER;
1278		if (sbd_load_section_hdr_unbuffered(sl, &new_sms) !=
1279		    SBD_NOT_FOUND) {
1280			goto done;
1281		}
1282		unused_sms = kmem_zalloc(sms_before_unused.sms_size, KM_SLEEP);
1283		if (sbd_read_meta(sl, sms_before_unused.sms_offset,
1284		    sms_before_unused.sms_size,
1285		    (uint8_t *)unused_sms) != SBD_SUCCESS) {
1286			goto done;
1287		}
1288		if (unused_sms->sms_data_order != SMS_DATA_ORDER) {
1289			sbd_swap_section_hdr(unused_sms);
1290		}
1291		if (unused_sms->sms_id != SMS_ID_UNUSED) {
1292			goto done;
1293		}
1294		if (unused_sms->sms_offset != sms_before_unused.sms_offset) {
1295			goto done;
1296		}
1297		if (unused_sms->sms_size != sms_before_unused.sms_size) {
1298			goto done;
1299		}
1300		unused_sms->sms_id = sms_before_unused.sms_id;
1301		if (sbd_calc_section_sum(unused_sms,
1302		    sizeof (sm_section_hdr_t)) !=
1303		    sbd_calc_section_sum(&sms_before_unused,
1304		    sizeof (sm_section_hdr_t))) {
1305			goto done;
1306		}
1307		unused_sms->sms_chksum =
1308		    sbd_calc_section_sum(unused_sms, unused_sms->sms_size);
1309		if (unused_sms->sms_chksum != sms_before_unused.sms_chksum) {
1310			goto done;
1311		}
1312		(void) sbd_write_meta(sl, unused_sms->sms_offset,
1313		    sizeof (sm_section_hdr_t), (uint8_t *)unused_sms);
1314	}
1315done:
1316	mutex_exit(&sl->sl_metadata_lock);
1317	kmem_free(cb, s);
1318	if (write_meta_ret != SBD_SUCCESS) {
1319		return (write_meta_ret);
1320	}
1321	return (ret);
1322}
1323
1324sbd_status_t
1325sbd_write_lu_info(sbd_lu_t *sl)
1326{
1327	sbd_lu_info_1_1_t *sli;
1328	int s;
1329	uint8_t *p;
1330	char *zvol_name = NULL;
1331	sbd_status_t ret;
1332
1333	mutex_enter(&sl->sl_lock);
1334
1335	s = sl->sl_serial_no_size;
1336	if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1337		if (sl->sl_data_filename) {
1338			s += strlen(sl->sl_data_filename) + 1;
1339		}
1340	}
1341	if (sl->sl_flags & SL_ZFS_META) {
1342		zvol_name = sbd_get_zvol_name(sl);
1343		s += strlen(zvol_name) + 1;
1344	}
1345	if (sl->sl_alias) {
1346		s += strlen(sl->sl_alias) + 1;
1347	}
1348	if (sl->sl_mgmt_url) {
1349		s += strlen(sl->sl_mgmt_url) + 1;
1350	}
1351	sli = (sbd_lu_info_1_1_t *)kmem_zalloc(sizeof (*sli) + s, KM_SLEEP);
1352	p = sli->sli_buf;
1353	if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1354		sli->sli_flags |= SLI_SEPARATE_META;
1355		(void) strcpy((char *)p, sl->sl_data_filename);
1356		sli->sli_data_fname_offset =
1357		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1358		sli->sli_flags |= SLI_DATA_FNAME_VALID;
1359		p += strlen(sl->sl_data_filename) + 1;
1360	}
1361	if (sl->sl_flags & SL_ZFS_META) {
1362		(void) strcpy((char *)p, zvol_name);
1363		sli->sli_meta_fname_offset =
1364		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1365		sli->sli_flags |= SLI_META_FNAME_VALID | SLI_ZFS_META;
1366		p += strlen(zvol_name) + 1;
1367		kmem_free(zvol_name, strlen(zvol_name) + 1);
1368		zvol_name = NULL;
1369	}
1370	if (sl->sl_alias) {
1371		(void) strcpy((char *)p, sl->sl_alias);
1372		sli->sli_alias_offset =
1373		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1374		sli->sli_flags |= SLI_ALIAS_VALID;
1375		p += strlen(sl->sl_alias) + 1;
1376	}
1377	if (sl->sl_mgmt_url) {
1378		(void) strcpy((char *)p, sl->sl_mgmt_url);
1379		sli->sli_mgmt_url_offset =
1380		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1381		sli->sli_flags |= SLI_MGMT_URL_VALID;
1382		p += strlen(sl->sl_mgmt_url) + 1;
1383	}
1384	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1385		sli->sli_flags |= SLI_WRITE_PROTECTED;
1386	}
1387	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
1388		sli->sli_flags |= SLI_WRITEBACK_CACHE_DISABLE;
1389	}
1390	if (sl->sl_flags & SL_VID_VALID) {
1391		bcopy(sl->sl_vendor_id, sli->sli_vid, 8);
1392		sli->sli_flags |= SLI_VID_VALID;
1393	}
1394	if (sl->sl_flags & SL_PID_VALID) {
1395		bcopy(sl->sl_product_id, sli->sli_pid, 16);
1396		sli->sli_flags |= SLI_PID_VALID;
1397	}
1398	if (sl->sl_flags & SL_REV_VALID) {
1399		bcopy(sl->sl_revision, sli->sli_rev, 4);
1400		sli->sli_flags |= SLI_REV_VALID;
1401	}
1402	if (sl->sl_serial_no_size) {
1403		bcopy(sl->sl_serial_no, p, sl->sl_serial_no_size);
1404		sli->sli_serial_size = sl->sl_serial_no_size;
1405		sli->sli_serial_offset =
1406		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1407		sli->sli_flags |= SLI_SERIAL_VALID;
1408		p += sli->sli_serial_size;
1409	}
1410	sli->sli_lu_size = sl->sl_lu_size;
1411	sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1412	sli->sli_data_order = SMS_DATA_ORDER;
1413	bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1414
1415	sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1416	sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1417	sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1418
1419	mutex_exit(&sl->sl_lock);
1420	ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1421	kmem_free(sli, sizeof (*sli) + s);
1422	return (ret);
1423}
1424
1425/*
1426 * Will scribble SL_UNMAP_ENABLED into sl_flags if we succeed.
1427 */
1428static void
1429do_unmap_setup(sbd_lu_t *sl)
1430{
1431	if (sbd_unmap_enable == 0) {
1432		sl->sl_flags &= ~(SL_UNMAP_ENABLED);
1433		return;
1434	}
1435
1436	if ((sl->sl_flags & SL_ZFS_META) == 0)
1437		return;	/* No UNMAP for you. */
1438
1439	sl->sl_flags |= SL_UNMAP_ENABLED;
1440}
1441
1442int
1443sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1444{
1445	stmf_lu_t *lu = sl->sl_lu;
1446	stmf_status_t ret;
1447
1448	do_unmap_setup(sl);
1449
1450	lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1451	if (sl->sl_alias) {
1452		lu->lu_alias = sl->sl_alias;
1453	} else {
1454		lu->lu_alias = sl->sl_name;
1455	}
1456	if (sl->sl_access_state == SBD_LU_STANDBY) {
1457		/* call set access state */
1458		ret = stmf_set_lu_access(lu, STMF_LU_STANDBY);
1459		if (ret != STMF_SUCCESS) {
1460			*err_ret = SBD_RET_ACCESS_STATE_FAILED;
1461			return (EIO);
1462		}
1463	}
1464	/* set proxy_reg_cb_arg to meta filename */
1465	if (sl->sl_meta_filename) {
1466		lu->lu_proxy_reg_arg = sl->sl_meta_filename;
1467		lu->lu_proxy_reg_arg_len = strlen(sl->sl_meta_filename) + 1;
1468	} else {
1469		lu->lu_proxy_reg_arg = sl->sl_data_filename;
1470		lu->lu_proxy_reg_arg_len = strlen(sl->sl_data_filename) + 1;
1471	}
1472	lu->lu_lp = sbd_lp;
1473	lu->lu_task_alloc = sbd_task_alloc;
1474	lu->lu_new_task = sbd_new_task;
1475	lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1476	lu->lu_send_status_done = sbd_send_status_done;
1477	lu->lu_task_free = sbd_task_free;
1478	lu->lu_abort = sbd_abort;
1479	lu->lu_task_poll = sbd_task_poll;
1480	lu->lu_dbuf_free = sbd_dbuf_free;
1481	lu->lu_ctl = sbd_ctl;
1482	lu->lu_task_done = sbd_ats_remove_by_task;
1483	lu->lu_info = sbd_info;
1484	sl->sl_state = STMF_STATE_OFFLINE;
1485
1486	if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1487		stmf_trace(0, "Failed to register with framework, ret=%llx",
1488		    ret);
1489		if (ret == STMF_ALREADY) {
1490			*err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1491		}
1492		return (EIO);
1493	}
1494
1495	/*
1496	 * setup the ATS (compare and write) lists to handle multiple
1497	 * ATS commands simultaneously
1498	 */
1499	list_create(&sl->sl_ats_io_list, sizeof (ats_state_t),
1500	    offsetof(ats_state_t, as_next));
1501	*err_ret = 0;
1502	return (0);
1503}
1504
1505int
1506sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1507    int vp_valid, int keep_open)
1508{
1509	int ret;
1510	int flag;
1511	ulong_t	nbits;
1512	uint64_t supported_size;
1513	vattr_t vattr;
1514	enum vtype vt;
1515	struct dk_cinfo dki;
1516	int unused;
1517
1518	mutex_enter(&sl->sl_lock);
1519	if (vp_valid) {
1520		goto odf_over_open;
1521	}
1522	if (sl->sl_data_filename[0] != '/') {
1523		*err_ret = SBD_RET_DATA_PATH_NOT_ABSOLUTE;
1524		mutex_exit(&sl->sl_lock);
1525		return (EINVAL);
1526	}
1527	if ((ret = lookupname(sl->sl_data_filename, UIO_SYSSPACE, FOLLOW,
1528	    NULLVPP, &sl->sl_data_vp)) != 0) {
1529		*err_ret = SBD_RET_DATA_FILE_LOOKUP_FAILED;
1530		mutex_exit(&sl->sl_lock);
1531		return (ret);
1532	}
1533	sl->sl_data_vtype = vt = sl->sl_data_vp->v_type;
1534	VN_RELE(sl->sl_data_vp);
1535	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
1536		*err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1537		mutex_exit(&sl->sl_lock);
1538		return (EINVAL);
1539	}
1540	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1541		flag = FREAD | FOFFMAX;
1542	} else {
1543		flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1544	}
1545	if ((ret = vn_open(sl->sl_data_filename, UIO_SYSSPACE, flag, 0,
1546	    &sl->sl_data_vp, 0, 0)) != 0) {
1547		*err_ret = SBD_RET_DATA_FILE_OPEN_FAILED;
1548		mutex_exit(&sl->sl_lock);
1549		return (ret);
1550	}
1551odf_over_open:
1552	vattr.va_mask = AT_SIZE;
1553	if ((ret = VOP_GETATTR(sl->sl_data_vp, &vattr, 0, CRED(), NULL)) != 0) {
1554		*err_ret = SBD_RET_DATA_FILE_GETATTR_FAILED;
1555		goto odf_close_data_and_exit;
1556	}
1557	if ((vt != VREG) && (vattr.va_size == 0)) {
1558		/*
1559		 * Its a zero byte block or char device. This cannot be
1560		 * a raw disk.
1561		 */
1562		*err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1563		ret = EINVAL;
1564		goto odf_close_data_and_exit;
1565	}
1566	/* sl_data_readable size includes any metadata. */
1567	sl->sl_data_readable_size = vattr.va_size;
1568
1569	if (VOP_PATHCONF(sl->sl_data_vp, _PC_FILESIZEBITS, &nbits,
1570	    CRED(), NULL) != 0) {
1571		nbits = 0;
1572	}
1573	/* nbits cannot be greater than 64 */
1574	sl->sl_data_fs_nbits = (uint8_t)nbits;
1575	if (lu_size_valid) {
1576		sl->sl_total_data_size = sl->sl_lu_size;
1577		if (sl->sl_flags & SL_SHARED_META) {
1578			sl->sl_total_data_size += SHARED_META_DATA_SIZE;
1579		}
1580		if ((nbits > 0) && (nbits < 64)) {
1581			/*
1582			 * The expression below is correct only if nbits is
1583			 * positive and less than 64.
1584			 */
1585			supported_size = (((uint64_t)1) << nbits) - 1;
1586			if (sl->sl_total_data_size > supported_size) {
1587				*err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1588				ret = EINVAL;
1589				goto odf_close_data_and_exit;
1590			}
1591		}
1592	} else {
1593		sl->sl_total_data_size = vattr.va_size;
1594		if (sl->sl_flags & SL_SHARED_META) {
1595			if (vattr.va_size > SHARED_META_DATA_SIZE) {
1596				sl->sl_lu_size = vattr.va_size -
1597				    SHARED_META_DATA_SIZE;
1598			} else {
1599				*err_ret = SBD_RET_FILE_SIZE_ERROR;
1600				ret = EINVAL;
1601				goto odf_close_data_and_exit;
1602			}
1603		} else {
1604			sl->sl_lu_size = vattr.va_size;
1605		}
1606	}
1607
1608	if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1609		*err_ret = SBD_RET_FILE_SIZE_ERROR;
1610		ret = EINVAL;
1611		goto odf_close_data_and_exit;
1612	}
1613	if (sl->sl_lu_size &
1614	    ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1615		*err_ret = SBD_RET_FILE_ALIGN_ERROR;
1616		ret = EINVAL;
1617		goto odf_close_data_and_exit;
1618	}
1619	/*
1620	 * Get the minor device for direct zvol access
1621	 */
1622	if (sl->sl_flags & SL_ZFS_META) {
1623		if ((ret = VOP_IOCTL(sl->sl_data_vp, DKIOCINFO, (intptr_t)&dki,
1624		    FKIOCTL, kcred, &unused, NULL)) != 0) {
1625			cmn_err(CE_WARN, "ioctl(DKIOCINFO) failed %d", ret);
1626			/* zvol reserves 0, so this would fail later */
1627			sl->sl_zvol_minor = 0;
1628		} else {
1629			sl->sl_zvol_minor = dki.dki_unit;
1630			if (sbd_zvol_get_volume_params(sl) == 0)
1631				sl->sl_flags |= SL_CALL_ZVOL;
1632		}
1633	}
1634	sl->sl_flags |= SL_MEDIA_LOADED;
1635	mutex_exit(&sl->sl_lock);
1636	return (0);
1637
1638odf_close_data_and_exit:
1639	if (!keep_open) {
1640		(void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1641		VN_RELE(sl->sl_data_vp);
1642	}
1643	mutex_exit(&sl->sl_lock);
1644	return (ret);
1645}
1646
1647void
1648sbd_close_lu(sbd_lu_t *sl)
1649{
1650	int flag;
1651
1652	if (((sl->sl_flags & SL_SHARED_META) == 0) &&
1653	    (sl->sl_flags & SL_META_OPENED)) {
1654		if (sl->sl_flags & SL_ZFS_META) {
1655			rw_destroy(&sl->sl_zfs_meta_lock);
1656			if (sl->sl_zfs_meta) {
1657				kmem_free(sl->sl_zfs_meta, ZAP_MAXVALUELEN / 2);
1658				sl->sl_zfs_meta = NULL;
1659			}
1660		} else {
1661			flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1662			(void) VOP_CLOSE(sl->sl_meta_vp, flag, 1, 0,
1663			    CRED(), NULL);
1664			VN_RELE(sl->sl_meta_vp);
1665		}
1666		sl->sl_flags &= ~SL_META_OPENED;
1667	}
1668	if (sl->sl_flags & SL_MEDIA_LOADED) {
1669		if (sl->sl_flags & SL_WRITE_PROTECTED) {
1670			flag = FREAD | FOFFMAX;
1671		} else {
1672			flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1673		}
1674		(void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1675		VN_RELE(sl->sl_data_vp);
1676		sl->sl_flags &= ~SL_MEDIA_LOADED;
1677		if (sl->sl_flags & SL_SHARED_META) {
1678			sl->sl_flags &= ~SL_META_OPENED;
1679		}
1680	}
1681}
1682
1683int
1684sbd_set_lu_standby(sbd_set_lu_standby_t *stlu, uint32_t *err_ret)
1685{
1686	sbd_lu_t *sl;
1687	sbd_status_t sret;
1688	stmf_status_t stret;
1689	uint8_t old_access_state;
1690
1691	sret = sbd_find_and_lock_lu(stlu->stlu_guid, NULL,
1692	    SL_OP_MODIFY_LU, &sl);
1693	if (sret != SBD_SUCCESS) {
1694		if (sret == SBD_BUSY) {
1695			*err_ret = SBD_RET_LU_BUSY;
1696			return (EBUSY);
1697		} else if (sret == SBD_NOT_FOUND) {
1698			*err_ret = SBD_RET_NOT_FOUND;
1699			return (ENOENT);
1700		}
1701		*err_ret = SBD_RET_ACCESS_STATE_FAILED;
1702		return (EIO);
1703	}
1704
1705	old_access_state = sl->sl_access_state;
1706	sl->sl_access_state = SBD_LU_TRANSITION_TO_STANDBY;
1707	stret = stmf_set_lu_access((stmf_lu_t *)sl->sl_lu, STMF_LU_STANDBY);
1708	if (stret != STMF_SUCCESS) {
1709		sl->sl_trans_op = SL_OP_NONE;
1710		*err_ret = SBD_RET_ACCESS_STATE_FAILED;
1711		sl->sl_access_state = old_access_state;
1712		return (EIO);
1713	}
1714
1715	/*
1716	 * acquire the writer lock here to ensure we're not pulling
1717	 * the rug from the vn_rdwr to the backing store
1718	 */
1719	rw_enter(&sl->sl_access_state_lock, RW_WRITER);
1720	sbd_close_lu(sl);
1721	rw_exit(&sl->sl_access_state_lock);
1722
1723	sl->sl_trans_op = SL_OP_NONE;
1724	return (0);
1725}
1726
1727int
1728sbd_close_delete_lu(sbd_lu_t *sl, int ret)
1729{
1730
1731	/*
1732	 * acquire the writer lock here to ensure we're not pulling
1733	 * the rug from the vn_rdwr to the backing store
1734	 */
1735	rw_enter(&sl->sl_access_state_lock, RW_WRITER);
1736	sbd_close_lu(sl);
1737	rw_exit(&sl->sl_access_state_lock);
1738
1739	if (sl->sl_flags & SL_LINKED)
1740		sbd_unlink_lu(sl);
1741	mutex_destroy(&sl->sl_metadata_lock);
1742	mutex_destroy(&sl->sl_lock);
1743	rw_destroy(&sl->sl_pgr->pgr_lock);
1744	rw_destroy(&sl->sl_access_state_lock);
1745	if (sl->sl_serial_no_alloc_size) {
1746		kmem_free(sl->sl_serial_no, sl->sl_serial_no_alloc_size);
1747	}
1748	if (sl->sl_data_fname_alloc_size) {
1749		kmem_free(sl->sl_data_filename, sl->sl_data_fname_alloc_size);
1750	}
1751	if (sl->sl_alias_alloc_size) {
1752		kmem_free(sl->sl_alias, sl->sl_alias_alloc_size);
1753	}
1754	if (sl->sl_mgmt_url_alloc_size) {
1755		kmem_free(sl->sl_mgmt_url, sl->sl_mgmt_url_alloc_size);
1756	}
1757	stmf_free(sl->sl_lu);
1758	return (ret);
1759}
1760
/*
 * Create a brand new LU from a create-and-register request and register
 * it with the framework.
 *
 *   slu       - the request; slu_buf carries the packed file names, alias,
 *               management URL and serial number at the given offsets.
 *   struct_sz - total size of the request structure as passed in.
 *   err_ret   - out: SBD_RET_* detail code (0 when no detail applies).
 *
 * Steps: validate the offsets, allocate the stmf_lu_t with a trailing
 * sbd_lu_t + sbd_pgr_t + string area, copy the strings in, link the LU,
 * open the data file, settle the write cache setting, open/create the
 * meta store, build the device id, write the meta start record, the LU
 * info section and the PGR meta, update the zvol property for ZFS meta,
 * and finally register the LU.
 *
 * Several fields of 'slu' are written back: slu_ret_filesize_nbits,
 * slu_lu_size, slu_guid (when it was generated here), slu_blksize (set to
 * 512 when not supplied) and slu_company_id (defaulted when not valid).
 *
 * Returns 0 on success (sbd_lu_count is incremented).  Returns EINVAL
 * for bad offsets and ENOMEM if the LU cannot be allocated; any later
 * failure tears the LU down via sbd_close_delete_lu() and returns the
 * errno chosen at the point of failure.
 */
int
sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
    uint32_t *err_ret)
{
	char *namebuf;
	sbd_lu_t *sl;
	stmf_lu_t *lu;
	char *p;
	int sz;
	int alloc_sz;
	int ret = EIO;
	int flag;
	int wcd = 0;
	uint32_t hid = 0;
	enum vtype vt;

	/*
	 * Size of the local copy of slu_buf: the bytes of the request that
	 * extend past the fixed structure, plus 8 and one extra byte that
	 * is used for a guaranteed terminating NUL.
	 * NOTE(review): the "+ 8" presumably accounts for slu_buf being
	 * declared with 8 bytes inside the structure - confirm against the
	 * sbd_create_and_reg_lu_t definition.
	 */
	sz = struct_sz - sizeof (sbd_create_and_reg_lu_t) + 8 + 1;

	*err_ret = 0;

	/* Lets validate various offsets */
	if (((slu->slu_meta_fname_valid) &&
	    (slu->slu_meta_fname_off >= sz)) ||
	    (slu->slu_data_fname_off >= sz) ||
	    ((slu->slu_alias_valid) &&
	    (slu->slu_alias_off >= sz)) ||
	    ((slu->slu_mgmt_url_valid) &&
	    (slu->slu_mgmt_url_off >= sz)) ||
	    ((slu->slu_serial_valid) &&
	    ((slu->slu_serial_off + slu->slu_serial_size) >= sz))) {
		return (EINVAL);
	}

	/*
	 * Work on a private, NUL-terminated copy of the string buffer so
	 * the strlen()/strcpy() calls below cannot run off its end.
	 */
	namebuf = kmem_zalloc(sz, KM_SLEEP);
	bcopy(slu->slu_buf, namebuf, sz - 1);
	namebuf[sz - 1] = 0;

	/* Total up the space needed behind the sbd_lu_t for the strings. */
	alloc_sz = sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
	if (slu->slu_meta_fname_valid) {
		alloc_sz += strlen(namebuf + slu->slu_meta_fname_off) + 1;
	}
	alloc_sz += strlen(namebuf + slu->slu_data_fname_off) + 1;
	if (slu->slu_alias_valid) {
		alloc_sz += strlen(namebuf + slu->slu_alias_off) + 1;
	}
	if (slu->slu_mgmt_url_valid) {
		alloc_sz += strlen(namebuf + slu->slu_mgmt_url_off) + 1;
	}
	if (slu->slu_serial_valid) {
		alloc_sz += slu->slu_serial_size;
	}

	lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU, alloc_sz, 0);
	if (lu == NULL) {
		kmem_free(namebuf, sz);
		return (ENOMEM);
	}
	/*
	 * Layout of the provider private area:
	 *   sbd_lu_t | sbd_pgr_t | data fname | [meta fname] | [alias] |
	 *   [mgmt url] | [serial]
	 */
	sl = (sbd_lu_t *)lu->lu_provider_private;
	bzero(sl, alloc_sz);
	sl->sl_lu = lu;
	sl->sl_alloc_size = alloc_sz;
	sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
	rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
	p = ((char *)sl) + sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
	sl->sl_data_filename = p;
	(void) strcpy(sl->sl_data_filename, namebuf + slu->slu_data_fname_off);
	p += strlen(sl->sl_data_filename) + 1;
	sl->sl_meta_offset = SBD_META_OFFSET;
	sl->sl_access_state = SBD_LU_ACTIVE;
	if (slu->slu_meta_fname_valid) {
		/* Separate meta file: it also names the LU by default. */
		sl->sl_alias = sl->sl_name = sl->sl_meta_filename = p;
		(void) strcpy(sl->sl_meta_filename, namebuf +
		    slu->slu_meta_fname_off);
		p += strlen(sl->sl_meta_filename) + 1;
	} else {
		/*
		 * No meta file given: meta is kept in ZFS for zvols, and
		 * otherwise shares the data file.
		 */
		sl->sl_alias = sl->sl_name = sl->sl_data_filename;
		if (sbd_is_zvol(sl->sl_data_filename)) {
			sl->sl_flags |= SL_ZFS_META;
			sl->sl_meta_offset = 0;
		} else {
			sl->sl_flags |= SL_SHARED_META;
			sl->sl_data_offset = SHARED_META_DATA_SIZE;
			sl->sl_total_meta_size = SHARED_META_DATA_SIZE;
			sl->sl_meta_size_used = 0;
		}
	}
	/* An explicit alias overrides the default chosen above. */
	if (slu->slu_alias_valid) {
		sl->sl_alias = p;
		(void) strcpy(p, namebuf + slu->slu_alias_off);
		p += strlen(sl->sl_alias) + 1;
	}
	if (slu->slu_mgmt_url_valid) {
		sl->sl_mgmt_url = p;
		(void) strcpy(p, namebuf + slu->slu_mgmt_url_off);
		p += strlen(sl->sl_mgmt_url) + 1;
	}
	if (slu->slu_serial_valid) {
		sl->sl_serial_no = (uint8_t *)p;
		bcopy(namebuf + slu->slu_serial_off, sl->sl_serial_no,
		    slu->slu_serial_size);
		sl->sl_serial_no_size = slu->slu_serial_size;
		p += slu->slu_serial_size;
	}
	kmem_free(namebuf, sz);
	if (slu->slu_vid_valid) {
		bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
		sl->sl_flags |= SL_VID_VALID;
	}
	if (slu->slu_pid_valid) {
		bcopy(slu->slu_pid, sl->sl_product_id, 16);
		sl->sl_flags |= SL_PID_VALID;
	}
	if (slu->slu_rev_valid) {
		bcopy(slu->slu_rev, sl->sl_revision, 4);
		sl->sl_flags |= SL_REV_VALID;
	}
	if (slu->slu_write_protected) {
		sl->sl_flags |= SL_WRITE_PROTECTED;
	}
	if (slu->slu_blksize_valid) {
		/* Block size must be a non-zero power of two, at most 32K. */
		if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
		    (slu->slu_blksize > (32 * 1024)) ||
		    (slu->slu_blksize == 0)) {
			*err_ret = SBD_RET_INVALID_BLKSIZE;
			ret = EINVAL;
			goto scm_err_out;
		}
		/* sl was zeroed above, so the shift starts counting at 0. */
		while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
			sl->sl_data_blocksize_shift++;
		}
	} else {
		sl->sl_data_blocksize_shift = 9;	/* 512 by default */
		slu->slu_blksize = 512;
	}

	/* Now lets start creating meta */
	sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
	if (sbd_link_lu(sl) != SBD_SUCCESS) {
		*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
		ret = EALREADY;
		goto scm_err_out;
	}

	/* 1st focus on the data store */
	if (slu->slu_lu_size_valid) {
		sl->sl_lu_size = slu->slu_lu_size;
	}
	ret = sbd_open_data_file(sl, err_ret, slu->slu_lu_size_valid, 0, 0);
	/* Report what was learned back to the caller, even on failure. */
	slu->slu_ret_filesize_nbits = sl->sl_data_fs_nbits;
	slu->slu_lu_size = sl->sl_lu_size;
	if (ret) {
		goto scm_err_out;
	}

	/*
	 * Check if we were explicitly asked to disable/enable write
	 * cache on the device, otherwise get current device setting.
	 */
	if (slu->slu_writeback_cache_disable_valid) {
		if (slu->slu_writeback_cache_disable) {
			/*
			 * Set write cache disable on the device. If it fails,
			 * we'll support it using sync/flush.
			 */
			(void) sbd_wcd_set(1, sl);
			wcd = 1;
		} else {
			/*
			 * Set write cache enable on the device. If it fails,
			 * return an error.
			 */
			if (sbd_wcd_set(0, sl) != SBD_SUCCESS) {
				*err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
				ret = EFAULT;
				goto scm_err_out;
			}
		}
	} else {
		sbd_wcd_get(&wcd, sl);
	}

	if (wcd) {
		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
		    SL_SAVED_WRITE_CACHE_DISABLE;
	}

	/* Shared meta lives in the already opened data file. */
	if (sl->sl_flags & SL_SHARED_META) {
		goto over_meta_open;
	}
	if (sl->sl_flags & SL_ZFS_META) {
		if (sbd_create_zfs_meta_object(sl) != SBD_SUCCESS) {
			*err_ret = SBD_RET_ZFS_META_CREATE_FAILED;
			ret = ENOMEM;
			goto scm_err_out;
		}
		sl->sl_meta_blocksize_shift = 0;
		goto over_meta_create;
	}
	/* Separate meta file: check its type, then open it. */
	if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &sl->sl_meta_vp)) != 0) {
		*err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
		goto scm_err_out;
	}
	sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
	VN_RELE(sl->sl_meta_vp);
	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
		*err_ret = SBD_RET_WRONG_META_FILE_TYPE;
		ret = EINVAL;
		goto scm_err_out;
	}
	/* Regular files are byte addressable; devices use 512-byte blocks. */
	if (vt == VREG) {
		sl->sl_meta_blocksize_shift = 0;
	} else {
		sl->sl_meta_blocksize_shift = 9;
	}
	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
	    &sl->sl_meta_vp, 0, 0)) != 0) {
		*err_ret = SBD_RET_META_FILE_OPEN_FAILED;
		goto scm_err_out;
	}
over_meta_create:
	/*
	 * Initial meta size: the start record, rounded up to the meta
	 * block size.
	 */
	sl->sl_total_meta_size = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
	sl->sl_total_meta_size +=
	    (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
	sl->sl_total_meta_size &=
	    ~((((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1);
	sl->sl_meta_size_used = 0;
over_meta_open:
	sl->sl_flags |= SL_META_OPENED;

	/*
	 * Build the device id: a 4 byte header followed by a 16 byte
	 * identifier, either the caller supplied GUID or a generated one.
	 */
	sl->sl_device_id[3] = 16;
	if (slu->slu_guid_valid) {
		sl->sl_device_id[0] = 0xf1;
		sl->sl_device_id[1] = 3;
		sl->sl_device_id[2] = 0;
		bcopy(slu->slu_guid, sl->sl_device_id + 4, 16);
	} else {
		if (slu->slu_host_id_valid)
			hid = slu->slu_host_id;
		if (!slu->slu_company_id_valid)
			slu->slu_company_id = COMPANY_ID_SUN;
		if (stmf_scsilib_uniq_lu_id2(slu->slu_company_id, hid,
		    (scsi_devid_desc_t *)&sl->sl_device_id[0]) !=
		    STMF_SUCCESS) {
			*err_ret = SBD_RET_META_CREATION_FAILED;
			ret = EIO;
			goto scm_err_out;
		}
		/* Hand the generated GUID back to the caller. */
		bcopy(sl->sl_device_id + 4, slu->slu_guid, 16);
	}

	/* Lets create the meta now */
	mutex_enter(&sl->sl_metadata_lock);
	if (sbd_write_meta_start(sl, sl->sl_total_meta_size,
	    sizeof (sbd_meta_start_t)) != SBD_SUCCESS) {
		mutex_exit(&sl->sl_metadata_lock);
		*err_ret = SBD_RET_META_CREATION_FAILED;
		ret = EIO;
		goto scm_err_out;
	}
	mutex_exit(&sl->sl_metadata_lock);
	sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);

	if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
		*err_ret = SBD_RET_META_CREATION_FAILED;
		ret = EIO;
		goto scm_err_out;
	}

	if (sbd_pgr_meta_init(sl) != SBD_SUCCESS) {
		*err_ret = SBD_RET_META_CREATION_FAILED;
		ret = EIO;
		goto scm_err_out;
	}

	/*
	 * Update the zvol separately as this need only be called upon
	 * completion of the metadata initialization.
	 */
	if (sl->sl_flags & SL_ZFS_META) {
		if (sbd_update_zfs_prop(sl) != SBD_SUCCESS) {
			*err_ret = SBD_RET_META_CREATION_FAILED;
			ret = EIO;
			goto scm_err_out;
		}
	}

	ret = sbd_populate_and_register_lu(sl, err_ret);
	if (ret) {
		goto scm_err_out;
	}

	sl->sl_trans_op = SL_OP_NONE;
	atomic_inc_32(&sbd_lu_count);
	return (0);

scm_err_out:
	/* Common failure exit: close, unlink and free the LU. */
	return (sbd_close_delete_lu(sl, ret));
}
2064
2065stmf_status_t
2066sbd_proxy_msg(uint8_t *luid, void *proxy_arg, uint32_t proxy_arg_len,
2067    uint32_t type)
2068{
2069	switch (type) {
2070		case STMF_MSG_LU_ACTIVE:
2071			return (sbd_proxy_reg_lu(luid, proxy_arg,
2072			    proxy_arg_len));
2073		case STMF_MSG_LU_REGISTER:
2074			return (sbd_proxy_reg_lu(luid, proxy_arg,
2075			    proxy_arg_len));
2076		case STMF_MSG_LU_DEREGISTER:
2077			return (sbd_proxy_dereg_lu(luid, proxy_arg,
2078			    proxy_arg_len));
2079		default:
2080			return (STMF_INVALID_ARG);
2081	}
2082}
2083
2084
2085/*
2086 * register a standby logical unit
2087 * proxy_reg_arg contains the meta filename
2088 */
2089stmf_status_t
2090sbd_proxy_reg_lu(uint8_t *luid, void *proxy_reg_arg, uint32_t proxy_reg_arg_len)
2091{
2092	sbd_lu_t *sl;
2093	sbd_status_t sret;
2094	sbd_create_standby_lu_t *stlu;
2095	int alloc_sz;
2096	uint32_t err_ret = 0;
2097	stmf_status_t stret = STMF_SUCCESS;
2098
2099	if (luid == NULL) {
2100		return (STMF_INVALID_ARG);
2101	}
2102
2103	do {
2104		sret = sbd_find_and_lock_lu(luid, NULL, SL_OP_MODIFY_LU, &sl);
2105	} while (sret == SBD_BUSY);
2106
2107	if (sret == SBD_NOT_FOUND) {
2108		alloc_sz = sizeof (*stlu) + proxy_reg_arg_len - 8;
2109		stlu = (sbd_create_standby_lu_t *)kmem_zalloc(alloc_sz,
2110		    KM_SLEEP);
2111		bcopy(luid, stlu->stlu_guid, 16);
2112		if (proxy_reg_arg_len) {
2113			bcopy(proxy_reg_arg, stlu->stlu_meta_fname,
2114			    proxy_reg_arg_len);
2115			stlu->stlu_meta_fname_size = proxy_reg_arg_len;
2116		}
2117		if (sbd_create_standby_lu(stlu, &err_ret) != 0) {
2118			cmn_err(CE_WARN,
2119			    "Unable to create standby logical unit for %s",
2120			    stlu->stlu_meta_fname);
2121			stret = STMF_FAILURE;
2122		}
2123		kmem_free(stlu, alloc_sz);
2124		return (stret);
2125	} else if (sret == SBD_SUCCESS) {
2126		/*
2127		 * if the lu is already registered, then the lu should now
2128		 * be in standby mode
2129		 */
2130		sbd_it_data_t *it;
2131		if (sl->sl_access_state != SBD_LU_STANDBY) {
2132			mutex_enter(&sl->sl_lock);
2133			sl->sl_access_state = SBD_LU_STANDBY;
2134			for (it = sl->sl_it_list; it != NULL;
2135			    it = it->sbd_it_next) {
2136				it->sbd_it_ua_conditions |=
2137				    SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
2138				it->sbd_it_flags &=
2139				    ~SBD_IT_HAS_SCSI2_RESERVATION;
2140				sl->sl_flags &= ~SL_LU_HAS_SCSI2_RESERVATION;
2141			}
2142			mutex_exit(&sl->sl_lock);
2143			sbd_pgr_reset(sl);
2144		}
2145		sl->sl_trans_op = SL_OP_NONE;
2146	} else {
2147		cmn_err(CE_WARN, "could not find and lock logical unit");
2148		stret = STMF_FAILURE;
2149	}
2150out:
2151	return (stret);
2152}
2153
2154/* ARGSUSED */
2155stmf_status_t
2156sbd_proxy_dereg_lu(uint8_t *luid, void *proxy_reg_arg,
2157    uint32_t proxy_reg_arg_len)
2158{
2159	sbd_delete_lu_t dlu = {0};
2160	uint32_t err_ret;
2161
2162	if (luid == NULL) {
2163		cmn_err(CE_WARN, "de-register lu request had null luid");
2164		return (STMF_INVALID_ARG);
2165	}
2166
2167	bcopy(luid, &dlu.dlu_guid, 16);
2168
2169	if (sbd_delete_lu(&dlu, (int)sizeof (dlu), &err_ret) != 0) {
2170		cmn_err(CE_WARN, "failed to delete de-register lu request");
2171		return (STMF_FAILURE);
2172	}
2173
2174	return (STMF_SUCCESS);
2175}
2176
2177int
2178sbd_create_standby_lu(sbd_create_standby_lu_t *slu, uint32_t *err_ret)
2179{
2180	sbd_lu_t *sl;
2181	stmf_lu_t *lu;
2182	int ret = EIO;
2183	int alloc_sz;
2184
2185	alloc_sz = sizeof (sbd_lu_t) + sizeof (sbd_pgr_t) +
2186	    slu->stlu_meta_fname_size;
2187	lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU, alloc_sz, 0);
2188	if (lu == NULL) {
2189		return (ENOMEM);
2190	}
2191	sl = (sbd_lu_t *)lu->lu_provider_private;
2192	bzero(sl, alloc_sz);
2193	sl->sl_lu = lu;
2194	sl->sl_alloc_size = alloc_sz;
2195
2196	sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
2197	sl->sl_meta_filename = ((char *)sl) + sizeof (sbd_lu_t) +
2198	    sizeof (sbd_pgr_t);
2199
2200	if (slu->stlu_meta_fname_size > 0) {
2201		(void) strcpy(sl->sl_meta_filename, slu->stlu_meta_fname);
2202	}
2203	sl->sl_name = sl->sl_meta_filename;
2204
2205	sl->sl_device_id[3] = 16;
2206	sl->sl_device_id[0] = 0xf1;
2207	sl->sl_device_id[1] = 3;
2208	sl->sl_device_id[2] = 0;
2209	bcopy(slu->stlu_guid, sl->sl_device_id + 4, 16);
2210	lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
2211	sl->sl_access_state = SBD_LU_STANDBY;
2212
2213	rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
2214	mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
2215	mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
2216	rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
2217
2218	sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
2219
2220	if (sbd_link_lu(sl) != SBD_SUCCESS) {
2221		*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2222		ret = EALREADY;
2223		goto scs_err_out;
2224	}
2225
2226	ret = sbd_populate_and_register_lu(sl, err_ret);
2227	if (ret) {
2228		goto scs_err_out;
2229	}
2230
2231	sl->sl_trans_op = SL_OP_NONE;
2232	atomic_inc_32(&sbd_lu_count);
2233	return (0);
2234
2235scs_err_out:
2236	return (sbd_close_delete_lu(sl, ret));
2237}
2238
2239int
2240sbd_load_sli_1_0(sbd_lu_t *sl, uint32_t *err_ret)
2241{
2242	sbd_lu_info_1_0_t *sli = NULL;
2243	sbd_status_t sret;
2244
2245	sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
2246	    SMS_ID_LU_INFO_1_0);
2247
2248	if (sret != SBD_SUCCESS) {
2249		*err_ret = SBD_RET_NO_META;
2250		return (EIO);
2251	}
2252	if (sli->sli_data_order != SMS_DATA_ORDER) {
2253		sbd_swap_lu_info_1_0(sli);
2254		if (sli->sli_data_order != SMS_DATA_ORDER) {
2255			kmem_free(sli, sli->sli_sms_header.sms_size);
2256			*err_ret = SBD_RET_NO_META;
2257			return (EIO);
2258		}
2259	}
2260
2261	sl->sl_flags |= SL_SHARED_META;
2262	sl->sl_data_blocksize_shift = 9;
2263	sl->sl_data_offset = SHARED_META_DATA_SIZE;
2264	sl->sl_lu_size = sli->sli_total_store_size - SHARED_META_DATA_SIZE;
2265	sl->sl_total_data_size = SHARED_META_DATA_SIZE + sl->sl_lu_size;
2266	bcopy(sli->sli_lu_devid, sl->sl_device_id, 20);
2267
2268	kmem_free(sli, sli->sli_sms_header.sms_size);
2269	return (0);
2270}
2271
2272int
2273sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
2274    int no_register, sbd_lu_t **slr)
2275{
2276	stmf_lu_t *lu;
2277	sbd_lu_t *sl;
2278	sbd_lu_info_1_1_t *sli = NULL;
2279	int asz;
2280	int ret = 0;
2281	stmf_status_t stret;
2282	int flag;
2283	int wcd = 0;
2284	int data_opened;
2285	uint16_t sli_buf_sz;
2286	uint8_t *sli_buf_copy = NULL;
2287	enum vtype vt;
2288	int standby = 0;
2289	sbd_status_t sret;
2290
2291	if (no_register && slr == NULL) {
2292		return (EINVAL);
2293	}
2294	ilu->ilu_meta_fname[struct_sz - sizeof (*ilu) + 8 - 1] = 0;
2295	/*
2296	 * check whether logical unit is already registered ALUA
2297	 * For a standby logical unit, the meta filename is set. Use
2298	 * that to search for an existing logical unit.
2299	 */
2300	sret = sbd_find_and_lock_lu(NULL, (uint8_t *)&(ilu->ilu_meta_fname),
2301	    SL_OP_IMPORT_LU, &sl);
2302
2303	if (sret == SBD_SUCCESS) {
2304		if (sl->sl_access_state != SBD_LU_ACTIVE) {
2305			no_register = 1;
2306			standby = 1;
2307			lu = sl->sl_lu;
2308			if (sl->sl_alias_alloc_size) {
2309				kmem_free(sl->sl_alias,
2310				    sl->sl_alias_alloc_size);
2311				sl->sl_alias_alloc_size = 0;
2312				sl->sl_alias = NULL;
2313				lu->lu_alias = NULL;
2314			}
2315			if (sl->sl_meta_filename == NULL) {
2316				sl->sl_meta_filename = sl->sl_data_filename;
2317			} else if (sl->sl_data_fname_alloc_size) {
2318				kmem_free(sl->sl_data_filename,
2319				    sl->sl_data_fname_alloc_size);
2320				sl->sl_data_fname_alloc_size = 0;
2321			}
2322			if (sl->sl_serial_no_alloc_size) {
2323				kmem_free(sl->sl_serial_no,
2324				    sl->sl_serial_no_alloc_size);
2325				sl->sl_serial_no_alloc_size = 0;
2326			}
2327			if (sl->sl_mgmt_url_alloc_size) {
2328				kmem_free(sl->sl_mgmt_url,
2329				    sl->sl_mgmt_url_alloc_size);
2330				sl->sl_mgmt_url_alloc_size = 0;
2331			}
2332		} else {
2333			*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2334			bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2335			sl->sl_trans_op = SL_OP_NONE;
2336			return (EALREADY);
2337		}
2338	} else if (sret == SBD_NOT_FOUND) {
2339		asz = strlen(ilu->ilu_meta_fname) + 1;
2340
2341		lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU,
2342		    sizeof (sbd_lu_t) + sizeof (sbd_pgr_t) + asz, 0);
2343		if (lu == NULL) {
2344			return (ENOMEM);
2345		}
2346		sl = (sbd_lu_t *)lu->lu_provider_private;
2347		bzero(sl, sizeof (*sl));
2348		sl->sl_lu = lu;
2349		sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
2350		sl->sl_meta_filename = ((char *)sl) + sizeof (*sl) +
2351		    sizeof (sbd_pgr_t);
2352		(void) strcpy(sl->sl_meta_filename, ilu->ilu_meta_fname);
2353		sl->sl_name = sl->sl_meta_filename;
2354		rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
2355		rw_init(&sl->sl_access_state_lock, NULL, RW_DRIVER, NULL);
2356		mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
2357		mutex_init(&sl->sl_metadata_lock, NULL, MUTEX_DRIVER, NULL);
2358		sl->sl_trans_op = SL_OP_IMPORT_LU;
2359	} else {
2360		*err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
2361		return (EIO);
2362	}
2363
2364	/* we're only loading the metadata */
2365	if (!no_register) {
2366		if (sbd_link_lu(sl) != SBD_SUCCESS) {
2367			*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
2368			bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2369			ret = EALREADY;
2370			goto sim_err_out;
2371		}
2372	}
2373	if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
2374	    NULLVPP, &sl->sl_meta_vp)) != 0) {
2375		*err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
2376		goto sim_err_out;
2377	}
2378	if (sbd_is_zvol(sl->sl_meta_filename)) {
2379		sl->sl_flags |= SL_ZFS_META;
2380		sl->sl_data_filename = sl->sl_meta_filename;
2381	}
2382	sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
2383	VN_RELE(sl->sl_meta_vp);
2384	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
2385		*err_ret = SBD_RET_WRONG_META_FILE_TYPE;
2386		ret = EINVAL;
2387		goto sim_err_out;
2388	}
2389	if (sl->sl_flags & SL_ZFS_META) {
2390		if (sbd_open_zfs_meta(sl) != SBD_SUCCESS) {
2391			/* let see if metadata is in the 64k block */
2392			sl->sl_flags &= ~SL_ZFS_META;
2393		}
2394	}
2395	if (!(sl->sl_flags & SL_ZFS_META)) {
2396		/* metadata is always writable */
2397		flag = FREAD | FWRITE | FOFFMAX | FEXCL;
2398		if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
2399		    &sl->sl_meta_vp, 0, 0)) != 0) {
2400			*err_ret = SBD_RET_META_FILE_OPEN_FAILED;
2401			goto sim_err_out;
2402		}
2403	}
2404	if ((sl->sl_flags & SL_ZFS_META) || (vt == VREG)) {
2405		sl->sl_meta_blocksize_shift = 0;
2406	} else {
2407		sl->sl_meta_blocksize_shift = 9;
2408	}
2409	sl->sl_meta_offset = (sl->sl_flags & SL_ZFS_META) ? 0 : SBD_META_OFFSET;
2410	sl->sl_flags |= SL_META_OPENED;
2411
2412	mutex_enter(&sl->sl_metadata_lock);
2413	sret = sbd_load_meta_start(sl);
2414	mutex_exit(&sl->sl_metadata_lock);
2415	if (sret != SBD_SUCCESS) {
2416		if (sret == SBD_META_CORRUPTED) {
2417			*err_ret = SBD_RET_NO_META;
2418		} else if (sret == SBD_NOT_SUPPORTED) {
2419			*err_ret = SBD_RET_VERSION_NOT_SUPPORTED;
2420		} else {
2421			*err_ret = SBD_RET_NO_META;
2422		}
2423		ret = EINVAL;
2424		goto sim_err_out;
2425	}
2426
2427	/* Now lets see if we can read the most recent LU info */
2428	sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
2429	    SMS_ID_LU_INFO_1_1);
2430	if ((sret == SBD_NOT_FOUND) && ((sl->sl_flags & SL_ZFS_META) == 0)) {
2431		ret = sbd_load_sli_1_0(sl, err_ret);
2432		if (ret) {
2433			goto sim_err_out;
2434		}
2435		goto sim_sli_loaded;
2436	}
2437	if (sret != SBD_SUCCESS) {
2438		*err_ret = SBD_RET_NO_META;
2439		ret = EIO;
2440		goto sim_err_out;
2441	}
2442	/* load sli 1.1 */
2443	if (sli->sli_data_order != SMS_DATA_ORDER) {
2444		sbd_swap_lu_info_1_1(sli);
2445		if (sli->sli_data_order != SMS_DATA_ORDER) {
2446			*err_ret = SBD_RET_NO_META;
2447			ret = EIO;
2448			goto sim_err_out;
2449		}
2450	}
2451
2452	sli_buf_sz = sli->sli_sms_header.sms_size -
2453	    sizeof (sbd_lu_info_1_1_t) + 8;
2454	sli_buf_copy = kmem_alloc(sli_buf_sz + 1, KM_SLEEP);
2455	bcopy(sli->sli_buf, sli_buf_copy, sli_buf_sz);
2456	sli_buf_copy[sli_buf_sz] = 0;
2457
2458	/* Make sure all the offsets are within limits */
2459	if (((sli->sli_flags & SLI_META_FNAME_VALID) &&
2460	    (sli->sli_meta_fname_offset > sli_buf_sz)) ||
2461	    ((sli->sli_flags & SLI_DATA_FNAME_VALID) &&
2462	    (sli->sli_data_fname_offset > sli_buf_sz)) ||
2463	    ((sli->sli_flags & SLI_MGMT_URL_VALID) &&
2464	    (sli->sli_mgmt_url_offset > sli_buf_sz)) ||
2465	    ((sli->sli_flags & SLI_SERIAL_VALID) &&
2466	    ((sli->sli_serial_offset + sli->sli_serial_size) > sli_buf_sz)) ||
2467	    ((sli->sli_flags & SLI_ALIAS_VALID) &&
2468	    (sli->sli_alias_offset > sli_buf_sz))) {
2469		*err_ret = SBD_RET_NO_META;
2470		ret = EIO;
2471		goto sim_err_out;
2472	}
2473
2474	sl->sl_lu_size = sli->sli_lu_size;
2475	sl->sl_data_blocksize_shift = sli->sli_data_blocksize_shift;
2476	bcopy(sli->sli_device_id, sl->sl_device_id, 20);
2477	if (sli->sli_flags & SLI_SERIAL_VALID) {
2478		sl->sl_serial_no_size = sl->sl_serial_no_alloc_size =
2479		    sli->sli_serial_size;
2480		sl->sl_serial_no = kmem_zalloc(sli->sli_serial_size, KM_SLEEP);
2481		bcopy(sli_buf_copy + sli->sli_serial_offset, sl->sl_serial_no,
2482		    sl->sl_serial_no_size);
2483	}
2484	if (sli->sli_flags & SLI_SEPARATE_META) {
2485		sl->sl_total_data_size = sl->sl_lu_size;
2486		if (sli->sli_flags & SLI_DATA_FNAME_VALID) {
2487			sl->sl_data_fname_alloc_size = strlen((char *)
2488			    sli_buf_copy + sli->sli_data_fname_offset) + 1;
2489			sl->sl_data_filename = kmem_zalloc(
2490			    sl->sl_data_fname_alloc_size, KM_SLEEP);
2491			(void) strcpy(sl->sl_data_filename,
2492			    (char *)sli_buf_copy + sli->sli_data_fname_offset);
2493		}
2494	} else {
2495		if (sl->sl_flags & SL_ZFS_META) {
2496			sl->sl_total_data_size = sl->sl_lu_size;
2497			sl->sl_data_offset = 0;
2498		} else {
2499			sl->sl_total_data_size =
2500			    sl->sl_lu_size + SHARED_META_DATA_SIZE;
2501			sl->sl_data_offset = SHARED_META_DATA_SIZE;
2502			sl->sl_flags |= SL_SHARED_META;
2503		}
2504	}
2505	if (sli->sli_flags & SLI_ALIAS_VALID) {
2506		sl->sl_alias_alloc_size = strlen((char *)sli_buf_copy +
2507		    sli->sli_alias_offset) + 1;
2508		sl->sl_alias = kmem_alloc(sl->sl_alias_alloc_size, KM_SLEEP);
2509		(void) strcpy(sl->sl_alias, (char *)sli_buf_copy +
2510		    sli->sli_alias_offset);
2511	}
2512	if (sli->sli_flags & SLI_MGMT_URL_VALID) {
2513		sl->sl_mgmt_url_alloc_size = strlen((char *)sli_buf_copy +
2514		    sli->sli_mgmt_url_offset) + 1;
2515		sl->sl_mgmt_url = kmem_alloc(sl->sl_mgmt_url_alloc_size,
2516		    KM_SLEEP);
2517		(void) strcpy(sl->sl_mgmt_url, (char *)sli_buf_copy +
2518		    sli->sli_mgmt_url_offset);
2519	}
2520	if (sli->sli_flags & SLI_WRITE_PROTECTED) {
2521		sl->sl_flags |= SL_WRITE_PROTECTED;
2522	}
2523	if (sli->sli_flags & SLI_VID_VALID) {
2524		sl->sl_flags |= SL_VID_VALID;
2525		bcopy(sli->sli_vid, sl->sl_vendor_id, 8);
2526	}
2527	if (sli->sli_flags & SLI_PID_VALID) {
2528		sl->sl_flags |= SL_PID_VALID;
2529		bcopy(sli->sli_pid, sl->sl_product_id, 16);
2530	}
2531	if (sli->sli_flags & SLI_REV_VALID) {
2532		sl->sl_flags |= SL_REV_VALID;
2533		bcopy(sli->sli_rev, sl->sl_revision, 4);
2534	}
2535	if (sli->sli_flags & SLI_WRITEBACK_CACHE_DISABLE) {
2536		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2537	}
2538sim_sli_loaded:
2539	if ((sl->sl_flags & SL_SHARED_META) == 0) {
2540		data_opened = 0;
2541	} else {
2542		data_opened = 1;
2543		sl->sl_data_filename = sl->sl_meta_filename;
2544		sl->sl_data_vp = sl->sl_meta_vp;
2545		sl->sl_data_vtype = sl->sl_meta_vtype;
2546	}
2547
2548	sret = sbd_pgr_meta_load(sl);
2549	if (sret != SBD_SUCCESS) {
2550		*err_ret = SBD_RET_NO_META;
2551		ret = EIO;
2552		goto sim_err_out;
2553	}
2554
2555	ret = sbd_open_data_file(sl, err_ret, 1, data_opened, 0);
2556	if (ret) {
2557		goto sim_err_out;
2558	}
2559
2560	/*
2561	 * set write cache disable on the device
2562	 * Note: this shouldn't fail on import unless the cache capabilities
2563	 * of the device changed. If that happened, modify will need to
2564	 * be used to set the cache flag appropriately after import is done.
2565	 */
2566	if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
2567		(void) sbd_wcd_set(1, sl);
2568		wcd = 1;
2569	/*
2570	 * if not explicitly set, attempt to set it to enable, if that fails
2571	 * get the current setting and use that
2572	 */
2573	} else {
2574		sret = sbd_wcd_set(0, sl);
2575		if (sret != SBD_SUCCESS) {
2576			sbd_wcd_get(&wcd, sl);
2577		}
2578	}
2579
2580	if (wcd) {
2581		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
2582		    SL_SAVED_WRITE_CACHE_DISABLE;
2583	}
2584
2585	/* we're only loading the metadata */
2586	if (!no_register) {
2587		ret = sbd_populate_and_register_lu(sl, err_ret);
2588		if (ret) {
2589			goto sim_err_out;
2590		}
2591		atomic_inc_32(&sbd_lu_count);
2592	}
2593
2594	bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
2595	sl->sl_trans_op = SL_OP_NONE;
2596
2597	if (sli) {
2598		kmem_free(sli, sli->sli_sms_header.sms_size);
2599		sli = NULL;
2600	}
2601	if (sli_buf_copy) {
2602		kmem_free(sli_buf_copy, sli_buf_sz + 1);
2603		sli_buf_copy = NULL;
2604	}
2605	if (no_register && !standby) {
2606		*slr = sl;
2607	}
2608
2609	/*
2610	 * if this was imported from standby, set the access state
2611	 * to active.
2612	 */
2613	if (standby) {
2614		sbd_it_data_t *it;
2615		mutex_enter(&sl->sl_lock);
2616		sl->sl_access_state = SBD_LU_ACTIVE;
2617		for (it = sl->sl_it_list; it != NULL;
2618		    it = it->sbd_it_next) {
2619			it->sbd_it_ua_conditions |=
2620			    SBD_UA_ASYMMETRIC_ACCESS_CHANGED;
2621			it->sbd_it_ua_conditions |= SBD_UA_POR;
2622			it->sbd_it_flags |=  SBD_IT_PGR_CHECK_FLAG;
2623		}
2624		mutex_exit(&sl->sl_lock);
2625		/* call set access state */
2626		stret = stmf_set_lu_access(lu, STMF_LU_ACTIVE);
2627		if (stret != STMF_SUCCESS) {
2628			*err_ret = SBD_RET_ACCESS_STATE_FAILED;
2629			sl->sl_access_state = SBD_LU_STANDBY;
2630			goto sim_err_out;
2631		}
2632		if (sl->sl_alias) {
2633			lu->lu_alias = sl->sl_alias;
2634		} else {
2635			lu->lu_alias = sl->sl_name;
2636		}
2637	}
2638	sl->sl_access_state = SBD_LU_ACTIVE;
2639	return (0);
2640
2641sim_err_out:
2642	if (sli) {
2643		kmem_free(sli, sli->sli_sms_header.sms_size);
2644		sli = NULL;
2645	}
2646	if (sli_buf_copy) {
2647		kmem_free(sli_buf_copy, sli_buf_sz + 1);
2648		sli_buf_copy = NULL;
2649	}
2650
2651	if (standby) {
2652		*err_ret = SBD_RET_ACCESS_STATE_FAILED;
2653		sl->sl_trans_op = SL_OP_NONE;
2654		return (EIO);
2655	} else {
2656		return (sbd_close_delete_lu(sl, ret));
2657	}
2658}
2659
2660int
2661sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret)
2662{
2663	sbd_lu_t *sl = NULL;
2664	uint16_t alias_sz;
2665	int ret = 0;
2666	sbd_it_data_t *it;
2667	sbd_status_t sret;
2668	uint64_t old_size;
2669	int modify_unregistered = 0;
2670	int ua = 0;
2671	sbd_import_lu_t *ilu;
2672	stmf_lu_t *lu;
2673	uint32_t ilu_sz;
2674	uint32_t sz;
2675
2676	sz = struct_sz - sizeof (*mlu) + 8 + 1;
2677
2678	/* if there is data in the buf, null terminate it */
2679	if (struct_sz > sizeof (*mlu)) {
2680		mlu->mlu_buf[struct_sz - sizeof (*mlu) + 8 - 1] = 0;
2681	}
2682
2683	*err_ret = 0;
2684
2685	/* Lets validate offsets */
2686	if (((mlu->mlu_alias_valid) &&
2687	    (mlu->mlu_alias_off >= sz)) ||
2688	    ((mlu->mlu_mgmt_url_valid) &&
2689	    (mlu->mlu_mgmt_url_off >= sz)) ||
2690	    (mlu->mlu_by_fname) &&
2691	    (mlu->mlu_fname_off >= sz)) {
2692		return (EINVAL);
2693	}
2694
2695	/*
2696	 * We'll look for the device but if we don't find it registered,
2697	 * we'll still try to modify the unregistered device.
2698	 */
2699	if (mlu->mlu_by_guid) {
2700		sret = sbd_find_and_lock_lu(mlu->mlu_input_guid, NULL,
2701		    SL_OP_MODIFY_LU, &sl);
2702	} else if (mlu->mlu_by_fname) {
2703		sret = sbd_find_and_lock_lu(NULL,
2704		    (uint8_t *)&(mlu->mlu_buf[mlu->mlu_fname_off]),
2705		    SL_OP_MODIFY_LU, &sl);
2706	} else {
2707		return (EINVAL);
2708	}
2709
2710
2711	if (sret != SBD_SUCCESS) {
2712		if (sret == SBD_BUSY) {
2713			*err_ret = SBD_RET_LU_BUSY;
2714			return (EBUSY);
2715		} else if (sret != SBD_NOT_FOUND) {
2716			return (EIO);
2717		} else if (!mlu->mlu_by_fname) {
2718			return (EINVAL);
2719		}
2720		/* Okay, try to import the device */
2721		struct_sz = max(8, strlen(&(mlu->mlu_buf[mlu->mlu_fname_off]))
2722		    + 1);
2723		struct_sz += sizeof (sbd_import_lu_t) - 8;
2724		ilu_sz = struct_sz;
2725		ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
2726		ilu->ilu_struct_size = struct_sz;
2727		(void) strcpy(ilu->ilu_meta_fname,
2728		    &(mlu->mlu_buf[mlu->mlu_fname_off]));
2729		ret = sbd_import_lu(ilu, struct_sz, err_ret, 1, &sl);
2730		kmem_free(ilu, ilu_sz);
2731		if (ret != SBD_SUCCESS) {
2732			return (ENOENT);
2733		}
2734		modify_unregistered = 1;
2735	}
2736
2737	if (sl->sl_access_state != SBD_LU_ACTIVE) {
2738		*err_ret = SBD_RET_ACCESS_STATE_FAILED;
2739		ret = EINVAL;
2740		goto smm_err_out;
2741	}
2742
2743	/* check for write cache change */
2744	if (mlu->mlu_writeback_cache_disable_valid) {
2745		/* set wce on device */
2746		sret = sbd_wcd_set(mlu->mlu_writeback_cache_disable, sl);
2747		if (!mlu->mlu_writeback_cache_disable && sret != SBD_SUCCESS) {
2748			*err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
2749			ret = EFAULT;
2750			goto smm_err_out;
2751		}
2752		mutex_enter(&sl->sl_lock);
2753		if (!mlu->mlu_writeback_cache_disable) {
2754			if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
2755				ua = 1;
2756				sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2757				sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2758			}
2759		} else {
2760			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2761				ua = 1;
2762				sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2763				sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2764			}
2765		}
2766		for (it = sl->sl_it_list; ua && it != NULL;
2767		    it = it->sbd_it_next) {
2768			it->sbd_it_ua_conditions |=
2769			    SBD_UA_MODE_PARAMETERS_CHANGED;
2770		}
2771		mutex_exit(&sl->sl_lock);
2772	}
2773	ua = 0;
2774
2775	if (mlu->mlu_alias_valid) {
2776		alias_sz = strlen((char *)mlu->mlu_buf +
2777		    mlu->mlu_alias_off) + 1;
2778		/*
2779		 * Use the allocated buffer or alloc a new one.
2780		 * Don't copy into sl_alias if sl_alias_alloc_size is 0
2781		 * otherwise or you'll be writing over the data/metadata
2782		 * filename.
2783		 */
2784		mutex_enter(&sl->sl_lock);
2785		if (sl->sl_alias_alloc_size > 0 &&
2786		    sl->sl_alias_alloc_size < alias_sz) {
2787			kmem_free(sl->sl_alias,
2788			    sl->sl_alias_alloc_size);
2789			sl->sl_alias_alloc_size = 0;
2790		}
2791		if (sl->sl_alias_alloc_size == 0) {
2792			sl->sl_alias = kmem_alloc(alias_sz, KM_SLEEP);
2793			sl->sl_alias_alloc_size = alias_sz;
2794		}
2795		(void) strcpy(sl->sl_alias, (char *)mlu->mlu_buf +
2796		    mlu->mlu_alias_off);
2797		lu = sl->sl_lu;
2798		lu->lu_alias = sl->sl_alias;
2799		mutex_exit(&sl->sl_lock);
2800	}
2801
2802	if (mlu->mlu_mgmt_url_valid) {
2803		uint16_t url_sz;
2804
2805		url_sz = strlen((char *)mlu->mlu_buf + mlu->mlu_mgmt_url_off);
2806		if (url_sz > 0)
2807			url_sz++;
2808
2809		mutex_enter(&sl->sl_lock);
2810		if (sl->sl_mgmt_url_alloc_size > 0 &&
2811		    (url_sz == 0 || sl->sl_mgmt_url_alloc_size < url_sz)) {
2812			kmem_free(sl->sl_mgmt_url, sl->sl_mgmt_url_alloc_size);
2813			sl->sl_mgmt_url = NULL;
2814			sl->sl_mgmt_url_alloc_size = 0;
2815		}
2816		if (url_sz > 0) {
2817			if (sl->sl_mgmt_url_alloc_size == 0) {
2818				sl->sl_mgmt_url = kmem_alloc(url_sz, KM_SLEEP);
2819				sl->sl_mgmt_url_alloc_size = url_sz;
2820			}
2821			(void) strcpy(sl->sl_mgmt_url, (char *)mlu->mlu_buf +
2822			    mlu->mlu_mgmt_url_off);
2823		}
2824		for (it = sl->sl_it_list; it != NULL;
2825		    it = it->sbd_it_next) {
2826			it->sbd_it_ua_conditions |=
2827			    SBD_UA_MODE_PARAMETERS_CHANGED;
2828		}
2829		mutex_exit(&sl->sl_lock);
2830	}
2831
2832	if (mlu->mlu_write_protected_valid) {
2833		mutex_enter(&sl->sl_lock);
2834		if (mlu->mlu_write_protected) {
2835			if ((sl->sl_flags & SL_WRITE_PROTECTED) == 0) {
2836				ua = 1;
2837				sl->sl_flags |= SL_WRITE_PROTECTED;
2838			}
2839		} else {
2840			if (sl->sl_flags & SL_WRITE_PROTECTED) {
2841				ua = 1;
2842				sl->sl_flags &= ~SL_WRITE_PROTECTED;
2843			}
2844		}
2845		for (it = sl->sl_it_list; ua && it != NULL;
2846		    it = it->sbd_it_next) {
2847			it->sbd_it_ua_conditions |=
2848			    SBD_UA_MODE_PARAMETERS_CHANGED;
2849		}
2850		mutex_exit(&sl->sl_lock);
2851	}
2852
2853	if (mlu->mlu_lu_size_valid) {
2854		/*
2855		 * validate lu size and set
2856		 * For open file only (registered lu)
2857		 */
2858		mutex_enter(&sl->sl_lock);
2859		old_size = sl->sl_lu_size;
2860		sl->sl_lu_size = mlu->mlu_lu_size;
2861		mutex_exit(&sl->sl_lock);
2862		ret = sbd_open_data_file(sl, err_ret, 1, 1, 1);
2863		if (ret) {
2864			mutex_enter(&sl->sl_lock);
2865			sl->sl_lu_size = old_size;
2866			mutex_exit(&sl->sl_lock);
2867			goto smm_err_out;
2868		}
2869		if (old_size != mlu->mlu_lu_size) {
2870			mutex_enter(&sl->sl_lock);
2871			for (it = sl->sl_it_list; it != NULL;
2872			    it = it->sbd_it_next) {
2873				it->sbd_it_ua_conditions |=
2874				    SBD_UA_CAPACITY_CHANGED;
2875			}
2876			mutex_exit(&sl->sl_lock);
2877		}
2878	}
2879
2880	if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
2881		*err_ret = SBD_RET_META_CREATION_FAILED;
2882		ret = EIO;
2883	}
2884
2885smm_err_out:
2886	if (modify_unregistered) {
2887		(void) sbd_close_delete_lu(sl, 0);
2888	} else {
2889		sl->sl_trans_op = SL_OP_NONE;
2890	}
2891	return (ret);
2892}
2893
2894int
2895sbd_set_global_props(sbd_global_props_t *mlu, int struct_sz,
2896    uint32_t *err_ret)
2897{
2898	sbd_lu_t *sl = NULL;
2899	int ret = 0;
2900	sbd_it_data_t *it;
2901	uint32_t sz;
2902
2903	sz = struct_sz - sizeof (*mlu) + 8 + 1;
2904
2905	/* if there is data in the buf, null terminate it */
2906	if (struct_sz > sizeof (*mlu)) {
2907		mlu->mlu_buf[struct_sz - sizeof (*mlu) + 8 - 1] = 0;
2908	}
2909
2910	*err_ret = 0;
2911
2912	/* Lets validate offsets */
2913	if (((mlu->mlu_mgmt_url_valid) &&
2914	    (mlu->mlu_mgmt_url_off >= sz))) {
2915		return (EINVAL);
2916	}
2917
2918	if (mlu->mlu_mgmt_url_valid) {
2919		uint16_t url_sz;
2920
2921		url_sz = strlen((char *)mlu->mlu_buf + mlu->mlu_mgmt_url_off);
2922		if (url_sz > 0)
2923			url_sz++;
2924
2925		rw_enter(&sbd_global_prop_lock, RW_WRITER);
2926		if (sbd_mgmt_url_alloc_size > 0 &&
2927		    (url_sz == 0 || sbd_mgmt_url_alloc_size < url_sz)) {
2928			kmem_free(sbd_mgmt_url, sbd_mgmt_url_alloc_size);
2929			sbd_mgmt_url = NULL;
2930			sbd_mgmt_url_alloc_size = 0;
2931		}
2932		if (url_sz > 0) {
2933			if (sbd_mgmt_url_alloc_size == 0) {
2934				sbd_mgmt_url = kmem_alloc(url_sz, KM_SLEEP);
2935				sbd_mgmt_url_alloc_size = url_sz;
2936			}
2937			(void) strcpy(sbd_mgmt_url, (char *)mlu->mlu_buf +
2938			    mlu->mlu_mgmt_url_off);
2939		}
2940		/*
2941		 * check each lu to determine whether a UA is needed.
2942		 */
2943		mutex_enter(&sbd_lock);
2944		for (sl = sbd_lu_list; sl; sl = sl->sl_next) {
2945			if (sl->sl_mgmt_url) {
2946				continue;
2947			}
2948			mutex_enter(&sl->sl_lock);
2949			for (it = sl->sl_it_list; it != NULL;
2950			    it = it->sbd_it_next) {
2951				it->sbd_it_ua_conditions |=
2952				    SBD_UA_MODE_PARAMETERS_CHANGED;
2953			}
2954			mutex_exit(&sl->sl_lock);
2955		}
2956		mutex_exit(&sbd_lock);
2957		rw_exit(&sbd_global_prop_lock);
2958	}
2959	return (ret);
2960}
2961
2962/* ARGSUSED */
2963int
2964sbd_delete_locked_lu(sbd_lu_t *sl, uint32_t *err_ret,
2965    stmf_state_change_info_t *ssi)
2966{
2967	int i;
2968	stmf_status_t ret;
2969
2970	if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2971	    !sl->sl_state_not_acked) {
2972		goto sdl_do_dereg;
2973	}
2974
2975	if ((sl->sl_state != STMF_STATE_ONLINE) ||
2976	    sl->sl_state_not_acked) {
2977		return (EBUSY);
2978	}
2979
2980	ret = stmf_ctl(STMF_CMD_LU_OFFLINE, sl->sl_lu, ssi);
2981	if ((ret != STMF_SUCCESS) && (ret != STMF_ALREADY)) {
2982		return (EBUSY);
2983	}
2984
2985	for (i = 0; i < 500; i++) {
2986		if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2987		    !sl->sl_state_not_acked) {
2988			goto sdl_do_dereg;
2989		}
2990		delay(drv_usectohz(10000));
2991	}
2992	return (EBUSY);
2993
2994sdl_do_dereg:;
2995	if (stmf_deregister_lu(sl->sl_lu) != STMF_SUCCESS)
2996		return (EBUSY);
2997	atomic_dec_32(&sbd_lu_count);
2998
2999	return (sbd_close_delete_lu(sl, 0));
3000}
3001
3002int
3003sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret)
3004{
3005	sbd_lu_t *sl;
3006	sbd_status_t sret;
3007	stmf_state_change_info_t ssi;
3008	int ret;
3009
3010	if (dlu->dlu_by_meta_name) {
3011		((char *)dlu)[struct_sz - 1] = 0;
3012		sret = sbd_find_and_lock_lu(NULL, dlu->dlu_meta_name,
3013		    SL_OP_DELETE_LU, &sl);
3014	} else {
3015		sret = sbd_find_and_lock_lu(dlu->dlu_guid, NULL,
3016		    SL_OP_DELETE_LU, &sl);
3017	}
3018	if (sret != SBD_SUCCESS) {
3019		if (sret == SBD_BUSY) {
3020			*err_ret = SBD_RET_LU_BUSY;
3021			return (EBUSY);
3022		} else if (sret == SBD_NOT_FOUND) {
3023			*err_ret = SBD_RET_NOT_FOUND;
3024			return (ENOENT);
3025		}
3026		return (EIO);
3027	}
3028
3029	ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
3030	ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
3031	ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
3032
3033	if (ret) {
3034		/* Once its locked, no need to grab mutex again */
3035		sl->sl_trans_op = SL_OP_NONE;
3036	}
3037	return (ret);
3038}
3039
3040sbd_status_t
3041sbd_data_read(sbd_lu_t *sl, struct scsi_task *task,
3042    uint64_t offset, uint64_t size, uint8_t *buf)
3043{
3044	int ret, ioflag = 0;
3045	long resid;
3046	hrtime_t xfer_start;
3047	uint8_t op = task->task_cdb[0];
3048
3049	if ((offset + size) > sl->sl_lu_size) {
3050		return (SBD_IO_PAST_EOF);
3051	}
3052
3053	offset += sl->sl_data_offset;
3054
3055	/*
3056	 * Check to see if the command is READ(10), READ(12), or READ(16).
3057	 * If it is then check for bit 3 being set to indicate if Forced
3058	 * Unit Access is being requested. If so, the FSYNC flag will be set
3059	 * on the read.
3060	 */
3061	if (((op == SCMD_READ_G1) || (op == SCMD_READ_G4) ||
3062	    (op == SCMD_READ_G5)) && (task->task_cdb[1] & BIT_3)) {
3063		ioflag = FSYNC;
3064	}
3065	if ((offset + size) > sl->sl_data_readable_size) {
3066		uint64_t store_end;
3067		if (offset > sl->sl_data_readable_size) {
3068			bzero(buf, size);
3069			return (SBD_SUCCESS);
3070		}
3071		store_end = sl->sl_data_readable_size - offset;
3072		bzero(buf + store_end, size - store_end);
3073		size = store_end;
3074	}
3075
3076	xfer_start = gethrtime();
3077	DTRACE_PROBE5(backing__store__read__start, sbd_lu_t *, sl,
3078	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
3079	    scsi_task_t *, task);
3080
3081	/*
3082	 * Don't proceed if the device has been closed
3083	 * This can occur on an access state change to standby or
3084	 * a delete. The writer lock is acquired before closing the
3085	 * lu.
3086	 */
3087	rw_enter(&sl->sl_access_state_lock, RW_READER);
3088	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3089		rw_exit(&sl->sl_access_state_lock);
3090		return (SBD_FAILURE);
3091	}
3092
3093	ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3094	    (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3095	    &resid);
3096	rw_exit(&sl->sl_access_state_lock);
3097
3098	stmf_lu_xfer_done(task, B_TRUE /* read */,
3099	    (gethrtime() - xfer_start));
3100	DTRACE_PROBE6(backing__store__read__end, sbd_lu_t *, sl,
3101	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
3102	    int, ret, scsi_task_t *, task);
3103
3104over_sl_data_read:
3105	if (ret || resid) {
3106		stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
3107		    resid);
3108		return (SBD_FAILURE);
3109	}
3110
3111	return (SBD_SUCCESS);
3112}
3113
3114sbd_status_t
3115sbd_data_write(sbd_lu_t *sl, struct scsi_task *task,
3116    uint64_t offset, uint64_t size, uint8_t *buf)
3117{
3118	int ret;
3119	long resid;
3120	sbd_status_t sret = SBD_SUCCESS;
3121	int ioflag;
3122	hrtime_t xfer_start;
3123	uint8_t op = task->task_cdb[0];
3124	boolean_t fua_bit = B_FALSE;
3125
3126	if ((offset + size) > sl->sl_lu_size) {
3127		return (SBD_IO_PAST_EOF);
3128	}
3129
3130	offset += sl->sl_data_offset;
3131
3132	/*
3133	 * Check to see if the command is WRITE(10), WRITE(12), or WRITE(16).
3134	 * If it is then check for bit 3 being set to indicate if Forced
3135	 * Unit Access is being requested. If so, the FSYNC flag will be set
3136	 * on the write.
3137	 */
3138	if (((op == SCMD_WRITE_G1) || (op == SCMD_WRITE_G4) ||
3139	    (op == SCMD_WRITE_G5)) && (task->task_cdb[1] & BIT_3)) {
3140		fua_bit = B_TRUE;
3141	}
3142	if (((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3143	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) || fua_bit) {
3144		ioflag = FSYNC;
3145	} else {
3146		ioflag = 0;
3147	}
3148
3149	xfer_start = gethrtime();
3150	DTRACE_PROBE5(backing__store__write__start, sbd_lu_t *, sl,
3151	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
3152	    scsi_task_t *, task);
3153
3154	/*
3155	 * Don't proceed if the device has been closed
3156	 * This can occur on an access state change to standby or
3157	 * a delete. The writer lock is acquired before closing the
3158	 * lu.
3159	 */
3160	rw_enter(&sl->sl_access_state_lock, RW_READER);
3161	if ((sl->sl_flags & SL_MEDIA_LOADED) == 0) {
3162		rw_exit(&sl->sl_access_state_lock);
3163		return (SBD_FAILURE);
3164	}
3165	ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
3166	    (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
3167	    &resid);
3168	rw_exit(&sl->sl_access_state_lock);
3169
3170	stmf_lu_xfer_done(task, B_FALSE /* write */,
3171	    (gethrtime() - xfer_start));
3172	DTRACE_PROBE6(backing__store__write__end, sbd_lu_t *, sl,
3173	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
3174	    int, ret, scsi_task_t *, task);
3175
3176	if ((ret == 0) && (resid == 0) &&
3177	    (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
3178	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
3179		sret = sbd_flush_data_cache(sl, 1);
3180	}
3181over_sl_data_write:
3182	if ((ret || resid) || (sret != SBD_SUCCESS)) {
3183		return (SBD_FAILURE);
3184	} else if ((offset + size) > sl->sl_data_readable_size) {
3185		uint64_t old_size, new_size;
3186
3187		do {
3188			old_size = sl->sl_data_readable_size;
3189			if ((offset + size) <= old_size)
3190				break;
3191			new_size = offset + size;
3192		} while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
3193		    new_size) != old_size);
3194	}
3195
3196	return (SBD_SUCCESS);
3197}
3198
3199int
3200sbd_get_global_props(sbd_global_props_t *oslp, uint32_t oslp_sz,
3201    uint32_t *err_ret)
3202{
3203	uint32_t sz = 0;
3204	uint16_t off;
3205
3206	rw_enter(&sbd_global_prop_lock, RW_READER);
3207	if (sbd_mgmt_url) {
3208		sz += strlen(sbd_mgmt_url) + 1;
3209	}
3210	bzero(oslp, sizeof (*oslp) - 8);
3211	oslp->mlu_buf_size_needed = sz;
3212
3213	if (sz > (oslp_sz - sizeof (*oslp) + 8)) {
3214		*err_ret = SBD_RET_INSUFFICIENT_BUF_SPACE;
3215		rw_exit(&sbd_global_prop_lock);
3216		return (ENOMEM);
3217	}
3218
3219	off = 0;
3220	if (sbd_mgmt_url) {
3221		oslp->mlu_mgmt_url_valid = 1;
3222		oslp->mlu_mgmt_url_off = off;
3223		(void) strcpy((char *)&oslp->mlu_buf[off], sbd_mgmt_url);
3224		off += strlen(sbd_mgmt_url) + 1;
3225	}
3226
3227	rw_exit(&sbd_global_prop_lock);
3228	return (0);
3229}
3230
3231static int
3232sbd_get_unmap_props(sbd_unmap_props_t *sup,
3233    sbd_unmap_props_t *osup, uint32_t *err_ret)
3234{
3235	sbd_status_t sret;
3236	sbd_lu_t *sl = NULL;
3237
3238	if (sup->sup_guid_valid) {
3239		sret = sbd_find_and_lock_lu(sup->sup_guid,
3240		    NULL, SL_OP_LU_PROPS, &sl);
3241	} else {
3242		sret = sbd_find_and_lock_lu(NULL,
3243		    (uint8_t *)sup->sup_zvol_path, SL_OP_LU_PROPS,
3244		    &sl);
3245	}
3246	if (sret != SBD_SUCCESS) {
3247		if (sret == SBD_BUSY) {
3248			*err_ret = SBD_RET_LU_BUSY;
3249			return (EBUSY);
3250		} else if (sret == SBD_NOT_FOUND) {
3251			*err_ret = SBD_RET_NOT_FOUND;
3252			return (ENOENT);
3253		}
3254		return (EIO);
3255	}
3256
3257	sup->sup_found_lu = 1;
3258	sup->sup_guid_valid = 1;
3259	bcopy(sl->sl_device_id + 4, sup->sup_guid, 16);
3260	if (sl->sl_flags & SL_UNMAP_ENABLED)
3261		sup->sup_unmap_enabled = 1;
3262	else
3263		sup->sup_unmap_enabled = 0;
3264
3265	*osup = *sup;
3266	sl->sl_trans_op = SL_OP_NONE;
3267
3268	return (0);
3269}
3270
3271int
3272sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
3273    sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret)
3274{
3275	sbd_status_t sret;
3276	sbd_lu_t *sl = NULL;
3277	uint32_t sz;
3278	uint16_t off;
3279
3280	if (islp->slp_input_guid) {
3281		sret = sbd_find_and_lock_lu(islp->slp_guid, NULL,
3282		    SL_OP_LU_PROPS, &sl);
3283	} else {
3284		((char *)islp)[islp_sz - 1] = 0;
3285		sret = sbd_find_and_lock_lu(NULL, islp->slp_buf,
3286		    SL_OP_LU_PROPS, &sl);
3287	}
3288	if (sret != SBD_SUCCESS) {
3289		if (sret == SBD_BUSY) {
3290			*err_ret = SBD_RET_LU_BUSY;
3291			return (EBUSY);
3292		} else if (sret == SBD_NOT_FOUND) {
3293			*err_ret = SBD_RET_NOT_FOUND;
3294			return (ENOENT);
3295		}
3296		return (EIO);
3297	}
3298
3299	sz = strlen(sl->sl_name) + 1;
3300	if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
3301		if (sl->sl_data_filename) {
3302			sz += strlen(sl->sl_data_filename) + 1;
3303		}
3304	}
3305	sz += sl->sl_serial_no_size;
3306	if (sl->sl_alias) {
3307		sz += strlen(sl->sl_alias) + 1;
3308	}
3309
3310	rw_enter(&sbd_global_prop_lock, RW_READER);
3311	if (sl->sl_mgmt_url) {
3312		sz += strlen(sl->sl_mgmt_url) + 1;
3313	} else if (sbd_mgmt_url) {
3314		sz += strlen(sbd_mgmt_url) + 1;
3315	}
3316	bzero(oslp, sizeof (*oslp) - 8);
3317	oslp->slp_buf_size_needed = sz;
3318
3319	if (sz > (oslp_sz - sizeof (*oslp) + 8)) {
3320		sl->sl_trans_op = SL_OP_NONE;
3321		*err_ret = SBD_RET_INSUFFICIENT_BUF_SPACE;
3322		rw_exit(&sbd_global_prop_lock);
3323		return (ENOMEM);
3324	}
3325
3326	off = 0;
3327	(void) strcpy((char *)oslp->slp_buf, sl->sl_name);
3328	oslp->slp_meta_fname_off = off;
3329	off += strlen(sl->sl_name) + 1;
3330	if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
3331		oslp->slp_meta_fname_valid = 1;
3332		oslp->slp_separate_meta = 1;
3333		if (sl->sl_data_filename) {
3334			oslp->slp_data_fname_valid = 1;
3335			oslp->slp_data_fname_off = off;
3336			(void) strcpy((char *)&oslp->slp_buf[off],
3337			    sl->sl_data_filename);
3338			off += strlen(sl->sl_data_filename) + 1;
3339		}
3340	} else {
3341		oslp->slp_data_fname_valid = 1;
3342		oslp->slp_data_fname_off = oslp->slp_meta_fname_off;
3343		if (sl->sl_flags & SL_ZFS_META) {
3344			oslp->slp_zfs_meta = 1;
3345		}
3346	}
3347	if (sl->sl_alias) {
3348		oslp->slp_alias_valid = 1;
3349		oslp->slp_alias_off = off;
3350		(void) strcpy((char *)&oslp->slp_buf[off], sl->sl_alias);
3351		off += strlen(sl->sl_alias) + 1;
3352	}
3353	if (sl->sl_mgmt_url) {
3354		oslp->slp_mgmt_url_valid = 1;
3355		oslp->slp_mgmt_url_off = off;
3356		(void) strcpy((char *)&oslp->slp_buf[off], sl->sl_mgmt_url);
3357		off += strlen(sl->sl_mgmt_url) + 1;
3358	} else if (sbd_mgmt_url) {
3359		oslp->slp_mgmt_url_valid = 1;
3360		oslp->slp_mgmt_url_off = off;
3361		(void) strcpy((char *)&oslp->slp_buf[off], sbd_mgmt_url);
3362		off += strlen(sbd_mgmt_url) + 1;
3363	}
3364	if (sl->sl_serial_no_size) {
3365		oslp->slp_serial_off = off;
3366		bcopy(sl->sl_serial_no, &oslp->slp_buf[off],
3367		    sl->sl_serial_no_size);
3368		oslp->slp_serial_size = sl->sl_serial_no_size;
3369		oslp->slp_serial_valid = 1;
3370		off += sl->sl_serial_no_size;
3371	}
3372
3373	oslp->slp_lu_size = sl->sl_lu_size;
3374	oslp->slp_blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
3375
3376	oslp->slp_access_state = sl->sl_access_state;
3377
3378	if (sl->sl_flags & SL_VID_VALID) {
3379		oslp->slp_lu_vid = 1;
3380		bcopy(sl->sl_vendor_id, oslp->slp_vid, 8);
3381	} else {
3382		bcopy(sbd_vendor_id, oslp->slp_vid, 8);
3383	}
3384	if (sl->sl_flags & SL_PID_VALID) {
3385		oslp->slp_lu_pid = 1;
3386		bcopy(sl->sl_product_id, oslp->slp_pid, 16);
3387	} else {
3388		bcopy(sbd_product_id, oslp->slp_pid, 16);
3389	}
3390	if (sl->sl_flags & SL_REV_VALID) {
3391		oslp->slp_lu_rev = 1;
3392		bcopy(sl->sl_revision, oslp->slp_rev, 4);
3393	} else {
3394		bcopy(sbd_revision, oslp->slp_rev, 4);
3395	}
3396	bcopy(sl->sl_device_id + 4, oslp->slp_guid, 16);
3397
3398	if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE)
3399		oslp->slp_writeback_cache_disable_cur = 1;
3400	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE)
3401		oslp->slp_writeback_cache_disable_saved = 1;
3402	if (sl->sl_flags & SL_WRITE_PROTECTED)
3403		oslp->slp_write_protected = 1;
3404
3405	sl->sl_trans_op = SL_OP_NONE;
3406
3407	rw_exit(&sbd_global_prop_lock);
3408	return (0);
3409}
3410
3411/*
3412 * Returns an allocated string with the "<pool>/..." form of the zvol name.
3413 */
3414static char *
3415sbd_get_zvol_name(sbd_lu_t *sl)
3416{
3417	char *src;
3418	char *p;
3419
3420	if (sl->sl_data_filename)
3421		src = sl->sl_data_filename;
3422	else
3423		src = sl->sl_meta_filename;
3424	/* There has to be a better way */
3425	if (SBD_IS_ZVOL(src) != 0) {
3426		ASSERT(0);
3427	}
3428	src += 14;	/* Past /dev/zvol/dsk/ */
3429	if (*src == '/')
3430		src++;	/* or /dev/zvol/rdsk/ */
3431	p = (char *)kmem_alloc(strlen(src) + 1, KM_SLEEP);
3432	(void) strcpy(p, src);
3433	return (p);
3434}
3435
3436/*
3437 * this function creates a local metadata zvol property
3438 */
3439sbd_status_t
3440sbd_create_zfs_meta_object(sbd_lu_t *sl)
3441{
3442	/*
3443	 * -allocate 1/2 the property size, the zfs property
3444	 *  is 8k in size and stored as ascii hex string, all
3445	 *  we needed is 4k buffer to store the binary data.
3446	 * -initialize reader/write lock
3447	 */
3448	if ((sl->sl_zfs_meta = kmem_zalloc(ZAP_MAXVALUELEN / 2, KM_SLEEP))
3449	    == NULL)
3450		return (SBD_FAILURE);
3451	rw_init(&sl->sl_zfs_meta_lock, NULL, RW_DRIVER, NULL);
3452	return (SBD_SUCCESS);
3453}
3454
/*
 * Convert one ASCII hex digit ('0'-'9', 'A'-'F' or 'a'-'f') to its
 * numeric value 0-15.  Any other character yields -1.
 */
char
sbd_ctoi(char c)
{
	if ((c >= '0') && (c <= '9'))
		return (c - '0');
	if ((c >= 'A') && (c <= 'F'))
		return (c - 'A' + 10);
	if ((c >= 'a') && (c <= 'f'))
		return (c - 'a' + 10);
	return (-1);
}
3468
3469/*
3470 * read zvol property and convert to binary
3471 */
3472sbd_status_t
3473sbd_open_zfs_meta(sbd_lu_t *sl)
3474{
3475	char		*meta = NULL, cl, ch;
3476	int		i;
3477	char		*tmp, *ptr;
3478	uint64_t	rc = SBD_SUCCESS;
3479	int		len;
3480	char		*file;
3481
3482	if (sl->sl_zfs_meta == NULL) {
3483		if (sbd_create_zfs_meta_object(sl) == SBD_FAILURE)
3484			return (SBD_FAILURE);
3485	} else {
3486		bzero(sl->sl_zfs_meta, (ZAP_MAXVALUELEN / 2));
3487	}
3488
3489	rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
3490	file = sbd_get_zvol_name(sl);
3491	if (sbd_zvolget(file, &meta)) {
3492		rc = SBD_FAILURE;
3493		goto done;
3494	}
3495	tmp = meta;
3496	/* convert ascii hex to binary meta */
3497	len = strlen(meta);
3498	ptr = sl->sl_zfs_meta;
3499	for (i = 0; i < len; i += 2) {
3500		ch = sbd_ctoi(*tmp++);
3501		cl = sbd_ctoi(*tmp++);
3502		if (ch == -1 || cl == -1) {
3503			rc = SBD_FAILURE;
3504			break;
3505		}
3506		*ptr++ = (ch << 4) + cl;
3507	}
3508done:
3509	rw_exit(&sl->sl_zfs_meta_lock);
3510	if (meta)
3511		kmem_free(meta, len + 1);
3512	kmem_free(file, strlen(file) + 1);
3513	return (rc);
3514}
3515
3516sbd_status_t
3517sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
3518{
3519	ASSERT(sl->sl_zfs_meta);
3520	rw_enter(&sl->sl_zfs_meta_lock, RW_READER);
3521	bcopy(&sl->sl_zfs_meta[off], buf, sz);
3522	rw_exit(&sl->sl_zfs_meta_lock);
3523	return (SBD_SUCCESS);
3524}
3525
3526sbd_status_t
3527sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
3528{
3529	ASSERT(sl->sl_zfs_meta);
3530	if ((off + sz) > (ZAP_MAXVALUELEN / 2 - 1)) {
3531		return (SBD_META_CORRUPTED);
3532	}
3533	if ((off + sz) > sl->sl_meta_size_used) {
3534		sl->sl_meta_size_used = off + sz;
3535		if (sl->sl_total_meta_size < sl->sl_meta_size_used) {
3536			uint64_t meta_align =
3537			    (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
3538			sl->sl_total_meta_size = (sl->sl_meta_size_used +
3539			    meta_align) & (~meta_align);
3540		}
3541	}
3542	rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
3543	bcopy(buf, &sl->sl_zfs_meta[off], sz);
3544	rw_exit(&sl->sl_zfs_meta_lock);
3545	/*
3546	 * During creation of a logical unit, sbd_update_zfs_prop will be
3547	 * called separately to avoid multiple calls as each meta section
3548	 * create/update will result in a call to sbd_write_zfs_meta().
3549	 * We only need to update the zvol once during create.
3550	 */
3551	mutex_enter(&sl->sl_lock);
3552	if (sl->sl_trans_op != SL_OP_CREATE_REGISTER_LU) {
3553		mutex_exit(&sl->sl_lock);
3554		return (sbd_update_zfs_prop(sl));
3555	}
3556	mutex_exit(&sl->sl_lock);
3557	return (SBD_SUCCESS);
3558}
3559
3560sbd_status_t
3561sbd_update_zfs_prop(sbd_lu_t *sl)
3562{
3563	char	*ptr, *ah_meta;
3564	char	*dp = NULL;
3565	int	i, num;
3566	char	*file;
3567	sbd_status_t ret = SBD_SUCCESS;
3568
3569	ASSERT(sl->sl_zfs_meta);
3570	ptr = ah_meta = kmem_zalloc(ZAP_MAXVALUELEN, KM_SLEEP);
3571	rw_enter(&sl->sl_zfs_meta_lock, RW_READER);
3572	/* convert local copy to ascii hex */
3573	dp = sl->sl_zfs_meta;
3574	for (i = 0; i < sl->sl_total_meta_size; i++, dp++) {
3575		num = ((*dp) >> 4) & 0xF;
3576		*ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
3577		num = (*dp) & 0xF;
3578		*ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
3579	}
3580	*ah_meta = '\0';
3581	file = sbd_get_zvol_name(sl);
3582	if (sbd_zvolset(file, (char *)ptr)) {
3583		ret = SBD_META_CORRUPTED;
3584	}
3585	rw_exit(&sl->sl_zfs_meta_lock);
3586	kmem_free(ptr, ZAP_MAXVALUELEN);
3587	kmem_free(file, strlen(file) + 1);
3588	return (ret);
3589}
3590
/*
 * Returns 1 if `path' begins with "/dev/zvol", 0 otherwise.
 */
int
sbd_is_zvol(char *path)
{
	/* SBD_IS_ZVOL() has strncmp() semantics: 0 means a match. */
	return ((SBD_IS_ZVOL(path) == 0) ? 1 : 0);
}
3601
3602/*
3603 * set write cache disable
3604 * wcd - 1 = disable, 0 = enable
3605 */
3606sbd_status_t
3607sbd_wcd_set(int wcd, sbd_lu_t *sl)
3608{
3609	/* translate to wce bit */
3610	int wce = wcd ? 0 : 1;
3611	int ret;
3612	sbd_status_t sret = SBD_SUCCESS;
3613
3614	mutex_enter(&sl->sl_lock);
3615	sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3616
3617	if (sl->sl_data_vp->v_type == VREG) {
3618		sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
3619		goto done;
3620	}
3621
3622	ret = VOP_IOCTL(sl->sl_data_vp, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL,
3623	    kcred, NULL, NULL);
3624	if (ret == 0) {
3625		sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3626		sl->sl_flags &= ~SL_FLUSH_ON_DISABLED_WRITECACHE;
3627	} else {
3628		sl->sl_flags |= SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
3629		sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
3630		sret = SBD_FAILURE;
3631		goto done;
3632	}
3633
3634done:
3635	mutex_exit(&sl->sl_lock);
3636	return (sret);
3637}
3638
3639/*
3640 * get write cache disable
3641 * wcd - 1 = disable, 0 = enable
3642 */
3643void
3644sbd_wcd_get(int *wcd, sbd_lu_t *sl)
3645{
3646	int wce;
3647	int ret;
3648
3649	if (sl->sl_data_vp->v_type == VREG) {
3650		*wcd = 0;
3651		return;
3652	}
3653
3654	ret = VOP_IOCTL(sl->sl_data_vp, DKIOCGETWCE, (intptr_t)&wce, FKIOCTL,
3655	    kcred, NULL, NULL);
3656	/* if write cache get failed, assume disabled */
3657	if (ret) {
3658		*wcd = 1;
3659	} else {
3660		/* translate to wcd bit */
3661		*wcd = wce ? 0 : 1;
3662	}
3663}
3664
3665int
3666sbd_zvolget(char *zvol_name, char **comstarprop)
3667{
3668	ldi_handle_t	zfs_lh;
3669	nvlist_t	*nv = NULL, *nv2;
3670	zfs_cmd_t	*zc;
3671	char		*ptr;
3672	int size = 1024;
3673	int unused;
3674	int rc;
3675
3676	if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3677	    &zfs_lh, sbd_zfs_ident)) != 0) {
3678		cmn_err(CE_WARN, "ldi_open %d", rc);
3679		return (ENXIO);
3680	}
3681
3682	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3683	(void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3684again:
3685	zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
3686	    KM_SLEEP);
3687	zc->zc_nvlist_dst_size = size;
3688	rc = ldi_ioctl(zfs_lh, ZFS_IOC_OBJSET_STATS, (intptr_t)zc,
3689	    FKIOCTL, kcred, &unused);
3690	/*
3691	 * ENOMEM means the list is larger than what we've allocated
3692	 * ldi_ioctl will fail with ENOMEM only once
3693	 */
3694	if (rc == ENOMEM) {
3695		int newsize;
3696		newsize = zc->zc_nvlist_dst_size;
3697		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3698		size = newsize;
3699		goto again;
3700	} else if (rc != 0) {
3701		goto out;
3702	}
3703	rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
3704	    zc->zc_nvlist_dst_size, &nv, 0);
3705	ASSERT(rc == 0);	/* nvlist_unpack should not fail */
3706	if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
3707		rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
3708		if (rc != 0) {
3709			cmn_err(CE_WARN, "couldn't get value");
3710		} else {
3711			*comstarprop = kmem_alloc(strlen(ptr) + 1,
3712			    KM_SLEEP);
3713			(void) strcpy(*comstarprop, ptr);
3714		}
3715	}
3716out:
3717	if (nv != NULL)
3718		nvlist_free(nv);
3719	kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
3720	kmem_free(zc, sizeof (zfs_cmd_t));
3721	(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3722
3723	return (rc);
3724}
3725
3726int
3727sbd_zvolset(char *zvol_name, char *comstarprop)
3728{
3729	ldi_handle_t	zfs_lh;
3730	nvlist_t	*nv;
3731	char		*packed = NULL;
3732	size_t		len;
3733	zfs_cmd_t	*zc;
3734	int unused;
3735	int rc;
3736
3737	if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
3738	    &zfs_lh, sbd_zfs_ident)) != 0) {
3739		cmn_err(CE_WARN, "ldi_open %d", rc);
3740		return (ENXIO);
3741	}
3742	(void) nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP);
3743	(void) nvlist_add_string(nv, "stmf_sbd_lu", comstarprop);
3744	if ((rc = nvlist_pack(nv, &packed, &len, NV_ENCODE_NATIVE, KM_SLEEP))) {
3745		goto out;
3746	}
3747
3748	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3749	(void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
3750	zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
3751	zc->zc_nvlist_src_size = len;
3752	rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
3753	    FKIOCTL, kcred, &unused);
3754	if (rc != 0) {
3755		cmn_err(CE_NOTE, "ioctl failed %d", rc);
3756	}
3757	kmem_free(zc, sizeof (zfs_cmd_t));
3758	if (packed)
3759		kmem_free(packed, len);
3760out:
3761	nvlist_free(nv);
3762	(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
3763	return (rc);
3764}
3765
3766/*
3767 * Unmap a region in a volume.  Currently only supported for zvols.
3768 * The list of extents to be freed is passed in a dkioc_free_list_t
3769 * which the caller is responsible for destroying.
3770 */
3771int
3772sbd_unmap(sbd_lu_t *sl, dkioc_free_list_t *dfl)
3773{
3774	vnode_t *vp;
3775	int unused, ret;
3776
3777	/* Nothing to do */
3778	if (dfl->dfl_num_exts == 0)
3779		return (0);
3780
3781	/*
3782	 * TODO: unmap performance may be improved by not doing the synchronous
3783	 * removal of the blocks and writing of the metadata.  The
3784	 * transaction is in the zil so the state should be stable.
3785	 */
3786	dfl->dfl_flags = (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) ?
3787	    DF_WAIT_SYNC : 0;
3788
3789	/* Use the data vnode we have to send a fop_ioctl(). */
3790	vp = sl->sl_data_vp;
3791	if (vp == NULL) {
3792		cmn_err(CE_WARN, "Cannot unmap - no vnode pointer.");
3793		return (EIO);
3794	}
3795
3796	ret = VOP_IOCTL(vp, DKIOCFREE, (intptr_t)dfl, FKIOCTL, kcred,
3797	    &unused, NULL);
3798
3799	return (ret);
3800}
3801
3802/*
3803 * Check if this lu belongs to sbd or some other lu
3804 * provider. A simple check for one of the module
3805 * entry points is sufficient.
3806 */
3807int
3808sbd_is_valid_lu(stmf_lu_t *lu)
3809{
3810	if (lu->lu_new_task == sbd_new_task)
3811		return (1);
3812	return (0);
3813}
3814
3815uint8_t
3816sbd_get_lbasize_shift(stmf_lu_t *lu)
3817{
3818	sbd_lu_t *sl = (sbd_lu_t *)lu->lu_provider_private;
3819
3820	return (sl->sl_data_blocksize_shift);
3821}
3822