1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright (c) 2017 by Delphix. All rights reserved.
27 */
28
29#include <sys/types.h>
30#include <sys/t_lock.h>
31#include <sys/param.h>
32#include <sys/conf.h>
33#include <sys/systm.h>
34#include <sys/sysmacros.h>
35#include <sys/buf.h>
36#include <sys/cred.h>
37#include <sys/user.h>
38#include <sys/stat.h>
39#include <sys/uio.h>
40#include <sys/vnode.h>
41#include <sys/fs/snode.h>
42#include <sys/open.h>
43#include <sys/kmem.h>
44#include <sys/file.h>
45#include <sys/debug.h>
46#include <sys/tnf_probe.h>
47
48/* Don't #include <sys/ddi.h> - it #undef's getmajor() */
49
50#include <sys/sunddi.h>
51#include <sys/sunndi.h>
52#include <sys/sunpm.h>
53#include <sys/ddi_impldefs.h>
54#include <sys/ndi_impldefs.h>
55#include <sys/esunddi.h>
56#include <sys/autoconf.h>
57#include <sys/modctl.h>
58#include <sys/epm.h>
59#include <sys/dacf.h>
60#include <sys/sunmdi.h>
61#include <sys/instance.h>
62#include <sys/sdt.h>
63
/*
 * Forward declarations of the static helpers that issue the
 * DDI_CTLOPS_ATTACH / DDI_CTLOPS_DETACH ctlops around the driver's
 * attach and detach entry points (see devi_attach() and devi_detach()).
 */
static void i_attach_ctlop(dev_info_t *, ddi_attach_cmd_t, ddi_pre_post_t, int);
static void i_detach_ctlop(dev_info_t *, ddi_detach_cmd_t, ddi_pre_post_t, int);

/*
 * decide what to do when a double dev_lclose is detected: dev_lclose()
 * passes this value to cmn_err() as the message level, so DEBUG kernels
 * panic while non-DEBUG kernels only warn.
 */
#ifdef	DEBUG
int		dev_lclose_ce = CE_PANIC;
#else	/* DEBUG */
int		dev_lclose_ce = CE_WARN;
#endif	/* DEBUG */
73
74/*
75 * Configuration-related entry points for nexus and leaf drivers
76 */
77int
78devi_identify(dev_info_t *devi)
79{
80	struct dev_ops *ops;
81	int (*fn)(dev_info_t *);
82
83	if ((ops = ddi_get_driver(devi)) == NULL ||
84	    (fn = ops->devo_identify) == NULL)
85		return (-1);
86
87	return ((*fn)(devi));
88}
89
90int
91devi_probe(dev_info_t *devi)
92{
93	int rv, probe_failed;
94	pm_ppm_cookie_t ppm_cookie;
95	struct dev_ops *ops;
96	int (*fn)(dev_info_t *);
97
98	ops = ddi_get_driver(devi);
99	ASSERT(ops);
100
101	pm_pre_probe(devi, &ppm_cookie);
102
103	/*
104	 * probe(9E) in 2.0 implies that you can get
105	 * away with not writing one of these .. so we
106	 * pretend we're 'nulldev' if we don't find one (sigh).
107	 */
108	if ((fn = ops->devo_probe) == NULL) {
109		if (ddi_dev_is_sid(devi) == DDI_SUCCESS)
110			rv = DDI_PROBE_DONTCARE;
111		else
112			rv = DDI_PROBE_FAILURE;
113	} else
114		rv = (*fn)(devi);
115
116	switch (rv) {
117	case DDI_PROBE_DONTCARE:
118	case DDI_PROBE_SUCCESS:
119		probe_failed = 0;
120		break;
121	default:
122		probe_failed = 1;
123		break;
124	}
125	pm_post_probe(&ppm_cookie, rv, probe_failed);
126
127	return (rv);
128}
129
130
/*
 * devi_attach()
 *	attach a device instance to the system if the driver supplies an
 *	attach(9E) entrypoint.
 *
 *	Returns the mdi_pre_attach() error if that hook fails, the result
 *	of e_ddi_resume() when a resume command is handled by the parent,
 *	DDI_FAILURE when the driver has no devo_attach routine, and
 *	otherwise whatever the driver's attach routine returns.
 */
int
devi_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	struct dev_ops *ops;
	int error;
	int (*fn)(dev_info_t *, ddi_attach_cmd_t);
	pm_ppm_cookie_t pc;

	/*
	 * If the mdi pre-attach hook fails we bail out before any pm hook
	 * has run, so neither post hook is called on this path.
	 */
	if ((error = mdi_pre_attach(devi, cmd)) != DDI_SUCCESS) {
		return (error);
	}

	pm_pre_attach(devi, &pc, cmd);

	/*
	 * Resume requests are handed to e_ddi_resume() instead of the
	 * driver when e_ddi_parental_suspend_resume() says so.
	 */
	if ((cmd == DDI_RESUME || cmd == DDI_PM_RESUME) &&
	    e_ddi_parental_suspend_resume(devi)) {
		error = e_ddi_resume(devi, cmd);
		goto done;
	}
	ops = ddi_get_driver(devi);
	ASSERT(ops);
	if ((fn = ops->devo_attach) == NULL) {
		error = DDI_FAILURE;
		goto done;
	}

	/*
	 * Call the driver's attach(9e) entrypoint, bracketed by the
	 * DDI_PRE and DDI_POST attach ctlops.  The POST ctlop is told the
	 * driver's return value.
	 */
	i_attach_ctlop(devi, cmd, DDI_PRE, 0);
	error = (*fn)(devi, cmd);
	i_attach_ctlop(devi, cmd, DDI_POST, error);

done:
	/* Every path that ran pm_pre_attach() runs both post hooks. */
	pm_post_attach(&pc, error);
	mdi_post_attach(devi, cmd, error);

	return (error);
}
175
/*
 * devi_detach()
 *	detach a device instance from the system if the driver supplies a
 *	detach(9E) entrypoint.
 *
 *	Returns the result of e_ddi_suspend() when a suspend command is
 *	handled by the parent, DDI_FAILURE when the driver has no
 *	devo_detach routine, the mdi_pre_detach() error if that hook
 *	fails, and otherwise whatever the driver's detach routine returns.
 */
int
devi_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	struct dev_ops *ops;
	int error;
	int (*fn)(dev_info_t *, ddi_detach_cmd_t);
	pm_ppm_cookie_t pc;

	ASSERT(cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND ||
	    cmd == DDI_DETACH);

	/*
	 * Suspend requests are handed to e_ddi_suspend() instead of the
	 * driver when e_ddi_parental_suspend_resume() says so.  This path,
	 * like the two early returns below, runs none of the pre/post hooks.
	 */
	if ((cmd == DDI_SUSPEND || cmd == DDI_PM_SUSPEND) &&
	    e_ddi_parental_suspend_resume(devi)) {
		return (e_ddi_suspend(devi, cmd));
	}
	ops = ddi_get_driver(devi);
	ASSERT(ops);
	if ((fn = ops->devo_detach) == NULL)
		return (DDI_FAILURE);

	if ((error = mdi_pre_detach(devi, cmd)) != DDI_SUCCESS) {
		return (error);
	}
	/* Pre hooks: detach ctlop first, then power management. */
	i_detach_ctlop(devi, cmd, DDI_PRE, 0);
	pm_pre_detach(devi, cmd, &pc);

	/*
	 * Call the driver's detach routine
	 */
	error = (*fn)(devi, cmd);

	/* Post hooks each receive the driver's return value. */
	pm_post_detach(&pc, error);
	i_detach_ctlop(devi, cmd, DDI_POST, error);
	mdi_post_detach(devi, cmd, error);

	return (error);
}
218
219static void
220i_attach_ctlop(dev_info_t *devi, ddi_attach_cmd_t cmd, ddi_pre_post_t w,
221    int ret)
222{
223	int error;
224	struct attachspec as;
225	dev_info_t *pdip = ddi_get_parent(devi);
226
227	as.cmd = cmd;
228	as.when = w;
229	as.pdip = pdip;
230	as.result = ret;
231	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_ATTACH, &as, &error);
232}
233
234static void
235i_detach_ctlop(dev_info_t *devi, ddi_detach_cmd_t cmd, ddi_pre_post_t w,
236    int ret)
237{
238	int error;
239	struct detachspec ds;
240	dev_info_t *pdip = ddi_get_parent(devi);
241
242	ds.cmd = cmd;
243	ds.when = w;
244	ds.pdip = pdip;
245	ds.result = ret;
246	(void) ddi_ctlops(devi, devi, DDI_CTLOPS_DETACH, &ds, &error);
247}
248
249/*
250 * This entry point not defined by Solaris 2.0 DDI/DKI, so
251 * its inclusion here is somewhat moot.
252 */
253int
254devi_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
255{
256	struct dev_ops *ops;
257	int (*fn)(dev_info_t *, ddi_reset_cmd_t);
258
259	if ((ops = ddi_get_driver(devi)) == NULL ||
260	    (fn = ops->devo_reset) == NULL)
261		return (DDI_FAILURE);
262
263	return ((*fn)(devi, cmd));
264}
265
266int
267devi_quiesce(dev_info_t *devi)
268{
269	struct dev_ops *ops;
270	int (*fn)(dev_info_t *);
271
272	if (((ops = ddi_get_driver(devi)) == NULL) ||
273	    (ops->devo_rev < 4) || ((fn = ops->devo_quiesce) == NULL))
274		return (DDI_FAILURE);
275
276	return ((*fn)(devi));
277}
278
/*
 * Leaf driver entry points. The following [cb]dev_* functions are *not* part
 * of the DDI; please use the functions defined in <sys/sunldi.h> and
 * driver_lyr.c instead.
 */
283int
284dev_open(dev_t *devp, int flag, int type, struct cred *cred)
285{
286	struct cb_ops   *cb;
287
288	cb = devopsp[getmajor(*devp)]->devo_cb_ops;
289	return ((*cb->cb_open)(devp, flag, type, cred));
290}
291
292int
293dev_close(dev_t dev, int flag, int type, struct cred *cred)
294{
295	struct cb_ops   *cb;
296
297	cb = (devopsp[getmajor(dev)])->devo_cb_ops;
298	return ((*cb->cb_close)(dev, flag, type, cred));
299}
300
301/*
302 * New Leaf driver open entry point.  We make a vnode and go through specfs
303 * in order to obtain open close exclusions guarantees.  Note that we drop
304 * OTYP_LYR if it was specified - we are going through specfs and it provides
305 * last close semantics (FKLYR is provided to open(9E)).  Also, since
306 * spec_open will drive attach via e_ddi_hold_devi_by_dev for a makespecvp
307 * vnode with no SDIP_SET on the common snode, the dev_lopen caller no longer
308 * needs to call ddi_hold_installed_driver.
309 */
310int
311dev_lopen(dev_t *devp, int flag, int otype, struct cred *cred)
312{
313	struct vnode	*vp;
314	int		error;
315	struct vnode	*cvp;
316
317	vp = makespecvp(*devp, (otype == OTYP_BLK) ? VBLK : VCHR);
318	error = VOP_OPEN(&vp, flag | FKLYR, cred, NULL);
319	if (error == 0) {
320		/* Pick up the (possibly) new dev_t value. */
321		*devp = vp->v_rdev;
322
323		/*
324		 * Place extra hold on the common vnode, which contains the
325		 * open count, so that it is not destroyed by the VN_RELE of
326		 * the shadow makespecvp vnode below.
327		 */
328		cvp = STOV(VTOCS(vp));
329		VN_HOLD(cvp);
330	}
331
332	/* release the shadow makespecvp vnode. */
333	VN_RELE(vp);
334	return (error);
335}
336
/*
 * Leaf driver close entry point.  We make a vnode and go through specfs in
 * order to obtain open close exclusions guarantees.  Note that we drop
 * OTYP_LYR if it was specified - we are going through specfs and it provides
 * last close semantics (FKLYR is provided to close(9E)).
 *
 * Returns the error from VOP_CLOSE().  An unmatched close (more dev_lclose
 * than dev_lopen calls) is reported through cmn_err() at the level given by
 * dev_lclose_ce; it does not change the return value.
 */
int
dev_lclose(dev_t dev, int flag, int otype, struct cred *cred)
{
	struct vnode	*vp;
	int		error;
	struct vnode	*cvp;
	char		*funcname;
	ulong_t		offset;

	vp = makespecvp(dev, (otype == OTYP_BLK) ? VBLK : VCHR);
	error = VOP_CLOSE(vp, flag | FKLYR, 1, (offset_t)0, cred, NULL);

	/*
	 * Release the extra dev_lopen hold on the common vnode. We inline a
	 * VN_RELE(cvp) call so that we can detect more dev_lclose calls than
	 * dev_lopen calls without panic. See vn_rele.  If our inline of
	 * vn_rele called VOP_INACTIVE(cvp, CRED(), ...) we would panic on the
	 * "release the makespecvp vnode" VN_RELE(vp) that follows  - so
	 * instead we diagnose this situation.  Note that the driver has
	 * still seen a double close(9E), but that would have occurred with
	 * the old dev_close implementation too.
	 */
	cvp = STOV(VTOCS(vp));
	mutex_enter(&cvp->v_lock);
	switch (cvp->v_count) {
	default:
		/* Normal case: drop the hold that dev_lopen placed on cvp. */
		VN_RELE_LOCKED(cvp);
		break;

	case 0:
		VTOS(vp)->s_commonvp = NULL;	/* avoid panic */
		/*FALLTHROUGH*/
	case 1:
		/*
		 * The following message indicates a serious problem in the
		 * identified driver, the driver should be fixed. If obtaining
		 * a panic dump is needed to diagnose the driver problem then
		 * adding "set dev_lclose_ce=3" to /etc/system will cause a
		 * panic when this occurs.
		 *
		 * 'offset' is only an out-parameter for modgetsymname(); its
		 * value is not used.  No hold is released on this path.
		 */
		funcname = modgetsymname((uintptr_t)caller(), &offset);
		cmn_err(dev_lclose_ce, "dev_lclose: extra close of dev_t 0x%lx "
		    "from %s`%s()", dev, mod_containing_pc(caller()),
		    funcname ? funcname : "unknown...");
		break;
	}
	mutex_exit(&cvp->v_lock);

	/* release the makespecvp vnode. */
	VN_RELE(vp);
	return (error);
}
395
396/*
397 * Returns -1 or the instance number of the given dev_t as
398 * interpreted by the device driver.  The code may load the driver
399 * but it does not attach any instances.
400 *
401 * Instance is supposed to be a int but drivers have assumed that
402 * the pointer was a pointer to "void *" instead of a pointer to
403 * "int *" so we now explicitly pass a pointer to "void *" and then
404 * cast the result to an int when returning the value.
405 */
406int
407dev_to_instance(dev_t dev)
408{
409	major_t		major = getmajor(dev);
410	struct dev_ops	*ops;
411	void		*vinstance;
412	int		error;
413
414	/* verify that the driver is loaded */
415	if ((ops = mod_hold_dev_by_major(major)) == NULL)
416		return (-1);
417	ASSERT(CB_DRV_INSTALLED(ops));
418
419	/* verify that it supports the getinfo(9E) entry point */
420	if (ops->devo_getinfo == NULL) {
421		mod_rele_dev_by_major(major);
422		return (-1);
423	}
424
425	/* ask the driver to extract the instance number from the devt */
426	error = (*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2INSTANCE,
427	    (void *)dev, &vinstance);
428
429	/* release the driver */
430	mod_rele_dev_by_major(major);
431
432	if (error != DDI_SUCCESS)
433		return (-1);
434
435	return ((int)(uintptr_t)vinstance);
436}
437
/*
 * bdev_strategy_tnf_probe()
 *	Fire the TNF "strategy" kernel probe for a buf, recording its
 *	device, logical block number, byte count, buf pointer and flags.
 *	Kept as a separate function so that bdev_strategy() can end in a
 *	tail call (see the comment there).
 */
static void
bdev_strategy_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_5(strategy, "io blockio", /* CSTYLED */,
	    tnf_device, device, bp->b_edev,
	    tnf_diskaddr, block, bp->b_lblkno,
	    tnf_size, size, bp->b_bcount,
	    tnf_opaque, buf, bp,
	    tnf_bioflags, flags, bp->b_flags);
}
449
/*
 * bdev_strategy()
 *	Hand a buf to the cb_strategy routine of the driver registered in
 *	devopsp[] for the major number of bp->b_edev, and return that
 *	routine's result.  Before the call, bp->b_dip is filled in, the
 *	io:::start DTrace probe and the TNF strategy probe are fired, and
 *	B_STARTED is set in bp->b_flags.
 */
int
bdev_strategy(struct buf *bp)
{
	struct dev_ops *ops;

	ops = devopsp[getmajor(bp->b_edev)];

	/*
	 * Before we hit the io:::start probe, we need to fill in the b_dip
	 * field of the buf structure.  This should be -- for the most part --
	 * incredibly cheap.  If you're in this code looking to bum cycles,
	 * there is almost certainly bigger game further down the I/O path...
	 *
	 * The getinfo return value is ignored.
	 */
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	DTRACE_IO1(start, struct buf *, bp);
	bp->b_flags |= B_STARTED;

	/*
	 * Call the TNF probe here instead of the inline code
	 * to force our compiler to use the tail call optimization.
	 */
	bdev_strategy_tnf_probe(bp);

	return (ops->devo_cb_ops->cb_strategy(bp));
}
477
478int
479bdev_print(dev_t dev, caddr_t str)
480{
481	struct cb_ops	*cb;
482
483	cb = devopsp[getmajor(dev)]->devo_cb_ops;
484	return ((*cb->cb_print)(dev, str));
485}
486
487/*
488 * Return number of DEV_BSIZE byte blocks.
489 */
490int
491bdev_size(dev_t dev)
492{
493	uint_t		nblocks;
494	uint_t		blksize;
495
496	if ((nblocks = e_ddi_getprop(dev, VBLK, "nblocks",
497	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
498		return (-1);
499
500	/* Get blksize, default to DEV_BSIZE */
501	if ((blksize = e_ddi_getprop(dev, VBLK, "blksize",
502	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
503		blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize",
504		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE);
505
506	if (blksize >= DEV_BSIZE)
507		return (nblocks * (blksize / DEV_BSIZE));
508	else
509		return (nblocks / (DEV_BSIZE / blksize));
510}
511
512/*
513 * Same for 64-bit Nblocks property
514 */
515uint64_t
516bdev_Size(dev_t dev)
517{
518	uint64_t	nblocks;
519	uint_t		blksize;
520
521	if ((nblocks = e_ddi_getprop_int64(dev, VBLK, "Nblocks",
522	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
523		return (-1);
524
525	/* Get blksize, default to DEV_BSIZE */
526	if ((blksize = e_ddi_getprop(dev, VBLK, "blksize",
527	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, -1)) == -1)
528		blksize = e_ddi_getprop(DDI_DEV_T_ANY, VBLK, "device-blksize",
529		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, DEV_BSIZE);
530
531	if (blksize >= DEV_BSIZE)
532		return (nblocks * (blksize / DEV_BSIZE));
533	else
534		return (nblocks / (DEV_BSIZE / blksize));
535}
536
537int
538bdev_dump(dev_t dev, caddr_t addr, daddr_t blkno, int blkcnt)
539{
540	struct cb_ops	*cb;
541
542	cb = devopsp[getmajor(dev)]->devo_cb_ops;
543	return ((*cb->cb_dump)(dev, addr, blkno, blkcnt));
544}
545
546int
547cdev_read(dev_t dev, struct uio *uiop, struct cred *cred)
548{
549	struct cb_ops	*cb;
550
551	cb = devopsp[getmajor(dev)]->devo_cb_ops;
552	return ((*cb->cb_read)(dev, uiop, cred));
553}
554
555int
556cdev_write(dev_t dev, struct uio *uiop, struct cred *cred)
557{
558	struct cb_ops	*cb;
559
560	cb = devopsp[getmajor(dev)]->devo_cb_ops;
561	return ((*cb->cb_write)(dev, uiop, cred));
562}
563
564int
565cdev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cred,
566    int *rvalp)
567{
568	struct cb_ops	*cb;
569
570	cb = devopsp[getmajor(dev)]->devo_cb_ops;
571	return ((*cb->cb_ioctl)(dev, cmd, arg, mode, cred, rvalp));
572}
573
574int
575cdev_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
576    size_t *maplen, uint_t mode)
577{
578	struct cb_ops	*cb;
579
580	cb = devopsp[getmajor(dev)]->devo_cb_ops;
581	return ((*cb->cb_devmap)(dev, dhp, off, len, maplen, mode));
582}
583
/*
 * cdev_mmap()
 *	Call the caller-supplied mapping function with (dev, off, prot)
 *	and return its result.  No driver lookup is done here.
 */
int
cdev_mmap(int (*mapfunc)(dev_t, off_t, int), dev_t dev, off_t off, int prot)
{
	int rv = mapfunc(dev, off, prot);

	return (rv);
}
589
590int
591cdev_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
592    uint_t prot, uint_t maxprot, uint_t flags, cred_t *credp)
593{
594	struct cb_ops	*cb;
595
596	cb = devopsp[getmajor(dev)]->devo_cb_ops;
597	return ((*cb->cb_segmap)(dev, off, as, addrp,
598	    len, prot, maxprot, flags, credp));
599}
600
601int
602cdev_poll(dev_t dev, short events, int anyyet, short *reventsp,
603    struct pollhead **pollhdrp)
604{
605	struct cb_ops	*cb;
606
607	cb = devopsp[getmajor(dev)]->devo_cb_ops;
608	return ((*cb->cb_chpoll)(dev, events, anyyet, reventsp, pollhdrp));
609}
610
611/*
612 * A 'size' property can be provided by a VCHR device.
613 *
614 * Since it's defined as zero for STREAMS devices, so we avoid the
615 * overhead of looking it up.  Note also that we don't force an
616 * unused driver into memory simply to ask about it's size.  We also
617 * don't bother to ask it its size unless it's already been attached
618 * (the attach routine is the earliest place the property will be created)
619 *
620 * XXX	In an ideal world, we'd call this at VOP_GETATTR() time.
621 */
622int
623cdev_size(dev_t dev)
624{
625	major_t maj;
626	struct devnames *dnp;
627
628	if ((maj = getmajor(dev)) >= devcnt)
629		return (0);
630
631	dnp = &(devnamesp[maj]);
632	LOCK_DEV_OPS(&dnp->dn_lock);
633	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
634	    !devopsp[maj]->devo_cb_ops->cb_str) {
635		UNLOCK_DEV_OPS(&dnp->dn_lock);
636		return (e_ddi_getprop(dev, VCHR, "size",
637		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
638	}
639	UNLOCK_DEV_OPS(&dnp->dn_lock);
640	return (0);
641}
642
643/*
644 * same for 64-bit Size property
645 */
646uint64_t
647cdev_Size(dev_t dev)
648{
649	major_t maj;
650	struct devnames *dnp;
651
652	if ((maj = getmajor(dev)) >= devcnt)
653		return (0);
654
655	dnp = &(devnamesp[maj]);
656	LOCK_DEV_OPS(&dnp->dn_lock);
657	if (devopsp[maj] && devopsp[maj]->devo_cb_ops &&
658	    !devopsp[maj]->devo_cb_ops->cb_str) {
659		UNLOCK_DEV_OPS(&dnp->dn_lock);
660		return (e_ddi_getprop_int64(dev, VCHR, "Size",
661		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS, 0));
662	}
663	UNLOCK_DEV_OPS(&dnp->dn_lock);
664	return (0);
665}
666
667/*
668 * XXX	This routine is poorly named, because block devices can and do
669 *	have properties (see bdev_size() above).
670 *
671 * XXX	fix the comment in devops.h that claims that cb_prop_op
672 *	is character-only.
673 */
674int
675cdev_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
676    char *name, caddr_t valuep, int *lengthp)
677{
678	struct cb_ops	*cb;
679
680	if ((cb = devopsp[DEVI(dip)->devi_major]->devo_cb_ops) == NULL)
681		return (DDI_PROP_NOT_FOUND);
682
683	return ((*cb->cb_prop_op)(dev, dip, prop_op, mod_flags,
684	    name, valuep, lengthp));
685}
686