xref: /illumos-gate/usr/src/uts/common/io/drcompat.c (revision 39b361b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Standard module for handling DLPI Style 2 attach/detach
28  */
29 
30 #include <sys/types.h>
31 #include <sys/conf.h>
32 #include <sys/modctl.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sunddi.h>
35 #include <sys/esunddi.h>
36 #include <sys/strsubr.h>
37 #include <sys/ddi.h>
38 #include <sys/dlpi.h>
39 #include <sys/strsun.h>
40 #include <sys/policy.h>
41 
42 static struct streamtab drstab;
43 
44 static struct fmodsw fsw = {
45 	DRMODNAME,
46 	&drstab,
47 	D_MP
48 };
49 
50 
51 /*
52  * Module linkage information for the kernel.
53  */
54 
55 static struct modlstrmod modlstrmod = {
56 	&mod_strmodops, "dr compatibility for DLPI style 2 drivers", &fsw
57 };
58 
59 
60 static struct modlinkage modlinkage = {
61 	MODREV_1, &modlstrmod, NULL
62 };
63 
64 
65 int
_init(void)66 _init(void)
67 {
68 	return (mod_install(&modlinkage));
69 }
70 
71 int
_fini(void)72 _fini(void)
73 {
74 	return (mod_remove(&modlinkage));
75 }
76 
77 int
_info(struct modinfo * modinfop)78 _info(struct modinfo *modinfop)
79 {
80 	return (mod_info(&modlinkage, modinfop));
81 }
82 
83 
84 static int	dropen(queue_t *, dev_t *, int, int, cred_t *);
85 static int	drclose(queue_t *, int, cred_t *);
86 static int	drrput(queue_t *, mblk_t *);
87 static int	drwput(queue_t *, mblk_t *);
88 
89 static struct module_info drinfo = {
90 	0,
91 	DRMODNAME,
92 	0,
93 	INFPSZ,
94 	1,
95 	0
96 };
97 
98 static struct qinit drrinit = {
99 	(int (*)())drrput,
100 	NULL,
101 	dropen,
102 	drclose,
103 	NULL,
104 	&drinfo
105 };
106 
107 static struct qinit drwinit = {
108 	(int (*)())drwput,
109 	NULL,
110 	NULL,
111 	NULL,
112 	NULL,
113 	&drinfo
114 };
115 
116 static struct streamtab drstab = {
117 	&drrinit,
118 	&drwinit,
119 	NULL,
120 	NULL
121 };
122 
123 /*
124  * This module is pushed directly on top of the bottom driver
125  * in a DLPI style-2 stream by stropen(). It intercepts
126  * DL_ATTACH_REQ/DL_DETACH_REQ messages on the write side
127  * and acks on the read side, calls qassociate where needed.
128  * The primary purpose is to workaround a DR race condition
129  * affecting non-DDI compliant DLPI style 2 drivers, which may
130  * cause the system to panic.
131  *
132  * The following action is taken:
133  * Write side (drwput):
134  *	attach request:	hold driver instance assuming ppa == instance.
135  *		This way, the instance cannot be detached while the
136  *		driver is processing DL_ATTACH_REQ.
137  *
138  *		On a successful hold, store the dip in a ring buffer
139  *		to be processed later by the read side.
140  *		If hold fails (most likely ppa != instance), we store
141  *		NULL in the ring buffer and read side won't take
142  *		any action on ack.
143  *
144  * Read side (drrput):
145  *	attach success: if (dip held on write side) associate queue with dip
146  *	attach failure:	if (dip held on write side) release hold on dip
147  *	detach success: associate queue with NULL
148  *	detach failure:	do nothing
149  *
150  * The module assumes that incoming DL_ATTACH_REQ/DL_DETACH_REQ
151  * messages are ordered (non-concurrent) and the bottom
152  * driver processes them and sends acknowledgements in the same
153  * order. This assumption is reasonable because concurrent
154  * association results in non-deterministic queue behavior.
155  * The module is coded carefully such that unordered messages
156  * do not result in a system panic.
157  *
158  * The module handles multiple outstanding messages queued
159  * in the bottom driver. Messages processed on the write side
160  * but not yet arrived at read side are placed in the ring buffer
161  * dr_dip[], between dr_nfirst and dr_nlast. The write side is
162  * producer and the read side is the consumer. The buffer is full
163  * when dr_nfirst == dr_nlast.
164  *
165  * The current size of the ring buffer is 64 (MAX_DLREQS) per stream.
166  * During normal testing, we have not seen outstanding messages
167  * above 10.
168  */
169 
/* Number of slots in the per-stream ring buffer of pending attach requests. */
#define	MAX_DLREQS	64

/*
 * Advance the ring-buffer index x by one slot, wrapping to 0 once it
 * reaches MAX_DLREQS.  x is evaluated more than once, so it must be an
 * lvalue without side effects.  The do/while (0) wrapper makes
 * "INCR(x);" a single statement, so it is safe as the body of an
 * if/else.
 */
#define	INCR(x)		do {				\
	(x)++;						\
	if ((x) >= MAX_DLREQS)				\
		(x) = 0;				\
} while (0)
172 
/*
 * Per-stream state, allocated in dropen() and shared by the read and
 * write queues through q_ptr.
 */
struct drstate {
	kmutex_t dr_lock;	/* protects dr_nfirst, dr_nlast and dr_dip[] */
	major_t dr_major;	/* major number taken from the dev_t at open */
	int dr_nfirst;		/* next slot filled by the write side */
	int dr_nlast;		/* next slot consumed by the read side */
	dev_info_t *dr_dip[MAX_DLREQS];	/* held dip (or NULL) per attach req */
};
180 
181 /* ARGSUSED1 */
182 static int
dropen(queue_t * q,dev_t * devp,int oflag,int sflag,cred_t * crp)183 dropen(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *crp)
184 {
185 	struct drstate *dsp;
186 
187 	if (sflag != MODOPEN) {	/* must be a pushed module */
188 		return (EINVAL);
189 	}
190 
191 	if (secpolicy_net_rawaccess(crp) != 0) {
192 		return (EPERM);
193 	}
194 
195 	if (q->q_ptr != NULL) {
196 		return (0);	/* already open */
197 	}
198 
199 	dsp = kmem_zalloc(sizeof (*dsp), KM_SLEEP);
200 	dsp->dr_major = getmajor(*devp);
201 	mutex_init(&dsp->dr_lock, NULL, MUTEX_DEFAULT, NULL);
202 	q->q_ptr = OTHERQ(q)->q_ptr = dsp;
203 	qprocson(q);
204 	ddi_assoc_queue_with_devi(q, NULL);
205 	return (0);
206 }
207 
208 /* ARGSUSED1 */
209 static int
drclose(queue_t * q,int cflag,cred_t * crp)210 drclose(queue_t *q, int cflag, cred_t *crp)
211 {
212 	struct drstate *dsp = q->q_ptr;
213 
214 	ASSERT(dsp);
215 	ddi_assoc_queue_with_devi(q, NULL);
216 	qprocsoff(q);
217 
218 	mutex_destroy(&dsp->dr_lock);
219 	kmem_free(dsp, sizeof (*dsp));
220 	q->q_ptr = NULL;
221 
222 	return (0);
223 }
224 
225 static int
drrput(queue_t * q,mblk_t * mp)226 drrput(queue_t *q, mblk_t *mp)
227 {
228 	struct drstate *dsp;
229 	union DL_primitives *dlp;
230 	dev_info_t *dip;
231 
232 	switch (DB_TYPE(mp)) {
233 	case M_PROTO:
234 	case M_PCPROTO:
235 		break;
236 	default:
237 		putnext(q, mp);
238 		return (0);
239 	}
240 
241 	/* make sure size is sufficient for dl_primitive */
242 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
243 		putnext(q, mp);
244 		return (0);
245 	}
246 
247 	dlp = (union DL_primitives *)mp->b_rptr;
248 	switch (dlp->dl_primitive) {
249 	case DL_OK_ACK: {
250 		/* check for proper size, let upper layer deal with error */
251 		if (MBLKL(mp) < DL_OK_ACK_SIZE) {
252 			putnext(q, mp);
253 			return (0);
254 		}
255 
256 		dsp = q->q_ptr;
257 		switch (dlp->ok_ack.dl_correct_primitive) {
258 		case DL_ATTACH_REQ:
259 			/*
260 			 * ddi_assoc_queue_with_devi() will hold dip,
261 			 * so release after association.
262 			 *
263 			 * dip is NULL means we didn't hold dip on read side.
264 			 * (unlikely, but possible), so we do nothing.
265 			 */
266 			mutex_enter(&dsp->dr_lock);
267 			dip = dsp->dr_dip[dsp->dr_nlast];
268 			dsp->dr_dip[dsp->dr_nlast] = NULL;
269 			INCR(dsp->dr_nlast);
270 			mutex_exit(&dsp->dr_lock);
271 			if (dip) {
272 				ddi_assoc_queue_with_devi(q, dip);
273 				ddi_release_devi(dip);
274 			}
275 			break;
276 
277 		case DL_DETACH_REQ:
278 			ddi_assoc_queue_with_devi(q, NULL);
279 			break;
280 		default:
281 			break;
282 		}
283 		break;
284 	}
285 	case DL_ERROR_ACK:
286 		if (dlp->error_ack.dl_error_primitive != DL_ATTACH_REQ)
287 			break;
288 
289 		dsp = q->q_ptr;
290 		mutex_enter(&dsp->dr_lock);
291 		dip = dsp->dr_dip[dsp->dr_nlast];
292 		dsp->dr_dip[dsp->dr_nlast] = NULL;
293 		INCR(dsp->dr_nlast);
294 		mutex_exit(&dsp->dr_lock);
295 		/*
296 		 * Release dip on attach failure
297 		 */
298 		if (dip) {
299 			ddi_release_devi(dip);
300 		}
301 		break;
302 	default:
303 		break;
304 	}
305 
306 	putnext(q, mp);
307 	return (0);
308 }
309 
310 /*
311  * Detect dl attach, hold the dip to prevent it from detaching
312  */
313 static int
drwput(queue_t * q,mblk_t * mp)314 drwput(queue_t *q, mblk_t *mp)
315 {
316 	struct drstate *dsp;
317 	union DL_primitives *dlp;
318 	dev_info_t *dip;
319 
320 	switch (DB_TYPE(mp)) {
321 	case M_PROTO:
322 	case M_PCPROTO:
323 		break;
324 	default:
325 		putnext(q, mp);
326 		return (0);
327 	}
328 
329 	/* make sure size is sufficient for dl_primitive */
330 	if (MBLKL(mp) < sizeof (t_uscalar_t)) {
331 		putnext(q, mp);
332 		return (0);
333 	}
334 
335 	dlp = (union DL_primitives *)mp->b_rptr;
336 	switch (dlp->dl_primitive) {
337 	case DL_ATTACH_REQ:
338 		/*
339 		 * Check for proper size of the message.
340 		 *
341 		 * If size is correct, get the ppa and attempt to
342 		 * hold the device assuming ppa is instance.
343 		 *
344 		 * If size is wrong, we can't get the ppa, but
345 		 * still increment dr_nfirst because the read side
346 		 * will get a error ack on DL_ATTACH_REQ.
347 		 */
348 		dip = NULL;
349 		dsp = q->q_ptr;
350 		if (MBLKL(mp) >= DL_OK_ACK_SIZE) {
351 			dip = ddi_hold_devi_by_instance(dsp->dr_major,
352 			    dlp->attach_req.dl_ppa, E_DDI_HOLD_DEVI_NOATTACH);
353 		}
354 
355 		mutex_enter(&dsp->dr_lock);
356 		dsp->dr_dip[dsp->dr_nfirst] = dip;
357 		INCR(dsp->dr_nfirst);
358 		/*
359 		 * Check if ring buffer is full. If so, assert in debug
360 		 * kernel and produce a warning in non-debug kernel.
361 		 */
362 		ASSERT(dsp->dr_nfirst != dsp->dr_nlast);
363 		if (dsp->dr_nfirst == dsp->dr_nlast) {
364 			cmn_err(CE_WARN, "drcompat: internal buffer full");
365 		}
366 		mutex_exit(&dsp->dr_lock);
367 		break;
368 	default:
369 		break;
370 	}
371 
372 	putnext(q, mp);
373 	return (0);
374 }
375