xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib.c (revision 8257fab973a69800a3a3309e8af21fc1876d2df9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/conf.h>
31 #include <sys/ddi.h>
32 #include <sys/sunddi.h>
33 #include <sys/modctl.h>
34 #include <inet/ip.h>
35 #include <sys/ib/ibtl/ibti.h>
36 #include <sys/ib/clients/rds/rdsib_ib.h>
37 #include <sys/ib/clients/rds/rdsib_buf.h>
38 #include <sys/ib/clients/rds/rdsib_cm.h>
39 #include <sys/ib/clients/rds/rdsib_protocol.h>
40 #include <sys/ib/clients/rds/rds_transport.h>
41 #include <sys/ib/clients/rds/rds_kstat.h>
42 
/*
 * Global Configuration Variables
 * As defined in RDS proposal
 *
 * The initializers below are the compiled-in defaults.  Every variable in
 * this group except RdsPktSize is reloaded from the driver's properties by
 * rds_read_config_values() during attach, falling back to the same RDS_*
 * default when the property is absent.
 */
uint_t		RdsPktSize;
/* Multiplied by 1024 wherever this file compares it against buffer sizes */
uint_t		MaxRecvMemory		= RDS_MAX_RECV_MEMORY;
uint_t		MaxDataSendBuffers	= RDS_MAX_DATA_SEND_BUFFERS;
uint_t		MaxDataRecvBuffers	= RDS_MAX_DATA_RECV_BUFFERS;
uint_t		MaxCtrlSendBuffers	= RDS_MAX_CTRL_SEND_BUFFERS;
uint_t		MaxCtrlRecvBuffers	= RDS_MAX_CTRL_RECV_BUFFERS;
uint_t		DataRecvBufferLWM	= RDS_DATA_RECV_BUFFER_LWM;
uint_t		CtrlRecvBufferLWM	= RDS_CTRL_RECV_BUFFER_LWM;
/* Used as a percentage (value / 100) when rdsib_attach() derives the HWM */
uint_t		PendingRxPktsHWM	= RDS_PENDING_RX_PKTS_HWM;
uint_t		MinRnrRetry		= RDS_IB_RNR_RETRY;
/* The two values below are truncated to 8 bits when read from properties */
uint8_t		IBPathRetryCount	= RDS_IB_PATH_RETRY;
uint8_t		IBPktLifeTime		= RDS_IB_PKT_LT;
59 
/*
 * Transport entry points that are defined outside this file.  They are
 * declared here only so their addresses can be placed in
 * rds_ib_transport_ops.
 */
extern int rdsib_open_ib();
extern void rdsib_close_ib();
extern void rds_resume_port(in_port_t port);
extern int rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip,
    in_port_t sendport, in_port_t recvport, zoneid_t zoneid);
extern boolean_t rds_if_lookup_by_name(char *devname);
66 
/*
 * Operations vector for the IB transport.  rdsib_init() publishes it by
 * storing its address in rds_transport_ops, and rdsib_fini() withdraws it
 * again.  The initializer is positional, so the order of the entries must
 * match the member order of rds_transport_ops_t.
 */
rds_transport_ops_t rds_ib_transport_ops = {
	rdsib_open_ib,
	rdsib_close_ib,
	rds_sendmsg,
	rds_resume_port,
	rds_if_lookup_by_name
};
74 
/* global */
/* Single driver state; allocated in rdsib_init(), freed in rdsib_fini() */
rds_state_t	*rdsib_statep = NULL;
/* Initialized in rdsib_init(); the port map itself is zeroed there too */
krwlock_t	rds_loopback_portmap_lock;
uint8_t		rds_loopback_portmap[RDS_PORT_MAP_SIZE];
/* Created in rdsib_attach(), destroyed in rdsib_detach() */
ddi_taskq_t	*rds_taskq = NULL;
/* Non-NULL while an instance is attached; used to reject a second attach */
dev_info_t	*rdsib_dev_info = NULL;
/* Computed in rdsib_attach() from PendingRxPktsHWM and MaxRecvMemory */
uint_t		rds_rx_pkts_pending_hwm;
82 
/*
 * Logging level in effect until attach.  rds_read_config_values() then
 * replaces it with the "rdsdbglvl" property, using RDS_LOG_L2 as the
 * fallback on both DEBUG and non-DEBUG builds.
 */
#ifdef DEBUG
uint32_t	rdsdbglvl = RDS_LOG_L3;
#else
uint32_t	rdsdbglvl = RDS_LOG_L2;
#endif
88 
/* Thread count passed to ddi_taskq_create() for rds_taskq */
#define		RDS_NUM_TASKQ_THREADS	4

/* Forward declarations for the entry points referenced by rdsib_ops */
static int rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
/* Helper used by rdsib_attach() to load the tunables from properties */
static void rds_read_config_values(dev_info_t *dip);
96 
97 /* Driver entry points */
98 static struct cb_ops	rdsib_cb_ops = {
99 	nulldev,		/* open */
100 	nulldev,		/* close */
101 	nodev,			/* strategy */
102 	nodev,			/* print */
103 	nodev,			/* dump */
104 	nodev,			/* read */
105 	nodev,			/* write */
106 	nodev,			/* ioctl */
107 	nodev,			/* devmap */
108 	nodev,			/* mmap */
109 	nodev,			/* segmap */
110 	nochpoll,		/* poll */
111 	ddi_prop_op,		/* prop_op */
112 	NULL,			/* stream */
113 	D_MP,			/* cb_flag */
114 	CB_REV,			/* rev */
115 	nodev,			/* int (*cb_aread)() */
116 	nodev,			/* int (*cb_awrite)() */
117 };
118 
119 /* Device options */
120 static struct dev_ops rdsib_ops = {
121 	DEVO_REV,		/* devo_rev, */
122 	0,			/* refcnt  */
123 	rdsib_info,		/* info */
124 	nulldev,		/* identify */
125 	nulldev,		/* probe */
126 	rdsib_attach,		/* attach */
127 	rdsib_detach,		/* detach */
128 	nodev,			/* reset */
129 	&rdsib_cb_ops,		/* driver ops - devctl interfaces */
130 	NULL,			/* bus operations */
131 	NULL			/* power */
132 };
133 
134 /*
135  * Module linkage information.
136  */
137 #define	RDS_DEVDESC	"RDS IB driver %I%"
138 static struct modldrv rdsib_modldrv = {
139 	&mod_driverops,		/* Driver module */
140 	RDS_DEVDESC,		/* Driver name and version */
141 	&rdsib_ops,		/* Driver ops */
142 };
143 
144 static struct modlinkage rdsib_modlinkage = {
145 	MODREV_1,
146 	(void *)&rdsib_modldrv,
147 	NULL
148 };
149 
150 /* Called from _init */
151 int
152 rdsib_init()
153 {
154 	/* RDS supports only one instance */
155 	rdsib_statep = kmem_zalloc(sizeof (rds_state_t), KM_SLEEP);
156 
157 	rw_init(&rdsib_statep->rds_sessionlock, NULL, RW_DRIVER, NULL);
158 	rw_init(&rdsib_statep->rds_hca_lock, NULL, RW_DRIVER, NULL);
159 
160 	rw_init(&rds_loopback_portmap_lock, NULL, RW_DRIVER, NULL);
161 	bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE);
162 
163 	mutex_init(&rds_dpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
164 	cv_init(&rds_dpool.pool_cv, NULL, CV_DRIVER, NULL);
165 	mutex_init(&rds_cpool.pool_lock, NULL, MUTEX_DRIVER, NULL);
166 	cv_init(&rds_cpool.pool_cv, NULL, CV_DRIVER, NULL);
167 
168 	/* Initialize logging */
169 	rds_logging_initialization();
170 
171 	RDS_SET_NPORT(1); /* this should never be 0 */
172 
173 	ASSERT(rds_transport_ops == NULL);
174 	rds_transport_ops = &rds_ib_transport_ops;
175 
176 	return (0);
177 }
178 
179 /* Called from _fini */
180 void
181 rdsib_fini()
182 {
183 	/* Stop logging */
184 	rds_logging_destroy();
185 
186 	cv_destroy(&rds_dpool.pool_cv);
187 	mutex_destroy(&rds_dpool.pool_lock);
188 	cv_destroy(&rds_cpool.pool_cv);
189 	mutex_destroy(&rds_cpool.pool_lock);
190 
191 	rw_destroy(&rds_loopback_portmap_lock);
192 
193 	rw_destroy(&rdsib_statep->rds_hca_lock);
194 	rw_destroy(&rdsib_statep->rds_sessionlock);
195 	kmem_free(rdsib_statep, sizeof (rds_state_t));
196 
197 	rds_transport_ops = NULL;
198 }
199 
200 int
201 _init(void)
202 {
203 	int	ret;
204 
205 	if (ibt_hw_is_present() == 0) {
206 		return (ENODEV);
207 	}
208 
209 	ret = rdsib_init();
210 	if (ret != 0) {
211 		return (ret);
212 	}
213 
214 	ret = mod_install(&rdsib_modlinkage);
215 	if (ret != 0) {
216 		/*
217 		 * Could not load module
218 		 */
219 		rdsib_fini();
220 		return (ret);
221 	}
222 
223 	return (0);
224 }
225 
226 int
227 _fini()
228 {
229 	int	ret;
230 
231 	/*
232 	 * Remove module
233 	 */
234 	if ((ret = mod_remove(&rdsib_modlinkage)) != 0) {
235 		return (ret);
236 	}
237 
238 	rdsib_fini();
239 
240 	return (0);
241 }
242 
243 int
244 _info(struct modinfo *modinfop)
245 {
246 	return (mod_info(&rdsib_modlinkage, modinfop));
247 }
248 
249 static int
250 rdsib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
251 {
252 	int	ret;
253 	uint_t	ndatarx;
254 
255 	RDS_DPRINTF4("rdsib_attach", "enter");
256 
257 	if (cmd != DDI_ATTACH)
258 		return (DDI_FAILURE);
259 
260 	if (rdsib_dev_info != NULL) {
261 		RDS_DPRINTF1("rdsib_attach", "Multiple RDS instances are"
262 		    " not supported (rds_dev_info: 0x%p)", rdsib_dev_info);
263 		return (DDI_FAILURE);
264 	}
265 
266 	rdsib_dev_info = dip;
267 	rds_read_config_values(dip);
268 
269 	rds_taskq = ddi_taskq_create(dip, "rds_taskq", RDS_NUM_TASKQ_THREADS,
270 	    TASKQ_DEFAULTPRI, 0);
271 	if (rds_taskq == NULL) {
272 		RDS_DPRINTF1(LABEL, "ddi_taskq_create failed for rds_taskq");
273 		rdsib_dev_info = NULL;
274 		return (DDI_FAILURE);
275 	}
276 
277 	ret = ddi_create_minor_node(dip, "rdsib", S_IFCHR, 0, DDI_PSEUDO, 0);
278 	if (ret != DDI_SUCCESS) {
279 		cmn_err(CE_CONT, "ddi_create_minor_node failed: %d", ret);
280 		ddi_taskq_destroy(rds_taskq);
281 		rds_taskq = NULL;
282 		rdsib_dev_info = NULL;
283 		return (DDI_FAILURE);
284 	}
285 
286 	/* Max number of receive buffers on the system */
287 	ndatarx = (MaxRecvMemory * 1024)/UserBufferSize;
288 
289 	/*
290 	 * High water mark for the receive buffers in the system. If the
291 	 * number of buffers used crosses this mark then all sockets in
292 	 * would be stalled. The port quota for the sockets is set based
293 	 * on this limit.
294 	 */
295 	rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * ndatarx)/100;
296 
297 	RDS_DPRINTF4("rdsib_attach", "return");
298 
299 	return (DDI_SUCCESS);
300 }
301 
302 static int
303 rdsib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
304 {
305 	RDS_DPRINTF4("rdsib_detach", "enter");
306 
307 	if (cmd != DDI_DETACH)
308 		return (DDI_FAILURE);
309 
310 	ddi_remove_minor_node(dip, "rdsib");
311 
312 	/* destroy taskq */
313 	if (rds_taskq != NULL) {
314 		ddi_taskq_destroy(rds_taskq);
315 		rds_taskq = NULL;
316 	}
317 
318 	rdsib_dev_info = NULL;
319 
320 	RDS_DPRINTF4("rdsib_detach", "return");
321 
322 	return (DDI_SUCCESS);
323 }
324 
325 /* ARGSUSED */
326 static int
327 rdsib_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
328 {
329 	int ret = DDI_FAILURE;
330 
331 	switch (cmd) {
332 	case DDI_INFO_DEVT2DEVINFO:
333 		if (rdsib_dev_info != NULL) {
334 			*result = (void *)rdsib_dev_info;
335 			ret = DDI_SUCCESS;
336 		}
337 		break;
338 
339 	case DDI_INFO_DEVT2INSTANCE:
340 		*result = NULL;
341 		ret = DDI_SUCCESS;
342 		break;
343 
344 	default:
345 		break;
346 	}
347 
348 	return (ret);
349 }
350 
351 static void
352 rds_read_config_values(dev_info_t *dip)
353 {
354 	UserBufferSize = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
355 	    DDI_PROP_DONTPASS, "UserBufferSize", RDS_USER_DATA_BUFFER_SIZE);
356 
357 	MaxRecvMemory = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
358 	    "MaxRecvMemory", RDS_MAX_RECV_MEMORY);
359 
360 	MaxDataSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
361 	    DDI_PROP_DONTPASS, "MaxDataSendBuffers", RDS_MAX_DATA_SEND_BUFFERS);
362 
363 	MaxDataRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
364 	    DDI_PROP_DONTPASS, "MaxDataRecvBuffers", RDS_MAX_DATA_RECV_BUFFERS);
365 
366 	MaxCtrlSendBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
367 	    DDI_PROP_DONTPASS, "MaxCtrlSendBuffers", RDS_MAX_CTRL_SEND_BUFFERS);
368 
369 	MaxCtrlRecvBuffers = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
370 	    DDI_PROP_DONTPASS, "MaxCtrlRecvBuffers", RDS_MAX_CTRL_RECV_BUFFERS);
371 
372 	DataRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
373 	    DDI_PROP_DONTPASS, "DataRecvBufferLWM", RDS_DATA_RECV_BUFFER_LWM);
374 
375 	CtrlRecvBufferLWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
376 	    DDI_PROP_DONTPASS, "CtrlRecvBufferLWM", RDS_CTRL_RECV_BUFFER_LWM);
377 
378 	PendingRxPktsHWM = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
379 	    DDI_PROP_DONTPASS, "PendingRxPktsHWM", RDS_PENDING_RX_PKTS_HWM);
380 
381 	MinRnrRetry = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
382 	    "MinRnrRetry", RDS_IB_RNR_RETRY);
383 
384 	IBPathRetryCount = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
385 	    DDI_PROP_DONTPASS, "IBPathRetryCount", RDS_IB_PATH_RETRY);
386 
387 	IBPktLifeTime = (uint8_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
388 	    DDI_PROP_DONTPASS, "IBPktLifeTime", RDS_IB_PKT_LT);
389 
390 	rdsdbglvl = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
391 	    "rdsdbglvl", RDS_LOG_L2);
392 
393 	if ((MaxRecvMemory * 1024) < (MaxDataRecvBuffers * UserBufferSize)) {
394 		RDS_DPRINTF0("RDSIB",
395 		    "rds.conf: MaxRecvMemory is not sufficient "
396 		    "to accomodate MaxDataRecvBuffers. MaxRecvMemory (%d) < "
397 		    "MaxDataRecvBuffers (%d) * UserBufferSize (%d). Setting "
398 		    "these values to defaults.", MaxRecvMemory * 1024,
399 		    MaxDataRecvBuffers, UserBufferSize);
400 
401 		MaxRecvMemory = RDS_MAX_RECV_MEMORY;
402 		MaxDataRecvBuffers = RDS_MAX_DATA_RECV_BUFFERS;
403 		MaxDataSendBuffers = RDS_MAX_DATA_SEND_BUFFERS;
404 		UserBufferSize = RDS_USER_DATA_BUFFER_SIZE;
405 	}
406 }
407