xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp.c (revision 1cfa752f4e24c34133009b0f6c139127a5c461de)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/ddi.h>
27 #include <sys/sunddi.h>
28 #include <sys/strsubr.h>
29 #include <sys/socket.h>
30 #include <net/if_arp.h>
31 #include <net/if_types.h>
32 #include <sys/sockio.h>
33 #include <sys/pathname.h>
34 
35 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
36 
37 #include <sys/kstr.h>
38 #include <sys/t_kuser.h>
39 
40 #include <sys/dls.h>
41 
/* Log tag passed as the first argument of every IBTF_DPRINTF_L* call below. */
extern char cmlog[];

/*
 * Path-resolution routines defined outside this file.
 * ibcm_arp_get_ibaddr() starts a lookup with ibcm_resolver_pr_lookup() and
 * releases the resulting work queue node with ibcm_arp_delete_prwqn().
 */
extern int ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s,
    ibt_ip_addr_t *dst_addr, ibt_ip_addr_t *src_addr, zoneid_t myzoneid);
extern void ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp);

/*
 * Static-analysis annotations: each of these types is declared to hold
 * "Unshared data".
 */
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibt_ip_addr_s))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_ip_t))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_ibd_insts_t))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_prwqn_t))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", sockaddr_in))
_NOTE(SCHEME_PROTECTS_DATA("Unshared data", sockaddr_in6))

/*
 * NOTE(review): not referenced anywhere in this file; presumably a debug
 * tunable consulted by callers of ibcm_ip_print() -- confirm before removing.
 */
int ibcm_printip = 0;
56 
57 /*
58  * Function:
59  *	ibcm_ip_print
60  * Input:
61  *	label		Arbitrary qualifying string
62  *	ipa		Pointer to IP Address to print
63  */
64 void
65 ibcm_ip_print(char *label, ibt_ip_addr_t *ipaddr)
66 {
67 	char    buf[INET6_ADDRSTRLEN];
68 
69 	if (ipaddr->family == AF_INET) {
70 		IBTF_DPRINTF_L2(cmlog, "%s: %s", label,
71 		    inet_ntop(AF_INET, &ipaddr->un.ip4addr, buf, sizeof (buf)));
72 	} else if (ipaddr->family == AF_INET6) {
73 		IBTF_DPRINTF_L2(cmlog, "%s: %s", label, inet_ntop(AF_INET6,
74 		    &ipaddr->un.ip6addr, buf, sizeof (buf)));
75 	} else {
76 		IBTF_DPRINTF_L2(cmlog, "%s: IP ADDR NOT SPECIFIED ", label);
77 	}
78 }
79 
80 
81 ibt_status_t
82 ibcm_arp_get_ibaddr(zoneid_t myzoneid, ibt_ip_addr_t srcaddr,
83     ibt_ip_addr_t destaddr, ib_gid_t *sgid, ib_gid_t *dgid,
84     ibt_ip_addr_t *saddrp)
85 {
86 	ibcm_arp_streams_t	*ib_s;
87 	ibcm_arp_prwqn_t	*wqnp;
88 	int			ret = 0;
89 	int			len;
90 
91 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr(%d, %p, %p, %p, %p, %p)",
92 	    myzoneid, srcaddr, destaddr, sgid, dgid, saddrp);
93 
94 	ib_s = (ibcm_arp_streams_t *)kmem_zalloc(sizeof (ibcm_arp_streams_t),
95 	    KM_SLEEP);
96 
97 	mutex_init(&ib_s->lock, NULL, MUTEX_DEFAULT, NULL);
98 	cv_init(&ib_s->cv, NULL, CV_DRIVER, NULL);
99 
100 	mutex_enter(&ib_s->lock);
101 	ib_s->done = B_FALSE;
102 	mutex_exit(&ib_s->lock);
103 
104 	ret = ibcm_resolver_pr_lookup(ib_s, &destaddr, &srcaddr, myzoneid);
105 
106 	IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: ibcm_resolver_pr_lookup "
107 	    "returned: %d", ret);
108 	if (ret == 0) {
109 		mutex_enter(&ib_s->lock);
110 		while (ib_s->done != B_TRUE)
111 			cv_wait(&ib_s->cv, &ib_s->lock);
112 		mutex_exit(&ib_s->lock);
113 	}
114 
115 	mutex_enter(&ib_s->lock);
116 	wqnp = ib_s->wqnp;
117 	if (ib_s->status == 0) {
118 		if (sgid)
119 			*sgid = wqnp->sgid;
120 		if (dgid)
121 			*dgid = wqnp->dgid;
122 		/*
123 		 * If the user supplied a address, then verify we got
124 		 * for the same address.
125 		 */
126 		if (wqnp->usrc_addr.family && sgid) {
127 			len = (wqnp->usrc_addr.family == AF_INET) ?
128 			    IP_ADDR_LEN : sizeof (in6_addr_t);
129 			if (bcmp(&wqnp->usrc_addr.un,
130 			    &wqnp->src_addr.un, len)) {
131 				IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibaddr: "
132 				    "srcaddr mismatch");
133 
134 				/* Clean-up old data, and reset the done flag */
135 				ibcm_arp_delete_prwqn(wqnp);
136 				ib_s->done = B_FALSE;
137 				mutex_exit(&ib_s->lock);
138 
139 				ret = ibcm_resolver_pr_lookup(ib_s, &srcaddr,
140 				    &srcaddr, myzoneid);
141 				if (ret == 0) {
142 					mutex_enter(&ib_s->lock);
143 					while (ib_s->done != B_TRUE)
144 						cv_wait(&ib_s->cv, &ib_s->lock);
145 					mutex_exit(&ib_s->lock);
146 				}
147 				mutex_enter(&ib_s->lock);
148 				wqnp = ib_s->wqnp;
149 				if (ib_s->status == 0) {
150 					if (sgid)
151 						*sgid = wqnp->dgid;
152 
153 					if (saddrp)
154 						bcopy(&wqnp->src_addr, saddrp,
155 						    sizeof (ibt_ip_addr_t));
156 
157 					IBTF_DPRINTF_L4(cmlog,
158 					    "ibcm_arp_get_ibaddr: "
159 					    "SGID: %llX:%llX DGID: %llX:%llX",
160 					    sgid->gid_prefix, sgid->gid_guid,
161 					    dgid->gid_prefix, dgid->gid_guid);
162 
163 					ibcm_arp_delete_prwqn(wqnp);
164 				} else if (ret == 0) {
165 					if (wqnp)
166 						kmem_free(wqnp,
167 						    sizeof (ibcm_arp_prwqn_t));
168 				}
169 				goto arp_ibaddr_done;
170 			}
171 		}
172 
173 		if (saddrp)
174 			bcopy(&wqnp->src_addr, saddrp, sizeof (ibt_ip_addr_t));
175 
176 		IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibaddr: SGID: %llX:%llX"
177 		    " DGID: %llX:%llX", sgid->gid_prefix, sgid->gid_guid,
178 		    dgid->gid_prefix, dgid->gid_guid);
179 
180 		ibcm_arp_delete_prwqn(wqnp);
181 	} else if (ret == 0) {
182 		/*
183 		 * We come here only when lookup has returned empty (failed)
184 		 * via callback routine.
185 		 * i.e. ib_s->status is non-zero, while ret is zero.
186 		 */
187 		if (wqnp)
188 			kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
189 	}
190 arp_ibaddr_done:
191 	ret = ib_s->status;
192 	mutex_exit(&ib_s->lock);
193 
194 arp_ibaddr_error:
195 
196 	mutex_destroy(&ib_s->lock);
197 	cv_destroy(&ib_s->cv);
198 	kmem_free(ib_s, sizeof (ibcm_arp_streams_t));
199 
200 	if (ret)
201 		return (IBT_FAILURE);
202 	else
203 		return (IBT_SUCCESS);
204 }
205 
206 void
207 ibcm_arp_free_ibds(ibcm_arp_ibd_insts_t *ibds)
208 {
209 	if (ibds->ibcm_arp_ip) {
210 		kmem_free(ibds->ibcm_arp_ip, ibds->ibcm_arp_ibd_alloc *
211 		    sizeof (ibcm_arp_ip_t));
212 		ibds->ibcm_arp_ibd_alloc = 0;
213 		ibds->ibcm_arp_ibd_cnt = 0;
214 		ibds->ibcm_arp_ip = NULL;
215 	}
216 }
217 
218 static void
219 ibcm_arp_get_ibd_insts(ibcm_arp_ibd_insts_t *ibds)
220 {
221 	ibcm_arp_ip_t	*ipp;
222 	ib_gid_t	port_gid;
223 	ibt_part_attr_t	*attr_list, *attr;
224 	int		nparts;
225 
226 	if ((ibt_get_all_part_attr(&attr_list, &nparts) != IBT_SUCCESS) ||
227 	    (nparts == 0)) {
228 		ibds->ibcm_arp_ibd_alloc = 0;
229 		ibds->ibcm_arp_ibd_cnt = 0;
230 		ibds->ibcm_arp_ip = NULL;
231 		return;
232 	}
233 
234 	ibds->ibcm_arp_ibd_alloc = nparts;
235 	ibds->ibcm_arp_ibd_cnt = 0;
236 	ibds->ibcm_arp_ip = (ibcm_arp_ip_t *)kmem_zalloc(
237 	    nparts * sizeof (ibcm_arp_ip_t), KM_SLEEP);
238 
239 	attr = attr_list;
240 	while (nparts--) {
241 		if (ibt_get_port_state_byguid(attr->pa_hca_guid,
242 		    attr->pa_port, &port_gid, NULL) == IBT_SUCCESS) {
243 
244 			ipp = &ibds->ibcm_arp_ip[ibds->ibcm_arp_ibd_cnt];
245 			ipp->ip_linkid = attr->pa_plinkid;
246 			ipp->ip_pkey = attr->pa_pkey;
247 			ipp->ip_hca_guid = attr->pa_hca_guid;
248 			ipp->ip_port_gid = port_gid;
249 			ibds->ibcm_arp_ibd_cnt++;
250 		}
251 		attr++;
252 	}
253 
254 	(void) ibt_free_part_attr(attr_list, ibds->ibcm_arp_ibd_alloc);
255 }
256 
257 /*
258  * Issue an ioctl down to IP.  There are several similar versions of this
259  * function (e.g., rpcib_do_ip_ioctl()); clearly a utility routine is needed.
260  */
261 static int
262 ibcm_do_ip_ioctl(int cmd, int len, void *arg)
263 {
264 	vnode_t *kkvp;
265 	TIUSER  *tiptr;
266 	struct  strioctl iocb;
267 	int	err = 0;
268 
269 	if (lookupname("/dev/udp", UIO_SYSSPACE, FOLLOW, NULLVPP, &kkvp) != 0)
270 		return (EPROTO);
271 
272 	if (t_kopen(NULL, kkvp->v_rdev, FREAD|FWRITE, &tiptr, CRED()) != 0) {
273 		VN_RELE(kkvp);
274 		return (EPROTO);
275 	}
276 
277 	iocb.ic_cmd = cmd;
278 	iocb.ic_timout = 0;
279 	iocb.ic_len = len;
280 	iocb.ic_dp = (caddr_t)arg;
281 	err = kstr_ioctl(tiptr->fp->f_vnode, I_STR, (intptr_t)&iocb);
282 	(void) t_kclose(tiptr, 0);
283 	VN_RELE(kkvp);
284 	return (err);
285 }
286 
287 /*
288  * Issue an SIOCGLIFCONF down to IP and return the result in `lifcp'.
289  * lifcp->lifc_buf is dynamically allocated to be *bufsizep bytes.
290  */
291 static int
292 ibcm_do_lifconf(struct lifconf *lifcp, uint_t *bufsizep, sa_family_t family_loc)
293 {
294 	int err;
295 	struct lifnum lifn;
296 
297 	bzero(&lifn, sizeof (struct lifnum));
298 	lifn.lifn_family = family_loc;
299 
300 	err = ibcm_do_ip_ioctl(SIOCGLIFNUM, sizeof (struct lifnum), &lifn);
301 	if (err != 0)
302 		return (err);
303 
304 	IBTF_DPRINTF_L4(cmlog, "ibcm_do_lifconf: Family %d, lifn_count %d",
305 	    family_loc, lifn.lifn_count);
306 	/*
307 	 * Pad the interface count to account for additional interfaces that
308 	 * may have been configured between the SIOCGLIFNUM and SIOCGLIFCONF.
309 	 */
310 	lifn.lifn_count += 4;
311 
312 	bzero(lifcp, sizeof (struct lifconf));
313 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*lifcp))
314 	lifcp->lifc_family = family_loc;
315 	lifcp->lifc_len = *bufsizep = lifn.lifn_count * sizeof (struct lifreq);
316 	lifcp->lifc_buf = kmem_zalloc(*bufsizep, KM_SLEEP);
317 
318 	err = ibcm_do_ip_ioctl(SIOCGLIFCONF, sizeof (struct lifconf), lifcp);
319 	if (err != 0) {
320 		kmem_free(lifcp->lifc_buf, *bufsizep);
321 		return (err);
322 	}
323 	return (0);
324 }
325 
326 static ibcm_arp_ip_t *
327 ibcm_arp_lookup(ibcm_arp_ibd_insts_t *ibds, char *linkname)
328 {
329 	datalink_id_t	linkid;
330 	int		i;
331 
332 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: linkname =  %s\n", linkname);
333 
334 	/*
335 	 * If at first we don't succeed, try again, just in case it is in
336 	 * hiding. The first call requires the datalink management daemon
337 	 * (the authorative source of information about name to id mapping)
338 	 * to be present and answering upcalls, the second does not.
339 	 */
340 	if (dls_mgmt_get_linkid(linkname, &linkid) != 0) {
341 		if (dls_devnet_macname2linkid(linkname, &linkid) != 0) {
342 			IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: could not "
343 			    "get linkid from linkname\n");
344 			return (NULL);
345 		}
346 	}
347 
348 	for (i = 0; i < ibds->ibcm_arp_ibd_cnt; i++) {
349 		if (ibds->ibcm_arp_ip[i].ip_linkid == linkid)
350 			return (&ibds->ibcm_arp_ip[i]);
351 	}
352 
353 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_lookup: returning NULL\n");
354 	return (NULL);
355 }
356 
357 /*
358  * Fill in `ibds' with IP addresses tied to IFT_IB IP interfaces.  Returns
359  * B_TRUE if at least one address was filled in.
360  */
361 static boolean_t
362 ibcm_arp_get_ibd_ipaddr(ibcm_arp_ibd_insts_t *ibds, sa_family_t family_loc)
363 {
364 	int i, nifs, naddr = 0;
365 	uint_t bufsize;
366 	struct lifconf lifc;
367 	struct lifreq *lifrp;
368 	ibcm_arp_ip_t *ipp;
369 
370 	if (ibcm_do_lifconf(&lifc, &bufsize, family_loc) != 0)
371 		return (B_FALSE);
372 
373 	nifs = lifc.lifc_len / sizeof (struct lifreq);
374 
375 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibd_ipaddr: Family %d, nifs %d",
376 	    family_loc, nifs);
377 
378 	for (lifrp = lifc.lifc_req, i = 0; i < nifs; i++, lifrp++) {
379 		if (lifrp->lifr_type != IFT_IB)
380 			continue;
381 
382 		if ((ipp = ibcm_arp_lookup(ibds, lifrp->lifr_name)) == NULL)
383 			continue;
384 
385 		switch (lifrp->lifr_addr.ss_family) {
386 		case AF_INET:
387 			ipp->ip_inet_family = AF_INET;
388 			bcopy(&lifrp->lifr_addr, &ipp->ip_cm_sin,
389 			    sizeof (struct sockaddr_in));
390 			naddr++;
391 			break;
392 		case AF_INET6:
393 			ipp->ip_inet_family = AF_INET6;
394 			bcopy(&lifrp->lifr_addr, &ipp->ip_cm_sin6,
395 			    sizeof (struct sockaddr_in6));
396 			naddr++;
397 			break;
398 		}
399 	}
400 
401 	kmem_free(lifc.lifc_buf, bufsize);
402 	return (naddr > 0);
403 }
404 
405 ibt_status_t
406 ibcm_arp_get_ibds(ibcm_arp_ibd_insts_t *ibdp, sa_family_t family_loc)
407 {
408 #ifdef DEBUG
409 	int i;
410 #endif
411 
412 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds(%p)", ibdp);
413 
414 	ibcm_arp_get_ibd_insts(ibdp);
415 
416 	IBTF_DPRINTF_L3(cmlog, "ibcm_arp_get_ibds: Found %d ibd instances",
417 	    ibdp->ibcm_arp_ibd_cnt);
418 
419 	if (ibdp->ibcm_arp_ibd_cnt == 0)
420 		return (IBT_SRC_IP_NOT_FOUND);
421 
422 	/* Get the IP addresses of active ports. */
423 	if (!ibcm_arp_get_ibd_ipaddr(ibdp, family_loc)) {
424 		IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: failed to get "
425 		    "ibd instance: IBT_SRC_IP_NOT_FOUND");
426 		ibcm_arp_free_ibds(ibdp);
427 		return (IBT_SRC_IP_NOT_FOUND);
428 	}
429 
430 #ifdef DEBUG
431 	for (i = 0; i < ibdp->ibcm_arp_ibd_cnt; i++) {
432 		char    my_buf[INET6_ADDRSTRLEN];
433 		ibcm_arp_ip_t	*aip = &ibdp->ibcm_arp_ip[i];
434 
435 		IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: Linkid %d Family %d "
436 		    "PKey 0x%lX \n HCAGUID 0x%llX SGID %llX:%llX",
437 		    aip->ip_linkid, aip->ip_inet_family, aip->ip_pkey,
438 		    aip->ip_hca_guid, aip->ip_port_gid.gid_prefix,
439 		    aip->ip_port_gid.gid_guid);
440 		if (aip->ip_inet_family == AF_INET) {
441 			IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: IPV4: %s",
442 			    inet_ntop(AF_INET, &aip->ip_cm_sin.sin_addr, my_buf,
443 			    sizeof (my_buf)));
444 		} else if (aip->ip_inet_family == AF_INET6) {
445 			IBTF_DPRINTF_L4(cmlog, "ibcm_arp_get_ibds: IPV6: %s",
446 			    inet_ntop(AF_INET6, &aip->ip_cm_sin6.sin6_addr,
447 			    my_buf, sizeof (my_buf)));
448 		} else {
449 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_get_ibds: Unknown "
450 			    "Family %d", aip->ip_inet_family);
451 		}
452 	}
453 #endif
454 
455 	return (IBT_SUCCESS);
456 }
457