xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c (revision d3a82192edbbe93c6027629b50fd93fed5d0e1ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/strsun.h>
31 #include <sys/sysmacros.h>
32 #include <sys/strlog.h>
33 #include <sys/ddi.h>
34 #include <sys/cmn_err.h>
35 #include <sys/socket.h>
36 #include <net/if.h>
37 #include <net/if_types.h>
38 #include <netinet/in.h>
39 #include <sys/ethernet.h>
40 #include <inet/arp.h>
41 #include <inet/ip.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip_if.h>
44 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
45 #include <inet/ip_ftable.h>
46 
47 static areq_t ibcm_arp_areq_template = {
48 	AR_ENTRY_QUERY,	/* cmd */
49 	sizeof (areq_t) + (2 * IP_ADDR_LEN),	/* name offset */
50 	sizeof (areq_t),	/* name len */
51 	IP_ARP_PROTO_TYPE,	/* protocol, from arps perspective */
52 	sizeof (areq_t),	/* target addr offset */
53 	IP_ADDR_LEN,	/* target ADDR_length */
54 	0,	/* flags */
55 	sizeof (areq_t) + IP_ADDR_LEN,	/* sender addr offset */
56 	IP_ADDR_LEN,	/* sender addr length */
57 	IBCM_ARP_XMIT_COUNT,	/* xmit_count */
58 	IBCM_ARP_XMIT_INTERVAL,	/* (re)xmit_interval in milliseconds */
59 	4	/* max # of requests to buffer */
60 		/*
61 		 * anything else filled in by the code
62 		 */
63 };
64 
65 static area_t ibcm_arp_area_template = {
66 	AR_ENTRY_ADD,			/* cmd */
67 	sizeof (area_t) + IPOIB_ADDRL + (2 * IP_ADDR_LEN), /* name offset */
68 	sizeof (area_t),		/* name len */
69 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
70 	sizeof (area_t),		/* proto addr offset */
71 	IP_ADDR_LEN,			/* proto ADDR_length */
72 	sizeof (area_t) + (IP_ADDR_LEN),	/* proto mask offset */
73 	0,				/* flags */
74 	sizeof (area_t) + (2 * IP_ADDR_LEN),	/* hw addr offset */
75 	IPOIB_ADDRL				/* hw addr length */
76 };
77 
78 extern char cmlog[];
79 
80 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", msgb))
81 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", area_t))
82 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
83 
84 static void ibcm_arp_timeout(void *arg);
85 static void ibcm_arp_pr_callback(ibcm_arp_prwqn_t *wqnp, int status);
86 static void ibcm_ipv6_resolver_ack(ip2mac_t *, void *);
87 static int ibcm_ipv6_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
88 
89 /*
90  * issue a AR_ENTRY_QUERY to arp driver and schedule a timeout.
91  */
92 static int
93 ibcm_arp_query_arp(ibcm_arp_prwqn_t *wqnp)
94 {
95 	int len;
96 	int name_len;
97 	int name_offset;
98 	char *cp;
99 	mblk_t *mp;
100 	mblk_t *mp1;
101 	areq_t *areqp;
102 	ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg;
103 
104 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_query_arp(ib_s: %p wqnp: %p)",
105 	    ib_s, wqnp);
106 
107 	name_offset = ibcm_arp_areq_template.areq_name_offset;
108 
109 	/*
110 	 * allocate mblk for AR_ENTRY_QUERY
111 	 */
112 	name_len = strlen(wqnp->ifname) + 1;
113 	len = name_len + name_offset;
114 	if ((mp = allocb(len, BPRI_HI)) == NULL) {
115 		return (ENOMEM);
116 	}
117 	bzero(mp->b_rptr, len);
118 	mp->b_wptr += len;
119 
120 	/*
121 	 * allocate a mblk and set wqnp in the data
122 	 */
123 	if ((mp1 = allocb(sizeof (void *), BPRI_HI)) == NULL) {
124 		freeb(mp);
125 		return (ENOMEM);
126 	}
127 
128 	mp1->b_wptr += sizeof (void *);
129 	*(uintptr_t *)(void *)mp1->b_rptr = (uintptr_t)wqnp;	/* store wqnp */
130 
131 	cp = (char *)mp->b_rptr;
132 	bcopy(&ibcm_arp_areq_template, cp, sizeof (areq_t));
133 	areqp = (void *)cp;
134 	areqp->areq_name_length = name_len;
135 
136 	cp = (char *)areqp + areqp->areq_name_offset;
137 	bcopy(wqnp->ifname, cp, name_len);
138 
139 	areqp->areq_proto = wqnp->ifproto;
140 	bcopy(&wqnp->ifproto, areqp->areq_sap, 2);
141 	cp = (char *)areqp + areqp->areq_target_addr_offset;
142 	bcopy(&wqnp->dst_addr.un.ip4addr, cp, IP_ADDR_LEN);
143 	cp = (char *)areqp + areqp->areq_sender_addr_offset;
144 	bcopy(&wqnp->src_addr.un.ip4addr, cp, IP_ADDR_LEN);
145 
146 	mp->b_cont = mp1;
147 
148 	DB_TYPE(mp) = M_PROTO;
149 
150 	/*
151 	 * issue the request to arp
152 	 */
153 	wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
154 	wqnp->timeout_id = timeout(ibcm_arp_timeout, wqnp,
155 	    drv_usectohz(IBCM_ARP_TIMEOUT * 1000));
156 	if (canputnext(ib_s->arpqueue)) {
157 		putnext(ib_s->arpqueue, mp);
158 	} else {
159 		(void) putq(ib_s->arpqueue, mp);
160 		qenable(ib_s->arpqueue);
161 	}
162 
163 	return (0);
164 }
165 
166 /*
167  * issue AR_ENTRY_SQUERY to arp driver
168  */
169 static int
170 ibcm_arp_squery_arp(ibcm_arp_prwqn_t *wqnp)
171 {
172 	int len;
173 	int name_len;
174 	char *cp;
175 	mblk_t *mp;
176 	mblk_t *mp1;
177 	area_t *areap;
178 	uint32_t  proto_mask = 0xffffffff;
179 	struct iocblk *ioc;
180 	ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg;
181 
182 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_squery_arp(ib_s: %p wqnp: %p)",
183 	    ib_s, wqnp);
184 
185 	/*
186 	 * allocate mblk for AR_ENTRY_SQUERY
187 	 */
188 	name_len = strlen(wqnp->ifname) + 1;
189 	len = ibcm_arp_area_template.area_name_offset + name_len +
190 	    sizeof (uintptr_t);
191 	if ((mp = allocb(len, BPRI_HI)) == NULL) {
192 		return (ENOMEM);
193 	}
194 	bzero(mp->b_rptr, len);
195 	mp->b_wptr += len + sizeof (uintptr_t);
196 
197 	*(uintptr_t *)(void *)mp->b_rptr = (uintptr_t)wqnp;	/* store wqnp */
198 	mp->b_rptr += sizeof (uintptr_t);
199 
200 
201 	cp = (char *)mp->b_rptr;
202 	bcopy(&ibcm_arp_area_template, cp, sizeof (area_t));
203 
204 	areap = (void *)cp;
205 	areap->area_cmd = AR_ENTRY_SQUERY;
206 	areap->area_name_length = name_len;
207 	cp = (char *)areap + areap->area_name_offset;
208 	bcopy(wqnp->ifname, cp, name_len);
209 
210 	cp = (char *)areap + areap->area_proto_addr_offset;
211 	bcopy(&wqnp->dst_addr.un.ip4addr, cp, IP_ADDR_LEN);
212 
213 	cp = (char *)areap + areap->area_proto_mask_offset;
214 	bcopy(&proto_mask, cp, IP_ADDR_LEN);
215 
216 	mp1 = allocb(sizeof (struct iocblk), BPRI_HI);
217 	if (mp1 == NULL) {
218 		freeb(mp);
219 		return (ENOMEM);
220 	}
221 	ioc = (void *)mp1->b_rptr;
222 	ioc->ioc_cmd = AR_ENTRY_SQUERY;
223 	ioc->ioc_error = 0;
224 	ioc->ioc_cr = NULL;
225 	ioc->ioc_count = msgdsize(mp);
226 	mp1->b_wptr += sizeof (struct iocblk);
227 	mp1->b_cont = mp;
228 
229 	DB_TYPE(mp1) = M_IOCTL;
230 
231 	if (canputnext(ib_s->arpqueue)) {
232 		putnext(ib_s->arpqueue, mp1);
233 	} else {
234 		(void) putq(ib_s->arpqueue, mp1);
235 		qenable(ib_s->arpqueue);
236 	}
237 	return (0);
238 }
239 
240 /*
241  * issue a AR_ENTRY_ADD to arp driver
242  * This is required as arp driver does not maintain a cache.
243  */
244 static int
245 ibcm_arp_add(ibcm_arp_prwqn_t *wqnp)
246 {
247 	int len;
248 	int name_len;
249 	char *cp;
250 	mblk_t *mp;
251 	area_t *areap;
252 	uint32_t  proto_mask = 0xffffffff;
253 	ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg;
254 
255 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_add(ib_s: %p wqnp: %p)", ib_s, wqnp);
256 
257 	/*
258 	 * allocate mblk for AR_ENTRY_ADD
259 	 */
260 
261 	name_len = strlen(wqnp->ifname) + 1;
262 	len = ibcm_arp_area_template.area_name_offset + name_len;
263 	if ((mp = allocb(len, BPRI_HI)) == NULL) {
264 		return (ENOMEM);
265 	}
266 	bzero(mp->b_rptr, len);
267 	mp->b_wptr += len;
268 
269 	cp = (char *)mp->b_rptr;
270 	bcopy(&ibcm_arp_area_template, cp, sizeof (area_t));
271 
272 	areap = (void *)mp->b_rptr;
273 	areap->area_name_length = name_len;
274 	cp = (char *)areap + areap->area_name_offset;
275 	bcopy(wqnp->ifname, cp, name_len);
276 
277 	cp = (char *)areap + areap->area_proto_addr_offset;
278 	bcopy(&wqnp->dst_addr.un.ip4addr, cp, IP_ADDR_LEN);
279 
280 	cp = (char *)areap + areap->area_proto_mask_offset;
281 	bcopy(&proto_mask, cp, IP_ADDR_LEN);
282 
283 	cp = (char *)areap + areap->area_hw_addr_offset;
284 	bcopy(&wqnp->dst_mac, cp, IPOIB_ADDRL);
285 
286 	DB_TYPE(mp) = M_PROTO;
287 
288 	if (canputnext(ib_s->arpqueue)) {
289 		putnext(ib_s->arpqueue, mp);
290 	} else {
291 		(void) putq(ib_s->arpqueue, mp);
292 		qenable(ib_s->arpqueue);
293 	}
294 	return (0);
295 }
296 
297 
298 /*
299  * timeout routine when there is no response to AR_ENTRY_QUERY
300  */
301 static void
302 ibcm_arp_timeout(void *arg)
303 {
304 	ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
305 	ibcm_arp_streams_t *ib_s = (ibcm_arp_streams_t *)wqnp->arg;
306 
307 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_timeout(ib_s: %p wqnp: %p)",
308 	    ib_s, wqnp);
309 	wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
310 	cv_broadcast(&ib_s->cv);
311 
312 	/*
313 	 * indicate to user
314 	 */
315 	ibcm_arp_pr_callback(wqnp, EHOSTUNREACH);
316 }
317 
318 /*
319  * delete a wait queue node from the list.
320  * assumes mutex is acquired
321  */
322 void
323 ibcm_arp_prwqn_delete(ibcm_arp_prwqn_t *wqnp)
324 {
325 	ibcm_arp_streams_t *ib_s;
326 
327 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_prwqn_delete(%p)", wqnp);
328 
329 	ib_s = (ibcm_arp_streams_t *)wqnp->arg;
330 	ib_s->wqnp = NULL;
331 	kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
332 }
333 
334 /*
335  * allocate a wait queue node, and insert it in the list
336  */
337 static ibcm_arp_prwqn_t *
338 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
339     ibt_ip_addr_t *src_addr, ibcm_arp_pr_comp_func_t func)
340 {
341 	ibcm_arp_prwqn_t *wqnp;
342 
343 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
344 
345 	if (dst_addr == NULL) {
346 		return (NULL);
347 	}
348 	if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
349 	    NULL) {
350 		return (NULL);
351 	}
352 	wqnp->dst_addr = *dst_addr;
353 
354 	if (src_addr) {
355 		wqnp->usrc_addr = *src_addr;
356 	}
357 	wqnp->func = func;
358 	wqnp->arg = ib_s;
359 	wqnp->ifproto = (dst_addr->family == AF_INET) ?
360 	    ETHERTYPE_IP : ETHERTYPE_IPV6;
361 
362 	ib_s->wqnp = wqnp;
363 
364 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
365 
366 	return (wqnp);
367 }
368 
369 /*
370  * call the user function
371  * called with lock held
372  */
373 static void
374 ibcm_arp_pr_callback(ibcm_arp_prwqn_t *wqnp, int status)
375 {
376 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_callback(%p, %d)", wqnp, status);
377 
378 	wqnp->func((void *)wqnp, status);
379 }
380 
381 /*
382  * Check if the interface is loopback or IB.
383  */
384 static int
385 ibcm_arp_check_interface(ill_t *ill)
386 {
387 	if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
388 		return (0);
389 
390 	return (ETIMEDOUT);
391 }
392 
393 int
394 ibcm_arp_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
395     ibt_ip_addr_t *src_addr, ibcm_arp_pr_comp_func_t func)
396 {
397 	ibcm_arp_prwqn_t *wqnp;
398 	ire_t	*ire = NULL;
399 	ire_t	*src_ire = NULL;
400 	ipif_t	*ipif;
401 	ill_t	*ill, *hwaddr_ill = NULL;
402 	ip_stack_t *ipst;
403 	int		len;
404 
405 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
406 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
407 
408 	if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr,
409 	    src_addr, func)) == NULL) {
410 		IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
411 		    "ibcm_arp_create_prwqn failed");
412 		ib_s->status = ENOMEM;
413 		return (1);
414 	}
415 
416 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
417 	if (dst_addr->family == AF_INET) {
418 		/*
419 		 * Get the ire for the local address
420 		 */
421 		IBTF_DPRINTF_L5(cmlog, "ibcm_arp_pr_lookup: ire_ctable_lookup");
422 		src_ire = ire_ctable_lookup(src_addr->un.ip4addr, NULL,
423 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst);
424 		if (src_ire == NULL) {
425 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
426 			    "ire_ctable_lookup failed");
427 			ib_s->status = EFAULT;
428 			goto fail;
429 		}
430 		IBTF_DPRINTF_L5(cmlog, "ibcm_arp_pr_lookup: ire_ctable_lookup");
431 
432 		/*
433 		 * get an ire for the destination address with the matching
434 		 * source address
435 		 */
436 		ire = ire_ftable_lookup(dst_addr->un.ip4addr, 0, 0, 0,
437 		    src_ire->ire_ipif, 0, src_ire->ire_zoneid, 0, NULL,
438 		    MATCH_IRE_SRC, ipst);
439 		if (ire == NULL) {
440 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
441 			    "ire_ftable_lookup failed");
442 			ib_s->status = EFAULT;
443 			goto fail;
444 		}
445 
446 		IBTF_DPRINTF_L5(cmlog, "ibcm_arp_pr_lookup: ire_ftable_lookup:"
447 		    "done");
448 
449 		wqnp->gateway.un.ip4addr =
450 		    ((ire->ire_gateway_addr == INADDR_ANY) ?
451 		    ire->ire_addr : ire->ire_gateway_addr);
452 		wqnp->netmask.un.ip4addr = ire->ire_mask;
453 		wqnp->src_addr.un.ip4addr = ire->ire_src_addr;
454 		wqnp->src_addr.family = wqnp->gateway.family =
455 		    wqnp->netmask.family = AF_INET;
456 
457 	} else if (dst_addr->family == AF_INET6) {
458 		/*
459 		 * Get the ire for the local address
460 		 */
461 		src_ire = ire_ctable_lookup_v6(&src_addr->un.ip6addr, NULL,
462 		    IRE_LOCAL, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, ipst);
463 		if (src_ire == NULL) {
464 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
465 			    "ire_ctable_lookup_v6 failed");
466 			ib_s->status = EFAULT;
467 			goto fail;
468 		}
469 		IBTF_DPRINTF_L5(cmlog, "ibcm_arp_pr_lookup: "
470 		    "ire_ctable_lookup_v6: done");
471 
472 		/*
473 		 * get an ire for the destination address with the matching
474 		 * source address
475 		 */
476 		ire = ire_ftable_lookup_v6(&dst_addr->un.ip6addr, 0, 0, 0,
477 		    src_ire->ire_ipif, 0, src_ire->ire_zoneid, 0, NULL,
478 		    MATCH_IRE_SRC, ipst);
479 		if (ire == NULL) {
480 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
481 			    "ire_ftable_lookup_v6 failed");
482 			ib_s->status = EFAULT;
483 			goto fail;
484 		}
485 		IBTF_DPRINTF_L5(cmlog, "ibcm_arp_pr_lookup: "
486 		    "ire_ftable_lookup_v6: done");
487 
488 		wqnp->gateway.un.ip6addr =
489 		    (IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) ?
490 		    ire->ire_addr_v6 : ire->ire_gateway_addr_v6);
491 		wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
492 		wqnp->src_addr.un.ip6addr = ire->ire_src_addr_v6;
493 		wqnp->src_addr.family = wqnp->gateway.family =
494 		    wqnp->netmask.family = AF_INET6;
495 	}
496 
497 	ipif = src_ire->ire_ipif;
498 	ill = ipif->ipif_ill;
499 	(void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
500 
501 	/*
502 	 * For IPMP data addresses, we need to use the hardware address of the
503 	 * interface bound to the given address.
504 	 */
505 	if (IS_IPMP(ill)) {
506 		if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
507 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: no bound "
508 			    "ill for IPMP interface %s", ill->ill_name);
509 			ib_s->status = EFAULT;
510 			goto fail;
511 		}
512 	} else {
513 		hwaddr_ill = ill;
514 		ill_refhold(hwaddr_ill); 	/* for symmetry */
515 	}
516 
517 	if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
518 		IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
519 		    "ibcm_arp_check_interface failed");
520 		goto fail;
521 	}
522 
523 	bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
524 	    hwaddr_ill->ill_phys_addr_length);
525 
526 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_lookup: outgoing if:%s",
527 	    wqnp->ifname);
528 
529 	/*
530 	 * if the user supplied a address, then verify rts returned
531 	 * the same address
532 	 */
533 	if (wqnp->usrc_addr.family) {
534 		len = (wqnp->usrc_addr.family == AF_INET) ?
535 		    IP_ADDR_LEN : sizeof (in6_addr_t);
536 		if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) {
537 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: srcaddr "
538 			    "mismatch:%d", ENETUNREACH);
539 			goto fail;
540 		}
541 	}
542 
543 	/*
544 	 * at this stage, we have the source address and the IB
545 	 * interface, now get the destination mac address from
546 	 * arp or ipv6 drivers
547 	 */
548 	if (wqnp->dst_addr.family == AF_INET) {
549 		if ((ib_s->status = ibcm_arp_squery_arp(wqnp)) != 0) {
550 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
551 			    "ibcm_arp_squery_arp failed: %d", ib_s->status);
552 			goto fail;
553 		}
554 	} else {
555 		if ((ib_s->status = ibcm_ipv6_lookup(wqnp, ill, getzoneid())) !=
556 		    0) {
557 			IBTF_DPRINTF_L2(cmlog, "ibcm_arp_pr_lookup: "
558 			    "ibcm_ipv6_lookup failed: %d", ib_s->status);
559 			goto fail;
560 		}
561 	}
562 
563 	ill_refrele(hwaddr_ill);
564 	IRE_REFRELE(ire);
565 	IRE_REFRELE(src_ire);
566 	netstack_rele(ipst->ips_netstack);
567 
568 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_lookup: Return: 0x%p", wqnp);
569 	return (0);
570 fail:
571 	if (hwaddr_ill != NULL)
572 		ill_refrele(hwaddr_ill);
573 	if (ire != NULL)
574 		IRE_REFRELE(ire);
575 	if (src_ire != NULL)
576 		IRE_REFRELE(src_ire);
577 	ibcm_arp_prwqn_delete(wqnp);
578 	netstack_rele(ipst->ips_netstack);
579 	return (1);
580 }
581 
582 /*
583  * called from lrsrv.
584  * process a AR_ENTRY_QUERY reply from arp
585  * the message should be M_DATA -->> dl_unitdata_req
586  */
587 static void
588 ibcm_arp_pr_arp_query_ack(mblk_t *mp)
589 {
590 	ibcm_arp_prwqn_t 	*wqnp;
591 	dl_unitdata_req_t *dlreq;
592 	ibcm_arp_streams_t *ib_s;
593 	char *cp;
594 	int rc;
595 
596 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_query_ack(%p)", mp);
597 
598 	/*
599 	 * the first mblk contains the wqnp pointer for the request
600 	 */
601 	if (MBLKL(mp) != sizeof (void *)) {
602 		freemsg(mp);
603 		return;
604 	}
605 
606 	wqnp = *(ibcm_arp_prwqn_t **)(void *)mp->b_rptr; /* retrieve wqnp */
607 	ib_s = (ibcm_arp_streams_t *)wqnp->arg;
608 
609 	mutex_enter(&ib_s->lock);
610 
611 	/*
612 	 * cancel the timeout for this request
613 	 */
614 	(void) untimeout(wqnp->timeout_id);
615 
616 	/*
617 	 * sanity checks on the dl_unitdata_req block
618 	 */
619 	if (!mp->b_cont) {
620 		IBTF_DPRINTF_L2(cmlog, "areq_ack: b_cont = NULL\n");
621 		rc = EPROTO;
622 		goto user_callback;
623 	}
624 	if (MBLKL(mp->b_cont) < (sizeof (dl_unitdata_req_t) + IPOIB_ADDRL)) {
625 		IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid len in "
626 		    "dl_unitdatareq_t block\n");
627 		rc = EPROTO;
628 		goto user_callback;
629 	}
630 	dlreq = (void *)mp->b_cont->b_rptr;
631 	if (dlreq->dl_primitive != DL_UNITDATA_REQ) {
632 		IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid dl_primitive "
633 		    "in dl_unitdatareq_t block\n");
634 		rc = EPROTO;
635 		goto user_callback;
636 	}
637 	if (dlreq->dl_dest_addr_length != (IPOIB_ADDRL + 2)) {
638 		IBTF_DPRINTF_L2(cmlog, "areq_ack: invalid hw len in "
639 		    "dl_unitdatareq_t block %d\n", dlreq->dl_dest_addr_length);
640 		rc = EPROTO;
641 		goto user_callback;
642 	}
643 	cp = (char *)mp->b_cont->b_rptr + dlreq->dl_dest_addr_offset;
644 	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
645 
646 	/*
647 	 * at this point we have src/dst gid's derived from the mac addresses
648 	 * now get the hca, port
649 	 */
650 	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
651 	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
652 	freemsg(mp);
653 
654 	IBCM_H2N_GID(wqnp->sgid);
655 	IBCM_H2N_GID(wqnp->dgid);
656 
657 	(void) ibcm_arp_add(wqnp);
658 
659 	mutex_exit(&ib_s->lock);
660 	ibcm_arp_pr_callback(wqnp, 0);
661 
662 	return;
663 user_callback:
664 	freemsg(mp);
665 	mutex_exit(&ib_s->lock);
666 
667 	/*
668 	 * indicate to user
669 	 */
670 	ibcm_arp_pr_callback(wqnp, rc);
671 }
672 
673 /*
674  * process a AR_ENTRY_SQUERY reply from arp
675  * the message should be M_IOCACK -->> area_t
676  */
677 static void
678 ibcm_arp_pr_arp_squery_ack(mblk_t *mp)
679 {
680 	struct iocblk *ioc;
681 	mblk_t	*mp1;
682 	ibcm_arp_prwqn_t 	*wqnp;
683 	ibcm_arp_streams_t *ib_s;
684 	area_t *areap;
685 	char *cp;
686 
687 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_squery_ack(%p)", mp);
688 
689 	if (MBLKL(mp) < sizeof (struct iocblk)) {
690 		freemsg(mp);
691 		return;
692 	}
693 
694 	ioc = (void *)mp->b_rptr;
695 	if ((ioc->ioc_cmd != AR_ENTRY_SQUERY) || (mp->b_cont == NULL)) {
696 		freemsg(mp);
697 		return;
698 	}
699 
700 	mp1 = mp->b_cont;
701 
702 	wqnp = *(ibcm_arp_prwqn_t **)((uintptr_t)mp1->b_rptr -
703 	    sizeof (uintptr_t));
704 	ib_s = (ibcm_arp_streams_t *)wqnp->arg;
705 
706 	mutex_enter(&ib_s->lock);
707 
708 	/*
709 	 * cancel the timeout for this request
710 	 */
711 	(void) untimeout(wqnp->timeout_id);
712 
713 	/* If the entry was not in arp cache, ioc_error is set */
714 	if (ioc->ioc_error) {
715 
716 		/*
717 		 * send out AR_ENTRY_QUERY which would send
718 		 * arp-request on wire
719 		 */
720 		IBTF_DPRINTF_L3(cmlog, "Sending a Query_ARP");
721 
722 		(void) ibcm_arp_query_arp(wqnp);
723 		freemsg(mp);
724 		mutex_exit(&ib_s->lock);
725 		return;
726 	}
727 
728 	areap = (void *)mp1->b_rptr;
729 	cp = (char *)areap + areap->area_hw_addr_offset;
730 	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
731 
732 	/*
733 	 * at this point we have src/dst gid's derived from the mac addresses
734 	 * now get the hca, port
735 	 */
736 	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
737 	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
738 	freemsg(mp);
739 
740 	IBCM_H2N_GID(wqnp->sgid);
741 	IBCM_H2N_GID(wqnp->dgid);
742 
743 	mutex_exit(&ib_s->lock);
744 	ibcm_arp_pr_callback(wqnp, 0);
745 }
746 
747 /*
748  * Process arp ack's.
749  */
750 void
751 ibcm_arp_pr_arp_ack(mblk_t *mp)
752 {
753 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_pr_arp_ack(0x%p, DB_TYPE %lX)",
754 	    mp, DB_TYPE(mp));
755 
756 	if (DB_TYPE(mp) == M_DATA) {
757 		ibcm_arp_pr_arp_query_ack(mp);
758 	} else if ((DB_TYPE(mp) == M_IOCACK) ||
759 	    (DB_TYPE(mp) == M_IOCNAK)) {
760 		ibcm_arp_pr_arp_squery_ack(mp);
761 	} else {
762 		freemsg(mp);
763 	}
764 }
765 
766 /*
767  * query the ipv6 driver cache for ipv6 to mac address mapping.
768  */
769 static int
770 ibcm_ipv6_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
771 {
772 	ip2mac_t ip2m;
773 	sin6_t *sin6;
774 	ip2mac_id_t ip2mid;
775 	int err;
776 
777 	if (wqnp->src_addr.family != AF_INET6) {
778 		IBTF_DPRINTF_L2(cmlog, "ibcm_ipv6_lookup: SRC_ADDR NOT INET6: "
779 		    "%d", wqnp->src_addr.family);
780 		return (1);
781 	}
782 
783 	bzero(&ip2m, sizeof (ip2m));
784 	sin6 = (sin6_t *)&ip2m.ip2mac_pa;
785 	sin6->sin6_family = AF_INET6;
786 	sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
787 	ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
788 
789 	wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
790 	wqnp->timeout_id = timeout(ibcm_arp_timeout, wqnp,
791 	    drv_usectohz(IBCM_ARP_TIMEOUT * 1000));
792 
793 	/*
794 	 * XXX XTBD set the scopeid?
795 	 * issue the request to IP for Neighbor Discovery
796 	 */
797 	ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_ipv6_resolver_ack, wqnp,
798 	    zoneid);
799 	err = ip2m.ip2mac_err;
800 	if (err == EINPROGRESS) {
801 		wqnp->ip2mac_id = ip2mid;
802 		wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
803 		err = 0;
804 	} else if (err == 0) {
805 		ibcm_ipv6_resolver_ack(&ip2m, wqnp);
806 	}
807 	return (err);
808 }
809 
810 /*
811  * do sanity checks on the link-level sockaddr
812  */
813 static boolean_t
814 ibcm_check_sockdl(struct sockaddr_dl *sdl)
815 {
816 
817 	if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
818 		return (B_FALSE);
819 
820 	return (B_TRUE);
821 }
822 
823 /*
824  * callback for resolver lookups, both for success and failure.
825  * If Address resolution was succesful: return GID info.
826  */
827 static void
828 ibcm_ipv6_resolver_ack(ip2mac_t *ip2macp, void *arg)
829 {
830 	ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
831 	ibcm_arp_streams_t *ib_s;
832 	uchar_t *cp;
833 	int err = 0;
834 
835 	IBTF_DPRINTF_L4(cmlog, "ibcm_ipv6_resolver_ack(%p, %p)", ip2macp, wqnp);
836 
837 	ib_s = (ibcm_arp_streams_t *)wqnp->arg;
838 	mutex_enter(&ib_s->lock);
839 
840 	/*
841 	 * cancel the timeout for this request
842 	 */
843 	(void) untimeout(wqnp->timeout_id);
844 
845 	if (ip2macp->ip2mac_err != 0) {
846 		wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
847 		cv_broadcast(&ib_s->cv);
848 		err = EHOSTUNREACH;
849 		goto user_callback;
850 	}
851 
852 	if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
853 		IBTF_DPRINTF_L2(cmlog, "ibcm_ipv6_resolver_ack: Error: "
854 		    "interface %s is not IB\n", wqnp->ifname);
855 		err = EHOSTUNREACH;
856 		goto user_callback;
857 	}
858 
859 	cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
860 	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
861 
862 	/*
863 	 * at this point we have src/dst gid's derived from the mac addresses
864 	 * now get the hca, port
865 	 */
866 	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
867 	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
868 
869 	IBCM_H2N_GID(wqnp->sgid);
870 	IBCM_H2N_GID(wqnp->dgid);
871 
872 user_callback:
873 	mutex_exit(&ib_s->lock);
874 	ibcm_arp_pr_callback(wqnp, err);
875 }
876