1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <net/if.h>
28 #include <net/if_types.h>
29 #include <inet/ip.h>
30 #include <inet/ip_ire.h>
31 #include <inet/ip_if.h>
32 #include <sys/ethernet.h>
33 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
34 
35 extern char cmlog[];
36 
37 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
38 
39 static void ibcm_resolver_ack(ip2mac_t *, void *);
40 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
41 
42 /*
43  * delete a wait queue node from the list.
44  * assumes mutex is acquired
45  */
46 void
47 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
48 {
49 	ibcm_arp_streams_t *ib_s;
50 
51 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
52 
53 	ib_s = wqnp->ib_str;
54 	ib_s->wqnp = NULL;
55 	kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
56 }
57 
58 /*
59  * allocate a wait queue node, and insert it in the list
60  */
61 static ibcm_arp_prwqn_t *
62 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
63     ibt_ip_addr_t *src_addr)
64 {
65 	ibcm_arp_prwqn_t *wqnp;
66 
67 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
68 
69 	if (dst_addr == NULL) {
70 		return (NULL);
71 	}
72 	if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
73 	    NULL) {
74 		return (NULL);
75 	}
76 	wqnp->dst_addr = *dst_addr;
77 
78 	if (src_addr) {
79 		wqnp->usrc_addr = *src_addr;
80 	}
81 	wqnp->ib_str = ib_s;
82 	wqnp->ifproto = (dst_addr->family == AF_INET) ?
83 	    ETHERTYPE_IP : ETHERTYPE_IPV6;
84 
85 	ib_s->wqnp = wqnp;
86 
87 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
88 
89 	return (wqnp);
90 }
91 
92 
93 /*
94  * Check if the interface is loopback or IB.
95  */
96 static int
97 ibcm_arp_check_interface(ill_t *ill)
98 {
99 	if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
100 		return (0);
101 
102 	return (ETIMEDOUT);
103 }
104 
105 int
106 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
107     ibt_ip_addr_t *src_addr)
108 {
109 	ibcm_arp_prwqn_t *wqnp;
110 	ire_t	*ire = NULL;
111 	ipif_t	*ipif = NULL;
112 	ill_t	*ill = NULL;
113 	ill_t	*hwaddr_ill = NULL;
114 	ip_stack_t *ipst;
115 	int		len;
116 	ipaddr_t	setsrcv4;
117 	in6_addr_t	setsrcv6;
118 
119 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
120 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
121 
122 	if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
123 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
124 		    "ibcm_arp_create_prwqn failed");
125 		ib_s->status = ENOMEM;
126 		return (1);
127 	}
128 
129 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
130 	if (dst_addr->family == AF_INET) {
131 		/*
132 		 * A local address is always specified, and it is used
133 		 * to find the zoneid.
134 		 */
135 		ipif = ipif_lookup_addr(src_addr->un.ip4addr, NULL, ALL_ZONES,
136 		    ipst);
137 		if (ipif == NULL) {
138 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
139 			    "ipif_lookup_addr failed");
140 			ib_s->status = EFAULT;
141 			goto fail;
142 		}
143 
144 		/*
145 		 * get an ire for the destination adress.
146 		 * Note that we can't use MATCH_IRE_ILL since that would
147 		 * require that the first ill we find have ire_ill set. Thus
148 		 * we compare ire_ill against ipif_ill after the lookup.
149 		 */
150 		setsrcv4 = INADDR_ANY;
151 		ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
152 		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
153 		    &setsrcv4, NULL, NULL);
154 
155 		ASSERT(ire != NULL);
156 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
157 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
158 			    "ire_route_recursive_v4 failed");
159 			ib_s->status = EFAULT;
160 			goto fail;
161 		}
162 		ill = ire_nexthop_ill(ire);
163 		if (ill == NULL) {
164 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
165 			    "ire_nexthop_ill failed");
166 			ib_s->status = EFAULT;
167 			goto fail;
168 		}
169 		if (ill != ipif->ipif_ill) {
170 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
171 			    "wrong ill");
172 			ib_s->status = EFAULT;
173 			goto fail;
174 		}
175 
176 		wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
177 		wqnp->netmask.un.ip4addr = ire->ire_mask;
178 		wqnp->src_addr.un.ip4addr = src_addr->un.ip4addr;
179 		wqnp->src_addr.family = wqnp->gateway.family =
180 		    wqnp->netmask.family = AF_INET;
181 
182 	} else if (dst_addr->family == AF_INET6) {
183 		/*
184 		 * A local address is always specified, and it is used
185 		 * to find the zoneid.
186 		 * We should really match on scopeid for link locals here.
187 		 */
188 		ipif = ipif_lookup_addr_v6(&src_addr->un.ip6addr, NULL,
189 		    ALL_ZONES, ipst);
190 		if (ipif == NULL) {
191 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
192 			    "ipif_lookup_addr_v6 failed");
193 			ib_s->status = EFAULT;
194 			goto fail;
195 		}
196 
197 		/*
198 		 * get an ire for the destination adress.
199 		 * Note that we can't use MATCH_IRE_ILL since that would
200 		 * require that the first ill we find have ire_ill set. Thus
201 		 * we compare ire_ill against ipif_ill after the lookup.
202 		 */
203 		setsrcv6 = ipv6_all_zeros;
204 		ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
205 		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
206 		    &setsrcv6, NULL, NULL);
207 
208 		ASSERT(ire != NULL);
209 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
210 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
211 			    "ire_route_recursive_v6 failed");
212 			ib_s->status = EFAULT;
213 			goto fail;
214 		}
215 		ill = ire_nexthop_ill(ire);
216 		if (ill == NULL) {
217 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
218 			    "ire_nexthop_ill failed");
219 			ib_s->status = EFAULT;
220 			goto fail;
221 		}
222 
223 		if (ill != ipif->ipif_ill) {
224 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
225 			    "wrong ill");
226 			ib_s->status = EFAULT;
227 			goto fail;
228 		}
229 
230 		wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
231 		wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
232 		wqnp->src_addr.un.ip6addr = src_addr->un.ip6addr;
233 		wqnp->src_addr.family = wqnp->gateway.family =
234 		    wqnp->netmask.family = AF_INET6;
235 	}
236 
237 	(void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
238 
239 	/*
240 	 * For IPMP data addresses, we need to use the hardware address of the
241 	 * interface bound to the given address.
242 	 */
243 	if (IS_IPMP(ill)) {
244 		if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
245 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
246 			    "no bound ill for IPMP interface %s",
247 			    ill->ill_name);
248 			ib_s->status = EFAULT;
249 			goto fail;
250 		}
251 	} else {
252 		hwaddr_ill = ill;
253 		ill_refhold(hwaddr_ill);	/* for symmetry */
254 	}
255 
256 	if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
257 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
258 		    "ibcm_arp_check_interface failed");
259 		goto fail;
260 	}
261 
262 	bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
263 	    hwaddr_ill->ill_phys_addr_length);
264 
265 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
266 	    wqnp->ifname);
267 
268 	/*
269 	 * if the user supplied a address, then verify rts returned
270 	 * the same address
271 	 */
272 	if (wqnp->usrc_addr.family) {
273 		len = (wqnp->usrc_addr.family == AF_INET) ?
274 		    IP_ADDR_LEN : sizeof (in6_addr_t);
275 		if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) {
276 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
277 			    "srcaddr mismatch:%d", ENETUNREACH);
278 			goto fail;
279 		}
280 	}
281 
282 	/*
283 	 * at this stage, we have the source address and the IB
284 	 * interface, now get the destination mac address from
285 	 * arp or ipv6 drivers
286 	 */
287 	ib_s->status = ibcm_nce_lookup(wqnp, ill, getzoneid());
288 	if (ib_s->status != 0) {
289 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
290 		    "ibcm_nce_lookup failed: %d", ib_s->status);
291 		goto fail;
292 	}
293 
294 	ill_refrele(hwaddr_ill);
295 	ill_refrele(ill);
296 	ire_refrele(ire);
297 	ipif_refrele(ipif);
298 	netstack_rele(ipst->ips_netstack);
299 
300 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
301 	return (0);
302 fail:
303 	if (hwaddr_ill != NULL)
304 		ill_refrele(hwaddr_ill);
305 	if (ill != NULL)
306 		ill_refrele(ill);
307 	if (ire != NULL)
308 		ire_refrele(ire);
309 	if (ipif != NULL)
310 		ipif_refrele(ipif);
311 	ibcm_arp_delete_prwqn(wqnp);
312 	netstack_rele(ipst->ips_netstack);
313 	return (1);
314 }
315 
316 /*
317  * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
318  */
319 static int
320 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
321 {
322 	ip2mac_t	ip2m;
323 	sin_t		*sin;
324 	sin6_t		*sin6;
325 	ip2mac_id_t	ip2mid;
326 	int		err;
327 
328 	if (wqnp->src_addr.family != wqnp->dst_addr.family) {
329 		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
330 		    "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
331 		    wqnp->dst_addr.family);
332 		return (1);
333 	}
334 	bzero(&ip2m, sizeof (ip2m));
335 
336 	if (wqnp->dst_addr.family == AF_INET) {
337 		sin = (sin_t *)&ip2m.ip2mac_pa;
338 		sin->sin_family = AF_INET;
339 		sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
340 	} else if (wqnp->dst_addr.family == AF_INET6) {
341 		sin6 = (sin6_t *)&ip2m.ip2mac_pa;
342 		sin6->sin6_family = AF_INET6;
343 		sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
344 	} else {
345 		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
346 		    "Family: %d", wqnp->dst_addr.family);
347 		return (1);
348 	}
349 
350 	ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
351 
352 	wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
353 
354 	/*
355 	 * issue the request to IP for Neighbor Discovery
356 	 */
357 	ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
358 	    zoneid);
359 	err = ip2m.ip2mac_err;
360 	if (err == EINPROGRESS) {
361 		wqnp->ip2mac_id = ip2mid;
362 		wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
363 		err = 0;
364 	} else if (err == 0) {
365 		ibcm_resolver_ack(&ip2m, wqnp);
366 	}
367 	return (err);
368 }
369 
370 /*
371  * do sanity checks on the link-level sockaddr
372  */
373 static boolean_t
374 ibcm_check_sockdl(struct sockaddr_dl *sdl)
375 {
376 
377 	if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
378 		return (B_FALSE);
379 
380 	return (B_TRUE);
381 }
382 
383 /*
384  * callback for resolver lookups, both for success and failure.
385  * If Address resolution was succesful: return GID info.
386  */
387 static void
388 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
389 {
390 	ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
391 	ibcm_arp_streams_t *ib_s;
392 	uchar_t *cp;
393 	int err = 0;
394 
395 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
396 
397 	ib_s = wqnp->ib_str;
398 	mutex_enter(&ib_s->lock);
399 
400 	if (ip2macp->ip2mac_err != 0) {
401 		wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
402 		cv_broadcast(&ib_s->cv);
403 		err = EHOSTUNREACH;
404 		goto user_callback;
405 	}
406 
407 	if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
408 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
409 		    "interface %s is not IB\n", wqnp->ifname);
410 		err = EHOSTUNREACH;
411 		goto user_callback;
412 	}
413 
414 	cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
415 	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
416 
417 	/*
418 	 * at this point we have src/dst gid's derived from the mac addresses
419 	 * now get the hca, port
420 	 */
421 	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
422 	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
423 
424 	IBCM_H2N_GID(wqnp->sgid);
425 	IBCM_H2N_GID(wqnp->dgid);
426 
427 user_callback:
428 
429 	ib_s->status = err;
430 	ib_s->done = B_TRUE;
431 
432 	/* lock is held by the caller. */
433 	cv_signal(&ib_s->cv);
434 	mutex_exit(&ib_s->lock);
435 }
436