xref: /illumos-gate/usr/src/uts/common/io/ib/mgt/ibcm/ibcm_arp_link.c (revision bd670b35a010421b6e1a5536c34453a827007c81)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <net/if.h>
28 #include <net/if_types.h>
29 #include <inet/ip.h>
30 #include <inet/ip_ire.h>
31 #include <inet/ip_if.h>
32 #include <sys/ib/mgt/ibcm/ibcm_arp.h>
33 
34 extern char cmlog[];
35 
36 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t))
37 
38 static void ibcm_resolver_ack(ip2mac_t *, void *);
39 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid);
40 
41 /*
42  * delete a wait queue node from the list.
43  * assumes mutex is acquired
44  */
45 void
46 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp)
47 {
48 	ibcm_arp_streams_t *ib_s;
49 
50 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp);
51 
52 	ib_s = wqnp->ib_str;
53 	ib_s->wqnp = NULL;
54 	kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t));
55 }
56 
57 /*
58  * allocate a wait queue node, and insert it in the list
59  */
60 static ibcm_arp_prwqn_t *
61 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
62     ibt_ip_addr_t *src_addr)
63 {
64 	ibcm_arp_prwqn_t *wqnp;
65 
66 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s);
67 
68 	if (dst_addr == NULL) {
69 		return (NULL);
70 	}
71 	if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) ==
72 	    NULL) {
73 		return (NULL);
74 	}
75 	wqnp->dst_addr = *dst_addr;
76 
77 	if (src_addr) {
78 		wqnp->usrc_addr = *src_addr;
79 	}
80 	wqnp->ib_str = ib_s;
81 	wqnp->ifproto = (dst_addr->family == AF_INET) ?
82 	    ETHERTYPE_IP : ETHERTYPE_IPV6;
83 
84 	ib_s->wqnp = wqnp;
85 
86 	IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp);
87 
88 	return (wqnp);
89 }
90 
91 
92 /*
93  * Check if the interface is loopback or IB.
94  */
95 static int
96 ibcm_arp_check_interface(ill_t *ill)
97 {
98 	if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB)
99 		return (0);
100 
101 	return (ETIMEDOUT);
102 }
103 
104 int
105 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr,
106     ibt_ip_addr_t *src_addr)
107 {
108 	ibcm_arp_prwqn_t *wqnp;
109 	ire_t	*ire = NULL;
110 	ipif_t	*ipif = NULL;
111 	ill_t	*ill = NULL;
112 	ill_t	*hwaddr_ill = NULL;
113 	ip_stack_t *ipst;
114 	int		len;
115 	ipaddr_t	setsrcv4;
116 	in6_addr_t	setsrcv6;
117 
118 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr);
119 	IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr);
120 
121 	if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) {
122 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
123 		    "ibcm_arp_create_prwqn failed");
124 		ib_s->status = ENOMEM;
125 		return (1);
126 	}
127 
128 	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
129 	if (dst_addr->family == AF_INET) {
130 		/*
131 		 * A local address is always specified, and it is used
132 		 * to find the zoneid.
133 		 */
134 		ipif = ipif_lookup_addr(src_addr->un.ip4addr, NULL, ALL_ZONES,
135 		    ipst);
136 		if (ipif == NULL) {
137 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
138 			    "ipif_lookup_addr failed");
139 			ib_s->status = EFAULT;
140 			goto fail;
141 		}
142 
143 		/*
144 		 * get an ire for the destination adress.
145 		 * Note that we can't use MATCH_IRE_ILL since that would
146 		 * require that the first ill we find have ire_ill set. Thus
147 		 * we compare ire_ill against ipif_ill after the lookup.
148 		 */
149 		setsrcv4 = INADDR_ANY;
150 		ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL,
151 		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
152 		    &setsrcv4, NULL, NULL);
153 
154 		ASSERT(ire != NULL);
155 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
156 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
157 			    "ire_route_recursive_v4 failed");
158 			ib_s->status = EFAULT;
159 			goto fail;
160 		}
161 		ill = ire_nexthop_ill(ire);
162 		if (ill == NULL) {
163 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
164 			    "ire_nexthop_ill failed");
165 			ib_s->status = EFAULT;
166 			goto fail;
167 		}
168 		if (ill != ipif->ipif_ill) {
169 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
170 			    "wrong ill");
171 			ib_s->status = EFAULT;
172 			goto fail;
173 		}
174 
175 		wqnp->gateway.un.ip4addr = ire->ire_gateway_addr;
176 		wqnp->netmask.un.ip4addr = ire->ire_mask;
177 		wqnp->src_addr.un.ip4addr = src_addr->un.ip4addr;
178 		wqnp->src_addr.family = wqnp->gateway.family =
179 		    wqnp->netmask.family = AF_INET;
180 
181 	} else if (dst_addr->family == AF_INET6) {
182 		/*
183 		 * A local address is always specified, and it is used
184 		 * to find the zoneid.
185 		 * We should really match on scopeid for link locals here.
186 		 */
187 		ipif = ipif_lookup_addr_v6(&src_addr->un.ip6addr, NULL,
188 		    ALL_ZONES, ipst);
189 		if (ipif == NULL) {
190 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
191 			    "ipif_lookup_addr_v6 failed");
192 			ib_s->status = EFAULT;
193 			goto fail;
194 		}
195 
196 		/*
197 		 * get an ire for the destination adress.
198 		 * Note that we can't use MATCH_IRE_ILL since that would
199 		 * require that the first ill we find have ire_ill set. Thus
200 		 * we compare ire_ill against ipif_ill after the lookup.
201 		 */
202 		setsrcv6 = ipv6_all_zeros;
203 		ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL,
204 		    ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst,
205 		    &setsrcv6, NULL, NULL);
206 
207 		ASSERT(ire != NULL);
208 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
209 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
210 			    "ire_route_recursive_v6 failed");
211 			ib_s->status = EFAULT;
212 			goto fail;
213 		}
214 		ill = ire_nexthop_ill(ire);
215 		if (ill == NULL) {
216 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
217 			    "ire_nexthop_ill failed");
218 			ib_s->status = EFAULT;
219 			goto fail;
220 		}
221 
222 		if (ill != ipif->ipif_ill) {
223 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
224 			    "wrong ill");
225 			ib_s->status = EFAULT;
226 			goto fail;
227 		}
228 
229 		wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6;
230 		wqnp->netmask.un.ip6addr = ire->ire_mask_v6;
231 		wqnp->src_addr.un.ip6addr = src_addr->un.ip6addr;
232 		wqnp->src_addr.family = wqnp->gateway.family =
233 		    wqnp->netmask.family = AF_INET6;
234 	}
235 
236 	(void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname));
237 
238 	/*
239 	 * For IPMP data addresses, we need to use the hardware address of the
240 	 * interface bound to the given address.
241 	 */
242 	if (IS_IPMP(ill)) {
243 		if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) {
244 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
245 			    "no bound ill for IPMP interface %s",
246 			    ill->ill_name);
247 			ib_s->status = EFAULT;
248 			goto fail;
249 		}
250 	} else {
251 		hwaddr_ill = ill;
252 		ill_refhold(hwaddr_ill);	/* for symmetry */
253 	}
254 
255 	if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) {
256 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
257 		    "ibcm_arp_check_interface failed");
258 		goto fail;
259 	}
260 
261 	bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac,
262 	    hwaddr_ill->ill_phys_addr_length);
263 
264 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s",
265 	    wqnp->ifname);
266 
267 	/*
268 	 * if the user supplied a address, then verify rts returned
269 	 * the same address
270 	 */
271 	if (wqnp->usrc_addr.family) {
272 		len = (wqnp->usrc_addr.family == AF_INET) ?
273 		    IP_ADDR_LEN : sizeof (in6_addr_t);
274 		if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) {
275 			IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
276 			    "srcaddr mismatch:%d", ENETUNREACH);
277 			goto fail;
278 		}
279 	}
280 
281 	/*
282 	 * at this stage, we have the source address and the IB
283 	 * interface, now get the destination mac address from
284 	 * arp or ipv6 drivers
285 	 */
286 	ib_s->status = ibcm_nce_lookup(wqnp, ill, getzoneid());
287 	if (ib_s->status != 0) {
288 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: "
289 		    "ibcm_nce_lookup failed: %d", ib_s->status);
290 		goto fail;
291 	}
292 
293 	ill_refrele(hwaddr_ill);
294 	ill_refrele(ill);
295 	ire_refrele(ire);
296 	ipif_refrele(ipif);
297 	netstack_rele(ipst->ips_netstack);
298 
299 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp);
300 	return (0);
301 fail:
302 	if (hwaddr_ill != NULL)
303 		ill_refrele(hwaddr_ill);
304 	if (ill != NULL)
305 		ill_refrele(ill);
306 	if (ire != NULL)
307 		ire_refrele(ire);
308 	if (ipif != NULL)
309 		ipif_refrele(ipif);
310 	ibcm_arp_delete_prwqn(wqnp);
311 	netstack_rele(ipst->ips_netstack);
312 	return (1);
313 }
314 
315 /*
316  * Query the neighbor cache for IPv4/IPv6 to mac address mapping.
317  */
318 static int
319 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid)
320 {
321 	ip2mac_t	ip2m;
322 	sin_t		*sin;
323 	sin6_t		*sin6;
324 	ip2mac_id_t	ip2mid;
325 	int		err;
326 
327 	if (wqnp->src_addr.family != wqnp->dst_addr.family) {
328 		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR "
329 		    "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family,
330 		    wqnp->dst_addr.family);
331 		return (1);
332 	}
333 	bzero(&ip2m, sizeof (ip2m));
334 
335 	if (wqnp->dst_addr.family == AF_INET) {
336 		sin = (sin_t *)&ip2m.ip2mac_pa;
337 		sin->sin_family = AF_INET;
338 		sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr;
339 	} else if (wqnp->dst_addr.family == AF_INET6) {
340 		sin6 = (sin6_t *)&ip2m.ip2mac_pa;
341 		sin6->sin6_family = AF_INET6;
342 		sin6->sin6_addr = wqnp->dst_addr.un.ip6addr;
343 	} else {
344 		IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR "
345 		    "Family: %d", wqnp->dst_addr.family);
346 		return (1);
347 	}
348 
349 	ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex;
350 
351 	wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
352 
353 	/*
354 	 * issue the request to IP for Neighbor Discovery
355 	 */
356 	ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp,
357 	    zoneid);
358 	err = ip2m.ip2mac_err;
359 	if (err == EINPROGRESS) {
360 		wqnp->ip2mac_id = ip2mid;
361 		wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING;
362 		err = 0;
363 	} else if (err == 0) {
364 		ibcm_resolver_ack(&ip2m, wqnp);
365 	}
366 	return (err);
367 }
368 
369 /*
370  * do sanity checks on the link-level sockaddr
371  */
372 static boolean_t
373 ibcm_check_sockdl(struct sockaddr_dl *sdl)
374 {
375 
376 	if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL)
377 		return (B_FALSE);
378 
379 	return (B_TRUE);
380 }
381 
382 /*
383  * callback for resolver lookups, both for success and failure.
384  * If Address resolution was succesful: return GID info.
385  */
386 static void
387 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg)
388 {
389 	ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg;
390 	ibcm_arp_streams_t *ib_s;
391 	uchar_t *cp;
392 	int err = 0;
393 
394 	IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp);
395 
396 	ib_s = wqnp->ib_str;
397 	mutex_enter(&ib_s->lock);
398 
399 	if (ip2macp->ip2mac_err != 0) {
400 		wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING;
401 		cv_broadcast(&ib_s->cv);
402 		err = EHOSTUNREACH;
403 		goto user_callback;
404 	}
405 
406 	if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) {
407 		IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: "
408 		    "interface %s is not IB\n", wqnp->ifname);
409 		err = EHOSTUNREACH;
410 		goto user_callback;
411 	}
412 
413 	cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
414 	bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL);
415 
416 	/*
417 	 * at this point we have src/dst gid's derived from the mac addresses
418 	 * now get the hca, port
419 	 */
420 	bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t));
421 	bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t));
422 
423 	IBCM_H2N_GID(wqnp->sgid);
424 	IBCM_H2N_GID(wqnp->dgid);
425 
426 user_callback:
427 
428 	ib_s->status = err;
429 	ib_s->done = B_TRUE;
430 
431 	/* lock is held by the caller. */
432 	cv_signal(&ib_s->cv);
433 	mutex_exit(&ib_s->lock);
434 }
435