1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <net/if.h> 28 #include <net/if_types.h> 29 #include <inet/ip.h> 30 #include <inet/ip_ire.h> 31 #include <inet/ip_if.h> 32 #include <sys/ethernet.h> 33 #include <sys/ib/mgt/ibcm/ibcm_arp.h> 34 35 extern char cmlog[]; 36 37 _NOTE(SCHEME_PROTECTS_DATA("Unshared data", ibcm_arp_streams_t)) 38 39 static void ibcm_resolver_ack(ip2mac_t *, void *); 40 static int ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zid); 41 42 /* 43 * delete a wait queue node from the list. 44 * assumes mutex is acquired 45 */ 46 void 47 ibcm_arp_delete_prwqn(ibcm_arp_prwqn_t *wqnp) 48 { 49 ibcm_arp_streams_t *ib_s; 50 51 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_delete_prwqn(%p)", wqnp); 52 53 ib_s = wqnp->ib_str; 54 ib_s->wqnp = NULL; 55 kmem_free(wqnp, sizeof (ibcm_arp_prwqn_t)); 56 } 57 58 /* 59 * allocate a wait queue node, and insert it in the list 60 */ 61 static ibcm_arp_prwqn_t * 62 ibcm_arp_create_prwqn(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 63 ibt_ip_addr_t *src_addr) 64 { 65 ibcm_arp_prwqn_t *wqnp; 66 67 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn(ib_s: 0x%p)", ib_s); 68 69 if (dst_addr == NULL) { 70 return (NULL); 71 } 72 if ((wqnp = kmem_zalloc(sizeof (ibcm_arp_prwqn_t), KM_NOSLEEP)) == 73 NULL) { 74 return (NULL); 75 } 76 wqnp->dst_addr = *dst_addr; 77 78 if (src_addr) { 79 wqnp->usrc_addr = *src_addr; 80 } 81 wqnp->ib_str = ib_s; 82 wqnp->ifproto = (dst_addr->family == AF_INET) ? 83 ETHERTYPE_IP : ETHERTYPE_IPV6; 84 85 ib_s->wqnp = wqnp; 86 87 IBTF_DPRINTF_L4(cmlog, "ibcm_arp_create_prwqn: Return wqnp: %p", wqnp); 88 89 return (wqnp); 90 } 91 92 93 /* 94 * Check if the interface is loopback or IB. 95 */ 96 static int 97 ibcm_arp_check_interface(ill_t *ill) 98 { 99 if (IS_LOOPBACK(ill) || ill->ill_type == IFT_IB) 100 return (0); 101 102 return (ETIMEDOUT); 103 } 104 105 int 106 ibcm_resolver_pr_lookup(ibcm_arp_streams_t *ib_s, ibt_ip_addr_t *dst_addr, 107 ibt_ip_addr_t *src_addr) 108 { 109 ibcm_arp_prwqn_t *wqnp; 110 ire_t *ire = NULL; 111 ipif_t *ipif = NULL; 112 ill_t *ill = NULL; 113 ill_t *hwaddr_ill = NULL; 114 ip_stack_t *ipst; 115 int len; 116 ipaddr_t setsrcv4; 117 in6_addr_t setsrcv6; 118 119 IBCM_PRINT_IP("ibcm_arp_pr_lookup: SRC", src_addr); 120 IBCM_PRINT_IP("ibcm_arp_pr_lookup: DST", dst_addr); 121 122 if ((wqnp = ibcm_arp_create_prwqn(ib_s, dst_addr, src_addr)) == NULL) { 123 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 124 "ibcm_arp_create_prwqn failed"); 125 ib_s->status = ENOMEM; 126 return (1); 127 } 128 129 ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip; 130 if (dst_addr->family == AF_INET) { 131 /* 132 * A local address is always specified, and it is used 133 * to find the zoneid. 134 */ 135 ipif = ipif_lookup_addr(src_addr->un.ip4addr, NULL, ALL_ZONES, 136 ipst); 137 if (ipif == NULL) { 138 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 139 "ipif_lookup_addr failed"); 140 ib_s->status = EFAULT; 141 goto fail; 142 } 143 144 /* 145 * get an ire for the destination adress. 146 * Note that we can't use MATCH_IRE_ILL since that would 147 * require that the first ill we find have ire_ill set. Thus 148 * we compare ire_ill against ipif_ill after the lookup. 149 */ 150 setsrcv4 = INADDR_ANY; 151 ire = ire_route_recursive_v4(dst_addr->un.ip4addr, 0, NULL, 152 ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 153 &setsrcv4, NULL, NULL); 154 155 ASSERT(ire != NULL); 156 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 157 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 158 "ire_route_recursive_v4 failed"); 159 ib_s->status = EFAULT; 160 goto fail; 161 } 162 ill = ire_nexthop_ill(ire); 163 if (ill == NULL) { 164 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 165 "ire_nexthop_ill failed"); 166 ib_s->status = EFAULT; 167 goto fail; 168 } 169 if (ill != ipif->ipif_ill) { 170 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 171 "wrong ill"); 172 ib_s->status = EFAULT; 173 goto fail; 174 } 175 176 wqnp->gateway.un.ip4addr = ire->ire_gateway_addr; 177 wqnp->netmask.un.ip4addr = ire->ire_mask; 178 wqnp->src_addr.un.ip4addr = src_addr->un.ip4addr; 179 wqnp->src_addr.family = wqnp->gateway.family = 180 wqnp->netmask.family = AF_INET; 181 182 } else if (dst_addr->family == AF_INET6) { 183 /* 184 * A local address is always specified, and it is used 185 * to find the zoneid. 186 * We should really match on scopeid for link locals here. 187 */ 188 ipif = ipif_lookup_addr_v6(&src_addr->un.ip6addr, NULL, 189 ALL_ZONES, ipst); 190 if (ipif == NULL) { 191 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 192 "ipif_lookup_addr_v6 failed"); 193 ib_s->status = EFAULT; 194 goto fail; 195 } 196 197 /* 198 * get an ire for the destination adress. 199 * Note that we can't use MATCH_IRE_ILL since that would 200 * require that the first ill we find have ire_ill set. Thus 201 * we compare ire_ill against ipif_ill after the lookup. 202 */ 203 setsrcv6 = ipv6_all_zeros; 204 ire = ire_route_recursive_v6(&dst_addr->un.ip6addr, 0, NULL, 205 ipif->ipif_zoneid, NULL, MATCH_IRE_DSTONLY, B_TRUE, 0, ipst, 206 &setsrcv6, NULL, NULL); 207 208 ASSERT(ire != NULL); 209 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 210 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 211 "ire_route_recursive_v6 failed"); 212 ib_s->status = EFAULT; 213 goto fail; 214 } 215 ill = ire_nexthop_ill(ire); 216 if (ill == NULL) { 217 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 218 "ire_nexthop_ill failed"); 219 ib_s->status = EFAULT; 220 goto fail; 221 } 222 223 if (ill != ipif->ipif_ill) { 224 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 225 "wrong ill"); 226 ib_s->status = EFAULT; 227 goto fail; 228 } 229 230 wqnp->gateway.un.ip6addr = ire->ire_gateway_addr_v6; 231 wqnp->netmask.un.ip6addr = ire->ire_mask_v6; 232 wqnp->src_addr.un.ip6addr = src_addr->un.ip6addr; 233 wqnp->src_addr.family = wqnp->gateway.family = 234 wqnp->netmask.family = AF_INET6; 235 } 236 237 (void) strlcpy(wqnp->ifname, ill->ill_name, sizeof (wqnp->ifname)); 238 239 /* 240 * For IPMP data addresses, we need to use the hardware address of the 241 * interface bound to the given address. 242 */ 243 if (IS_IPMP(ill)) { 244 if ((hwaddr_ill = ipmp_ipif_hold_bound_ill(ipif)) == NULL) { 245 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 246 "no bound ill for IPMP interface %s", 247 ill->ill_name); 248 ib_s->status = EFAULT; 249 goto fail; 250 } 251 } else { 252 hwaddr_ill = ill; 253 ill_refhold(hwaddr_ill); /* for symmetry */ 254 } 255 256 if ((ib_s->status = ibcm_arp_check_interface(hwaddr_ill)) != 0) { 257 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 258 "ibcm_arp_check_interface failed"); 259 goto fail; 260 } 261 262 bcopy(hwaddr_ill->ill_phys_addr, &wqnp->src_mac, 263 hwaddr_ill->ill_phys_addr_length); 264 265 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: outgoing if:%s", 266 wqnp->ifname); 267 268 /* 269 * if the user supplied a address, then verify rts returned 270 * the same address 271 */ 272 if (wqnp->usrc_addr.family) { 273 len = (wqnp->usrc_addr.family == AF_INET) ? 274 IP_ADDR_LEN : sizeof (in6_addr_t); 275 if (bcmp(&wqnp->usrc_addr.un, &wqnp->src_addr.un, len)) { 276 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 277 "srcaddr mismatch:%d", ENETUNREACH); 278 goto fail; 279 } 280 } 281 282 /* 283 * at this stage, we have the source address and the IB 284 * interface, now get the destination mac address from 285 * arp or ipv6 drivers 286 */ 287 ib_s->status = ibcm_nce_lookup(wqnp, ill, getzoneid()); 288 if (ib_s->status != 0) { 289 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_pr_lookup: " 290 "ibcm_nce_lookup failed: %d", ib_s->status); 291 goto fail; 292 } 293 294 ill_refrele(hwaddr_ill); 295 ill_refrele(ill); 296 ire_refrele(ire); 297 ipif_refrele(ipif); 298 netstack_rele(ipst->ips_netstack); 299 300 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_pr_lookup: Return: 0x%p", wqnp); 301 return (0); 302 fail: 303 if (hwaddr_ill != NULL) 304 ill_refrele(hwaddr_ill); 305 if (ill != NULL) 306 ill_refrele(ill); 307 if (ire != NULL) 308 ire_refrele(ire); 309 if (ipif != NULL) 310 ipif_refrele(ipif); 311 ibcm_arp_delete_prwqn(wqnp); 312 netstack_rele(ipst->ips_netstack); 313 return (1); 314 } 315 316 /* 317 * Query the neighbor cache for IPv4/IPv6 to mac address mapping. 318 */ 319 static int 320 ibcm_nce_lookup(ibcm_arp_prwqn_t *wqnp, ill_t *ill, zoneid_t zoneid) 321 { 322 ip2mac_t ip2m; 323 sin_t *sin; 324 sin6_t *sin6; 325 ip2mac_id_t ip2mid; 326 int err; 327 328 if (wqnp->src_addr.family != wqnp->dst_addr.family) { 329 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Mis-match SRC_ADDR " 330 "Family: %d, DST_ADDR Family %d", wqnp->src_addr.family, 331 wqnp->dst_addr.family); 332 return (1); 333 } 334 bzero(&ip2m, sizeof (ip2m)); 335 336 if (wqnp->dst_addr.family == AF_INET) { 337 sin = (sin_t *)&ip2m.ip2mac_pa; 338 sin->sin_family = AF_INET; 339 sin->sin_addr.s_addr = wqnp->dst_addr.un.ip4addr; 340 } else if (wqnp->dst_addr.family == AF_INET6) { 341 sin6 = (sin6_t *)&ip2m.ip2mac_pa; 342 sin6->sin6_family = AF_INET6; 343 sin6->sin6_addr = wqnp->dst_addr.un.ip6addr; 344 } else { 345 IBTF_DPRINTF_L2(cmlog, "ibcm_nce_lookup: Invalid DST_ADDR " 346 "Family: %d", wqnp->dst_addr.family); 347 return (1); 348 } 349 350 ip2m.ip2mac_ifindex = ill->ill_phyint->phyint_ifindex; 351 352 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 353 354 /* 355 * issue the request to IP for Neighbor Discovery 356 */ 357 ip2mid = ip2mac(IP2MAC_RESOLVE, &ip2m, ibcm_resolver_ack, wqnp, 358 zoneid); 359 err = ip2m.ip2mac_err; 360 if (err == EINPROGRESS) { 361 wqnp->ip2mac_id = ip2mid; 362 wqnp->flags |= IBCM_ARP_PR_RESOLVE_PENDING; 363 err = 0; 364 } else if (err == 0) { 365 ibcm_resolver_ack(&ip2m, wqnp); 366 } 367 return (err); 368 } 369 370 /* 371 * do sanity checks on the link-level sockaddr 372 */ 373 static boolean_t 374 ibcm_check_sockdl(struct sockaddr_dl *sdl) 375 { 376 377 if (sdl->sdl_type != IFT_IB || sdl->sdl_alen != IPOIB_ADDRL) 378 return (B_FALSE); 379 380 return (B_TRUE); 381 } 382 383 /* 384 * callback for resolver lookups, both for success and failure. 385 * If Address resolution was succesful: return GID info. 386 */ 387 static void 388 ibcm_resolver_ack(ip2mac_t *ip2macp, void *arg) 389 { 390 ibcm_arp_prwqn_t *wqnp = (ibcm_arp_prwqn_t *)arg; 391 ibcm_arp_streams_t *ib_s; 392 uchar_t *cp; 393 int err = 0; 394 395 IBTF_DPRINTF_L4(cmlog, "ibcm_resolver_ack(%p, %p)", ip2macp, wqnp); 396 397 ib_s = wqnp->ib_str; 398 mutex_enter(&ib_s->lock); 399 400 if (ip2macp->ip2mac_err != 0) { 401 wqnp->flags &= ~IBCM_ARP_PR_RESOLVE_PENDING; 402 cv_broadcast(&ib_s->cv); 403 err = EHOSTUNREACH; 404 goto user_callback; 405 } 406 407 if (!ibcm_check_sockdl(&ip2macp->ip2mac_ha)) { 408 IBTF_DPRINTF_L2(cmlog, "ibcm_resolver_ack: Error: " 409 "interface %s is not IB\n", wqnp->ifname); 410 err = EHOSTUNREACH; 411 goto user_callback; 412 } 413 414 cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha); 415 bcopy(cp, &wqnp->dst_mac, IPOIB_ADDRL); 416 417 /* 418 * at this point we have src/dst gid's derived from the mac addresses 419 * now get the hca, port 420 */ 421 bcopy(&wqnp->src_mac.ipoib_gidpref, &wqnp->sgid, sizeof (ib_gid_t)); 422 bcopy(&wqnp->dst_mac.ipoib_gidpref, &wqnp->dgid, sizeof (ib_gid_t)); 423 424 IBCM_H2N_GID(wqnp->sgid); 425 IBCM_H2N_GID(wqnp->dgid); 426 427 user_callback: 428 429 ib_s->status = err; 430 ib_s->done = B_TRUE; 431 432 /* lock is held by the caller. */ 433 cv_signal(&ib_s->cv); 434 mutex_exit(&ib_s->lock); 435 } 436