1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26/*
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 */
29
30/*
31 *
32 * MODULE: dapl_name_service.c
33 *
 * PURPOSE: Provide simple, file-based name services in the absence
35 *	    of DNS hooks for a particular transport type. If an
36 *	    InfiniBand implementation supports IPoIB, this should
37 *	    not be used.
38 *
39 * Description: Interfaces in this file are completely described in
40 *		dapl_name_service.h
41 */
42
43/*
44 * Include files for setting up a network name
45 */
46#include "dapl.h"
47#include "dapl_name_service.h"
48
49#include <netinet/in.h>
50#include <sys/sockio.h>
51#include <net/if.h>
52#include <net/if_dl.h>
53#include <net/if_arp.h>
54#include <net/if_types.h>
55#include <arpa/inet.h>
56#include <poll.h>
57#include <ibd/ibd.h>
58
#ifdef IBHOSTS_NAMING
/* File opened by dapli_ns_create_gid_map() to read host/GID records. */
#define	MAP_FILE		"/etc/dapl/ibhosts"
/* Number of elements in g_gid_map_table. */
#define	MAX_GID_ENTRIES		32
/*
 * IP address to GID map. The scans in this file stop at the first
 * element whose ip_address is 0, so a zero ip_address marks both a
 * free slot and the end of the used part of the table.
 */
DAPL_GID_MAP			g_gid_map_table[MAX_GID_ENTRIES];

/* Helpers used only by the IBHOSTS_NAMING implementation below. */
DAT_RETURN dapli_ns_create_gid_map(void);
DAT_RETURN dapli_ns_add_address(IN DAPL_GID_MAP	*gme);
#endif /* IBHOSTS_NAMING */
67
68/*
69 * dapls_ns_init
70 *
71 * Initialize naming services
72 *
73 * Input:
74 *	none
75 *
76 * Output:
77 * 	none
78 *
79 * Returns:
80 * 	DAT_SUCCESS
81 *	DAT_INVALID_PARAMETER
82 */
83DAT_RETURN
84dapls_ns_init(void)
85{
86	DAT_RETURN	dat_status;
87
88	dat_status = DAT_SUCCESS;
89#ifdef IBHOSTS_NAMING
90	dat_status = dapli_ns_create_gid_map();
91#endif /* IBHOSTS_NAMING */
92
93	return (dat_status);
94}
95
96#ifdef IBHOSTS_NAMING
/*
 * dapli_ns_create_gid_map()
 *
 * Read MAP_FILE to obtain host names and GIDs. Each record is a host
 * name followed by the GID prefix and the GID guid (both scanned with
 * the F64x format). Every host name is resolved to an IPv4 address and
 * the address/GID pair is added to the gid map table, which can then
 * be used for lookups.
 *
 * This implementation is a simple method providing name services
 * when more advanced mechanisms do not exist. The proper way
 * to obtain these mappings is to use a name service such as is
 * provided by IPoIB on InfiniBand.
 *
 * Reading stops at end of file, at the first record that does not
 * parse completely, or when the table cannot take another entry.
 *
 * Input:
 *	none
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INTERNAL_ERROR	MAP_FILE could not be opened
 */
DAT_RETURN
dapli_ns_create_gid_map(void)
{
	FILE			*f;
	ib_gid_t		gid;
	char			hostname[128];
	int			nfields;
	int			rc;
	struct addrinfo		*addr;
	struct sockaddr_in	*si;
	DAPL_GID_MAP		gmt;

	f = fopen(MAP_FILE, "r");
	if (f == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ERROR: Must have file <%s> "
		    "for IP/GID mappings\n", MAP_FILE);
		return (DAT_ERROR(DAT_INTERNAL_ERROR, 0));
	}

	for (;;) {
		/*
		 * The field width (sizeof (hostname) - 1) keeps fscanf
		 * from writing past the end of hostname[].
		 */
		nfields = fscanf(f, "%127s " F64x " " F64x, hostname,
		    &gid.gid_prefix, &gid.gid_guid);
		if (nfields != 3) {
			/*
			 * A short match leaves gid partly unset, so the
			 * record must not be used. The stream is no longer
			 * aligned on a record boundary either, so give up
			 * on the rest of the file.
			 */
			if (nfields != EOF) {
				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
				    "WARNING: malformed entry in <%s>, "
				    "ignoring the rest of the file\n",
				    MAP_FILE);
			}
			break;
		}

		rc = dapls_osd_getaddrinfo(hostname, &addr);
		if (rc != 0) {
			/*
			 * hostname not registered in DNS,
			 * provide a dummy value
			 */
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "WARNING: <%s> not registered in "
			    "DNS, using dummy IP value\n", hostname);
			gmt.ip_address = 0x01020304;
		} else {
			/*
			 * Load into the ip/gid mapping table
			 */
			if (AF_INET == addr->ai_addr->sa_family) {
				si = (struct sockaddr_in *)addr->ai_addr;
				gmt.ip_address = si->sin_addr.s_addr;
			} else {
				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
				    "WARNING: <%s> Address family "
				    "not supported, using dummy "
				    "IP value\n", hostname);
				gmt.ip_address = 0x01020304;
			}
			dapls_osd_freeaddrinfo(addr);
		}
		gmt.gid.gid_prefix = gid.gid_prefix;
		gmt.gid.gid_guid = gid.gid_guid;

		/*
		 * Once an insert is rejected every later one would be too,
		 * so stop reading. The entries loaded so far stay usable.
		 */
		if (dapli_ns_add_address(&gmt) != DAT_SUCCESS) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "WARNING: IP/GID map table is full, ignoring "
			    "remaining entries in <%s>\n", MAP_FILE);
			break;
		}
	}
	(void) fclose(f);
	return (DAT_SUCCESS);
}
176
177/*
178 * dapli_ns_add_address
179 *
180 * Add a table entry to the  gid_map_table.
181 *
182 * Input:
183 *	remote_ia_address	remote IP address
184 *	gid			pointer to output gid
185 *
186 * Output:
187 * 	gid			filled in GID
188 *
189 * Returns:
190 * 	DAT_SUCCESS
191 *	DAT_INSUFFICIENT_RESOURCES
192 *	DAT_INVALID_PARAMETER
193 */
194DAT_RETURN
195dapli_ns_add_address(
196	IN DAPL_GID_MAP	*gme)
197{
198	DAPL_GID_MAP	*gmt;
199	int		count;
200
201	gmt = g_gid_map_table;
202	for (count = 0, gmt = g_gid_map_table; gmt->ip_address; gmt++) {
203		count++;
204	}
205	if (count > MAX_GID_ENTRIES) {
206		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
207	}
208
209	*gmt = *gme;
210	return (DAT_SUCCESS);
211}
212
213/*
214 * dapls_ns_lookup_address
215 *
216 * Look up the provided IA_ADDRESS in the gid_map_table. Return
217 * the gid if found.
218 *
219 * Input:
220 *	remote_ia_address	remote IP address
221 *	gid			pointer to output gid
222 *	timeout			timeout in microseconds
223 *
224 * Output:
225 * 	gid			filled in GID
226 *
227 * Returns:
228 * 	DAT_SUCCESS
229 *	DAT_INSUFFICIENT_RESOURCES
230 *	DAT_INVALID_PARAMETER
231 */
232DAT_RETURN
233dapls_ns_lookup_address(
234	IN  DAPL_IA			*ia_ptr,
235	IN  DAT_IA_ADDRESS_PTR		remote_ia_address,
236	IN  DAT_TIMEOUT			timeout,
237	OUT ib_gid_t			*gid)
238{
239	DAPL_GID_MAP		*gmt;
240	struct sockaddr_in	*si;
241
242	/* unused here */
243	ia_ptr = ia_ptr;
244	si = (struct sockaddr_in *)remote_ia_address;
245
246	for (gmt = g_gid_map_table; gmt->ip_address; gmt++) {
247		if (gmt->ip_address == si->sin_addr.s_addr) {
248			gid->gid_guid = gmt->gid.gid_guid;
249			gid->gid_prefix = gmt->gid.gid_prefix;
250			return (DAT_SUCCESS);
251		}
252	}
253	return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
254}
255#endif /* IBHOSTS_NAMING */
256
/*
 * dapls_inet_ntop
 *
 * Utility function for printing a socket address.
 *
 * Input:
 *	addr	AF_INET or AF_INET6 socket address to format
 *	buf	buffer receiving the text
 *	len	size of buf in bytes
 *
 * Returns:
 *	For AF_INET and AF_INET6 the result of inet_ntop(), i.e. buf on
 *	success and NULL when the conversion fails. For any other
 *	address family buf is returned holding "bad address", truncated
 *	to fit if necessary (buf is left untouched only when len is 0).
 */
char *
dapls_inet_ntop(struct sockaddr *addr, char *buf, size_t len)
{
	void	*addr_ptr;

	if (addr->sa_family == AF_INET) {
		/* LINTED: E_BAD_PTR_CAST_ALIGN */
		addr_ptr = (void *)&((struct sockaddr_in *)addr)->sin_addr;
	} else if (addr->sa_family == AF_INET6) {
		/* LINTED: E_BAD_PTR_CAST_ALIGN */
		addr_ptr = (void *)&((struct sockaddr_in6 *)addr)->sin6_addr;
	} else {
		/*
		 * Always hand back a terminated string: callers print the
		 * result, and a short buffer would otherwise be returned
		 * with whatever it happened to contain.
		 */
		if (len > 0) {
			(void) snprintf(buf, len, "bad address");
		}
		return (buf);
	}
	return ((char *)inet_ntop(addr->sa_family, addr_ptr, buf, len));
}
279
280/*
281 * dapls_ns_lookup_address
282 *
283 * translates an IP address into a GID
284 *
285 * Input:
286 * 	ia_ptr			pointer to IA object
287 *	remote_ia_address	remote IP address
288 *	gid			pointer to output gid
289 *	timeout			timeout in microseconds
290 *
291 * Output:
292 * 	gid			filled in GID
293 *
294 * Returns:
295 * 	DAT_SUCCESS
 *	DAT_INVALID_ADDRESS
297 *	DAT_INVALID_PARAMETER
298 *	DAT_INTERNAL_ERROR
299 */
300
/*
 * Retry limit used by dapls_ns_lookup_v4() and dapls_ns_lookup_v6()
 * when re-issuing the ARP/ND query ioctl.
 */
#define	NS_MAX_RETRIES	60

/* Per address family lookups called from dapls_ns_lookup_address(). */
DAT_RETURN
dapls_ns_lookup_v4(
	IN  DAPL_IA			*ia_ptr,
	IN  struct sockaddr_in		*addr,
	IN  DAT_TIMEOUT			timeout,
	OUT ib_gid_t			*gid);
DAT_RETURN
dapls_ns_lookup_v6(
	IN  DAPL_IA			*ia_ptr,
	IN  struct sockaddr_in6		*addr,
	IN  DAT_TIMEOUT			timeout,
	OUT ib_gid_t			*gid);

/*
 * Return 0 when addr is on the same subnet as the interface named by
 * ia_ptr->hca_ptr->name, -1 otherwise or on ioctl failure.
 */
static int dapls_ns_subnet_match_v4(int s, DAPL_IA *ia_ptr,
    struct sockaddr_in *addr);
static int dapls_ns_subnet_match_v6(int s, DAPL_IA *ia_ptr,
    struct sockaddr_in6 *addr);

/* Helpers used to get the ARP/ND entry for a destination resolved. */
static int dapls_ns_send_packet_v6(int s, struct sockaddr_in6 *addr);
static int dapls_ns_resolve_addr(int af, struct sockaddr *addr,
    DAT_TIMEOUT timeout);
324
325DAT_RETURN
326dapls_ns_lookup_address(
327	IN  DAPL_IA			*ia_ptr,
328	IN  DAT_IA_ADDRESS_PTR		remote_ia_address,
329	IN  DAT_TIMEOUT			timeout,
330	OUT ib_gid_t			*gid)
331{
332	DAT_RETURN		dat_status;
333	struct sockaddr		*sock = (struct sockaddr *)remote_ia_address;
334
335	if (sock->sa_family == AF_INET) {
336		dat_status = dapls_ns_lookup_v4(ia_ptr,
337		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
338		    (struct sockaddr_in *)sock, timeout, gid);
339	} else if (sock->sa_family == AF_INET6) {
340		dat_status = dapls_ns_lookup_v6(ia_ptr,
341		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
342		    (struct sockaddr_in6 *)sock, timeout, gid);
343	} else {
344		dat_status = DAT_INVALID_PARAMETER;
345	}
346	return (dat_status);
347}
348
349DAT_RETURN
350dapls_ns_lookup_v4(
351	IN  DAPL_IA			*ia_ptr,
352	IN  struct sockaddr_in		*addr,
353	IN  DAT_TIMEOUT			timeout,
354	OUT ib_gid_t			*gid)
355{
356	struct xarpreq		ar;
357	struct sockaddr_in	*sin;
358	uchar_t			*mac;
359	int			s, retries = 0;
360
361	(void) dapl_os_memzero(&ar, sizeof (ar));
362	sin = (struct sockaddr_in *)&ar.xarp_pa;
363	sin->sin_family = AF_INET;
364	sin->sin_addr.s_addr = addr->sin_addr.s_addr;
365	ar.xarp_ha.sdl_family = AF_LINK;
366
367	s = socket(AF_INET, SOCK_DGRAM, 0);
368	if (s < 0) {
369		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
370		    "ns_lookup_v4: socket: %s\n", strerror(errno));
371		return (DAT_INTERNAL_ERROR);
372	}
373	if (dapls_ns_subnet_match_v4(s, ia_ptr, addr) != 0) {
374		(void) close(s);
375		return (DAT_INVALID_ADDRESS);
376	}
377again:;
378	if (ioctl(s, SIOCGXARP, (caddr_t)&ar) < 0) {
379		/*
380		 * if SIOCGXARP failed, we force the ARP
381		 * cache to be filled by connecting to the
382		 * destination IP address.
383		 */
384		if (retries <= NS_MAX_RETRIES &&
385		    dapls_ns_resolve_addr(AF_INET, (struct sockaddr *)addr,
386		    timeout) == 0) {
387			retries++;
388			goto again;
389		}
390		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ns_lookup_v4: giving up\n");
391		(void) close(s);
392		return (DAT_ERROR(DAT_INVALID_ADDRESS,
393		    DAT_INVALID_ADDRESS_UNREACHABLE));
394	}
395	if ((ar.xarp_flags & ATF_COM) == 0 &&
396	    ar.xarp_ha.sdl_type == IFT_IB && retries <= NS_MAX_RETRIES) {
397		/*
398		 * we get here if arp resolution is still incomplete
399		 */
400		retries++;
401		(void) sleep(1);
402		goto again;
403	}
404	(void) close(s);
405
406	mac = (uchar_t *)LLADDR(&ar.xarp_ha);
407	if (ar.xarp_flags & ATF_COM &&
408	    ar.xarp_ha.sdl_type == IFT_IB &&
409	    ar.xarp_ha.sdl_alen >= sizeof (ipoib_mac_t)) {
410		ib_gid_t tmp_gid;
411
412		/* LINTED: E_BAD_PTR_CAST_ALIGN */
413		(void) dapl_os_memcpy(&tmp_gid,
414		    &((ipoib_mac_t *)mac)->ipoib_gidpref, sizeof (ib_gid_t));
415		/*
416		 * gids from the ARP table are in network order, convert
417		 * the gids from network order to host byte order
418		 */
419		gid->gid_prefix = BETOH_64(tmp_gid.gid_prefix);
420		gid->gid_guid = BETOH_64(tmp_gid.gid_guid);
421	} else {
422		int i, len;
423
424		len = ar.xarp_ha.sdl_alen;
425		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
426		    "ns_lookup_v4: failed, non IB address: "
427		    "len = %d, addr = 0x", len);
428		if (len > 0) {
429			for (i = 0; i < len; i++) {
430				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
431				    "%02x", (int)mac[i] & 0xff);
432			}
433		} else {
434			dapl_dbg_log(DAPL_DBG_TYPE_ERR, "0");
435		}
436		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "\n");
437		return (DAT_INVALID_ADDRESS);
438	}
439	return (DAT_SUCCESS);
440}
441
442DAT_RETURN
443dapls_ns_lookup_v6(
444	IN  DAPL_IA			*ia_ptr,
445	IN  struct sockaddr_in6		*addr,
446	IN  DAT_TIMEOUT			timeout,
447	OUT ib_gid_t			*gid)
448{
449	struct lifreq		lifr;
450	uchar_t			*mac;
451	int			s, retries = 0;
452
453	s = socket(AF_INET6, SOCK_DGRAM, 0);
454	if (s < 0) {
455		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
456		    "ns_lookup_v6: socket: %s\n", strerror(errno));
457		return (DAT_INTERNAL_ERROR);
458	}
459	if (dapls_ns_subnet_match_v6(s, ia_ptr, addr) != 0) {
460		(void) close(s);
461		return (DAT_INVALID_ADDRESS);
462	}
463	(void) dapl_os_memzero(&lifr, sizeof (lifr));
464	(void) dapl_os_memcpy(&lifr.lifr_nd.lnr_addr, addr, sizeof (*addr));
465	(void) dapl_os_strcpy(lifr.lifr_name, ia_ptr->hca_ptr->name);
466
467again:;
468	if (ioctl(s, SIOCLIFGETND, (caddr_t)&lifr) < 0)  {
469		/*
470		 * if SIOCLIFGETND failed, we force the ND
471		 * cache to be filled by connecting to the
472		 * destination IP address.
473		 */
474		if (retries < NS_MAX_RETRIES &&
475		    dapls_ns_send_packet_v6(s, addr) == 0 &&
476		    dapls_ns_resolve_addr(AF_INET6, (struct sockaddr *)addr,
477		    timeout) == 0) {
478			retries++;
479			goto again;
480		}
481		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "ns_lookup_v6: giving up\n");
482		(void) close(s);
483		return (DAT_ERROR(DAT_INVALID_ADDRESS,
484		    DAT_INVALID_ADDRESS_UNREACHABLE));
485	}
486	if (lifr.lifr_nd.lnr_hdw_len == 0 && retries <= NS_MAX_RETRIES) {
487		/*
488		 * lnr_hdw_len == 0 means that the ND entry
489		 * is still incomplete. we need to retry the ioctl.
490		 */
491		retries++;
492		(void) sleep(1);
493		goto again;
494	}
495	(void) close(s);
496
497	mac = (uchar_t *)lifr.lifr_nd.lnr_hdw_addr;
498	if (lifr.lifr_nd.lnr_hdw_len >= sizeof (ipoib_mac_t)) {
499		ib_gid_t tmp_gid;
500		/* LINTED: E_BAD_PTR_CAST_ALIGN */
501		(void) dapl_os_memcpy(&tmp_gid,
502		    &((ipoib_mac_t *)mac)->ipoib_gidpref, sizeof (ib_gid_t));
503		/*
504		 * gids from the ND table are in network order, convert
505		 * the gids from network order to host byte order
506		 */
507		gid->gid_prefix = BETOH_64(tmp_gid.gid_prefix);
508		gid->gid_guid = BETOH_64(tmp_gid.gid_guid);
509	} else {
510		int i, len;
511
512		len = lifr.lifr_nd.lnr_hdw_len;
513		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
514		    "ns_lookup_v6: failed, non IB address: "
515		    "len = %d, addr = 0x", len);
516		if (len > 0) {
517			for (i = 0; i < len; i++) {
518				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
519				    "%02x", (int)mac[i] & 0xff);
520			}
521		} else {
522			dapl_dbg_log(DAPL_DBG_TYPE_ERR, "0");
523		}
524		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "\n");
525		return (DAT_INVALID_ADDRESS);
526	}
527	return (DAT_SUCCESS);
528}
529
530static int
531dapls_ns_send_packet_v6(int s, struct sockaddr_in6 *addr)
532{
533	if (sendto(s, NULL, 0, MSG_DONTROUTE, (struct sockaddr *)addr,
534	    sizeof (*addr)) < 0) {
535		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
536		    "ns_send_packet_v6: failed: %s\n", strerror(errno));
537		return (-1);
538	}
539	return (0);
540}
541
542static int
543dapls_ns_subnet_match_v4(int s, DAPL_IA *ia_ptr, struct sockaddr_in *addr)
544{
545	struct lifreq		lifreq;
546	int			retval;
547	uint32_t		netmask, netaddr, netaddr_dest;
548
549	(void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
550
551	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
552	if (retval < 0) {
553		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
554		    "ns_subnet_match_v4: cannot get netmask: %s\n",
555		    strerror(errno));
556		return (-1);
557	}
558	netmask = ((struct sockaddr_in *)&lifreq.lifr_addr)->
559	    sin_addr.s_addr;
560
561	/*
562	 * we need to get the interface address here because the
563	 * address in ia_ptr->hca_ptr->hca_address might not
564	 * necessarily be an IPv4 address.
565	 */
566	retval = ioctl(s, SIOCGLIFADDR, (caddr_t)&lifreq);
567	if (retval < 0) {
568		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
569		    "ns_subnet_match_v4: cannot get local addr: %s\n",
570		    strerror(errno));
571		return (-1);
572	}
573	netaddr = ((struct sockaddr_in *)&lifreq.lifr_addr)->
574	    sin_addr.s_addr & netmask;
575	netaddr_dest = addr->sin_addr.s_addr & netmask;
576
577	if (netaddr != netaddr_dest) {
578		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
579		    "ns_subnet_match_v4: netaddrs don't match: "
580		    "local %x, remote %x\n", netaddr, netaddr_dest);
581		return (-1);
582	}
583	return (0);
584}
585
586static int
587dapls_ns_subnet_match_v6(int s, DAPL_IA *ia_ptr, struct sockaddr_in6 *addr)
588{
589	struct lifreq		lifreq;
590	struct sockaddr_in6	netmask_sock;
591	uchar_t			*netmask, *local_addr, *dest_addr;
592	int			i, retval;
593
594	(void) dapl_os_strcpy(lifreq.lifr_name, ia_ptr->hca_ptr->name);
595
596	retval = ioctl(s, SIOCGLIFNETMASK, (caddr_t)&lifreq);
597	if (retval < 0) {
598		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
599		    "ns_subnet_match_v6: cannot get netmask: %s\n",
600		    strerror(errno));
601		return (-1);
602	}
603	(void) dapl_os_memcpy(&netmask_sock, &lifreq.lifr_addr,
604	    sizeof (netmask_sock));
605
606	/*
607	 * we need to get the interface address here because the
608	 * address in ia_ptr->hca_ptr->hca_address might not
609	 * necessarily be an IPv6 address.
610	 */
611	retval = ioctl(s, SIOCGLIFADDR, (caddr_t)&lifreq);
612	if (retval < 0) {
613		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
614		    "ns_subnet_match_v6: cannot get local addr: %s\n",
615		    strerror(errno));
616		return (-1);
617	}
618	netmask = (uchar_t *)&netmask_sock.sin6_addr;
619	local_addr = (uchar_t *)&((struct sockaddr_in6 *)&lifreq.lifr_addr)->
620	    sin6_addr;
621	dest_addr = (uchar_t *)&addr->sin6_addr;
622
623	for (i = 0; i < sizeof (addr->sin6_addr); i++) {
624		if (((local_addr[i] ^ dest_addr[i]) & netmask[i]) != 0) {
625			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
626			    "ns_subnet_match_v6: subnets do not match\n");
627			return (-1);
628		}
629	}
630	return (0);
631}
632
633static int
634dapls_ns_resolve_addr(int af, struct sockaddr *addr, DAT_TIMEOUT timeout)
635{
636	struct sockaddr_storage	sock;
637	struct sockaddr_in	*v4dest;
638	struct sockaddr_in6	*v6dest;
639	struct pollfd		pollfd;
640	int			fd, retval;
641	int			tmo;
642	int			ip_version;
643
644	if (af == AF_INET) {
645		ip_version = 4;
646	} else if (af == AF_INET6) {
647		ip_version = 6;
648	} else {
649		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
650		    "ns_resolve_addr: invalid af %d\n", af);
651		return (-1);
652	}
653	fd = socket(af, SOCK_STREAM, 0);
654	if (fd < 0) {
655		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
656		    "ns_resolve_addr: ipv%d, cannot create socket %s\n",
657		    ip_version, strerror(errno));
658		return (-1);
659	}
660
661	/*
662	 * set socket to non-blocking mode
663	 */
664	retval = fcntl(fd, F_SETFL, O_NONBLOCK);
665	if (retval < 0) {
666		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
667		    "ns_resolve_addr: ipv%d, fcntl failed: %s\n",
668		    ip_version, strerror(errno));
669		(void) close(fd);
670		return (-1);
671	}
672
673	/*
674	 * connect to the discard port (9) at the dest IP
675	 */
676	(void) dapl_os_memzero(&sock, sizeof (sock));
677	if (af == AF_INET) {
678		v4dest = (struct sockaddr_in *)&sock;
679		v4dest->sin_family = AF_INET;
680		v4dest->sin_addr.s_addr =
681		    /* LINTED: E_BAD_PTR_CAST_ALIGN */
682		    ((struct sockaddr_in *)addr)->sin_addr.s_addr;
683		v4dest->sin_port = htons(9);
684
685		retval = connect(fd, (struct sockaddr *)v4dest,
686		    sizeof (struct sockaddr_in));
687	} else {
688		v6dest = (struct sockaddr_in6 *)&sock;
689		v6dest->sin6_family = AF_INET6;
690		/* LINTED: E_BAD_PTR_CAST_ALIGN */
691		(void) dapl_os_memcpy(&v6dest->sin6_addr,
692		    &((struct sockaddr_in6 *)addr)->sin6_addr,
693		    sizeof (struct sockaddr_in6));
694		v6dest->sin6_port = htons(9);
695
696		retval = connect(fd, (struct sockaddr *)v6dest,
697		    sizeof (struct sockaddr_in6));
698	}
699
700	/*
701	 * we can return immediately if connect succeeds
702	 */
703	if (retval == 0) {
704		(void) close(fd);
705		return (0);
706	}
707	/*
708	 * receiving a RST means that the arp/nd entry should
709	 * already be resolved
710	 */
711	if (retval < 0 && errno == ECONNREFUSED) {
712		errno = 0;
713		(void) close(fd);
714		return (0);
715	}
716
717	/*
718	 * for all other cases, we poll on the fd
719	 */
720	pollfd.fd = fd;
721	pollfd.events = POLLIN | POLLOUT;
722	pollfd.revents = 0;
723
724	if (timeout == DAT_TIMEOUT_INFINITE ||
725	    timeout == 0) {
726		/*
727		 * -1 means infinite
728		 */
729		tmo = -1;
730	} else {
731		/*
732		 * convert timeout from usecs to msecs
733		 */
734		tmo = timeout/1000;
735	}
736	retval = poll(&pollfd, 1, tmo);
737	if (retval > 0) {
738		int	so_error = 0, len = sizeof (so_error);
739
740		retval = getsockopt(fd, SOL_SOCKET, SO_ERROR,
741		    &so_error, &len);
742		if (retval == 0) {
743			/*
744			 * we only return 0 if so_error == 0 or
745			 * so_error == ECONNREFUSED. for all other
746			 * cases retval is non-zero.
747			 */
748			if (so_error != 0 && so_error != ECONNREFUSED) {
749				retval = -1;
750				errno = so_error;
751				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
752				    "ns_resolve_addr: ipv%d, so_error: %s\n",
753				    ip_version, strerror(errno));
754			}
755		} else {
756			/*
757			 * if retval != 0, it must be -1. and errno must
758			 * have been set by getsockopt.
759			 */
760			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
761			    "ns_resolve_addr: ipv%d, getsockopt: %s\n",
762			    ip_version, strerror(errno));
763		}
764	} else {
765		if (retval == 0) {
766			errno = ETIMEDOUT;
767		}
768		retval = -1;
769		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
770		    "ns_resolve_addr: ipv%d, poll: %s\n",
771		    ip_version, strerror(errno));
772	}
773	(void) close(fd);
774	return (retval);
775}
776