/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2021 Tintri by DDN, Inc. All rights reserved.
 */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include "mpd_defs.h"
297c478bd9Sstevel@tonic-gate #include "mpd_tables.h"
307c478bd9Sstevel@tonic-gate 
/* Debug flag; tested against the D_* bits (e.g. D_PHYINT) */
int debug = 0;

/* Table of poll descriptors maintained by poll_add()/poll_remove() */
static int pollfd_num = 0;		/* Num. of poll descriptors */
static struct pollfd *pollfds = NULL;	/* Array of poll descriptors */

/*
 * All times below are in milliseconds.
 */

/* User specified failure detection time (fdt) */
int user_failure_detection_time;

/* Probe interval, derived from the user specified fdt */
int user_probe_interval;
387c478bd9Sstevel@tonic-gate 
3927438c18SJon Anderson /*
4027438c18SJon Anderson  * Structure to store mib2 information returned by the kernel.
4127438c18SJon Anderson  * This is used to process routing table information.
4227438c18SJon Anderson  */
4327438c18SJon Anderson typedef struct mib_item_s {
4427438c18SJon Anderson 	struct mib_item_s	*mi_next;
4527438c18SJon Anderson 	struct opthdr		mi_opthdr;
4627438c18SJon Anderson 	void			*mi_valp;
4727438c18SJon Anderson } mib_item_t;
4827438c18SJon Anderson 
497c478bd9Sstevel@tonic-gate static int	rtsock_v4;		/* AF_INET routing socket */
507c478bd9Sstevel@tonic-gate static int	rtsock_v6;		/* AF_INET6 routing socket */
517c478bd9Sstevel@tonic-gate int	ifsock_v4 = -1;			/* IPv4 socket for ioctls  */
527c478bd9Sstevel@tonic-gate int	ifsock_v6 = -1;			/* IPv6 socket for ioctls  */
537c478bd9Sstevel@tonic-gate static int	lsock_v4;		/* Listen socket to detect mpathd */
547c478bd9Sstevel@tonic-gate static int	lsock_v6;		/* Listen socket to detect mpathd */
557c478bd9Sstevel@tonic-gate static int	mibfd = -1;		/* fd to get mib info */
567c478bd9Sstevel@tonic-gate static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate static uint_t	last_initifs_time;	/* Time when initifs was last run */
597c478bd9Sstevel@tonic-gate static	char **argv0;			/* Saved for re-exec on SIGHUP */
607c478bd9Sstevel@tonic-gate boolean_t handle_link_notifications = _B_TRUE;
6127438c18SJon Anderson static int	ipRouteEntrySize;	/* Size of IPv4 route entry */
6227438c18SJon Anderson static int	ipv6RouteEntrySize;	/* Size of IPv6 route entry */
637c478bd9Sstevel@tonic-gate 
647c478bd9Sstevel@tonic-gate static void	initlog(void);
657c478bd9Sstevel@tonic-gate static void	run_timeouts(void);
667c478bd9Sstevel@tonic-gate static void	initifs(void);
677c478bd9Sstevel@tonic-gate static void	check_if_removed(struct phyint_instance *pii);
687c478bd9Sstevel@tonic-gate static void	select_test_ifs(void);
6927438c18SJon Anderson static void	update_router_list(mib_item_t *item);
7027438c18SJon Anderson static void	mib_get_constants(mib_item_t *item);
7127438c18SJon Anderson static int	mibwalk(void (*proc)(mib_item_t *));
727c478bd9Sstevel@tonic-gate static void	ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
737c478bd9Sstevel@tonic-gate static void	ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
747c478bd9Sstevel@tonic-gate static void	router_add_common(int af, char *ifname,
757c478bd9Sstevel@tonic-gate     struct in6_addr nexthop);
767c478bd9Sstevel@tonic-gate static void	init_router_targets();
777c478bd9Sstevel@tonic-gate static void	cleanup(void);
787c478bd9Sstevel@tonic-gate static int	setup_listener(int af);
797c478bd9Sstevel@tonic-gate static void	check_config(void);
80e6ed03fcSmeem static void	check_testconfig(void);
8106cdd167Smeem static void	check_addr_unique(struct phyint_instance *,
8206cdd167Smeem     struct sockaddr_storage *);
837c478bd9Sstevel@tonic-gate static void	init_host_targets(void);
847c478bd9Sstevel@tonic-gate static void	dup_host_targets(struct phyint_instance *desired_pii);
857c478bd9Sstevel@tonic-gate static void	loopback_cmd(int sock, int family);
867c478bd9Sstevel@tonic-gate static boolean_t daemonize(void);
877c478bd9Sstevel@tonic-gate static int	closefunc(void *, int);
887c478bd9Sstevel@tonic-gate static unsigned int process_cmd(int newfd, union mi_commands *mpi);
897c478bd9Sstevel@tonic-gate static unsigned int process_query(int fd, mi_query_t *miq);
90e11c3f44Smeem static unsigned int send_addrinfo(int fd, ipmp_addrinfo_t *adinfop);
917c478bd9Sstevel@tonic-gate static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
927c478bd9Sstevel@tonic-gate static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
937c478bd9Sstevel@tonic-gate static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
947c478bd9Sstevel@tonic-gate static unsigned int send_result(int fd, unsigned int error, int syserror);
957c478bd9Sstevel@tonic-gate 
96e11c3f44Smeem addrlist_t *localaddrs;
9787e66ffcSrk 
987c478bd9Sstevel@tonic-gate /*
997c478bd9Sstevel@tonic-gate  * Return the current time in milliseconds (from an arbitrary reference)
1007c478bd9Sstevel@tonic-gate  * truncated to fit into an int. Truncation is ok since we are interested
1017c478bd9Sstevel@tonic-gate  * only in differences and not the absolute values.
1027c478bd9Sstevel@tonic-gate  */
1037c478bd9Sstevel@tonic-gate uint_t
getcurrenttime(void)1047c478bd9Sstevel@tonic-gate getcurrenttime(void)
1057c478bd9Sstevel@tonic-gate {
1067c478bd9Sstevel@tonic-gate 	uint_t	cur_time;	/* In ms */
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate 	/*
1097c478bd9Sstevel@tonic-gate 	 * Use of a non-user-adjustable source of time is
1107c478bd9Sstevel@tonic-gate 	 * required. However millisecond precision is sufficient.
1117c478bd9Sstevel@tonic-gate 	 * divide by 10^6
1127c478bd9Sstevel@tonic-gate 	 */
1137c478bd9Sstevel@tonic-gate 	cur_time = (uint_t)(gethrtime() / 1000000LL);
1147c478bd9Sstevel@tonic-gate 	return (cur_time);
1157c478bd9Sstevel@tonic-gate }
1167c478bd9Sstevel@tonic-gate 
117e6ed03fcSmeem uint64_t
getcurrentsec(void)118e6ed03fcSmeem getcurrentsec(void)
119e6ed03fcSmeem {
120e6ed03fcSmeem 	return (gethrtime() / NANOSEC);
121e6ed03fcSmeem }
122e6ed03fcSmeem 
1237c478bd9Sstevel@tonic-gate /*
1247c478bd9Sstevel@tonic-gate  * Add fd to the set being polled. Returns 0 if ok; -1 if failed.
1257c478bd9Sstevel@tonic-gate  */
1267c478bd9Sstevel@tonic-gate int
poll_add(int fd)1277c478bd9Sstevel@tonic-gate poll_add(int fd)
1287c478bd9Sstevel@tonic-gate {
1297c478bd9Sstevel@tonic-gate 	int i;
1307c478bd9Sstevel@tonic-gate 	int new_num;
1317c478bd9Sstevel@tonic-gate 	struct pollfd *newfds;
1327c478bd9Sstevel@tonic-gate retry:
1337c478bd9Sstevel@tonic-gate 	/* Check if already present */
1347c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1357c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == fd)
1367c478bd9Sstevel@tonic-gate 			return (0);
1377c478bd9Sstevel@tonic-gate 	}
1387c478bd9Sstevel@tonic-gate 	/* Check for empty spot already present */
1397c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1407c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == -1) {
1417c478bd9Sstevel@tonic-gate 			pollfds[i].fd = fd;
1427c478bd9Sstevel@tonic-gate 			return (0);
1437c478bd9Sstevel@tonic-gate 		}
1447c478bd9Sstevel@tonic-gate 	}
1457c478bd9Sstevel@tonic-gate 
1467c478bd9Sstevel@tonic-gate 	/* Allocate space for 32 more fds and initialize to -1 */
1477c478bd9Sstevel@tonic-gate 	new_num = pollfd_num + 32;
1487c478bd9Sstevel@tonic-gate 	newfds = realloc(pollfds, new_num * sizeof (struct pollfd));
1497c478bd9Sstevel@tonic-gate 	if (newfds == NULL) {
1507c478bd9Sstevel@tonic-gate 		logperror("poll_add: realloc");
1517c478bd9Sstevel@tonic-gate 		return (-1);
1527c478bd9Sstevel@tonic-gate 	}
1537c478bd9Sstevel@tonic-gate 	for (i = pollfd_num; i < new_num; i++) {
1547c478bd9Sstevel@tonic-gate 		newfds[i].fd = -1;
1557c478bd9Sstevel@tonic-gate 		newfds[i].events = POLLIN;
1567c478bd9Sstevel@tonic-gate 	}
1577c478bd9Sstevel@tonic-gate 	pollfd_num = new_num;
1587c478bd9Sstevel@tonic-gate 	pollfds = newfds;
1597c478bd9Sstevel@tonic-gate 	goto retry;
1607c478bd9Sstevel@tonic-gate }
1617c478bd9Sstevel@tonic-gate 
1627c478bd9Sstevel@tonic-gate /*
1637c478bd9Sstevel@tonic-gate  * Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
1647c478bd9Sstevel@tonic-gate  */
165e11c3f44Smeem int
poll_remove(int fd)1667c478bd9Sstevel@tonic-gate poll_remove(int fd)
1677c478bd9Sstevel@tonic-gate {
1687c478bd9Sstevel@tonic-gate 	int i;
1697c478bd9Sstevel@tonic-gate 
1707c478bd9Sstevel@tonic-gate 	/* Check if already present */
1717c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1727c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == fd) {
1737c478bd9Sstevel@tonic-gate 			pollfds[i].fd = -1;
1747c478bd9Sstevel@tonic-gate 			return (0);
1757c478bd9Sstevel@tonic-gate 		}
1767c478bd9Sstevel@tonic-gate 	}
1777c478bd9Sstevel@tonic-gate 	return (-1);
1787c478bd9Sstevel@tonic-gate }
1797c478bd9Sstevel@tonic-gate 
1807c478bd9Sstevel@tonic-gate /*
1817c478bd9Sstevel@tonic-gate  * Extract information about the phyint instance. If the phyint instance still
1827c478bd9Sstevel@tonic-gate  * exists in the kernel then set pii_in_use, else clear it. check_if_removed()
1837c478bd9Sstevel@tonic-gate  * will use it to detect phyint instances that don't exist any longer and
1847c478bd9Sstevel@tonic-gate  * remove them, from our database of phyint instances.
1857c478bd9Sstevel@tonic-gate  * Return value:
1867c478bd9Sstevel@tonic-gate  *	returns true if the phyint instance exists in the kernel,
1877c478bd9Sstevel@tonic-gate  *	returns false otherwise
1887c478bd9Sstevel@tonic-gate  */
1897c478bd9Sstevel@tonic-gate static boolean_t
pii_process(int af,char * name,struct phyint_instance ** pii_p)1907c478bd9Sstevel@tonic-gate pii_process(int af, char *name, struct phyint_instance **pii_p)
1917c478bd9Sstevel@tonic-gate {
1927c478bd9Sstevel@tonic-gate 	int err;
1937c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
1947c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii_other;
1957c478bd9Sstevel@tonic-gate 
1967c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
1977c478bd9Sstevel@tonic-gate 		logdebug("pii_process(%s %s)\n", AF_STR(af), name);
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, name);
2007c478bd9Sstevel@tonic-gate 	if (pii == NULL) {
2017c478bd9Sstevel@tonic-gate 		/*
2027c478bd9Sstevel@tonic-gate 		 * Phyint instance does not exist in our tables,
2037c478bd9Sstevel@tonic-gate 		 * create new phyint instance
2047c478bd9Sstevel@tonic-gate 		 */
2057c478bd9Sstevel@tonic-gate 		pii = phyint_inst_init_from_k(af, name);
2067c478bd9Sstevel@tonic-gate 	} else {
2077c478bd9Sstevel@tonic-gate 		/* Phyint exists in our tables */
2087c478bd9Sstevel@tonic-gate 		err = phyint_inst_update_from_k(pii);
2097c478bd9Sstevel@tonic-gate 
2107c478bd9Sstevel@tonic-gate 		switch (err) {
2117c478bd9Sstevel@tonic-gate 		case PI_IOCTL_ERROR:
2127c478bd9Sstevel@tonic-gate 			/* Some ioctl error. don't change anything */
2137c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 1;
2147c478bd9Sstevel@tonic-gate 			break;
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 		case PI_GROUP_CHANGED:
2177c478bd9Sstevel@tonic-gate 		case PI_IFINDEX_CHANGED:
2187c478bd9Sstevel@tonic-gate 			/*
219e11c3f44Smeem 			 * Interface index or group membership has changed.
220e11c3f44Smeem 			 * Delete the old state and recreate based on the new
221e11c3f44Smeem 			 * state (it may no longer be in a group).
2227c478bd9Sstevel@tonic-gate 			 */
2237c478bd9Sstevel@tonic-gate 			pii_other = phyint_inst_other(pii);
2247c478bd9Sstevel@tonic-gate 			if (pii_other != NULL)
2257c478bd9Sstevel@tonic-gate 				phyint_inst_delete(pii_other);
2267c478bd9Sstevel@tonic-gate 			phyint_inst_delete(pii);
2277c478bd9Sstevel@tonic-gate 			pii = phyint_inst_init_from_k(af, name);
2287c478bd9Sstevel@tonic-gate 			break;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 		case PI_DELETED:
2317c478bd9Sstevel@tonic-gate 			/* Phyint instance has disappeared from kernel */
2327c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 0;
2337c478bd9Sstevel@tonic-gate 			break;
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 		case PI_OK:
2367c478bd9Sstevel@tonic-gate 			/* Phyint instance exists and is fine */
2377c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 1;
2387c478bd9Sstevel@tonic-gate 			break;
2397c478bd9Sstevel@tonic-gate 
2407c478bd9Sstevel@tonic-gate 		default:
2417c478bd9Sstevel@tonic-gate 			/* Unknown status */
2427c478bd9Sstevel@tonic-gate 			logerr("pii_process: Unknown status %d\n", err);
2437c478bd9Sstevel@tonic-gate 			break;
2447c478bd9Sstevel@tonic-gate 		}
2457c478bd9Sstevel@tonic-gate 	}
2467c478bd9Sstevel@tonic-gate 
2477c478bd9Sstevel@tonic-gate 	*pii_p = pii;
2487c478bd9Sstevel@tonic-gate 	if (pii != NULL)
2497c478bd9Sstevel@tonic-gate 		return (pii->pii_in_use ? _B_TRUE : _B_FALSE);
2507c478bd9Sstevel@tonic-gate 	else
2517c478bd9Sstevel@tonic-gate 		return (_B_FALSE);
2527c478bd9Sstevel@tonic-gate }
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate /*
2557c478bd9Sstevel@tonic-gate  * Scan all interfaces to detect changes as well as new and deleted interfaces
2567c478bd9Sstevel@tonic-gate  */
2577c478bd9Sstevel@tonic-gate static void
initifs()2587c478bd9Sstevel@tonic-gate initifs()
2597c478bd9Sstevel@tonic-gate {
260e11c3f44Smeem 	int	i, nlifr;
2617c478bd9Sstevel@tonic-gate 	int	af;
2627c478bd9Sstevel@tonic-gate 	char	*cp;
2637c478bd9Sstevel@tonic-gate 	char	*buf;
264e11c3f44Smeem 	int	sockfd;
265e11c3f44Smeem 	uint64_t	flags;
2667c478bd9Sstevel@tonic-gate 	struct lifnum	lifn;
2677c478bd9Sstevel@tonic-gate 	struct lifconf	lifc;
268e11c3f44Smeem 	struct lifreq	lifreq;
2697c478bd9Sstevel@tonic-gate 	struct lifreq	*lifr;
2707c478bd9Sstevel@tonic-gate 	struct logint	*li;
2717c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
2727c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
273e11c3f44Smeem 	struct phyint_group *pg, *next_pg;
274e11c3f44Smeem 	char		pi_name[LIFNAMSIZ + 1];
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
2777c478bd9Sstevel@tonic-gate 		logdebug("initifs: Scanning interfaces\n");
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	last_initifs_time = getcurrenttime();
2807c478bd9Sstevel@tonic-gate 
28187e66ffcSrk 	/*
282e11c3f44Smeem 	 * Free the existing local address list; we'll build a new list below.
28387e66ffcSrk 	 */
284e11c3f44Smeem 	addrlist_free(&localaddrs);
28587e66ffcSrk 
2867c478bd9Sstevel@tonic-gate 	/*
2877c478bd9Sstevel@tonic-gate 	 * Mark the interfaces so that we can find phyints and logints
2887c478bd9Sstevel@tonic-gate 	 * which have disappeared from the kernel. pii_process() and
2897c478bd9Sstevel@tonic-gate 	 * logint_init_from_k() will set {pii,li}_in_use when they find
2907c478bd9Sstevel@tonic-gate 	 * the interface in the kernel. Also, clear dupaddr bit on probe
2917c478bd9Sstevel@tonic-gate 	 * logint. check_addr_unique() will set the dupaddr bit on the
2927c478bd9Sstevel@tonic-gate 	 * probe logint, if the testaddress is not unique.
2937c478bd9Sstevel@tonic-gate 	 */
2947c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
2957c478bd9Sstevel@tonic-gate 		pii->pii_in_use = 0;
2967c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
2977c478bd9Sstevel@tonic-gate 			li->li_in_use = 0;
2987c478bd9Sstevel@tonic-gate 			if (pii->pii_probe_logint == li)
2997c478bd9Sstevel@tonic-gate 				li->li_dupaddr = 0;
3007c478bd9Sstevel@tonic-gate 		}
3017c478bd9Sstevel@tonic-gate 	}
3027c478bd9Sstevel@tonic-gate 
303e11c3f44Smeem 	/*
304e11c3f44Smeem 	 * As above, mark groups so that we can detect IPMP interfaces which
305e11c3f44Smeem 	 * have been removed from the kernel.  Also, delete the group address
306e11c3f44Smeem 	 * list since we'll iteratively recreate it below.
307e11c3f44Smeem 	 */
308e11c3f44Smeem 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
309e11c3f44Smeem 		pg->pg_in_use = _B_FALSE;
310e11c3f44Smeem 		addrlist_free(&pg->pg_addrs);
311e11c3f44Smeem 	}
312e11c3f44Smeem 
3137c478bd9Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
314e11c3f44Smeem 	lifn.lifn_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
315e11c3f44Smeem again:
3167c478bd9Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
317e11c3f44Smeem 		logperror("initifs: ioctl (get interface count)");
3187c478bd9Sstevel@tonic-gate 		return;
3197c478bd9Sstevel@tonic-gate 	}
320e11c3f44Smeem 	/*
321e11c3f44Smeem 	 * Pad the interface count to detect when additional interfaces have
322e11c3f44Smeem 	 * been configured between SIOCGLIFNUM and SIOCGLIFCONF.
323e11c3f44Smeem 	 */
324e11c3f44Smeem 	lifn.lifn_count += 4;
3257c478bd9Sstevel@tonic-gate 
326e11c3f44Smeem 	if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) {
3277c478bd9Sstevel@tonic-gate 		logperror("initifs: calloc");
3287c478bd9Sstevel@tonic-gate 		return;
3297c478bd9Sstevel@tonic-gate 	}
3307c478bd9Sstevel@tonic-gate 
3317c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
332e11c3f44Smeem 	lifc.lifc_flags = LIFC_ALLZONES | LIFC_UNDER_IPMP;
333e11c3f44Smeem 	lifc.lifc_len = lifn.lifn_count * sizeof (struct lifreq);
3347c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = buf;
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
337e11c3f44Smeem 		logperror("initifs: ioctl (get interface configuration)");
3387c478bd9Sstevel@tonic-gate 		free(buf);
3397c478bd9Sstevel@tonic-gate 		return;
3407c478bd9Sstevel@tonic-gate 	}
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate 	/*
343e11c3f44Smeem 	 * If every lifr_req slot is taken, then additional interfaces must
344e11c3f44Smeem 	 * have been plumbed between the SIOCGLIFNUM and the SIOCGLIFCONF.
345e11c3f44Smeem 	 * Recalculate to make sure we didn't miss any interfaces.
3467c478bd9Sstevel@tonic-gate 	 */
347e11c3f44Smeem 	nlifr = lifc.lifc_len / sizeof (struct lifreq);
348e11c3f44Smeem 	if (nlifr >= lifn.lifn_count) {
349e11c3f44Smeem 		free(buf);
350e11c3f44Smeem 		goto again;
351e11c3f44Smeem 	}
35287e66ffcSrk 
353e11c3f44Smeem 	/*
354e11c3f44Smeem 	 * Walk through the lifreqs returned by SIOGGLIFCONF, and refresh the
355e11c3f44Smeem 	 * global list of addresses, phyint groups, phyints, and logints.
356e11c3f44Smeem 	 */
357e11c3f44Smeem 	for (lifr = lifc.lifc_req, i = 0; i < nlifr; i++, lifr++) {
3587c478bd9Sstevel@tonic-gate 		af = lifr->lifr_addr.ss_family;
35987e66ffcSrk 		sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
360e11c3f44Smeem 		(void) strlcpy(lifreq.lifr_name, lifr->lifr_name, LIFNAMSIZ);
36187e66ffcSrk 
36287e66ffcSrk 		if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
36387e66ffcSrk 			if (errno != ENXIO)
36487e66ffcSrk 				logperror("initifs: ioctl (SIOCGLIFFLAGS)");
36587e66ffcSrk 			continue;
36687e66ffcSrk 		}
367e11c3f44Smeem 		flags = lifreq.lifr_flags;
368e11c3f44Smeem 
369e11c3f44Smeem 		/*
370e11c3f44Smeem 		 * If the address is IFF_UP, add it to the local address list.
371e11c3f44Smeem 		 * (We ignore addresses that aren't IFF_UP since another node
372e11c3f44Smeem 		 * might legitimately have that address IFF_UP.)
373e11c3f44Smeem 		 */
374e11c3f44Smeem 		if (flags & IFF_UP) {
375e11c3f44Smeem 			(void) addrlist_add(&localaddrs, lifr->lifr_name, flags,
376e11c3f44Smeem 			    &lifr->lifr_addr);
377e11c3f44Smeem 		}
37887e66ffcSrk 
37987e66ffcSrk 		/*
380e11c3f44Smeem 		 * If this address is on an IPMP meta-interface, update our
381e11c3f44Smeem 		 * phyint_group information (either by recording that group
382e11c3f44Smeem 		 * still exists or creating a new group), and track what
383e11c3f44Smeem 		 * group the address is part of.
38487e66ffcSrk 		 */
385e11c3f44Smeem 		if (flags & IFF_IPMP) {
386e11c3f44Smeem 			if (ioctl(sockfd, SIOCGLIFGROUPNAME, &lifreq) == -1) {
387e11c3f44Smeem 				if (errno != ENXIO)
388e11c3f44Smeem 					logperror("initifs: ioctl "
389e11c3f44Smeem 					    "(SIOCGLIFGROUPNAME)");
39087e66ffcSrk 				continue;
39187e66ffcSrk 			}
392e11c3f44Smeem 
393e11c3f44Smeem 			pg = phyint_group_lookup(lifreq.lifr_groupname);
394e11c3f44Smeem 			if (pg == NULL) {
395e11c3f44Smeem 				pg = phyint_group_create(lifreq.lifr_groupname);
396e11c3f44Smeem 				if (pg == NULL) {
397e11c3f44Smeem 					logerr("initifs: cannot create group "
398e11c3f44Smeem 					    "%s\n", lifreq.lifr_groupname);
399e11c3f44Smeem 					continue;
400e11c3f44Smeem 				}
401e11c3f44Smeem 				phyint_group_insert(pg);
402e11c3f44Smeem 			}
403e11c3f44Smeem 			pg->pg_in_use = _B_TRUE;
404e11c3f44Smeem 
405e11c3f44Smeem 			/*
406e11c3f44Smeem 			 * Add this to the group's list of data addresses.
407e11c3f44Smeem 			 */
408e11c3f44Smeem 			if (!addrlist_add(&pg->pg_addrs, lifr->lifr_name, flags,
409e11c3f44Smeem 			    &lifr->lifr_addr)) {
410e11c3f44Smeem 				logerr("initifs: insufficient memory to track "
411e11c3f44Smeem 				    "data address information for %s\n",
412e11c3f44Smeem 				    lifr->lifr_name);
41387e66ffcSrk 			}
414e11c3f44Smeem 			continue;
41587e66ffcSrk 		}
41687e66ffcSrk 
4177c478bd9Sstevel@tonic-gate 		/*
418e11c3f44Smeem 		 * This isn't an address on an IPMP meta-interface, so it's
419e11c3f44Smeem 		 * either on an underlying interface or not related to any
420e11c3f44Smeem 		 * group.  Update our phyint and logint information (via
421e11c3f44Smeem 		 * pii_process() and logint_init_from_k()) -- but first,
422e11c3f44Smeem 		 * convert the logint name to a phyint name so we can call
423e11c3f44Smeem 		 * pii_process().
4247c478bd9Sstevel@tonic-gate 		 */
42587e66ffcSrk 		(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
4267c478bd9Sstevel@tonic-gate 		if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
4277c478bd9Sstevel@tonic-gate 			*cp = '\0';
4287c478bd9Sstevel@tonic-gate 
429e11c3f44Smeem 		if (pii_process(af, pi_name, &pii)) {
4307c478bd9Sstevel@tonic-gate 			/* The phyint is fine. So process the logint */
4317c478bd9Sstevel@tonic-gate 			logint_init_from_k(pii, lifr->lifr_name);
43206cdd167Smeem 			check_addr_unique(pii, &lifr->lifr_addr);
4337c478bd9Sstevel@tonic-gate 		}
4347c478bd9Sstevel@tonic-gate 	}
4357c478bd9Sstevel@tonic-gate 	free(buf);
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 	/*
438e11c3f44Smeem 	 * Scan for groups, phyints and logints that have disappeared from the
4397c478bd9Sstevel@tonic-gate 	 * kernel, and delete them.
4407c478bd9Sstevel@tonic-gate 	 */
441e6ed03fcSmeem 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
4427c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
4437c478bd9Sstevel@tonic-gate 		check_if_removed(pii);
4447c478bd9Sstevel@tonic-gate 	}
4457c478bd9Sstevel@tonic-gate 
446e11c3f44Smeem 	for (pg = phyint_groups; pg != NULL; pg = next_pg) {
447e11c3f44Smeem 		next_pg = pg->pg_next;
448e11c3f44Smeem 		if (!pg->pg_in_use) {
449e11c3f44Smeem 			phyint_group_delete(pg);
450e11c3f44Smeem 			continue;
451e11c3f44Smeem 		}
452e11c3f44Smeem 		/*
453e11c3f44Smeem 		 * Refresh the group's state.  This is necessary since the
454e11c3f44Smeem 		 * group's state is defined by the set of usable interfaces in
455e11c3f44Smeem 		 * the group, and an interface is considered unusable if all
456e11c3f44Smeem 		 * of its addresses are down.  When an address goes down/up,
457e11c3f44Smeem 		 * the RTM_DELADDR/RTM_NEWADDR brings us through here.
458e11c3f44Smeem 		 */
459e11c3f44Smeem 		phyint_group_refresh_state(pg);
460e11c3f44Smeem 	}
461e11c3f44Smeem 
4627c478bd9Sstevel@tonic-gate 	/*
4637c478bd9Sstevel@tonic-gate 	 * Select a test address for sending probes on each phyint instance
4647c478bd9Sstevel@tonic-gate 	 */
4657c478bd9Sstevel@tonic-gate 	select_test_ifs();
4667c478bd9Sstevel@tonic-gate 
4677c478bd9Sstevel@tonic-gate 	/*
468e11c3f44Smeem 	 * Handle link up/down notifications.
4697c478bd9Sstevel@tonic-gate 	 */
4707c478bd9Sstevel@tonic-gate 	process_link_state_changes();
4717c478bd9Sstevel@tonic-gate }
4727c478bd9Sstevel@tonic-gate 
4737c478bd9Sstevel@tonic-gate /*
47406cdd167Smeem  * Check that a given test address is unique across all of the interfaces in a
47506cdd167Smeem  * group.  (e.g., IPv6 link-locals may not be inherently unique, and binding
47606cdd167Smeem  * to such an (IFF_NOFAILOVER) address can produce unexpected results.)
477e6ed03fcSmeem  * Any issues will be reported by check_testconfig().
4787c478bd9Sstevel@tonic-gate  */
4797c478bd9Sstevel@tonic-gate static void
check_addr_unique(struct phyint_instance * ourpii,struct sockaddr_storage * ss)48006cdd167Smeem check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
4817c478bd9Sstevel@tonic-gate {
48206cdd167Smeem 	struct phyint		*pi;
48306cdd167Smeem 	struct phyint_group	*pg;
48406cdd167Smeem 	struct in6_addr		addr;
4857c478bd9Sstevel@tonic-gate 	struct phyint_instance	*pii;
4867c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin;
4877c478bd9Sstevel@tonic-gate 
48806cdd167Smeem 	if (ss->ss_family == AF_INET) {
48906cdd167Smeem 		sin = (struct sockaddr_in *)ss;
4907c478bd9Sstevel@tonic-gate 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr);
4917c478bd9Sstevel@tonic-gate 	} else {
49206cdd167Smeem 		assert(ss->ss_family == AF_INET6);
49306cdd167Smeem 		addr = ((struct sockaddr_in6 *)ss)->sin6_addr;
4947c478bd9Sstevel@tonic-gate 	}
4957c478bd9Sstevel@tonic-gate 
4967c478bd9Sstevel@tonic-gate 	/*
49706cdd167Smeem 	 * For anonymous groups, every interface is assumed to be on its own
49806cdd167Smeem 	 * link, so there is no chance of overlapping addresses.
4997c478bd9Sstevel@tonic-gate 	 */
50006cdd167Smeem 	pg = ourpii->pii_phyint->pi_group;
50106cdd167Smeem 	if (pg == phyint_anongroup)
50206cdd167Smeem 		return;
50306cdd167Smeem 
50406cdd167Smeem 	/*
50506cdd167Smeem 	 * Walk the list of phyint instances in the group and check for test
50606cdd167Smeem 	 * addresses matching ours.  Of course, we skip ourself.
50706cdd167Smeem 	 */
50806cdd167Smeem 	for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
50906cdd167Smeem 		pii = PHYINT_INSTANCE(pi, ss->ss_family);
51006cdd167Smeem 		if (pii == NULL || pii == ourpii ||
51106cdd167Smeem 		    pii->pii_probe_logint == NULL)
5127c478bd9Sstevel@tonic-gate 			continue;
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 		/*
515e6ed03fcSmeem 		 * If this test address is not unique, set the dupaddr bit.
5167c478bd9Sstevel@tonic-gate 		 */
517e6ed03fcSmeem 		if (IN6_ARE_ADDR_EQUAL(&addr, &pii->pii_probe_logint->li_addr))
518e6ed03fcSmeem 			pii->pii_probe_logint->li_dupaddr = 1;
5197c478bd9Sstevel@tonic-gate 	}
5207c478bd9Sstevel@tonic-gate }
5217c478bd9Sstevel@tonic-gate 
5227c478bd9Sstevel@tonic-gate /*
5237c478bd9Sstevel@tonic-gate  * Stop probing an interface.  Called when an interface is offlined.
5247c478bd9Sstevel@tonic-gate  * The probe socket is closed on each interface instance, and the
5257c478bd9Sstevel@tonic-gate  * interface state set to PI_OFFLINE.
5267c478bd9Sstevel@tonic-gate  */
527e11c3f44Smeem void
stop_probing(struct phyint * pi)5287c478bd9Sstevel@tonic-gate stop_probing(struct phyint *pi)
5297c478bd9Sstevel@tonic-gate {
5307c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
5317c478bd9Sstevel@tonic-gate 
5327c478bd9Sstevel@tonic-gate 	pii = pi->pi_v4;
5337c478bd9Sstevel@tonic-gate 	if (pii != NULL) {
5347c478bd9Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
5357c478bd9Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
5367c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
5377c478bd9Sstevel@tonic-gate 	}
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	pii = pi->pi_v6;
5407c478bd9Sstevel@tonic-gate 	if (pii != NULL) {
5417c478bd9Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
5427c478bd9Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
5437c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
5447c478bd9Sstevel@tonic-gate 	}
5457c478bd9Sstevel@tonic-gate 
5467c478bd9Sstevel@tonic-gate 	phyint_chstate(pi, PI_OFFLINE);
5477c478bd9Sstevel@tonic-gate }
5487c478bd9Sstevel@tonic-gate 
549921e7e07Smeem enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS };
550921e7e07Smeem 
5517c478bd9Sstevel@tonic-gate /*
552921e7e07Smeem  * Rate the provided test flags.  By definition, IFF_NOFAILOVER must be set.
553921e7e07Smeem  * IFF_UP must also be set so that the associated address can be used as a
554921e7e07Smeem  * source address.  Further, we must be able to exchange packets with local
555921e7e07Smeem  * destinations, so IFF_NOXMIT and IFF_NOLOCAL must be clear.  For historical
556921e7e07Smeem  * reasons, we have a proclivity for IFF_DEPRECATED IPv4 test addresses.
557921e7e07Smeem  */
558921e7e07Smeem static int
rate_testflags(uint64_t flags)559921e7e07Smeem rate_testflags(uint64_t flags)
560921e7e07Smeem {
561921e7e07Smeem 	if ((flags & (IFF_NOFAILOVER | IFF_UP)) != (IFF_NOFAILOVER | IFF_UP))
562921e7e07Smeem 		return (BAD_TESTFLAGS);
563921e7e07Smeem 
564921e7e07Smeem 	if ((flags & (IFF_NOXMIT | IFF_NOLOCAL)) != 0)
565921e7e07Smeem 		return (BAD_TESTFLAGS);
566921e7e07Smeem 
567921e7e07Smeem 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_DEPRECATED)
568921e7e07Smeem 		return (BEST_TESTFLAGS);
569921e7e07Smeem 
570921e7e07Smeem 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_IPV6)
571921e7e07Smeem 		return (BEST_TESTFLAGS);
572921e7e07Smeem 
573921e7e07Smeem 	return (OK_TESTFLAGS);
574921e7e07Smeem }
575921e7e07Smeem 
576921e7e07Smeem /*
577921e7e07Smeem  * Attempt to select a test address for each phyint instance.
578921e7e07Smeem  * Call phyint_inst_sockinit() to complete the initializations.
5797c478bd9Sstevel@tonic-gate  */
5807c478bd9Sstevel@tonic-gate static void
select_test_ifs(void)5817c478bd9Sstevel@tonic-gate select_test_ifs(void)
5827c478bd9Sstevel@tonic-gate {
5837c478bd9Sstevel@tonic-gate 	struct phyint		*pi;
5847c478bd9Sstevel@tonic-gate 	struct phyint_instance	*pii;
5857c478bd9Sstevel@tonic-gate 	struct phyint_instance	*next_pii;
586921e7e07Smeem 	struct logint		*li;
58780d556f9SHans Rosenfeld 	struct logint		*probe_logint;
588921e7e07Smeem 	boolean_t		target_scan_reqd = _B_FALSE;
589921e7e07Smeem 	int			rating;
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
5927c478bd9Sstevel@tonic-gate 		logdebug("select_test_ifs\n");
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 	/*
5957c478bd9Sstevel@tonic-gate 	 * For each phyint instance, do the test address selection
5967c478bd9Sstevel@tonic-gate 	 */
5977c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
5987c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
599921e7e07Smeem 		probe_logint = NULL;
600921e7e07Smeem 
6017c478bd9Sstevel@tonic-gate 		/*
602e11c3f44Smeem 		 * An interface that is offline should not be probed.
603e11c3f44Smeem 		 * IFF_OFFLINE interfaces should always be PI_OFFLINE
6047c478bd9Sstevel@tonic-gate 		 * unless some other entity has set the offline flag.
6057c478bd9Sstevel@tonic-gate 		 */
6067c478bd9Sstevel@tonic-gate 		if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
6077c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state != PI_OFFLINE) {
6087c478bd9Sstevel@tonic-gate 				logerr("shouldn't be probing offline"
609e6ed03fcSmeem 				    " interface %s (state is: %u)."
610e6ed03fcSmeem 				    " Stopping probes.\n",
611e6ed03fcSmeem 				    pii->pii_phyint->pi_name,
612e6ed03fcSmeem 				    pii->pii_phyint->pi_state);
6137c478bd9Sstevel@tonic-gate 				stop_probing(pii->pii_phyint);
6147c478bd9Sstevel@tonic-gate 			}
6157c478bd9Sstevel@tonic-gate 			continue;
616e11c3f44Smeem 		} else {
617e11c3f44Smeem 			/*
618e11c3f44Smeem 			 * If something cleared IFF_OFFLINE (e.g., by accident
619e11c3f44Smeem 			 * because the SIOCGLIFFLAGS/SIOCSLIFFLAGS sequence is
620e11c3f44Smeem 			 * inherently racy), the phyint may still be offline.
621e11c3f44Smeem 			 * Just ignore it.
622e11c3f44Smeem 			 */
623e11c3f44Smeem 			if (pii->pii_phyint->pi_state == PI_OFFLINE)
624e11c3f44Smeem 				continue;
6257c478bd9Sstevel@tonic-gate 		}
6267c478bd9Sstevel@tonic-gate 
627921e7e07Smeem 		li = pii->pii_probe_logint;
628921e7e07Smeem 		if (li != NULL) {
6297c478bd9Sstevel@tonic-gate 			/*
630921e7e07Smeem 			 * We've already got a test address; only proceed
631921e7e07Smeem 			 * if it's suboptimal.
6327c478bd9Sstevel@tonic-gate 			 */
633921e7e07Smeem 			if (rate_testflags(li->li_flags) == BEST_TESTFLAGS)
634921e7e07Smeem 				continue;
6357c478bd9Sstevel@tonic-gate 		}
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 		/*
6387c478bd9Sstevel@tonic-gate 		 * Walk the logints of this phyint instance, and select
6397c478bd9Sstevel@tonic-gate 		 * the best available test address
6407c478bd9Sstevel@tonic-gate 		 */
6417c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
64206cdd167Smeem 			/*
64306cdd167Smeem 			 * Skip 0.0.0.0 addresses, as those are never
64406cdd167Smeem 			 * actually usable.
64506cdd167Smeem 			 */
64606cdd167Smeem 			if (pii->pii_af == AF_INET &&
64706cdd167Smeem 			    IN6_IS_ADDR_V4MAPPED_ANY(&li->li_addr))
64806cdd167Smeem 				continue;
64906cdd167Smeem 
6507c478bd9Sstevel@tonic-gate 			/*
6517c478bd9Sstevel@tonic-gate 			 * Skip any IPv6 logints that are not link-local,
6527c478bd9Sstevel@tonic-gate 			 * since we should always have a link-local address
6537c478bd9Sstevel@tonic-gate 			 * anyway and in6_data() expects link-local replies.
6547c478bd9Sstevel@tonic-gate 			 */
6557c478bd9Sstevel@tonic-gate 			if (pii->pii_af == AF_INET6 &&
6567c478bd9Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKLOCAL(&li->li_addr))
6577c478bd9Sstevel@tonic-gate 				continue;
6587c478bd9Sstevel@tonic-gate 
659921e7e07Smeem 			/*
660921e7e07Smeem 			 * Rate the testflags. If we've found an optimal
661921e7e07Smeem 			 * match, then break out; otherwise, record the most
662921e7e07Smeem 			 * recent OK one.
663921e7e07Smeem 			 */
664921e7e07Smeem 			rating = rate_testflags(li->li_flags);
665921e7e07Smeem 			if (rating == BAD_TESTFLAGS)
666921e7e07Smeem 				continue;
667921e7e07Smeem 
668921e7e07Smeem 			probe_logint = li;
669921e7e07Smeem 			if (rating == BEST_TESTFLAGS)
670921e7e07Smeem 				break;
6717c478bd9Sstevel@tonic-gate 		}
6727c478bd9Sstevel@tonic-gate 
6737c478bd9Sstevel@tonic-gate 		/*
674921e7e07Smeem 		 * If the probe logint has changed, ditch the old one.
6757c478bd9Sstevel@tonic-gate 		 */
676921e7e07Smeem 		if (pii->pii_probe_logint != NULL &&
677921e7e07Smeem 		    pii->pii_probe_logint != probe_logint) {
6787c478bd9Sstevel@tonic-gate 			if (pii->pii_probe_sock != -1)
6797c478bd9Sstevel@tonic-gate 				close_probe_socket(pii, _B_TRUE);
6807c478bd9Sstevel@tonic-gate 			pii->pii_probe_logint = NULL;
6817c478bd9Sstevel@tonic-gate 		}
6827c478bd9Sstevel@tonic-gate 
683921e7e07Smeem 		if (probe_logint == NULL) {
6847c478bd9Sstevel@tonic-gate 			/*
685e6ed03fcSmeem 			 * We don't have a test address; zero out the probe
686e6ed03fcSmeem 			 * stats array since it is no longer relevant.
687e6ed03fcSmeem 			 * Optimize by checking if it is already zeroed out.
6887c478bd9Sstevel@tonic-gate 			 */
6897c478bd9Sstevel@tonic-gate 			int pr_ndx;
6907c478bd9Sstevel@tonic-gate 
6917c478bd9Sstevel@tonic-gate 			pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
6927c478bd9Sstevel@tonic-gate 			if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) {
6937c478bd9Sstevel@tonic-gate 				clear_pii_probe_stats(pii);
6947c478bd9Sstevel@tonic-gate 				reset_crtt_all(pii->pii_phyint);
6957c478bd9Sstevel@tonic-gate 			}
6967c478bd9Sstevel@tonic-gate 			continue;
697921e7e07Smeem 		} else if (probe_logint == pii->pii_probe_logint) {
6987c478bd9Sstevel@tonic-gate 			/*
6997c478bd9Sstevel@tonic-gate 			 * If we didn't find any new test addr, go to the
7007c478bd9Sstevel@tonic-gate 			 * next phyint.
7017c478bd9Sstevel@tonic-gate 			 */
7027c478bd9Sstevel@tonic-gate 			continue;
7037c478bd9Sstevel@tonic-gate 		}
7047c478bd9Sstevel@tonic-gate 
7057c478bd9Sstevel@tonic-gate 		/*
7067c478bd9Sstevel@tonic-gate 		 * The phyint is either being assigned a new testaddr
7077c478bd9Sstevel@tonic-gate 		 * or is being assigned a testaddr for the 1st time.
7087c478bd9Sstevel@tonic-gate 		 * Need to initialize the phyint socket
7097c478bd9Sstevel@tonic-gate 		 */
710921e7e07Smeem 		pii->pii_probe_logint = probe_logint;
7117c478bd9Sstevel@tonic-gate 		if (!phyint_inst_sockinit(pii)) {
7127c478bd9Sstevel@tonic-gate 			if (debug & D_PHYINT) {
7137c478bd9Sstevel@tonic-gate 				logdebug("select_test_ifs: "
7147c478bd9Sstevel@tonic-gate 				    "phyint_sockinit failed\n");
7157c478bd9Sstevel@tonic-gate 			}
7167c478bd9Sstevel@tonic-gate 			phyint_inst_delete(pii);
7177c478bd9Sstevel@tonic-gate 			continue;
7187c478bd9Sstevel@tonic-gate 		}
7197c478bd9Sstevel@tonic-gate 
7207c478bd9Sstevel@tonic-gate 		/*
7217c478bd9Sstevel@tonic-gate 		 * This phyint instance is now enabled for probes; this
7227c478bd9Sstevel@tonic-gate 		 * impacts our state machine in two ways:
7237c478bd9Sstevel@tonic-gate 		 *
7247c478bd9Sstevel@tonic-gate 		 * 1. If we're probe *capable* as well (i.e., we have
7257c478bd9Sstevel@tonic-gate 		 *    probe targets) and the interface is in PI_NOTARGETS,
7267c478bd9Sstevel@tonic-gate 		 *    then transition to PI_RUNNING.
7277c478bd9Sstevel@tonic-gate 		 *
7287c478bd9Sstevel@tonic-gate 		 * 2. If we're not probe capable, and the other phyint
7297c478bd9Sstevel@tonic-gate 		 *    instance is also not probe capable, and we were in
7307c478bd9Sstevel@tonic-gate 		 *    PI_RUNNING, then transition to PI_NOTARGETS.
7317c478bd9Sstevel@tonic-gate 		 *
7327c478bd9Sstevel@tonic-gate 		 * Also see the state diagram in mpd_probe.c.
7337c478bd9Sstevel@tonic-gate 		 */
7347c478bd9Sstevel@tonic-gate 		if (PROBE_CAPABLE(pii)) {
7357c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_NOTARGETS)
7367c478bd9Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_RUNNING);
7377c478bd9Sstevel@tonic-gate 		} else if (!PROBE_CAPABLE(phyint_inst_other(pii))) {
7387c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_RUNNING)
7397c478bd9Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
7407c478bd9Sstevel@tonic-gate 		}
7417c478bd9Sstevel@tonic-gate 
7427c478bd9Sstevel@tonic-gate 		/*
7437c478bd9Sstevel@tonic-gate 		 * If no targets are currently known for this phyint
7447c478bd9Sstevel@tonic-gate 		 * we need to call init_router_targets. Since
7457c478bd9Sstevel@tonic-gate 		 * init_router_targets() initializes the list of targets
7467c478bd9Sstevel@tonic-gate 		 * for all phyints it is done below the loop.
7477c478bd9Sstevel@tonic-gate 		 */
7487c478bd9Sstevel@tonic-gate 		if (pii->pii_targets == NULL)
7497c478bd9Sstevel@tonic-gate 			target_scan_reqd = _B_TRUE;
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 		/*
7527c478bd9Sstevel@tonic-gate 		 * Start the probe timer for this instance.
7537c478bd9Sstevel@tonic-gate 		 */
75406cdd167Smeem 		if (!pii->pii_basetime_inited && PROBE_ENABLED(pii)) {
7557c478bd9Sstevel@tonic-gate 			start_timer(pii);
7567c478bd9Sstevel@tonic-gate 			pii->pii_basetime_inited = 1;
7577c478bd9Sstevel@tonic-gate 		}
7587c478bd9Sstevel@tonic-gate 	}
7597c478bd9Sstevel@tonic-gate 
7607c478bd9Sstevel@tonic-gate 	/*
761e11c3f44Smeem 	 * Scan the interface list for any interfaces that are PI_FAILED or
762e11c3f44Smeem 	 * PI_NOTARGETS but no longer enabled to send probes, and call
763e11c3f44Smeem 	 * phyint_check_for_repair() to see if the link state indicates that
764e11c3f44Smeem 	 * the interface should be repaired.  Also see the state diagram in
7657c478bd9Sstevel@tonic-gate 	 * mpd_probe.c.
7667c478bd9Sstevel@tonic-gate 	 */
7677c478bd9Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
768e11c3f44Smeem 		if ((!PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) &&
769e11c3f44Smeem 		    (pi->pi_state == PI_FAILED ||
770e11c3f44Smeem 		    pi->pi_state == PI_NOTARGETS)) {
7717c478bd9Sstevel@tonic-gate 			phyint_check_for_repair(pi);
7727c478bd9Sstevel@tonic-gate 		}
7737c478bd9Sstevel@tonic-gate 	}
7747c478bd9Sstevel@tonic-gate 
775e6ed03fcSmeem 	check_testconfig();
776e6ed03fcSmeem 
7777c478bd9Sstevel@tonic-gate 	/*
7787c478bd9Sstevel@tonic-gate 	 * Try to populate the target list. init_router_targets populates
7797c478bd9Sstevel@tonic-gate 	 * the target list from the routing table. If our target list is
7807c478bd9Sstevel@tonic-gate 	 * still empty, init_host_targets adds host targets based on the
7817c478bd9Sstevel@tonic-gate 	 * host target list of other phyints in the group.
7827c478bd9Sstevel@tonic-gate 	 */
7837c478bd9Sstevel@tonic-gate 	if (target_scan_reqd) {
7847c478bd9Sstevel@tonic-gate 		init_router_targets();
7857c478bd9Sstevel@tonic-gate 		init_host_targets();
7867c478bd9Sstevel@tonic-gate 	}
7877c478bd9Sstevel@tonic-gate }
7887c478bd9Sstevel@tonic-gate 
789e6ed03fcSmeem /*
790e3e7cd29Smeem  * Check test address configuration, and log notices/errors if appropriate.
791e3e7cd29Smeem  * Note that this function only logs pre-existing conditions (e.g., that
792e3e7cd29Smeem  * probe-based failure detection is disabled).
793e6ed03fcSmeem  */
794e6ed03fcSmeem static void
check_testconfig(void)795e6ed03fcSmeem check_testconfig(void)
796e6ed03fcSmeem {
797e6ed03fcSmeem 	struct phyint	*pi;
79880d556f9SHans Rosenfeld 	struct logint	*li;
799e6ed03fcSmeem 	char		abuf[INET6_ADDRSTRLEN];
800e3e7cd29Smeem 	int		pri;
801e6ed03fcSmeem 
802e6ed03fcSmeem 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
803e6ed03fcSmeem 		if (pi->pi_flags & IFF_OFFLINE)
804e6ed03fcSmeem 			continue;
805e6ed03fcSmeem 
806e6ed03fcSmeem 		if (PROBE_ENABLED(pi->pi_v4) || PROBE_ENABLED(pi->pi_v6)) {
807e6ed03fcSmeem 			if (pi->pi_taddrmsg_printed ||
808e6ed03fcSmeem 			    pi->pi_duptaddrmsg_printed) {
809e3e7cd29Smeem 				if (pi->pi_duptaddrmsg_printed)
810e3e7cd29Smeem 					pri = LOG_ERR;
811e3e7cd29Smeem 				else
812e3e7cd29Smeem 					pri = LOG_INFO;
813e3e7cd29Smeem 				logmsg(pri, "Test address now configured on "
814e6ed03fcSmeem 				    "interface %s; enabling probe-based "
815e6ed03fcSmeem 				    "failure detection on it\n", pi->pi_name);
816e6ed03fcSmeem 				pi->pi_taddrmsg_printed = 0;
817e6ed03fcSmeem 				pi->pi_duptaddrmsg_printed = 0;
818e6ed03fcSmeem 			}
819e6ed03fcSmeem 			continue;
820e6ed03fcSmeem 		}
821e6ed03fcSmeem 
822e6ed03fcSmeem 		li = NULL;
823e6ed03fcSmeem 		if (pi->pi_v4 != NULL && pi->pi_v4->pii_probe_logint != NULL &&
824e6ed03fcSmeem 		    pi->pi_v4->pii_probe_logint->li_dupaddr)
825e6ed03fcSmeem 			li = pi->pi_v4->pii_probe_logint;
826e6ed03fcSmeem 
827e6ed03fcSmeem 		if (pi->pi_v6 != NULL && pi->pi_v6->pii_probe_logint != NULL &&
828e6ed03fcSmeem 		    pi->pi_v6->pii_probe_logint->li_dupaddr)
829e6ed03fcSmeem 			li = pi->pi_v6->pii_probe_logint;
830e6ed03fcSmeem 
831e11c3f44Smeem 		if (li != NULL && li->li_dupaddr) {
832e11c3f44Smeem 			if (pi->pi_duptaddrmsg_printed)
833e11c3f44Smeem 				continue;
834e11c3f44Smeem 			logerr("Test address %s is not unique in group; "
835e11c3f44Smeem 			    "disabling probe-based failure detection on %s\n",
836e11c3f44Smeem 			    pr_addr(li->li_phyint_inst->pii_af,
837e11c3f44Smeem 			    li->li_addr, abuf, sizeof (abuf)), pi->pi_name);
838e11c3f44Smeem 			pi->pi_duptaddrmsg_printed = 1;
839e6ed03fcSmeem 			continue;
840e6ed03fcSmeem 		}
841e6ed03fcSmeem 
842e6ed03fcSmeem 		if (getcurrentsec() < pi->pi_taddrthresh)
843e6ed03fcSmeem 			continue;
844e6ed03fcSmeem 
845e6ed03fcSmeem 		if (!pi->pi_taddrmsg_printed) {
846e3e7cd29Smeem 			logtrace("No test address configured on interface %s; "
847e6ed03fcSmeem 			    "disabling probe-based failure detection on it\n",
848e6ed03fcSmeem 			    pi->pi_name);
849e6ed03fcSmeem 			pi->pi_taddrmsg_printed = 1;
850e6ed03fcSmeem 		}
851e6ed03fcSmeem 	}
852e6ed03fcSmeem }
853e6ed03fcSmeem 
8547c478bd9Sstevel@tonic-gate /*
8557c478bd9Sstevel@tonic-gate  * Check phyint group configuration, to detect any inconsistencies,
8567c478bd9Sstevel@tonic-gate  * and log an error message. This is called from runtimeouts every
8577c478bd9Sstevel@tonic-gate  * 20 secs. But the error message is displayed once. If the
8587c478bd9Sstevel@tonic-gate  * consistency is resolved by the admin, a recovery message is displayed
8597c478bd9Sstevel@tonic-gate  * once.
8607c478bd9Sstevel@tonic-gate  */
8617c478bd9Sstevel@tonic-gate static void
check_config(void)8627c478bd9Sstevel@tonic-gate check_config(void)
8637c478bd9Sstevel@tonic-gate {
8647c478bd9Sstevel@tonic-gate 	struct phyint_group *pg;
8657c478bd9Sstevel@tonic-gate 	struct phyint *pi;
8667c478bd9Sstevel@tonic-gate 	boolean_t v4_in_group;
8677c478bd9Sstevel@tonic-gate 	boolean_t v6_in_group;
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate 	/*
870e11c3f44Smeem 	 * All phyints of a group must be homogeneous to ensure that they can
871e11c3f44Smeem 	 * take over for one another.  If any phyint in a group has IPv4
872e11c3f44Smeem 	 * plumbed, check that all phyints have IPv4 plumbed.  Do a similar
873e11c3f44Smeem 	 * check for IPv6.
8747c478bd9Sstevel@tonic-gate 	 */
8757c478bd9Sstevel@tonic-gate 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
8767c478bd9Sstevel@tonic-gate 		if (pg == phyint_anongroup)
8777c478bd9Sstevel@tonic-gate 			continue;
8787c478bd9Sstevel@tonic-gate 
8797c478bd9Sstevel@tonic-gate 		v4_in_group = _B_FALSE;
8807c478bd9Sstevel@tonic-gate 		v6_in_group = _B_FALSE;
8817c478bd9Sstevel@tonic-gate 		/*
8827c478bd9Sstevel@tonic-gate 		 * 1st pass. Determine if at least 1 phyint in the group
8837c478bd9Sstevel@tonic-gate 		 * has IPv4 plumbed and if so set v4_in_group to true.
8847c478bd9Sstevel@tonic-gate 		 * Repeat similarly for IPv6.
8857c478bd9Sstevel@tonic-gate 		 */
8867c478bd9Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
8877c478bd9Sstevel@tonic-gate 			if (pi->pi_v4 != NULL)
8887c478bd9Sstevel@tonic-gate 				v4_in_group = _B_TRUE;
8897c478bd9Sstevel@tonic-gate 			if (pi->pi_v6 != NULL)
8907c478bd9Sstevel@tonic-gate 				v6_in_group = _B_TRUE;
8917c478bd9Sstevel@tonic-gate 		}
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate 		/*
8947c478bd9Sstevel@tonic-gate 		 * 2nd pass. If v4_in_group is true, check that phyint
8957c478bd9Sstevel@tonic-gate 		 * has IPv4 plumbed. Repeat similarly for IPv6. Print
8967c478bd9Sstevel@tonic-gate 		 * out a message the 1st time only.
8977c478bd9Sstevel@tonic-gate 		 */
8987c478bd9Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
8997c478bd9Sstevel@tonic-gate 			if (pi->pi_flags & IFF_OFFLINE)
9007c478bd9Sstevel@tonic-gate 				continue;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 			if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
9037c478bd9Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
904e11c3f44Smeem 					logerr("IP interface %s in group %s is"
905e11c3f44Smeem 					    " not plumbed for IPv4, affecting"
906e11c3f44Smeem 					    " IPv4 connectivity\n",
9077c478bd9Sstevel@tonic-gate 					    pi->pi_name,
9087c478bd9Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9097c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9107c478bd9Sstevel@tonic-gate 				}
9117c478bd9Sstevel@tonic-gate 			} else if (v6_in_group == _B_TRUE &&
9127c478bd9Sstevel@tonic-gate 			    pi->pi_v6 == NULL) {
9137c478bd9Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
914e11c3f44Smeem 					logerr("IP interface %s in group %s is"
915e11c3f44Smeem 					    " not plumbed for IPv6, affecting"
916e11c3f44Smeem 					    " IPv6 connectivity\n",
9177c478bd9Sstevel@tonic-gate 					    pi->pi_name,
9187c478bd9Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9197c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9207c478bd9Sstevel@tonic-gate 				}
9217c478bd9Sstevel@tonic-gate 			} else {
9227c478bd9Sstevel@tonic-gate 				/*
9237c478bd9Sstevel@tonic-gate 				 * The phyint matches the group configuration,
9247c478bd9Sstevel@tonic-gate 				 * if we have reached this point. If it was
9257c478bd9Sstevel@tonic-gate 				 * improperly configured earlier, log an
9267c478bd9Sstevel@tonic-gate 				 * error recovery message
9277c478bd9Sstevel@tonic-gate 				 */
9287c478bd9Sstevel@tonic-gate 				if (pi->pi_cfgmsg_printed) {
929e11c3f44Smeem 					logerr("IP interface %s is now"
930e11c3f44Smeem 					    " consistent with group %s "
931e11c3f44Smeem 					    " and connectivity is restored\n",
932e11c3f44Smeem 					    pi->pi_name, pi->pi_group->pg_name);
9337c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 0;
9347c478bd9Sstevel@tonic-gate 				}
9357c478bd9Sstevel@tonic-gate 			}
9367c478bd9Sstevel@tonic-gate 
9377c478bd9Sstevel@tonic-gate 		}
9387c478bd9Sstevel@tonic-gate 	}
9397c478bd9Sstevel@tonic-gate }
9407c478bd9Sstevel@tonic-gate 
9417c478bd9Sstevel@tonic-gate /*
9427c478bd9Sstevel@tonic-gate  * Timer mechanism using relative time (in milliseconds) from the
9437c478bd9Sstevel@tonic-gate  * previous timer event. Timers exceeding TIMER_INFINITY milliseconds
9447c478bd9Sstevel@tonic-gate  * will fire after TIMER_INFINITY milliseconds.
9457c478bd9Sstevel@tonic-gate  * Unsigned arithmetic note: We assume a 32-bit circular sequence space for
9467c478bd9Sstevel@tonic-gate  * time values. Hence 2 consecutive timer events cannot be spaced farther
9477c478bd9Sstevel@tonic-gate  * than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum value
9487c478bd9Sstevel@tonic-gate  * that can be passed for the delay parameter of timer_schedule()
9497c478bd9Sstevel@tonic-gate  */
9507c478bd9Sstevel@tonic-gate static uint_t timer_next;	/* Currently scheduled timeout */
9517c478bd9Sstevel@tonic-gate static boolean_t timer_active = _B_FALSE; /* SIGALRM has not yet occurred */
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate static void
timer_init(void)9547c478bd9Sstevel@tonic-gate timer_init(void)
9557c478bd9Sstevel@tonic-gate {
9567c478bd9Sstevel@tonic-gate 	timer_next = getcurrenttime() + TIMER_INFINITY;
9577c478bd9Sstevel@tonic-gate 	/*
9587c478bd9Sstevel@tonic-gate 	 * The call to run_timeouts() will get the timer started
9597c478bd9Sstevel@tonic-gate 	 * Since there are no phyints at this point, the timer will
9607c478bd9Sstevel@tonic-gate 	 * be set for IF_SCAN_INTERVAL ms.
9617c478bd9Sstevel@tonic-gate 	 */
9627c478bd9Sstevel@tonic-gate 	run_timeouts();
9637c478bd9Sstevel@tonic-gate }
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate /*
9667c478bd9Sstevel@tonic-gate  * Make sure the next SIGALRM occurs delay milliseconds from the current
9677c478bd9Sstevel@tonic-gate  * time if not earlier. We are interested only in time differences.
9687c478bd9Sstevel@tonic-gate  */
9697c478bd9Sstevel@tonic-gate void
timer_schedule(uint_t delay)9707c478bd9Sstevel@tonic-gate timer_schedule(uint_t delay)
9717c478bd9Sstevel@tonic-gate {
9727c478bd9Sstevel@tonic-gate 	uint_t now;
9737c478bd9Sstevel@tonic-gate 	struct itimerval itimerval;
9747c478bd9Sstevel@tonic-gate 
9757c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
9767c478bd9Sstevel@tonic-gate 		logdebug("timer_schedule(%u)\n", delay);
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	assert(delay <= TIMER_INFINITY);
9797c478bd9Sstevel@tonic-gate 
9807c478bd9Sstevel@tonic-gate 	now = getcurrenttime();
9817c478bd9Sstevel@tonic-gate 	if (delay == 0) {
9827c478bd9Sstevel@tonic-gate 		/* Minimum allowed delay */
9837c478bd9Sstevel@tonic-gate 		delay = 1;
9847c478bd9Sstevel@tonic-gate 	}
9857c478bd9Sstevel@tonic-gate 	/* Will this timer occur before the currently scheduled SIGALRM? */
9867c478bd9Sstevel@tonic-gate 	if (timer_active && TIME_GE(now + delay, timer_next)) {
9877c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
9887c478bd9Sstevel@tonic-gate 			logdebug("timer_schedule(%u) - no action: "
9897c478bd9Sstevel@tonic-gate 			    "now %u next %u\n", delay, now, timer_next);
9907c478bd9Sstevel@tonic-gate 		}
9917c478bd9Sstevel@tonic-gate 		return;
9927c478bd9Sstevel@tonic-gate 	}
9937c478bd9Sstevel@tonic-gate 	timer_next = now + delay;
9947c478bd9Sstevel@tonic-gate 
9957c478bd9Sstevel@tonic-gate 	itimerval.it_value.tv_sec = delay / 1000;
9967c478bd9Sstevel@tonic-gate 	itimerval.it_value.tv_usec = (delay % 1000) * 1000;
9977c478bd9Sstevel@tonic-gate 	itimerval.it_interval.tv_sec = 0;
9987c478bd9Sstevel@tonic-gate 	itimerval.it_interval.tv_usec = 0;
9997c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER) {
10007c478bd9Sstevel@tonic-gate 		logdebug("timer_schedule(%u): sec %ld usec %ld\n",
10017c478bd9Sstevel@tonic-gate 		    delay, itimerval.it_value.tv_sec,
10027c478bd9Sstevel@tonic-gate 		    itimerval.it_value.tv_usec);
10037c478bd9Sstevel@tonic-gate 	}
10047c478bd9Sstevel@tonic-gate 	timer_active = _B_TRUE;
10057c478bd9Sstevel@tonic-gate 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) {
10067c478bd9Sstevel@tonic-gate 		logperror("timer_schedule: setitimer");
10077c478bd9Sstevel@tonic-gate 		exit(2);
10087c478bd9Sstevel@tonic-gate 	}
10097c478bd9Sstevel@tonic-gate }
10107c478bd9Sstevel@tonic-gate 
1011c61f3fa8Smeem static void
timer_cancel(void)1012c61f3fa8Smeem timer_cancel(void)
1013c61f3fa8Smeem {
1014c61f3fa8Smeem 	struct itimerval itimerval;
1015c61f3fa8Smeem 
1016c61f3fa8Smeem 	if (debug & D_TIMER)
1017c61f3fa8Smeem 		logdebug("timer_cancel()\n");
1018c61f3fa8Smeem 
1019c61f3fa8Smeem 	bzero(&itimerval, sizeof (itimerval));
1020c61f3fa8Smeem 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0)
1021c61f3fa8Smeem 		logperror("timer_cancel: setitimer");
1022c61f3fa8Smeem }
1023c61f3fa8Smeem 
10247c478bd9Sstevel@tonic-gate /*
10257c478bd9Sstevel@tonic-gate  * Timer has fired. Determine when the next timer event will occur by asking
10267c478bd9Sstevel@tonic-gate  * all the timer routines. Should not be called from a timer routine.
10277c478bd9Sstevel@tonic-gate  */
10287c478bd9Sstevel@tonic-gate static void
run_timeouts(void)10297c478bd9Sstevel@tonic-gate run_timeouts(void)
10307c478bd9Sstevel@tonic-gate {
10317c478bd9Sstevel@tonic-gate 	uint_t next;
10327c478bd9Sstevel@tonic-gate 	uint_t next_event_time;
10337c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
10347c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
10357c478bd9Sstevel@tonic-gate 	static boolean_t timeout_running;
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate 	/* assert that recursive timeouts don't happen. */
10387c478bd9Sstevel@tonic-gate 	assert(!timeout_running);
10397c478bd9Sstevel@tonic-gate 
10407c478bd9Sstevel@tonic-gate 	timeout_running = _B_TRUE;
10417c478bd9Sstevel@tonic-gate 
10427c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
10437c478bd9Sstevel@tonic-gate 		logdebug("run_timeouts()\n");
10447c478bd9Sstevel@tonic-gate 
1045e6ed03fcSmeem 	if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) {
1046e6ed03fcSmeem 		initifs();
1047e6ed03fcSmeem 		check_config();
1048e6ed03fcSmeem 	}
1049e6ed03fcSmeem 
10507c478bd9Sstevel@tonic-gate 	next = TIMER_INFINITY;
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
10537c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
10547c478bd9Sstevel@tonic-gate 		next_event_time = phyint_inst_timer(pii);
10557c478bd9Sstevel@tonic-gate 		if (next_event_time != TIMER_INFINITY && next_event_time < next)
10567c478bd9Sstevel@tonic-gate 			next = next_event_time;
10577c478bd9Sstevel@tonic-gate 
10587c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
10597c478bd9Sstevel@tonic-gate 			logdebug("run_timeouts(%s %s): next scheduled for"
10607c478bd9Sstevel@tonic-gate 			    " this phyint inst %u, next scheduled global"
10617c478bd9Sstevel@tonic-gate 			    " %u ms\n",
10627c478bd9Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
10637c478bd9Sstevel@tonic-gate 			    next_event_time, next);
10647c478bd9Sstevel@tonic-gate 		}
10657c478bd9Sstevel@tonic-gate 	}
10667c478bd9Sstevel@tonic-gate 
10677c478bd9Sstevel@tonic-gate 	/*
10687c478bd9Sstevel@tonic-gate 	 * Make sure initifs() is called at least once every
10697c478bd9Sstevel@tonic-gate 	 * IF_SCAN_INTERVAL, to make sure that we are in sync
10707c478bd9Sstevel@tonic-gate 	 * with the kernel, in case we have missed any routing
10717c478bd9Sstevel@tonic-gate 	 * socket messages.
10727c478bd9Sstevel@tonic-gate 	 */
10737c478bd9Sstevel@tonic-gate 	if (next > IF_SCAN_INTERVAL)
10747c478bd9Sstevel@tonic-gate 		next = IF_SCAN_INTERVAL;
10757c478bd9Sstevel@tonic-gate 
10767c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
10777c478bd9Sstevel@tonic-gate 		logdebug("run_timeouts: %u ms\n", next);
10787c478bd9Sstevel@tonic-gate 
10797c478bd9Sstevel@tonic-gate 	timer_schedule(next);
10807c478bd9Sstevel@tonic-gate 	timeout_running = _B_FALSE;
10817c478bd9Sstevel@tonic-gate }
10827c478bd9Sstevel@tonic-gate 
10837c478bd9Sstevel@tonic-gate static int eventpipe_read = -1;	/* Used for synchronous signal delivery */
10847c478bd9Sstevel@tonic-gate static int eventpipe_write = -1;
1085e11c3f44Smeem boolean_t cleanup_started = _B_FALSE;	/* true if we're going away */
1086e11c3f44Smeem 
10877c478bd9Sstevel@tonic-gate /*
10887c478bd9Sstevel@tonic-gate  * Ensure that signals are processed synchronously with the rest of
10897c478bd9Sstevel@tonic-gate  * the code by just writing a one character signal number on the pipe.
10907c478bd9Sstevel@tonic-gate  * The poll loop will pick this up and process the signal event.
10917c478bd9Sstevel@tonic-gate  */
10927c478bd9Sstevel@tonic-gate static void
sig_handler(int signo)10937c478bd9Sstevel@tonic-gate sig_handler(int signo)
10947c478bd9Sstevel@tonic-gate {
10957c478bd9Sstevel@tonic-gate 	uchar_t buf = (uchar_t)signo;
10967c478bd9Sstevel@tonic-gate 
10977c478bd9Sstevel@tonic-gate 	/*
10987c478bd9Sstevel@tonic-gate 	 * Don't write to pipe if cleanup has already begun. cleanup()
10997c478bd9Sstevel@tonic-gate 	 * might have closed the pipe already
11007c478bd9Sstevel@tonic-gate 	 */
11017c478bd9Sstevel@tonic-gate 	if (cleanup_started)
11027c478bd9Sstevel@tonic-gate 		return;
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 	if (eventpipe_write == -1) {
11057c478bd9Sstevel@tonic-gate 		logerr("sig_handler: no pipe found\n");
11067c478bd9Sstevel@tonic-gate 		return;
11077c478bd9Sstevel@tonic-gate 	}
11087c478bd9Sstevel@tonic-gate 	if (write(eventpipe_write, &buf, sizeof (buf)) < 0)
11097c478bd9Sstevel@tonic-gate 		logperror("sig_handler: write");
11107c478bd9Sstevel@tonic-gate }
11117c478bd9Sstevel@tonic-gate 
/* Accumulated missed-probe statistics; defined outside this section */
extern struct probes_missed probes_missed;
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate /*
11157c478bd9Sstevel@tonic-gate  * Pick up a signal "byte" from the pipe and process it.
11167c478bd9Sstevel@tonic-gate  */
11177c478bd9Sstevel@tonic-gate static void
in_signal(int fd)11187c478bd9Sstevel@tonic-gate in_signal(int fd)
11197c478bd9Sstevel@tonic-gate {
11207c478bd9Sstevel@tonic-gate 	uchar_t buf;
11217c478bd9Sstevel@tonic-gate 	uint64_t  sent, acked, lost, unacked, unknown;
11227c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
11237c478bd9Sstevel@tonic-gate 	int pr_ndx;
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	switch (read(fd, &buf, sizeof (buf))) {
11267c478bd9Sstevel@tonic-gate 	case -1:
11277c478bd9Sstevel@tonic-gate 		logperror("in_signal: read");
11287c478bd9Sstevel@tonic-gate 		exit(1);
11297c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
11307c478bd9Sstevel@tonic-gate 	case 1:
11317c478bd9Sstevel@tonic-gate 		break;
11327c478bd9Sstevel@tonic-gate 	case 0:
11337c478bd9Sstevel@tonic-gate 		logerr("in_signal: read end of file\n");
11347c478bd9Sstevel@tonic-gate 		exit(1);
11357c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
11367c478bd9Sstevel@tonic-gate 	default:
11377c478bd9Sstevel@tonic-gate 		logerr("in_signal: read > 1\n");
11387c478bd9Sstevel@tonic-gate 		exit(1);
11397c478bd9Sstevel@tonic-gate 	}
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
11427c478bd9Sstevel@tonic-gate 		logdebug("in_signal() got %d\n", buf);
11437c478bd9Sstevel@tonic-gate 
11447c478bd9Sstevel@tonic-gate 	switch (buf) {
11457c478bd9Sstevel@tonic-gate 	case SIGALRM:
11467c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
11477c478bd9Sstevel@tonic-gate 			uint_t now = getcurrenttime();
11487c478bd9Sstevel@tonic-gate 
11497c478bd9Sstevel@tonic-gate 			logdebug("in_signal(SIGALRM) delta %u\n",
11507c478bd9Sstevel@tonic-gate 			    now - timer_next);
11517c478bd9Sstevel@tonic-gate 		}
11527c478bd9Sstevel@tonic-gate 		timer_active = _B_FALSE;
11537c478bd9Sstevel@tonic-gate 		run_timeouts();
11547c478bd9Sstevel@tonic-gate 		break;
11557c478bd9Sstevel@tonic-gate 	case SIGUSR1:
11567c478bd9Sstevel@tonic-gate 		logdebug("Printing configuration:\n");
11577c478bd9Sstevel@tonic-gate 		/* Print out the internal tables */
11587c478bd9Sstevel@tonic-gate 		phyint_inst_print_all();
11597c478bd9Sstevel@tonic-gate 
11607c478bd9Sstevel@tonic-gate 		/*
11617c478bd9Sstevel@tonic-gate 		 * Print out the accumulated statistics about missed
11627c478bd9Sstevel@tonic-gate 		 * probes (happens due to scheduling delay).
11637c478bd9Sstevel@tonic-gate 		 */
11647c478bd9Sstevel@tonic-gate 		logerr("Missed sending total of %d probes spread over"
11657c478bd9Sstevel@tonic-gate 		    " %d occurrences\n", probes_missed.pm_nprobes,
11667c478bd9Sstevel@tonic-gate 		    probes_missed.pm_ntimes);
11677c478bd9Sstevel@tonic-gate 
11687c478bd9Sstevel@tonic-gate 		/*
11697c478bd9Sstevel@tonic-gate 		 * Print out the accumulated statistics about probes
11707c478bd9Sstevel@tonic-gate 		 * that were sent.
11717c478bd9Sstevel@tonic-gate 		 */
11727c478bd9Sstevel@tonic-gate 		for (pii = phyint_instances; pii != NULL;
11737c478bd9Sstevel@tonic-gate 		    pii = pii->pii_next) {
11747c478bd9Sstevel@tonic-gate 			unacked = 0;
11757c478bd9Sstevel@tonic-gate 			acked = pii->pii_cum_stats.acked;
11767c478bd9Sstevel@tonic-gate 			lost = pii->pii_cum_stats.lost;
11777c478bd9Sstevel@tonic-gate 			sent = pii->pii_cum_stats.sent;
11787c478bd9Sstevel@tonic-gate 			unknown = pii->pii_cum_stats.unknown;
11797c478bd9Sstevel@tonic-gate 			for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) {
11807c478bd9Sstevel@tonic-gate 				switch (pii->pii_probes[pr_ndx].pr_status) {
11817c478bd9Sstevel@tonic-gate 				case PR_ACKED:
11827c478bd9Sstevel@tonic-gate 					acked++;
11837c478bd9Sstevel@tonic-gate 					break;
11847c478bd9Sstevel@tonic-gate 				case PR_LOST:
11857c478bd9Sstevel@tonic-gate 					lost++;
11867c478bd9Sstevel@tonic-gate 					break;
11877c478bd9Sstevel@tonic-gate 				case PR_UNACKED:
11887c478bd9Sstevel@tonic-gate 					unacked++;
11897c478bd9Sstevel@tonic-gate 					break;
11907c478bd9Sstevel@tonic-gate 				}
11917c478bd9Sstevel@tonic-gate 			}
11927c478bd9Sstevel@tonic-gate 			logerr("\nProbe stats on (%s %s)\n"
11937c478bd9Sstevel@tonic-gate 			    "Number of probes sent %lld\n"
11947c478bd9Sstevel@tonic-gate 			    "Number of probe acks received %lld\n"
11957c478bd9Sstevel@tonic-gate 			    "Number of probes/acks lost %lld\n"
1196e11c3f44Smeem 			    "Number of valid unacknowledged probes %lld\n"
11977c478bd9Sstevel@tonic-gate 			    "Number of ambiguous probe acks received %lld\n",
11987c478bd9Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_name,
11997c478bd9Sstevel@tonic-gate 			    sent, acked, lost, unacked, unknown);
12007c478bd9Sstevel@tonic-gate 		}
12017c478bd9Sstevel@tonic-gate 		break;
12027c478bd9Sstevel@tonic-gate 	case SIGHUP:
12037c478bd9Sstevel@tonic-gate 		logerr("SIGHUP: restart and reread config file\n");
1204c61f3fa8Smeem 		/*
1205c61f3fa8Smeem 		 * Cancel the interval timer.  Needed since setitimer() uses
1206c61f3fa8Smeem 		 * alarm() and the time left is inherited across exec(), and
1207c61f3fa8Smeem 		 * thus the SIGALRM may be delivered before a handler has been
1208c61f3fa8Smeem 		 * setup, causing in.mpathd to erroneously exit.
1209c61f3fa8Smeem 		 */
1210c61f3fa8Smeem 		timer_cancel();
12117c478bd9Sstevel@tonic-gate 		cleanup();
12127c478bd9Sstevel@tonic-gate 		(void) execv(argv0[0], argv0);
12137c478bd9Sstevel@tonic-gate 		_exit(0177);
12147c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
12157c478bd9Sstevel@tonic-gate 	case SIGINT:
12167c478bd9Sstevel@tonic-gate 	case SIGTERM:
12177c478bd9Sstevel@tonic-gate 	case SIGQUIT:
12187c478bd9Sstevel@tonic-gate 		cleanup();
12197c478bd9Sstevel@tonic-gate 		exit(0);
12207c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
12217c478bd9Sstevel@tonic-gate 	default:
12227c478bd9Sstevel@tonic-gate 		logerr("in_signal: unknown signal: %d\n", buf);
12237c478bd9Sstevel@tonic-gate 	}
12247c478bd9Sstevel@tonic-gate }
12257c478bd9Sstevel@tonic-gate 
12267c478bd9Sstevel@tonic-gate static void
cleanup(void)12277c478bd9Sstevel@tonic-gate cleanup(void)
12287c478bd9Sstevel@tonic-gate {
12297c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
12307c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	/*
12337c478bd9Sstevel@tonic-gate 	 * Make sure that we don't write to eventpipe in
12347c478bd9Sstevel@tonic-gate 	 * sig_handler() if any signal notably SIGALRM,
12357c478bd9Sstevel@tonic-gate 	 * occurs after we close the eventpipe descriptor below
12367c478bd9Sstevel@tonic-gate 	 */
12377c478bd9Sstevel@tonic-gate 	cleanup_started = _B_TRUE;
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
12407c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
12417c478bd9Sstevel@tonic-gate 		phyint_inst_delete(pii);
12427c478bd9Sstevel@tonic-gate 	}
12437c478bd9Sstevel@tonic-gate 
12447c478bd9Sstevel@tonic-gate 	(void) close(ifsock_v4);
12457c478bd9Sstevel@tonic-gate 	(void) close(ifsock_v6);
12467c478bd9Sstevel@tonic-gate 	(void) close(rtsock_v4);
12477c478bd9Sstevel@tonic-gate 	(void) close(rtsock_v6);
12487c478bd9Sstevel@tonic-gate 	(void) close(lsock_v4);
12497c478bd9Sstevel@tonic-gate 	(void) close(lsock_v6);
12507c478bd9Sstevel@tonic-gate 	(void) close(0);
12517c478bd9Sstevel@tonic-gate 	(void) close(1);
12527c478bd9Sstevel@tonic-gate 	(void) close(2);
12537c478bd9Sstevel@tonic-gate 	(void) close(mibfd);
12547c478bd9Sstevel@tonic-gate 	(void) close(eventpipe_read);
12557c478bd9Sstevel@tonic-gate 	(void) close(eventpipe_write);
12567c478bd9Sstevel@tonic-gate }
12577c478bd9Sstevel@tonic-gate 
12587c478bd9Sstevel@tonic-gate /*
12597c478bd9Sstevel@tonic-gate  * Create pipe for signal delivery and set up signal handlers.
12607c478bd9Sstevel@tonic-gate  */
12617c478bd9Sstevel@tonic-gate static void
setup_eventpipe(void)12627c478bd9Sstevel@tonic-gate setup_eventpipe(void)
12637c478bd9Sstevel@tonic-gate {
12647c478bd9Sstevel@tonic-gate 	int fds[2];
12657c478bd9Sstevel@tonic-gate 	struct sigaction act;
12667c478bd9Sstevel@tonic-gate 
12677c478bd9Sstevel@tonic-gate 	if ((pipe(fds)) < 0) {
12687c478bd9Sstevel@tonic-gate 		logperror("setup_eventpipe: pipe");
12697c478bd9Sstevel@tonic-gate 		exit(1);
12707c478bd9Sstevel@tonic-gate 	}
12717c478bd9Sstevel@tonic-gate 	eventpipe_read = fds[0];
12727c478bd9Sstevel@tonic-gate 	eventpipe_write = fds[1];
12737c478bd9Sstevel@tonic-gate 	if (poll_add(eventpipe_read) == -1) {
12747c478bd9Sstevel@tonic-gate 		exit(1);
12757c478bd9Sstevel@tonic-gate 	}
12767c478bd9Sstevel@tonic-gate 
12777c478bd9Sstevel@tonic-gate 	act.sa_handler = sig_handler;
12787c478bd9Sstevel@tonic-gate 	act.sa_flags = SA_RESTART;
12797c478bd9Sstevel@tonic-gate 	(void) sigaction(SIGALRM, &act, NULL);
12807c478bd9Sstevel@tonic-gate 
12817c478bd9Sstevel@tonic-gate 	(void) sigset(SIGHUP, sig_handler);
12827c478bd9Sstevel@tonic-gate 	(void) sigset(SIGUSR1, sig_handler);
12837c478bd9Sstevel@tonic-gate 	(void) sigset(SIGTERM, sig_handler);
12847c478bd9Sstevel@tonic-gate 	(void) sigset(SIGINT, sig_handler);
12857c478bd9Sstevel@tonic-gate 	(void) sigset(SIGQUIT, sig_handler);
12867c478bd9Sstevel@tonic-gate }
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate /*
12897c478bd9Sstevel@tonic-gate  * Create a routing socket for receiving RTM_IFINFO messages.
12907c478bd9Sstevel@tonic-gate  */
12917c478bd9Sstevel@tonic-gate static int
setup_rtsock(int af)12927c478bd9Sstevel@tonic-gate setup_rtsock(int af)
12937c478bd9Sstevel@tonic-gate {
12947c478bd9Sstevel@tonic-gate 	int	s;
12957c478bd9Sstevel@tonic-gate 	int	flags;
1296e11c3f44Smeem 	int	aware = RTAW_UNDER_IPMP;
12977c478bd9Sstevel@tonic-gate 
12987c478bd9Sstevel@tonic-gate 	s = socket(PF_ROUTE, SOCK_RAW, af);
12997c478bd9Sstevel@tonic-gate 	if (s == -1) {
13007c478bd9Sstevel@tonic-gate 		logperror("setup_rtsock: socket PF_ROUTE");
13017c478bd9Sstevel@tonic-gate 		exit(1);
13027c478bd9Sstevel@tonic-gate 	}
1303e11c3f44Smeem 
1304e11c3f44Smeem 	if (setsockopt(s, SOL_ROUTE, RT_AWARE, &aware, sizeof (aware)) == -1) {
1305e11c3f44Smeem 		logperror("setup_rtsock: setsockopt RT_AWARE");
1306e11c3f44Smeem 		(void) close(s);
1307e11c3f44Smeem 		exit(1);
1308e11c3f44Smeem 	}
1309e11c3f44Smeem 
13107c478bd9Sstevel@tonic-gate 	if ((flags = fcntl(s, F_GETFL, 0)) < 0) {
13117c478bd9Sstevel@tonic-gate 		logperror("setup_rtsock: fcntl F_GETFL");
13127c478bd9Sstevel@tonic-gate 		(void) close(s);
13137c478bd9Sstevel@tonic-gate 		exit(1);
13147c478bd9Sstevel@tonic-gate 	}
13157c478bd9Sstevel@tonic-gate 	if ((fcntl(s, F_SETFL, flags | O_NONBLOCK)) < 0) {
13167c478bd9Sstevel@tonic-gate 		logperror("setup_rtsock: fcntl F_SETFL");
13177c478bd9Sstevel@tonic-gate 		(void) close(s);
13187c478bd9Sstevel@tonic-gate 		exit(1);
13197c478bd9Sstevel@tonic-gate 	}
13207c478bd9Sstevel@tonic-gate 	if (poll_add(s) == -1) {
13217c478bd9Sstevel@tonic-gate 		(void) close(s);
13227c478bd9Sstevel@tonic-gate 		exit(1);
13237c478bd9Sstevel@tonic-gate 	}
13247c478bd9Sstevel@tonic-gate 	return (s);
13257c478bd9Sstevel@tonic-gate }
13267c478bd9Sstevel@tonic-gate 
13277c478bd9Sstevel@tonic-gate /*
13287c478bd9Sstevel@tonic-gate  * Process an RTM_IFINFO message received on a routing socket.
13297c478bd9Sstevel@tonic-gate  * The return value indicates whether a full interface scan is required.
1330e11c3f44Smeem  * Link up/down notifications are reflected in the IFF_RUNNING flag.
13317c478bd9Sstevel@tonic-gate  * If just the state of the IFF_RUNNING interface flag has changed, a
13327c478bd9Sstevel@tonic-gate  * a full interface scan isn't required.
13337c478bd9Sstevel@tonic-gate  */
13347c478bd9Sstevel@tonic-gate static boolean_t
process_rtm_ifinfo(if_msghdr_t * ifm,int type)13357c478bd9Sstevel@tonic-gate process_rtm_ifinfo(if_msghdr_t *ifm, int type)
13367c478bd9Sstevel@tonic-gate {
13377c478bd9Sstevel@tonic-gate 	struct sockaddr_dl *sdl;
13387c478bd9Sstevel@tonic-gate 	struct phyint *pi;
13397c478bd9Sstevel@tonic-gate 	uint64_t old_flags;
13407c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
13417c478bd9Sstevel@tonic-gate 
13427c478bd9Sstevel@tonic-gate 	assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP);
13437c478bd9Sstevel@tonic-gate 
13447c478bd9Sstevel@tonic-gate 	/*
13457c478bd9Sstevel@tonic-gate 	 * Although the sockaddr_dl structure is directly after the
13467c478bd9Sstevel@tonic-gate 	 * if_msghdr_t structure. At the time of writing, the size of the
13477c478bd9Sstevel@tonic-gate 	 * if_msghdr_t structure is different on 32 and 64 bit kernels, due
13487c478bd9Sstevel@tonic-gate 	 * to the presence of a timeval structure, which contains longs,
13497c478bd9Sstevel@tonic-gate 	 * in the if_data structure.  Anyway, we know where the message ends,
13507c478bd9Sstevel@tonic-gate 	 * so we work backwards to get the start of the sockaddr_dl structure.
13517c478bd9Sstevel@tonic-gate 	 */
13527c478bd9Sstevel@tonic-gate 	/*LINTED*/
13537c478bd9Sstevel@tonic-gate 	sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen -
1354e6ed03fcSmeem 	    sizeof (struct sockaddr_dl));
13557c478bd9Sstevel@tonic-gate 
13567c478bd9Sstevel@tonic-gate 	assert(sdl->sdl_family == AF_LINK);
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate 	/*
13597c478bd9Sstevel@tonic-gate 	 * The interface name is in sdl_data.
13607c478bd9Sstevel@tonic-gate 	 * RTM_IFINFO messages are only generated for logical interface
13617c478bd9Sstevel@tonic-gate 	 * zero, so there is no colon and logical interface number to
13627c478bd9Sstevel@tonic-gate 	 * strip from the name.	 The name is not null terminated, but
13637c478bd9Sstevel@tonic-gate 	 * there should be enough space in sdl_data to add the null.
13647c478bd9Sstevel@tonic-gate 	 */
13657c478bd9Sstevel@tonic-gate 	if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) {
13667c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
1367e6ed03fcSmeem 			logdebug("process_rtm_ifinfo: phyint name too long\n");
13687c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
13697c478bd9Sstevel@tonic-gate 	}
13707c478bd9Sstevel@tonic-gate 	sdl->sdl_data[sdl->sdl_nlen] = 0;
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate 	pi = phyint_lookup(sdl->sdl_data);
13737c478bd9Sstevel@tonic-gate 	if (pi == NULL) {
13747c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
13757c478bd9Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: phyint lookup failed"
1376e6ed03fcSmeem 			    " for %s\n", sdl->sdl_data);
13777c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
13787c478bd9Sstevel@tonic-gate 	}
13797c478bd9Sstevel@tonic-gate 
13807c478bd9Sstevel@tonic-gate 	/*
13817c478bd9Sstevel@tonic-gate 	 * We want to try and avoid doing a full interface scan for
1382e11c3f44Smeem 	 * link state notifications from the datalink layer, as indicated
13837c478bd9Sstevel@tonic-gate 	 * by the state of the IFF_RUNNING flag.  If just the
13847c478bd9Sstevel@tonic-gate 	 * IFF_RUNNING flag has changed state, the link state changes
13857c478bd9Sstevel@tonic-gate 	 * are processed without a full scan.
13867c478bd9Sstevel@tonic-gate 	 * If there is both an IPv4 and IPv6 instance associated with
13877c478bd9Sstevel@tonic-gate 	 * the physical interface, we will get an RTM_IFINFO message
13887c478bd9Sstevel@tonic-gate 	 * for each instance.  If we just maintained a single copy of
13897c478bd9Sstevel@tonic-gate 	 * the physical interface flags, it would appear that no flags
13907c478bd9Sstevel@tonic-gate 	 * had changed when the second message is processed, leading us
13917c478bd9Sstevel@tonic-gate 	 * to believe that the message wasn't generated by a flags change,
13927c478bd9Sstevel@tonic-gate 	 * and that a full interface scan is required.
13937c478bd9Sstevel@tonic-gate 	 * To get around this problem, two additional copies of the flags
13947c478bd9Sstevel@tonic-gate 	 * are kept, one copy for each instance.  These are only used in
13957c478bd9Sstevel@tonic-gate 	 * this routine.  At any one time, all three copies of the flags
13967c478bd9Sstevel@tonic-gate 	 * should be identical except for the IFF_RUNNING flag.	 The
13977c478bd9Sstevel@tonic-gate 	 * copy of the flags in the "phyint" structure is always up to
13987c478bd9Sstevel@tonic-gate 	 * date.
13997c478bd9Sstevel@tonic-gate 	 */
14007c478bd9Sstevel@tonic-gate 	pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6;
14017c478bd9Sstevel@tonic-gate 	if (pii == NULL) {
14027c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
14037c478bd9Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: no instance of address "
14047c478bd9Sstevel@tonic-gate 			    "family %s for %s\n", AF_STR(type), pi->pi_name);
14057c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
14067c478bd9Sstevel@tonic-gate 	}
14077c478bd9Sstevel@tonic-gate 
14087c478bd9Sstevel@tonic-gate 	old_flags = pii->pii_flags;
14097c478bd9Sstevel@tonic-gate 	pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags);
14107c478bd9Sstevel@tonic-gate 	pi->pi_flags = pii->pii_flags;
14117c478bd9Sstevel@tonic-gate 
14127c478bd9Sstevel@tonic-gate 	if (debug & D_LINKNOTE) {
14137c478bd9Sstevel@tonic-gate 		logdebug("process_rtm_ifinfo: %s address family: %s, "
14147c478bd9Sstevel@tonic-gate 		    "old flags: %llx, new flags: %llx\n", pi->pi_name,
14157c478bd9Sstevel@tonic-gate 		    AF_STR(type), old_flags, pi->pi_flags);
14167c478bd9Sstevel@tonic-gate 	}
14177c478bd9Sstevel@tonic-gate 
14187c478bd9Sstevel@tonic-gate 	/*
14197c478bd9Sstevel@tonic-gate 	 * If IFF_STANDBY has changed, indicate that the interface has changed
14209bea6098Smeem 	 * types and refresh IFF_INACTIVE if need be.
14217c478bd9Sstevel@tonic-gate 	 */
14229bea6098Smeem 	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY) {
1423e11c3f44Smeem 		phyint_changed(pi);
14249bea6098Smeem 		if (pii->pii_flags & IFF_STANDBY)
14259bea6098Smeem 			phyint_standby_refresh_inactive(pi);
14269bea6098Smeem 	}
14277c478bd9Sstevel@tonic-gate 
14287c478bd9Sstevel@tonic-gate 	/* Has just the IFF_RUNNING flag changed state ? */
14297c478bd9Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
14307c478bd9Sstevel@tonic-gate 		struct phyint_instance *pii_other;
14317c478bd9Sstevel@tonic-gate 		/*
14327c478bd9Sstevel@tonic-gate 		 * It wasn't just a link state change.	Update
14337c478bd9Sstevel@tonic-gate 		 * the other instance's copy of the flags.
14347c478bd9Sstevel@tonic-gate 		 */
14357c478bd9Sstevel@tonic-gate 		pii_other = phyint_inst_other(pii);
14367c478bd9Sstevel@tonic-gate 		if (pii_other != NULL)
14377c478bd9Sstevel@tonic-gate 			pii_other->pii_flags = pii->pii_flags;
14387c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
14397c478bd9Sstevel@tonic-gate 	}
14407c478bd9Sstevel@tonic-gate 
14417c478bd9Sstevel@tonic-gate 	return (_B_FALSE);
14427c478bd9Sstevel@tonic-gate }
14437c478bd9Sstevel@tonic-gate 
14447c478bd9Sstevel@tonic-gate /*
14457c478bd9Sstevel@tonic-gate  * Retrieve as many routing socket messages as possible, and try to
14467c478bd9Sstevel@tonic-gate  * empty the routing sockets. Initiate full scan of targets or interfaces
14477c478bd9Sstevel@tonic-gate  * as needed.
14487c478bd9Sstevel@tonic-gate  * We listen on separate IPv4 an IPv6 sockets so that we can accurately
14497c478bd9Sstevel@tonic-gate  * detect changes in certain flags (see "process_rtm_ifinfo()" above).
14507c478bd9Sstevel@tonic-gate  */
14517c478bd9Sstevel@tonic-gate static void
process_rtsock(int rtsock_v4,int rtsock_v6)14527c478bd9Sstevel@tonic-gate process_rtsock(int rtsock_v4, int rtsock_v6)
14537c478bd9Sstevel@tonic-gate {
14547c478bd9Sstevel@tonic-gate 	int	nbytes;
14557c478bd9Sstevel@tonic-gate 	int64_t msg[2048 / 8];
14567c478bd9Sstevel@tonic-gate 	struct rt_msghdr *rtm;
14577c478bd9Sstevel@tonic-gate 	boolean_t need_if_scan = _B_FALSE;
14587c478bd9Sstevel@tonic-gate 	boolean_t need_rt_scan = _B_FALSE;
14597c478bd9Sstevel@tonic-gate 	boolean_t rtm_ifinfo_seen = _B_FALSE;
14607c478bd9Sstevel@tonic-gate 	int type;
14617c478bd9Sstevel@tonic-gate 
14627c478bd9Sstevel@tonic-gate 	/* Read as many messages as possible and try to empty the sockets */
14637c478bd9Sstevel@tonic-gate 	for (type = AF_INET; ; type = AF_INET6) {
14647c478bd9Sstevel@tonic-gate 		for (;;) {
14657c478bd9Sstevel@tonic-gate 			nbytes = read((type == AF_INET) ? rtsock_v4 :
1466e6ed03fcSmeem 			    rtsock_v6, msg, sizeof (msg));
14677c478bd9Sstevel@tonic-gate 			if (nbytes <= 0) {
14687c478bd9Sstevel@tonic-gate 				/* No more messages */
14697c478bd9Sstevel@tonic-gate 				break;
14707c478bd9Sstevel@tonic-gate 			}
14717c478bd9Sstevel@tonic-gate 			rtm = (struct rt_msghdr *)msg;
14727c478bd9Sstevel@tonic-gate 			if (rtm->rtm_version != RTM_VERSION) {
14737c478bd9Sstevel@tonic-gate 				logerr("process_rtsock: version %d "
14747c478bd9Sstevel@tonic-gate 				    "not understood\n", rtm->rtm_version);
14757c478bd9Sstevel@tonic-gate 				break;
14767c478bd9Sstevel@tonic-gate 			}
14777c478bd9Sstevel@tonic-gate 
14787c478bd9Sstevel@tonic-gate 			if (debug & D_PHYINT) {
14797c478bd9Sstevel@tonic-gate 				logdebug("process_rtsock: message %d\n",
14807c478bd9Sstevel@tonic-gate 				    rtm->rtm_type);
14817c478bd9Sstevel@tonic-gate 			}
14827c478bd9Sstevel@tonic-gate 
14837c478bd9Sstevel@tonic-gate 			switch (rtm->rtm_type) {
14847c478bd9Sstevel@tonic-gate 			case RTM_NEWADDR:
14857c478bd9Sstevel@tonic-gate 			case RTM_DELADDR:
14867c478bd9Sstevel@tonic-gate 				/*
14877c478bd9Sstevel@tonic-gate 				 * Some logical interface has changed,
14887c478bd9Sstevel@tonic-gate 				 * have to scan everything to determine
14897c478bd9Sstevel@tonic-gate 				 * what actually changed.
14907c478bd9Sstevel@tonic-gate 				 */
14917c478bd9Sstevel@tonic-gate 				need_if_scan = _B_TRUE;
14927c478bd9Sstevel@tonic-gate 				break;
14937c478bd9Sstevel@tonic-gate 
14947c478bd9Sstevel@tonic-gate 			case RTM_IFINFO:
14957c478bd9Sstevel@tonic-gate 				rtm_ifinfo_seen = _B_TRUE;
1496e6ed03fcSmeem 				need_if_scan |= process_rtm_ifinfo(
1497e6ed03fcSmeem 				    (if_msghdr_t *)rtm, type);
14987c478bd9Sstevel@tonic-gate 				break;
14997c478bd9Sstevel@tonic-gate 
15007c478bd9Sstevel@tonic-gate 			case RTM_ADD:
15017c478bd9Sstevel@tonic-gate 			case RTM_DELETE:
15027c478bd9Sstevel@tonic-gate 			case RTM_CHANGE:
15037c478bd9Sstevel@tonic-gate 			case RTM_OLDADD:
15047c478bd9Sstevel@tonic-gate 			case RTM_OLDDEL:
15057c478bd9Sstevel@tonic-gate 				need_rt_scan = _B_TRUE;
15067c478bd9Sstevel@tonic-gate 				break;
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate 			default:
15097c478bd9Sstevel@tonic-gate 				/* Not interesting */
15107c478bd9Sstevel@tonic-gate 				break;
15117c478bd9Sstevel@tonic-gate 			}
15127c478bd9Sstevel@tonic-gate 		}
15137c478bd9Sstevel@tonic-gate 		if (type == AF_INET6)
15147c478bd9Sstevel@tonic-gate 			break;
15157c478bd9Sstevel@tonic-gate 	}
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate 	if (need_if_scan) {
15187c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE && rtm_ifinfo_seen)
15197c478bd9Sstevel@tonic-gate 			logdebug("process_rtsock: synchronizing with kernel\n");
15207c478bd9Sstevel@tonic-gate 		initifs();
15217c478bd9Sstevel@tonic-gate 	} else if (rtm_ifinfo_seen) {
15227c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
15237c478bd9Sstevel@tonic-gate 			logdebug("process_rtsock: "
15247c478bd9Sstevel@tonic-gate 			    "link up/down notification(s) seen\n");
15257c478bd9Sstevel@tonic-gate 		process_link_state_changes();
15267c478bd9Sstevel@tonic-gate 	}
15277c478bd9Sstevel@tonic-gate 
15287c478bd9Sstevel@tonic-gate 	if (need_rt_scan)
15297c478bd9Sstevel@tonic-gate 		init_router_targets();
15307c478bd9Sstevel@tonic-gate }
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate /*
15337c478bd9Sstevel@tonic-gate  * Look if the phyint instance or one of its logints have been removed from
15347c478bd9Sstevel@tonic-gate  * the kernel and take appropriate action.
15357c478bd9Sstevel@tonic-gate  * Uses {pii,li}_in_use.
15367c478bd9Sstevel@tonic-gate  */
15377c478bd9Sstevel@tonic-gate static void
check_if_removed(struct phyint_instance * pii)15387c478bd9Sstevel@tonic-gate check_if_removed(struct phyint_instance *pii)
15397c478bd9Sstevel@tonic-gate {
15407c478bd9Sstevel@tonic-gate 	struct logint *li;
15417c478bd9Sstevel@tonic-gate 	struct logint *next_li;
15427c478bd9Sstevel@tonic-gate 
15437c478bd9Sstevel@tonic-gate 	/* Detect phyints that have been removed from the kernel. */
15447c478bd9Sstevel@tonic-gate 	if (!pii->pii_in_use) {
15457c478bd9Sstevel@tonic-gate 		logtrace("%s %s has been removed from kernel\n",
15467c478bd9Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
15477c478bd9Sstevel@tonic-gate 		phyint_inst_delete(pii);
15487c478bd9Sstevel@tonic-gate 	} else {
15497c478bd9Sstevel@tonic-gate 		/* Detect logints that have been removed. */
15507c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = next_li) {
15517c478bd9Sstevel@tonic-gate 			next_li = li->li_next;
15527c478bd9Sstevel@tonic-gate 			if (!li->li_in_use) {
15537c478bd9Sstevel@tonic-gate 				logint_delete(li);
15547c478bd9Sstevel@tonic-gate 			}
15557c478bd9Sstevel@tonic-gate 		}
15567c478bd9Sstevel@tonic-gate 	}
15577c478bd9Sstevel@tonic-gate }
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate /*
156027438c18SJon Anderson  * Parse the supplied mib2 information to extract the routing information
156127438c18SJon Anderson  * table. Process the routing table to get the list of known onlink routers
156227438c18SJon Anderson  * and update our database. These onlink routers will serve as probe
156327438c18SJon Anderson  * targets.
15647c478bd9Sstevel@tonic-gate  */
156527438c18SJon Anderson static void
update_router_list(mib_item_t * item)156627438c18SJon Anderson update_router_list(mib_item_t *item)
15677c478bd9Sstevel@tonic-gate {
156827438c18SJon Anderson 	for (; item != NULL; item = item->mi_next) {
156927438c18SJon Anderson 		if (item->mi_opthdr.name == 0)
15707c478bd9Sstevel@tonic-gate 			continue;
157127438c18SJon Anderson 		if (item->mi_opthdr.level == MIB2_IP &&
157227438c18SJon Anderson 		    item->mi_opthdr.name == MIB2_IP_ROUTE) {
157327438c18SJon Anderson 			ire_process_v4((mib2_ipRouteEntry_t *)item->mi_valp,
157427438c18SJon Anderson 			    item->mi_opthdr.len);
157527438c18SJon Anderson 		} else if (item->mi_opthdr.level == MIB2_IP6 &&
157627438c18SJon Anderson 		    item->mi_opthdr.name == MIB2_IP6_ROUTE) {
157727438c18SJon Anderson 			ire_process_v6((mib2_ipv6RouteEntry_t *)item->mi_valp,
157827438c18SJon Anderson 			    item->mi_opthdr.len);
15797c478bd9Sstevel@tonic-gate 		}
15807c478bd9Sstevel@tonic-gate 	}
15817c478bd9Sstevel@tonic-gate }
15827c478bd9Sstevel@tonic-gate 
1583e11c3f44Smeem 
1584e11c3f44Smeem /*
1585e11c3f44Smeem  * Convert octet `octp' to a phyint name and store in `ifname'
1586e11c3f44Smeem  */
1587e11c3f44Smeem static void
oct2ifname(const Octet_t * octp,char * ifname,size_t ifsize)1588e11c3f44Smeem oct2ifname(const Octet_t *octp, char *ifname, size_t ifsize)
1589e11c3f44Smeem {
1590e11c3f44Smeem 	char *cp;
1591e11c3f44Smeem 	size_t len = MIN(octp->o_length, ifsize - 1);
1592e11c3f44Smeem 
1593e11c3f44Smeem 	(void) strncpy(ifname, octp->o_bytes, len);
1594e11c3f44Smeem 	ifname[len] = '\0';
1595e11c3f44Smeem 
1596e11c3f44Smeem 	if ((cp = strchr(ifname, IF_SEPARATOR)) != NULL)
1597e11c3f44Smeem 		*cp = '\0';
1598e11c3f44Smeem }
1599e11c3f44Smeem 
16007c478bd9Sstevel@tonic-gate /*
1601e11c3f44Smeem  * Examine the IPv4 routing table `buf' for possible targets.  For each
1602e11c3f44Smeem  * possible target, if it's on the same subnet an interface route, pass
1603e11c3f44Smeem  * it to router_add_common() for further consideration.
16047c478bd9Sstevel@tonic-gate  */
16057c478bd9Sstevel@tonic-gate static void
ire_process_v4(mib2_ipRouteEntry_t * buf,size_t len)16067c478bd9Sstevel@tonic-gate ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
16077c478bd9Sstevel@tonic-gate {
1608e11c3f44Smeem 	char ifname[LIFNAMSIZ];
1609e11c3f44Smeem 	mib2_ipRouteEntry_t	*rp, *rp1, *endp;
1610e11c3f44Smeem 	struct in_addr		nexthop_v4;
1611e11c3f44Smeem 	struct in6_addr		nexthop;
16127c478bd9Sstevel@tonic-gate 
161327438c18SJon Anderson 	if (debug & D_TARGET)
161427438c18SJon Anderson 		logdebug("ire_process_v4(len %d)\n", len);
161527438c18SJon Anderson 
16167c478bd9Sstevel@tonic-gate 	if (len == 0)
16177c478bd9Sstevel@tonic-gate 		return;
16187c478bd9Sstevel@tonic-gate 
161927438c18SJon Anderson 	assert((len % ipRouteEntrySize) == 0);
162027438c18SJon Anderson 	endp = buf + (len / ipRouteEntrySize);
16217c478bd9Sstevel@tonic-gate 
16227c478bd9Sstevel@tonic-gate 	/*
1623e11c3f44Smeem 	 * Scan the routing table entries for any IRE_OFFSUBNET entries, and
1624e11c3f44Smeem 	 * cross-reference them with the interface routes to determine if
1625e11c3f44Smeem 	 * they're possible probe targets.
16267c478bd9Sstevel@tonic-gate 	 */
16277c478bd9Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
16287c478bd9Sstevel@tonic-gate 		if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
16297c478bd9Sstevel@tonic-gate 			continue;
16307c478bd9Sstevel@tonic-gate 
1631e11c3f44Smeem 		/* Get the nexthop address. */
16327c478bd9Sstevel@tonic-gate 		nexthop_v4.s_addr = rp->ipRouteNextHop;
16337c478bd9Sstevel@tonic-gate 
16347c478bd9Sstevel@tonic-gate 		/*
1635e11c3f44Smeem 		 * Rescan the routing table looking for interface routes that
1636e11c3f44Smeem 		 * are on the same subnet, and try to add them.  If they're
1637e11c3f44Smeem 		 * not relevant (e.g., the interface route isn't part of an
1638e11c3f44Smeem 		 * IPMP group, router_add_common() will discard).
16397c478bd9Sstevel@tonic-gate 		 */
16407c478bd9Sstevel@tonic-gate 		for (rp1 = buf; rp1 < endp; rp1++) {
1641e11c3f44Smeem 			if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE) ||
1642e11c3f44Smeem 			    rp1->ipRouteIfIndex.o_length == 0)
16437c478bd9Sstevel@tonic-gate 				continue;
16447c478bd9Sstevel@tonic-gate 
1645e11c3f44Smeem 			if ((rp1->ipRouteDest & rp1->ipRouteMask) !=
1646e11c3f44Smeem 			    (nexthop_v4.s_addr & rp1->ipRouteMask))
1647e11c3f44Smeem 				continue;
1648e11c3f44Smeem 
1649e11c3f44Smeem 			oct2ifname(&rp1->ipRouteIfIndex, ifname, LIFNAMSIZ);
1650e11c3f44Smeem 			IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
1651e11c3f44Smeem 			router_add_common(AF_INET, ifname, nexthop);
16527c478bd9Sstevel@tonic-gate 		}
16537c478bd9Sstevel@tonic-gate 	}
16547c478bd9Sstevel@tonic-gate }
16557c478bd9Sstevel@tonic-gate 
16567c478bd9Sstevel@tonic-gate void
router_add_common(int af,char * ifname,struct in6_addr nexthop)16577c478bd9Sstevel@tonic-gate router_add_common(int af, char *ifname, struct in6_addr nexthop)
16587c478bd9Sstevel@tonic-gate {
16597c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
16607c478bd9Sstevel@tonic-gate 	struct phyint *pi;
16617c478bd9Sstevel@tonic-gate 
16627c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
16637c478bd9Sstevel@tonic-gate 		logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname);
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	/*
16667c478bd9Sstevel@tonic-gate 	 * Retrieve the phyint instance; bail if it's not known to us yet.
16677c478bd9Sstevel@tonic-gate 	 */
16687c478bd9Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, ifname);
16697c478bd9Sstevel@tonic-gate 	if (pii == NULL)
16707c478bd9Sstevel@tonic-gate 		return;
16717c478bd9Sstevel@tonic-gate 
16727c478bd9Sstevel@tonic-gate 	/*
16737c478bd9Sstevel@tonic-gate 	 * Don't use our own addresses as targets.
16747c478bd9Sstevel@tonic-gate 	 */
167587e66ffcSrk 	if (own_address(nexthop))
16767c478bd9Sstevel@tonic-gate 		return;
16777c478bd9Sstevel@tonic-gate 
16787c478bd9Sstevel@tonic-gate 	/*
16797c478bd9Sstevel@tonic-gate 	 * If the phyint is part a named group, then add the address to all
16807c478bd9Sstevel@tonic-gate 	 * members of the group; note that this is suboptimal in the IPv4 case
16817c478bd9Sstevel@tonic-gate 	 * as it has already been added to all matching interfaces in
16827c478bd9Sstevel@tonic-gate 	 * ire_process_v4(). Otherwise, add the address only to the phyint
16837c478bd9Sstevel@tonic-gate 	 * itself, since other phyints in the anongroup may not be on the same
16847c478bd9Sstevel@tonic-gate 	 * subnet.
16857c478bd9Sstevel@tonic-gate 	 */
16867c478bd9Sstevel@tonic-gate 	pi = pii->pii_phyint;
16877c478bd9Sstevel@tonic-gate 	if (pi->pi_group == phyint_anongroup) {
16887c478bd9Sstevel@tonic-gate 		target_add(pii, nexthop, _B_TRUE);
16897c478bd9Sstevel@tonic-gate 	} else {
16907c478bd9Sstevel@tonic-gate 		pi = pi->pi_group->pg_phyint;
16917c478bd9Sstevel@tonic-gate 		for (; pi != NULL; pi = pi->pi_pgnext)
16927c478bd9Sstevel@tonic-gate 			target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE);
16937c478bd9Sstevel@tonic-gate 	}
16947c478bd9Sstevel@tonic-gate }
16957c478bd9Sstevel@tonic-gate 
16967c478bd9Sstevel@tonic-gate /*
1697e11c3f44Smeem  * Examine the IPv6 routing table `buf' for possible link-local targets, and
1698e11c3f44Smeem  * pass any contenders to router_add_common() for further consideration.
16997c478bd9Sstevel@tonic-gate  */
17007c478bd9Sstevel@tonic-gate static void
ire_process_v6(mib2_ipv6RouteEntry_t * buf,size_t len)17017c478bd9Sstevel@tonic-gate ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
17027c478bd9Sstevel@tonic-gate {
1703e11c3f44Smeem 	struct lifreq lifr;
1704e11c3f44Smeem 	char ifname[LIFNAMSIZ];
1705e11c3f44Smeem 	char grname[LIFGRNAMSIZ];
1706e11c3f44Smeem 	mib2_ipv6RouteEntry_t *rp, *rp1, *endp;
1707e11c3f44Smeem 	struct in6_addr nexthop_v6;
17087c478bd9Sstevel@tonic-gate 
17097c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
17107c478bd9Sstevel@tonic-gate 		logdebug("ire_process_v6(len %d)\n", len);
17117c478bd9Sstevel@tonic-gate 
17127c478bd9Sstevel@tonic-gate 	if (len == 0)
17137c478bd9Sstevel@tonic-gate 		return;
17147c478bd9Sstevel@tonic-gate 
171527438c18SJon Anderson 	assert((len % ipv6RouteEntrySize) == 0);
171627438c18SJon Anderson 	endp = buf + (len / ipv6RouteEntrySize);
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 	/*
1719e11c3f44Smeem 	 * Scan the routing table entries for any IRE_OFFSUBNET entries, and
1720e11c3f44Smeem 	 * cross-reference them with the interface routes to determine if
1721e11c3f44Smeem 	 * they're possible probe targets.
17227c478bd9Sstevel@tonic-gate 	 */
17237c478bd9Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
1724e11c3f44Smeem 		if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET) ||
1725e11c3f44Smeem 		    !IN6_IS_ADDR_LINKLOCAL(&rp->ipv6RouteNextHop))
17267c478bd9Sstevel@tonic-gate 			continue;
17277c478bd9Sstevel@tonic-gate 
1728e11c3f44Smeem 		/* Get the nexthop address. */
17297c478bd9Sstevel@tonic-gate 		nexthop_v6 = rp->ipv6RouteNextHop;
17307c478bd9Sstevel@tonic-gate 
1731e11c3f44Smeem 		/*
1732e11c3f44Smeem 		 * The interface name should always exist for link-locals;
1733e11c3f44Smeem 		 * we use it to map this entry to an IPMP group name.
1734e11c3f44Smeem 		 */
1735e11c3f44Smeem 		if (rp->ipv6RouteIfIndex.o_length == 0)
1736e11c3f44Smeem 			continue;
17377c478bd9Sstevel@tonic-gate 
1738e11c3f44Smeem 		oct2ifname(&rp->ipv6RouteIfIndex, lifr.lifr_name, LIFNAMSIZ);
1739e11c3f44Smeem 		if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) == -1 ||
1740e11c3f44Smeem 		    strlcpy(grname, lifr.lifr_groupname, LIFGRNAMSIZ) == 0) {
1741e11c3f44Smeem 			continue;
1742e11c3f44Smeem 		}
17437c478bd9Sstevel@tonic-gate 
1744e11c3f44Smeem 		/*
1745e11c3f44Smeem 		 * Rescan the list of routes for interface routes, and add the
1746e11c3f44Smeem 		 * above target to any interfaces in the same IPMP group.
1747e11c3f44Smeem 		 */
1748e11c3f44Smeem 		for (rp1 = buf; rp1 < endp; rp1++) {
1749e11c3f44Smeem 			if (!(rp1->ipv6RouteInfo.re_ire_type & IRE_INTERFACE) ||
1750e11c3f44Smeem 			    rp1->ipv6RouteIfIndex.o_length == 0) {
1751e11c3f44Smeem 				continue;
1752e11c3f44Smeem 			}
1753e11c3f44Smeem 			oct2ifname(&rp1->ipv6RouteIfIndex, ifname, LIFNAMSIZ);
1754e11c3f44Smeem 			(void) strlcpy(lifr.lifr_name, ifname, LIFNAMSIZ);
17557c478bd9Sstevel@tonic-gate 
1756e11c3f44Smeem 			if (ioctl(ifsock_v6, SIOCGLIFGROUPNAME, &lifr) != -1 &&
1757e11c3f44Smeem 			    strcmp(lifr.lifr_groupname, grname) == 0) {
1758e11c3f44Smeem 				router_add_common(AF_INET6, ifname, nexthop_v6);
1759e11c3f44Smeem 			}
1760e11c3f44Smeem 		}
1761e11c3f44Smeem 	}
17627c478bd9Sstevel@tonic-gate }
17637c478bd9Sstevel@tonic-gate 
17647c478bd9Sstevel@tonic-gate /*
17657c478bd9Sstevel@tonic-gate  * Build a list of target routers, by scanning the routing tables.
17667c478bd9Sstevel@tonic-gate  * It is assumed that interface routes exist, to reach the routers.
17677c478bd9Sstevel@tonic-gate  */
17687c478bd9Sstevel@tonic-gate static void
init_router_targets(void)17697c478bd9Sstevel@tonic-gate init_router_targets(void)
17707c478bd9Sstevel@tonic-gate {
17717c478bd9Sstevel@tonic-gate 	struct	target *tg;
17727c478bd9Sstevel@tonic-gate 	struct	target *next_tg;
17737c478bd9Sstevel@tonic-gate 	struct	phyint_instance *pii;
17747c478bd9Sstevel@tonic-gate 	struct	phyint *pi;
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 	if (force_mcast)
17777c478bd9Sstevel@tonic-gate 		return;
17787c478bd9Sstevel@tonic-gate 
17797c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
17807c478bd9Sstevel@tonic-gate 		pi = pii->pii_phyint;
17817c478bd9Sstevel@tonic-gate 		/*
1782e11c3f44Smeem 		 * Set tg_in_use to false only for router targets.
17837c478bd9Sstevel@tonic-gate 		 */
1784e11c3f44Smeem 		if (!pii->pii_targets_are_routers)
17857c478bd9Sstevel@tonic-gate 			continue;
17867c478bd9Sstevel@tonic-gate 
17877c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
17887c478bd9Sstevel@tonic-gate 			tg->tg_in_use = 0;
17897c478bd9Sstevel@tonic-gate 	}
17907c478bd9Sstevel@tonic-gate 
179127438c18SJon Anderson 	if (mibwalk(update_router_list) == -1)
179227438c18SJon Anderson 		exit(1);
17937c478bd9Sstevel@tonic-gate 
17947c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
1795e11c3f44Smeem 		pi = pii->pii_phyint;
1796e11c3f44Smeem 		if (!pii->pii_targets_are_routers)
17977c478bd9Sstevel@tonic-gate 			continue;
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
18007c478bd9Sstevel@tonic-gate 			next_tg = tg->tg_next;
1801e11c3f44Smeem 			/*
1802e11c3f44Smeem 			 * If the group has failed, it's likely the route was
1803e11c3f44Smeem 			 * removed by an application affected by that failure.
1804e11c3f44Smeem 			 * In that case, we keep the target so that we can
1805e11c3f44Smeem 			 * reliably repair, at which point we'll refresh the
1806e11c3f44Smeem 			 * target list again.
1807e11c3f44Smeem 			 */
1808e11c3f44Smeem 			if (!tg->tg_in_use && !GROUP_FAILED(pi->pi_group))
18097c478bd9Sstevel@tonic-gate 				target_delete(tg);
18107c478bd9Sstevel@tonic-gate 		}
18117c478bd9Sstevel@tonic-gate 	}
18127c478bd9Sstevel@tonic-gate }
18137c478bd9Sstevel@tonic-gate 
18147c478bd9Sstevel@tonic-gate /*
18157c478bd9Sstevel@tonic-gate  * Attempt to assign host targets to any interfaces that do not currently
18167c478bd9Sstevel@tonic-gate  * have probe targets by sharing targets with other interfaces in the group.
18177c478bd9Sstevel@tonic-gate  */
18187c478bd9Sstevel@tonic-gate static void
init_host_targets(void)18197c478bd9Sstevel@tonic-gate init_host_targets(void)
18207c478bd9Sstevel@tonic-gate {
18217c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
18227c478bd9Sstevel@tonic-gate 	struct phyint_group *pg;
18237c478bd9Sstevel@tonic-gate 
18247c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
18257c478bd9Sstevel@tonic-gate 		pg = pii->pii_phyint->pi_group;
18267c478bd9Sstevel@tonic-gate 		if (pg != phyint_anongroup && pii->pii_targets == NULL)
18277c478bd9Sstevel@tonic-gate 			dup_host_targets(pii);
18287c478bd9Sstevel@tonic-gate 	}
18297c478bd9Sstevel@tonic-gate }
18307c478bd9Sstevel@tonic-gate 
18317c478bd9Sstevel@tonic-gate /*
18327c478bd9Sstevel@tonic-gate  * Duplicate host targets from other phyints of the group to
18337c478bd9Sstevel@tonic-gate  * the phyint instance 'desired_pii'.
18347c478bd9Sstevel@tonic-gate  */
18357c478bd9Sstevel@tonic-gate static void
dup_host_targets(struct phyint_instance * desired_pii)18367c478bd9Sstevel@tonic-gate dup_host_targets(struct phyint_instance	 *desired_pii)
18377c478bd9Sstevel@tonic-gate {
18387c478bd9Sstevel@tonic-gate 	int af;
18397c478bd9Sstevel@tonic-gate 	struct phyint *pi;
18407c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
18417c478bd9Sstevel@tonic-gate 	struct target *tg;
18427c478bd9Sstevel@tonic-gate 
18437c478bd9Sstevel@tonic-gate 	assert(desired_pii->pii_phyint->pi_group != phyint_anongroup);
18447c478bd9Sstevel@tonic-gate 
18457c478bd9Sstevel@tonic-gate 	af = desired_pii->pii_af;
18467c478bd9Sstevel@tonic-gate 
18477c478bd9Sstevel@tonic-gate 	/*
18487c478bd9Sstevel@tonic-gate 	 * For every phyint in the same group as desired_pii, check if
18497c478bd9Sstevel@tonic-gate 	 * it has any host targets. If so add them to desired_pii.
18507c478bd9Sstevel@tonic-gate 	 */
18517c478bd9Sstevel@tonic-gate 	for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) {
18527c478bd9Sstevel@tonic-gate 		pii = PHYINT_INSTANCE(pi, af);
18537c478bd9Sstevel@tonic-gate 		/*
18547c478bd9Sstevel@tonic-gate 		 * We know that we don't have targets on this phyint instance
18557c478bd9Sstevel@tonic-gate 		 * since we have been called. But we still check for
18567c478bd9Sstevel@tonic-gate 		 * pii_targets_are_routers because another phyint instance
18577c478bd9Sstevel@tonic-gate 		 * could have router targets, since IFF_NOFAILOVER addresses
18587c478bd9Sstevel@tonic-gate 		 * on different phyint instances may belong to different
18597c478bd9Sstevel@tonic-gate 		 * subnets.
18607c478bd9Sstevel@tonic-gate 		 */
18617c478bd9Sstevel@tonic-gate 		if ((pii == NULL) || (pii == desired_pii) ||
18627c478bd9Sstevel@tonic-gate 		    pii->pii_targets_are_routers)
18637c478bd9Sstevel@tonic-gate 			continue;
18647c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
18657c478bd9Sstevel@tonic-gate 			target_create(desired_pii, tg->tg_address, _B_FALSE);
18667c478bd9Sstevel@tonic-gate 		}
18677c478bd9Sstevel@tonic-gate 	}
18687c478bd9Sstevel@tonic-gate }
18697c478bd9Sstevel@tonic-gate 
/*
 * Write a one-line usage message for the program named `cmd' to stderr.
 */
static void
usage(char *cmd)
{
	(void) fprintf(stderr, "usage: %s\n", cmd);
}
18757c478bd9Sstevel@tonic-gate 
18767c478bd9Sstevel@tonic-gate 
18777c478bd9Sstevel@tonic-gate #define	MPATHD_DEFAULT_FILE	"/etc/default/mpathd"
18787c478bd9Sstevel@tonic-gate 
18797c478bd9Sstevel@tonic-gate /* Get an option from the /etc/default/mpathd file */
18807c478bd9Sstevel@tonic-gate static char *
getdefault(char * name)18817c478bd9Sstevel@tonic-gate getdefault(char *name)
18827c478bd9Sstevel@tonic-gate {
18837c478bd9Sstevel@tonic-gate 	char namebuf[BUFSIZ];
18847c478bd9Sstevel@tonic-gate 	char *value = NULL;
18857c478bd9Sstevel@tonic-gate 
18867c478bd9Sstevel@tonic-gate 	if (defopen(MPATHD_DEFAULT_FILE) == 0) {
18877c478bd9Sstevel@tonic-gate 		char	*cp;
18887c478bd9Sstevel@tonic-gate 		int	flags;
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 		/*
18917c478bd9Sstevel@tonic-gate 		 * ignore case
18927c478bd9Sstevel@tonic-gate 		 */
18937c478bd9Sstevel@tonic-gate 		flags = defcntl(DC_GETFLAGS, 0);
18947c478bd9Sstevel@tonic-gate 		TURNOFF(flags, DC_CASE);
18957c478bd9Sstevel@tonic-gate 		(void) defcntl(DC_SETFLAGS, flags);
18967c478bd9Sstevel@tonic-gate 
18977c478bd9Sstevel@tonic-gate 		/* Add "=" to the name */
18987c478bd9Sstevel@tonic-gate 		(void) strncpy(namebuf, name, sizeof (namebuf) - 2);
18997c478bd9Sstevel@tonic-gate 		(void) strncat(namebuf, "=", 2);
19007c478bd9Sstevel@tonic-gate 
19017c478bd9Sstevel@tonic-gate 		if ((cp = defread(namebuf)) != NULL)
19027c478bd9Sstevel@tonic-gate 			value = strdup(cp);
19037c478bd9Sstevel@tonic-gate 
19047c478bd9Sstevel@tonic-gate 		/* close */
19057c478bd9Sstevel@tonic-gate 		(void) defopen((char *)NULL);
19067c478bd9Sstevel@tonic-gate 	}
19077c478bd9Sstevel@tonic-gate 	return (value);
19087c478bd9Sstevel@tonic-gate }
19097c478bd9Sstevel@tonic-gate 
19107c478bd9Sstevel@tonic-gate 
19117c478bd9Sstevel@tonic-gate /*
19127c478bd9Sstevel@tonic-gate  * Command line options below
19137c478bd9Sstevel@tonic-gate  */
19147c478bd9Sstevel@tonic-gate boolean_t	failback_enabled = _B_TRUE;	/* failback enabled/disabled */
1915e11c3f44Smeem boolean_t	track_all_phyints = _B_FALSE;	/* track all IP interfaces */
19167c478bd9Sstevel@tonic-gate static boolean_t adopt = _B_FALSE;
19177c478bd9Sstevel@tonic-gate static boolean_t foreground = _B_FALSE;
19187c478bd9Sstevel@tonic-gate 
19197c478bd9Sstevel@tonic-gate int
main(int argc,char * argv[])19207c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
19217c478bd9Sstevel@tonic-gate {
19227c478bd9Sstevel@tonic-gate 	int i;
19237c478bd9Sstevel@tonic-gate 	int c;
1924e11c3f44Smeem 	struct phyint *pi;
19257c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
19267c478bd9Sstevel@tonic-gate 	char *value;
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate 	argv0 = argv;		/* Saved for re-exec on SIGHUP */
19297c478bd9Sstevel@tonic-gate 	srandom(gethostid());	/* Initialize the random number generator */
19307c478bd9Sstevel@tonic-gate 
19317c478bd9Sstevel@tonic-gate 	/*
19327c478bd9Sstevel@tonic-gate 	 * NOTE: The messages output by in.mpathd are not suitable for
19337c478bd9Sstevel@tonic-gate 	 * translation, so we do not call textdomain().
19347c478bd9Sstevel@tonic-gate 	 */
19357c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
19367c478bd9Sstevel@tonic-gate 
19377c478bd9Sstevel@tonic-gate 	/*
19387c478bd9Sstevel@tonic-gate 	 * Get the user specified value of 'failure detection time'
19397c478bd9Sstevel@tonic-gate 	 * from /etc/default/mpathd
19407c478bd9Sstevel@tonic-gate 	 */
19417c478bd9Sstevel@tonic-gate 	value = getdefault("FAILURE_DETECTION_TIME");
19427c478bd9Sstevel@tonic-gate 	if (value != NULL) {
19437c478bd9Sstevel@tonic-gate 		user_failure_detection_time =
19447c478bd9Sstevel@tonic-gate 		    (int)strtol((char *)value, NULL, 0);
19457c478bd9Sstevel@tonic-gate 
19467c478bd9Sstevel@tonic-gate 		if (user_failure_detection_time <= 0) {
19477c478bd9Sstevel@tonic-gate 			user_failure_detection_time = FAILURE_DETECTION_TIME;
19487c478bd9Sstevel@tonic-gate 			logerr("Invalid failure detection time %s, assuming "
1949e11c3f44Smeem 			    "default of %d ms\n", value,
1950e11c3f44Smeem 			    user_failure_detection_time);
19517c478bd9Sstevel@tonic-gate 
19527c478bd9Sstevel@tonic-gate 		} else if (user_failure_detection_time <
19537c478bd9Sstevel@tonic-gate 		    MIN_FAILURE_DETECTION_TIME) {
19547c478bd9Sstevel@tonic-gate 			user_failure_detection_time =
19557c478bd9Sstevel@tonic-gate 			    MIN_FAILURE_DETECTION_TIME;
19567c478bd9Sstevel@tonic-gate 			logerr("Too small failure detection time of %s, "
1957e11c3f44Smeem 			    "assuming minimum of %d ms\n", value,
19587c478bd9Sstevel@tonic-gate 			    user_failure_detection_time);
19597c478bd9Sstevel@tonic-gate 		}
19607c478bd9Sstevel@tonic-gate 		free(value);
19617c478bd9Sstevel@tonic-gate 	} else {
19627c478bd9Sstevel@tonic-gate 		/* User has not specified the parameter, Use default value */
19637c478bd9Sstevel@tonic-gate 		user_failure_detection_time = FAILURE_DETECTION_TIME;
19647c478bd9Sstevel@tonic-gate 	}
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 	/*
19677c478bd9Sstevel@tonic-gate 	 * This gives the frequency at which probes will be sent.
19687c478bd9Sstevel@tonic-gate 	 * When fdt ms elapses, we should be able to determine
19697c478bd9Sstevel@tonic-gate 	 * whether 5 consecutive probes have failed or not.
19707c478bd9Sstevel@tonic-gate 	 * 1 probe will be sent in every user_probe_interval ms,
19717c478bd9Sstevel@tonic-gate 	 * randomly anytime in the (0.5  - 1.0) 2nd half of every
19727c478bd9Sstevel@tonic-gate 	 * user_probe_interval. Thus when we send out probe 'n' we
19737c478bd9Sstevel@tonic-gate 	 * can be sure that probe 'n - 2' is lost, if we have not
19747c478bd9Sstevel@tonic-gate 	 * got the ack. (since the probe interval is > crtt). But
19757c478bd9Sstevel@tonic-gate 	 * probe 'n - 1' may be a valid unacked probe, since the
19767c478bd9Sstevel@tonic-gate 	 * time between 2 successive probes could be as small as
19777c478bd9Sstevel@tonic-gate 	 * 0.5 * user_probe_interval.  Hence the NUM_PROBE_FAILS + 2
19787c478bd9Sstevel@tonic-gate 	 */
19797c478bd9Sstevel@tonic-gate 	user_probe_interval = user_failure_detection_time /
19807c478bd9Sstevel@tonic-gate 	    (NUM_PROBE_FAILS + 2);
19817c478bd9Sstevel@tonic-gate 
19827c478bd9Sstevel@tonic-gate 	/*
19837c478bd9Sstevel@tonic-gate 	 * Get the user specified value of failback_enabled from
19847c478bd9Sstevel@tonic-gate 	 * /etc/default/mpathd
19857c478bd9Sstevel@tonic-gate 	 */
19867c478bd9Sstevel@tonic-gate 	value = getdefault("FAILBACK");
19877c478bd9Sstevel@tonic-gate 	if (value != NULL) {
1988e11c3f44Smeem 		if (strcasecmp(value, "yes") == 0)
19897c478bd9Sstevel@tonic-gate 			failback_enabled = _B_TRUE;
1990e11c3f44Smeem 		else if (strcasecmp(value, "no") == 0)
19917c478bd9Sstevel@tonic-gate 			failback_enabled = _B_FALSE;
19927c478bd9Sstevel@tonic-gate 		else
19937c478bd9Sstevel@tonic-gate 			logerr("Invalid value for FAILBACK %s\n", value);
19947c478bd9Sstevel@tonic-gate 		free(value);
19957c478bd9Sstevel@tonic-gate 	} else {
19967c478bd9Sstevel@tonic-gate 		failback_enabled = _B_TRUE;
19977c478bd9Sstevel@tonic-gate 	}
19987c478bd9Sstevel@tonic-gate 
19997c478bd9Sstevel@tonic-gate 	/*
20007c478bd9Sstevel@tonic-gate 	 * Get the user specified value of track_all_phyints from
20017c478bd9Sstevel@tonic-gate 	 * /etc/default/mpathd. The sense is reversed in
20027c478bd9Sstevel@tonic-gate 	 * TRACK_INTERFACES_ONLY_WITH_GROUPS.
20037c478bd9Sstevel@tonic-gate 	 */
20047c478bd9Sstevel@tonic-gate 	value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
20057c478bd9Sstevel@tonic-gate 	if (value != NULL) {
2006e11c3f44Smeem 		if (strcasecmp(value, "yes") == 0)
20077c478bd9Sstevel@tonic-gate 			track_all_phyints = _B_FALSE;
2008e11c3f44Smeem 		else if (strcasecmp(value, "no") == 0)
20097c478bd9Sstevel@tonic-gate 			track_all_phyints = _B_TRUE;
20107c478bd9Sstevel@tonic-gate 		else
20117c478bd9Sstevel@tonic-gate 			logerr("Invalid value for "
20127c478bd9Sstevel@tonic-gate 			    "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value);
20137c478bd9Sstevel@tonic-gate 		free(value);
20147c478bd9Sstevel@tonic-gate 	} else {
20157c478bd9Sstevel@tonic-gate 		track_all_phyints = _B_FALSE;
20167c478bd9Sstevel@tonic-gate 	}
20177c478bd9Sstevel@tonic-gate 
20187c478bd9Sstevel@tonic-gate 	while ((c = getopt(argc, argv, "adD:ml")) != EOF) {
20197c478bd9Sstevel@tonic-gate 		switch (c) {
20207c478bd9Sstevel@tonic-gate 		case 'a':
20217c478bd9Sstevel@tonic-gate 			adopt = _B_TRUE;
20227c478bd9Sstevel@tonic-gate 			break;
20237c478bd9Sstevel@tonic-gate 		case 'm':
20247c478bd9Sstevel@tonic-gate 			force_mcast = _B_TRUE;
20257c478bd9Sstevel@tonic-gate 			break;
20267c478bd9Sstevel@tonic-gate 		case 'd':
20277c478bd9Sstevel@tonic-gate 			debug = D_ALL;
20287c478bd9Sstevel@tonic-gate 			foreground = _B_TRUE;
20297c478bd9Sstevel@tonic-gate 			break;
20307c478bd9Sstevel@tonic-gate 		case 'D':
20317c478bd9Sstevel@tonic-gate 			i = (int)strtol(optarg, NULL, 0);
20327c478bd9Sstevel@tonic-gate 			if (i == 0) {
20337c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr, "Bad debug flags: %s\n",
20347c478bd9Sstevel@tonic-gate 				    optarg);
20357c478bd9Sstevel@tonic-gate 				exit(1);
20367c478bd9Sstevel@tonic-gate 			}
20377c478bd9Sstevel@tonic-gate 			debug |= i;
20387c478bd9Sstevel@tonic-gate 			foreground = _B_TRUE;
20397c478bd9Sstevel@tonic-gate 			break;
20407c478bd9Sstevel@tonic-gate 		case 'l':
20417c478bd9Sstevel@tonic-gate 			/*
20427c478bd9Sstevel@tonic-gate 			 * Turn off link state notification handling.
20437c478bd9Sstevel@tonic-gate 			 * Undocumented command line flag, for debugging
20447c478bd9Sstevel@tonic-gate 			 * purposes.
20457c478bd9Sstevel@tonic-gate 			 */
20467c478bd9Sstevel@tonic-gate 			handle_link_notifications = _B_FALSE;
20477c478bd9Sstevel@tonic-gate 			break;
20487c478bd9Sstevel@tonic-gate 		default:
20497c478bd9Sstevel@tonic-gate 			usage(argv[0]);
20507c478bd9Sstevel@tonic-gate 			exit(1);
20517c478bd9Sstevel@tonic-gate 		}
20527c478bd9Sstevel@tonic-gate 	}
20537c478bd9Sstevel@tonic-gate 
20547c478bd9Sstevel@tonic-gate 	/*
20557c478bd9Sstevel@tonic-gate 	 * The sockets for the loopback command interface should be listening
20567c478bd9Sstevel@tonic-gate 	 * before we fork and exit in daemonize(). This way, whoever started us
20577c478bd9Sstevel@tonic-gate 	 * can use the loopback interface as soon as they get a zero exit
20587c478bd9Sstevel@tonic-gate 	 * status.
20597c478bd9Sstevel@tonic-gate 	 */
20607c478bd9Sstevel@tonic-gate 	lsock_v4 = setup_listener(AF_INET);
20617c478bd9Sstevel@tonic-gate 	lsock_v6 = setup_listener(AF_INET6);
20627c478bd9Sstevel@tonic-gate 
20637c478bd9Sstevel@tonic-gate 	if (lsock_v4 < 0 && lsock_v6 < 0) {
20647c478bd9Sstevel@tonic-gate 		logerr("main: setup_listener failed for both IPv4 and IPv6\n");
20657c478bd9Sstevel@tonic-gate 		exit(1);
20667c478bd9Sstevel@tonic-gate 	}
20677c478bd9Sstevel@tonic-gate 
20687c478bd9Sstevel@tonic-gate 	if (!foreground) {
20697c478bd9Sstevel@tonic-gate 		if (!daemonize()) {
20707c478bd9Sstevel@tonic-gate 			logerr("cannot daemonize\n");
20717c478bd9Sstevel@tonic-gate 			exit(EXIT_FAILURE);
20727c478bd9Sstevel@tonic-gate 		}
20737c478bd9Sstevel@tonic-gate 		initlog();
20747c478bd9Sstevel@tonic-gate 	}
20757c478bd9Sstevel@tonic-gate 
20767c478bd9Sstevel@tonic-gate 	/*
20777c478bd9Sstevel@tonic-gate 	 * Initializations:
20787c478bd9Sstevel@tonic-gate 	 * 1. Create ifsock* sockets. These are used for performing SIOC*
20797c478bd9Sstevel@tonic-gate 	 *    ioctls. We have 2 sockets 1 each for IPv4 and IPv6.
20807c478bd9Sstevel@tonic-gate 	 * 2. Initialize a pipe for handling/recording signal events.
20817c478bd9Sstevel@tonic-gate 	 * 3. Create the routing sockets,  used for listening
20827c478bd9Sstevel@tonic-gate 	 *    to routing / interface changes.
20837c478bd9Sstevel@tonic-gate 	 * 4. phyint_init() - Initialize physical interface state
20847c478bd9Sstevel@tonic-gate 	 *    (in mpd_tables.c).  Must be done before creating interfaces,
20857c478bd9Sstevel@tonic-gate 	 *    which timer_init() does indirectly.
208627438c18SJon Anderson 	 * 5. Query kernel for route entry sizes (v4 and v6).
208727438c18SJon Anderson 	 * 6. timer_init()  - Initialize timer related stuff
208827438c18SJon Anderson 	 * 7. initifs() - Initialize our database of all known interfaces
208927438c18SJon Anderson 	 * 8. init_router_targets() - Initialize our database of all known
20907c478bd9Sstevel@tonic-gate 	 *    router targets.
20917c478bd9Sstevel@tonic-gate 	 */
20927c478bd9Sstevel@tonic-gate 	ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
20937c478bd9Sstevel@tonic-gate 	if (ifsock_v4 < 0) {
20947c478bd9Sstevel@tonic-gate 		logperror("main: IPv4 socket open");
20957c478bd9Sstevel@tonic-gate 		exit(1);
20967c478bd9Sstevel@tonic-gate 	}
20977c478bd9Sstevel@tonic-gate 
20987c478bd9Sstevel@tonic-gate 	ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
20997c478bd9Sstevel@tonic-gate 	if (ifsock_v6 < 0) {
21007c478bd9Sstevel@tonic-gate 		logperror("main: IPv6 socket open");
21017c478bd9Sstevel@tonic-gate 		exit(1);
21027c478bd9Sstevel@tonic-gate 	}
21037c478bd9Sstevel@tonic-gate 
21047c478bd9Sstevel@tonic-gate 	setup_eventpipe();
21057c478bd9Sstevel@tonic-gate 
21067c478bd9Sstevel@tonic-gate 	rtsock_v4 = setup_rtsock(AF_INET);
21077c478bd9Sstevel@tonic-gate 	rtsock_v6 = setup_rtsock(AF_INET6);
21087c478bd9Sstevel@tonic-gate 
21097c478bd9Sstevel@tonic-gate 	if (phyint_init() == -1) {
21107c478bd9Sstevel@tonic-gate 		logerr("cannot initialize physical interface structures");
21117c478bd9Sstevel@tonic-gate 		exit(1);
21127c478bd9Sstevel@tonic-gate 	}
21137c478bd9Sstevel@tonic-gate 
211427438c18SJon Anderson 	if (mibwalk(mib_get_constants) == -1)
211527438c18SJon Anderson 		exit(1);
211627438c18SJon Anderson 
21177c478bd9Sstevel@tonic-gate 	timer_init();
21187c478bd9Sstevel@tonic-gate 
21197c478bd9Sstevel@tonic-gate 	initifs();
21207c478bd9Sstevel@tonic-gate 
21217c478bd9Sstevel@tonic-gate 	/*
21227c478bd9Sstevel@tonic-gate 	 * If we're operating in "adopt" mode and no interfaces need to be
2123*bbf21555SRichard Lowe 	 * tracked, shut down (ifconfig(8) will restart us on demand if
21247c478bd9Sstevel@tonic-gate 	 * interfaces are subsequently put into multipathing groups).
21257c478bd9Sstevel@tonic-gate 	 */
21267c478bd9Sstevel@tonic-gate 	if (adopt && phyint_instances == NULL)
21277c478bd9Sstevel@tonic-gate 		exit(0);
21287c478bd9Sstevel@tonic-gate 
21297c478bd9Sstevel@tonic-gate 	/*
21307c478bd9Sstevel@tonic-gate 	 * Main body. Keep listening for activity on any of the sockets
21317c478bd9Sstevel@tonic-gate 	 * that we are monitoring and take appropriate action as necessary.
21327c478bd9Sstevel@tonic-gate 	 * signals are also handled synchronously.
21337c478bd9Sstevel@tonic-gate 	 */
21347c478bd9Sstevel@tonic-gate 	for (;;) {
21357c478bd9Sstevel@tonic-gate 		if (poll(pollfds, pollfd_num, -1) < 0) {
21367c478bd9Sstevel@tonic-gate 			if (errno == EINTR)
21377c478bd9Sstevel@tonic-gate 				continue;
21387c478bd9Sstevel@tonic-gate 			logperror("main: poll");
21397c478bd9Sstevel@tonic-gate 			exit(1);
21407c478bd9Sstevel@tonic-gate 		}
21417c478bd9Sstevel@tonic-gate 		for (i = 0; i < pollfd_num; i++) {
21427c478bd9Sstevel@tonic-gate 			if ((pollfds[i].fd == -1) ||
21437c478bd9Sstevel@tonic-gate 			    !(pollfds[i].revents & POLLIN))
21447c478bd9Sstevel@tonic-gate 				continue;
21457c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == eventpipe_read) {
21467c478bd9Sstevel@tonic-gate 				in_signal(eventpipe_read);
21477c478bd9Sstevel@tonic-gate 				break;
21487c478bd9Sstevel@tonic-gate 			}
21497c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == rtsock_v4 ||
215006cdd167Smeem 			    pollfds[i].fd == rtsock_v6) {
21517c478bd9Sstevel@tonic-gate 				process_rtsock(rtsock_v4, rtsock_v6);
21527c478bd9Sstevel@tonic-gate 				break;
21537c478bd9Sstevel@tonic-gate 			}
2154e11c3f44Smeem 
21557c478bd9Sstevel@tonic-gate 			for (pii = phyint_instances; pii != NULL;
21567c478bd9Sstevel@tonic-gate 			    pii = pii->pii_next) {
21577c478bd9Sstevel@tonic-gate 				if (pollfds[i].fd == pii->pii_probe_sock) {
21587c478bd9Sstevel@tonic-gate 					if (pii->pii_af == AF_INET)
21597c478bd9Sstevel@tonic-gate 						in_data(pii);
21607c478bd9Sstevel@tonic-gate 					else
21617c478bd9Sstevel@tonic-gate 						in6_data(pii);
21627c478bd9Sstevel@tonic-gate 					break;
21637c478bd9Sstevel@tonic-gate 				}
21647c478bd9Sstevel@tonic-gate 			}
2165e11c3f44Smeem 
2166e11c3f44Smeem 			for (pi = phyints; pi != NULL; pi = pi->pi_next) {
2167e11c3f44Smeem 				if (pi->pi_notes != 0 &&
2168e11c3f44Smeem 				    pollfds[i].fd == dlpi_fd(pi->pi_dh)) {
2169e11c3f44Smeem 					(void) dlpi_recv(pi->pi_dh, NULL, NULL,
2170e11c3f44Smeem 					    NULL, NULL, 0, NULL);
2171e11c3f44Smeem 					break;
2172e11c3f44Smeem 				}
2173e11c3f44Smeem 			}
2174e11c3f44Smeem 
21757c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == lsock_v4)
21767c478bd9Sstevel@tonic-gate 				loopback_cmd(lsock_v4, AF_INET);
21777c478bd9Sstevel@tonic-gate 			else if (pollfds[i].fd == lsock_v6)
21787c478bd9Sstevel@tonic-gate 				loopback_cmd(lsock_v6, AF_INET6);
21797c478bd9Sstevel@tonic-gate 		}
21807c478bd9Sstevel@tonic-gate 	}
21817c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
21827c478bd9Sstevel@tonic-gate 	return (EXIT_SUCCESS);
21837c478bd9Sstevel@tonic-gate }
21847c478bd9Sstevel@tonic-gate 
21857c478bd9Sstevel@tonic-gate static int
setup_listener(int af)21867c478bd9Sstevel@tonic-gate setup_listener(int af)
21877c478bd9Sstevel@tonic-gate {
21887c478bd9Sstevel@tonic-gate 	int sock;
21897c478bd9Sstevel@tonic-gate 	int on;
21907c478bd9Sstevel@tonic-gate 	int len;
21917c478bd9Sstevel@tonic-gate 	int ret;
21927c478bd9Sstevel@tonic-gate 	struct sockaddr_storage laddr;
21937c478bd9Sstevel@tonic-gate 	struct sockaddr_in  *sin;
21947c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 *sin6;
21957c478bd9Sstevel@tonic-gate 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
21967c478bd9Sstevel@tonic-gate 
21977c478bd9Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
21987c478bd9Sstevel@tonic-gate 
21997c478bd9Sstevel@tonic-gate 	sock = socket(af, SOCK_STREAM, 0);
22007c478bd9Sstevel@tonic-gate 	if (sock < 0) {
22017c478bd9Sstevel@tonic-gate 		logperror("setup_listener: socket");
22027c478bd9Sstevel@tonic-gate 		exit(1);
22037c478bd9Sstevel@tonic-gate 	}
22047c478bd9Sstevel@tonic-gate 
22057c478bd9Sstevel@tonic-gate 	on = 1;
22067c478bd9Sstevel@tonic-gate 	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
22077c478bd9Sstevel@tonic-gate 	    sizeof (on)) < 0) {
22087c478bd9Sstevel@tonic-gate 		logperror("setup_listener: setsockopt (SO_REUSEADDR)");
22097c478bd9Sstevel@tonic-gate 		exit(1);
22107c478bd9Sstevel@tonic-gate 	}
22117c478bd9Sstevel@tonic-gate 
22127c478bd9Sstevel@tonic-gate 	bzero(&laddr, sizeof (laddr));
22137c478bd9Sstevel@tonic-gate 	laddr.ss_family = af;
22147c478bd9Sstevel@tonic-gate 
22157c478bd9Sstevel@tonic-gate 	if (af == AF_INET) {
22167c478bd9Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&laddr;
22177c478bd9Sstevel@tonic-gate 		sin->sin_port = htons(MPATHD_PORT);
22187c478bd9Sstevel@tonic-gate 		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
22197c478bd9Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in);
22207c478bd9Sstevel@tonic-gate 	} else {
22217c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&laddr;
22227c478bd9Sstevel@tonic-gate 		sin6->sin6_port = htons(MPATHD_PORT);
22237c478bd9Sstevel@tonic-gate 		sin6->sin6_addr = loopback_addr;
22247c478bd9Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in6);
22257c478bd9Sstevel@tonic-gate 	}
22267c478bd9Sstevel@tonic-gate 
22277c478bd9Sstevel@tonic-gate 	ret = bind(sock, (struct sockaddr *)&laddr, len);
22287c478bd9Sstevel@tonic-gate 	if (ret < 0) {
22297c478bd9Sstevel@tonic-gate 		if (errno == EADDRINUSE) {
22307c478bd9Sstevel@tonic-gate 			/*
22317c478bd9Sstevel@tonic-gate 			 * Another instance of mpathd may be already active.
22327c478bd9Sstevel@tonic-gate 			 */
22337c478bd9Sstevel@tonic-gate 			logerr("main: is another instance of in.mpathd "
22347c478bd9Sstevel@tonic-gate 			    "already active?\n");
22357c478bd9Sstevel@tonic-gate 			exit(1);
22367c478bd9Sstevel@tonic-gate 		} else {
22377c478bd9Sstevel@tonic-gate 			(void) close(sock);
22387c478bd9Sstevel@tonic-gate 			return (-1);
22397c478bd9Sstevel@tonic-gate 		}
22407c478bd9Sstevel@tonic-gate 	}
22417c478bd9Sstevel@tonic-gate 	if (listen(sock, 30) < 0) {
22427c478bd9Sstevel@tonic-gate 		logperror("main: listen");
22437c478bd9Sstevel@tonic-gate 		exit(1);
22447c478bd9Sstevel@tonic-gate 	}
22457c478bd9Sstevel@tonic-gate 	if (poll_add(sock) == -1) {
22467c478bd9Sstevel@tonic-gate 		(void) close(sock);
22477c478bd9Sstevel@tonic-gate 		exit(1);
22487c478bd9Sstevel@tonic-gate 	}
22497c478bd9Sstevel@tonic-gate 
22507c478bd9Sstevel@tonic-gate 	return (sock);
22517c478bd9Sstevel@tonic-gate }
22527c478bd9Sstevel@tonic-gate 
22537c478bd9Sstevel@tonic-gate /*
22547c478bd9Sstevel@tonic-gate  * Table of commands and their expected size; used by loopback_cmd().
22557c478bd9Sstevel@tonic-gate  */
22567c478bd9Sstevel@tonic-gate static struct {
22577c478bd9Sstevel@tonic-gate 	const char	*name;
22587c478bd9Sstevel@tonic-gate 	unsigned int	size;
22597c478bd9Sstevel@tonic-gate } commands[] = {
22607c478bd9Sstevel@tonic-gate 	{ "MI_PING",		sizeof (uint32_t)	},
22617c478bd9Sstevel@tonic-gate 	{ "MI_OFFLINE",		sizeof (mi_offline_t)	},
22627c478bd9Sstevel@tonic-gate 	{ "MI_UNDO_OFFLINE",	sizeof (mi_undo_offline_t) },
22637c478bd9Sstevel@tonic-gate 	{ "MI_QUERY",		sizeof (mi_query_t)	}
22647c478bd9Sstevel@tonic-gate };
22657c478bd9Sstevel@tonic-gate 
22667c478bd9Sstevel@tonic-gate /*
2267e11c3f44Smeem  * Commands received over the loopback interface come here (via libipmp).
22687c478bd9Sstevel@tonic-gate  */
22697c478bd9Sstevel@tonic-gate static void
loopback_cmd(int sock,int family)22707c478bd9Sstevel@tonic-gate loopback_cmd(int sock, int family)
22717c478bd9Sstevel@tonic-gate {
22727c478bd9Sstevel@tonic-gate 	int newfd;
22737c478bd9Sstevel@tonic-gate 	ssize_t len;
2274e11c3f44Smeem 	boolean_t is_priv = _B_FALSE;
22757c478bd9Sstevel@tonic-gate 	struct sockaddr_storage	peer;
22767c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*peer_sin;
22777c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*peer_sin6;
22787c478bd9Sstevel@tonic-gate 	socklen_t peerlen;
22797c478bd9Sstevel@tonic-gate 	union mi_commands mpi;
22807c478bd9Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
22817c478bd9Sstevel@tonic-gate 	uint_t cmd;
22827c478bd9Sstevel@tonic-gate 	int retval;
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate 	peerlen = sizeof (peer);
22857c478bd9Sstevel@tonic-gate 	newfd = accept(sock, (struct sockaddr *)&peer, &peerlen);
22867c478bd9Sstevel@tonic-gate 	if (newfd < 0) {
22877c478bd9Sstevel@tonic-gate 		logperror("loopback_cmd: accept");
22887c478bd9Sstevel@tonic-gate 		return;
22897c478bd9Sstevel@tonic-gate 	}
22907c478bd9Sstevel@tonic-gate 
22917c478bd9Sstevel@tonic-gate 	switch (family) {
22927c478bd9Sstevel@tonic-gate 	case AF_INET:
22937c478bd9Sstevel@tonic-gate 		/*
22947c478bd9Sstevel@tonic-gate 		 * Validate the address and port to make sure that
22957c478bd9Sstevel@tonic-gate 		 * non privileged processes don't connect and start
22967c478bd9Sstevel@tonic-gate 		 * talking to us.
22977c478bd9Sstevel@tonic-gate 		 */
22987c478bd9Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in)) {
22997c478bd9Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen);
23007c478bd9Sstevel@tonic-gate 			(void) close(newfd);
23017c478bd9Sstevel@tonic-gate 			return;
23027c478bd9Sstevel@tonic-gate 		}
23037c478bd9Sstevel@tonic-gate 		peer_sin = (struct sockaddr_in *)&peer;
2304e11c3f44Smeem 		is_priv = ntohs(peer_sin->sin_port) < IPPORT_RESERVED;
2305e11c3f44Smeem 		(void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
2306e11c3f44Smeem 		    abuf, sizeof (abuf));
2307e11c3f44Smeem 
2308e11c3f44Smeem 		if (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK) {
23097c478bd9Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
23107c478bd9Sstevel@tonic-gate 			    abuf, ntohs(peer_sin->sin_port));
23117c478bd9Sstevel@tonic-gate 			(void) close(newfd);
23127c478bd9Sstevel@tonic-gate 			return;
23137c478bd9Sstevel@tonic-gate 		}
23147c478bd9Sstevel@tonic-gate 		break;
23157c478bd9Sstevel@tonic-gate 
23167c478bd9Sstevel@tonic-gate 	case AF_INET6:
23177c478bd9Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in6)) {
23187c478bd9Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen);
23197c478bd9Sstevel@tonic-gate 			(void) close(newfd);
23207c478bd9Sstevel@tonic-gate 			return;
23217c478bd9Sstevel@tonic-gate 		}
23227c478bd9Sstevel@tonic-gate 		/*
23237c478bd9Sstevel@tonic-gate 		 * Validate the address and port to make sure that
23247c478bd9Sstevel@tonic-gate 		 * non privileged processes don't connect and start
23257c478bd9Sstevel@tonic-gate 		 * talking to us.
23267c478bd9Sstevel@tonic-gate 		 */
23277c478bd9Sstevel@tonic-gate 		peer_sin6 = (struct sockaddr_in6 *)&peer;
2328e11c3f44Smeem 		is_priv = ntohs(peer_sin6->sin6_port) < IPPORT_RESERVED;
2329e11c3f44Smeem 		(void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
2330e11c3f44Smeem 		    sizeof (abuf));
2331e11c3f44Smeem 		if (!IN6_IS_ADDR_LOOPBACK(&peer_sin6->sin6_addr)) {
23327c478bd9Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
23337c478bd9Sstevel@tonic-gate 			    abuf, ntohs(peer_sin6->sin6_port));
23347c478bd9Sstevel@tonic-gate 			(void) close(newfd);
23357c478bd9Sstevel@tonic-gate 			return;
23367c478bd9Sstevel@tonic-gate 		}
23372dfb8675SToomas Soome 		break;
23387c478bd9Sstevel@tonic-gate 
23397c478bd9Sstevel@tonic-gate 	default:
23407c478bd9Sstevel@tonic-gate 		logdebug("loopback_cmd: family %d\n", family);
23417c478bd9Sstevel@tonic-gate 		(void) close(newfd);
23427c478bd9Sstevel@tonic-gate 		return;
23437c478bd9Sstevel@tonic-gate 	}
23447c478bd9Sstevel@tonic-gate 
23457c478bd9Sstevel@tonic-gate 	/*
23467c478bd9Sstevel@tonic-gate 	 * The sizeof the 'mpi' buffer corresponds to the maximum size of
23477c478bd9Sstevel@tonic-gate 	 * all supported commands
23487c478bd9Sstevel@tonic-gate 	 */
23497c478bd9Sstevel@tonic-gate 	len = read(newfd, &mpi, sizeof (mpi));
23507c478bd9Sstevel@tonic-gate 
23517c478bd9Sstevel@tonic-gate 	/*
23527c478bd9Sstevel@tonic-gate 	 * In theory, we can receive any sized message for a stream socket,
23537c478bd9Sstevel@tonic-gate 	 * but we don't expect that to happen for a small message over a
23547c478bd9Sstevel@tonic-gate 	 * loopback connection.
23557c478bd9Sstevel@tonic-gate 	 */
23567c478bd9Sstevel@tonic-gate 	if (len < sizeof (uint32_t)) {
23577c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: bad command format or read returns "
23587c478bd9Sstevel@tonic-gate 		    "partial data %d\n", len);
2359e11c3f44Smeem 		(void) close(newfd);
2360e11c3f44Smeem 		return;
23617c478bd9Sstevel@tonic-gate 	}
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 	cmd = mpi.mi_command;
23647c478bd9Sstevel@tonic-gate 	if (cmd >= MI_NCMD) {
23657c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: unknown command id `%d'\n", cmd);
23667c478bd9Sstevel@tonic-gate 		(void) close(newfd);
23677c478bd9Sstevel@tonic-gate 		return;
23687c478bd9Sstevel@tonic-gate 	}
23697c478bd9Sstevel@tonic-gate 
2370e11c3f44Smeem 	/*
2371e11c3f44Smeem 	 * Only MI_PING and MI_QUERY can come from unprivileged sources.
2372e11c3f44Smeem 	 */
2373e11c3f44Smeem 	if (!is_priv && (cmd != MI_QUERY && cmd != MI_PING)) {
2374e11c3f44Smeem 		logerr("Unprivileged request from %s for privileged "
2375e11c3f44Smeem 		    "command %s\n", abuf, commands[cmd].name);
2376e11c3f44Smeem 		(void) close(newfd);
2377e11c3f44Smeem 		return;
2378e11c3f44Smeem 	}
2379e11c3f44Smeem 
23807c478bd9Sstevel@tonic-gate 	if (len < commands[cmd].size) {
23817c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
23827c478bd9Sstevel@tonic-gate 		    commands[cmd].name, commands[cmd].size, len);
23837c478bd9Sstevel@tonic-gate 		(void) close(newfd);
23847c478bd9Sstevel@tonic-gate 		return;
23857c478bd9Sstevel@tonic-gate 	}
23867c478bd9Sstevel@tonic-gate 
23877c478bd9Sstevel@tonic-gate 	retval = process_cmd(newfd, &mpi);
23887c478bd9Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS) {
23897c478bd9Sstevel@tonic-gate 		logerr("failed processing %s: %s\n", commands[cmd].name,
23907c478bd9Sstevel@tonic-gate 		    ipmp_errmsg(retval));
23917c478bd9Sstevel@tonic-gate 	}
23927c478bd9Sstevel@tonic-gate 	(void) close(newfd);
23937c478bd9Sstevel@tonic-gate }
23947c478bd9Sstevel@tonic-gate 
23957c478bd9Sstevel@tonic-gate /*
2396e11c3f44Smeem  * Process the commands received via libipmp.
23977c478bd9Sstevel@tonic-gate  */
23987c478bd9Sstevel@tonic-gate static unsigned int
process_cmd(int newfd,union mi_commands * mpi)23997c478bd9Sstevel@tonic-gate process_cmd(int newfd, union mi_commands *mpi)
24007c478bd9Sstevel@tonic-gate {
24017c478bd9Sstevel@tonic-gate 	struct phyint *pi;
24027c478bd9Sstevel@tonic-gate 	struct mi_offline *mio;
24037c478bd9Sstevel@tonic-gate 	struct mi_undo_offline *miu;
2404e11c3f44Smeem 	unsigned int retval;
24057c478bd9Sstevel@tonic-gate 
2406e11c3f44Smeem 	switch (mpi->mi_command) {
2407e11c3f44Smeem 	case MI_PING:
2408e11c3f44Smeem 		return (send_result(newfd, IPMP_SUCCESS, 0));
24097c478bd9Sstevel@tonic-gate 
24107c478bd9Sstevel@tonic-gate 	case MI_OFFLINE:
24117c478bd9Sstevel@tonic-gate 		mio = &mpi->mi_ocmd;
2412e11c3f44Smeem 
24137c478bd9Sstevel@tonic-gate 		pi = phyint_lookup(mio->mio_ifname);
24147c478bd9Sstevel@tonic-gate 		if (pi == NULL)
2415e11c3f44Smeem 			return (send_result(newfd, IPMP_EUNKIF, 0));
24167c478bd9Sstevel@tonic-gate 
2417e11c3f44Smeem 		retval = phyint_offline(pi, mio->mio_min_redundancy);
2418e11c3f44Smeem 		if (retval == IPMP_FAILURE)
24197c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
24207c478bd9Sstevel@tonic-gate 
2421e11c3f44Smeem 		return (send_result(newfd, retval, 0));
24227c478bd9Sstevel@tonic-gate 
24237c478bd9Sstevel@tonic-gate 	case MI_UNDO_OFFLINE:
24247c478bd9Sstevel@tonic-gate 		miu = &mpi->mi_ucmd;
24257c478bd9Sstevel@tonic-gate 
2426e11c3f44Smeem 		pi = phyint_lookup(miu->miu_ifname);
2427e11c3f44Smeem 		if (pi == NULL)
2428e11c3f44Smeem 			return (send_result(newfd, IPMP_EUNKIF, 0));
24297c478bd9Sstevel@tonic-gate 
2430e11c3f44Smeem 		retval = phyint_undo_offline(pi);
2431e11c3f44Smeem 		if (retval == IPMP_FAILURE)
24327c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
24337c478bd9Sstevel@tonic-gate 
2434e11c3f44Smeem 		return (send_result(newfd, retval, 0));
24357c478bd9Sstevel@tonic-gate 
24367c478bd9Sstevel@tonic-gate 	case MI_QUERY:
24377c478bd9Sstevel@tonic-gate 		return (process_query(newfd, &mpi->mi_qcmd));
24387c478bd9Sstevel@tonic-gate 
24397c478bd9Sstevel@tonic-gate 	default:
24407c478bd9Sstevel@tonic-gate 		break;
24417c478bd9Sstevel@tonic-gate 	}
24427c478bd9Sstevel@tonic-gate 
24437c478bd9Sstevel@tonic-gate 	return (send_result(newfd, IPMP_EPROTO, 0));
24447c478bd9Sstevel@tonic-gate }
24457c478bd9Sstevel@tonic-gate 
24467c478bd9Sstevel@tonic-gate /*
24477c478bd9Sstevel@tonic-gate  * Process the query request pointed to by `miq' and send a reply on file
24487c478bd9Sstevel@tonic-gate  * descriptor `fd'.  Returns an IPMP error code.
24497c478bd9Sstevel@tonic-gate  */
24507c478bd9Sstevel@tonic-gate static unsigned int
process_query(int fd,mi_query_t * miq)24517c478bd9Sstevel@tonic-gate process_query(int fd, mi_query_t *miq)
24527c478bd9Sstevel@tonic-gate {
2453e11c3f44Smeem 	ipmp_addrinfo_t		*adinfop;
2454e11c3f44Smeem 	ipmp_addrinfolist_t	*adlp;
24557c478bd9Sstevel@tonic-gate 	ipmp_groupinfo_t	*grinfop;
24567c478bd9Sstevel@tonic-gate 	ipmp_groupinfolist_t	*grlp;
24577c478bd9Sstevel@tonic-gate 	ipmp_grouplist_t	*grlistp;
24587c478bd9Sstevel@tonic-gate 	ipmp_ifinfo_t		*ifinfop;
24597c478bd9Sstevel@tonic-gate 	ipmp_ifinfolist_t	*iflp;
24607c478bd9Sstevel@tonic-gate 	ipmp_snap_t		*snap;
24617c478bd9Sstevel@tonic-gate 	unsigned int		retval;
24627c478bd9Sstevel@tonic-gate 
24637c478bd9Sstevel@tonic-gate 	switch (miq->miq_inforeq) {
2464e11c3f44Smeem 	case IPMP_ADDRINFO:
2465e11c3f44Smeem 		retval = getgraddrinfo(miq->miq_grname, &miq->miq_addr,
2466e11c3f44Smeem 		    &adinfop);
2467e11c3f44Smeem 		if (retval != IPMP_SUCCESS)
2468e11c3f44Smeem 			return (send_result(fd, retval, errno));
2469e11c3f44Smeem 
2470e11c3f44Smeem 		retval = send_result(fd, IPMP_SUCCESS, 0);
2471e11c3f44Smeem 		if (retval == IPMP_SUCCESS)
2472e11c3f44Smeem 			retval = send_addrinfo(fd, adinfop);
2473e11c3f44Smeem 
2474e11c3f44Smeem 		ipmp_freeaddrinfo(adinfop);
2475e11c3f44Smeem 		return (retval);
2476e11c3f44Smeem 
24777c478bd9Sstevel@tonic-gate 	case IPMP_GROUPLIST:
24787c478bd9Sstevel@tonic-gate 		retval = getgrouplist(&grlistp);
24797c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
24807c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
24817c478bd9Sstevel@tonic-gate 
24827c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
24837c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
24847c478bd9Sstevel@tonic-gate 			retval = send_grouplist(fd, grlistp);
24857c478bd9Sstevel@tonic-gate 
24867c478bd9Sstevel@tonic-gate 		ipmp_freegrouplist(grlistp);
24877c478bd9Sstevel@tonic-gate 		return (retval);
24887c478bd9Sstevel@tonic-gate 
24897c478bd9Sstevel@tonic-gate 	case IPMP_GROUPINFO:
24907c478bd9Sstevel@tonic-gate 		miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
2491e11c3f44Smeem 		retval = getgroupinfo(miq->miq_grname, &grinfop);
24927c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
24937c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
24947c478bd9Sstevel@tonic-gate 
24957c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
24967c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
24977c478bd9Sstevel@tonic-gate 			retval = send_groupinfo(fd, grinfop);
24987c478bd9Sstevel@tonic-gate 
24997c478bd9Sstevel@tonic-gate 		ipmp_freegroupinfo(grinfop);
25007c478bd9Sstevel@tonic-gate 		return (retval);
25017c478bd9Sstevel@tonic-gate 
25027c478bd9Sstevel@tonic-gate 	case IPMP_IFINFO:
25037c478bd9Sstevel@tonic-gate 		miq->miq_ifname[LIFNAMSIZ - 1] = '\0';
25047c478bd9Sstevel@tonic-gate 		retval = getifinfo(miq->miq_ifname, &ifinfop);
25057c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
25067c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
25077c478bd9Sstevel@tonic-gate 
25087c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
25097c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
25107c478bd9Sstevel@tonic-gate 			retval = send_ifinfo(fd, ifinfop);
25117c478bd9Sstevel@tonic-gate 
25127c478bd9Sstevel@tonic-gate 		ipmp_freeifinfo(ifinfop);
25137c478bd9Sstevel@tonic-gate 		return (retval);
25147c478bd9Sstevel@tonic-gate 
25157c478bd9Sstevel@tonic-gate 	case IPMP_SNAP:
2516e11c3f44Smeem 		/*
2517e11c3f44Smeem 		 * Before taking the snapshot, sync with the kernel.
2518e11c3f44Smeem 		 */
2519e11c3f44Smeem 		initifs();
2520e11c3f44Smeem 
25217c478bd9Sstevel@tonic-gate 		retval = getsnap(&snap);
25227c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
25237c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
25247c478bd9Sstevel@tonic-gate 
25257c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
25267c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
25277c478bd9Sstevel@tonic-gate 			goto out;
25287c478bd9Sstevel@tonic-gate 
252980d556f9SHans Rosenfeld 		retval = send_grouplist(fd, snap->sn_grlistp);
25307c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
25317c478bd9Sstevel@tonic-gate 			goto out;
25327c478bd9Sstevel@tonic-gate 
253380d556f9SHans Rosenfeld 		retval = ipmp_writetlv(fd, IPMP_IFCNT, sizeof (uint32_t),
253480d556f9SHans Rosenfeld 		    &snap->sn_nif);
25357c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
25367c478bd9Sstevel@tonic-gate 			goto out;
25377c478bd9Sstevel@tonic-gate 
25387c478bd9Sstevel@tonic-gate 		iflp = snap->sn_ifinfolistp;
25397c478bd9Sstevel@tonic-gate 		for (; iflp != NULL; iflp = iflp->ifl_next) {
25407c478bd9Sstevel@tonic-gate 			retval = send_ifinfo(fd, iflp->ifl_ifinfop);
25417c478bd9Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
25427c478bd9Sstevel@tonic-gate 				goto out;
25437c478bd9Sstevel@tonic-gate 		}
25447c478bd9Sstevel@tonic-gate 
254580d556f9SHans Rosenfeld 		retval = ipmp_writetlv(fd, IPMP_GROUPCNT, sizeof (uint32_t),
254680d556f9SHans Rosenfeld 		    &snap->sn_ngroup);
254780d556f9SHans Rosenfeld 		if (retval != IPMP_SUCCESS)
254880d556f9SHans Rosenfeld 			goto out;
254980d556f9SHans Rosenfeld 
25507c478bd9Sstevel@tonic-gate 		grlp = snap->sn_grinfolistp;
25517c478bd9Sstevel@tonic-gate 		for (; grlp != NULL; grlp = grlp->grl_next) {
25527c478bd9Sstevel@tonic-gate 			retval = send_groupinfo(fd, grlp->grl_grinfop);
25537c478bd9Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
25547c478bd9Sstevel@tonic-gate 				goto out;
25557c478bd9Sstevel@tonic-gate 		}
2556e11c3f44Smeem 
255780d556f9SHans Rosenfeld 		retval = ipmp_writetlv(fd, IPMP_ADDRCNT, sizeof (uint32_t),
255880d556f9SHans Rosenfeld 		    &snap->sn_naddr);
255980d556f9SHans Rosenfeld 		if (retval != IPMP_SUCCESS)
256080d556f9SHans Rosenfeld 			goto out;
256180d556f9SHans Rosenfeld 
2562e11c3f44Smeem 		adlp = snap->sn_adinfolistp;
2563e11c3f44Smeem 		for (; adlp != NULL; adlp = adlp->adl_next) {
2564e11c3f44Smeem 			retval = send_addrinfo(fd, adlp->adl_adinfop);
2565e11c3f44Smeem 			if (retval != IPMP_SUCCESS)
2566e11c3f44Smeem 				goto out;
2567e11c3f44Smeem 		}
25687c478bd9Sstevel@tonic-gate 	out:
25697c478bd9Sstevel@tonic-gate 		ipmp_snap_free(snap);
25707c478bd9Sstevel@tonic-gate 		return (retval);
25717c478bd9Sstevel@tonic-gate 
25727c478bd9Sstevel@tonic-gate 	default:
25737c478bd9Sstevel@tonic-gate 		break;
25747c478bd9Sstevel@tonic-gate 
25757c478bd9Sstevel@tonic-gate 	}
25767c478bd9Sstevel@tonic-gate 	return (send_result(fd, IPMP_EPROTO, 0));
25777c478bd9Sstevel@tonic-gate }
25787c478bd9Sstevel@tonic-gate 
25797c478bd9Sstevel@tonic-gate /*
25807c478bd9Sstevel@tonic-gate  * Send the group information pointed to by `grinfop' on file descriptor `fd'.
25817c478bd9Sstevel@tonic-gate  * Returns an IPMP error code.
25827c478bd9Sstevel@tonic-gate  */
25837c478bd9Sstevel@tonic-gate static unsigned int
send_groupinfo(int fd,ipmp_groupinfo_t * grinfop)25847c478bd9Sstevel@tonic-gate send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
25857c478bd9Sstevel@tonic-gate {
25867c478bd9Sstevel@tonic-gate 	ipmp_iflist_t	*iflistp = grinfop->gr_iflistp;
2587e11c3f44Smeem 	ipmp_addrlist_t	*adlistp = grinfop->gr_adlistp;
258880d556f9SHans Rosenfeld 	ipmp_groupinfo_xfer_t grxfer;
25897c478bd9Sstevel@tonic-gate 	unsigned int	retval;
25907c478bd9Sstevel@tonic-gate 
259180d556f9SHans Rosenfeld 	/*
259280d556f9SHans Rosenfeld 	 * We can't directly transfer an ipmp_groupinfo_t due to the embedded
259380d556f9SHans Rosenfeld 	 * pointers to ipmp_iflist_t and ipmp_addr_list_t. Copy the data over
259480d556f9SHans Rosenfeld 	 * to a temporary transfer structure that doesn't have these embedded
259580d556f9SHans Rosenfeld 	 * pointers.
259680d556f9SHans Rosenfeld 	 */
259780d556f9SHans Rosenfeld 	memset(&grxfer, 0, sizeof (grxfer));
259880d556f9SHans Rosenfeld 
259980d556f9SHans Rosenfeld 	grxfer.grx_sig = grinfop->gr_sig;
260080d556f9SHans Rosenfeld 	grxfer.grx_state = grinfop->gr_state;
260180d556f9SHans Rosenfeld 	grxfer.grx_fdt = grinfop->gr_fdt;
260280d556f9SHans Rosenfeld 
260380d556f9SHans Rosenfeld 	memcpy(grxfer.grx_name, grinfop->gr_name, sizeof (grxfer.grx_name));
260480d556f9SHans Rosenfeld 	memcpy(grxfer.grx_ifname, grinfop->gr_ifname,
260580d556f9SHans Rosenfeld 	    sizeof (grxfer.grx_ifname));
260680d556f9SHans Rosenfeld 	memcpy(grxfer.grx_m4ifname, grinfop->gr_m4ifname,
260780d556f9SHans Rosenfeld 	    sizeof (grxfer.grx_m4ifname));
260880d556f9SHans Rosenfeld 	memcpy(grxfer.grx_m6ifname, grinfop->gr_m6ifname,
260980d556f9SHans Rosenfeld 	    sizeof (grxfer.grx_m6ifname));
261080d556f9SHans Rosenfeld 	memcpy(grxfer.grx_bcifname, grinfop->gr_bcifname,
261180d556f9SHans Rosenfeld 	    sizeof (grxfer.grx_bcifname));
261280d556f9SHans Rosenfeld 
261380d556f9SHans Rosenfeld 	retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (grxfer), &grxfer);
26147c478bd9Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS)
26157c478bd9Sstevel@tonic-gate 		return (retval);
26167c478bd9Sstevel@tonic-gate 
2617e11c3f44Smeem 	retval = ipmp_writetlv(fd, IPMP_IFLIST,
2618e11c3f44Smeem 	    IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp);
2619e11c3f44Smeem 	if (retval != IPMP_SUCCESS)
2620e11c3f44Smeem 		return (retval);
2621e11c3f44Smeem 
2622e11c3f44Smeem 	return (ipmp_writetlv(fd, IPMP_ADDRLIST,
2623e11c3f44Smeem 	    IPMP_ADDRLIST_SIZE(adlistp->al_naddr), adlistp));
26247c478bd9Sstevel@tonic-gate }
26257c478bd9Sstevel@tonic-gate 
26267c478bd9Sstevel@tonic-gate /*
26277c478bd9Sstevel@tonic-gate  * Send the interface information pointed to by `ifinfop' on file descriptor
26287c478bd9Sstevel@tonic-gate  * `fd'.  Returns an IPMP error code.
26297c478bd9Sstevel@tonic-gate  */
26307c478bd9Sstevel@tonic-gate static unsigned int
send_ifinfo(int fd,ipmp_ifinfo_t * ifinfop)26317c478bd9Sstevel@tonic-gate send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
26327c478bd9Sstevel@tonic-gate {
2633e11c3f44Smeem 	ipmp_addrlist_t	*adlist4p = ifinfop->if_targinfo4.it_targlistp;
2634e11c3f44Smeem 	ipmp_addrlist_t	*adlist6p = ifinfop->if_targinfo6.it_targlistp;
263580d556f9SHans Rosenfeld 	ipmp_ifinfo_xfer_t ifxfer;
2636e11c3f44Smeem 	unsigned int	retval;
2637e11c3f44Smeem 
263880d556f9SHans Rosenfeld 	/*
263980d556f9SHans Rosenfeld 	 * We can't directly tranfer an ipmp_ifinfo_t due to the embedded
264080d556f9SHans Rosenfeld 	 * ipmp_addrlist_t pointer in if_targinfo_t. Copy the data over to
264180d556f9SHans Rosenfeld 	 * a temporary transfer structure that doesn't have that embedded
264280d556f9SHans Rosenfeld 	 * pointer.
264380d556f9SHans Rosenfeld 	 */
264480d556f9SHans Rosenfeld 	memset(&ifxfer, 0, sizeof (ifxfer));
264580d556f9SHans Rosenfeld 
264680d556f9SHans Rosenfeld 	ifxfer.ifx_state = ifinfop->if_state;
264780d556f9SHans Rosenfeld 	ifxfer.ifx_type = ifinfop->if_type;
264880d556f9SHans Rosenfeld 	ifxfer.ifx_linkstate = ifinfop->if_linkstate;
264980d556f9SHans Rosenfeld 	ifxfer.ifx_probestate = ifinfop->if_probestate;
265080d556f9SHans Rosenfeld 	ifxfer.ifx_flags = ifinfop->if_flags;
265180d556f9SHans Rosenfeld 	ifxfer.ifx_targinfo4.itx_testaddr = ifinfop->if_targinfo4.it_testaddr;
265280d556f9SHans Rosenfeld 	ifxfer.ifx_targinfo4.itx_targmode = ifinfop->if_targinfo4.it_targmode;
265380d556f9SHans Rosenfeld 	ifxfer.ifx_targinfo6.itx_testaddr = ifinfop->if_targinfo6.it_testaddr;
265480d556f9SHans Rosenfeld 	ifxfer.ifx_targinfo6.itx_targmode = ifinfop->if_targinfo6.it_targmode;
265580d556f9SHans Rosenfeld 
265680d556f9SHans Rosenfeld 	memcpy(ifxfer.ifx_name, ifinfop->if_name, sizeof (ifxfer.ifx_name));
265780d556f9SHans Rosenfeld 	memcpy(ifxfer.ifx_group, ifinfop->if_group, sizeof (ifxfer.ifx_group));
265880d556f9SHans Rosenfeld 	memcpy(ifxfer.ifx_targinfo4.itx_name, ifinfop->if_targinfo4.it_name,
265980d556f9SHans Rosenfeld 	    sizeof (ifxfer.ifx_targinfo4.itx_name));
266080d556f9SHans Rosenfeld 	memcpy(ifxfer.ifx_targinfo6.itx_name, ifinfop->if_targinfo6.it_name,
266180d556f9SHans Rosenfeld 	    sizeof (ifxfer.ifx_targinfo6.itx_name));
266280d556f9SHans Rosenfeld 
266380d556f9SHans Rosenfeld 	retval = ipmp_writetlv(fd, IPMP_IFINFO, sizeof (ifxfer), &ifxfer);
2664e11c3f44Smeem 	if (retval != IPMP_SUCCESS)
2665e11c3f44Smeem 		return (retval);
2666e11c3f44Smeem 
2667e11c3f44Smeem 	retval = ipmp_writetlv(fd, IPMP_ADDRLIST,
2668e11c3f44Smeem 	    IPMP_ADDRLIST_SIZE(adlist4p->al_naddr), adlist4p);
2669e11c3f44Smeem 	if (retval != IPMP_SUCCESS)
2670e11c3f44Smeem 		return (retval);
2671e11c3f44Smeem 
2672e11c3f44Smeem 	return (ipmp_writetlv(fd, IPMP_ADDRLIST,
2673e11c3f44Smeem 	    IPMP_ADDRLIST_SIZE(adlist6p->al_naddr), adlist6p));
2674e11c3f44Smeem }
2675e11c3f44Smeem 
2676e11c3f44Smeem /*
2677e11c3f44Smeem  * Send the address information pointed to by `adinfop' on file descriptor
2678e11c3f44Smeem  * `fd'.  Returns an IPMP error code.
2679e11c3f44Smeem  */
2680e11c3f44Smeem static unsigned int
send_addrinfo(int fd,ipmp_addrinfo_t * adinfop)2681e11c3f44Smeem send_addrinfo(int fd, ipmp_addrinfo_t *adinfop)
2682e11c3f44Smeem {
2683e11c3f44Smeem 	return (ipmp_writetlv(fd, IPMP_ADDRINFO, sizeof (*adinfop), adinfop));
26847c478bd9Sstevel@tonic-gate }
26857c478bd9Sstevel@tonic-gate 
26867c478bd9Sstevel@tonic-gate /*
26877c478bd9Sstevel@tonic-gate  * Send the group list pointed to by `grlistp' on file descriptor `fd'.
26887c478bd9Sstevel@tonic-gate  * Returns an IPMP error code.
26897c478bd9Sstevel@tonic-gate  */
26907c478bd9Sstevel@tonic-gate static unsigned int
send_grouplist(int fd,ipmp_grouplist_t * grlistp)26917c478bd9Sstevel@tonic-gate send_grouplist(int fd, ipmp_grouplist_t *grlistp)
26927c478bd9Sstevel@tonic-gate {
26937c478bd9Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_GROUPLIST,
26947c478bd9Sstevel@tonic-gate 	    IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp));
26957c478bd9Sstevel@tonic-gate }
26967c478bd9Sstevel@tonic-gate 
26977c478bd9Sstevel@tonic-gate /*
26987c478bd9Sstevel@tonic-gate  * Initialize an mi_result_t structure using `error' and `syserror' and
26997c478bd9Sstevel@tonic-gate  * send it on file descriptor `fd'.  Returns an IPMP error code.
27007c478bd9Sstevel@tonic-gate  */
27017c478bd9Sstevel@tonic-gate static unsigned int
send_result(int fd,unsigned int error,int syserror)27027c478bd9Sstevel@tonic-gate send_result(int fd, unsigned int error, int syserror)
27037c478bd9Sstevel@tonic-gate {
27047c478bd9Sstevel@tonic-gate 	mi_result_t me;
27057c478bd9Sstevel@tonic-gate 
27067c478bd9Sstevel@tonic-gate 	me.me_mpathd_error = error;
27077c478bd9Sstevel@tonic-gate 	if (error == IPMP_FAILURE)
27087c478bd9Sstevel@tonic-gate 		me.me_sys_error = syserror;
27097c478bd9Sstevel@tonic-gate 	else
27107c478bd9Sstevel@tonic-gate 		me.me_sys_error = 0;
27117c478bd9Sstevel@tonic-gate 
27127c478bd9Sstevel@tonic-gate 	return (ipmp_write(fd, &me, sizeof (me)));
27137c478bd9Sstevel@tonic-gate }
27147c478bd9Sstevel@tonic-gate 
27157c478bd9Sstevel@tonic-gate /*
27167c478bd9Sstevel@tonic-gate  * Daemonize the process.
27177c478bd9Sstevel@tonic-gate  */
27187c478bd9Sstevel@tonic-gate static boolean_t
daemonize(void)27197c478bd9Sstevel@tonic-gate daemonize(void)
27207c478bd9Sstevel@tonic-gate {
27217c478bd9Sstevel@tonic-gate 	switch (fork()) {
27227c478bd9Sstevel@tonic-gate 	case -1:
27237c478bd9Sstevel@tonic-gate 		return (_B_FALSE);
27247c478bd9Sstevel@tonic-gate 
27257c478bd9Sstevel@tonic-gate 	case  0:
27267c478bd9Sstevel@tonic-gate 		/*
27277c478bd9Sstevel@tonic-gate 		 * Lose our controlling terminal, and become both a session
27287c478bd9Sstevel@tonic-gate 		 * leader and a process group leader.
27297c478bd9Sstevel@tonic-gate 		 */
27307c478bd9Sstevel@tonic-gate 		if (setsid() == -1)
27317c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
27327c478bd9Sstevel@tonic-gate 
27337c478bd9Sstevel@tonic-gate 		/*
27347c478bd9Sstevel@tonic-gate 		 * Under POSIX, a session leader can accidentally (through
27357c478bd9Sstevel@tonic-gate 		 * open(2)) acquire a controlling terminal if it does not
27367c478bd9Sstevel@tonic-gate 		 * have one.  Just to be safe, fork() again so we are not a
27377c478bd9Sstevel@tonic-gate 		 * session leader.
27387c478bd9Sstevel@tonic-gate 		 */
27397c478bd9Sstevel@tonic-gate 		switch (fork()) {
27407c478bd9Sstevel@tonic-gate 		case -1:
27417c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
27427c478bd9Sstevel@tonic-gate 
27437c478bd9Sstevel@tonic-gate 		case 0:
27447c478bd9Sstevel@tonic-gate 			(void) chdir("/");
27457c478bd9Sstevel@tonic-gate 			(void) umask(022);
27467c478bd9Sstevel@tonic-gate 			(void) fdwalk(closefunc, NULL);
27477c478bd9Sstevel@tonic-gate 			break;
27487c478bd9Sstevel@tonic-gate 
27497c478bd9Sstevel@tonic-gate 		default:
27507c478bd9Sstevel@tonic-gate 			_exit(EXIT_SUCCESS);
27517c478bd9Sstevel@tonic-gate 		}
27527c478bd9Sstevel@tonic-gate 		break;
27537c478bd9Sstevel@tonic-gate 
27547c478bd9Sstevel@tonic-gate 	default:
27557c478bd9Sstevel@tonic-gate 		_exit(EXIT_SUCCESS);
27567c478bd9Sstevel@tonic-gate 	}
27577c478bd9Sstevel@tonic-gate 
27587c478bd9Sstevel@tonic-gate 	return (_B_TRUE);
27597c478bd9Sstevel@tonic-gate }
27607c478bd9Sstevel@tonic-gate 
27617c478bd9Sstevel@tonic-gate /*
27627c478bd9Sstevel@tonic-gate  * The parent has created some fds before forking on purpose, keep them open.
27637c478bd9Sstevel@tonic-gate  */
27647c478bd9Sstevel@tonic-gate static int
closefunc(void * not_used,int fd)27657c478bd9Sstevel@tonic-gate closefunc(void *not_used, int fd)
27667c478bd9Sstevel@tonic-gate {
27677c478bd9Sstevel@tonic-gate 	if (fd != lsock_v4 && fd != lsock_v6)
27687c478bd9Sstevel@tonic-gate 		(void) close(fd);
27697c478bd9Sstevel@tonic-gate 	return (0);
27707c478bd9Sstevel@tonic-gate }
27717c478bd9Sstevel@tonic-gate 
27727c478bd9Sstevel@tonic-gate /* LOGGER */
27737c478bd9Sstevel@tonic-gate 
27747c478bd9Sstevel@tonic-gate #include <syslog.h>
27757c478bd9Sstevel@tonic-gate 
27767c478bd9Sstevel@tonic-gate /*
27777c478bd9Sstevel@tonic-gate  * Logging routines.  All routines log to syslog, unless the daemon is
27787c478bd9Sstevel@tonic-gate  * running in the foreground, in which case the logging goes to stderr.
27797c478bd9Sstevel@tonic-gate  *
27807c478bd9Sstevel@tonic-gate  * The following routines are available:
27817c478bd9Sstevel@tonic-gate  *
27827c478bd9Sstevel@tonic-gate  *	logdebug(): A printf-like function for outputting debug messages
27837c478bd9Sstevel@tonic-gate  *	(messages at LOG_DEBUG) that are only of use to developers.
27847c478bd9Sstevel@tonic-gate  *
27857c478bd9Sstevel@tonic-gate  *	logtrace(): A printf-like function for outputting tracing messages
27867c478bd9Sstevel@tonic-gate  *	(messages at LOG_INFO) from the daemon.	 This is typically used
27877c478bd9Sstevel@tonic-gate  *	to log the receipt of interesting network-related conditions.
27887c478bd9Sstevel@tonic-gate  *
27897c478bd9Sstevel@tonic-gate  *	logerr(): A printf-like function for outputting error messages
27907c478bd9Sstevel@tonic-gate  *	(messages at LOG_ERR) from the daemon.
27917c478bd9Sstevel@tonic-gate  *
27927c478bd9Sstevel@tonic-gate  *	logperror*(): A set of functions used to output error messages
27937c478bd9Sstevel@tonic-gate  *	(messages at LOG_ERR); these automatically append strerror(errno)
27947c478bd9Sstevel@tonic-gate  *	and a newline to the message passed to them.
27957c478bd9Sstevel@tonic-gate  *
27967c478bd9Sstevel@tonic-gate  * NOTE: since the logging functions write to syslog, the messages passed
27977c478bd9Sstevel@tonic-gate  *	 to them are not eligible for localization.  Thus, gettext() must
27987c478bd9Sstevel@tonic-gate  *	 *not* be used.
27997c478bd9Sstevel@tonic-gate  */
28007c478bd9Sstevel@tonic-gate 
/*
 * Starts at zero and is incremented by initlog().  The logging routines
 * below write to syslog when it is nonzero and to stderr otherwise.
 */
static int logging = 0;

/*
 * Switch the logging routines over to syslog, opening the log with the
 * "in.mpathd" identity, LOG_PID, and the LOG_DAEMON facility.
 */
static void
initlog(void)
{
	++logging;
	openlog("in.mpathd", LOG_PID, LOG_DAEMON);
}
28097c478bd9Sstevel@tonic-gate 
2810e3e7cd29Smeem /* PRINTFLIKE2 */
28117c478bd9Sstevel@tonic-gate void
logmsg(int pri,const char * fmt,...)2812e3e7cd29Smeem logmsg(int pri, const char *fmt, ...)
28137c478bd9Sstevel@tonic-gate {
28147c478bd9Sstevel@tonic-gate 	va_list ap;
28157c478bd9Sstevel@tonic-gate 
28167c478bd9Sstevel@tonic-gate 	va_start(ap, fmt);
28177c478bd9Sstevel@tonic-gate 
28187c478bd9Sstevel@tonic-gate 	if (logging)
2819e3e7cd29Smeem 		vsyslog(pri, fmt, ap);
28207c478bd9Sstevel@tonic-gate 	else
28217c478bd9Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
28227c478bd9Sstevel@tonic-gate 	va_end(ap);
28237c478bd9Sstevel@tonic-gate }
28247c478bd9Sstevel@tonic-gate 
28257c478bd9Sstevel@tonic-gate /* PRINTFLIKE1 */
28267c478bd9Sstevel@tonic-gate void
logperror(const char * str)2827e3e7cd29Smeem logperror(const char *str)
28287c478bd9Sstevel@tonic-gate {
28297c478bd9Sstevel@tonic-gate 	if (logging)
28307c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s: %m\n", str);
28317c478bd9Sstevel@tonic-gate 	else
28327c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
28337c478bd9Sstevel@tonic-gate }
28347c478bd9Sstevel@tonic-gate 
28357c478bd9Sstevel@tonic-gate void
logperror_pii(struct phyint_instance * pii,const char * str)2836e3e7cd29Smeem logperror_pii(struct phyint_instance *pii, const char *str)
28377c478bd9Sstevel@tonic-gate {
28387c478bd9Sstevel@tonic-gate 	if (logging) {
28397c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
28407c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
28417c478bd9Sstevel@tonic-gate 	} else {
28427c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
28437c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
28447c478bd9Sstevel@tonic-gate 		    strerror(errno));
28457c478bd9Sstevel@tonic-gate 	}
28467c478bd9Sstevel@tonic-gate }
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate void
logperror_li(struct logint * li,const char * str)2849e3e7cd29Smeem logperror_li(struct logint *li, const char *str)
28507c478bd9Sstevel@tonic-gate {
28517c478bd9Sstevel@tonic-gate 	struct	phyint_instance	*pii = li->li_phyint_inst;
28527c478bd9Sstevel@tonic-gate 
28537c478bd9Sstevel@tonic-gate 	if (logging) {
28547c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
28557c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name);
28567c478bd9Sstevel@tonic-gate 	} else {
28577c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
28587c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name,
28597c478bd9Sstevel@tonic-gate 		    strerror(errno));
28607c478bd9Sstevel@tonic-gate 	}
28617c478bd9Sstevel@tonic-gate }
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate void
close_probe_socket(struct phyint_instance * pii,boolean_t polled)28647c478bd9Sstevel@tonic-gate close_probe_socket(struct phyint_instance *pii, boolean_t polled)
28657c478bd9Sstevel@tonic-gate {
28667c478bd9Sstevel@tonic-gate 	if (polled)
28677c478bd9Sstevel@tonic-gate 		(void) poll_remove(pii->pii_probe_sock);
28687c478bd9Sstevel@tonic-gate 	(void) close(pii->pii_probe_sock);
28697c478bd9Sstevel@tonic-gate 	pii->pii_probe_sock = -1;
28707c478bd9Sstevel@tonic-gate 	pii->pii_basetime_inited = 0;
28717c478bd9Sstevel@tonic-gate }
2872e11c3f44Smeem 
2873e11c3f44Smeem boolean_t
addrlist_add(addrlist_t ** addrsp,const char * name,uint64_t flags,struct sockaddr_storage * ssp)2874e11c3f44Smeem addrlist_add(addrlist_t **addrsp, const char *name, uint64_t flags,
2875e11c3f44Smeem     struct sockaddr_storage *ssp)
2876e11c3f44Smeem {
2877e11c3f44Smeem 	addrlist_t *addrp;
2878e11c3f44Smeem 
2879e11c3f44Smeem 	if ((addrp = malloc(sizeof (addrlist_t))) == NULL)
2880e11c3f44Smeem 		return (_B_FALSE);
2881e11c3f44Smeem 
2882e11c3f44Smeem 	(void) strlcpy(addrp->al_name, name, LIFNAMSIZ);
2883e11c3f44Smeem 	addrp->al_flags = flags;
2884e11c3f44Smeem 	addrp->al_addr = *ssp;
2885e11c3f44Smeem 	addrp->al_next = *addrsp;
2886e11c3f44Smeem 	*addrsp = addrp;
2887e11c3f44Smeem 	return (_B_TRUE);
2888e11c3f44Smeem }
2889e11c3f44Smeem 
2890e11c3f44Smeem void
addrlist_free(addrlist_t ** addrsp)2891e11c3f44Smeem addrlist_free(addrlist_t **addrsp)
2892e11c3f44Smeem {
2893e11c3f44Smeem 	addrlist_t *addrp, *next_addrp;
2894e11c3f44Smeem 
2895e11c3f44Smeem 	for (addrp = *addrsp; addrp != NULL; addrp = next_addrp) {
2896e11c3f44Smeem 		next_addrp = addrp->al_next;
2897e11c3f44Smeem 		free(addrp);
2898e11c3f44Smeem 	}
2899e11c3f44Smeem 	*addrsp = NULL;
2900e11c3f44Smeem }
290127438c18SJon Anderson 
290227438c18SJon Anderson /*
290327438c18SJon Anderson  * Send down a T_OPTMGMT_REQ to ip asking for all data in the various
290427438c18SJon Anderson  * tables defined by mib2.h. Pass the table information returned to the
290527438c18SJon Anderson  * supplied function.
290627438c18SJon Anderson  */
290727438c18SJon Anderson static int
mibwalk(void (* proc)(mib_item_t *))290827438c18SJon Anderson mibwalk(void (*proc)(mib_item_t *))
290927438c18SJon Anderson {
291027438c18SJon Anderson 	mib_item_t		*head_item = NULL;
291127438c18SJon Anderson 	mib_item_t		*last_item = NULL;
291227438c18SJon Anderson 	mib_item_t		*tmp;
291327438c18SJon Anderson 	struct strbuf		ctlbuf, databuf;
291427438c18SJon Anderson 	int			flags;
291527438c18SJon Anderson 	int			rval;
291627438c18SJon Anderson 	uintptr_t		buf[512 / sizeof (uintptr_t)];
291727438c18SJon Anderson 	struct T_optmgmt_req	*tor = (struct T_optmgmt_req *)buf;
291827438c18SJon Anderson 	struct T_optmgmt_ack	*toa = (struct T_optmgmt_ack *)buf;
291927438c18SJon Anderson 	struct T_error_ack	*tea = (struct T_error_ack *)buf;
292027438c18SJon Anderson 	struct opthdr		*req, *optp;
292127438c18SJon Anderson 	int			status = -1;
292227438c18SJon Anderson 
292327438c18SJon Anderson 	if (mibfd == -1) {
292427438c18SJon Anderson 		if ((mibfd = open("/dev/ip", O_RDWR)) < 0) {
292527438c18SJon Anderson 			logperror("mibwalk(): ip open");
292627438c18SJon Anderson 			return (status);
292727438c18SJon Anderson 		}
292827438c18SJon Anderson 	}
292927438c18SJon Anderson 
293027438c18SJon Anderson 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
293127438c18SJon Anderson 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
293227438c18SJon Anderson 	tor->OPT_length = sizeof (struct opthdr);
293327438c18SJon Anderson 	tor->MGMT_flags = T_CURRENT;
293427438c18SJon Anderson 
293527438c18SJon Anderson 	/*
293627438c18SJon Anderson 	 * Note: we use the special level value below so that IP will return
293727438c18SJon Anderson 	 * us information concerning IRE_MARK_TESTHIDDEN routes.
293827438c18SJon Anderson 	 */
293927438c18SJon Anderson 	req = (struct opthdr *)&tor[1];
2940bd670b35SErik Nordmark 	req->level = EXPER_IP_AND_ALL_IRES;
294127438c18SJon Anderson 	req->name  = 0;
294227438c18SJon Anderson 	req->len   = 0;
294327438c18SJon Anderson 
294427438c18SJon Anderson 	ctlbuf.buf = (char *)&buf;
294527438c18SJon Anderson 	ctlbuf.len = tor->OPT_length + tor->OPT_offset;
294627438c18SJon Anderson 
294727438c18SJon Anderson 	if (putmsg(mibfd, &ctlbuf, NULL, 0) == -1) {
294827438c18SJon Anderson 		logperror("mibwalk(): putmsg(ctl)");
294927438c18SJon Anderson 		return (status);
295027438c18SJon Anderson 	}
295127438c18SJon Anderson 
295227438c18SJon Anderson 	/*
295327438c18SJon Anderson 	 * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for
295427438c18SJon Anderson 	 * each table defined in mib2.h.  Each T_OPTMGMT_ACK msg contains
295527438c18SJon Anderson 	 * a control and data part. The control part contains a struct
295627438c18SJon Anderson 	 * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies
295727438c18SJon Anderson 	 * the level, name and length of the data in the data part. The
295827438c18SJon Anderson 	 * data part contains the actual table data. The last message
295927438c18SJon Anderson 	 * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a
296027438c18SJon Anderson 	 * single option with zero optlen.
296127438c18SJon Anderson 	 */
296227438c18SJon Anderson 	for (;;) {
296327438c18SJon Anderson 		errno = flags = 0;
296427438c18SJon Anderson 		ctlbuf.maxlen = sizeof (buf);
296527438c18SJon Anderson 		rval = getmsg(mibfd, &ctlbuf, NULL, &flags);
296627438c18SJon Anderson 		if (rval & MORECTL || rval < 0) {
296727438c18SJon Anderson 			if (errno == EINTR)
296827438c18SJon Anderson 				continue;
296927438c18SJon Anderson 			logerr("mibwalk(): getmsg(ctl) ret: %d err: %d\n",
297027438c18SJon Anderson 			    rval, errno);
297127438c18SJon Anderson 			goto error;
297227438c18SJon Anderson 		}
297327438c18SJon Anderson 		if (ctlbuf.len < sizeof (t_scalar_t)) {
297427438c18SJon Anderson 			logerr("mibwalk(): ctlbuf.len %d\n", ctlbuf.len);
297527438c18SJon Anderson 			goto error;
297627438c18SJon Anderson 		}
297727438c18SJon Anderson 
297827438c18SJon Anderson 		switch (toa->PRIM_type) {
297927438c18SJon Anderson 		case T_ERROR_ACK:
298027438c18SJon Anderson 			if (ctlbuf.len < sizeof (struct T_error_ack)) {
298127438c18SJon Anderson 				logerr("mibwalk(): T_ERROR_ACK ctlbuf "
298227438c18SJon Anderson 				    "too short: %d\n", ctlbuf.len);
298327438c18SJon Anderson 				goto error;
298427438c18SJon Anderson 			}
298527438c18SJon Anderson 			logerr("mibwalk(): T_ERROR_ACK: TLI_err = 0x%lx: %s\n"
298627438c18SJon Anderson 			    " UNIX_err = 0x%lx\n", tea->TLI_error,
298727438c18SJon Anderson 			    t_strerror(tea->TLI_error), tea->UNIX_error);
298827438c18SJon Anderson 			goto error;
298927438c18SJon Anderson 
299027438c18SJon Anderson 		case T_OPTMGMT_ACK:
299127438c18SJon Anderson 			optp = (struct opthdr *)&toa[1];
299227438c18SJon Anderson 			if (ctlbuf.len < (sizeof (struct T_optmgmt_ack) +
299327438c18SJon Anderson 			    sizeof (struct opthdr))) {
299427438c18SJon Anderson 				logerr("mibwalk(): T_OPTMGMT_ACK ctlbuf too "
299527438c18SJon Anderson 				    "short: %d\n", ctlbuf.len);
299627438c18SJon Anderson 				goto error;
299727438c18SJon Anderson 			}
299827438c18SJon Anderson 			if (toa->MGMT_flags != T_SUCCESS) {
299927438c18SJon Anderson 				logerr("mibwalk(): MGMT_flags != T_SUCCESS: "
300027438c18SJon Anderson 				    "0x%lx\n", toa->MGMT_flags);
300127438c18SJon Anderson 				goto error;
300227438c18SJon Anderson 			}
300327438c18SJon Anderson 			break;
300427438c18SJon Anderson 
300527438c18SJon Anderson 		default:
300627438c18SJon Anderson 			goto error;
300727438c18SJon Anderson 		}
300827438c18SJon Anderson 		/* The following assert also implies MGMT_flags == T_SUCCESS */
300927438c18SJon Anderson 		assert(toa->PRIM_type == T_OPTMGMT_ACK);
301027438c18SJon Anderson 
301127438c18SJon Anderson 		/*
301227438c18SJon Anderson 		 * We have reached the end of this T_OPTMGMT_ACK
301327438c18SJon Anderson 		 * message. If this is the last message i.e EOD,
301427438c18SJon Anderson 		 * break, else process the next T_OPTMGMT_ACK msg.
301527438c18SJon Anderson 		 */
301627438c18SJon Anderson 		if (rval == 0) {
301727438c18SJon Anderson 			if (optp->len == 0 && optp->name == 0 &&
301827438c18SJon Anderson 			    optp->level == 0) {
301927438c18SJon Anderson 				/* This is the EOD message. */
302027438c18SJon Anderson 				break;
302127438c18SJon Anderson 			}
302227438c18SJon Anderson 			/* Not EOD but no data to retrieve */
302327438c18SJon Anderson 			continue;
302427438c18SJon Anderson 		}
302527438c18SJon Anderson 
302627438c18SJon Anderson 		/*
302727438c18SJon Anderson 		 * We should only be here if MOREDATA was set.
302827438c18SJon Anderson 		 * Allocate an empty mib_item_t and link into the list
302927438c18SJon Anderson 		 * of MIB items.
303027438c18SJon Anderson 		 */
303127438c18SJon Anderson 		if ((tmp = malloc(sizeof (*tmp))) == NULL) {
303227438c18SJon Anderson 			logperror("mibwalk(): malloc() failed.");
303327438c18SJon Anderson 			goto error;
303427438c18SJon Anderson 		}
303527438c18SJon Anderson 		if (last_item != NULL)
303627438c18SJon Anderson 			last_item->mi_next = tmp;
303727438c18SJon Anderson 		else
303827438c18SJon Anderson 			head_item = tmp;
303927438c18SJon Anderson 		last_item = tmp;
304027438c18SJon Anderson 		last_item->mi_next = NULL;
304127438c18SJon Anderson 		last_item->mi_opthdr = *optp;
304227438c18SJon Anderson 		last_item->mi_valp = malloc(optp->len);
304327438c18SJon Anderson 		if (last_item->mi_valp == NULL) {
304427438c18SJon Anderson 			logperror("mibwalk(): malloc() failed.");
304527438c18SJon Anderson 			goto error;
304627438c18SJon Anderson 		}
304727438c18SJon Anderson 
304827438c18SJon Anderson 		databuf.maxlen = last_item->mi_opthdr.len;
304927438c18SJon Anderson 		databuf.buf = (char *)last_item->mi_valp;
305027438c18SJon Anderson 		databuf.len = 0;
305127438c18SJon Anderson 
305227438c18SJon Anderson 		/* Retrieve the actual MIB data */
305327438c18SJon Anderson 		for (;;) {
305427438c18SJon Anderson 			flags = 0;
305527438c18SJon Anderson 			if ((rval = getmsg(mibfd, NULL, &databuf,
305627438c18SJon Anderson 			    &flags)) != 0) {
305727438c18SJon Anderson 				if (rval < 0 && errno == EINTR)
305827438c18SJon Anderson 					continue;
305927438c18SJon Anderson 				/*
306027438c18SJon Anderson 				 * We shouldn't get MOREDATA here so treat that
306127438c18SJon Anderson 				 * as an error.
306227438c18SJon Anderson 				 */
306327438c18SJon Anderson 				logperror("mibwalk(): getmsg(data)");
306427438c18SJon Anderson 				goto error;
306527438c18SJon Anderson 			}
306627438c18SJon Anderson 			break;
306727438c18SJon Anderson 		}
306827438c18SJon Anderson 	}
306927438c18SJon Anderson 	status = 0;
307027438c18SJon Anderson 	/* Pass the accumulated MIB data to the supplied function pointer */
307127438c18SJon Anderson 	(*proc)(head_item);
307227438c18SJon Anderson error:
307327438c18SJon Anderson 	while (head_item != NULL) {
307427438c18SJon Anderson 		tmp = head_item;
307527438c18SJon Anderson 		head_item = tmp->mi_next;
307627438c18SJon Anderson 		free(tmp->mi_valp);
307727438c18SJon Anderson 		free(tmp);
307827438c18SJon Anderson 	}
307927438c18SJon Anderson 	return (status);
308027438c18SJon Anderson }
308127438c18SJon Anderson 
308227438c18SJon Anderson /*
308327438c18SJon Anderson  * Parse the supplied mib2 information to get the size of routing table
308427438c18SJon Anderson  * entries. This is needed when running in a branded zone where the
308527438c18SJon Anderson  * Solaris application environment and the Solaris kernel may not be the
308627438c18SJon Anderson  * the same release version.
308727438c18SJon Anderson  */
308827438c18SJon Anderson static void
mib_get_constants(mib_item_t * item)308927438c18SJon Anderson mib_get_constants(mib_item_t *item)
309027438c18SJon Anderson {
309127438c18SJon Anderson 	mib2_ip_t		*ipv4;
309227438c18SJon Anderson 	mib2_ipv6IfStatsEntry_t	*ipv6;
309327438c18SJon Anderson 
309427438c18SJon Anderson 	for (; item != NULL; item = item->mi_next) {
309527438c18SJon Anderson 		if (item->mi_opthdr.name != 0)
309627438c18SJon Anderson 			continue;
309727438c18SJon Anderson 		if (item->mi_opthdr.level == MIB2_IP) {
309827438c18SJon Anderson 			ipv4 = (mib2_ip_t *)item->mi_valp;
309927438c18SJon Anderson 			ipRouteEntrySize = ipv4->ipRouteEntrySize;
310027438c18SJon Anderson 		} else if (item->mi_opthdr.level == MIB2_IP6) {
310127438c18SJon Anderson 			ipv6 = (mib2_ipv6IfStatsEntry_t *)item->mi_valp;
310227438c18SJon Anderson 			ipv6RouteEntrySize = ipv6->ipv6RouteEntrySize;
310327438c18SJon Anderson 		}
310427438c18SJon Anderson 	}
310527438c18SJon Anderson }
3106