/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include "mpd_defs.h"
#include "mpd_tables.h"
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate int debug = 0;				/* Debug flag */
327c478bd9Sstevel@tonic-gate static int pollfd_num = 0;		/* Num. of poll descriptors */
337c478bd9Sstevel@tonic-gate static struct pollfd *pollfds = NULL;	/* Array of poll descriptors */
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate 					/* All times below in ms */
367c478bd9Sstevel@tonic-gate int	user_failure_detection_time;	/* user specified failure detection */
377c478bd9Sstevel@tonic-gate 					/* time (fdt) */
387c478bd9Sstevel@tonic-gate int	user_probe_interval;		/* derived from user specified fdt */
397c478bd9Sstevel@tonic-gate 
407c478bd9Sstevel@tonic-gate static int	rtsock_v4;		/* AF_INET routing socket */
417c478bd9Sstevel@tonic-gate static int	rtsock_v6;		/* AF_INET6 routing socket */
427c478bd9Sstevel@tonic-gate int	ifsock_v4 = -1;			/* IPv4 socket for ioctls  */
437c478bd9Sstevel@tonic-gate int	ifsock_v6 = -1;			/* IPv6 socket for ioctls  */
447c478bd9Sstevel@tonic-gate static int	lsock_v4;		/* Listen socket to detect mpathd */
457c478bd9Sstevel@tonic-gate static int	lsock_v6;		/* Listen socket to detect mpathd */
467c478bd9Sstevel@tonic-gate static int	mibfd = -1;		/* fd to get mib info */
477c478bd9Sstevel@tonic-gate static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
487c478bd9Sstevel@tonic-gate 
497c478bd9Sstevel@tonic-gate boolean_t	full_scan_required = _B_FALSE;
507c478bd9Sstevel@tonic-gate static uint_t	last_initifs_time;	/* Time when initifs was last run */
517c478bd9Sstevel@tonic-gate static	char **argv0;			/* Saved for re-exec on SIGHUP */
527c478bd9Sstevel@tonic-gate boolean_t handle_link_notifications = _B_TRUE;
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate static void	initlog(void);
557c478bd9Sstevel@tonic-gate static void	run_timeouts(void);
567c478bd9Sstevel@tonic-gate static void	initifs(void);
577c478bd9Sstevel@tonic-gate static void	check_if_removed(struct phyint_instance *pii);
587c478bd9Sstevel@tonic-gate static void	select_test_ifs(void);
597c478bd9Sstevel@tonic-gate static void	ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
607c478bd9Sstevel@tonic-gate static void	ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
617c478bd9Sstevel@tonic-gate static void	router_add_v4(mib2_ipRouteEntry_t *rp1,
627c478bd9Sstevel@tonic-gate     struct in_addr nexthop_v4);
637c478bd9Sstevel@tonic-gate static void	router_add_v6(mib2_ipv6RouteEntry_t *rp1,
647c478bd9Sstevel@tonic-gate     struct in6_addr nexthop_v6);
657c478bd9Sstevel@tonic-gate static void	router_add_common(int af, char *ifname,
667c478bd9Sstevel@tonic-gate     struct in6_addr nexthop);
677c478bd9Sstevel@tonic-gate static void	init_router_targets();
687c478bd9Sstevel@tonic-gate static void	cleanup(void);
697c478bd9Sstevel@tonic-gate static int	setup_listener(int af);
707c478bd9Sstevel@tonic-gate static void	check_config(void);
717c478bd9Sstevel@tonic-gate static void	check_addr_unique(int af, char *name);
727c478bd9Sstevel@tonic-gate static void	init_host_targets(void);
737c478bd9Sstevel@tonic-gate static void	dup_host_targets(struct phyint_instance *desired_pii);
747c478bd9Sstevel@tonic-gate static void	loopback_cmd(int sock, int family);
757c478bd9Sstevel@tonic-gate static int	poll_remove(int fd);
767c478bd9Sstevel@tonic-gate static boolean_t daemonize(void);
777c478bd9Sstevel@tonic-gate static int	closefunc(void *, int);
787c478bd9Sstevel@tonic-gate static unsigned int process_cmd(int newfd, union mi_commands *mpi);
797c478bd9Sstevel@tonic-gate static unsigned int process_query(int fd, mi_query_t *miq);
807c478bd9Sstevel@tonic-gate static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
817c478bd9Sstevel@tonic-gate static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
827c478bd9Sstevel@tonic-gate static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
837c478bd9Sstevel@tonic-gate static unsigned int send_result(int fd, unsigned int error, int syserror);
847c478bd9Sstevel@tonic-gate 
85*87e66ffcSrk struct local_addr *laddr_list = NULL;
86*87e66ffcSrk 
877c478bd9Sstevel@tonic-gate /*
887c478bd9Sstevel@tonic-gate  * Return the current time in milliseconds (from an arbitrary reference)
897c478bd9Sstevel@tonic-gate  * truncated to fit into an int. Truncation is ok since we are interested
907c478bd9Sstevel@tonic-gate  * only in differences and not the absolute values.
917c478bd9Sstevel@tonic-gate  */
927c478bd9Sstevel@tonic-gate uint_t
937c478bd9Sstevel@tonic-gate getcurrenttime(void)
947c478bd9Sstevel@tonic-gate {
957c478bd9Sstevel@tonic-gate 	uint_t	cur_time;	/* In ms */
967c478bd9Sstevel@tonic-gate 
977c478bd9Sstevel@tonic-gate 	/*
987c478bd9Sstevel@tonic-gate 	 * Use of a non-user-adjustable source of time is
997c478bd9Sstevel@tonic-gate 	 * required. However millisecond precision is sufficient.
1007c478bd9Sstevel@tonic-gate 	 * divide by 10^6
1017c478bd9Sstevel@tonic-gate 	 */
1027c478bd9Sstevel@tonic-gate 	cur_time = (uint_t)(gethrtime() / 1000000LL);
1037c478bd9Sstevel@tonic-gate 	return (cur_time);
1047c478bd9Sstevel@tonic-gate }
1057c478bd9Sstevel@tonic-gate 
1067c478bd9Sstevel@tonic-gate /*
1077c478bd9Sstevel@tonic-gate  * Add fd to the set being polled. Returns 0 if ok; -1 if failed.
1087c478bd9Sstevel@tonic-gate  */
1097c478bd9Sstevel@tonic-gate int
1107c478bd9Sstevel@tonic-gate poll_add(int fd)
1117c478bd9Sstevel@tonic-gate {
1127c478bd9Sstevel@tonic-gate 	int i;
1137c478bd9Sstevel@tonic-gate 	int new_num;
1147c478bd9Sstevel@tonic-gate 	struct pollfd *newfds;
1157c478bd9Sstevel@tonic-gate retry:
1167c478bd9Sstevel@tonic-gate 	/* Check if already present */
1177c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1187c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == fd)
1197c478bd9Sstevel@tonic-gate 			return (0);
1207c478bd9Sstevel@tonic-gate 	}
1217c478bd9Sstevel@tonic-gate 	/* Check for empty spot already present */
1227c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1237c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == -1) {
1247c478bd9Sstevel@tonic-gate 			pollfds[i].fd = fd;
1257c478bd9Sstevel@tonic-gate 			return (0);
1267c478bd9Sstevel@tonic-gate 		}
1277c478bd9Sstevel@tonic-gate 	}
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate 	/* Allocate space for 32 more fds and initialize to -1 */
1307c478bd9Sstevel@tonic-gate 	new_num = pollfd_num + 32;
1317c478bd9Sstevel@tonic-gate 	newfds = realloc(pollfds, new_num * sizeof (struct pollfd));
1327c478bd9Sstevel@tonic-gate 	if (newfds == NULL) {
1337c478bd9Sstevel@tonic-gate 		logperror("poll_add: realloc");
1347c478bd9Sstevel@tonic-gate 		return (-1);
1357c478bd9Sstevel@tonic-gate 	}
1367c478bd9Sstevel@tonic-gate 	for (i = pollfd_num; i < new_num; i++) {
1377c478bd9Sstevel@tonic-gate 		newfds[i].fd = -1;
1387c478bd9Sstevel@tonic-gate 		newfds[i].events = POLLIN;
1397c478bd9Sstevel@tonic-gate 	}
1407c478bd9Sstevel@tonic-gate 	pollfd_num = new_num;
1417c478bd9Sstevel@tonic-gate 	pollfds = newfds;
1427c478bd9Sstevel@tonic-gate 	goto retry;
1437c478bd9Sstevel@tonic-gate }
1447c478bd9Sstevel@tonic-gate 
1457c478bd9Sstevel@tonic-gate /*
1467c478bd9Sstevel@tonic-gate  * Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
1477c478bd9Sstevel@tonic-gate  */
1487c478bd9Sstevel@tonic-gate static int
1497c478bd9Sstevel@tonic-gate poll_remove(int fd)
1507c478bd9Sstevel@tonic-gate {
1517c478bd9Sstevel@tonic-gate 	int i;
1527c478bd9Sstevel@tonic-gate 
1537c478bd9Sstevel@tonic-gate 	/* Check if already present */
1547c478bd9Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1557c478bd9Sstevel@tonic-gate 		if (pollfds[i].fd == fd) {
1567c478bd9Sstevel@tonic-gate 			pollfds[i].fd = -1;
1577c478bd9Sstevel@tonic-gate 			return (0);
1587c478bd9Sstevel@tonic-gate 		}
1597c478bd9Sstevel@tonic-gate 	}
1607c478bd9Sstevel@tonic-gate 	return (-1);
1617c478bd9Sstevel@tonic-gate }
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate /*
1647c478bd9Sstevel@tonic-gate  * Extract information about the phyint instance. If the phyint instance still
1657c478bd9Sstevel@tonic-gate  * exists in the kernel then set pii_in_use, else clear it. check_if_removed()
1667c478bd9Sstevel@tonic-gate  * will use it to detect phyint instances that don't exist any longer and
1677c478bd9Sstevel@tonic-gate  * remove them, from our database of phyint instances.
1687c478bd9Sstevel@tonic-gate  * Return value:
1697c478bd9Sstevel@tonic-gate  *	returns true if the phyint instance exists in the kernel,
1707c478bd9Sstevel@tonic-gate  *	returns false otherwise
1717c478bd9Sstevel@tonic-gate  */
1727c478bd9Sstevel@tonic-gate static boolean_t
1737c478bd9Sstevel@tonic-gate pii_process(int af, char *name, struct phyint_instance **pii_p)
1747c478bd9Sstevel@tonic-gate {
1757c478bd9Sstevel@tonic-gate 	int err;
1767c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
1777c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii_other;
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
1807c478bd9Sstevel@tonic-gate 		logdebug("pii_process(%s %s)\n", AF_STR(af), name);
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, name);
1837c478bd9Sstevel@tonic-gate 	if (pii == NULL) {
1847c478bd9Sstevel@tonic-gate 		/*
1857c478bd9Sstevel@tonic-gate 		 * Phyint instance does not exist in our tables,
1867c478bd9Sstevel@tonic-gate 		 * create new phyint instance
1877c478bd9Sstevel@tonic-gate 		 */
1887c478bd9Sstevel@tonic-gate 		pii = phyint_inst_init_from_k(af, name);
1897c478bd9Sstevel@tonic-gate 	} else {
1907c478bd9Sstevel@tonic-gate 		/* Phyint exists in our tables */
1917c478bd9Sstevel@tonic-gate 		err = phyint_inst_update_from_k(pii);
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate 		switch (err) {
1947c478bd9Sstevel@tonic-gate 		case PI_IOCTL_ERROR:
1957c478bd9Sstevel@tonic-gate 			/* Some ioctl error. don't change anything */
1967c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 1;
1977c478bd9Sstevel@tonic-gate 			break;
1987c478bd9Sstevel@tonic-gate 
1997c478bd9Sstevel@tonic-gate 		case PI_GROUP_CHANGED:
2007c478bd9Sstevel@tonic-gate 			/*
2017c478bd9Sstevel@tonic-gate 			 * The phyint has changed group.
2027c478bd9Sstevel@tonic-gate 			 */
2037c478bd9Sstevel@tonic-gate 			restore_phyint(pii->pii_phyint);
2047c478bd9Sstevel@tonic-gate 			/* FALLTHRU */
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate 		case PI_IFINDEX_CHANGED:
2077c478bd9Sstevel@tonic-gate 			/*
2087c478bd9Sstevel@tonic-gate 			 * Interface index has changed. Delete and
2097c478bd9Sstevel@tonic-gate 			 * recreate the phyint as it is quite likely
2107c478bd9Sstevel@tonic-gate 			 * the interface has been unplumbed and replumbed.
2117c478bd9Sstevel@tonic-gate 			 */
2127c478bd9Sstevel@tonic-gate 			pii_other = phyint_inst_other(pii);
2137c478bd9Sstevel@tonic-gate 			if (pii_other != NULL)
2147c478bd9Sstevel@tonic-gate 				phyint_inst_delete(pii_other);
2157c478bd9Sstevel@tonic-gate 			phyint_inst_delete(pii);
2167c478bd9Sstevel@tonic-gate 			pii = phyint_inst_init_from_k(af, name);
2177c478bd9Sstevel@tonic-gate 			break;
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate 		case PI_DELETED:
2207c478bd9Sstevel@tonic-gate 			/* Phyint instance has disappeared from kernel */
2217c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 0;
2227c478bd9Sstevel@tonic-gate 			break;
2237c478bd9Sstevel@tonic-gate 
2247c478bd9Sstevel@tonic-gate 		case PI_OK:
2257c478bd9Sstevel@tonic-gate 			/* Phyint instance exists and is fine */
2267c478bd9Sstevel@tonic-gate 			pii->pii_in_use = 1;
2277c478bd9Sstevel@tonic-gate 			break;
2287c478bd9Sstevel@tonic-gate 
2297c478bd9Sstevel@tonic-gate 		default:
2307c478bd9Sstevel@tonic-gate 			/* Unknown status */
2317c478bd9Sstevel@tonic-gate 			logerr("pii_process: Unknown status %d\n", err);
2327c478bd9Sstevel@tonic-gate 			break;
2337c478bd9Sstevel@tonic-gate 		}
2347c478bd9Sstevel@tonic-gate 	}
2357c478bd9Sstevel@tonic-gate 
2367c478bd9Sstevel@tonic-gate 	*pii_p = pii;
2377c478bd9Sstevel@tonic-gate 	if (pii != NULL)
2387c478bd9Sstevel@tonic-gate 		return (pii->pii_in_use ? _B_TRUE : _B_FALSE);
2397c478bd9Sstevel@tonic-gate 	else
2407c478bd9Sstevel@tonic-gate 		return (_B_FALSE);
2417c478bd9Sstevel@tonic-gate }
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate /*
2447c478bd9Sstevel@tonic-gate  * This phyint is leaving the group. Try to restore the phyint to its
2457c478bd9Sstevel@tonic-gate  * initial state. Return the addresses that belong to other group members,
2467c478bd9Sstevel@tonic-gate  * to the group, and take back any addresses owned by this phyint
2477c478bd9Sstevel@tonic-gate  */
2487c478bd9Sstevel@tonic-gate void
2497c478bd9Sstevel@tonic-gate restore_phyint(struct phyint *pi)
2507c478bd9Sstevel@tonic-gate {
2517c478bd9Sstevel@tonic-gate 	if (pi->pi_group == phyint_anongroup)
2527c478bd9Sstevel@tonic-gate 		return;
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate 	/*
2557c478bd9Sstevel@tonic-gate 	 * Move everthing to some other member in the group.
2567c478bd9Sstevel@tonic-gate 	 * The phyint has changed group in the kernel. But we
2577c478bd9Sstevel@tonic-gate 	 * have yet to do it in our tables.
2587c478bd9Sstevel@tonic-gate 	 */
2597c478bd9Sstevel@tonic-gate 	if (!pi->pi_empty)
2607c478bd9Sstevel@tonic-gate 		(void) try_failover(pi, FAILOVER_TO_ANY);
2617c478bd9Sstevel@tonic-gate 	/*
2627c478bd9Sstevel@tonic-gate 	 * Move all addresses owned by 'pi' back to pi, from each
2637c478bd9Sstevel@tonic-gate 	 * of the other members of the group
2647c478bd9Sstevel@tonic-gate 	 */
2657c478bd9Sstevel@tonic-gate 	(void) try_failback(pi, _B_FALSE);
2667c478bd9Sstevel@tonic-gate }
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate /*
2697c478bd9Sstevel@tonic-gate  * Scan all interfaces to detect changes as well as new and deleted interfaces
2707c478bd9Sstevel@tonic-gate  */
2717c478bd9Sstevel@tonic-gate static void
2727c478bd9Sstevel@tonic-gate initifs()
2737c478bd9Sstevel@tonic-gate {
2747c478bd9Sstevel@tonic-gate 	int	n;
2757c478bd9Sstevel@tonic-gate 	int	af;
2767c478bd9Sstevel@tonic-gate 	char	*cp;
2777c478bd9Sstevel@tonic-gate 	char	*buf;
2787c478bd9Sstevel@tonic-gate 	int	numifs;
2797c478bd9Sstevel@tonic-gate 	struct lifnum	lifn;
2807c478bd9Sstevel@tonic-gate 	struct lifconf	lifc;
2817c478bd9Sstevel@tonic-gate 	struct lifreq	*lifr;
2827c478bd9Sstevel@tonic-gate 	struct logint	*li;
2837c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
2847c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
2857c478bd9Sstevel@tonic-gate 	char	pi_name[LIFNAMSIZ + 1];
2867c478bd9Sstevel@tonic-gate 	boolean_t exists;
2877c478bd9Sstevel@tonic-gate 	struct phyint	*pi;
288*87e66ffcSrk 	struct local_addr *next;
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
2917c478bd9Sstevel@tonic-gate 		logdebug("initifs: Scanning interfaces\n");
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 	last_initifs_time = getcurrenttime();
2947c478bd9Sstevel@tonic-gate 
295*87e66ffcSrk 	/*
296*87e66ffcSrk 	 * Free the laddr_list before collecting the local addresses.
297*87e66ffcSrk 	 */
298*87e66ffcSrk 	while (laddr_list != NULL) {
299*87e66ffcSrk 		next = laddr_list->next;
300*87e66ffcSrk 		free(laddr_list);
301*87e66ffcSrk 		laddr_list = next;
302*87e66ffcSrk 	}
303*87e66ffcSrk 
3047c478bd9Sstevel@tonic-gate 	/*
3057c478bd9Sstevel@tonic-gate 	 * Mark the interfaces so that we can find phyints and logints
3067c478bd9Sstevel@tonic-gate 	 * which have disappeared from the kernel. pii_process() and
3077c478bd9Sstevel@tonic-gate 	 * logint_init_from_k() will set {pii,li}_in_use when they find
3087c478bd9Sstevel@tonic-gate 	 * the interface in the kernel. Also, clear dupaddr bit on probe
3097c478bd9Sstevel@tonic-gate 	 * logint. check_addr_unique() will set the dupaddr bit on the
3107c478bd9Sstevel@tonic-gate 	 * probe logint, if the testaddress is not unique.
3117c478bd9Sstevel@tonic-gate 	 */
3127c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
3137c478bd9Sstevel@tonic-gate 		pii->pii_in_use = 0;
3147c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
3157c478bd9Sstevel@tonic-gate 			li->li_in_use = 0;
3167c478bd9Sstevel@tonic-gate 			if (pii->pii_probe_logint == li)
3177c478bd9Sstevel@tonic-gate 				li->li_dupaddr = 0;
3187c478bd9Sstevel@tonic-gate 		}
3197c478bd9Sstevel@tonic-gate 	}
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
322*87e66ffcSrk 	lifn.lifn_flags = LIFC_ALLZONES;
3237c478bd9Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
3247c478bd9Sstevel@tonic-gate 		logperror("initifs: ioctl (get interface numbers)");
3257c478bd9Sstevel@tonic-gate 		return;
3267c478bd9Sstevel@tonic-gate 	}
3277c478bd9Sstevel@tonic-gate 	numifs = lifn.lifn_count;
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 	buf = (char *)calloc(numifs, sizeof (struct lifreq));
3307c478bd9Sstevel@tonic-gate 	if (buf == NULL) {
3317c478bd9Sstevel@tonic-gate 		logperror("initifs: calloc");
3327c478bd9Sstevel@tonic-gate 		return;
3337c478bd9Sstevel@tonic-gate 	}
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
336*87e66ffcSrk 	lifc.lifc_flags = LIFC_ALLZONES;
3377c478bd9Sstevel@tonic-gate 	lifc.lifc_len = numifs * sizeof (struct lifreq);
3387c478bd9Sstevel@tonic-gate 	lifc.lifc_buf = buf;
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
3417c478bd9Sstevel@tonic-gate 		/*
3427c478bd9Sstevel@tonic-gate 		 * EINVAL is commonly encountered, when things change
3437c478bd9Sstevel@tonic-gate 		 * underneath us rapidly, (eg. at boot, when new interfaces
3447c478bd9Sstevel@tonic-gate 		 * are plumbed successively) and the kernel finds the buffer
3457c478bd9Sstevel@tonic-gate 		 * size we passed as too small. We will retry again
3467c478bd9Sstevel@tonic-gate 		 * when we see the next routing socket msg, or at worst after
3477c478bd9Sstevel@tonic-gate 		 * IF_SCAN_INTERVAL ms.
3487c478bd9Sstevel@tonic-gate 		 */
3497c478bd9Sstevel@tonic-gate 		if (errno != EINVAL) {
3507c478bd9Sstevel@tonic-gate 			logperror("initifs: ioctl"
3517c478bd9Sstevel@tonic-gate 			    " (get interface configuration)");
3527c478bd9Sstevel@tonic-gate 		}
3537c478bd9Sstevel@tonic-gate 		free(buf);
3547c478bd9Sstevel@tonic-gate 		return;
3557c478bd9Sstevel@tonic-gate 	}
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate 	lifr = (struct lifreq *)lifc.lifc_req;
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	/*
3607c478bd9Sstevel@tonic-gate 	 * For each lifreq returned by SIOGGLIFCONF, call pii_process()
3617c478bd9Sstevel@tonic-gate 	 * and get the state of the corresponding phyint_instance. If it is
3627c478bd9Sstevel@tonic-gate 	 * successful, then call logint_init_from_k() to get the state of the
3637c478bd9Sstevel@tonic-gate 	 * logint.
3647c478bd9Sstevel@tonic-gate 	 */
3657c478bd9Sstevel@tonic-gate 	for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) {
366*87e66ffcSrk 		int	sockfd;
367*87e66ffcSrk 		struct local_addr	*taddr;
368*87e66ffcSrk 		struct sockaddr_in	*sin;
369*87e66ffcSrk 		struct sockaddr_in6	*sin6;
370*87e66ffcSrk 		struct lifreq	lifreq;
371*87e66ffcSrk 
3727c478bd9Sstevel@tonic-gate 		af = lifr->lifr_addr.ss_family;
3737c478bd9Sstevel@tonic-gate 
374*87e66ffcSrk 		/*
375*87e66ffcSrk 		 * Collect all local addresses.
376*87e66ffcSrk 		 */
377*87e66ffcSrk 		sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
378*87e66ffcSrk 		(void) memset(&lifreq, 0, sizeof (lifreq));
379*87e66ffcSrk 		(void) strlcpy(lifreq.lifr_name, lifr->lifr_name,
380*87e66ffcSrk 		    sizeof (lifreq.lifr_name));
381*87e66ffcSrk 
382*87e66ffcSrk 		if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
383*87e66ffcSrk 			if (errno != ENXIO)
384*87e66ffcSrk 				logperror("initifs: ioctl (SIOCGLIFFLAGS)");
385*87e66ffcSrk 			continue;
386*87e66ffcSrk 		}
387*87e66ffcSrk 
388*87e66ffcSrk 		/*
389*87e66ffcSrk 		 * Add the interface address to laddr_list.
390*87e66ffcSrk 		 * Another node might have the same IP address which is up.
391*87e66ffcSrk 		 * In that case, it is appropriate  to use the address as a
392*87e66ffcSrk 		 * target, even though it is also configured (but not up) on
393*87e66ffcSrk 		 * the local system.
394*87e66ffcSrk 		 * Hence,the interface address is not added to laddr_list
395*87e66ffcSrk 		 * unless it is IFF_UP.
396*87e66ffcSrk 		 */
397*87e66ffcSrk 		if (lifreq.lifr_flags & IFF_UP) {
398*87e66ffcSrk 			taddr = malloc(sizeof (struct local_addr));
399*87e66ffcSrk 			if (taddr == NULL) {
400*87e66ffcSrk 				logperror("initifs: malloc");
401*87e66ffcSrk 				continue;
402*87e66ffcSrk 			}
403*87e66ffcSrk 			if (af == AF_INET) {
404*87e66ffcSrk 				sin = (struct sockaddr_in *)&lifr->lifr_addr;
405*87e66ffcSrk 				IN6_INADDR_TO_V4MAPPED(&sin->sin_addr,
406*87e66ffcSrk 				    &taddr->addr);
407*87e66ffcSrk 			} else {
408*87e66ffcSrk 				sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
409*87e66ffcSrk 				taddr->addr = sin6->sin6_addr;
410*87e66ffcSrk 			}
411*87e66ffcSrk 			taddr->next = laddr_list;
412*87e66ffcSrk 			laddr_list = taddr;
413*87e66ffcSrk 		}
414*87e66ffcSrk 
4157c478bd9Sstevel@tonic-gate 		/*
4167c478bd9Sstevel@tonic-gate 		 * Need to pass a phyint name to pii_process. Insert the
4177c478bd9Sstevel@tonic-gate 		 * null where the ':' IF_SEPARATOR is found in the logical
4187c478bd9Sstevel@tonic-gate 		 * name.
4197c478bd9Sstevel@tonic-gate 		 */
420*87e66ffcSrk 		(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
4217c478bd9Sstevel@tonic-gate 		if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
4227c478bd9Sstevel@tonic-gate 			*cp = '\0';
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 		exists = pii_process(af, pi_name, &pii);
4257c478bd9Sstevel@tonic-gate 		if (exists) {
4267c478bd9Sstevel@tonic-gate 			/* The phyint is fine. So process the logint */
4277c478bd9Sstevel@tonic-gate 			logint_init_from_k(pii, lifr->lifr_name);
4287c478bd9Sstevel@tonic-gate 		}
4297c478bd9Sstevel@tonic-gate 		check_addr_unique(af, lifr->lifr_name);
4307c478bd9Sstevel@tonic-gate 	}
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	free(buf);
4337c478bd9Sstevel@tonic-gate 
4347c478bd9Sstevel@tonic-gate 	/*
4357c478bd9Sstevel@tonic-gate 	 * If the test address is now unique, and if it was not unique
4367c478bd9Sstevel@tonic-gate 	 * previously,	clear the li_dupaddrmsg_printed flag and log a
4377c478bd9Sstevel@tonic-gate 	 * recovery message
4387c478bd9Sstevel@tonic-gate 	 */
4397c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
4407c478bd9Sstevel@tonic-gate 		struct logint *li;
4417c478bd9Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 		li = pii->pii_probe_logint;
4447c478bd9Sstevel@tonic-gate 		if ((li != NULL) && !li->li_dupaddr &&
4457c478bd9Sstevel@tonic-gate 		    li->li_dupaddrmsg_printed) {
4467c478bd9Sstevel@tonic-gate 			logerr("Test address %s is unique; enabling probe-"
4477c478bd9Sstevel@tonic-gate 			    "based failure detection\n",
4487c478bd9Sstevel@tonic-gate 			    pr_addr(pii->pii_af, li->li_addr, abuf,
4497c478bd9Sstevel@tonic-gate 				sizeof (abuf)));
4507c478bd9Sstevel@tonic-gate 			li->li_dupaddrmsg_printed = 0;
4517c478bd9Sstevel@tonic-gate 		}
4527c478bd9Sstevel@tonic-gate 	}
4537c478bd9Sstevel@tonic-gate 
4547c478bd9Sstevel@tonic-gate 	/*
4557c478bd9Sstevel@tonic-gate 	 * Scan for phyints and logints that have disappeared from the
4567c478bd9Sstevel@tonic-gate 	 * kernel, and delete them.
4577c478bd9Sstevel@tonic-gate 	 */
4587c478bd9Sstevel@tonic-gate 	pii = phyint_instances;
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate 	while (pii != NULL) {
4617c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
4627c478bd9Sstevel@tonic-gate 		check_if_removed(pii);
4637c478bd9Sstevel@tonic-gate 		pii = next_pii;
4647c478bd9Sstevel@tonic-gate 	}
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	/*
4677c478bd9Sstevel@tonic-gate 	 * Select a test address for sending probes on each phyint instance
4687c478bd9Sstevel@tonic-gate 	 */
4697c478bd9Sstevel@tonic-gate 	select_test_ifs();
4707c478bd9Sstevel@tonic-gate 
4717c478bd9Sstevel@tonic-gate 	/*
4727c478bd9Sstevel@tonic-gate 	 * Handle link up/down notifications from the NICs.
4737c478bd9Sstevel@tonic-gate 	 */
4747c478bd9Sstevel@tonic-gate 	process_link_state_changes();
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
4777c478bd9Sstevel@tonic-gate 		/*
4787c478bd9Sstevel@tonic-gate 		 * If this is a case of group failure, we don't have much
4797c478bd9Sstevel@tonic-gate 		 * to do until the group recovers again.
4807c478bd9Sstevel@tonic-gate 		 */
4817c478bd9Sstevel@tonic-gate 		if (GROUP_FAILED(pi->pi_group))
4827c478bd9Sstevel@tonic-gate 			continue;
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 		/*
4857c478bd9Sstevel@tonic-gate 		 * Try/Retry any pending failovers / failbacks, that did not
4867c478bd9Sstevel@tonic-gate 		 * not complete, or that could not be initiated previously.
4877c478bd9Sstevel@tonic-gate 		 * This implements the 3 invariants described in the big block
4887c478bd9Sstevel@tonic-gate 		 * comment at the beginning of probe.c
4897c478bd9Sstevel@tonic-gate 		 */
4907c478bd9Sstevel@tonic-gate 		if (pi->pi_flags & IFF_INACTIVE) {
49149df4566Sethindra 			if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
4927c478bd9Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
4937c478bd9Sstevel@tonic-gate 		} else {
4947c478bd9Sstevel@tonic-gate 			struct phyint_instance *pii;
4957c478bd9Sstevel@tonic-gate 
4967c478bd9Sstevel@tonic-gate 			pii = pi->pi_v4;
4977c478bd9Sstevel@tonic-gate 			if (LINK_UP(pi) && !PROBE_CAPABLE(pii))
4987c478bd9Sstevel@tonic-gate 				pii = pi->pi_v6;
4997c478bd9Sstevel@tonic-gate 			if (LINK_UP(pi) && !PROBE_CAPABLE(pii))
5007c478bd9Sstevel@tonic-gate 				continue;
5017c478bd9Sstevel@tonic-gate 			/*
5027c478bd9Sstevel@tonic-gate 			 * It is possible that the phyint has started
5037c478bd9Sstevel@tonic-gate 			 * receiving packets, after it has been marked
5047c478bd9Sstevel@tonic-gate 			 * PI_FAILED. Don't initiate failover, if the
5057c478bd9Sstevel@tonic-gate 			 * phyint has started recovering. failure_state()
5067c478bd9Sstevel@tonic-gate 			 * captures this check. A similar logic is used
5077c478bd9Sstevel@tonic-gate 			 * for failback/repair case.
5087c478bd9Sstevel@tonic-gate 			 */
5097c478bd9Sstevel@tonic-gate 			if (pi->pi_state == PI_FAILED && !pi->pi_empty &&
5107c478bd9Sstevel@tonic-gate 			    (failure_state(pii) == PHYINT_FAILURE)) {
5117c478bd9Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_NORMAL);
5127c478bd9Sstevel@tonic-gate 			} else if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
5137c478bd9Sstevel@tonic-gate 				if (try_failback(pi, _B_FALSE) !=
5147c478bd9Sstevel@tonic-gate 				    IPMP_FAILURE) {
5157c478bd9Sstevel@tonic-gate 					(void) change_lif_flags(pi, IFF_FAILED,
5167c478bd9Sstevel@tonic-gate 					    _B_FALSE);
5177c478bd9Sstevel@tonic-gate 					/* Per state diagram */
5187c478bd9Sstevel@tonic-gate 					pi->pi_empty = 0;
5197c478bd9Sstevel@tonic-gate 				}
5207c478bd9Sstevel@tonic-gate 			}
5217c478bd9Sstevel@tonic-gate 		}
5227c478bd9Sstevel@tonic-gate 	}
5237c478bd9Sstevel@tonic-gate }
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate /*
5267c478bd9Sstevel@tonic-gate  * Check that test/probe addresses are always unique. link-locals and
5277c478bd9Sstevel@tonic-gate  * ptp unnumbered may not be unique, and bind to such an (IFF_NOFAILOVER)
5287c478bd9Sstevel@tonic-gate  * address can produce unexpected results. Log an error and alert the user.
5297c478bd9Sstevel@tonic-gate  */
5307c478bd9Sstevel@tonic-gate static void
5317c478bd9Sstevel@tonic-gate check_addr_unique(int af, char *name)
5327c478bd9Sstevel@tonic-gate {
5337c478bd9Sstevel@tonic-gate 	struct lifreq	lifr;
5347c478bd9Sstevel@tonic-gate 	struct phyint	*pi;
5357c478bd9Sstevel@tonic-gate 	struct in6_addr	addr;
5367c478bd9Sstevel@tonic-gate 	struct phyint_instance	*pii;
5377c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*sin;
5387c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
5397c478bd9Sstevel@tonic-gate 	int ifsock;
5407c478bd9Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 	/* Get the socket for doing ioctls */
5437c478bd9Sstevel@tonic-gate 	ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name));
5467c478bd9Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
5477c478bd9Sstevel@tonic-gate 	/*
5487c478bd9Sstevel@tonic-gate 	 * Get the address corresponding to 'name'. We cannot
5497c478bd9Sstevel@tonic-gate 	 * do a logint lookup in our tables, because, not all logints
5507c478bd9Sstevel@tonic-gate 	 * in the system are tracked by mpathd. (eg. things not in a group)
5517c478bd9Sstevel@tonic-gate 	 */
5527c478bd9Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) {
5537c478bd9Sstevel@tonic-gate 		if (errno == ENXIO) {
5547c478bd9Sstevel@tonic-gate 			/* Interface has vanished */
5557c478bd9Sstevel@tonic-gate 			return;
5567c478bd9Sstevel@tonic-gate 		} else {
5577c478bd9Sstevel@tonic-gate 			logperror("ioctl (get addr)");
5587c478bd9Sstevel@tonic-gate 			return;
5597c478bd9Sstevel@tonic-gate 		}
5607c478bd9Sstevel@tonic-gate 	}
5617c478bd9Sstevel@tonic-gate 
5627c478bd9Sstevel@tonic-gate 	if (af == AF_INET) {
5637c478bd9Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&lifr.lifr_addr;
5647c478bd9Sstevel@tonic-gate 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr);
5657c478bd9Sstevel@tonic-gate 	} else {
5667c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
5677c478bd9Sstevel@tonic-gate 		addr = sin6->sin6_addr;
5687c478bd9Sstevel@tonic-gate 	}
5697c478bd9Sstevel@tonic-gate 
5707c478bd9Sstevel@tonic-gate 	/*
5717c478bd9Sstevel@tonic-gate 	 * Does the address 'addr' match any known test address ? If so
5727c478bd9Sstevel@tonic-gate 	 * it is a duplicate, unless we are looking at the same logint
5737c478bd9Sstevel@tonic-gate 	 */
5747c478bd9Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
5757c478bd9Sstevel@tonic-gate 		pii = PHYINT_INSTANCE(pi, af);
5767c478bd9Sstevel@tonic-gate 		if (pii == NULL || pii->pii_probe_logint == NULL)
5777c478bd9Sstevel@tonic-gate 			continue;
5787c478bd9Sstevel@tonic-gate 
5797c478bd9Sstevel@tonic-gate 		if (!IN6_ARE_ADDR_EQUAL(&addr,
5807c478bd9Sstevel@tonic-gate 		    &pii->pii_probe_logint->li_addr)) {
5817c478bd9Sstevel@tonic-gate 			continue;
5827c478bd9Sstevel@tonic-gate 		}
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 		if (strncmp(pii->pii_probe_logint->li_name, name,
5857c478bd9Sstevel@tonic-gate 		    sizeof (pii->pii_probe_logint->li_name)) == 0) {
5867c478bd9Sstevel@tonic-gate 			continue;
5877c478bd9Sstevel@tonic-gate 		}
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 		/*
5907c478bd9Sstevel@tonic-gate 		 * This test address is not unique. Set the dupaddr bit
5917c478bd9Sstevel@tonic-gate 		 */
5927c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint->li_dupaddr = 1;
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 		/*
5957c478bd9Sstevel@tonic-gate 		 * Log an error message if not already logged
5967c478bd9Sstevel@tonic-gate 		 */
5977c478bd9Sstevel@tonic-gate 		if (pii->pii_probe_logint->li_dupaddrmsg_printed)
5987c478bd9Sstevel@tonic-gate 			continue;
5997c478bd9Sstevel@tonic-gate 
6007c478bd9Sstevel@tonic-gate 		logerr("Test address %s is not unique; disabling "
6017c478bd9Sstevel@tonic-gate 		    "probe-based failure detection\n",
6027c478bd9Sstevel@tonic-gate 		    pr_addr(af, addr, abuf, sizeof (abuf)));
6037c478bd9Sstevel@tonic-gate 
6047c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint->li_dupaddrmsg_printed = 1;
6057c478bd9Sstevel@tonic-gate 	}
6067c478bd9Sstevel@tonic-gate }
6077c478bd9Sstevel@tonic-gate 
6087c478bd9Sstevel@tonic-gate /*
6097c478bd9Sstevel@tonic-gate  * Stop probing an interface.  Called when an interface is offlined.
6107c478bd9Sstevel@tonic-gate  * The probe socket is closed on each interface instance, and the
6117c478bd9Sstevel@tonic-gate  * interface state set to PI_OFFLINE.
6127c478bd9Sstevel@tonic-gate  */
6137c478bd9Sstevel@tonic-gate static void
6147c478bd9Sstevel@tonic-gate stop_probing(struct phyint *pi)
6157c478bd9Sstevel@tonic-gate {
6167c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate 	pii = pi->pi_v4;
6197c478bd9Sstevel@tonic-gate 	if (pii != NULL) {
6207c478bd9Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
6217c478bd9Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
6227c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
6237c478bd9Sstevel@tonic-gate 	}
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 	pii = pi->pi_v6;
6267c478bd9Sstevel@tonic-gate 	if (pii != NULL) {
6277c478bd9Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
6287c478bd9Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
6297c478bd9Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
6307c478bd9Sstevel@tonic-gate 	}
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 	phyint_chstate(pi, PI_OFFLINE);
6337c478bd9Sstevel@tonic-gate }
6347c478bd9Sstevel@tonic-gate 
/* Ratings handed back by rate_testflags(), ordered from worst to best. */
enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS };

/*
 * Grade a set of logical interface flags for use as a test address.
 *
 * BAD_TESTFLAGS:  IFF_NOFAILOVER and IFF_UP are not both set (a test
 *		   address is NOFAILOVER by definition and must be up to
 *		   serve as a source address), or IFF_NOXMIT or IFF_NOLOCAL
 *		   is set (packets must be exchangeable with local
 *		   destinations).
 * BEST_TESTFLAGS: usable, and either IFF_DEPRECATED without IFF_IPV6
 *		   (the historically preferred IPv4 form) or IFF_IPV6
 *		   without IFF_DEPRECATED.
 * OK_TESTFLAGS:   usable, but neither of the preferred combinations.
 */
static int
rate_testflags(uint64_t flags)
{
	const uint64_t	required = IFF_NOFAILOVER | IFF_UP;
	const uint64_t	forbidden = IFF_NOXMIT | IFF_NOLOCAL;
	uint64_t	pref;

	if ((flags & required) != required || (flags & forbidden) != 0)
		return (BAD_TESTFLAGS);

	/* Only the IPV6/DEPRECATED pair decides between OK and BEST. */
	pref = flags & (IFF_IPV6 | IFF_DEPRECATED);
	if (pref == IFF_DEPRECATED || pref == IFF_IPV6)
		return (BEST_TESTFLAGS);

	return (OK_TESTFLAGS);
}
661921e7e07Smeem 
662921e7e07Smeem /*
663921e7e07Smeem  * Attempt to select a test address for each phyint instance.
664921e7e07Smeem  * Call phyint_inst_sockinit() to complete the initializations.
6657c478bd9Sstevel@tonic-gate  */
6667c478bd9Sstevel@tonic-gate static void
6677c478bd9Sstevel@tonic-gate select_test_ifs(void)
6687c478bd9Sstevel@tonic-gate {
6697c478bd9Sstevel@tonic-gate 	struct phyint		*pi;
6707c478bd9Sstevel@tonic-gate 	struct phyint_instance	*pii;
6717c478bd9Sstevel@tonic-gate 	struct phyint_instance	*next_pii;
672921e7e07Smeem 	struct logint		*li;
673921e7e07Smeem 	struct logint  		*probe_logint;
674921e7e07Smeem 	boolean_t		target_scan_reqd = _B_FALSE;
675921e7e07Smeem 	struct target		*tg;
676921e7e07Smeem 	int			rating;
6777c478bd9Sstevel@tonic-gate 
6787c478bd9Sstevel@tonic-gate 	if (debug & D_PHYINT)
6797c478bd9Sstevel@tonic-gate 		logdebug("select_test_ifs\n");
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate 	/*
6827c478bd9Sstevel@tonic-gate 	 * For each phyint instance, do the test address selection
6837c478bd9Sstevel@tonic-gate 	 */
6847c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
6857c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
686921e7e07Smeem 		probe_logint = NULL;
687921e7e07Smeem 
6887c478bd9Sstevel@tonic-gate 		/*
6897c478bd9Sstevel@tonic-gate 		 * An interface that is offline, should not be probed.
6907c478bd9Sstevel@tonic-gate 		 * Offline interfaces should always in PI_OFFLINE state,
6917c478bd9Sstevel@tonic-gate 		 * unless some other entity has set the offline flag.
6927c478bd9Sstevel@tonic-gate 		 */
6937c478bd9Sstevel@tonic-gate 		if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
6947c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state != PI_OFFLINE) {
6957c478bd9Sstevel@tonic-gate 				logerr("shouldn't be probing offline"
6967c478bd9Sstevel@tonic-gate 					" interface %s (state is: %u)."
6977c478bd9Sstevel@tonic-gate 					" Stopping probes.\n",
6987c478bd9Sstevel@tonic-gate 					pii->pii_phyint->pi_name,
6997c478bd9Sstevel@tonic-gate 					pii->pii_phyint->pi_state);
7007c478bd9Sstevel@tonic-gate 				stop_probing(pii->pii_phyint);
7017c478bd9Sstevel@tonic-gate 			}
7027c478bd9Sstevel@tonic-gate 			continue;
7037c478bd9Sstevel@tonic-gate 		}
7047c478bd9Sstevel@tonic-gate 
705921e7e07Smeem 		li = pii->pii_probe_logint;
706921e7e07Smeem 		if (li != NULL) {
7077c478bd9Sstevel@tonic-gate 			/*
708921e7e07Smeem 			 * We've already got a test address; only proceed
709921e7e07Smeem 			 * if it's suboptimal.
7107c478bd9Sstevel@tonic-gate 			 */
711921e7e07Smeem 			if (rate_testflags(li->li_flags) == BEST_TESTFLAGS)
712921e7e07Smeem 				continue;
7137c478bd9Sstevel@tonic-gate 		}
7147c478bd9Sstevel@tonic-gate 
7157c478bd9Sstevel@tonic-gate 		/*
7167c478bd9Sstevel@tonic-gate 		 * Walk the logints of this phyint instance, and select
7177c478bd9Sstevel@tonic-gate 		 * the best available test address
7187c478bd9Sstevel@tonic-gate 		 */
7197c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
7207c478bd9Sstevel@tonic-gate 			/*
7217c478bd9Sstevel@tonic-gate 			 * Skip any IPv6 logints that are not link-local,
7227c478bd9Sstevel@tonic-gate 			 * since we should always have a link-local address
7237c478bd9Sstevel@tonic-gate 			 * anyway and in6_data() expects link-local replies.
7247c478bd9Sstevel@tonic-gate 			 */
7257c478bd9Sstevel@tonic-gate 			if (pii->pii_af == AF_INET6 &&
7267c478bd9Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKLOCAL(&li->li_addr))
7277c478bd9Sstevel@tonic-gate 				continue;
7287c478bd9Sstevel@tonic-gate 
729921e7e07Smeem 			/*
730921e7e07Smeem 			 * Rate the testflags. If we've found an optimal
731921e7e07Smeem 			 * match, then break out; otherwise, record the most
732921e7e07Smeem 			 * recent OK one.
733921e7e07Smeem 			 */
734921e7e07Smeem 			rating = rate_testflags(li->li_flags);
735921e7e07Smeem 			if (rating == BAD_TESTFLAGS)
736921e7e07Smeem 				continue;
737921e7e07Smeem 
738921e7e07Smeem 			probe_logint = li;
739921e7e07Smeem 			if (rating == BEST_TESTFLAGS)
740921e7e07Smeem 				break;
7417c478bd9Sstevel@tonic-gate 		}
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate 		/*
744921e7e07Smeem 		 * If the probe logint has changed, ditch the old one.
7457c478bd9Sstevel@tonic-gate 		 */
746921e7e07Smeem 		if (pii->pii_probe_logint != NULL &&
747921e7e07Smeem 		    pii->pii_probe_logint != probe_logint) {
7487c478bd9Sstevel@tonic-gate 			if (pii->pii_probe_sock != -1)
7497c478bd9Sstevel@tonic-gate 				close_probe_socket(pii, _B_TRUE);
7507c478bd9Sstevel@tonic-gate 			pii->pii_probe_logint = NULL;
7517c478bd9Sstevel@tonic-gate 		}
7527c478bd9Sstevel@tonic-gate 
753921e7e07Smeem 		if (probe_logint == NULL) {
7547c478bd9Sstevel@tonic-gate 			/*
7557c478bd9Sstevel@tonic-gate 			 * We don't have a test address. Don't print an
7567c478bd9Sstevel@tonic-gate 			 * error message immediately. check_config() will
7577c478bd9Sstevel@tonic-gate 			 * take care of it. Zero out the probe stats array
7587c478bd9Sstevel@tonic-gate 			 * since it is no longer relevant. Optimize by
7597c478bd9Sstevel@tonic-gate 			 * checking if it is already zeroed out.
7607c478bd9Sstevel@tonic-gate 			 */
7617c478bd9Sstevel@tonic-gate 			int pr_ndx;
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 			pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
7647c478bd9Sstevel@tonic-gate 			if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) {
7657c478bd9Sstevel@tonic-gate 				clear_pii_probe_stats(pii);
7667c478bd9Sstevel@tonic-gate 				reset_crtt_all(pii->pii_phyint);
7677c478bd9Sstevel@tonic-gate 			}
7687c478bd9Sstevel@tonic-gate 			continue;
769921e7e07Smeem 		} else if (probe_logint == pii->pii_probe_logint) {
7707c478bd9Sstevel@tonic-gate 			/*
7717c478bd9Sstevel@tonic-gate 			 * If we didn't find any new test addr, go to the
7727c478bd9Sstevel@tonic-gate 			 * next phyint.
7737c478bd9Sstevel@tonic-gate 			 */
7747c478bd9Sstevel@tonic-gate 			continue;
7757c478bd9Sstevel@tonic-gate 		}
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 		/*
7787c478bd9Sstevel@tonic-gate 		 * The phyint is either being assigned a new testaddr
7797c478bd9Sstevel@tonic-gate 		 * or is being assigned a testaddr for the 1st time.
7807c478bd9Sstevel@tonic-gate 		 * Need to initialize the phyint socket
7817c478bd9Sstevel@tonic-gate 		 */
782921e7e07Smeem 		pii->pii_probe_logint = probe_logint;
7837c478bd9Sstevel@tonic-gate 		if (!phyint_inst_sockinit(pii)) {
7847c478bd9Sstevel@tonic-gate 			if (debug & D_PHYINT) {
7857c478bd9Sstevel@tonic-gate 				logdebug("select_test_ifs: "
7867c478bd9Sstevel@tonic-gate 				    "phyint_sockinit failed\n");
7877c478bd9Sstevel@tonic-gate 			}
7887c478bd9Sstevel@tonic-gate 			phyint_inst_delete(pii);
7897c478bd9Sstevel@tonic-gate 			continue;
7907c478bd9Sstevel@tonic-gate 		}
7917c478bd9Sstevel@tonic-gate 
7927c478bd9Sstevel@tonic-gate 		/*
7937c478bd9Sstevel@tonic-gate 		 * This phyint instance is now enabled for probes; this
7947c478bd9Sstevel@tonic-gate 		 * impacts our state machine in two ways:
7957c478bd9Sstevel@tonic-gate 		 *
7967c478bd9Sstevel@tonic-gate 		 * 1. If we're probe *capable* as well (i.e., we have
7977c478bd9Sstevel@tonic-gate 		 *    probe targets) and the interface is in PI_NOTARGETS,
7987c478bd9Sstevel@tonic-gate 		 *    then transition to PI_RUNNING.
7997c478bd9Sstevel@tonic-gate 		 *
8007c478bd9Sstevel@tonic-gate 		 * 2. If we're not probe capable, and the other phyint
8017c478bd9Sstevel@tonic-gate 		 *    instance is also not probe capable, and we were in
8027c478bd9Sstevel@tonic-gate 		 *    PI_RUNNING, then transition to PI_NOTARGETS.
8037c478bd9Sstevel@tonic-gate 		 *
8047c478bd9Sstevel@tonic-gate 		 * Also see the state diagram in mpd_probe.c.
8057c478bd9Sstevel@tonic-gate 		 */
8067c478bd9Sstevel@tonic-gate 		if (PROBE_CAPABLE(pii)) {
8077c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_NOTARGETS)
8087c478bd9Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_RUNNING);
8097c478bd9Sstevel@tonic-gate 		} else if (!PROBE_CAPABLE(phyint_inst_other(pii))) {
8107c478bd9Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_RUNNING)
8117c478bd9Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
8127c478bd9Sstevel@tonic-gate 		}
8137c478bd9Sstevel@tonic-gate 
8147c478bd9Sstevel@tonic-gate 		if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
8157c478bd9Sstevel@tonic-gate 			tg = pii->pii_targets;
8167c478bd9Sstevel@tonic-gate 			if (tg != NULL)
8177c478bd9Sstevel@tonic-gate 				target_delete(tg);
8187c478bd9Sstevel@tonic-gate 			assert(pii->pii_targets == NULL);
8197c478bd9Sstevel@tonic-gate 			assert(pii->pii_target_next == NULL);
8207c478bd9Sstevel@tonic-gate 			assert(pii->pii_ntargets == 0);
821921e7e07Smeem 			target_create(pii, probe_logint->li_dstaddr,
8227c478bd9Sstevel@tonic-gate 			    _B_TRUE);
8237c478bd9Sstevel@tonic-gate 		}
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate 		/*
8267c478bd9Sstevel@tonic-gate 		 * If no targets are currently known for this phyint
8277c478bd9Sstevel@tonic-gate 		 * we need to call init_router_targets. Since
8287c478bd9Sstevel@tonic-gate 		 * init_router_targets() initializes the list of targets
8297c478bd9Sstevel@tonic-gate 		 * for all phyints it is done below the loop.
8307c478bd9Sstevel@tonic-gate 		 */
8317c478bd9Sstevel@tonic-gate 		if (pii->pii_targets == NULL)
8327c478bd9Sstevel@tonic-gate 			target_scan_reqd = _B_TRUE;
8337c478bd9Sstevel@tonic-gate 
8347c478bd9Sstevel@tonic-gate 		/*
8357c478bd9Sstevel@tonic-gate 		 * Start the probe timer for this instance.
8367c478bd9Sstevel@tonic-gate 		 */
8377c478bd9Sstevel@tonic-gate 		if (!pii->pii_basetime_inited && pii->pii_probe_sock != -1) {
8387c478bd9Sstevel@tonic-gate 			start_timer(pii);
8397c478bd9Sstevel@tonic-gate 			pii->pii_basetime_inited = 1;
8407c478bd9Sstevel@tonic-gate 		}
8417c478bd9Sstevel@tonic-gate 	}
8427c478bd9Sstevel@tonic-gate 
8437c478bd9Sstevel@tonic-gate 	/*
8447c478bd9Sstevel@tonic-gate 	 * Check the interface list for any interfaces that are marked
8457c478bd9Sstevel@tonic-gate 	 * PI_FAILED but no longer enabled to send probes, and call
8467c478bd9Sstevel@tonic-gate 	 * phyint_check_for_repair() to see if the link now indicates that the
8477c478bd9Sstevel@tonic-gate 	 * interface should be repaired.  Also see the state diagram in
8487c478bd9Sstevel@tonic-gate 	 * mpd_probe.c.
8497c478bd9Sstevel@tonic-gate 	 */
8507c478bd9Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
8517c478bd9Sstevel@tonic-gate 		if (pi->pi_state == PI_FAILED &&
8527c478bd9Sstevel@tonic-gate 		    !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) {
8537c478bd9Sstevel@tonic-gate 			phyint_check_for_repair(pi);
8547c478bd9Sstevel@tonic-gate 		}
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate 
8577c478bd9Sstevel@tonic-gate 	/*
8587c478bd9Sstevel@tonic-gate 	 * Try to populate the target list. init_router_targets populates
8597c478bd9Sstevel@tonic-gate 	 * the target list from the routing table. If our target list is
8607c478bd9Sstevel@tonic-gate 	 * still empty, init_host_targets adds host targets based on the
8617c478bd9Sstevel@tonic-gate 	 * host target list of other phyints in the group.
8627c478bd9Sstevel@tonic-gate 	 */
8637c478bd9Sstevel@tonic-gate 	if (target_scan_reqd) {
8647c478bd9Sstevel@tonic-gate 		init_router_targets();
8657c478bd9Sstevel@tonic-gate 		init_host_targets();
8667c478bd9Sstevel@tonic-gate 	}
8677c478bd9Sstevel@tonic-gate }
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate /*
8707c478bd9Sstevel@tonic-gate  * Check phyint group configuration, to detect any inconsistencies,
8717c478bd9Sstevel@tonic-gate  * and log an error message. This is called from runtimeouts every
8727c478bd9Sstevel@tonic-gate  * 20 secs. But the error message is displayed once. If the
8737c478bd9Sstevel@tonic-gate  * consistency is resolved by the admin, a recovery message is displayed
8747c478bd9Sstevel@tonic-gate  * once.
8757c478bd9Sstevel@tonic-gate  */
8767c478bd9Sstevel@tonic-gate static void
8777c478bd9Sstevel@tonic-gate check_config(void)
8787c478bd9Sstevel@tonic-gate {
8797c478bd9Sstevel@tonic-gate 	struct phyint_group *pg;
8807c478bd9Sstevel@tonic-gate 	struct phyint *pi;
8817c478bd9Sstevel@tonic-gate 	boolean_t v4_in_group;
8827c478bd9Sstevel@tonic-gate 	boolean_t v6_in_group;
8837c478bd9Sstevel@tonic-gate 
8847c478bd9Sstevel@tonic-gate 	/*
8857c478bd9Sstevel@tonic-gate 	 * All phyints of a group must be homogenous to ensure that
8867c478bd9Sstevel@tonic-gate 	 * failover or failback can be done. If any phyint in a group
8877c478bd9Sstevel@tonic-gate 	 * has IPv4 plumbed, check that all phyints have IPv4 plumbed.
8887c478bd9Sstevel@tonic-gate 	 * Do a similar check for IPv6.
8897c478bd9Sstevel@tonic-gate 	 */
8907c478bd9Sstevel@tonic-gate 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
8917c478bd9Sstevel@tonic-gate 		if (pg == phyint_anongroup)
8927c478bd9Sstevel@tonic-gate 			continue;
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate 		v4_in_group = _B_FALSE;
8957c478bd9Sstevel@tonic-gate 		v6_in_group = _B_FALSE;
8967c478bd9Sstevel@tonic-gate 		/*
8977c478bd9Sstevel@tonic-gate 		 * 1st pass. Determine if at least 1 phyint in the group
8987c478bd9Sstevel@tonic-gate 		 * has IPv4 plumbed and if so set v4_in_group to true.
8997c478bd9Sstevel@tonic-gate 		 * Repeat similarly for IPv6.
9007c478bd9Sstevel@tonic-gate 		 */
9017c478bd9Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
9027c478bd9Sstevel@tonic-gate 			if (pi->pi_v4 != NULL)
9037c478bd9Sstevel@tonic-gate 				v4_in_group = _B_TRUE;
9047c478bd9Sstevel@tonic-gate 			if (pi->pi_v6 != NULL)
9057c478bd9Sstevel@tonic-gate 				v6_in_group = _B_TRUE;
9067c478bd9Sstevel@tonic-gate 		}
9077c478bd9Sstevel@tonic-gate 
9087c478bd9Sstevel@tonic-gate 		/*
9097c478bd9Sstevel@tonic-gate 		 * 2nd pass. If v4_in_group is true, check that phyint
9107c478bd9Sstevel@tonic-gate 		 * has IPv4 plumbed. Repeat similarly for IPv6. Print
9117c478bd9Sstevel@tonic-gate 		 * out a message the 1st time only.
9127c478bd9Sstevel@tonic-gate 		 */
9137c478bd9Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
9147c478bd9Sstevel@tonic-gate 			if (pi->pi_flags & IFF_OFFLINE)
9157c478bd9Sstevel@tonic-gate 				continue;
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 			if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
9187c478bd9Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
9197c478bd9Sstevel@tonic-gate 					logerr("NIC %s of group %s is"
9207c478bd9Sstevel@tonic-gate 					    " not plumbed for IPv4 and may"
9217c478bd9Sstevel@tonic-gate 					    " affect failover capability\n",
9227c478bd9Sstevel@tonic-gate 					    pi->pi_name,
9237c478bd9Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9247c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9257c478bd9Sstevel@tonic-gate 				}
9267c478bd9Sstevel@tonic-gate 			} else if (v6_in_group == _B_TRUE &&
9277c478bd9Sstevel@tonic-gate 			    pi->pi_v6 == NULL) {
9287c478bd9Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
9297c478bd9Sstevel@tonic-gate 					logerr("NIC %s of group %s is"
9307c478bd9Sstevel@tonic-gate 					    " not plumbed for IPv6 and may"
9317c478bd9Sstevel@tonic-gate 					    " affect failover capability\n",
9327c478bd9Sstevel@tonic-gate 					    pi->pi_name,
9337c478bd9Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9347c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9357c478bd9Sstevel@tonic-gate 				}
9367c478bd9Sstevel@tonic-gate 			} else {
9377c478bd9Sstevel@tonic-gate 				/*
9387c478bd9Sstevel@tonic-gate 				 * The phyint matches the group configuration,
9397c478bd9Sstevel@tonic-gate 				 * if we have reached this point. If it was
9407c478bd9Sstevel@tonic-gate 				 * improperly configured earlier, log an
9417c478bd9Sstevel@tonic-gate 				 * error recovery message
9427c478bd9Sstevel@tonic-gate 				 */
9437c478bd9Sstevel@tonic-gate 				if (pi->pi_cfgmsg_printed) {
9447c478bd9Sstevel@tonic-gate 					logerr("NIC %s is now consistent with "
9457c478bd9Sstevel@tonic-gate 					    "group %s and failover capability "
9467c478bd9Sstevel@tonic-gate 					    "is restored\n", pi->pi_name,
9477c478bd9Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9487c478bd9Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 0;
9497c478bd9Sstevel@tonic-gate 				}
9507c478bd9Sstevel@tonic-gate 			}
9517c478bd9Sstevel@tonic-gate 
9527c478bd9Sstevel@tonic-gate 		}
9537c478bd9Sstevel@tonic-gate 	}
9547c478bd9Sstevel@tonic-gate 
9557c478bd9Sstevel@tonic-gate 	/*
9567c478bd9Sstevel@tonic-gate 	 * In order to perform probe-based failure detection, a phyint must
9577c478bd9Sstevel@tonic-gate 	 * have at least 1 test/probe address for sending and receiving probes
9587c478bd9Sstevel@tonic-gate 	 * (either on IPv4 or IPv6 instance or both).  If no test address has
9597c478bd9Sstevel@tonic-gate 	 * been configured, notify the administrator, but continue on since we
9607c478bd9Sstevel@tonic-gate 	 * can still perform load spreading, along with "link up/down" based
9617c478bd9Sstevel@tonic-gate 	 * failure detection.
9627c478bd9Sstevel@tonic-gate 	 */
9637c478bd9Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
9647c478bd9Sstevel@tonic-gate 		if (pi->pi_flags & IFF_OFFLINE)
9657c478bd9Sstevel@tonic-gate 			continue;
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate 		if ((pi->pi_v4 == NULL ||
9687c478bd9Sstevel@tonic-gate 		    pi->pi_v4->pii_probe_logint == NULL) &&
9697c478bd9Sstevel@tonic-gate 		    (pi->pi_v6 == NULL ||
9707c478bd9Sstevel@tonic-gate 		    pi->pi_v6->pii_probe_logint == NULL)) {
9717c478bd9Sstevel@tonic-gate 			if (!pi->pi_taddrmsg_printed) {
9727c478bd9Sstevel@tonic-gate 				logerr("No test address configured on "
9737c478bd9Sstevel@tonic-gate 				    "interface %s; disabling probe-based "
9747c478bd9Sstevel@tonic-gate 				    "failure detection on it\n", pi->pi_name);
9757c478bd9Sstevel@tonic-gate 				pi->pi_taddrmsg_printed = 1;
9767c478bd9Sstevel@tonic-gate 			}
9777c478bd9Sstevel@tonic-gate 		} else if (pi->pi_taddrmsg_printed) {
9787c478bd9Sstevel@tonic-gate 			logerr("Test address now configured on interface %s; "
9797c478bd9Sstevel@tonic-gate 			    "enabling probe-based failure detection on it\n",
9807c478bd9Sstevel@tonic-gate 			    pi->pi_name);
9817c478bd9Sstevel@tonic-gate 			pi->pi_taddrmsg_printed = 0;
9827c478bd9Sstevel@tonic-gate 		}
9837c478bd9Sstevel@tonic-gate 
9847c478bd9Sstevel@tonic-gate 	}
9857c478bd9Sstevel@tonic-gate }
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate /*
9887c478bd9Sstevel@tonic-gate  * Timer mechanism using relative time (in milliseconds) from the
9897c478bd9Sstevel@tonic-gate  * previous timer event. Timers exceeding TIMER_INFINITY milliseconds
9907c478bd9Sstevel@tonic-gate  * will fire after TIMER_INFINITY milliseconds.
9917c478bd9Sstevel@tonic-gate  * Unsigned arithmetic note: We assume a 32-bit circular sequence space for
9927c478bd9Sstevel@tonic-gate  * time values. Hence 2 consecutive timer events cannot be spaced farther
9937c478bd9Sstevel@tonic-gate  * than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum value
9947c478bd9Sstevel@tonic-gate  * that can be passed for the delay parameter of timer_schedule()
9957c478bd9Sstevel@tonic-gate  */
9967c478bd9Sstevel@tonic-gate static uint_t timer_next;	/* Currently scheduled timeout */
9977c478bd9Sstevel@tonic-gate static boolean_t timer_active = _B_FALSE; /* SIGALRM has not yet occurred */
9987c478bd9Sstevel@tonic-gate 
9997c478bd9Sstevel@tonic-gate static void
10007c478bd9Sstevel@tonic-gate timer_init(void)
10017c478bd9Sstevel@tonic-gate {
10027c478bd9Sstevel@tonic-gate 	timer_next = getcurrenttime() + TIMER_INFINITY;
10037c478bd9Sstevel@tonic-gate 	/*
10047c478bd9Sstevel@tonic-gate 	 * The call to run_timeouts() will get the timer started
10057c478bd9Sstevel@tonic-gate 	 * Since there are no phyints at this point, the timer will
10067c478bd9Sstevel@tonic-gate 	 * be set for IF_SCAN_INTERVAL ms.
10077c478bd9Sstevel@tonic-gate 	 */
10087c478bd9Sstevel@tonic-gate 	run_timeouts();
10097c478bd9Sstevel@tonic-gate }
10107c478bd9Sstevel@tonic-gate 
10117c478bd9Sstevel@tonic-gate /*
10127c478bd9Sstevel@tonic-gate  * Make sure the next SIGALRM occurs delay milliseconds from the current
10137c478bd9Sstevel@tonic-gate  * time if not earlier. We are interested only in time differences.
10147c478bd9Sstevel@tonic-gate  */
10157c478bd9Sstevel@tonic-gate void
10167c478bd9Sstevel@tonic-gate timer_schedule(uint_t delay)
10177c478bd9Sstevel@tonic-gate {
10187c478bd9Sstevel@tonic-gate 	uint_t now;
10197c478bd9Sstevel@tonic-gate 	struct itimerval itimerval;
10207c478bd9Sstevel@tonic-gate 
10217c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
10227c478bd9Sstevel@tonic-gate 		logdebug("timer_schedule(%u)\n", delay);
10237c478bd9Sstevel@tonic-gate 
10247c478bd9Sstevel@tonic-gate 	assert(delay <= TIMER_INFINITY);
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate 	now = getcurrenttime();
10277c478bd9Sstevel@tonic-gate 	if (delay == 0) {
10287c478bd9Sstevel@tonic-gate 		/* Minimum allowed delay */
10297c478bd9Sstevel@tonic-gate 		delay = 1;
10307c478bd9Sstevel@tonic-gate 	}
10317c478bd9Sstevel@tonic-gate 	/* Will this timer occur before the currently scheduled SIGALRM? */
10327c478bd9Sstevel@tonic-gate 	if (timer_active && TIME_GE(now + delay, timer_next)) {
10337c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
10347c478bd9Sstevel@tonic-gate 			logdebug("timer_schedule(%u) - no action: "
10357c478bd9Sstevel@tonic-gate 			    "now %u next %u\n", delay, now, timer_next);
10367c478bd9Sstevel@tonic-gate 		}
10377c478bd9Sstevel@tonic-gate 		return;
10387c478bd9Sstevel@tonic-gate 	}
10397c478bd9Sstevel@tonic-gate 	timer_next = now + delay;
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	itimerval.it_value.tv_sec = delay / 1000;
10427c478bd9Sstevel@tonic-gate 	itimerval.it_value.tv_usec = (delay % 1000) * 1000;
10437c478bd9Sstevel@tonic-gate 	itimerval.it_interval.tv_sec = 0;
10447c478bd9Sstevel@tonic-gate 	itimerval.it_interval.tv_usec = 0;
10457c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER) {
10467c478bd9Sstevel@tonic-gate 		logdebug("timer_schedule(%u): sec %ld usec %ld\n",
10477c478bd9Sstevel@tonic-gate 		    delay, itimerval.it_value.tv_sec,
10487c478bd9Sstevel@tonic-gate 		    itimerval.it_value.tv_usec);
10497c478bd9Sstevel@tonic-gate 	}
10507c478bd9Sstevel@tonic-gate 	timer_active = _B_TRUE;
10517c478bd9Sstevel@tonic-gate 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) {
10527c478bd9Sstevel@tonic-gate 		logperror("timer_schedule: setitimer");
10537c478bd9Sstevel@tonic-gate 		exit(2);
10547c478bd9Sstevel@tonic-gate 	}
10557c478bd9Sstevel@tonic-gate }
10567c478bd9Sstevel@tonic-gate 
10577c478bd9Sstevel@tonic-gate /*
10587c478bd9Sstevel@tonic-gate  * Timer has fired. Determine when the next timer event will occur by asking
10597c478bd9Sstevel@tonic-gate  * all the timer routines. Should not be called from a timer routine.
10607c478bd9Sstevel@tonic-gate  */
10617c478bd9Sstevel@tonic-gate static void
10627c478bd9Sstevel@tonic-gate run_timeouts(void)
10637c478bd9Sstevel@tonic-gate {
10647c478bd9Sstevel@tonic-gate 	uint_t next;
10657c478bd9Sstevel@tonic-gate 	uint_t next_event_time;
10667c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
10677c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
10687c478bd9Sstevel@tonic-gate 	static boolean_t timeout_running;
10697c478bd9Sstevel@tonic-gate 
10707c478bd9Sstevel@tonic-gate 	/* assert that recursive timeouts don't happen. */
10717c478bd9Sstevel@tonic-gate 	assert(!timeout_running);
10727c478bd9Sstevel@tonic-gate 
10737c478bd9Sstevel@tonic-gate 	timeout_running = _B_TRUE;
10747c478bd9Sstevel@tonic-gate 
10757c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
10767c478bd9Sstevel@tonic-gate 		logdebug("run_timeouts()\n");
10777c478bd9Sstevel@tonic-gate 
10787c478bd9Sstevel@tonic-gate 	next = TIMER_INFINITY;
10797c478bd9Sstevel@tonic-gate 
10807c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
10817c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
10827c478bd9Sstevel@tonic-gate 		next_event_time = phyint_inst_timer(pii);
10837c478bd9Sstevel@tonic-gate 		if (next_event_time != TIMER_INFINITY && next_event_time < next)
10847c478bd9Sstevel@tonic-gate 			next = next_event_time;
10857c478bd9Sstevel@tonic-gate 
10867c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
10877c478bd9Sstevel@tonic-gate 			logdebug("run_timeouts(%s %s): next scheduled for"
10887c478bd9Sstevel@tonic-gate 			    " this phyint inst %u, next scheduled global"
10897c478bd9Sstevel@tonic-gate 			    " %u ms\n",
10907c478bd9Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
10917c478bd9Sstevel@tonic-gate 			    next_event_time, next);
10927c478bd9Sstevel@tonic-gate 		}
10937c478bd9Sstevel@tonic-gate 	}
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 	/*
10967c478bd9Sstevel@tonic-gate 	 * Make sure initifs() is called at least once every
10977c478bd9Sstevel@tonic-gate 	 * IF_SCAN_INTERVAL, to make sure that we are in sync
10987c478bd9Sstevel@tonic-gate 	 * with the kernel, in case we have missed any routing
10997c478bd9Sstevel@tonic-gate 	 * socket messages.
11007c478bd9Sstevel@tonic-gate 	 */
11017c478bd9Sstevel@tonic-gate 	if (next > IF_SCAN_INTERVAL)
11027c478bd9Sstevel@tonic-gate 		next = IF_SCAN_INTERVAL;
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 	if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) {
11057c478bd9Sstevel@tonic-gate 		initifs();
11067c478bd9Sstevel@tonic-gate 		check_config();
11077c478bd9Sstevel@tonic-gate 	}
11087c478bd9Sstevel@tonic-gate 
11097c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
11107c478bd9Sstevel@tonic-gate 		logdebug("run_timeouts: %u ms\n", next);
11117c478bd9Sstevel@tonic-gate 
11127c478bd9Sstevel@tonic-gate 	timer_schedule(next);
11137c478bd9Sstevel@tonic-gate 	timeout_running = _B_FALSE;
11147c478bd9Sstevel@tonic-gate }
11157c478bd9Sstevel@tonic-gate 
11167c478bd9Sstevel@tonic-gate static int eventpipe_read = -1;	/* Used for synchronous signal delivery */
11177c478bd9Sstevel@tonic-gate static int eventpipe_write = -1;
11187c478bd9Sstevel@tonic-gate static boolean_t cleanup_started = _B_FALSE;
11197c478bd9Sstevel@tonic-gate 				/* Don't write to eventpipe if in cleanup */
11207c478bd9Sstevel@tonic-gate /*
11217c478bd9Sstevel@tonic-gate  * Ensure that signals are processed synchronously with the rest of
11227c478bd9Sstevel@tonic-gate  * the code by just writing a one character signal number on the pipe.
11237c478bd9Sstevel@tonic-gate  * The poll loop will pick this up and process the signal event.
11247c478bd9Sstevel@tonic-gate  */
11257c478bd9Sstevel@tonic-gate static void
11267c478bd9Sstevel@tonic-gate sig_handler(int signo)
11277c478bd9Sstevel@tonic-gate {
11287c478bd9Sstevel@tonic-gate 	uchar_t buf = (uchar_t)signo;
11297c478bd9Sstevel@tonic-gate 
11307c478bd9Sstevel@tonic-gate 	/*
11317c478bd9Sstevel@tonic-gate 	 * Don't write to pipe if cleanup has already begun. cleanup()
11327c478bd9Sstevel@tonic-gate 	 * might have closed the pipe already
11337c478bd9Sstevel@tonic-gate 	 */
11347c478bd9Sstevel@tonic-gate 	if (cleanup_started)
11357c478bd9Sstevel@tonic-gate 		return;
11367c478bd9Sstevel@tonic-gate 
11377c478bd9Sstevel@tonic-gate 	if (eventpipe_write == -1) {
11387c478bd9Sstevel@tonic-gate 		logerr("sig_handler: no pipe found\n");
11397c478bd9Sstevel@tonic-gate 		return;
11407c478bd9Sstevel@tonic-gate 	}
11417c478bd9Sstevel@tonic-gate 	if (write(eventpipe_write, &buf, sizeof (buf)) < 0)
11427c478bd9Sstevel@tonic-gate 		logperror("sig_handler: write");
11437c478bd9Sstevel@tonic-gate }
11447c478bd9Sstevel@tonic-gate 
11457c478bd9Sstevel@tonic-gate extern struct probes_missed probes_missed;
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate /*
11487c478bd9Sstevel@tonic-gate  * Pick up a signal "byte" from the pipe and process it.
11497c478bd9Sstevel@tonic-gate  */
11507c478bd9Sstevel@tonic-gate static void
11517c478bd9Sstevel@tonic-gate in_signal(int fd)
11527c478bd9Sstevel@tonic-gate {
11537c478bd9Sstevel@tonic-gate 	uchar_t buf;
11547c478bd9Sstevel@tonic-gate 	uint64_t  sent, acked, lost, unacked, unknown;
11557c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
11567c478bd9Sstevel@tonic-gate 	int pr_ndx;
11577c478bd9Sstevel@tonic-gate 
11587c478bd9Sstevel@tonic-gate 	switch (read(fd, &buf, sizeof (buf))) {
11597c478bd9Sstevel@tonic-gate 	case -1:
11607c478bd9Sstevel@tonic-gate 		logperror("in_signal: read");
11617c478bd9Sstevel@tonic-gate 		exit(1);
11627c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
11637c478bd9Sstevel@tonic-gate 	case 1:
11647c478bd9Sstevel@tonic-gate 		break;
11657c478bd9Sstevel@tonic-gate 	case 0:
11667c478bd9Sstevel@tonic-gate 		logerr("in_signal: read end of file\n");
11677c478bd9Sstevel@tonic-gate 		exit(1);
11687c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
11697c478bd9Sstevel@tonic-gate 	default:
11707c478bd9Sstevel@tonic-gate 		logerr("in_signal: read > 1\n");
11717c478bd9Sstevel@tonic-gate 		exit(1);
11727c478bd9Sstevel@tonic-gate 	}
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 	if (debug & D_TIMER)
11757c478bd9Sstevel@tonic-gate 		logdebug("in_signal() got %d\n", buf);
11767c478bd9Sstevel@tonic-gate 
11777c478bd9Sstevel@tonic-gate 	switch (buf) {
11787c478bd9Sstevel@tonic-gate 	case SIGALRM:
11797c478bd9Sstevel@tonic-gate 		if (debug & D_TIMER) {
11807c478bd9Sstevel@tonic-gate 			uint_t now = getcurrenttime();
11817c478bd9Sstevel@tonic-gate 
11827c478bd9Sstevel@tonic-gate 			logdebug("in_signal(SIGALRM) delta %u\n",
11837c478bd9Sstevel@tonic-gate 			    now - timer_next);
11847c478bd9Sstevel@tonic-gate 		}
11857c478bd9Sstevel@tonic-gate 		timer_active = _B_FALSE;
11867c478bd9Sstevel@tonic-gate 		run_timeouts();
11877c478bd9Sstevel@tonic-gate 		break;
11887c478bd9Sstevel@tonic-gate 	case SIGUSR1:
11897c478bd9Sstevel@tonic-gate 		logdebug("Printing configuration:\n");
11907c478bd9Sstevel@tonic-gate 		/* Print out the internal tables */
11917c478bd9Sstevel@tonic-gate 		phyint_inst_print_all();
11927c478bd9Sstevel@tonic-gate 
11937c478bd9Sstevel@tonic-gate 		/*
11947c478bd9Sstevel@tonic-gate 		 * Print out the accumulated statistics about missed
11957c478bd9Sstevel@tonic-gate 		 * probes (happens due to scheduling delay).
11967c478bd9Sstevel@tonic-gate 		 */
11977c478bd9Sstevel@tonic-gate 		logerr("Missed sending total of %d probes spread over"
11987c478bd9Sstevel@tonic-gate 		    " %d occurrences\n", probes_missed.pm_nprobes,
11997c478bd9Sstevel@tonic-gate 		    probes_missed.pm_ntimes);
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 		/*
12027c478bd9Sstevel@tonic-gate 		 * Print out the accumulated statistics about probes
12037c478bd9Sstevel@tonic-gate 		 * that were sent.
12047c478bd9Sstevel@tonic-gate 		 */
12057c478bd9Sstevel@tonic-gate 		for (pii = phyint_instances; pii != NULL;
12067c478bd9Sstevel@tonic-gate 		    pii = pii->pii_next) {
12077c478bd9Sstevel@tonic-gate 			unacked = 0;
12087c478bd9Sstevel@tonic-gate 			acked = pii->pii_cum_stats.acked;
12097c478bd9Sstevel@tonic-gate 			lost = pii->pii_cum_stats.lost;
12107c478bd9Sstevel@tonic-gate 			sent = pii->pii_cum_stats.sent;
12117c478bd9Sstevel@tonic-gate 			unknown = pii->pii_cum_stats.unknown;
12127c478bd9Sstevel@tonic-gate 			for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) {
12137c478bd9Sstevel@tonic-gate 				switch (pii->pii_probes[pr_ndx].pr_status) {
12147c478bd9Sstevel@tonic-gate 				case PR_ACKED:
12157c478bd9Sstevel@tonic-gate 					acked++;
12167c478bd9Sstevel@tonic-gate 					break;
12177c478bd9Sstevel@tonic-gate 				case PR_LOST:
12187c478bd9Sstevel@tonic-gate 					lost++;
12197c478bd9Sstevel@tonic-gate 					break;
12207c478bd9Sstevel@tonic-gate 				case PR_UNACKED:
12217c478bd9Sstevel@tonic-gate 					unacked++;
12227c478bd9Sstevel@tonic-gate 					break;
12237c478bd9Sstevel@tonic-gate 				}
12247c478bd9Sstevel@tonic-gate 			}
12257c478bd9Sstevel@tonic-gate 			logerr("\nProbe stats on (%s %s)\n"
12267c478bd9Sstevel@tonic-gate 			    "Number of probes sent %lld\n"
12277c478bd9Sstevel@tonic-gate 			    "Number of probe acks received %lld\n"
12287c478bd9Sstevel@tonic-gate 			    "Number of probes/acks lost %lld\n"
12297c478bd9Sstevel@tonic-gate 			    "Number of valid unacknowled probes %lld\n"
12307c478bd9Sstevel@tonic-gate 			    "Number of ambiguous probe acks received %lld\n",
12317c478bd9Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_name,
12327c478bd9Sstevel@tonic-gate 			    sent, acked, lost, unacked, unknown);
12337c478bd9Sstevel@tonic-gate 		}
12347c478bd9Sstevel@tonic-gate 		break;
12357c478bd9Sstevel@tonic-gate 	case SIGHUP:
12367c478bd9Sstevel@tonic-gate 		logerr("SIGHUP: restart and reread config file\n");
12377c478bd9Sstevel@tonic-gate 		cleanup();
12387c478bd9Sstevel@tonic-gate 		(void) execv(argv0[0], argv0);
12397c478bd9Sstevel@tonic-gate 		_exit(0177);
12407c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
12417c478bd9Sstevel@tonic-gate 	case SIGINT:
12427c478bd9Sstevel@tonic-gate 	case SIGTERM:
12437c478bd9Sstevel@tonic-gate 	case SIGQUIT:
12447c478bd9Sstevel@tonic-gate 		cleanup();
12457c478bd9Sstevel@tonic-gate 		exit(0);
12467c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
12477c478bd9Sstevel@tonic-gate 	default:
12487c478bd9Sstevel@tonic-gate 		logerr("in_signal: unknown signal: %d\n", buf);
12497c478bd9Sstevel@tonic-gate 	}
12507c478bd9Sstevel@tonic-gate }
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate static void
12537c478bd9Sstevel@tonic-gate cleanup(void)
12547c478bd9Sstevel@tonic-gate {
12557c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
12567c478bd9Sstevel@tonic-gate 	struct phyint_instance *next_pii;
12577c478bd9Sstevel@tonic-gate 
12587c478bd9Sstevel@tonic-gate 	/*
12597c478bd9Sstevel@tonic-gate 	 * Make sure that we don't write to eventpipe in
12607c478bd9Sstevel@tonic-gate 	 * sig_handler() if any signal notably SIGALRM,
12617c478bd9Sstevel@tonic-gate 	 * occurs after we close the eventpipe descriptor below
12627c478bd9Sstevel@tonic-gate 	 */
12637c478bd9Sstevel@tonic-gate 	cleanup_started = _B_TRUE;
12647c478bd9Sstevel@tonic-gate 
12657c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
12667c478bd9Sstevel@tonic-gate 		next_pii = pii->pii_next;
12677c478bd9Sstevel@tonic-gate 		phyint_inst_delete(pii);
12687c478bd9Sstevel@tonic-gate 	}
12697c478bd9Sstevel@tonic-gate 
12707c478bd9Sstevel@tonic-gate 	(void) close(ifsock_v4);
12717c478bd9Sstevel@tonic-gate 	(void) close(ifsock_v6);
12727c478bd9Sstevel@tonic-gate 	(void) close(rtsock_v4);
12737c478bd9Sstevel@tonic-gate 	(void) close(rtsock_v6);
12747c478bd9Sstevel@tonic-gate 	(void) close(lsock_v4);
12757c478bd9Sstevel@tonic-gate 	(void) close(lsock_v6);
12767c478bd9Sstevel@tonic-gate 	(void) close(0);
12777c478bd9Sstevel@tonic-gate 	(void) close(1);
12787c478bd9Sstevel@tonic-gate 	(void) close(2);
12797c478bd9Sstevel@tonic-gate 	(void) close(mibfd);
12807c478bd9Sstevel@tonic-gate 	(void) close(eventpipe_read);
12817c478bd9Sstevel@tonic-gate 	(void) close(eventpipe_write);
12827c478bd9Sstevel@tonic-gate }
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate /*
12857c478bd9Sstevel@tonic-gate  * Create pipe for signal delivery and set up signal handlers.
12867c478bd9Sstevel@tonic-gate  */
12877c478bd9Sstevel@tonic-gate static void
12887c478bd9Sstevel@tonic-gate setup_eventpipe(void)
12897c478bd9Sstevel@tonic-gate {
12907c478bd9Sstevel@tonic-gate 	int fds[2];
12917c478bd9Sstevel@tonic-gate 	struct sigaction act;
12927c478bd9Sstevel@tonic-gate 
12937c478bd9Sstevel@tonic-gate 	if ((pipe(fds)) < 0) {
12947c478bd9Sstevel@tonic-gate 		logperror("setup_eventpipe: pipe");
12957c478bd9Sstevel@tonic-gate 		exit(1);
12967c478bd9Sstevel@tonic-gate 	}
12977c478bd9Sstevel@tonic-gate 	eventpipe_read = fds[0];
12987c478bd9Sstevel@tonic-gate 	eventpipe_write = fds[1];
12997c478bd9Sstevel@tonic-gate 	if (poll_add(eventpipe_read) == -1) {
13007c478bd9Sstevel@tonic-gate 		exit(1);
13017c478bd9Sstevel@tonic-gate 	}
13027c478bd9Sstevel@tonic-gate 
13037c478bd9Sstevel@tonic-gate 	act.sa_handler = sig_handler;
13047c478bd9Sstevel@tonic-gate 	act.sa_flags = SA_RESTART;
13057c478bd9Sstevel@tonic-gate 	(void) sigaction(SIGALRM, &act, NULL);
13067c478bd9Sstevel@tonic-gate 
13077c478bd9Sstevel@tonic-gate 	(void) sigset(SIGHUP, sig_handler);
13087c478bd9Sstevel@tonic-gate 	(void) sigset(SIGUSR1, sig_handler);
13097c478bd9Sstevel@tonic-gate 	(void) sigset(SIGTERM, sig_handler);
13107c478bd9Sstevel@tonic-gate 	(void) sigset(SIGINT, sig_handler);
13117c478bd9Sstevel@tonic-gate 	(void) sigset(SIGQUIT, sig_handler);
13127c478bd9Sstevel@tonic-gate }
13137c478bd9Sstevel@tonic-gate 
/*
 * Open a routing socket for address family `af', on which RTM_IFINFO
 * messages are received.  The socket is switched to non-blocking mode and
 * added to the set of polled descriptors.  Returns the descriptor; any
 * failure along the way terminates the daemon.
 */
static int
setup_rtsock(int af)
{
	int	rtfd;
	int	fl;

	if ((rtfd = socket(PF_ROUTE, SOCK_RAW, af)) == -1) {
		logperror("setup_rtsock: socket PF_ROUTE");
		exit(1);
	}

	/* Add O_NONBLOCK to whatever file status flags are already set. */
	fl = fcntl(rtfd, F_GETFL, 0);
	if (fl < 0) {
		logperror("setup_rtsock: fcntl F_GETFL");
		(void) close(rtfd);
		exit(1);
	}
	if (fcntl(rtfd, F_SETFL, fl | O_NONBLOCK) < 0) {
		logperror("setup_rtsock: fcntl F_SETFL");
		(void) close(rtfd);
		exit(1);
	}

	if (poll_add(rtfd) == -1) {
		(void) close(rtfd);
		exit(1);
	}

	return (rtfd);
}
13447c478bd9Sstevel@tonic-gate 
13457c478bd9Sstevel@tonic-gate /*
13467c478bd9Sstevel@tonic-gate  * Process an RTM_IFINFO message received on a routing socket.
13477c478bd9Sstevel@tonic-gate  * The return value indicates whether a full interface scan is required.
13487c478bd9Sstevel@tonic-gate  * Link up/down notifications from the NICs are reflected in the
13497c478bd9Sstevel@tonic-gate  * IFF_RUNNING flag.
13507c478bd9Sstevel@tonic-gate  * If just the state of the IFF_RUNNING interface flag has changed, a
13517c478bd9Sstevel@tonic-gate  * a full interface scan isn't required.
13527c478bd9Sstevel@tonic-gate  */
13537c478bd9Sstevel@tonic-gate static boolean_t
13547c478bd9Sstevel@tonic-gate process_rtm_ifinfo(if_msghdr_t *ifm, int type)
13557c478bd9Sstevel@tonic-gate {
13567c478bd9Sstevel@tonic-gate 	struct sockaddr_dl *sdl;
13577c478bd9Sstevel@tonic-gate 	struct phyint *pi;
13587c478bd9Sstevel@tonic-gate 	uint64_t old_flags;
13597c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 	assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP);
13627c478bd9Sstevel@tonic-gate 
13637c478bd9Sstevel@tonic-gate 	/*
13647c478bd9Sstevel@tonic-gate 	 * Although the sockaddr_dl structure is directly after the
13657c478bd9Sstevel@tonic-gate 	 * if_msghdr_t structure. At the time of writing, the size of the
13667c478bd9Sstevel@tonic-gate 	 * if_msghdr_t structure is different on 32 and 64 bit kernels, due
13677c478bd9Sstevel@tonic-gate 	 * to the presence of a timeval structure, which contains longs,
13687c478bd9Sstevel@tonic-gate 	 * in the if_data structure.  Anyway, we know where the message ends,
13697c478bd9Sstevel@tonic-gate 	 * so we work backwards to get the start of the sockaddr_dl structure.
13707c478bd9Sstevel@tonic-gate 	 */
13717c478bd9Sstevel@tonic-gate 	/*LINTED*/
13727c478bd9Sstevel@tonic-gate 	sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen -
13737c478bd9Sstevel@tonic-gate 		sizeof (struct sockaddr_dl));
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate 	assert(sdl->sdl_family == AF_LINK);
13767c478bd9Sstevel@tonic-gate 
13777c478bd9Sstevel@tonic-gate 	/*
13787c478bd9Sstevel@tonic-gate 	 * The interface name is in sdl_data.
13797c478bd9Sstevel@tonic-gate 	 * RTM_IFINFO messages are only generated for logical interface
13807c478bd9Sstevel@tonic-gate 	 * zero, so there is no colon and logical interface number to
13817c478bd9Sstevel@tonic-gate 	 * strip from the name.	 The name is not null terminated, but
13827c478bd9Sstevel@tonic-gate 	 * there should be enough space in sdl_data to add the null.
13837c478bd9Sstevel@tonic-gate 	 */
13847c478bd9Sstevel@tonic-gate 	if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) {
13857c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
13867c478bd9Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: "
13877c478bd9Sstevel@tonic-gate 				"phyint name too long\n");
13887c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
13897c478bd9Sstevel@tonic-gate 	}
13907c478bd9Sstevel@tonic-gate 	sdl->sdl_data[sdl->sdl_nlen] = 0;
13917c478bd9Sstevel@tonic-gate 
13927c478bd9Sstevel@tonic-gate 	pi = phyint_lookup(sdl->sdl_data);
13937c478bd9Sstevel@tonic-gate 	if (pi == NULL) {
13947c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
13957c478bd9Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: phyint lookup failed"
13967c478bd9Sstevel@tonic-gate 				" for %s\n", sdl->sdl_data);
13977c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
13987c478bd9Sstevel@tonic-gate 	}
13997c478bd9Sstevel@tonic-gate 
14007c478bd9Sstevel@tonic-gate 	/*
14017c478bd9Sstevel@tonic-gate 	 * We want to try and avoid doing a full interface scan for
14027c478bd9Sstevel@tonic-gate 	 * link state notifications from the NICs, as indicated
14037c478bd9Sstevel@tonic-gate 	 * by the state of the IFF_RUNNING flag.  If just the
14047c478bd9Sstevel@tonic-gate 	 * IFF_RUNNING flag has changed state, the link state changes
14057c478bd9Sstevel@tonic-gate 	 * are processed without a full scan.
14067c478bd9Sstevel@tonic-gate 	 * If there is both an IPv4 and IPv6 instance associated with
14077c478bd9Sstevel@tonic-gate 	 * the physical interface, we will get an RTM_IFINFO message
14087c478bd9Sstevel@tonic-gate 	 * for each instance.  If we just maintained a single copy of
14097c478bd9Sstevel@tonic-gate 	 * the physical interface flags, it would appear that no flags
14107c478bd9Sstevel@tonic-gate 	 * had changed when the second message is processed, leading us
14117c478bd9Sstevel@tonic-gate 	 * to believe that the message wasn't generated by a flags change,
14127c478bd9Sstevel@tonic-gate 	 * and that a full interface scan is required.
14137c478bd9Sstevel@tonic-gate 	 * To get around this problem, two additional copies of the flags
14147c478bd9Sstevel@tonic-gate 	 * are kept, one copy for each instance.  These are only used in
14157c478bd9Sstevel@tonic-gate 	 * this routine.  At any one time, all three copies of the flags
14167c478bd9Sstevel@tonic-gate 	 * should be identical except for the IFF_RUNNING flag.	 The
14177c478bd9Sstevel@tonic-gate 	 * copy of the flags in the "phyint" structure is always up to
14187c478bd9Sstevel@tonic-gate 	 * date.
14197c478bd9Sstevel@tonic-gate 	 */
14207c478bd9Sstevel@tonic-gate 	pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6;
14217c478bd9Sstevel@tonic-gate 	if (pii == NULL) {
14227c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
14237c478bd9Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: no instance of address "
14247c478bd9Sstevel@tonic-gate 			    "family %s for %s\n", AF_STR(type), pi->pi_name);
14257c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
14267c478bd9Sstevel@tonic-gate 	}
14277c478bd9Sstevel@tonic-gate 
14287c478bd9Sstevel@tonic-gate 	old_flags = pii->pii_flags;
14297c478bd9Sstevel@tonic-gate 	pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags);
14307c478bd9Sstevel@tonic-gate 	pi->pi_flags = pii->pii_flags;
14317c478bd9Sstevel@tonic-gate 
14327c478bd9Sstevel@tonic-gate 	if (debug & D_LINKNOTE) {
14337c478bd9Sstevel@tonic-gate 		logdebug("process_rtm_ifinfo: %s address family: %s, "
14347c478bd9Sstevel@tonic-gate 		    "old flags: %llx, new flags: %llx\n", pi->pi_name,
14357c478bd9Sstevel@tonic-gate 		    AF_STR(type), old_flags, pi->pi_flags);
14367c478bd9Sstevel@tonic-gate 	}
14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate 	/*
14397c478bd9Sstevel@tonic-gate 	 * If IFF_STANDBY has changed, indicate that the interface has changed
14407c478bd9Sstevel@tonic-gate 	 * types.
14417c478bd9Sstevel@tonic-gate 	 */
14427c478bd9Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY)
14437c478bd9Sstevel@tonic-gate 		phyint_newtype(pi);
14447c478bd9Sstevel@tonic-gate 
14457c478bd9Sstevel@tonic-gate 	/*
14467c478bd9Sstevel@tonic-gate 	 * If IFF_INACTIVE has been set, then no data addresses should be
14477c478bd9Sstevel@tonic-gate 	 * hosted on the interface.  If IFF_INACTIVE has been cleared, then
14487c478bd9Sstevel@tonic-gate 	 * move previously failed-over addresses back to it, provided it is
14497c478bd9Sstevel@tonic-gate 	 * not failed.	For details, see the state diagram in mpd_probe.c.
14507c478bd9Sstevel@tonic-gate 	 */
14517c478bd9Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) {
14527c478bd9Sstevel@tonic-gate 		if (pii->pii_flags & IFF_INACTIVE) {
145349df4566Sethindra 			if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
14547c478bd9Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
14557c478bd9Sstevel@tonic-gate 		} else {
14567c478bd9Sstevel@tonic-gate 			if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
14577c478bd9Sstevel@tonic-gate 				pi->pi_empty = 0;
14587c478bd9Sstevel@tonic-gate 				(void) try_failback(pi, _B_FALSE);
14597c478bd9Sstevel@tonic-gate 			}
14607c478bd9Sstevel@tonic-gate 		}
14617c478bd9Sstevel@tonic-gate 	}
14627c478bd9Sstevel@tonic-gate 
14637c478bd9Sstevel@tonic-gate 	/* Has just the IFF_RUNNING flag changed state ? */
14647c478bd9Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
14657c478bd9Sstevel@tonic-gate 		struct phyint_instance *pii_other;
14667c478bd9Sstevel@tonic-gate 		/*
14677c478bd9Sstevel@tonic-gate 		 * It wasn't just a link state change.	Update
14687c478bd9Sstevel@tonic-gate 		 * the other instance's copy of the flags.
14697c478bd9Sstevel@tonic-gate 		 */
14707c478bd9Sstevel@tonic-gate 		pii_other = phyint_inst_other(pii);
14717c478bd9Sstevel@tonic-gate 		if (pii_other != NULL)
14727c478bd9Sstevel@tonic-gate 			pii_other->pii_flags = pii->pii_flags;
14737c478bd9Sstevel@tonic-gate 		return (_B_TRUE);
14747c478bd9Sstevel@tonic-gate 	}
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate 	return (_B_FALSE);
14777c478bd9Sstevel@tonic-gate }
14787c478bd9Sstevel@tonic-gate 
14797c478bd9Sstevel@tonic-gate /*
14807c478bd9Sstevel@tonic-gate  * Retrieve as many routing socket messages as possible, and try to
14817c478bd9Sstevel@tonic-gate  * empty the routing sockets. Initiate full scan of targets or interfaces
14827c478bd9Sstevel@tonic-gate  * as needed.
14837c478bd9Sstevel@tonic-gate  * We listen on separate IPv4 an IPv6 sockets so that we can accurately
14847c478bd9Sstevel@tonic-gate  * detect changes in certain flags (see "process_rtm_ifinfo()" above).
14857c478bd9Sstevel@tonic-gate  */
14867c478bd9Sstevel@tonic-gate static void
14877c478bd9Sstevel@tonic-gate process_rtsock(int rtsock_v4, int rtsock_v6)
14887c478bd9Sstevel@tonic-gate {
14897c478bd9Sstevel@tonic-gate 	int	nbytes;
14907c478bd9Sstevel@tonic-gate 	int64_t msg[2048 / 8];
14917c478bd9Sstevel@tonic-gate 	struct rt_msghdr *rtm;
14927c478bd9Sstevel@tonic-gate 	boolean_t need_if_scan = _B_FALSE;
14937c478bd9Sstevel@tonic-gate 	boolean_t need_rt_scan = _B_FALSE;
14947c478bd9Sstevel@tonic-gate 	boolean_t rtm_ifinfo_seen = _B_FALSE;
14957c478bd9Sstevel@tonic-gate 	int type;
14967c478bd9Sstevel@tonic-gate 
14977c478bd9Sstevel@tonic-gate 	/* Read as many messages as possible and try to empty the sockets */
14987c478bd9Sstevel@tonic-gate 	for (type = AF_INET; ; type = AF_INET6) {
14997c478bd9Sstevel@tonic-gate 		for (;;) {
15007c478bd9Sstevel@tonic-gate 			nbytes = read((type == AF_INET) ? rtsock_v4 :
15017c478bd9Sstevel@tonic-gate 				rtsock_v6, msg, sizeof (msg));
15027c478bd9Sstevel@tonic-gate 			if (nbytes <= 0) {
15037c478bd9Sstevel@tonic-gate 				/* No more messages */
15047c478bd9Sstevel@tonic-gate 				break;
15057c478bd9Sstevel@tonic-gate 			}
15067c478bd9Sstevel@tonic-gate 			rtm = (struct rt_msghdr *)msg;
15077c478bd9Sstevel@tonic-gate 			if (rtm->rtm_version != RTM_VERSION) {
15087c478bd9Sstevel@tonic-gate 				logerr("process_rtsock: version %d "
15097c478bd9Sstevel@tonic-gate 				    "not understood\n", rtm->rtm_version);
15107c478bd9Sstevel@tonic-gate 				break;
15117c478bd9Sstevel@tonic-gate 			}
15127c478bd9Sstevel@tonic-gate 
15137c478bd9Sstevel@tonic-gate 			if (debug & D_PHYINT) {
15147c478bd9Sstevel@tonic-gate 				logdebug("process_rtsock: message %d\n",
15157c478bd9Sstevel@tonic-gate 				    rtm->rtm_type);
15167c478bd9Sstevel@tonic-gate 			}
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate 			switch (rtm->rtm_type) {
15197c478bd9Sstevel@tonic-gate 			case RTM_NEWADDR:
15207c478bd9Sstevel@tonic-gate 			case RTM_DELADDR:
15217c478bd9Sstevel@tonic-gate 				/*
15227c478bd9Sstevel@tonic-gate 				 * Some logical interface has changed,
15237c478bd9Sstevel@tonic-gate 				 * have to scan everything to determine
15247c478bd9Sstevel@tonic-gate 				 * what actually changed.
15257c478bd9Sstevel@tonic-gate 				 */
15267c478bd9Sstevel@tonic-gate 				need_if_scan = _B_TRUE;
15277c478bd9Sstevel@tonic-gate 				break;
15287c478bd9Sstevel@tonic-gate 
15297c478bd9Sstevel@tonic-gate 			case RTM_IFINFO:
15307c478bd9Sstevel@tonic-gate 				rtm_ifinfo_seen = _B_TRUE;
15317c478bd9Sstevel@tonic-gate 				need_if_scan |=
15327c478bd9Sstevel@tonic-gate 					process_rtm_ifinfo((if_msghdr_t *)rtm,
15337c478bd9Sstevel@tonic-gate 					type);
15347c478bd9Sstevel@tonic-gate 				break;
15357c478bd9Sstevel@tonic-gate 
15367c478bd9Sstevel@tonic-gate 			case RTM_ADD:
15377c478bd9Sstevel@tonic-gate 			case RTM_DELETE:
15387c478bd9Sstevel@tonic-gate 			case RTM_CHANGE:
15397c478bd9Sstevel@tonic-gate 			case RTM_OLDADD:
15407c478bd9Sstevel@tonic-gate 			case RTM_OLDDEL:
15417c478bd9Sstevel@tonic-gate 				need_rt_scan = _B_TRUE;
15427c478bd9Sstevel@tonic-gate 				break;
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate 			default:
15457c478bd9Sstevel@tonic-gate 				/* Not interesting */
15467c478bd9Sstevel@tonic-gate 				break;
15477c478bd9Sstevel@tonic-gate 			}
15487c478bd9Sstevel@tonic-gate 		}
15497c478bd9Sstevel@tonic-gate 		if (type == AF_INET6)
15507c478bd9Sstevel@tonic-gate 			break;
15517c478bd9Sstevel@tonic-gate 	}
15527c478bd9Sstevel@tonic-gate 
15537c478bd9Sstevel@tonic-gate 	if (need_if_scan) {
15547c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE && rtm_ifinfo_seen)
15557c478bd9Sstevel@tonic-gate 			logdebug("process_rtsock: synchronizing with kernel\n");
15567c478bd9Sstevel@tonic-gate 		initifs();
15577c478bd9Sstevel@tonic-gate 	} else if (rtm_ifinfo_seen) {
15587c478bd9Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
15597c478bd9Sstevel@tonic-gate 			logdebug("process_rtsock: "
15607c478bd9Sstevel@tonic-gate 			    "link up/down notification(s) seen\n");
15617c478bd9Sstevel@tonic-gate 		process_link_state_changes();
15627c478bd9Sstevel@tonic-gate 	}
15637c478bd9Sstevel@tonic-gate 
15647c478bd9Sstevel@tonic-gate 	if (need_rt_scan)
15657c478bd9Sstevel@tonic-gate 		init_router_targets();
15667c478bd9Sstevel@tonic-gate }
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate /*
15697c478bd9Sstevel@tonic-gate  * Look if the phyint instance or one of its logints have been removed from
15707c478bd9Sstevel@tonic-gate  * the kernel and take appropriate action.
15717c478bd9Sstevel@tonic-gate  * Uses {pii,li}_in_use.
15727c478bd9Sstevel@tonic-gate  */
15737c478bd9Sstevel@tonic-gate static void
15747c478bd9Sstevel@tonic-gate check_if_removed(struct phyint_instance *pii)
15757c478bd9Sstevel@tonic-gate {
15767c478bd9Sstevel@tonic-gate 	struct logint *li;
15777c478bd9Sstevel@tonic-gate 	struct logint *next_li;
15787c478bd9Sstevel@tonic-gate 
15797c478bd9Sstevel@tonic-gate 	/* Detect phyints that have been removed from the kernel. */
15807c478bd9Sstevel@tonic-gate 	if (!pii->pii_in_use) {
15817c478bd9Sstevel@tonic-gate 		logtrace("%s %s has been removed from kernel\n",
15827c478bd9Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
15837c478bd9Sstevel@tonic-gate 		phyint_inst_delete(pii);
15847c478bd9Sstevel@tonic-gate 	} else {
15857c478bd9Sstevel@tonic-gate 		/* Detect logints that have been removed. */
15867c478bd9Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = next_li) {
15877c478bd9Sstevel@tonic-gate 			next_li = li->li_next;
15887c478bd9Sstevel@tonic-gate 			if (!li->li_in_use) {
15897c478bd9Sstevel@tonic-gate 				logint_delete(li);
15907c478bd9Sstevel@tonic-gate 			}
15917c478bd9Sstevel@tonic-gate 		}
15927c478bd9Sstevel@tonic-gate 	}
15937c478bd9Sstevel@tonic-gate }
15947c478bd9Sstevel@tonic-gate 
15957c478bd9Sstevel@tonic-gate /*
15967c478bd9Sstevel@tonic-gate  * Send down a T_OPTMGMT_REQ to ip asking for all data in the various
15977c478bd9Sstevel@tonic-gate  * tables defined by mib2.h. Parse the returned data and extract
15987c478bd9Sstevel@tonic-gate  * the 'routing' information table. Process the 'routing' table
15997c478bd9Sstevel@tonic-gate  * to get the list of known onlink routers, and update our database.
16007c478bd9Sstevel@tonic-gate  * These onlink routers will serve as our probe targets.
16017c478bd9Sstevel@tonic-gate  * Returns false, if any system calls resulted in errors, true otherwise.
16027c478bd9Sstevel@tonic-gate  */
16037c478bd9Sstevel@tonic-gate static boolean_t
16047c478bd9Sstevel@tonic-gate update_router_list(int fd)
16057c478bd9Sstevel@tonic-gate {
16067c478bd9Sstevel@tonic-gate 	union {
16077c478bd9Sstevel@tonic-gate 		char	ubuf[1024];
16087c478bd9Sstevel@tonic-gate 		union T_primitives uprim;
16097c478bd9Sstevel@tonic-gate 	} buf;
16107c478bd9Sstevel@tonic-gate 
16117c478bd9Sstevel@tonic-gate 	int			flags;
16127c478bd9Sstevel@tonic-gate 	struct strbuf		ctlbuf;
16137c478bd9Sstevel@tonic-gate 	struct strbuf		databuf;
16147c478bd9Sstevel@tonic-gate 	struct T_optmgmt_req	*tor;
16157c478bd9Sstevel@tonic-gate 	struct T_optmgmt_ack	*toa;
16167c478bd9Sstevel@tonic-gate 	struct T_error_ack	*tea;
16177c478bd9Sstevel@tonic-gate 	struct opthdr		*optp;
16187c478bd9Sstevel@tonic-gate 	struct opthdr		*req;
16197c478bd9Sstevel@tonic-gate 	int			status;
16207c478bd9Sstevel@tonic-gate 	t_scalar_t		prim;
16217c478bd9Sstevel@tonic-gate 
16227c478bd9Sstevel@tonic-gate 	tor = (struct T_optmgmt_req *)&buf;
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
16257c478bd9Sstevel@tonic-gate 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
16267c478bd9Sstevel@tonic-gate 	tor->OPT_length = sizeof (struct opthdr);
16277c478bd9Sstevel@tonic-gate 	tor->MGMT_flags = T_CURRENT;
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 	req = (struct opthdr *)&tor[1];
16307c478bd9Sstevel@tonic-gate 	req->level = MIB2_IP;	/* any MIB2_xxx value ok here */
16317c478bd9Sstevel@tonic-gate 	req->name  = 0;
16327c478bd9Sstevel@tonic-gate 	req->len   = 0;
16337c478bd9Sstevel@tonic-gate 
16347c478bd9Sstevel@tonic-gate 	ctlbuf.buf = (char *)&buf;
16357c478bd9Sstevel@tonic-gate 	ctlbuf.len = tor->OPT_length + tor->OPT_offset;
16367c478bd9Sstevel@tonic-gate 	ctlbuf.maxlen = sizeof (buf);
16377c478bd9Sstevel@tonic-gate 	flags = 0;
16387c478bd9Sstevel@tonic-gate 	if (putmsg(fd, &ctlbuf, NULL, flags) == -1) {
16397c478bd9Sstevel@tonic-gate 		logperror("update_router_list: putmsg(ctl)");
16407c478bd9Sstevel@tonic-gate 		return (_B_FALSE);
16417c478bd9Sstevel@tonic-gate 	}
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	/*
16447c478bd9Sstevel@tonic-gate 	 * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for
16457c478bd9Sstevel@tonic-gate 	 * each table defined in mib2.h.  Each T_OPTMGMT_ACK msg contains
16467c478bd9Sstevel@tonic-gate 	 * a control and data part. The control part contains a struct
16477c478bd9Sstevel@tonic-gate 	 * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies
16487c478bd9Sstevel@tonic-gate 	 * the level, name and length of the data in the data part. The
16497c478bd9Sstevel@tonic-gate 	 * data part contains the actual table data. The last message
16507c478bd9Sstevel@tonic-gate 	 * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a
16517c478bd9Sstevel@tonic-gate 	 * single option with zero optlen.
16527c478bd9Sstevel@tonic-gate 	 */
16537c478bd9Sstevel@tonic-gate 
16547c478bd9Sstevel@tonic-gate 	for (;;) {
16557c478bd9Sstevel@tonic-gate 		/*
16567c478bd9Sstevel@tonic-gate 		 * Go around this loop once for each table. Ignore
16577c478bd9Sstevel@tonic-gate 		 * all tables except the routing information table.
16587c478bd9Sstevel@tonic-gate 		 */
16597c478bd9Sstevel@tonic-gate 		flags = 0;
16607c478bd9Sstevel@tonic-gate 		status = getmsg(fd, &ctlbuf, NULL, &flags);
16617c478bd9Sstevel@tonic-gate 		if (status < 0) {
16627c478bd9Sstevel@tonic-gate 			if (errno == EINTR)
16637c478bd9Sstevel@tonic-gate 				continue;
16647c478bd9Sstevel@tonic-gate 			logperror("update_router_list: getmsg(ctl)");
16657c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
16667c478bd9Sstevel@tonic-gate 		}
16677c478bd9Sstevel@tonic-gate 		if (ctlbuf.len < sizeof (t_scalar_t)) {
16687c478bd9Sstevel@tonic-gate 			logerr("update_router_list: ctlbuf.len %d\n",
16697c478bd9Sstevel@tonic-gate 			    ctlbuf.len);
16707c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
16717c478bd9Sstevel@tonic-gate 		}
16727c478bd9Sstevel@tonic-gate 
16737c478bd9Sstevel@tonic-gate 		prim = buf.uprim.type;
16747c478bd9Sstevel@tonic-gate 
16757c478bd9Sstevel@tonic-gate 		switch (prim) {
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 		case T_ERROR_ACK:
16787c478bd9Sstevel@tonic-gate 			tea = &buf.uprim.error_ack;
16797c478bd9Sstevel@tonic-gate 			if (ctlbuf.len < sizeof (struct T_error_ack)) {
16807c478bd9Sstevel@tonic-gate 				logerr("update_router_list: T_ERROR_ACK"
16817c478bd9Sstevel@tonic-gate 				    " ctlbuf.len %d\n", ctlbuf.len);
16827c478bd9Sstevel@tonic-gate 				return (_B_FALSE);
16837c478bd9Sstevel@tonic-gate 			}
16847c478bd9Sstevel@tonic-gate 			logerr("update_router_list: T_ERROR_ACK:"
16857c478bd9Sstevel@tonic-gate 			    " TLI_error = 0x%lx, UNIX_error = 0x%lx\n",
16867c478bd9Sstevel@tonic-gate 			    tea->TLI_error, tea->UNIX_error);
16877c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
16887c478bd9Sstevel@tonic-gate 
16897c478bd9Sstevel@tonic-gate 		case T_OPTMGMT_ACK:
16907c478bd9Sstevel@tonic-gate 			toa = &buf.uprim.optmgmt_ack;
16917c478bd9Sstevel@tonic-gate 			optp = (struct opthdr *)&toa[1];
16927c478bd9Sstevel@tonic-gate 			if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) {
16937c478bd9Sstevel@tonic-gate 				logerr("update_router_list: ctlbuf.len %d\n",
16947c478bd9Sstevel@tonic-gate 				    ctlbuf.len);
16957c478bd9Sstevel@tonic-gate 				return (_B_FALSE);
16967c478bd9Sstevel@tonic-gate 			}
16977c478bd9Sstevel@tonic-gate 			if (toa->MGMT_flags != T_SUCCESS) {
16987c478bd9Sstevel@tonic-gate 				logerr("update_router_list: MGMT_flags 0x%lx\n",
16997c478bd9Sstevel@tonic-gate 				    toa->MGMT_flags);
17007c478bd9Sstevel@tonic-gate 				return (_B_FALSE);
17017c478bd9Sstevel@tonic-gate 			}
17027c478bd9Sstevel@tonic-gate 			break;
17037c478bd9Sstevel@tonic-gate 
17047c478bd9Sstevel@tonic-gate 		default:
17057c478bd9Sstevel@tonic-gate 			logerr("update_router_list: unknown primitive %ld\n",
17067c478bd9Sstevel@tonic-gate 			    prim);
17077c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
17087c478bd9Sstevel@tonic-gate 		}
17097c478bd9Sstevel@tonic-gate 
17107c478bd9Sstevel@tonic-gate 		/* Process the T_OPGMGMT_ACK below */
17117c478bd9Sstevel@tonic-gate 		assert(prim == T_OPTMGMT_ACK);
17127c478bd9Sstevel@tonic-gate 
17137c478bd9Sstevel@tonic-gate 		switch (status) {
17147c478bd9Sstevel@tonic-gate 		case 0:
17157c478bd9Sstevel@tonic-gate 			/*
17167c478bd9Sstevel@tonic-gate 			 * We have reached the end of this T_OPTMGMT_ACK
17177c478bd9Sstevel@tonic-gate 			 * message. If this is the last message i.e EOD,
17187c478bd9Sstevel@tonic-gate 			 * return, else process the next T_OPTMGMT_ACK msg.
17197c478bd9Sstevel@tonic-gate 			 */
17207c478bd9Sstevel@tonic-gate 			if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) +
17217c478bd9Sstevel@tonic-gate 			    sizeof (struct opthdr)) && optp->len == 0 &&
17227c478bd9Sstevel@tonic-gate 			    optp->name == 0 && optp->level == 0) {
17237c478bd9Sstevel@tonic-gate 				/*
17247c478bd9Sstevel@tonic-gate 				 * This is the EOD message. Return
17257c478bd9Sstevel@tonic-gate 				 */
17267c478bd9Sstevel@tonic-gate 				return (_B_TRUE);
17277c478bd9Sstevel@tonic-gate 			}
17287c478bd9Sstevel@tonic-gate 			continue;
17297c478bd9Sstevel@tonic-gate 
17307c478bd9Sstevel@tonic-gate 		case MORECTL:
17317c478bd9Sstevel@tonic-gate 		case MORECTL | MOREDATA:
17327c478bd9Sstevel@tonic-gate 			/*
17337c478bd9Sstevel@tonic-gate 			 * This should not happen. We should be able to read
17347c478bd9Sstevel@tonic-gate 			 * the control portion in a single getmsg.
17357c478bd9Sstevel@tonic-gate 			 */
17367c478bd9Sstevel@tonic-gate 			logerr("update_router_list: MORECTL\n");
17377c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate 		case MOREDATA:
17407c478bd9Sstevel@tonic-gate 			databuf.maxlen = optp->len;
17417c478bd9Sstevel@tonic-gate 			/* malloc of 0 bytes is ok */
17427c478bd9Sstevel@tonic-gate 			databuf.buf = malloc((size_t)optp->len);
17437c478bd9Sstevel@tonic-gate 			if (databuf.maxlen != 0 && databuf.buf == NULL) {
17447c478bd9Sstevel@tonic-gate 				logperror("update_router_list: malloc");
17457c478bd9Sstevel@tonic-gate 				return (_B_FALSE);
17467c478bd9Sstevel@tonic-gate 			}
17477c478bd9Sstevel@tonic-gate 			databuf.len = 0;
17487c478bd9Sstevel@tonic-gate 			flags = 0;
17497c478bd9Sstevel@tonic-gate 			for (;;) {
17507c478bd9Sstevel@tonic-gate 				status = getmsg(fd, NULL, &databuf, &flags);
17517c478bd9Sstevel@tonic-gate 				if (status >= 0) {
17527c478bd9Sstevel@tonic-gate 					break;
17537c478bd9Sstevel@tonic-gate 				} else if (errno == EINTR) {
17547c478bd9Sstevel@tonic-gate 					continue;
17557c478bd9Sstevel@tonic-gate 				} else {
17567c478bd9Sstevel@tonic-gate 					logperror("update_router_list:"
17577c478bd9Sstevel@tonic-gate 					    " getmsg(data)");
17587c478bd9Sstevel@tonic-gate 					free(databuf.buf);
17597c478bd9Sstevel@tonic-gate 					return (_B_FALSE);
17607c478bd9Sstevel@tonic-gate 				}
17617c478bd9Sstevel@tonic-gate 			}
17627c478bd9Sstevel@tonic-gate 
17637c478bd9Sstevel@tonic-gate 			if (optp->level == MIB2_IP &&
17647c478bd9Sstevel@tonic-gate 			    optp->name == MIB2_IP_ROUTE) {
17657c478bd9Sstevel@tonic-gate 				/* LINTED */
17667c478bd9Sstevel@tonic-gate 				ire_process_v4((mib2_ipRouteEntry_t *)
17677c478bd9Sstevel@tonic-gate 				    databuf.buf, databuf.len);
17687c478bd9Sstevel@tonic-gate 			} else if (optp->level == MIB2_IP6 &&
17697c478bd9Sstevel@tonic-gate 			    optp->name == MIB2_IP6_ROUTE) {
17707c478bd9Sstevel@tonic-gate 				/* LINTED */
17717c478bd9Sstevel@tonic-gate 				ire_process_v6((mib2_ipv6RouteEntry_t *)
17727c478bd9Sstevel@tonic-gate 				    databuf.buf, databuf.len);
17737c478bd9Sstevel@tonic-gate 			}
17747c478bd9Sstevel@tonic-gate 			free(databuf.buf);
17757c478bd9Sstevel@tonic-gate 		}
17767c478bd9Sstevel@tonic-gate 	}
17777c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
17787c478bd9Sstevel@tonic-gate }
17797c478bd9Sstevel@tonic-gate 
17807c478bd9Sstevel@tonic-gate /*
17817c478bd9Sstevel@tonic-gate  * Examine the IPv4 routing table, for default routers. For each default
17827c478bd9Sstevel@tonic-gate  * router, populate the list of targets of each phyint that is on the same
17837c478bd9Sstevel@tonic-gate  * link as the default router
17847c478bd9Sstevel@tonic-gate  */
17857c478bd9Sstevel@tonic-gate static void
17867c478bd9Sstevel@tonic-gate ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
17877c478bd9Sstevel@tonic-gate {
17887c478bd9Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*rp;
17897c478bd9Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*rp1;
17907c478bd9Sstevel@tonic-gate 	struct	in_addr		nexthop_v4;
17917c478bd9Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*endp;
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 	if (len == 0)
17947c478bd9Sstevel@tonic-gate 		return;
17957c478bd9Sstevel@tonic-gate 	assert((len % sizeof (mib2_ipRouteEntry_t)) == 0);
17967c478bd9Sstevel@tonic-gate 
17977c478bd9Sstevel@tonic-gate 	endp = buf + (len / sizeof (mib2_ipRouteEntry_t));
17987c478bd9Sstevel@tonic-gate 
17997c478bd9Sstevel@tonic-gate 	/*
18007c478bd9Sstevel@tonic-gate 	 * Loop thru the routing table entries. Process any IRE_DEFAULT,
18017c478bd9Sstevel@tonic-gate 	 * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
18027c478bd9Sstevel@tonic-gate 	 * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
18037c478bd9Sstevel@tonic-gate 	 * This is a potential target for probing, which we try to add
18047c478bd9Sstevel@tonic-gate 	 * to the list of probe targets.
18057c478bd9Sstevel@tonic-gate 	 */
18067c478bd9Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
18077c478bd9Sstevel@tonic-gate 		if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
18087c478bd9Sstevel@tonic-gate 			continue;
18097c478bd9Sstevel@tonic-gate 
18107c478bd9Sstevel@tonic-gate 		/*  Get the nexthop address. */
18117c478bd9Sstevel@tonic-gate 		nexthop_v4.s_addr = rp->ipRouteNextHop;
18127c478bd9Sstevel@tonic-gate 
18137c478bd9Sstevel@tonic-gate 		/*
18147c478bd9Sstevel@tonic-gate 		 * Get the nexthop address. Then determine the outgoing
18157c478bd9Sstevel@tonic-gate 		 * interface, by examining all interface IREs, and picking the
18167c478bd9Sstevel@tonic-gate 		 * match. We don't look at the interface specified in the route
18177c478bd9Sstevel@tonic-gate 		 * because we need to add the router target on all matching
18187c478bd9Sstevel@tonic-gate 		 * interfaces anyway; the goal is to avoid falling back to
18197c478bd9Sstevel@tonic-gate 		 * multicast when some interfaces are in the same subnet but
18207c478bd9Sstevel@tonic-gate 		 * not in the same group.
18217c478bd9Sstevel@tonic-gate 		 */
18227c478bd9Sstevel@tonic-gate 		for (rp1 = buf; rp1 < endp; rp1++) {
18237c478bd9Sstevel@tonic-gate 			if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) {
18247c478bd9Sstevel@tonic-gate 				continue;
18257c478bd9Sstevel@tonic-gate 			}
18267c478bd9Sstevel@tonic-gate 
18277c478bd9Sstevel@tonic-gate 			/*
18287c478bd9Sstevel@tonic-gate 			 * Determine the interface IRE that matches the nexthop.
18297c478bd9Sstevel@tonic-gate 			 * i.e.	 (IRE addr & IRE mask) == (nexthop & IRE mask)
18307c478bd9Sstevel@tonic-gate 			 */
18317c478bd9Sstevel@tonic-gate 			if ((rp1->ipRouteDest & rp1->ipRouteMask) ==
18327c478bd9Sstevel@tonic-gate 			    (nexthop_v4.s_addr & rp1->ipRouteMask)) {
18337c478bd9Sstevel@tonic-gate 				/*
18347c478bd9Sstevel@tonic-gate 				 * We found the interface ire
18357c478bd9Sstevel@tonic-gate 				 */
18367c478bd9Sstevel@tonic-gate 				router_add_v4(rp1, nexthop_v4);
18377c478bd9Sstevel@tonic-gate 			}
18387c478bd9Sstevel@tonic-gate 		}
18397c478bd9Sstevel@tonic-gate 	}
18407c478bd9Sstevel@tonic-gate }
18417c478bd9Sstevel@tonic-gate 
18427c478bd9Sstevel@tonic-gate void
18437c478bd9Sstevel@tonic-gate router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4)
18447c478bd9Sstevel@tonic-gate {
18457c478bd9Sstevel@tonic-gate 	char *cp;
18467c478bd9Sstevel@tonic-gate 	char ifname[LIFNAMSIZ + 1];
18477c478bd9Sstevel@tonic-gate 	struct in6_addr	nexthop;
18487c478bd9Sstevel@tonic-gate 	int len;
18497c478bd9Sstevel@tonic-gate 
18507c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
18517c478bd9Sstevel@tonic-gate 		logdebug("router_add_v4()\n");
18527c478bd9Sstevel@tonic-gate 
18537c478bd9Sstevel@tonic-gate 	len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1);
18547c478bd9Sstevel@tonic-gate 	(void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len);
18557c478bd9Sstevel@tonic-gate 	ifname[len] = '\0';
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	if (ifname[0] == '\0')
18587c478bd9Sstevel@tonic-gate 		return;
18597c478bd9Sstevel@tonic-gate 
18607c478bd9Sstevel@tonic-gate 	cp = strchr(ifname, IF_SEPARATOR);
18617c478bd9Sstevel@tonic-gate 	if (cp != NULL)
18627c478bd9Sstevel@tonic-gate 		*cp = '\0';
18637c478bd9Sstevel@tonic-gate 
18647c478bd9Sstevel@tonic-gate 	IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
18657c478bd9Sstevel@tonic-gate 	router_add_common(AF_INET, ifname, nexthop);
18667c478bd9Sstevel@tonic-gate }
18677c478bd9Sstevel@tonic-gate 
18687c478bd9Sstevel@tonic-gate void
18697c478bd9Sstevel@tonic-gate router_add_common(int af, char *ifname, struct in6_addr nexthop)
18707c478bd9Sstevel@tonic-gate {
18717c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
18727c478bd9Sstevel@tonic-gate 	struct phyint *pi;
18737c478bd9Sstevel@tonic-gate 
18747c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
18757c478bd9Sstevel@tonic-gate 		logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname);
18767c478bd9Sstevel@tonic-gate 
18777c478bd9Sstevel@tonic-gate 	/*
18787c478bd9Sstevel@tonic-gate 	 * Retrieve the phyint instance; bail if it's not known to us yet.
18797c478bd9Sstevel@tonic-gate 	 */
18807c478bd9Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, ifname);
18817c478bd9Sstevel@tonic-gate 	if (pii == NULL)
18827c478bd9Sstevel@tonic-gate 		return;
18837c478bd9Sstevel@tonic-gate 
18847c478bd9Sstevel@tonic-gate 	/*
18857c478bd9Sstevel@tonic-gate 	 * Don't use our own addresses as targets.
18867c478bd9Sstevel@tonic-gate 	 */
1887*87e66ffcSrk 	if (own_address(nexthop))
18887c478bd9Sstevel@tonic-gate 		return;
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 	/*
18917c478bd9Sstevel@tonic-gate 	 * If the phyint is part a named group, then add the address to all
18927c478bd9Sstevel@tonic-gate 	 * members of the group; note that this is suboptimal in the IPv4 case
18937c478bd9Sstevel@tonic-gate 	 * as it has already been added to all matching interfaces in
18947c478bd9Sstevel@tonic-gate 	 * ire_process_v4(). Otherwise, add the address only to the phyint
18957c478bd9Sstevel@tonic-gate 	 * itself, since other phyints in the anongroup may not be on the same
18967c478bd9Sstevel@tonic-gate 	 * subnet.
18977c478bd9Sstevel@tonic-gate 	 */
18987c478bd9Sstevel@tonic-gate 	pi = pii->pii_phyint;
18997c478bd9Sstevel@tonic-gate 	if (pi->pi_group == phyint_anongroup) {
19007c478bd9Sstevel@tonic-gate 		target_add(pii, nexthop, _B_TRUE);
19017c478bd9Sstevel@tonic-gate 	} else {
19027c478bd9Sstevel@tonic-gate 		pi = pi->pi_group->pg_phyint;
19037c478bd9Sstevel@tonic-gate 		for (; pi != NULL; pi = pi->pi_pgnext)
19047c478bd9Sstevel@tonic-gate 			target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE);
19057c478bd9Sstevel@tonic-gate 	}
19067c478bd9Sstevel@tonic-gate }
19077c478bd9Sstevel@tonic-gate 
19087c478bd9Sstevel@tonic-gate /*
19097c478bd9Sstevel@tonic-gate  * Examine the IPv6 routing table, for default routers. For each default
19107c478bd9Sstevel@tonic-gate  * router, populate the list of targets of each phyint that is on the same
19117c478bd9Sstevel@tonic-gate  * link as the default router
19127c478bd9Sstevel@tonic-gate  */
19137c478bd9Sstevel@tonic-gate static void
19147c478bd9Sstevel@tonic-gate ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
19157c478bd9Sstevel@tonic-gate {
19167c478bd9Sstevel@tonic-gate 	mib2_ipv6RouteEntry_t	*rp;
19177c478bd9Sstevel@tonic-gate 	mib2_ipv6RouteEntry_t	*endp;
19187c478bd9Sstevel@tonic-gate 	struct	in6_addr nexthop_v6;
19197c478bd9Sstevel@tonic-gate 
19207c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
19217c478bd9Sstevel@tonic-gate 		logdebug("ire_process_v6(len %d)\n", len);
19227c478bd9Sstevel@tonic-gate 
19237c478bd9Sstevel@tonic-gate 	if (len == 0)
19247c478bd9Sstevel@tonic-gate 		return;
19257c478bd9Sstevel@tonic-gate 
19267c478bd9Sstevel@tonic-gate 	assert((len % sizeof (mib2_ipv6RouteEntry_t)) == 0);
19277c478bd9Sstevel@tonic-gate 	endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t));
19287c478bd9Sstevel@tonic-gate 
19297c478bd9Sstevel@tonic-gate 	/*
19307c478bd9Sstevel@tonic-gate 	 * Loop thru the routing table entries. Process any IRE_DEFAULT,
19317c478bd9Sstevel@tonic-gate 	 * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
19327c478bd9Sstevel@tonic-gate 	 * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
19337c478bd9Sstevel@tonic-gate 	 * This is a potential target for probing, which we try to add
19347c478bd9Sstevel@tonic-gate 	 * to the list of probe targets.
19357c478bd9Sstevel@tonic-gate 	 */
19367c478bd9Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
19377c478bd9Sstevel@tonic-gate 		if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET))
19387c478bd9Sstevel@tonic-gate 			continue;
19397c478bd9Sstevel@tonic-gate 
19407c478bd9Sstevel@tonic-gate 		/*
19417c478bd9Sstevel@tonic-gate 		 * We have the outgoing interface in ipv6RouteIfIndex
19427c478bd9Sstevel@tonic-gate 		 * if ipv6RouteIfindex.o_length is non-zero. The outgoing
19437c478bd9Sstevel@tonic-gate 		 * interface must be present for link-local addresses. Since
19447c478bd9Sstevel@tonic-gate 		 * we use only link-local addreses for probing, we don't
19457c478bd9Sstevel@tonic-gate 		 * consider the case when the outgoing interface is not
19467c478bd9Sstevel@tonic-gate 		 * known and we need to scan interface ires
19477c478bd9Sstevel@tonic-gate 		 */
19487c478bd9Sstevel@tonic-gate 		nexthop_v6 = rp->ipv6RouteNextHop;
19497c478bd9Sstevel@tonic-gate 		if (rp->ipv6RouteIfIndex.o_length != 0) {
19507c478bd9Sstevel@tonic-gate 			/*
19517c478bd9Sstevel@tonic-gate 			 * We already have the outgoing interface
19527c478bd9Sstevel@tonic-gate 			 * in ipv6RouteIfIndex.
19537c478bd9Sstevel@tonic-gate 			 */
19547c478bd9Sstevel@tonic-gate 			router_add_v6(rp, nexthop_v6);
19557c478bd9Sstevel@tonic-gate 		}
19567c478bd9Sstevel@tonic-gate 	}
19577c478bd9Sstevel@tonic-gate }
19587c478bd9Sstevel@tonic-gate 
19597c478bd9Sstevel@tonic-gate 
19607c478bd9Sstevel@tonic-gate void
19617c478bd9Sstevel@tonic-gate router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6)
19627c478bd9Sstevel@tonic-gate {
19637c478bd9Sstevel@tonic-gate 	char ifname[LIFNAMSIZ + 1];
19647c478bd9Sstevel@tonic-gate 	char *cp;
19657c478bd9Sstevel@tonic-gate 	int  len;
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	if (debug & D_TARGET)
19687c478bd9Sstevel@tonic-gate 		logdebug("router_add_v6()\n");
19697c478bd9Sstevel@tonic-gate 
19707c478bd9Sstevel@tonic-gate 	len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1);
19717c478bd9Sstevel@tonic-gate 	(void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len);
19727c478bd9Sstevel@tonic-gate 	ifname[len] = '\0';
19737c478bd9Sstevel@tonic-gate 
19747c478bd9Sstevel@tonic-gate 	if (ifname[0] == '\0')
19757c478bd9Sstevel@tonic-gate 		return;
19767c478bd9Sstevel@tonic-gate 
19777c478bd9Sstevel@tonic-gate 	cp = strchr(ifname, IF_SEPARATOR);
19787c478bd9Sstevel@tonic-gate 	if (cp != NULL)
19797c478bd9Sstevel@tonic-gate 		*cp = '\0';
19807c478bd9Sstevel@tonic-gate 
19817c478bd9Sstevel@tonic-gate 	router_add_common(AF_INET6, ifname, nexthop_v6);
19827c478bd9Sstevel@tonic-gate }
19837c478bd9Sstevel@tonic-gate 
19847c478bd9Sstevel@tonic-gate 
19857c478bd9Sstevel@tonic-gate 
19867c478bd9Sstevel@tonic-gate /*
19877c478bd9Sstevel@tonic-gate  * Build a list of target routers, by scanning the routing tables.
19887c478bd9Sstevel@tonic-gate  * It is assumed that interface routes exist, to reach the routers.
19897c478bd9Sstevel@tonic-gate  */
19907c478bd9Sstevel@tonic-gate static void
19917c478bd9Sstevel@tonic-gate init_router_targets(void)
19927c478bd9Sstevel@tonic-gate {
19937c478bd9Sstevel@tonic-gate 	struct	target *tg;
19947c478bd9Sstevel@tonic-gate 	struct	target *next_tg;
19957c478bd9Sstevel@tonic-gate 	struct	phyint_instance *pii;
19967c478bd9Sstevel@tonic-gate 	struct	phyint *pi;
19977c478bd9Sstevel@tonic-gate 
19987c478bd9Sstevel@tonic-gate 	if (force_mcast)
19997c478bd9Sstevel@tonic-gate 		return;
20007c478bd9Sstevel@tonic-gate 
20017c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20027c478bd9Sstevel@tonic-gate 		pi = pii->pii_phyint;
20037c478bd9Sstevel@tonic-gate 		/*
20047c478bd9Sstevel@tonic-gate 		 * Exclude ptp and host targets. Set tg_in_use to false,
20057c478bd9Sstevel@tonic-gate 		 * only for router targets.
20067c478bd9Sstevel@tonic-gate 		 */
20077c478bd9Sstevel@tonic-gate 		if (!pii->pii_targets_are_routers ||
20087c478bd9Sstevel@tonic-gate 		    (pi->pi_flags & IFF_POINTOPOINT))
20097c478bd9Sstevel@tonic-gate 			continue;
20107c478bd9Sstevel@tonic-gate 
20117c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
20127c478bd9Sstevel@tonic-gate 			tg->tg_in_use = 0;
20137c478bd9Sstevel@tonic-gate 	}
20147c478bd9Sstevel@tonic-gate 
20157c478bd9Sstevel@tonic-gate 	if (mibfd < 0) {
20167c478bd9Sstevel@tonic-gate 		mibfd = open("/dev/ip", O_RDWR);
20177c478bd9Sstevel@tonic-gate 		if (mibfd < 0) {
20187c478bd9Sstevel@tonic-gate 			logperror("mibopen: ip open");
20197c478bd9Sstevel@tonic-gate 			exit(1);
20207c478bd9Sstevel@tonic-gate 		}
20217c478bd9Sstevel@tonic-gate 	}
20227c478bd9Sstevel@tonic-gate 
20237c478bd9Sstevel@tonic-gate 	if (!update_router_list(mibfd)) {
20247c478bd9Sstevel@tonic-gate 		(void) close(mibfd);
20257c478bd9Sstevel@tonic-gate 		mibfd = -1;
20267c478bd9Sstevel@tonic-gate 	}
20277c478bd9Sstevel@tonic-gate 
20287c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20297c478bd9Sstevel@tonic-gate 		if (!pii->pii_targets_are_routers ||
20307c478bd9Sstevel@tonic-gate 		    (pi->pi_flags & IFF_POINTOPOINT))
20317c478bd9Sstevel@tonic-gate 			continue;
20327c478bd9Sstevel@tonic-gate 
20337c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
20347c478bd9Sstevel@tonic-gate 			next_tg = tg->tg_next;
20357c478bd9Sstevel@tonic-gate 			if (!tg->tg_in_use) {
20367c478bd9Sstevel@tonic-gate 				target_delete(tg);
20377c478bd9Sstevel@tonic-gate 			}
20387c478bd9Sstevel@tonic-gate 		}
20397c478bd9Sstevel@tonic-gate 	}
20407c478bd9Sstevel@tonic-gate }
20417c478bd9Sstevel@tonic-gate 
20427c478bd9Sstevel@tonic-gate /*
20437c478bd9Sstevel@tonic-gate  * Attempt to assign host targets to any interfaces that do not currently
20447c478bd9Sstevel@tonic-gate  * have probe targets by sharing targets with other interfaces in the group.
20457c478bd9Sstevel@tonic-gate  */
20467c478bd9Sstevel@tonic-gate static void
20477c478bd9Sstevel@tonic-gate init_host_targets(void)
20487c478bd9Sstevel@tonic-gate {
20497c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
20507c478bd9Sstevel@tonic-gate 	struct phyint_group *pg;
20517c478bd9Sstevel@tonic-gate 
20527c478bd9Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20537c478bd9Sstevel@tonic-gate 		pg = pii->pii_phyint->pi_group;
20547c478bd9Sstevel@tonic-gate 		if (pg != phyint_anongroup && pii->pii_targets == NULL)
20557c478bd9Sstevel@tonic-gate 			dup_host_targets(pii);
20567c478bd9Sstevel@tonic-gate 	}
20577c478bd9Sstevel@tonic-gate }
20587c478bd9Sstevel@tonic-gate 
20597c478bd9Sstevel@tonic-gate /*
20607c478bd9Sstevel@tonic-gate  * Duplicate host targets from other phyints of the group to
20617c478bd9Sstevel@tonic-gate  * the phyint instance 'desired_pii'.
20627c478bd9Sstevel@tonic-gate  */
20637c478bd9Sstevel@tonic-gate static void
20647c478bd9Sstevel@tonic-gate dup_host_targets(struct phyint_instance	 *desired_pii)
20657c478bd9Sstevel@tonic-gate {
20667c478bd9Sstevel@tonic-gate 	int af;
20677c478bd9Sstevel@tonic-gate 	struct phyint *pi;
20687c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
20697c478bd9Sstevel@tonic-gate 	struct target *tg;
20707c478bd9Sstevel@tonic-gate 
20717c478bd9Sstevel@tonic-gate 	assert(desired_pii->pii_phyint->pi_group != phyint_anongroup);
20727c478bd9Sstevel@tonic-gate 
20737c478bd9Sstevel@tonic-gate 	af = desired_pii->pii_af;
20747c478bd9Sstevel@tonic-gate 
20757c478bd9Sstevel@tonic-gate 	/*
20767c478bd9Sstevel@tonic-gate 	 * For every phyint in the same group as desired_pii, check if
20777c478bd9Sstevel@tonic-gate 	 * it has any host targets. If so add them to desired_pii.
20787c478bd9Sstevel@tonic-gate 	 */
20797c478bd9Sstevel@tonic-gate 	for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) {
20807c478bd9Sstevel@tonic-gate 		pii = PHYINT_INSTANCE(pi, af);
20817c478bd9Sstevel@tonic-gate 		/*
20827c478bd9Sstevel@tonic-gate 		 * We know that we don't have targets on this phyint instance
20837c478bd9Sstevel@tonic-gate 		 * since we have been called. But we still check for
20847c478bd9Sstevel@tonic-gate 		 * pii_targets_are_routers because another phyint instance
20857c478bd9Sstevel@tonic-gate 		 * could have router targets, since IFF_NOFAILOVER addresses
20867c478bd9Sstevel@tonic-gate 		 * on different phyint instances may belong to different
20877c478bd9Sstevel@tonic-gate 		 * subnets.
20887c478bd9Sstevel@tonic-gate 		 */
20897c478bd9Sstevel@tonic-gate 		if ((pii == NULL) || (pii == desired_pii) ||
20907c478bd9Sstevel@tonic-gate 		    pii->pii_targets_are_routers)
20917c478bd9Sstevel@tonic-gate 			continue;
20927c478bd9Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
20937c478bd9Sstevel@tonic-gate 			target_create(desired_pii, tg->tg_address, _B_FALSE);
20947c478bd9Sstevel@tonic-gate 		}
20957c478bd9Sstevel@tonic-gate 	}
20967c478bd9Sstevel@tonic-gate }
20977c478bd9Sstevel@tonic-gate 
/*
 * Print a one-line usage message for the program name `cmd' on stderr.
 */
static void
usage(char *cmd)
{
	(void) fprintf(stderr, "usage: %s\n", cmd);
}
21037c478bd9Sstevel@tonic-gate 
21047c478bd9Sstevel@tonic-gate 
21057c478bd9Sstevel@tonic-gate #define	MPATHD_DEFAULT_FILE	"/etc/default/mpathd"
21067c478bd9Sstevel@tonic-gate 
21077c478bd9Sstevel@tonic-gate /* Get an option from the /etc/default/mpathd file */
21087c478bd9Sstevel@tonic-gate static char *
21097c478bd9Sstevel@tonic-gate getdefault(char *name)
21107c478bd9Sstevel@tonic-gate {
21117c478bd9Sstevel@tonic-gate 	char namebuf[BUFSIZ];
21127c478bd9Sstevel@tonic-gate 	char *value = NULL;
21137c478bd9Sstevel@tonic-gate 
21147c478bd9Sstevel@tonic-gate 	if (defopen(MPATHD_DEFAULT_FILE) == 0) {
21157c478bd9Sstevel@tonic-gate 		char	*cp;
21167c478bd9Sstevel@tonic-gate 		int	flags;
21177c478bd9Sstevel@tonic-gate 
21187c478bd9Sstevel@tonic-gate 		/*
21197c478bd9Sstevel@tonic-gate 		 * ignore case
21207c478bd9Sstevel@tonic-gate 		 */
21217c478bd9Sstevel@tonic-gate 		flags = defcntl(DC_GETFLAGS, 0);
21227c478bd9Sstevel@tonic-gate 		TURNOFF(flags, DC_CASE);
21237c478bd9Sstevel@tonic-gate 		(void) defcntl(DC_SETFLAGS, flags);
21247c478bd9Sstevel@tonic-gate 
21257c478bd9Sstevel@tonic-gate 		/* Add "=" to the name */
21267c478bd9Sstevel@tonic-gate 		(void) strncpy(namebuf, name, sizeof (namebuf) - 2);
21277c478bd9Sstevel@tonic-gate 		(void) strncat(namebuf, "=", 2);
21287c478bd9Sstevel@tonic-gate 
21297c478bd9Sstevel@tonic-gate 		if ((cp = defread(namebuf)) != NULL)
21307c478bd9Sstevel@tonic-gate 			value = strdup(cp);
21317c478bd9Sstevel@tonic-gate 
21327c478bd9Sstevel@tonic-gate 		/* close */
21337c478bd9Sstevel@tonic-gate 		(void) defopen((char *)NULL);
21347c478bd9Sstevel@tonic-gate 	}
21357c478bd9Sstevel@tonic-gate 	return (value);
21367c478bd9Sstevel@tonic-gate }
21377c478bd9Sstevel@tonic-gate 
21387c478bd9Sstevel@tonic-gate 
/*
 * Daemon options. In main(), failback_enabled and track_all_phyints are
 * set from /etc/default/mpathd, while adopt and foreground are set from
 * the command line.
 */
21427c478bd9Sstevel@tonic-gate boolean_t	failback_enabled = _B_TRUE;	/* failback enabled/disabled */
21437c478bd9Sstevel@tonic-gate boolean_t	track_all_phyints = _B_FALSE;	/* option to track all NICs */
21447c478bd9Sstevel@tonic-gate static boolean_t adopt = _B_FALSE;
21457c478bd9Sstevel@tonic-gate static boolean_t foreground = _B_FALSE;
21467c478bd9Sstevel@tonic-gate 
21477c478bd9Sstevel@tonic-gate int
21487c478bd9Sstevel@tonic-gate main(int argc, char *argv[])
21497c478bd9Sstevel@tonic-gate {
21507c478bd9Sstevel@tonic-gate 	int i;
21517c478bd9Sstevel@tonic-gate 	int c;
21527c478bd9Sstevel@tonic-gate 	struct phyint_instance *pii;
21537c478bd9Sstevel@tonic-gate 	char *value;
21547c478bd9Sstevel@tonic-gate 
21557c478bd9Sstevel@tonic-gate 	argv0 = argv;		/* Saved for re-exec on SIGHUP */
21567c478bd9Sstevel@tonic-gate 	srandom(gethostid());	/* Initialize the random number generator */
21577c478bd9Sstevel@tonic-gate 
21587c478bd9Sstevel@tonic-gate 	/*
21597c478bd9Sstevel@tonic-gate 	 * NOTE: The messages output by in.mpathd are not suitable for
21607c478bd9Sstevel@tonic-gate 	 * translation, so we do not call textdomain().
21617c478bd9Sstevel@tonic-gate 	 */
21627c478bd9Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
21637c478bd9Sstevel@tonic-gate 
21647c478bd9Sstevel@tonic-gate 	/*
21657c478bd9Sstevel@tonic-gate 	 * Get the user specified value of 'failure detection time'
21667c478bd9Sstevel@tonic-gate 	 * from /etc/default/mpathd
21677c478bd9Sstevel@tonic-gate 	 */
21687c478bd9Sstevel@tonic-gate 	value = getdefault("FAILURE_DETECTION_TIME");
21697c478bd9Sstevel@tonic-gate 	if (value != NULL) {
21707c478bd9Sstevel@tonic-gate 		user_failure_detection_time =
21717c478bd9Sstevel@tonic-gate 		    (int)strtol((char *)value, NULL, 0);
21727c478bd9Sstevel@tonic-gate 
21737c478bd9Sstevel@tonic-gate 		if (user_failure_detection_time <= 0) {
21747c478bd9Sstevel@tonic-gate 			user_failure_detection_time = FAILURE_DETECTION_TIME;
21757c478bd9Sstevel@tonic-gate 			logerr("Invalid failure detection time %s, assuming "
21767c478bd9Sstevel@tonic-gate 			    "default %d\n", value, user_failure_detection_time);
21777c478bd9Sstevel@tonic-gate 
21787c478bd9Sstevel@tonic-gate 		} else if (user_failure_detection_time <
21797c478bd9Sstevel@tonic-gate 		    MIN_FAILURE_DETECTION_TIME) {
21807c478bd9Sstevel@tonic-gate 			user_failure_detection_time =
21817c478bd9Sstevel@tonic-gate 			    MIN_FAILURE_DETECTION_TIME;
21827c478bd9Sstevel@tonic-gate 			logerr("Too small failure detection time of %s, "
21837c478bd9Sstevel@tonic-gate 			    "assuming minimum %d\n", value,
21847c478bd9Sstevel@tonic-gate 			    user_failure_detection_time);
21857c478bd9Sstevel@tonic-gate 		}
21867c478bd9Sstevel@tonic-gate 		free(value);
21877c478bd9Sstevel@tonic-gate 	} else {
21887c478bd9Sstevel@tonic-gate 		/* User has not specified the parameter, Use default value */
21897c478bd9Sstevel@tonic-gate 		user_failure_detection_time = FAILURE_DETECTION_TIME;
21907c478bd9Sstevel@tonic-gate 	}
21917c478bd9Sstevel@tonic-gate 
21927c478bd9Sstevel@tonic-gate 	/*
21937c478bd9Sstevel@tonic-gate 	 * This gives the frequency at which probes will be sent.
21947c478bd9Sstevel@tonic-gate 	 * When fdt ms elapses, we should be able to determine
21957c478bd9Sstevel@tonic-gate 	 * whether 5 consecutive probes have failed or not.
21967c478bd9Sstevel@tonic-gate 	 * 1 probe will be sent in every user_probe_interval ms,
21977c478bd9Sstevel@tonic-gate 	 * randomly anytime in the (0.5  - 1.0) 2nd half of every
21987c478bd9Sstevel@tonic-gate 	 * user_probe_interval. Thus when we send out probe 'n' we
21997c478bd9Sstevel@tonic-gate 	 * can be sure that probe 'n - 2' is lost, if we have not
22007c478bd9Sstevel@tonic-gate 	 * got the ack. (since the probe interval is > crtt). But
22017c478bd9Sstevel@tonic-gate 	 * probe 'n - 1' may be a valid unacked probe, since the
22027c478bd9Sstevel@tonic-gate 	 * time between 2 successive probes could be as small as
22037c478bd9Sstevel@tonic-gate 	 * 0.5 * user_probe_interval.  Hence the NUM_PROBE_FAILS + 2
22047c478bd9Sstevel@tonic-gate 	 */
22057c478bd9Sstevel@tonic-gate 	user_probe_interval = user_failure_detection_time /
22067c478bd9Sstevel@tonic-gate 	    (NUM_PROBE_FAILS + 2);
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 	/*
22097c478bd9Sstevel@tonic-gate 	 * Get the user specified value of failback_enabled from
22107c478bd9Sstevel@tonic-gate 	 * /etc/default/mpathd
22117c478bd9Sstevel@tonic-gate 	 */
22127c478bd9Sstevel@tonic-gate 	value = getdefault("FAILBACK");
22137c478bd9Sstevel@tonic-gate 	if (value != NULL) {
22147c478bd9Sstevel@tonic-gate 		if (strncasecmp(value, "yes", 3) == 0)
22157c478bd9Sstevel@tonic-gate 			failback_enabled = _B_TRUE;
22167c478bd9Sstevel@tonic-gate 		else if (strncasecmp(value, "no", 2) == 0)
22177c478bd9Sstevel@tonic-gate 			failback_enabled = _B_FALSE;
22187c478bd9Sstevel@tonic-gate 		else
22197c478bd9Sstevel@tonic-gate 			logerr("Invalid value for FAILBACK %s\n", value);
22207c478bd9Sstevel@tonic-gate 		free(value);
22217c478bd9Sstevel@tonic-gate 	} else {
22227c478bd9Sstevel@tonic-gate 		failback_enabled = _B_TRUE;
22237c478bd9Sstevel@tonic-gate 	}
22247c478bd9Sstevel@tonic-gate 
22257c478bd9Sstevel@tonic-gate 	/*
22267c478bd9Sstevel@tonic-gate 	 * Get the user specified value of track_all_phyints from
22277c478bd9Sstevel@tonic-gate 	 * /etc/default/mpathd. The sense is reversed in
22287c478bd9Sstevel@tonic-gate 	 * TRACK_INTERFACES_ONLY_WITH_GROUPS.
22297c478bd9Sstevel@tonic-gate 	 */
22307c478bd9Sstevel@tonic-gate 	value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
22317c478bd9Sstevel@tonic-gate 	if (value != NULL) {
22327c478bd9Sstevel@tonic-gate 		if (strncasecmp(value, "yes", 3) == 0)
22337c478bd9Sstevel@tonic-gate 			track_all_phyints = _B_FALSE;
22347c478bd9Sstevel@tonic-gate 		else if (strncasecmp(value, "no", 2) == 0)
22357c478bd9Sstevel@tonic-gate 			track_all_phyints = _B_TRUE;
22367c478bd9Sstevel@tonic-gate 		else
22377c478bd9Sstevel@tonic-gate 			logerr("Invalid value for "
22387c478bd9Sstevel@tonic-gate 			    "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value);
22397c478bd9Sstevel@tonic-gate 		free(value);
22407c478bd9Sstevel@tonic-gate 	} else {
22417c478bd9Sstevel@tonic-gate 		track_all_phyints = _B_FALSE;
22427c478bd9Sstevel@tonic-gate 	}
22437c478bd9Sstevel@tonic-gate 
22447c478bd9Sstevel@tonic-gate 	while ((c = getopt(argc, argv, "adD:ml")) != EOF) {
22457c478bd9Sstevel@tonic-gate 		switch (c) {
22467c478bd9Sstevel@tonic-gate 		case 'a':
22477c478bd9Sstevel@tonic-gate 			adopt = _B_TRUE;
22487c478bd9Sstevel@tonic-gate 			break;
22497c478bd9Sstevel@tonic-gate 		case 'm':
22507c478bd9Sstevel@tonic-gate 			force_mcast = _B_TRUE;
22517c478bd9Sstevel@tonic-gate 			break;
22527c478bd9Sstevel@tonic-gate 		case 'd':
22537c478bd9Sstevel@tonic-gate 			debug = D_ALL;
22547c478bd9Sstevel@tonic-gate 			foreground = _B_TRUE;
22557c478bd9Sstevel@tonic-gate 			break;
22567c478bd9Sstevel@tonic-gate 		case 'D':
22577c478bd9Sstevel@tonic-gate 			i = (int)strtol(optarg, NULL, 0);
22587c478bd9Sstevel@tonic-gate 			if (i == 0) {
22597c478bd9Sstevel@tonic-gate 				(void) fprintf(stderr, "Bad debug flags: %s\n",
22607c478bd9Sstevel@tonic-gate 				    optarg);
22617c478bd9Sstevel@tonic-gate 				exit(1);
22627c478bd9Sstevel@tonic-gate 			}
22637c478bd9Sstevel@tonic-gate 			debug |= i;
22647c478bd9Sstevel@tonic-gate 			foreground = _B_TRUE;
22657c478bd9Sstevel@tonic-gate 			break;
22667c478bd9Sstevel@tonic-gate 		case 'l':
22677c478bd9Sstevel@tonic-gate 			/*
22687c478bd9Sstevel@tonic-gate 			 * Turn off link state notification handling.
22697c478bd9Sstevel@tonic-gate 			 * Undocumented command line flag, for debugging
22707c478bd9Sstevel@tonic-gate 			 * purposes.
22717c478bd9Sstevel@tonic-gate 			 */
22727c478bd9Sstevel@tonic-gate 			handle_link_notifications = _B_FALSE;
22737c478bd9Sstevel@tonic-gate 			break;
22747c478bd9Sstevel@tonic-gate 		default:
22757c478bd9Sstevel@tonic-gate 			usage(argv[0]);
22767c478bd9Sstevel@tonic-gate 			exit(1);
22777c478bd9Sstevel@tonic-gate 		}
22787c478bd9Sstevel@tonic-gate 	}
22797c478bd9Sstevel@tonic-gate 
22807c478bd9Sstevel@tonic-gate 	/*
22817c478bd9Sstevel@tonic-gate 	 * The sockets for the loopback command interface should be listening
22827c478bd9Sstevel@tonic-gate 	 * before we fork and exit in daemonize(). This way, whoever started us
22837c478bd9Sstevel@tonic-gate 	 * can use the loopback interface as soon as they get a zero exit
22847c478bd9Sstevel@tonic-gate 	 * status.
22857c478bd9Sstevel@tonic-gate 	 */
22867c478bd9Sstevel@tonic-gate 	lsock_v4 = setup_listener(AF_INET);
22877c478bd9Sstevel@tonic-gate 	lsock_v6 = setup_listener(AF_INET6);
22887c478bd9Sstevel@tonic-gate 
22897c478bd9Sstevel@tonic-gate 	if (lsock_v4 < 0 && lsock_v6 < 0) {
22907c478bd9Sstevel@tonic-gate 		logerr("main: setup_listener failed for both IPv4 and IPv6\n");
22917c478bd9Sstevel@tonic-gate 		exit(1);
22927c478bd9Sstevel@tonic-gate 	}
22937c478bd9Sstevel@tonic-gate 
22947c478bd9Sstevel@tonic-gate 	if (!foreground) {
22957c478bd9Sstevel@tonic-gate 		if (!daemonize()) {
22967c478bd9Sstevel@tonic-gate 			logerr("cannot daemonize\n");
22977c478bd9Sstevel@tonic-gate 			exit(EXIT_FAILURE);
22987c478bd9Sstevel@tonic-gate 		}
22997c478bd9Sstevel@tonic-gate 		initlog();
23007c478bd9Sstevel@tonic-gate 	}
23017c478bd9Sstevel@tonic-gate 
23027c478bd9Sstevel@tonic-gate 	/*
23037c478bd9Sstevel@tonic-gate 	 * Initializations:
23047c478bd9Sstevel@tonic-gate 	 * 1. Create ifsock* sockets. These are used for performing SIOC*
23057c478bd9Sstevel@tonic-gate 	 *    ioctls. We have 2 sockets 1 each for IPv4 and IPv6.
23067c478bd9Sstevel@tonic-gate 	 * 2. Initialize a pipe for handling/recording signal events.
23077c478bd9Sstevel@tonic-gate 	 * 3. Create the routing sockets,  used for listening
23087c478bd9Sstevel@tonic-gate 	 *    to routing / interface changes.
23097c478bd9Sstevel@tonic-gate 	 * 4. phyint_init() - Initialize physical interface state
23107c478bd9Sstevel@tonic-gate 	 *    (in mpd_tables.c).  Must be done before creating interfaces,
23117c478bd9Sstevel@tonic-gate 	 *    which timer_init() does indirectly.
23127c478bd9Sstevel@tonic-gate 	 * 5. timer_init()  - Initialize timer related stuff
23137c478bd9Sstevel@tonic-gate 	 * 6. initifs() - Initialize our database of all known interfaces
23147c478bd9Sstevel@tonic-gate 	 * 7. init_router_targets() - Initialize our database of all known
23157c478bd9Sstevel@tonic-gate 	 *    router targets.
23167c478bd9Sstevel@tonic-gate 	 */
23177c478bd9Sstevel@tonic-gate 	ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
23187c478bd9Sstevel@tonic-gate 	if (ifsock_v4 < 0) {
23197c478bd9Sstevel@tonic-gate 		logperror("main: IPv4 socket open");
23207c478bd9Sstevel@tonic-gate 		exit(1);
23217c478bd9Sstevel@tonic-gate 	}
23227c478bd9Sstevel@tonic-gate 
23237c478bd9Sstevel@tonic-gate 	ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
23247c478bd9Sstevel@tonic-gate 	if (ifsock_v6 < 0) {
23257c478bd9Sstevel@tonic-gate 		logperror("main: IPv6 socket open");
23267c478bd9Sstevel@tonic-gate 		exit(1);
23277c478bd9Sstevel@tonic-gate 	}
23287c478bd9Sstevel@tonic-gate 
23297c478bd9Sstevel@tonic-gate 	setup_eventpipe();
23307c478bd9Sstevel@tonic-gate 
23317c478bd9Sstevel@tonic-gate 	rtsock_v4 = setup_rtsock(AF_INET);
23327c478bd9Sstevel@tonic-gate 	rtsock_v6 = setup_rtsock(AF_INET6);
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	if (phyint_init() == -1) {
23357c478bd9Sstevel@tonic-gate 		logerr("cannot initialize physical interface structures");
23367c478bd9Sstevel@tonic-gate 		exit(1);
23377c478bd9Sstevel@tonic-gate 	}
23387c478bd9Sstevel@tonic-gate 
23397c478bd9Sstevel@tonic-gate 	timer_init();
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate 	initifs();
23427c478bd9Sstevel@tonic-gate 
234349df4566Sethindra 	/* Inform kernel whether failback is enabled or disabled */
234449df4566Sethindra 	if (ioctl(ifsock_v4, SIOCSIPMPFAILBACK, (int *)&failback_enabled) < 0) {
234549df4566Sethindra 		logperror("main: ioctl (SIOCSIPMPFAILBACK)");
234649df4566Sethindra 		exit(1);
234749df4566Sethindra 	}
234849df4566Sethindra 
23497c478bd9Sstevel@tonic-gate 	/*
23507c478bd9Sstevel@tonic-gate 	 * If we're operating in "adopt" mode and no interfaces need to be
23517c478bd9Sstevel@tonic-gate 	 * tracked, shut down (ifconfig(1M) will restart us on demand if
23527c478bd9Sstevel@tonic-gate 	 * interfaces are subsequently put into multipathing groups).
23537c478bd9Sstevel@tonic-gate 	 */
23547c478bd9Sstevel@tonic-gate 	if (adopt && phyint_instances == NULL)
23557c478bd9Sstevel@tonic-gate 		exit(0);
23567c478bd9Sstevel@tonic-gate 
23577c478bd9Sstevel@tonic-gate 	/*
23587c478bd9Sstevel@tonic-gate 	 * Main body. Keep listening for activity on any of the sockets
23597c478bd9Sstevel@tonic-gate 	 * that we are monitoring and take appropriate action as necessary.
23607c478bd9Sstevel@tonic-gate 	 * signals are also handled synchronously.
23617c478bd9Sstevel@tonic-gate 	 */
23627c478bd9Sstevel@tonic-gate 	for (;;) {
23637c478bd9Sstevel@tonic-gate 		if (poll(pollfds, pollfd_num, -1) < 0) {
23647c478bd9Sstevel@tonic-gate 			if (errno == EINTR)
23657c478bd9Sstevel@tonic-gate 				continue;
23667c478bd9Sstevel@tonic-gate 			logperror("main: poll");
23677c478bd9Sstevel@tonic-gate 			exit(1);
23687c478bd9Sstevel@tonic-gate 		}
23697c478bd9Sstevel@tonic-gate 		for (i = 0; i < pollfd_num; i++) {
23707c478bd9Sstevel@tonic-gate 			if ((pollfds[i].fd == -1) ||
23717c478bd9Sstevel@tonic-gate 			    !(pollfds[i].revents & POLLIN))
23727c478bd9Sstevel@tonic-gate 				continue;
23737c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == eventpipe_read) {
23747c478bd9Sstevel@tonic-gate 				in_signal(eventpipe_read);
23757c478bd9Sstevel@tonic-gate 				break;
23767c478bd9Sstevel@tonic-gate 			}
23777c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == rtsock_v4 ||
23787c478bd9Sstevel@tonic-gate 				pollfds[i].fd == rtsock_v6) {
23797c478bd9Sstevel@tonic-gate 				process_rtsock(rtsock_v4, rtsock_v6);
23807c478bd9Sstevel@tonic-gate 				break;
23817c478bd9Sstevel@tonic-gate 			}
23827c478bd9Sstevel@tonic-gate 			for (pii = phyint_instances; pii != NULL;
23837c478bd9Sstevel@tonic-gate 			    pii = pii->pii_next) {
23847c478bd9Sstevel@tonic-gate 				if (pollfds[i].fd == pii->pii_probe_sock) {
23857c478bd9Sstevel@tonic-gate 					if (pii->pii_af == AF_INET)
23867c478bd9Sstevel@tonic-gate 						in_data(pii);
23877c478bd9Sstevel@tonic-gate 					else
23887c478bd9Sstevel@tonic-gate 						in6_data(pii);
23897c478bd9Sstevel@tonic-gate 					break;
23907c478bd9Sstevel@tonic-gate 				}
23917c478bd9Sstevel@tonic-gate 			}
23927c478bd9Sstevel@tonic-gate 			if (pollfds[i].fd == lsock_v4)
23937c478bd9Sstevel@tonic-gate 				loopback_cmd(lsock_v4, AF_INET);
23947c478bd9Sstevel@tonic-gate 			else if (pollfds[i].fd == lsock_v6)
23957c478bd9Sstevel@tonic-gate 				loopback_cmd(lsock_v6, AF_INET6);
23967c478bd9Sstevel@tonic-gate 		}
23977c478bd9Sstevel@tonic-gate 		if (full_scan_required) {
23987c478bd9Sstevel@tonic-gate 			initifs();
23997c478bd9Sstevel@tonic-gate 			full_scan_required = _B_FALSE;
24007c478bd9Sstevel@tonic-gate 		}
24017c478bd9Sstevel@tonic-gate 	}
24027c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
24037c478bd9Sstevel@tonic-gate 	return (EXIT_SUCCESS);
24047c478bd9Sstevel@tonic-gate }
24057c478bd9Sstevel@tonic-gate 
24067c478bd9Sstevel@tonic-gate static int
24077c478bd9Sstevel@tonic-gate setup_listener(int af)
24087c478bd9Sstevel@tonic-gate {
24097c478bd9Sstevel@tonic-gate 	int sock;
24107c478bd9Sstevel@tonic-gate 	int on;
24117c478bd9Sstevel@tonic-gate 	int len;
24127c478bd9Sstevel@tonic-gate 	int ret;
24137c478bd9Sstevel@tonic-gate 	struct sockaddr_storage laddr;
24147c478bd9Sstevel@tonic-gate 	struct sockaddr_in  *sin;
24157c478bd9Sstevel@tonic-gate 	struct sockaddr_in6 *sin6;
24167c478bd9Sstevel@tonic-gate 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
24177c478bd9Sstevel@tonic-gate 
24187c478bd9Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
24197c478bd9Sstevel@tonic-gate 
24207c478bd9Sstevel@tonic-gate 	sock = socket(af, SOCK_STREAM, 0);
24217c478bd9Sstevel@tonic-gate 	if (sock < 0) {
24227c478bd9Sstevel@tonic-gate 		logperror("setup_listener: socket");
24237c478bd9Sstevel@tonic-gate 		exit(1);
24247c478bd9Sstevel@tonic-gate 	}
24257c478bd9Sstevel@tonic-gate 
24267c478bd9Sstevel@tonic-gate 	on = 1;
24277c478bd9Sstevel@tonic-gate 	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
24287c478bd9Sstevel@tonic-gate 	    sizeof (on)) < 0) {
24297c478bd9Sstevel@tonic-gate 		logperror("setup_listener: setsockopt (SO_REUSEADDR)");
24307c478bd9Sstevel@tonic-gate 		exit(1);
24317c478bd9Sstevel@tonic-gate 	}
24327c478bd9Sstevel@tonic-gate 
24337c478bd9Sstevel@tonic-gate 	bzero(&laddr, sizeof (laddr));
24347c478bd9Sstevel@tonic-gate 	laddr.ss_family = af;
24357c478bd9Sstevel@tonic-gate 
24367c478bd9Sstevel@tonic-gate 	if (af == AF_INET) {
24377c478bd9Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&laddr;
24387c478bd9Sstevel@tonic-gate 		sin->sin_port = htons(MPATHD_PORT);
24397c478bd9Sstevel@tonic-gate 		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
24407c478bd9Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in);
24417c478bd9Sstevel@tonic-gate 	} else {
24427c478bd9Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&laddr;
24437c478bd9Sstevel@tonic-gate 		sin6->sin6_port = htons(MPATHD_PORT);
24447c478bd9Sstevel@tonic-gate 		sin6->sin6_addr = loopback_addr;
24457c478bd9Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in6);
24467c478bd9Sstevel@tonic-gate 	}
24477c478bd9Sstevel@tonic-gate 
24487c478bd9Sstevel@tonic-gate 	ret = bind(sock, (struct sockaddr *)&laddr, len);
24497c478bd9Sstevel@tonic-gate 	if (ret < 0) {
24507c478bd9Sstevel@tonic-gate 		if (errno == EADDRINUSE) {
24517c478bd9Sstevel@tonic-gate 			/*
24527c478bd9Sstevel@tonic-gate 			 * Another instance of mpathd may be already active.
24537c478bd9Sstevel@tonic-gate 			 */
24547c478bd9Sstevel@tonic-gate 			logerr("main: is another instance of in.mpathd "
24557c478bd9Sstevel@tonic-gate 			    "already active?\n");
24567c478bd9Sstevel@tonic-gate 			exit(1);
24577c478bd9Sstevel@tonic-gate 		} else {
24587c478bd9Sstevel@tonic-gate 			(void) close(sock);
24597c478bd9Sstevel@tonic-gate 			return (-1);
24607c478bd9Sstevel@tonic-gate 		}
24617c478bd9Sstevel@tonic-gate 	}
24627c478bd9Sstevel@tonic-gate 	if (listen(sock, 30) < 0) {
24637c478bd9Sstevel@tonic-gate 		logperror("main: listen");
24647c478bd9Sstevel@tonic-gate 		exit(1);
24657c478bd9Sstevel@tonic-gate 	}
24667c478bd9Sstevel@tonic-gate 	if (poll_add(sock) == -1) {
24677c478bd9Sstevel@tonic-gate 		(void) close(sock);
24687c478bd9Sstevel@tonic-gate 		exit(1);
24697c478bd9Sstevel@tonic-gate 	}
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate 	return (sock);
24727c478bd9Sstevel@tonic-gate }
24737c478bd9Sstevel@tonic-gate 
/*
 * Table of commands and their expected size; used by loopback_cmd().
 *
 * loopback_cmd() indexes this table directly with the command id read from
 * the request, after rejecting ids >= MI_NCMD.  The entries are therefore
 * presumably listed in the same order as the MI_* command ids, with one
 * entry per id below MI_NCMD -- confirm against the header that defines
 * them whenever a command is added.
 */
static struct {
	const char	*name;	/* command name, used in log messages */
	unsigned int	size;	/* least number of bytes a request must have */
} commands[] = {
	{ "MI_PING",		sizeof (uint32_t)	},
	{ "MI_OFFLINE",		sizeof (mi_offline_t)	},
	{ "MI_UNDO_OFFLINE",	sizeof (mi_undo_offline_t) },
	{ "MI_SETOINDEX",	sizeof (mi_setoindex_t) },
	{ "MI_QUERY",		sizeof (mi_query_t)	}
};
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate /*
24897c478bd9Sstevel@tonic-gate  * Commands received over the loopback interface come here. Currently
24907c478bd9Sstevel@tonic-gate  * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP
24917c478bd9Sstevel@tonic-gate  * module. ifconfig only makes a connection, and closes it to check if
24927c478bd9Sstevel@tonic-gate  * in.mpathd is running.
24937c478bd9Sstevel@tonic-gate  * if_mpadm sends commands in the format specified by the mpathd_interface
24947c478bd9Sstevel@tonic-gate  * structure.
24957c478bd9Sstevel@tonic-gate  */
24967c478bd9Sstevel@tonic-gate static void
24977c478bd9Sstevel@tonic-gate loopback_cmd(int sock, int family)
24987c478bd9Sstevel@tonic-gate {
24997c478bd9Sstevel@tonic-gate 	int newfd;
25007c478bd9Sstevel@tonic-gate 	ssize_t len;
25017c478bd9Sstevel@tonic-gate 	struct sockaddr_storage	peer;
25027c478bd9Sstevel@tonic-gate 	struct sockaddr_in	*peer_sin;
25037c478bd9Sstevel@tonic-gate 	struct sockaddr_in6	*peer_sin6;
25047c478bd9Sstevel@tonic-gate 	socklen_t peerlen;
25057c478bd9Sstevel@tonic-gate 	union mi_commands mpi;
25067c478bd9Sstevel@tonic-gate 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
25077c478bd9Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
25087c478bd9Sstevel@tonic-gate 	uint_t cmd;
25097c478bd9Sstevel@tonic-gate 	int retval;
25107c478bd9Sstevel@tonic-gate 
25117c478bd9Sstevel@tonic-gate 	peerlen = sizeof (peer);
25127c478bd9Sstevel@tonic-gate 	newfd = accept(sock, (struct sockaddr *)&peer, &peerlen);
25137c478bd9Sstevel@tonic-gate 	if (newfd < 0) {
25147c478bd9Sstevel@tonic-gate 		logperror("loopback_cmd: accept");
25157c478bd9Sstevel@tonic-gate 		return;
25167c478bd9Sstevel@tonic-gate 	}
25177c478bd9Sstevel@tonic-gate 
25187c478bd9Sstevel@tonic-gate 	switch (family) {
25197c478bd9Sstevel@tonic-gate 	case AF_INET:
25207c478bd9Sstevel@tonic-gate 		/*
25217c478bd9Sstevel@tonic-gate 		 * Validate the address and port to make sure that
25227c478bd9Sstevel@tonic-gate 		 * non privileged processes don't connect and start
25237c478bd9Sstevel@tonic-gate 		 * talking to us.
25247c478bd9Sstevel@tonic-gate 		 */
25257c478bd9Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in)) {
25267c478bd9Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen);
25277c478bd9Sstevel@tonic-gate 			(void) close(newfd);
25287c478bd9Sstevel@tonic-gate 			return;
25297c478bd9Sstevel@tonic-gate 		}
25307c478bd9Sstevel@tonic-gate 		peer_sin = (struct sockaddr_in *)&peer;
25317c478bd9Sstevel@tonic-gate 		if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) ||
25327c478bd9Sstevel@tonic-gate 		    (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) {
25337c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
25347c478bd9Sstevel@tonic-gate 			    abuf, sizeof (abuf));
25357c478bd9Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
25367c478bd9Sstevel@tonic-gate 			    abuf, ntohs(peer_sin->sin_port));
25377c478bd9Sstevel@tonic-gate 			(void) close(newfd);
25387c478bd9Sstevel@tonic-gate 			return;
25397c478bd9Sstevel@tonic-gate 		}
25407c478bd9Sstevel@tonic-gate 		break;
25417c478bd9Sstevel@tonic-gate 
25427c478bd9Sstevel@tonic-gate 	case AF_INET6:
25437c478bd9Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in6)) {
25447c478bd9Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen);
25457c478bd9Sstevel@tonic-gate 			(void) close(newfd);
25467c478bd9Sstevel@tonic-gate 			return;
25477c478bd9Sstevel@tonic-gate 		}
25487c478bd9Sstevel@tonic-gate 		/*
25497c478bd9Sstevel@tonic-gate 		 * Validate the address and port to make sure that
25507c478bd9Sstevel@tonic-gate 		 * non privileged processes don't connect and start
25517c478bd9Sstevel@tonic-gate 		 * talking to us.
25527c478bd9Sstevel@tonic-gate 		 */
25537c478bd9Sstevel@tonic-gate 		peer_sin6 = (struct sockaddr_in6 *)&peer;
25547c478bd9Sstevel@tonic-gate 		if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) ||
25557c478bd9Sstevel@tonic-gate 		    (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr,
25567c478bd9Sstevel@tonic-gate 		    &loopback_addr))) {
25577c478bd9Sstevel@tonic-gate 			(void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
25587c478bd9Sstevel@tonic-gate 			    sizeof (abuf));
25597c478bd9Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
25607c478bd9Sstevel@tonic-gate 			    abuf, ntohs(peer_sin6->sin6_port));
25617c478bd9Sstevel@tonic-gate 			(void) close(newfd);
25627c478bd9Sstevel@tonic-gate 			return;
25637c478bd9Sstevel@tonic-gate 		}
25647c478bd9Sstevel@tonic-gate 
25657c478bd9Sstevel@tonic-gate 	default:
25667c478bd9Sstevel@tonic-gate 		logdebug("loopback_cmd: family %d\n", family);
25677c478bd9Sstevel@tonic-gate 		(void) close(newfd);
25687c478bd9Sstevel@tonic-gate 		return;
25697c478bd9Sstevel@tonic-gate 	}
25707c478bd9Sstevel@tonic-gate 
25717c478bd9Sstevel@tonic-gate 	/*
25727c478bd9Sstevel@tonic-gate 	 * The sizeof the 'mpi' buffer corresponds to the maximum size of
25737c478bd9Sstevel@tonic-gate 	 * all supported commands
25747c478bd9Sstevel@tonic-gate 	 */
25757c478bd9Sstevel@tonic-gate 	len = read(newfd, &mpi, sizeof (mpi));
25767c478bd9Sstevel@tonic-gate 
25777c478bd9Sstevel@tonic-gate 	/*
25787c478bd9Sstevel@tonic-gate 	 * ifconfig does not send any data. Just tests to see if mpathd
25797c478bd9Sstevel@tonic-gate 	 * is already running.
25807c478bd9Sstevel@tonic-gate 	 */
25817c478bd9Sstevel@tonic-gate 	if (len <= 0) {
25827c478bd9Sstevel@tonic-gate 		(void) close(newfd);
25837c478bd9Sstevel@tonic-gate 		return;
25847c478bd9Sstevel@tonic-gate 	}
25857c478bd9Sstevel@tonic-gate 
25867c478bd9Sstevel@tonic-gate 	/*
25877c478bd9Sstevel@tonic-gate 	 * In theory, we can receive any sized message for a stream socket,
25887c478bd9Sstevel@tonic-gate 	 * but we don't expect that to happen for a small message over a
25897c478bd9Sstevel@tonic-gate 	 * loopback connection.
25907c478bd9Sstevel@tonic-gate 	 */
25917c478bd9Sstevel@tonic-gate 	if (len < sizeof (uint32_t)) {
25927c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: bad command format or read returns "
25937c478bd9Sstevel@tonic-gate 		    "partial data %d\n", len);
25947c478bd9Sstevel@tonic-gate 	}
25957c478bd9Sstevel@tonic-gate 
25967c478bd9Sstevel@tonic-gate 	cmd = mpi.mi_command;
25977c478bd9Sstevel@tonic-gate 	if (cmd >= MI_NCMD) {
25987c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: unknown command id `%d'\n", cmd);
25997c478bd9Sstevel@tonic-gate 		(void) close(newfd);
26007c478bd9Sstevel@tonic-gate 		return;
26017c478bd9Sstevel@tonic-gate 	}
26027c478bd9Sstevel@tonic-gate 
26037c478bd9Sstevel@tonic-gate 	if (len < commands[cmd].size) {
26047c478bd9Sstevel@tonic-gate 		logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
26057c478bd9Sstevel@tonic-gate 		    commands[cmd].name, commands[cmd].size, len);
26067c478bd9Sstevel@tonic-gate 		(void) close(newfd);
26077c478bd9Sstevel@tonic-gate 		return;
26087c478bd9Sstevel@tonic-gate 	}
26097c478bd9Sstevel@tonic-gate 
26107c478bd9Sstevel@tonic-gate 	retval = process_cmd(newfd, &mpi);
26117c478bd9Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS) {
26127c478bd9Sstevel@tonic-gate 		logerr("failed processing %s: %s\n", commands[cmd].name,
26137c478bd9Sstevel@tonic-gate 		    ipmp_errmsg(retval));
26147c478bd9Sstevel@tonic-gate 	}
26157c478bd9Sstevel@tonic-gate 	(void) close(newfd);
26167c478bd9Sstevel@tonic-gate }
26177c478bd9Sstevel@tonic-gate 
26187c478bd9Sstevel@tonic-gate extern int global_errno;	/* set by failover() or failback() */
26197c478bd9Sstevel@tonic-gate 
26207c478bd9Sstevel@tonic-gate /*
26217c478bd9Sstevel@tonic-gate  * Process the offline, undo offline and set original index commands,
26227c478bd9Sstevel@tonic-gate  * received from if_mpadm(1M)
26237c478bd9Sstevel@tonic-gate  */
26247c478bd9Sstevel@tonic-gate static unsigned int
26257c478bd9Sstevel@tonic-gate process_cmd(int newfd, union mi_commands *mpi)
26267c478bd9Sstevel@tonic-gate {
26277c478bd9Sstevel@tonic-gate 	uint_t	nif = 0;
26287c478bd9Sstevel@tonic-gate 	uint32_t cmd;
26297c478bd9Sstevel@tonic-gate 	struct phyint *pi;
26307c478bd9Sstevel@tonic-gate 	struct phyint *pi2;
26317c478bd9Sstevel@tonic-gate 	struct phyint_group *pg;
26327c478bd9Sstevel@tonic-gate 	boolean_t success;
26337c478bd9Sstevel@tonic-gate 	int error;
26347c478bd9Sstevel@tonic-gate 	struct mi_offline *mio;
26357c478bd9Sstevel@tonic-gate 	struct mi_undo_offline *miu;
26367c478bd9Sstevel@tonic-gate 	struct lifreq lifr;
26377c478bd9Sstevel@tonic-gate 	int ifsock;
26387c478bd9Sstevel@tonic-gate 	struct mi_setoindex *mis;
26397c478bd9Sstevel@tonic-gate 
26407c478bd9Sstevel@tonic-gate 	cmd = mpi->mi_command;
26417c478bd9Sstevel@tonic-gate 
26427c478bd9Sstevel@tonic-gate 	switch (cmd) {
26437c478bd9Sstevel@tonic-gate 	case MI_OFFLINE:
26447c478bd9Sstevel@tonic-gate 		mio = &mpi->mi_ocmd;
26457c478bd9Sstevel@tonic-gate 		/*
26467c478bd9Sstevel@tonic-gate 		 * Lookup the interface that needs to be offlined.
26477c478bd9Sstevel@tonic-gate 		 * If it does not exist, return a suitable error.
26487c478bd9Sstevel@tonic-gate 		 */
26497c478bd9Sstevel@tonic-gate 		pi = phyint_lookup(mio->mio_ifname);
26507c478bd9Sstevel@tonic-gate 		if (pi == NULL)
26517c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, EINVAL));
26527c478bd9Sstevel@tonic-gate 
26537c478bd9Sstevel@tonic-gate 		/*
26547c478bd9Sstevel@tonic-gate 		 * Verify that the minimum redundancy requirements are met.
26557c478bd9Sstevel@tonic-gate 		 * The multipathing group must have at least the specified
26567c478bd9Sstevel@tonic-gate 		 * number of functional interfaces after offlining the
26577c478bd9Sstevel@tonic-gate 		 * requested interface. Otherwise return a suitable error.
26587c478bd9Sstevel@tonic-gate 		 */
26597c478bd9Sstevel@tonic-gate 		pg = pi->pi_group;
26607c478bd9Sstevel@tonic-gate 		nif = 0;
26617c478bd9Sstevel@tonic-gate 		if (pg != phyint_anongroup) {
26627c478bd9Sstevel@tonic-gate 			for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL;
26637c478bd9Sstevel@tonic-gate 			    pi2 = pi2->pi_pgnext) {
26647c478bd9Sstevel@tonic-gate 				if ((pi2->pi_state == PI_RUNNING) ||
26657c478bd9Sstevel@tonic-gate 				    (pg->pg_groupfailed &&
26667c478bd9Sstevel@tonic-gate 				    !(pi2->pi_flags & IFF_OFFLINE)))
26677c478bd9Sstevel@tonic-gate 					nif++;
26687c478bd9Sstevel@tonic-gate 			}
26697c478bd9Sstevel@tonic-gate 		}
26707c478bd9Sstevel@tonic-gate 		if (nif < mio->mio_min_redundancy)
26717c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_EMINRED, 0));
26727c478bd9Sstevel@tonic-gate 
26737c478bd9Sstevel@tonic-gate 		/*
26747c478bd9Sstevel@tonic-gate 		 * The order of operation is to set IFF_OFFLINE, followed by
26757c478bd9Sstevel@tonic-gate 		 * failover. Setting IFF_OFFLINE ensures that no new ipif's
26767c478bd9Sstevel@tonic-gate 		 * can be created. Subsequent failover moves everything on
26777c478bd9Sstevel@tonic-gate 		 * the OFFLINE interface to some other functional interface.
26787c478bd9Sstevel@tonic-gate 		 */
26797c478bd9Sstevel@tonic-gate 		success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE);
26807c478bd9Sstevel@tonic-gate 		if (success) {
26817c478bd9Sstevel@tonic-gate 			if (!pi->pi_empty) {
26827c478bd9Sstevel@tonic-gate 				error = try_failover(pi, FAILOVER_NORMAL);
26837c478bd9Sstevel@tonic-gate 				if (error != 0) {
26847c478bd9Sstevel@tonic-gate 					if (!change_lif_flags(pi, IFF_OFFLINE,
26857c478bd9Sstevel@tonic-gate 					    _B_FALSE)) {
26867c478bd9Sstevel@tonic-gate 						logerr("process_cmd: couldn't"
26877c478bd9Sstevel@tonic-gate 						    " clear OFFLINE flag on"
26887c478bd9Sstevel@tonic-gate 						    " %s\n", pi->pi_name);
26897c478bd9Sstevel@tonic-gate 						/*
26907c478bd9Sstevel@tonic-gate 						 * Offline interfaces should
26917c478bd9Sstevel@tonic-gate 						 * not be probed.
26927c478bd9Sstevel@tonic-gate 						 */
26937c478bd9Sstevel@tonic-gate 						stop_probing(pi);
26947c478bd9Sstevel@tonic-gate 					}
26957c478bd9Sstevel@tonic-gate 					return (send_result(newfd, error,
26967c478bd9Sstevel@tonic-gate 					    global_errno));
26977c478bd9Sstevel@tonic-gate 				}
26987c478bd9Sstevel@tonic-gate 			}
26997c478bd9Sstevel@tonic-gate 		} else {
27007c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
27017c478bd9Sstevel@tonic-gate 		}
27027c478bd9Sstevel@tonic-gate 
27037c478bd9Sstevel@tonic-gate 		/*
27047c478bd9Sstevel@tonic-gate 		 * The interface is now Offline, so stop probing it.
27057c478bd9Sstevel@tonic-gate 		 * Note that if_mpadm(1M) will down the test addresses,
27067c478bd9Sstevel@tonic-gate 		 * after receiving a success reply from us. The routing
27077c478bd9Sstevel@tonic-gate 		 * socket message will then make us close the socket used
27087c478bd9Sstevel@tonic-gate 		 * for sending probes. But it is more logical that an
27097c478bd9Sstevel@tonic-gate 		 * offlined interface must not be probed, even if it has
27107c478bd9Sstevel@tonic-gate 		 * test addresses.
27117c478bd9Sstevel@tonic-gate 		 */
27127c478bd9Sstevel@tonic-gate 		stop_probing(pi);
27137c478bd9Sstevel@tonic-gate 		return (send_result(newfd, IPMP_SUCCESS, 0));
27147c478bd9Sstevel@tonic-gate 
27157c478bd9Sstevel@tonic-gate 	case MI_UNDO_OFFLINE:
27167c478bd9Sstevel@tonic-gate 		miu = &mpi->mi_ucmd;
27177c478bd9Sstevel@tonic-gate 		/*
27187c478bd9Sstevel@tonic-gate 		 * Undo the offline command. As usual lookup the interface.
27197c478bd9Sstevel@tonic-gate 		 * Send an error if it does not exist.
27207c478bd9Sstevel@tonic-gate 		 */
27217c478bd9Sstevel@tonic-gate 		pi = phyint_lookup(miu->miu_ifname);
27227c478bd9Sstevel@tonic-gate 		if (pi == NULL)
27237c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, EINVAL));
27247c478bd9Sstevel@tonic-gate 
27257c478bd9Sstevel@tonic-gate 		/*
27267c478bd9Sstevel@tonic-gate 		 * Inverse of the offline operation. Do a failback, and then
27277c478bd9Sstevel@tonic-gate 		 * clear the IFF_OFFLINE flag.
27287c478bd9Sstevel@tonic-gate 		 */
27297c478bd9Sstevel@tonic-gate 		error = do_failback(pi, _B_TRUE);
27307c478bd9Sstevel@tonic-gate 		if (error == IPMP_EFBPARTIAL)
27317c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_EFBPARTIAL, 0));
27327c478bd9Sstevel@tonic-gate 		error = do_failback(pi, _B_FALSE);
27337c478bd9Sstevel@tonic-gate 
27347c478bd9Sstevel@tonic-gate 		switch (error) {
27357c478bd9Sstevel@tonic-gate 		case IPMP_SUCCESS:
27367c478bd9Sstevel@tonic-gate 			if (!change_lif_flags(pi, IFF_OFFLINE, _B_FALSE)) {
27377c478bd9Sstevel@tonic-gate 				logdebug("undo error %X\n", global_errno);
27387c478bd9Sstevel@tonic-gate 				error = IPMP_FAILURE;
27397c478bd9Sstevel@tonic-gate 				break;
27407c478bd9Sstevel@tonic-gate 			}
27417c478bd9Sstevel@tonic-gate 			/* FALLTHROUGH */
27427c478bd9Sstevel@tonic-gate 
27437c478bd9Sstevel@tonic-gate 		case IPMP_EFBPARTIAL:
27447c478bd9Sstevel@tonic-gate 			/*
27457c478bd9Sstevel@tonic-gate 			 * Reset the state of the interface based on the
27467c478bd9Sstevel@tonic-gate 			 * current link state; if this phyint subsequently
27477c478bd9Sstevel@tonic-gate 			 * acquires a test address, the state will be changed
27487c478bd9Sstevel@tonic-gate 			 * again later as a result of the probes.
27497c478bd9Sstevel@tonic-gate 			 */
27507c478bd9Sstevel@tonic-gate 			if (LINK_UP(pi))
27517c478bd9Sstevel@tonic-gate 				phyint_chstate(pi, PI_RUNNING);
27527c478bd9Sstevel@tonic-gate 			else
27537c478bd9Sstevel@tonic-gate 				phyint_chstate(pi, PI_FAILED);
27547c478bd9Sstevel@tonic-gate 			break;
27557c478bd9Sstevel@tonic-gate 
27567c478bd9Sstevel@tonic-gate 		case IPMP_FAILURE:
27577c478bd9Sstevel@tonic-gate 			break;
27587c478bd9Sstevel@tonic-gate 
27597c478bd9Sstevel@tonic-gate 		default:
27607c478bd9Sstevel@tonic-gate 			logdebug("do_failback: unexpected return value\n");
27617c478bd9Sstevel@tonic-gate 			break;
27627c478bd9Sstevel@tonic-gate 		}
27637c478bd9Sstevel@tonic-gate 		return (send_result(newfd, error, global_errno));
27647c478bd9Sstevel@tonic-gate 
27657c478bd9Sstevel@tonic-gate 	case MI_SETOINDEX:
27667c478bd9Sstevel@tonic-gate 		mis = &mpi->mi_scmd;
27677c478bd9Sstevel@tonic-gate 
27687c478bd9Sstevel@tonic-gate 		/* Get the socket for doing ioctls */
27697c478bd9Sstevel@tonic-gate 		ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6;
27707c478bd9Sstevel@tonic-gate 
27717c478bd9Sstevel@tonic-gate 		/*
27727c478bd9Sstevel@tonic-gate 		 * Get index of new original interface.
27737c478bd9Sstevel@tonic-gate 		 * The index is returned in lifr.lifr_index.
27747c478bd9Sstevel@tonic-gate 		 */
27757c478bd9Sstevel@tonic-gate 		(void) strlcpy(lifr.lifr_name, mis->mis_new_pifname,
27767c478bd9Sstevel@tonic-gate 		    sizeof (lifr.lifr_name));
27777c478bd9Sstevel@tonic-gate 
27787c478bd9Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0)
27797c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
27807c478bd9Sstevel@tonic-gate 
27817c478bd9Sstevel@tonic-gate 		/*
27827c478bd9Sstevel@tonic-gate 		 * Set new original interface index.
27837c478bd9Sstevel@tonic-gate 		 * The new index was put into lifr.lifr_index by the
27847c478bd9Sstevel@tonic-gate 		 * SIOCGLIFINDEX ioctl.
27857c478bd9Sstevel@tonic-gate 		 */
27867c478bd9Sstevel@tonic-gate 		(void) strlcpy(lifr.lifr_name, mis->mis_lifname,
27877c478bd9Sstevel@tonic-gate 		    sizeof (lifr.lifr_name));
27887c478bd9Sstevel@tonic-gate 
27897c478bd9Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0)
27907c478bd9Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
27917c478bd9Sstevel@tonic-gate 
27927c478bd9Sstevel@tonic-gate 		return (send_result(newfd, IPMP_SUCCESS, 0));
27937c478bd9Sstevel@tonic-gate 
27947c478bd9Sstevel@tonic-gate 	case MI_QUERY:
27957c478bd9Sstevel@tonic-gate 		return (process_query(newfd, &mpi->mi_qcmd));
27967c478bd9Sstevel@tonic-gate 
27977c478bd9Sstevel@tonic-gate 	default:
27987c478bd9Sstevel@tonic-gate 		break;
27997c478bd9Sstevel@tonic-gate 	}
28007c478bd9Sstevel@tonic-gate 
28017c478bd9Sstevel@tonic-gate 	return (send_result(newfd, IPMP_EPROTO, 0));
28027c478bd9Sstevel@tonic-gate }
28037c478bd9Sstevel@tonic-gate 
28047c478bd9Sstevel@tonic-gate /*
28057c478bd9Sstevel@tonic-gate  * Process the query request pointed to by `miq' and send a reply on file
28067c478bd9Sstevel@tonic-gate  * descriptor `fd'.  Returns an IPMP error code.
28077c478bd9Sstevel@tonic-gate  */
28087c478bd9Sstevel@tonic-gate static unsigned int
28097c478bd9Sstevel@tonic-gate process_query(int fd, mi_query_t *miq)
28107c478bd9Sstevel@tonic-gate {
28117c478bd9Sstevel@tonic-gate 	ipmp_groupinfo_t	*grinfop;
28127c478bd9Sstevel@tonic-gate 	ipmp_groupinfolist_t	*grlp;
28137c478bd9Sstevel@tonic-gate 	ipmp_grouplist_t	*grlistp;
28147c478bd9Sstevel@tonic-gate 	ipmp_ifinfo_t		*ifinfop;
28157c478bd9Sstevel@tonic-gate 	ipmp_ifinfolist_t	*iflp;
28167c478bd9Sstevel@tonic-gate 	ipmp_snap_t		*snap;
28177c478bd9Sstevel@tonic-gate 	unsigned int		retval;
28187c478bd9Sstevel@tonic-gate 
28197c478bd9Sstevel@tonic-gate 	switch (miq->miq_inforeq) {
28207c478bd9Sstevel@tonic-gate 	case IPMP_GROUPLIST:
28217c478bd9Sstevel@tonic-gate 		retval = getgrouplist(&grlistp);
28227c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28237c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28247c478bd9Sstevel@tonic-gate 
28257c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28267c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28277c478bd9Sstevel@tonic-gate 			retval = send_grouplist(fd, grlistp);
28287c478bd9Sstevel@tonic-gate 
28297c478bd9Sstevel@tonic-gate 		ipmp_freegrouplist(grlistp);
28307c478bd9Sstevel@tonic-gate 		return (retval);
28317c478bd9Sstevel@tonic-gate 
28327c478bd9Sstevel@tonic-gate 	case IPMP_GROUPINFO:
28337c478bd9Sstevel@tonic-gate 		miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
28347c478bd9Sstevel@tonic-gate 		retval = getgroupinfo(miq->miq_ifname, &grinfop);
28357c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28367c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28377c478bd9Sstevel@tonic-gate 
28387c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28397c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28407c478bd9Sstevel@tonic-gate 			retval = send_groupinfo(fd, grinfop);
28417c478bd9Sstevel@tonic-gate 
28427c478bd9Sstevel@tonic-gate 		ipmp_freegroupinfo(grinfop);
28437c478bd9Sstevel@tonic-gate 		return (retval);
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 	case IPMP_IFINFO:
28467c478bd9Sstevel@tonic-gate 		miq->miq_ifname[LIFNAMSIZ - 1] = '\0';
28477c478bd9Sstevel@tonic-gate 		retval = getifinfo(miq->miq_ifname, &ifinfop);
28487c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28497c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28527c478bd9Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28537c478bd9Sstevel@tonic-gate 			retval = send_ifinfo(fd, ifinfop);
28547c478bd9Sstevel@tonic-gate 
28557c478bd9Sstevel@tonic-gate 		ipmp_freeifinfo(ifinfop);
28567c478bd9Sstevel@tonic-gate 		return (retval);
28577c478bd9Sstevel@tonic-gate 
28587c478bd9Sstevel@tonic-gate 	case IPMP_SNAP:
28597c478bd9Sstevel@tonic-gate 		retval = getsnap(&snap);
28607c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28617c478bd9Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28627c478bd9Sstevel@tonic-gate 
28637c478bd9Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28647c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28657c478bd9Sstevel@tonic-gate 			goto out;
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate 		retval = ipmp_writetlv(fd, IPMP_SNAP, sizeof (*snap), snap);
28687c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28697c478bd9Sstevel@tonic-gate 			goto out;
28707c478bd9Sstevel@tonic-gate 
28717c478bd9Sstevel@tonic-gate 		retval = send_grouplist(fd, snap->sn_grlistp);
28727c478bd9Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28737c478bd9Sstevel@tonic-gate 			goto out;
28747c478bd9Sstevel@tonic-gate 
28757c478bd9Sstevel@tonic-gate 		iflp = snap->sn_ifinfolistp;
28767c478bd9Sstevel@tonic-gate 		for (; iflp != NULL; iflp = iflp->ifl_next) {
28777c478bd9Sstevel@tonic-gate 			retval = send_ifinfo(fd, iflp->ifl_ifinfop);
28787c478bd9Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
28797c478bd9Sstevel@tonic-gate 				goto out;
28807c478bd9Sstevel@tonic-gate 		}
28817c478bd9Sstevel@tonic-gate 
28827c478bd9Sstevel@tonic-gate 		grlp = snap->sn_grinfolistp;
28837c478bd9Sstevel@tonic-gate 		for (; grlp != NULL; grlp = grlp->grl_next) {
28847c478bd9Sstevel@tonic-gate 			retval = send_groupinfo(fd, grlp->grl_grinfop);
28857c478bd9Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
28867c478bd9Sstevel@tonic-gate 				goto out;
28877c478bd9Sstevel@tonic-gate 		}
28887c478bd9Sstevel@tonic-gate 	out:
28897c478bd9Sstevel@tonic-gate 		ipmp_snap_free(snap);
28907c478bd9Sstevel@tonic-gate 		return (retval);
28917c478bd9Sstevel@tonic-gate 
28927c478bd9Sstevel@tonic-gate 	default:
28937c478bd9Sstevel@tonic-gate 		break;
28947c478bd9Sstevel@tonic-gate 
28957c478bd9Sstevel@tonic-gate 	}
28967c478bd9Sstevel@tonic-gate 	return (send_result(fd, IPMP_EPROTO, 0));
28977c478bd9Sstevel@tonic-gate }
28987c478bd9Sstevel@tonic-gate 
28997c478bd9Sstevel@tonic-gate /*
29007c478bd9Sstevel@tonic-gate  * Send the group information pointed to by `grinfop' on file descriptor `fd'.
29017c478bd9Sstevel@tonic-gate  * Returns an IPMP error code.
29027c478bd9Sstevel@tonic-gate  */
29037c478bd9Sstevel@tonic-gate static unsigned int
29047c478bd9Sstevel@tonic-gate send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
29057c478bd9Sstevel@tonic-gate {
29067c478bd9Sstevel@tonic-gate 	ipmp_iflist_t	*iflistp = grinfop->gr_iflistp;
29077c478bd9Sstevel@tonic-gate 	unsigned int	retval;
29087c478bd9Sstevel@tonic-gate 
29097c478bd9Sstevel@tonic-gate 	retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop);
29107c478bd9Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS)
29117c478bd9Sstevel@tonic-gate 		return (retval);
29127c478bd9Sstevel@tonic-gate 
29137c478bd9Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_IFLIST,
29147c478bd9Sstevel@tonic-gate 	    IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp));
29157c478bd9Sstevel@tonic-gate }
29167c478bd9Sstevel@tonic-gate 
29177c478bd9Sstevel@tonic-gate /*
29187c478bd9Sstevel@tonic-gate  * Send the interface information pointed to by `ifinfop' on file descriptor
29197c478bd9Sstevel@tonic-gate  * `fd'.  Returns an IPMP error code.
29207c478bd9Sstevel@tonic-gate  */
29217c478bd9Sstevel@tonic-gate static unsigned int
29227c478bd9Sstevel@tonic-gate send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
29237c478bd9Sstevel@tonic-gate {
29247c478bd9Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop));
29257c478bd9Sstevel@tonic-gate }
29267c478bd9Sstevel@tonic-gate 
29277c478bd9Sstevel@tonic-gate /*
29287c478bd9Sstevel@tonic-gate  * Send the group list pointed to by `grlistp' on file descriptor `fd'.
29297c478bd9Sstevel@tonic-gate  * Returns an IPMP error code.
29307c478bd9Sstevel@tonic-gate  */
29317c478bd9Sstevel@tonic-gate static unsigned int
29327c478bd9Sstevel@tonic-gate send_grouplist(int fd, ipmp_grouplist_t *grlistp)
29337c478bd9Sstevel@tonic-gate {
29347c478bd9Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_GROUPLIST,
29357c478bd9Sstevel@tonic-gate 	    IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp));
29367c478bd9Sstevel@tonic-gate }
29377c478bd9Sstevel@tonic-gate 
29387c478bd9Sstevel@tonic-gate /*
29397c478bd9Sstevel@tonic-gate  * Initialize an mi_result_t structure using `error' and `syserror' and
29407c478bd9Sstevel@tonic-gate  * send it on file descriptor `fd'.  Returns an IPMP error code.
29417c478bd9Sstevel@tonic-gate  */
29427c478bd9Sstevel@tonic-gate static unsigned int
29437c478bd9Sstevel@tonic-gate send_result(int fd, unsigned int error, int syserror)
29447c478bd9Sstevel@tonic-gate {
29457c478bd9Sstevel@tonic-gate 	mi_result_t me;
29467c478bd9Sstevel@tonic-gate 
29477c478bd9Sstevel@tonic-gate 	me.me_mpathd_error = error;
29487c478bd9Sstevel@tonic-gate 	if (error == IPMP_FAILURE)
29497c478bd9Sstevel@tonic-gate 		me.me_sys_error = syserror;
29507c478bd9Sstevel@tonic-gate 	else
29517c478bd9Sstevel@tonic-gate 		me.me_sys_error = 0;
29527c478bd9Sstevel@tonic-gate 
29537c478bd9Sstevel@tonic-gate 	return (ipmp_write(fd, &me, sizeof (me)));
29547c478bd9Sstevel@tonic-gate }
29557c478bd9Sstevel@tonic-gate 
29567c478bd9Sstevel@tonic-gate /*
29577c478bd9Sstevel@tonic-gate  * Daemonize the process.
29587c478bd9Sstevel@tonic-gate  */
29597c478bd9Sstevel@tonic-gate static boolean_t
29607c478bd9Sstevel@tonic-gate daemonize(void)
29617c478bd9Sstevel@tonic-gate {
29627c478bd9Sstevel@tonic-gate 	switch (fork()) {
29637c478bd9Sstevel@tonic-gate 	case -1:
29647c478bd9Sstevel@tonic-gate 		return (_B_FALSE);
29657c478bd9Sstevel@tonic-gate 
29667c478bd9Sstevel@tonic-gate 	case  0:
29677c478bd9Sstevel@tonic-gate 		/*
29687c478bd9Sstevel@tonic-gate 		 * Lose our controlling terminal, and become both a session
29697c478bd9Sstevel@tonic-gate 		 * leader and a process group leader.
29707c478bd9Sstevel@tonic-gate 		 */
29717c478bd9Sstevel@tonic-gate 		if (setsid() == -1)
29727c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
29737c478bd9Sstevel@tonic-gate 
29747c478bd9Sstevel@tonic-gate 		/*
29757c478bd9Sstevel@tonic-gate 		 * Under POSIX, a session leader can accidentally (through
29767c478bd9Sstevel@tonic-gate 		 * open(2)) acquire a controlling terminal if it does not
29777c478bd9Sstevel@tonic-gate 		 * have one.  Just to be safe, fork() again so we are not a
29787c478bd9Sstevel@tonic-gate 		 * session leader.
29797c478bd9Sstevel@tonic-gate 		 */
29807c478bd9Sstevel@tonic-gate 		switch (fork()) {
29817c478bd9Sstevel@tonic-gate 		case -1:
29827c478bd9Sstevel@tonic-gate 			return (_B_FALSE);
29837c478bd9Sstevel@tonic-gate 
29847c478bd9Sstevel@tonic-gate 		case 0:
29857c478bd9Sstevel@tonic-gate 			(void) chdir("/");
29867c478bd9Sstevel@tonic-gate 			(void) umask(022);
29877c478bd9Sstevel@tonic-gate 			(void) fdwalk(closefunc, NULL);
29887c478bd9Sstevel@tonic-gate 			break;
29897c478bd9Sstevel@tonic-gate 
29907c478bd9Sstevel@tonic-gate 		default:
29917c478bd9Sstevel@tonic-gate 			_exit(EXIT_SUCCESS);
29927c478bd9Sstevel@tonic-gate 		}
29937c478bd9Sstevel@tonic-gate 		break;
29947c478bd9Sstevel@tonic-gate 
29957c478bd9Sstevel@tonic-gate 	default:
29967c478bd9Sstevel@tonic-gate 		_exit(EXIT_SUCCESS);
29977c478bd9Sstevel@tonic-gate 	}
29987c478bd9Sstevel@tonic-gate 
29997c478bd9Sstevel@tonic-gate 	return (_B_TRUE);
30007c478bd9Sstevel@tonic-gate }
30017c478bd9Sstevel@tonic-gate 
30027c478bd9Sstevel@tonic-gate /*
30037c478bd9Sstevel@tonic-gate  * The parent has created some fds before forking on purpose, keep them open.
30047c478bd9Sstevel@tonic-gate  */
30057c478bd9Sstevel@tonic-gate static int
30067c478bd9Sstevel@tonic-gate closefunc(void *not_used, int fd)
30077c478bd9Sstevel@tonic-gate /* ARGSUSED */
30087c478bd9Sstevel@tonic-gate {
30097c478bd9Sstevel@tonic-gate 	if (fd != lsock_v4 && fd != lsock_v6)
30107c478bd9Sstevel@tonic-gate 		(void) close(fd);
30117c478bd9Sstevel@tonic-gate 	return (0);
30127c478bd9Sstevel@tonic-gate }
30137c478bd9Sstevel@tonic-gate 
30147c478bd9Sstevel@tonic-gate /* LOGGER */
30157c478bd9Sstevel@tonic-gate 
30167c478bd9Sstevel@tonic-gate #include <syslog.h>
30177c478bd9Sstevel@tonic-gate 
30187c478bd9Sstevel@tonic-gate /*
30197c478bd9Sstevel@tonic-gate  * Logging routines.  All routines log to syslog, unless the daemon is
30207c478bd9Sstevel@tonic-gate  * running in the foreground, in which case the logging goes to stderr.
30217c478bd9Sstevel@tonic-gate  *
30227c478bd9Sstevel@tonic-gate  * The following routines are available:
30237c478bd9Sstevel@tonic-gate  *
30247c478bd9Sstevel@tonic-gate  *	logdebug(): A printf-like function for outputting debug messages
30257c478bd9Sstevel@tonic-gate  *	(messages at LOG_DEBUG) that are only of use to developers.
30267c478bd9Sstevel@tonic-gate  *
30277c478bd9Sstevel@tonic-gate  *	logtrace(): A printf-like function for outputting tracing messages
30287c478bd9Sstevel@tonic-gate  *	(messages at LOG_INFO) from the daemon.	 This is typically used
30297c478bd9Sstevel@tonic-gate  *	to log the receipt of interesting network-related conditions.
30307c478bd9Sstevel@tonic-gate  *
30317c478bd9Sstevel@tonic-gate  *	logerr(): A printf-like function for outputting error messages
30327c478bd9Sstevel@tonic-gate  *	(messages at LOG_ERR) from the daemon.
30337c478bd9Sstevel@tonic-gate  *
30347c478bd9Sstevel@tonic-gate  *	logperror*(): A set of functions used to output error messages
30357c478bd9Sstevel@tonic-gate  *	(messages at LOG_ERR); these automatically append strerror(errno)
30367c478bd9Sstevel@tonic-gate  *	and a newline to the message passed to them.
30377c478bd9Sstevel@tonic-gate  *
30387c478bd9Sstevel@tonic-gate  * NOTE: since the logging functions write to syslog, the messages passed
30397c478bd9Sstevel@tonic-gate  *	 to them are not eligible for localization.  Thus, gettext() must
30407c478bd9Sstevel@tonic-gate  *	 *not* be used.
30417c478bd9Sstevel@tonic-gate  */
30427c478bd9Sstevel@tonic-gate 
/*
 * Nonzero once initlog() has been called.  The log*() routines send their
 * output to syslog when this is set, and to stderr otherwise.
 */
static int logging = 0;

/*
 * Switch the logging routines over to syslog and open the syslog
 * connection under the "in.mpathd" identity.
 */
static void
initlog(void)
{
	logging += 1;
	openlog("in.mpathd", LOG_PID | LOG_CONS, LOG_DAEMON);
}
30517c478bd9Sstevel@tonic-gate 
30527c478bd9Sstevel@tonic-gate /* PRINTFLIKE1 */
30537c478bd9Sstevel@tonic-gate void
30547c478bd9Sstevel@tonic-gate logerr(char *fmt, ...)
30557c478bd9Sstevel@tonic-gate {
30567c478bd9Sstevel@tonic-gate 	va_list ap;
30577c478bd9Sstevel@tonic-gate 
30587c478bd9Sstevel@tonic-gate 	va_start(ap, fmt);
30597c478bd9Sstevel@tonic-gate 
30607c478bd9Sstevel@tonic-gate 	if (logging)
30617c478bd9Sstevel@tonic-gate 		vsyslog(LOG_ERR, fmt, ap);
30627c478bd9Sstevel@tonic-gate 	else
30637c478bd9Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30647c478bd9Sstevel@tonic-gate 	va_end(ap);
30657c478bd9Sstevel@tonic-gate }
30667c478bd9Sstevel@tonic-gate 
30677c478bd9Sstevel@tonic-gate /* PRINTFLIKE1 */
30687c478bd9Sstevel@tonic-gate void
30697c478bd9Sstevel@tonic-gate logtrace(char *fmt, ...)
30707c478bd9Sstevel@tonic-gate {
30717c478bd9Sstevel@tonic-gate 	va_list ap;
30727c478bd9Sstevel@tonic-gate 
30737c478bd9Sstevel@tonic-gate 	va_start(ap, fmt);
30747c478bd9Sstevel@tonic-gate 
30757c478bd9Sstevel@tonic-gate 	if (logging)
30767c478bd9Sstevel@tonic-gate 		vsyslog(LOG_INFO, fmt, ap);
30777c478bd9Sstevel@tonic-gate 	else
30787c478bd9Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30797c478bd9Sstevel@tonic-gate 	va_end(ap);
30807c478bd9Sstevel@tonic-gate }
30817c478bd9Sstevel@tonic-gate 
30827c478bd9Sstevel@tonic-gate /* PRINTFLIKE1 */
30837c478bd9Sstevel@tonic-gate void
30847c478bd9Sstevel@tonic-gate logdebug(char *fmt, ...)
30857c478bd9Sstevel@tonic-gate {
30867c478bd9Sstevel@tonic-gate 	va_list ap;
30877c478bd9Sstevel@tonic-gate 
30887c478bd9Sstevel@tonic-gate 	va_start(ap, fmt);
30897c478bd9Sstevel@tonic-gate 
30907c478bd9Sstevel@tonic-gate 	if (logging)
30917c478bd9Sstevel@tonic-gate 		vsyslog(LOG_DEBUG, fmt, ap);
30927c478bd9Sstevel@tonic-gate 	else
30937c478bd9Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30947c478bd9Sstevel@tonic-gate 	va_end(ap);
30957c478bd9Sstevel@tonic-gate }
30967c478bd9Sstevel@tonic-gate 
30977c478bd9Sstevel@tonic-gate /* PRINTFLIKE1 */
30987c478bd9Sstevel@tonic-gate void
30997c478bd9Sstevel@tonic-gate logperror(char *str)
31007c478bd9Sstevel@tonic-gate {
31017c478bd9Sstevel@tonic-gate 	if (logging)
31027c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s: %m\n", str);
31037c478bd9Sstevel@tonic-gate 	else
31047c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
31057c478bd9Sstevel@tonic-gate }
31067c478bd9Sstevel@tonic-gate 
31077c478bd9Sstevel@tonic-gate void
31087c478bd9Sstevel@tonic-gate logperror_pii(struct phyint_instance *pii, char *str)
31097c478bd9Sstevel@tonic-gate {
31107c478bd9Sstevel@tonic-gate 	if (logging) {
31117c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
31127c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
31137c478bd9Sstevel@tonic-gate 	} else {
31147c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
31157c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
31167c478bd9Sstevel@tonic-gate 		    strerror(errno));
31177c478bd9Sstevel@tonic-gate 	}
31187c478bd9Sstevel@tonic-gate }
31197c478bd9Sstevel@tonic-gate 
31207c478bd9Sstevel@tonic-gate void
31217c478bd9Sstevel@tonic-gate logperror_li(struct logint *li, char *str)
31227c478bd9Sstevel@tonic-gate {
31237c478bd9Sstevel@tonic-gate 	struct	phyint_instance	*pii = li->li_phyint_inst;
31247c478bd9Sstevel@tonic-gate 
31257c478bd9Sstevel@tonic-gate 	if (logging) {
31267c478bd9Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
31277c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name);
31287c478bd9Sstevel@tonic-gate 	} else {
31297c478bd9Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
31307c478bd9Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name,
31317c478bd9Sstevel@tonic-gate 		    strerror(errno));
31327c478bd9Sstevel@tonic-gate 	}
31337c478bd9Sstevel@tonic-gate }
31347c478bd9Sstevel@tonic-gate 
31357c478bd9Sstevel@tonic-gate void
31367c478bd9Sstevel@tonic-gate close_probe_socket(struct phyint_instance *pii, boolean_t polled)
31377c478bd9Sstevel@tonic-gate {
31387c478bd9Sstevel@tonic-gate 	if (polled)
31397c478bd9Sstevel@tonic-gate 		(void) poll_remove(pii->pii_probe_sock);
31407c478bd9Sstevel@tonic-gate 	(void) close(pii->pii_probe_sock);
31417c478bd9Sstevel@tonic-gate 	pii->pii_probe_sock = -1;
31427c478bd9Sstevel@tonic-gate 	pii->pii_basetime_inited = 0;
31437c478bd9Sstevel@tonic-gate }
3144