1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/sockio.h>
30 #include <sys/sysevent/vrrp.h>
31 #include <sys/sysevent/eventdefs.h>
32 #include <sys/varargs.h>
33 #include <auth_attr.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <errno.h>
39 #include <unistd.h>
40 #include <zone.h>
41 #include <libsysevent.h>
42 #include <limits.h>
43 #include <locale.h>
44 #include <inetcfg.h>
45 #include <arpa/inet.h>
46 #include <signal.h>
47 #include <assert.h>
48 #include <ucred.h>
49 #include <bsm/adt.h>
50 #include <bsm/adt_event.h>
51 #include <priv_utils.h>
52 #include <libdllink.h>
53 #include <libdlvnic.h>
54 #include <pwd.h>
55 #include <libvrrpadm.h>
56 #include <net/route.h>
57 #include "vrrpd_impl.h"
58 
/*
 * A VRRP router can only start participating in the VRRP protocol of a
 * virtual router when all of the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't been
 * satisfied, the router will stay in the VRRP_STATE_INIT state. If all the
 * above conditions are met, the VRRP router will transition to either the
 * VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the VRRP
 * protocol.
 */
79 
/*
 * Advance the char pointer `p' past any leading whitespace.
 *
 * The character is converted to unsigned char before it is handed to
 * isspace(): passing a negative value (a byte >= 0x80 where char is signed)
 * is undefined behavior.
 *
 * The expansion is a bare while statement, so the macro must be used as a
 * complete statement followed by ';'. `p' is evaluated more than once and
 * must be a modifiable lvalue without side effects.
 */
#define	skip_whitespace(p)	while (isspace((unsigned char)*(p))) ++(p)

/* NOTE(review): presumably a message buffer size; its users are not visible here */
#define	BUFFSIZE	65536

/* Default configuration file; main() copies it into vrrpd_conffile */
#define	VRRPCONF	"/etc/inet/vrrp.conf"
85 
/*
 * State of one routing socket. vrrpd keeps one of these per address family
 * in vrrpd_rtsocks[].
 */
typedef struct vrrpd_rtsock_s {
	int		vrt_af;		/* address family (AF_INET or AF_INET6) */
	int		vrt_fd;		/* PF_ROUTE socket, -1 when not open */
	iu_event_id_t	vrt_eid;	/* event ID of the registration, */
					/* -1 when not registered */
} vrrpd_rtsock_t;
91 
/* Logging flag; daemon_init() sets it to 1 right after openlog() */
static int		vrrp_logflag = 0;
/*
 * Debug level taken from the '-d' option in main(); when it is 0, main()
 * calls daemon_init().
 * NOTE(review): typed boolean_t although main() stores atoi(optarg) in it --
 * confirm against any declaration shared with other files before changing.
 */
boolean_t		vrrp_debug_level = 0;
/* Event handler and timer queue, both created in vrrpd_init() */
iu_eh_t			*vrrpd_eh = NULL;
iu_tq_t			*vrrpd_timerq = NULL;
/* Handle filled in by vrrp_open() in vrrpd_init() */
static vrrp_handle_t	vrrpd_vh = NULL;
static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
						/* between vrrpd/libvrrpadm */
/* Event ID of the command socket registration, -1 when not registered */
static iu_event_id_t	vrrpd_cmdsock_eid = -1;
static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
						/* the virtual IP addresses */
/* AF_INET6 counterpart of vrrpd_ctlsock_fd */
static int		vrrpd_ctlsock6_fd = -1;
/* Routing sockets, one per address family; fd and event ID start at -1 */
static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
	{AF_INET, -1, -1},
	{AF_INET6, -1, -1}
};
/* ID of the scan timer scheduled in vrrpd_init(), -1 when none */
static iu_timer_id_t	vrrp_scan_timer_id = -1;

/* Global lists of VRRP routers and of interfaces */
TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
static struct vrrp_vr_list_s	vrrp_vr_list;
static struct vrrp_intf_list_s	vrrp_intf_list;
/* Configuration file path; VRRPCONF unless replaced by the '-f' option */
static char		vrrpd_conffile[MAXPATHLEN];

/*
 * Multicast address of VRRP advertisement in network byte order
 */
static vrrp_addr_t	vrrp_muladdr4;
static vrrp_addr_t	vrrp_muladdr6;

/* Delay passed to iu_schedule_timer_ms() for the scan timer */
static int		vrrpd_scan_interval = 20000;	/* ms */
122 
/*
 * macros to calculate skew_time and master_down_timer
 *
 *	SKEW_TIME            = intv * (256 - pri) / 256
 *	MASTER_DOWN_INTERVAL = 3 * intv + SKEW_TIME
 *
 * Note that the input is in centisecs and the output is in msecs.
 * NOTE(review): the macros themselves perform no unit conversion, so the
 * result is in whatever unit `intv' is given in -- confirm which unit the
 * callers actually pass.
 */
#define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
#define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))

/*
 * Same calculations for a VRRP router `vr', using its configured priority
 * and its vvr_master_adver_int.
 */
#define	SKEW_TIME_VR(vr)	\
	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
#define	MASTER_DOWN_INTERVAL_VR(vr)	\
	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)

/* Operation selectors; presumably the uint_t argument of vrrpd_updateconf() */
#define	VRRP_CONF_UPDATE	0x01
#define	VRRP_CONF_DELETE	0x02
138 
/* Returns a printable name for an address family (used in log messages) */
static char *af_str(int);

/* Timer queue (iu_tq_callback_t) and event handler (iu_eh_callback_t) callbacks */
static iu_tq_callback_t vrrp_adv_timeout;
static iu_tq_callback_t vrrp_b2m_timeout;
static iu_eh_callback_t vrrpd_sock_handler;
static iu_eh_callback_t vrrpd_rtsock_handler;
static iu_eh_callback_t vrrpd_cmdsock_handler;

/* Detaches the process from its parent; called from main() */
static int daemon_init();

/* Global setup/teardown and the sockets they create/destroy */
static vrrp_err_t vrrpd_init();
static void vrrpd_fini();
static vrrp_err_t vrrpd_cmdsock_create();
static void vrrpd_cmdsock_destroy();
static vrrp_err_t vrrpd_rtsock_create();
static void vrrpd_rtsock_destroy();
static vrrp_err_t vrrpd_ctlsock_create();
static void vrrpd_ctlsock_destroy();

/* Interface/IP address scanning and the per-router RX/TX sockets */
static void vrrpd_scan_timer(iu_tq_t *, void *);
static void vrrpd_scan(int);
static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
static void vrrpd_fini_rxsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
static void vrrpd_fini_txsock(vrrp_vr_t *);

/* Operations on a vrrp_vr_t */
static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
static void vrrpd_delete_vr(vrrp_vr_t *);

/* Operations that address a VRRP router by configuration or by name */
static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
static vrrp_err_t vrrpd_delete(const char *);
static vrrp_err_t vrrpd_enable(const char *, boolean_t);
static vrrp_err_t vrrpd_disable(const char *);
static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);

/*
 * Readers (vs_propread) and writers (vs_propwrite) of the persistent
 * properties listed in vrrp_prop_info_tbl[]
 */
static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);

/* Command handlers referenced from vrrp_cmd_info_tbl[] (vrrp_cmd_func_t) */
static void vrrpd_cmd_create(void *, void *, size_t *);
static void vrrpd_cmd_delete(void *, void *, size_t *);
static void vrrpd_cmd_enable(void *, void *, size_t *);
static void vrrpd_cmd_disable(void *, void *, size_t *);
static void vrrpd_cmd_modify(void *, void *, size_t *);
static void vrrpd_cmd_list(void *, void *, size_t *);
static void vrrpd_cmd_query(void *, void *, size_t *);
206 
207 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
208 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
209 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
210 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
211 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
212 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
213     uint64_t flags);
214 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
215 
/* Interface/IP address cache maintenance and primary address selection */
static void vrrpd_init_ipcache(int);
static void vrrpd_update_ipcache(int);
static int vrrpd_walk_ipaddr(icfg_if_t *, void *);
static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
    int, uint64_t);
static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
static void vrrpd_reselect_primary(vrrp_intf_t *);
static void vrrpd_reenable_all_vr();
static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);

/* Checksum calculation and VRRP packet construction */
static uint16_t in_cksum(int, uint16_t, void *);
static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
    uint16_t, vrrp_pkt_t *);
static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
    uint16_t, vrrp_pkt_t *);
static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);

/* Advertisement processing and transmission */
static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);

/* state transition functions */
static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
static void vrrpd_state_m2i(vrrp_vr_t *);
static void vrrpd_state_b2i(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);

/* Virtual IP address handling and state change event posting */
static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
    boolean_t);
static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);

/* Configuration file handling and process cleanup */
static void vrrpd_initconf();
static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
static void vrrpd_cleanup();

/* Logging and struct timeval helpers */
static void vrrp_log(int, char *, ...);
static int timeval_to_milli(struct timeval);
static struct timeval timeval_delta(struct timeval, struct timeval);
261 
/*
 * Description of one persistent property: its name plus the functions that
 * take the property in string form (vs_propread) and that produce it into
 * a caller-supplied buffer (vs_propwrite).
 */
typedef struct vrrpd_prop_s {
	char		*vs_propname;
	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;

/*
 * persistent VRRP properties array
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

/* Number of entries in vrrp_prop_info_tbl[] */
#define	VRRP_PROP_INFO_TABSIZE	\
	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
285 
/*
 * Signature shared by all command handlers.
 * NOTE(review): the arguments look like (request, reply buffer, reply size)
 * -- confirm against vrrpd_cmdsock_handler(), which is not visible here.
 */
typedef void vrrp_cmd_func_t(void *, void *, size_t *);

/*
 * Description of one administrative command: its type, the size of its
 * request, the size of its reply and the function that handles it.
 */
typedef struct vrrp_cmd_info_s {
	vrrp_cmd_type_t	vi_cmd;
	size_t		vi_reqsize;
	size_t		vi_acksize;	/* 0 if the size is variable */
	boolean_t	vi_setop;	/* Set operation? Check credentials */
	vrrp_cmd_func_t	*vi_cmdfunc;
} vrrp_cmd_info_t;

/*
 * Table of the supported commands. QUERY and LIST have a variable-size
 * reply (vi_acksize of 0) and are the only entries that are not set
 * operations.
 */
static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
	    _B_FALSE, vrrpd_cmd_query},
	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
	    _B_FALSE, vrrpd_cmd_list}
};

/*
 * Number of entries in vrrp_cmd_info_tbl[]. (The name says "DOOR", but the
 * commands arrive over the AF_UNIX command socket.)
 */
#define	VRRP_DOOR_INFO_TABLE_SIZE	\
	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
315 
316 static int
317 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
318 {
319 	if (af == AF_INET) {
320 		return (memcmp(&addr1->in4.sin_addr,
321 		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
322 	} else {
323 		return (memcmp(&addr1->in6.sin6_addr,
324 		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
325 	}
326 }
327 
328 static vrrp_vr_t *
329 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
330 {
331 	vrrp_vr_t *vr;
332 
333 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
334 		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
335 		    vr->vvr_conf.vvc_vrid == vrid &&
336 		    vr->vvr_conf.vvc_af == af) {
337 			break;
338 		}
339 	}
340 	return (vr);
341 }
342 
343 static vrrp_vr_t *
344 vrrpd_lookup_vr_by_name(const char *name)
345 {
346 	vrrp_vr_t *vr;
347 
348 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
349 		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
350 			break;
351 	}
352 	return (vr);
353 }
354 
355 static vrrp_intf_t *
356 vrrpd_lookup_if(const char *ifname, int af)
357 {
358 	vrrp_intf_t	*intf;
359 
360 	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
361 		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
362 		    af == intf->vvi_af) {
363 			break;
364 		}
365 	}
366 	return (intf);
367 }
368 
369 static vrrp_err_t
370 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
371     vrrp_intf_t **intfp)
372 {
373 	vrrp_intf_t	*intf;
374 
375 	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
376 	    ifname, af_str(af), ifindex);
377 
378 	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
379 		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
380 		    "allocate %s/%s interface", ifname, af_str(af));
381 		return (VRRP_ENOMEM);
382 	}
383 
384 	intf = *intfp;
385 	TAILQ_INIT(&intf->vvi_iplist);
386 	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
387 	intf->vvi_af = af;
388 	intf->vvi_sockfd = -1;
389 	intf->vvi_nvr = 0;
390 	intf->vvi_eid = -1;
391 	intf->vvi_pip = NULL;
392 	intf->vvi_ifindex = ifindex;
393 	intf->vvi_state = NODE_STATE_NEW;
394 	intf->vvi_vr_state = VRRP_STATE_INIT;
395 	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
396 	return (VRRP_SUCCESS);
397 }
398 
399 /*
400  * An interface is deleted. If update_vr is true, the deletion of the interface
401  * may cause the state transition of assoicated VRRP router (if this interface
402  * is either the primary or the VNIC interface of the VRRP router); otherwise,
403  * simply delete the interface without updating the VRRP router.
404  */
405 static void
406 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
407 {
408 	vrrp_ip_t	*ip;
409 
410 	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
411 	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
412 
413 	if (update_vr) {
414 		/*
415 		 * If a this interface is the physical interface or the VNIC
416 		 * of a VRRP router, the deletion of the interface (no IP
417 		 * address exists on this interface) may cause the state
418 		 * transition of the VRRP router. call vrrpd_remove_if()
419 		 * to find all corresponding VRRP router and update their
420 		 * states.
421 		 */
422 		vrrpd_remove_if(intf, _B_FALSE);
423 	}
424 
425 	/*
426 	 * First remove and delete all the IP addresses on the interface
427 	 */
428 	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
429 		ip = TAILQ_FIRST(&intf->vvi_iplist);
430 		vrrpd_delete_ip(intf, ip);
431 	}
432 
433 	/*
434 	 * Then remove and delete the interface
435 	 */
436 	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
437 	(void) free(intf);
438 }
439 
440 static vrrp_err_t
441 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
442     uint64_t flags)
443 {
444 	vrrp_ip_t	*ip;
445 	char		abuf[INET6_ADDRSTRLEN];
446 
447 	/* LINTED E_CONSTANT_CONDITION */
448 	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
449 	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
450 	    intf->vvi_ifname, lifname, abuf, flags);
451 
452 	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
453 		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
454 		    "failed to allocate IP", lifname, abuf);
455 		return (VRRP_ENOMEM);
456 	}
457 
458 	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
459 	ip->vip_state = NODE_STATE_NEW;
460 	ip->vip_flags = flags;
461 	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
462 
463 	/*
464 	 * Make sure link-local IPv6 IP addresses are at the head of the list
465 	 */
466 	if (intf->vvi_af == AF_INET6 &&
467 	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
468 		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
469 	} else {
470 		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
471 	}
472 	return (VRRP_SUCCESS);
473 }
474 
475 static void
476 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
477 {
478 	char	abuf[INET6_ADDRSTRLEN];
479 	int	af = intf->vvi_af;
480 
481 	/* LINTED E_CONSTANT_CONDITION */
482 	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
483 	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
484 	    intf->vvi_ifname, ip->vip_lifname, abuf,
485 	    intf->vvi_pip == ip ? "" : "not ");
486 
487 	if (intf->vvi_pip == ip)
488 		intf->vvi_pip = NULL;
489 
490 	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
491 	(void) free(ip);
492 }
493 
494 static char *
495 rtm_event2str(uchar_t event)
496 {
497 	switch (event) {
498 	case RTM_NEWADDR:
499 		return ("RTM_NEWADDR");
500 	case RTM_DELADDR:
501 		return ("RTM_DELADDR");
502 	case RTM_IFINFO:
503 		return ("RTM_IFINFO");
504 	case RTM_ADD:
505 		return ("RTM_ADD");
506 	case RTM_DELETE:
507 		return ("RTM_DELETE");
508 	case RTM_CHANGE:
509 		return ("RTM_CHANGE");
510 	case RTM_OLDADD:
511 		return ("RTM_OLDADD");
512 	case RTM_OLDDEL:
513 		return ("RTM_OLDDEL");
514 	case RTM_CHGADDR:
515 		return ("RTM_CHGADDR");
516 	case RTM_FREEADDR:
517 		return ("RTM_FREEADDR");
518 	default:
519 		return ("RTM_OTHER");
520 	}
521 }
522 
/*
 * Entry point of the daemon.
 *
 * Options:
 *	-d level	debug level; with a non-zero level the process is
 *			not daemonized (daemon_init() is skipped)
 *	-f file		configuration file to use instead of VRRPCONF
 * Any other option is silently ignored.
 *
 * Sets up the privileges and the signal handlers, daemonizes, initializes
 * the global state (vrrpd_init()), reads the configuration file
 * (vrrpd_initconf()) and then runs the event loop. Returns EXIT_FAILURE if
 * one of the setup steps fails.
 */
int
main(int argc, char *argv[])
{
	int c, err;
	struct sigaction sa;
	sigset_t mask;
	struct rlimit rl;

	(void) setlocale(LC_ALL, "");
	(void) textdomain(TEXT_DOMAIN);

	/*
	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACCESS
	 * and PRIV_NET_ICMPACCESS to open the raw socket, PRIV_SYS_IP_CONFIG
	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
	 * setrlimit().
	 *
	 * Note that sysevent is not supported in non-global zones.
	 */
	if (getzoneid() == GLOBAL_ZONEID) {
		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
	} else {
		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
	}

	if (err == -1) {
		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
		return (EXIT_FAILURE);
	}

	/*
	 * If vrrpd is started by other process, it will inherit the
	 * signal block mask. We unblock all signals to make sure the
	 * signal handling will work normally.
	 */
	(void) sigfillset(&mask);
	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
	/* SIGINT, SIGQUIT and SIGTERM all run vrrpd_cleanup(), which exits. */
	sa.sa_handler = vrrpd_cleanup;
	sa.sa_flags = 0;
	(void) sigemptyset(&sa.sa_mask);
	(void) sigaction(SIGINT, &sa, NULL);
	(void) sigaction(SIGQUIT, &sa, NULL);
	(void) sigaction(SIGTERM, &sa, NULL);

	/* Defaults, possibly overridden by the command line options. */
	vrrp_debug_level = 0;
	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
		switch (c) {
		case 'd':
			vrrp_debug_level = atoi(optarg);
			break;
		case 'f':
			(void) strlcpy(vrrpd_conffile, optarg,
			    sizeof (vrrpd_conffile));
			break;
		default:
			break;
		}
	}

	/* Close every descriptor other than stdin, stdout and stderr. */
	closefrom(3);
	/* Only daemonize when no debug level was requested. */
	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
		return (EXIT_FAILURE);
	}

	/* Lift the limit on the number of open file descriptors. */
	rl.rlim_cur = RLIM_INFINITY;
	rl.rlim_max = RLIM_INFINITY;
	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
		return (EXIT_FAILURE);
	}

	if (vrrpd_init() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
		return (EXIT_FAILURE);
	}

	/*
	 * Get rid of unneeded privileges.
	 */
	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);

	/*
	 * Read the configuration and initialize the existing VRRP
	 * configuration
	 */
	vrrpd_initconf();

	/*
	 * Start the loop to handle the timer and the IO events.
	 */
	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
	case -1:
		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
		    "abnormally");
		break;
	default:
		break;
	}

	/*
	 * vrrpd_cleanup() ends with exit(1), so the return statement below
	 * is not reached.
	 */
	vrrpd_cleanup();
	return (EXIT_SUCCESS);
}
632 
633 static int
634 daemon_init()
635 {
636 	pid_t	pid;
637 
638 	vrrp_log(VRRP_DBG0, "daemon_init()");
639 
640 	if (getenv("SMF_FMRI") == NULL) {
641 		vrrp_log(VRRP_ERR, "main(): vrrpd is an smf(5) managed service "
642 		    "and should not be run from the command line.");
643 		return (-1);
644 	}
645 
646 	if ((pid = fork()) < 0)
647 		return (-1);
648 
649 	if (pid != 0) {
650 		/* in parent process: do nothing. */
651 		exit(0);
652 	}
653 
654 	/*
655 	 * in child process, became a daemon, and return to main() to continue.
656 	 */
657 	(void) chdir("/");
658 	(void) setsid();
659 	(void) close(0);
660 	(void) close(1);
661 	(void) close(2);
662 	(void) open("/dev/null", O_RDWR, 0);
663 	(void) dup2(0, 1);
664 	(void) dup2(0, 2);
665 	openlog("vrrpd", LOG_PID, LOG_DAEMON);
666 	vrrp_logflag = 1;
667 	return (0);
668 }
669 
/*
 * Initialize the global state of the daemon: the router and interface
 * lists, the libvrrpadm handle, the timer queue, the event handler, the
 * command, control and routing sockets, the interface/IP address lists,
 * the periodic scan timer and the VRRP multicast addresses.
 *
 * Returns VRRP_SUCCESS, or VRRP_ESYS after calling vrrpd_fini() if any of
 * the steps fails.
 */
static vrrp_err_t
vrrpd_init()
{
	vrrp_err_t	err = VRRP_ESYS;

	vrrp_log(VRRP_DBG0, "vrrpd_init()");

	TAILQ_INIT(&vrrp_vr_list);
	TAILQ_INIT(&vrrp_intf_list);

	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
		goto fail;
	}

	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
		goto fail;
	}

	if ((vrrpd_eh = iu_eh_create()) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
		goto fail;
	}

	/*
	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
	 *
	 * This socket is used to receive the administrative requests and
	 * send back the results.
	 */
	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
		    "failed");
		goto fail;
	}

	/*
	 * Create the VRRP control socket used to bring up/down the virtual
	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
	 * the virtual IP addresses.
	 */
	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
		    "failed");
		goto fail;
	}

	/*
	 * Create the PF_ROUTE sockets used to listen to the routing socket
	 * messages and build the interface/IP address list.
	 */
	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
		    "failed");
		goto fail;
	}

	/*
	 * Build the list of interfaces and IP addresses. Also, start the
	 * timer to scan the interfaces/IP addresses periodically.
	 */
	vrrpd_scan(AF_INET);
	vrrpd_scan(AF_INET6);
	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
		goto fail;
	}

	/*
	 * Initialize the VRRP multicast address.
	 */
	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
	vrrp_muladdr4.in4.sin_family = AF_INET;
	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);

	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
	vrrp_muladdr6.in6.sin6_family = AF_INET6;
	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);

	return (VRRP_SUCCESS);

fail:
	/*
	 * vrrpd_fini() tears down everything unconditionally, whichever step
	 * failed.
	 */
	vrrpd_fini();
	return (err);
}
757 
/*
 * Undo vrrpd_init(): cancel the scan timer, destroy the routing, control
 * and command sockets, destroy the event handler and the timer queue, and
 * close the libvrrpadm handle.
 *
 * The router and interface lists must already be empty when this function
 * is called; this is asserted at the end.
 */
static void
vrrpd_fini()
{
	vrrp_log(VRRP_DBG0, "vrrpd_fini()");

	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
	vrrp_scan_timer_id = -1;

	/* The sockets go first; the event handler and timer queue follow. */
	vrrpd_rtsock_destroy();
	vrrpd_ctlsock_destroy();
	vrrpd_cmdsock_destroy();

	if (vrrpd_eh != NULL) {
		iu_eh_destroy(vrrpd_eh);
		vrrpd_eh = NULL;
	}

	if (vrrpd_timerq != NULL) {
		iu_tq_destroy(vrrpd_timerq);
		vrrpd_timerq = NULL;
	}

	vrrp_close(vrrpd_vh);
	vrrpd_vh = NULL;
	assert(TAILQ_EMPTY(&vrrp_vr_list));
	assert(TAILQ_EMPTY(&vrrp_intf_list));
}
785 
786 static void
787 vrrpd_cleanup(void)
788 {
789 	vrrp_vr_t	*vr;
790 	vrrp_intf_t	*intf;
791 
792 	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
793 
794 	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
795 		vr = TAILQ_FIRST(&vrrp_vr_list);
796 		vrrpd_delete_vr(vr);
797 	}
798 
799 	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
800 		intf = TAILQ_FIRST(&vrrp_intf_list);
801 		vrrpd_delete_if(intf, _B_FALSE);
802 	}
803 
804 	vrrpd_fini();
805 	closelog();
806 	exit(1);
807 }
808 
809 /*
810  * Read the configuration file and initialize all the existing VRRP routers.
811  */
812 static void
813 vrrpd_initconf()
814 {
815 	FILE *fp;
816 	char line[LINE_MAX];
817 	int linenum = 0;
818 	vrrp_vr_conf_t conf;
819 	vrrp_err_t err;
820 
821 	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
822 
823 	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
824 		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
825 		    vrrpd_conffile);
826 		return;
827 	}
828 
829 	while (fgets(line, sizeof (line), fp) != NULL) {
830 		linenum++;
831 		conf.vvc_vrid = VRRP_VRID_NONE;
832 		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
833 			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
834 			    linenum, line);
835 			continue;
836 		}
837 
838 		/*
839 		 * Blank or comment line
840 		 */
841 		if (conf.vvc_vrid == VRRP_VRID_NONE)
842 			continue;
843 
844 		/*
845 		 * No need to update the configuration since the VRRP router
846 		 * created/enabled based on the existing configuration.
847 		 */
848 		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
849 			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
850 			    "%s", conf.vvc_name, vrrp_err2str(err));
851 			continue;
852 		}
853 
854 		if (conf.vvc_enabled &&
855 		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
856 		    VRRP_SUCCESS)) {
857 			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
858 			    conf.vvc_name, vrrp_err2str(err));
859 		}
860 	}
861 
862 	(void) fclose(fp);
863 }
864 
865 /*
866  * Create the AF_UNIX socket used to communicate with libvrrpadm.
867  *
868  * This socket is used to receive the administrative request and
869  * send back the results.
870  */
871 static vrrp_err_t
872 vrrpd_cmdsock_create()
873 {
874 	iu_event_id_t		eid;
875 	struct sockaddr_un	laddr;
876 	int			sock, flags;
877 
878 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
879 
880 	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
881 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
882 		    "failed: %s", strerror(errno));
883 		return (VRRP_ESYS);
884 	}
885 
886 	/*
887 	 * Set it to be non-blocking.
888 	 */
889 	flags = fcntl(sock, F_GETFL, 0);
890 	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
891 
892 	/*
893 	 * Unlink first in case a previous daemon instance exited ungracefully.
894 	 */
895 	(void) unlink(VRRPD_SOCKET);
896 
897 	bzero(&laddr, sizeof (laddr));
898 	laddr.sun_family = AF_UNIX;
899 	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
900 	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
901 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
902 		    strerror(errno));
903 		(void) close(sock);
904 		return (VRRP_ESYS);
905 	}
906 
907 	if (listen(sock, 30) < 0) {
908 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
909 		    "failed: %s", strerror(errno));
910 		(void) close(sock);
911 		return (VRRP_ESYS);
912 	}
913 
914 	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
915 	    vrrpd_cmdsock_handler, NULL)) == -1) {
916 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
917 		    " failed");
918 		(void) close(sock);
919 		return (VRRP_ESYS);
920 	}
921 
922 	vrrpd_cmdsock_fd = sock;
923 	vrrpd_cmdsock_eid = eid;
924 	return (VRRP_SUCCESS);
925 }
926 
927 static void
928 vrrpd_cmdsock_destroy()
929 {
930 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
931 
932 	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
933 	(void) close(vrrpd_cmdsock_fd);
934 	vrrpd_cmdsock_fd = -1;
935 	vrrpd_cmdsock_eid = -1;
936 }
937 
938 /*
939  * Create the PF_ROUTER sockets used to listen to the routing socket
940  * messages and build the interface/IP address list. Create one for
941  * each address family (IPv4 and IPv6).
942  */
943 static vrrp_err_t
944 vrrpd_rtsock_create()
945 {
946 	int		i, flags, sock;
947 	iu_event_id_t	eid;
948 
949 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
950 
951 	for (i = 0; i < 2; i++) {
952 		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
953 		if (sock == -1) {
954 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
955 			    "failed: %s", strerror(errno));
956 			break;
957 		}
958 
959 		/*
960 		 * Set it to be non-blocking.
961 		 */
962 		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
963 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
964 			    "fcntl(F_GETFL) failed: %s", strerror(errno));
965 			break;
966 		}
967 
968 		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
969 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
970 			    "fcntl(F_SETFL) failed: %s", strerror(errno));
971 			break;
972 		}
973 
974 		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
975 		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
976 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
977 			    "rtsock %d(%s) failed", sock,
978 			    af_str(vrrpd_rtsocks[i].vrt_af));
979 			break;
980 		}
981 
982 		vrrpd_rtsocks[i].vrt_fd = sock;
983 		vrrpd_rtsocks[i].vrt_eid = eid;
984 	}
985 
986 	if (i != 2) {
987 		(void) close(sock);
988 		vrrpd_rtsock_destroy();
989 		return (VRRP_ESYS);
990 	}
991 
992 	return (VRRP_SUCCESS);
993 }
994 
995 static void
996 vrrpd_rtsock_destroy()
997 {
998 	int		i;
999 
1000 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1001 	for (i = 0; i < 2; i++) {
1002 		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1003 		    NULL);
1004 		(void) close(vrrpd_rtsocks[i].vrt_fd);
1005 		vrrpd_rtsocks[i].vrt_eid = -1;
1006 		vrrpd_rtsocks[i].vrt_fd = -1;
1007 	}
1008 }
1009 
1010 /*
1011  * Create the VRRP control socket used to bring up/down the virtual
1012  * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1013  * the virtual IP addresses.
1014  */
1015 static vrrp_err_t
1016 vrrpd_ctlsock_create()
1017 {
1018 	int	s, s6;
1019 	int	on = _B_TRUE;
1020 
1021 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1022 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1023 		    "failed: %s", strerror(errno));
1024 		return (VRRP_ESYS);
1025 	}
1026 	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1027 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1028 		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1029 		(void) close(s);
1030 		return (VRRP_ESYS);
1031 	}
1032 
1033 	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1034 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1035 		    "failed: %s", strerror(errno));
1036 		(void) close(s);
1037 		return (VRRP_ESYS);
1038 	}
1039 	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1040 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1041 		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1042 		(void) close(s);
1043 		(void) close(s6);
1044 		return (VRRP_ESYS);
1045 	}
1046 
1047 	vrrpd_ctlsock_fd = s;
1048 	vrrpd_ctlsock6_fd = s6;
1049 	return (VRRP_SUCCESS);
1050 }
1051 
1052 static void
1053 vrrpd_ctlsock_destroy()
1054 {
1055 	(void) close(vrrpd_ctlsock_fd);
1056 	vrrpd_ctlsock_fd = -1;
1057 	(void) close(vrrpd_ctlsock6_fd);
1058 	vrrpd_ctlsock6_fd = -1;
1059 }
1060 
1061 /*ARGSUSED*/
1062 static void
1063 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1064 {
1065 	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
1066 	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
1067 	vrrp_err_t		err;
1068 
1069 	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1070 	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1071 		/*
1072 		 * No need to update the configuration since it is already
1073 		 * done in the above vrrpd_create() call
1074 		 */
1075 		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1076 		if (err != VRRP_SUCCESS)
1077 			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1078 	}
1079 	ret->vrc_err = err;
1080 }
1081 
1082 /*ARGSUSED*/
1083 static void
1084 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1085 {
1086 	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
1087 	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;
1088 
1089 	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1090 }
1091 
1092 /*ARGSUSED*/
1093 static void
1094 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1095 {
1096 	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
1097 	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;
1098 
1099 	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1100 }
1101 
1102 /*ARGSUSED*/
1103 static void
1104 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1105 {
1106 	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
1107 	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;
1108 
1109 	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1110 }
1111 
1112 /*ARGSUSED*/
1113 static void
1114 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1115 {
1116 	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
1117 	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;
1118 
1119 	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1120 }
1121 
1122 static void
1123 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1124 {
1125 	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;
1126 
1127 	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1128 }
1129 
1130 static void
1131 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1132 {
1133 	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;
1134 
1135 	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1136 }
1137 
1138 /*
1139  * Write-type requeset must have the solaris.network.vrrp authorization.
1140  */
1141 static boolean_t
1142 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1143 {
1144 	ucred_t		*cred = NULL;
1145 	uid_t		uid;
1146 	struct passwd	*pw;
1147 	boolean_t	success = _B_FALSE;
1148 
1149 	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1150 
1151 	if (!cinfo->vi_setop)
1152 		return (_B_TRUE);
1153 
1154 	/*
1155 	 * Validate the credential
1156 	 */
1157 	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1158 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1159 		    "failed: %s", strerror(errno));
1160 		return (_B_FALSE);
1161 	}
1162 
1163 	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1164 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1165 		    "failed: %s", strerror(errno));
1166 		goto done;
1167 	}
1168 
1169 	if ((pw = getpwuid(uid)) == NULL) {
1170 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1171 		goto done;
1172 	}
1173 
1174 	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1175 
1176 done:
1177 	ucred_free(cred);
1178 	return (success);
1179 }
1180 
1181 /*
1182  * Process the administrative request from libvrrpadm
1183  */
1184 /* ARGSUSED */
1185 static void
1186 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1187     void *arg)
1188 {
1189 	vrrp_cmd_info_t		*cinfo = NULL;
1190 	vrrp_err_t		err = VRRP_SUCCESS;
1191 	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
1192 	size_t			cursize, acksize, len;
1193 	uint32_t		cmd;
1194 	int			connfd, i;
1195 	struct sockaddr_in	from;
1196 	socklen_t		fromlen;
1197 
1198 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1199 
1200 	fromlen = (socklen_t)sizeof (from);
1201 	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1202 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1203 		    strerror(errno));
1204 		return;
1205 	}
1206 
1207 	/*
1208 	 * First get the type of the request
1209 	 */
1210 	cursize = 0;
1211 	while (cursize < sizeof (uint32_t)) {
1212 		len = read(connfd, buf + cursize,
1213 		    sizeof (uint32_t) - cursize);
1214 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1215 			continue;
1216 		} else if (len > 0) {
1217 			cursize += len;
1218 			continue;
1219 		}
1220 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1221 		    "length");
1222 		(void) close(connfd);
1223 		return;
1224 	}
1225 
1226 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1227 	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1228 	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1229 		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1230 			cinfo = vrrp_cmd_info_tbl + i;
1231 			break;
1232 		}
1233 	}
1234 
1235 	if (cinfo == NULL) {
1236 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1237 		    "type %d", cmd);
1238 		err = VRRP_EINVAL;
1239 		goto done;
1240 	}
1241 
1242 	/*
1243 	 * Get the rest of the request.
1244 	 */
1245 	assert(cursize == sizeof (uint32_t));
1246 	while (cursize < cinfo->vi_reqsize) {
1247 		len = read(connfd, buf + cursize,
1248 		    cinfo->vi_reqsize - cursize);
1249 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1250 			continue;
1251 		} else if (len > 0) {
1252 			cursize += len;
1253 			continue;
1254 		}
1255 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1256 		    "length");
1257 		err = VRRP_EINVAL;
1258 		goto done;
1259 	}
1260 
1261 	/*
1262 	 * Validate the authorization
1263 	 */
1264 	if (!vrrp_auth_check(connfd, cinfo)) {
1265 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1266 		    "not sufficient authorization");
1267 		err = VRRP_EPERM;
1268 	}
1269 
1270 done:
1271 	/*
1272 	 * Ack the request
1273 	 */
1274 	if (err != 0) {
1275 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1276 		((vrrp_ret_t *)ackbuf)->vr_err = err;
1277 		acksize = sizeof (vrrp_ret_t);
1278 	} else {
1279 		/*
1280 		 * If the size of ack is varied, the cmdfunc callback
1281 		 * will set the right size.
1282 		 */
1283 		if ((acksize = cinfo->vi_acksize) == 0)
1284 			acksize = sizeof (ackbuf);
1285 
1286 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1287 		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1288 	}
1289 
1290 	/*
1291 	 * Send the ack back.
1292 	 */
1293 	cursize = 0;
1294 	while (cursize < acksize) {
1295 		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1296 		    0, (struct sockaddr *)&from, fromlen);
1297 		if (len == (size_t)-1 && errno == EAGAIN) {
1298 			continue;
1299 		} else if (len > 0) {
1300 			cursize += len;
1301 			continue;
1302 		} else {
1303 			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1304 			    "ack: %s", strerror(errno));
1305 			break;
1306 		}
1307 	}
1308 
1309 	(void) shutdown(connfd, SHUT_RDWR);
1310 	(void) close(connfd);
1311 }
1312 
1313 /*
1314  * Process the routing socket messages and update the interfaces/IP addresses
1315  * list
1316  */
1317 /* ARGSUSED */
1318 static void
1319 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1320     iu_event_id_t id, void *arg)
1321 {
1322 	char			buf[BUFFSIZE];
1323 	struct ifa_msghdr	*ifam;
1324 	int			nbytes;
1325 	int			af = *(int *)arg;
1326 	boolean_t		scanif = _B_FALSE;
1327 
1328 	for (;;) {
1329 		nbytes = read(s, buf, sizeof (buf));
1330 		if (nbytes <= 0) {
1331 			/* No more messages */
1332 			break;
1333 		}
1334 
1335 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1336 		ifam = (struct ifa_msghdr *)buf;
1337 		if (ifam->ifam_version != RTM_VERSION) {
1338 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1339 			    "not understood", ifam->ifam_version);
1340 			break;
1341 		}
1342 
1343 		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1344 		    rtm_event2str(ifam->ifam_type));
1345 
1346 		switch (ifam->ifam_type) {
1347 		case RTM_FREEADDR:
1348 		case RTM_CHGADDR:
1349 		case RTM_NEWADDR:
1350 		case RTM_DELADDR:
1351 			/*
1352 			 * An IP address has been created/updated/deleted or
1353 			 * brought up/down, re-initilialize the interface/IP
1354 			 * address list.
1355 			 */
1356 			scanif = _B_TRUE;
1357 			break;
1358 		default:
1359 			/* Not interesting */
1360 			break;
1361 		}
1362 	}
1363 
1364 	if (scanif)
1365 		vrrpd_scan(af);
1366 }
1367 
1368 /*
1369  * Periodically scan the interface/IP addresses on the system.
1370  */
1371 /* ARGSUSED */
1372 static void
1373 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1374 {
1375 	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1376 	vrrpd_scan(AF_INET);
1377 	vrrpd_scan(AF_INET6);
1378 }
1379 
1380 /*
1381  * Get the list of the interface/IP addresses of the specified address
1382  * family.
1383  */
1384 static void
1385 vrrpd_scan(int af)
1386 {
1387 	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1388 
1389 again:
1390 	vrrpd_init_ipcache(af);
1391 
1392 	/*
1393 	 * If interface index changes, walk again.
1394 	 */
1395 	if (icfg_iterate_if(af, ICFG_PLUMBED, NULL,
1396 	    vrrpd_walk_ipaddr) != ICFG_SUCCESS)
1397 		goto again;
1398 
1399 	vrrpd_update_ipcache(af);
1400 }
1401 
1402 /*
1403  * First mark all IP addresses of the specific address family to be removed.
1404  * This flag will then be cleared when we walk up all the IP addresses.
1405  */
1406 static void
1407 vrrpd_init_ipcache(int af)
1408 {
1409 	vrrp_intf_t	*intf, *next_intf;
1410 	vrrp_ip_t	*ip, *nextip;
1411 	char		abuf[INET6_ADDRSTRLEN];
1412 
1413 	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1414 
1415 	next_intf = TAILQ_FIRST(&vrrp_intf_list);
1416 	while ((intf = next_intf) != NULL) {
1417 		next_intf = TAILQ_NEXT(intf, vvi_next);
1418 		if (intf->vvi_af != af)
1419 			continue;
1420 
1421 		/*
1422 		 * If the interface is still marked as new, it means that this
1423 		 * vrrpd_init_ipcache() call is a result of ifindex change,
1424 		 * which causes the re-walk of all the interfaces (see
1425 		 * vrrpd_add_ipaddr()), and some interfaces are still marked
1426 		 * as new during the last walk. In this case, delete this
1427 		 * interface with the "update_vr" argument to be _B_FALSE,
1428 		 * since no VRRP router has been assoicated with this
1429 		 * interface yet (the association is done in
1430 		 * vrrpd_update_ipcache()).
1431 		 *
1432 		 * This interface will be re-added later if it still exists.
1433 		 */
1434 		if (intf->vvi_state == NODE_STATE_NEW) {
1435 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1436 			    "(%d), may be added later", intf->vvi_ifname,
1437 			    intf->vvi_ifindex);
1438 			vrrpd_delete_if(intf, _B_FALSE);
1439 			continue;
1440 		}
1441 
1442 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1443 		    ip = nextip) {
1444 			nextip = TAILQ_NEXT(ip, vip_next);
1445 			/* LINTED E_CONSTANT_CONDITION */
1446 			VRRPADDR2STR(af, &ip->vip_addr, abuf,
1447 			    INET6_ADDRSTRLEN, _B_FALSE);
1448 
1449 			if (ip->vip_state != NODE_STATE_NEW) {
1450 				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1451 				    "%s(%s/0x%x))", intf->vvi_ifname,
1452 				    intf->vvi_ifindex, ip->vip_lifname,
1453 				    abuf, ip->vip_flags);
1454 				ip->vip_state = NODE_STATE_STALE;
1455 				continue;
1456 			}
1457 
1458 			/*
1459 			 * If the IP is still marked as new, it means that
1460 			 * this vrrpd_init_ipcache() call is a result of
1461 			 * ifindex change, which causes the re-walk of all
1462 			 * the IP addresses (see vrrpd_add_ipaddr()).
1463 			 * Delete this IP.
1464 			 *
1465 			 * This IP will be readded later if it still exists.
1466 			 */
1467 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1468 			    "%s/%d , %s(%s)", intf->vvi_ifname,
1469 			    intf->vvi_ifindex, ip->vip_lifname, abuf);
1470 			vrrpd_delete_ip(intf, ip);
1471 		}
1472 	}
1473 }
1474 
1475 /*
1476  * Walk all the IP addresses on the given interface and update its
1477  * addresses list. Return ICFG_FAILURE if it is required to walk
1478  * all the interfaces again (one of the interface index changes in between).
1479  */
1480 /* ARGSUSED */
1481 static int
1482 vrrpd_walk_ipaddr(icfg_if_t *intf, void *arg)
1483 {
1484 	icfg_handle_t	ih;
1485 	int		ifindex;
1486 	vrrp_addr_t	addr;
1487 	socklen_t	addrlen = (socklen_t)sizeof (struct sockaddr_in6);
1488 	int		prefixlen;
1489 	uint64_t	flags;
1490 	int		err = ICFG_SUCCESS;
1491 
1492 	vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s)", intf->if_name,
1493 	    af_str(intf->if_protocol));
1494 
1495 	if (icfg_open(&ih, intf) != ICFG_SUCCESS) {
1496 		vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): icfg_open() "
1497 		    "failed: %s", intf->if_name, af_str(intf->if_protocol),
1498 		    strerror(errno));
1499 		return (err);
1500 	}
1501 
1502 	if (icfg_get_flags(ih, &flags) != ICFG_SUCCESS) {
1503 		if (errno != ENXIO && errno != ENOENT) {
1504 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): "
1505 			    "icfg_get_flags() failed %s", intf->if_name,
1506 			    af_str(intf->if_protocol), strerror(errno));
1507 		}
1508 		goto done;
1509 	}
1510 
1511 	/*
1512 	 * skip virtual/IPMP/P2P interfaces.
1513 	 */
1514 	if ((flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) != 0) {
1515 		vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s) skipped",
1516 		    intf->if_name, af_str(intf->if_protocol));
1517 		goto done;
1518 	}
1519 
1520 	if (icfg_get_index(ih, &ifindex) != ICFG_SUCCESS) {
1521 		if (errno != ENXIO && errno != ENOENT) {
1522 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) "
1523 			    "icfg_get_index() failed: %s", intf->if_name,
1524 			    af_str(intf->if_protocol), strerror(errno));
1525 		}
1526 		goto done;
1527 	}
1528 
1529 	if (icfg_get_addr(ih, (struct sockaddr *)&addr, &addrlen,
1530 	    &prefixlen, _B_FALSE) != ICFG_SUCCESS) {
1531 		if (errno != ENXIO && errno != ENOENT) {
1532 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) "
1533 			    "icfg_get_addr() failed: %s", intf->if_name,
1534 			    af_str(intf->if_protocol), strerror(errno));
1535 		}
1536 		goto done;
1537 	}
1538 
1539 	/*
1540 	 * Filter out the all-zero IP address.
1541 	 */
1542 	if (VRRPADDR_UNSPECIFIED(intf->if_protocol, &addr))
1543 		goto done;
1544 
1545 	/*
1546 	 * The interface is unplumbed/replumbed during we walk the IP
1547 	 * addresses. Try walk the IP addresses one more time.
1548 	 */
1549 	if (vrrpd_add_ipaddr(intf->if_name, intf->if_protocol,
1550 	    &addr, ifindex, flags) == VRRP_EAGAIN)
1551 		err = ICFG_FAILURE;
1552 
1553 done:
1554 	icfg_close(ih);
1555 	return (err);
1556 }
1557 
1558 /*
1559  * Given the information of each IP address, update the interface and
1560  * IP addresses list
1561  */
1562 static vrrp_err_t
1563 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1564     uint64_t flags)
1565 {
1566 	char		ifname[LIFNAMSIZ], *c;
1567 	vrrp_intf_t	*intf;
1568 	vrrp_ip_t	*ip;
1569 	char		abuf[INET6_ADDRSTRLEN];
1570 	vrrp_err_t	err;
1571 
1572 	/* LINTED E_CONSTANT_CONDITION */
1573 	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1574 	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1575 	    abuf, ifindex, flags);
1576 
1577 	/*
1578 	 * Get the physical interface name from the logical interface name.
1579 	 */
1580 	(void) strlcpy(ifname, lifname, sizeof (ifname));
1581 	if ((c = strchr(ifname, ':')) != NULL)
1582 		*c = '\0';
1583 
1584 	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1585 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1586 		err = vrrpd_create_if(ifname, af, ifindex, &intf);
1587 		if (err != VRRP_SUCCESS)
1588 			return (err);
1589 	} else if (intf->vvi_ifindex != ifindex) {
1590 		/*
1591 		 * If index changes, it means that this interface is
1592 		 * unplumbed/replumbed since we last checked. If this
1593 		 * interface is not used by any VRRP router, just
1594 		 * update its ifindex, and the IP addresses list will
1595 		 * be updated later. Otherwise, return EAGAIN to rewalk
1596 		 * all the IP addresses from the beginning.
1597 		 */
1598 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1599 		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1600 		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1601 			intf->vvi_ifindex = ifindex;
1602 		} else {
1603 			/*
1604 			 * delete this interface from the list if this
1605 			 * interface has already been assoicated with
1606 			 * any VRRP routers.
1607 			 */
1608 			vrrpd_delete_if(intf, _B_TRUE);
1609 			return (VRRP_EAGAIN);
1610 		}
1611 	}
1612 
1613 	/*
1614 	 * Does this IP address already exist?
1615 	 */
1616 	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1617 		if (strcmp(ip->vip_lifname, lifname) == 0)
1618 			break;
1619 	}
1620 
1621 	if (ip != NULL) {
1622 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1623 		    lifname, abuf);
1624 		ip->vip_state = NODE_STATE_NONE;
1625 		ip->vip_flags = flags;
1626 		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1627 			/*
1628 			 * Address has been changed, mark it as new
1629 			 * If this address is already selected as the
1630 			 * primary IP address, the new IP will be checked
1631 			 * to see whether it is still qualified as the
1632 			 * primary IP address. If not, the primary IP
1633 			 * address will be reselected.
1634 			 */
1635 			(void) memcpy(&ip->vip_addr, addr,
1636 			    sizeof (vrrp_addr_t));
1637 
1638 			ip->vip_state = NODE_STATE_NEW;
1639 		}
1640 	} else {
1641 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1642 		    lifname, abuf);
1643 
1644 		err = vrrpd_create_ip(intf, lifname, addr, flags);
1645 		if (err != VRRP_SUCCESS)
1646 			return (err);
1647 	}
1648 	return (VRRP_SUCCESS);
1649 }
1650 
1651 /*
1652  * Update the interface and IP addresses list. Remove the ones that have been
1653  * staled since last time we walk the IP addresses and updated the ones that
1654  * have been changed.
1655  */
1656 static void
1657 vrrpd_update_ipcache(int af)
1658 {
1659 	vrrp_intf_t	*intf, *nextif;
1660 	vrrp_ip_t	*ip, *nextip;
1661 	char		abuf[INET6_ADDRSTRLEN];
1662 	boolean_t	primary_selected;
1663 	boolean_t	primary_now_selected;
1664 	boolean_t	need_reenable = _B_FALSE;
1665 
1666 	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));
1667 
1668 	nextif = TAILQ_FIRST(&vrrp_intf_list);
1669 	while ((intf = nextif) != NULL) {
1670 		nextif = TAILQ_NEXT(intf, vvi_next);
1671 		if (intf->vvi_af != af)
1672 			continue;
1673 
1674 		/*
1675 		 * Does the interface already select its primary IP address?
1676 		 */
1677 		primary_selected = (intf->vvi_pip != NULL);
1678 		assert(!primary_selected || IS_PRIMARY_INTF(intf));
1679 
1680 		/*
1681 		 * Removed the IP addresses that have been unconfigured.
1682 		 */
1683 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1684 		    ip = nextip) {
1685 			nextip = TAILQ_NEXT(ip, vip_next);
1686 			if (ip->vip_state != NODE_STATE_STALE)
1687 				continue;
1688 
1689 			/* LINTED E_CONSTANT_CONDITION */
1690 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1691 			    _B_FALSE);
1692 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
1693 			    "is removed over %s", abuf, intf->vvi_ifname);
1694 			vrrpd_delete_ip(intf, ip);
1695 		}
1696 
1697 		/*
1698 		 * No IP addresses left, delete this interface.
1699 		 */
1700 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1701 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1702 			    "no IP left over %s", intf->vvi_ifname);
1703 			vrrpd_delete_if(intf, _B_TRUE);
1704 			continue;
1705 		}
1706 
1707 		/*
1708 		 * If this is selected ss the physical interface for any
1709 		 * VRRP router, reselect the primary address if needed.
1710 		 */
1711 		if (IS_PRIMARY_INTF(intf)) {
1712 			vrrpd_reselect_primary(intf);
1713 			primary_now_selected = (intf->vvi_pip != NULL);
1714 
1715 			/*
1716 			 * Cannot find the new primary IP address.
1717 			 */
1718 			if (primary_selected && !primary_now_selected) {
1719 				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
1720 				    "reselect primary IP on %s failed",
1721 				    intf->vvi_ifname);
1722 				vrrpd_remove_if(intf, _B_TRUE);
1723 			} else if (!primary_selected && primary_now_selected) {
1724 				/*
1725 				 * The primary IP address is successfully
1726 				 * selected on the physical interfacew we
1727 				 * need to walk through all the VRRP routers
1728 				 * that is created on this physical interface
1729 				 * and see whether they can now be enabled.
1730 				 */
1731 				need_reenable = _B_TRUE;
1732 			}
1733 		}
1734 
1735 		/*
1736 		 * For every new virtual IP address, bring up/down it based
1737 		 * on the state of VRRP router.
1738 		 *
1739 		 * Note that it is fine to not update the IP's vip_flags field
1740 		 * even if vrrpd_virtualip_updateone() changed the address's
1741 		 * up/down state, since the vip_flags field is only used for
1742 		 * select primary IP address over a physical interface, and
1743 		 * vrrpd_virtualip_updateone() only affects the virtual IP
1744 		 * address's status.
1745 		 */
1746 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1747 		    ip = nextip) {
1748 			nextip = TAILQ_NEXT(ip, vip_next);
1749 			/* LINTED E_CONSTANT_CONDITION */
1750 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1751 			    _B_FALSE);
1752 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1753 			    "IP %s over %s%s", abuf, intf->vvi_ifname,
1754 			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");
1755 
1756 			if (IS_VIRTUAL_INTF(intf)) {
1757 				/*
1758 				 * If this IP is new, update its up/down state
1759 				 * based on the virtual interface's state
1760 				 * (which is determined by the VRRP router's
1761 				 * state). Otherwise, check only and prompt
1762 				 * warnings if its up/down state has been
1763 				 * changed.
1764 				 */
1765 				if (vrrpd_virtualip_updateone(intf, ip,
1766 				    ip->vip_state == NODE_STATE_NONE) !=
1767 				    VRRP_SUCCESS) {
1768 					vrrp_log(VRRP_DBG0,
1769 					    "vrrpd_update_ipcache(): "
1770 					    "IP %s over %s update failed", abuf,
1771 					    intf->vvi_ifname);
1772 					vrrpd_delete_ip(intf, ip);
1773 					continue;
1774 				}
1775 			}
1776 			ip->vip_state = NODE_STATE_NONE;
1777 		}
1778 
1779 		/*
1780 		 * The IP address is deleted when it is failed to be brought
1781 		 * up. If no IP addresses are left, delete this interface.
1782 		 */
1783 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1784 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1785 			    "no IP left over %s", intf->vvi_ifname);
1786 			vrrpd_delete_if(intf, _B_TRUE);
1787 			continue;
1788 		}
1789 
1790 		if (intf->vvi_state == NODE_STATE_NEW) {
1791 			/*
1792 			 * A new interface is found. This interface can be
1793 			 * the primary interface or the virtual VNIC
1794 			 * interface.  Again, we need to walk throught all
1795 			 * the VRRP routers to see whether some of them can
1796 			 * now be enabled because of the new primary IP
1797 			 * address or the new virtual IP addresses.
1798 			 */
1799 			intf->vvi_state = NODE_STATE_NONE;
1800 			need_reenable = _B_TRUE;
1801 		}
1802 	}
1803 
1804 	if (need_reenable)
1805 		vrrpd_reenable_all_vr();
1806 }
1807 
1808 /*
1809  * Reselect primary IP if:
1810  * - The existing primary IP is no longer qualified (removed or it is down or
1811  *   not a link-local IP for IPv6 VRRP router);
1812  * - This is a physical interface but no primary IP is chosen;
1813  */
1814 static void
1815 vrrpd_reselect_primary(vrrp_intf_t *intf)
1816 {
1817 	vrrp_ip_t	*ip;
1818 	char		abuf[INET6_ADDRSTRLEN];
1819 
1820 	assert(IS_PRIMARY_INTF(intf));
1821 
1822 	/*
1823 	 * If the interface's old primary IP address is still valid, return
1824 	 */
1825 	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1826 		return;
1827 
1828 	if (ip != NULL) {
1829 		/* LINTED E_CONSTANT_CONDITION */
1830 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1831 		    sizeof (abuf), _B_FALSE);
1832 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1833 		    "is no longer qualified", intf->vvi_ifname, abuf);
1834 	}
1835 
1836 	ip = vrrpd_select_primary(intf);
1837 	intf->vvi_pip = ip;
1838 
1839 	if (ip != NULL) {
1840 		/* LINTED E_CONSTANT_CONDITION */
1841 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1842 		    sizeof (abuf), _B_FALSE);
1843 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1844 		    "is selected", intf->vvi_ifname, abuf);
1845 	}
1846 }
1847 
1848 /*
1849  * Select the primary IP address. Since the link-local IP address is always
1850  * at the head of the IP address list, try to find the first UP IP address
1851  * and see whether it qualify.
1852  */
1853 static vrrp_ip_t *
1854 vrrpd_select_primary(vrrp_intf_t *pif)
1855 {
1856 	vrrp_ip_t	*pip;
1857 	char		abuf[INET6_ADDRSTRLEN];
1858 
1859 	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1860 
1861 	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1862 		assert(pip->vip_state != NODE_STATE_STALE);
1863 
1864 		/* LINTED E_CONSTANT_CONDITION */
1865 		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1866 		    INET6_ADDRSTRLEN, _B_FALSE);
1867 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1868 		    pif->vvi_ifname, abuf,
1869 		    (pip->vip_flags & IFF_UP) ? "up" : "down");
1870 
1871 		if (pip->vip_flags & IFF_UP)
1872 			break;
1873 	}
1874 
1875 	/*
1876 	 * Is this valid primary IP address?
1877 	 */
1878 	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1879 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1880 		    pif->vvi_ifname, af_str(pif->vvi_af));
1881 		return (NULL);
1882 	}
1883 	return (pip);
1884 }
1885 
1886 /*
1887  * This is a new interface. Check whether any VRRP router is waiting for it
1888  */
1889 static void
1890 vrrpd_reenable_all_vr()
1891 {
1892 	vrrp_vr_t *vr;
1893 
1894 	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1895 
1896 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1897 		if (vr->vvr_conf.vvc_enabled)
1898 			(void) vrrpd_enable_vr(vr);
1899 	}
1900 }
1901 
1902 /*
1903  * If primary_addr_gone is _B_TRUE, it means that we failed to select
1904  * the primary IP address on this (physical) interface; otherwise,
1905  * it means the interface is no longer available.
1906  */
1907 static void
1908 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1909 {
1910 	vrrp_vr_t *vr;
1911 
1912 	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1913 	    primary_addr_gone ? "primary address gone" : "interface deleted");
1914 
1915 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1916 		if (vr->vvr_conf.vvc_enabled)
1917 			vrrpd_disable_vr(vr, intf, primary_addr_gone);
1918 	}
1919 }
1920 
1921 /*
1922  * Update the VRRP configuration file based on the given configuration.
1923  * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1924  */
1925 static vrrp_err_t
1926 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1927 {
1928 	vrrp_vr_conf_t	conf;
1929 	FILE		*fp, *nfp;
1930 	int		nfd;
1931 	char		line[LINE_MAX];
1932 	char		newfile[MAXPATHLEN];
1933 	boolean_t	found = _B_FALSE;
1934 	vrrp_err_t	err = VRRP_SUCCESS;
1935 
1936 	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
1937 	    op == VRRP_CONF_UPDATE ? "update" : "delete");
1938 
1939 	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
1940 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
1941 		    vrrpd_conffile, strerror(errno));
1942 		return (VRRP_EDB);
1943 	}
1944 
1945 	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
1946 	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
1947 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
1948 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
1949 		    newfile, strerror(errno));
1950 		(void) fclose(fp);
1951 		return (VRRP_EDB);
1952 	}
1953 
1954 	if ((nfp = fdopen(nfd, "wF")) == NULL) {
1955 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
1956 		    newfile, strerror(errno));
1957 		goto done;
1958 	}
1959 
1960 	while (fgets(line, sizeof (line), fp) != NULL) {
1961 		conf.vvc_vrid = VRRP_VRID_NONE;
1962 		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
1963 		    VRRP_SUCCESS) {
1964 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
1965 			    "configuration format: %s", line);
1966 			goto done;
1967 		}
1968 
1969 		/*
1970 		 * Write this line out if:
1971 		 * - this is a comment line; or
1972 		 * - we've done updating/deleting the the given VR; or
1973 		 * - if the name of the VR read from this line does not match
1974 		 *   the VR name that we are about to update/delete;
1975 		 */
1976 		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
1977 		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
1978 			if (fputs(line, nfp) != EOF)
1979 				continue;
1980 
1981 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
1982 			    "write line %s", line);
1983 			err = VRRP_EDB;
1984 			goto done;
1985 		}
1986 
1987 		/*
1988 		 * Otherwise, update/skip the line.
1989 		 */
1990 		found = _B_TRUE;
1991 		if (op == VRRP_CONF_DELETE)
1992 			continue;
1993 
1994 		assert(op == VRRP_CONF_UPDATE);
1995 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
1996 		    newconf)) != VRRP_SUCCESS) {
1997 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
1998 			    "update configuration for %s", newconf->vvc_name);
1999 			goto done;
2000 		}
2001 		if (fputs(line, nfp) == EOF) {
2002 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2003 			    "write line %s", line);
2004 			err = VRRP_EDB;
2005 			goto done;
2006 		}
2007 	}
2008 
2009 	/*
2010 	 * If we get to the end of the file and have not seen the router that
2011 	 * we are about to update, write it out.
2012 	 */
2013 	if (!found && op == VRRP_CONF_UPDATE) {
2014 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2015 		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2016 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2017 			    "write line %s", line);
2018 			err = VRRP_EDB;
2019 		}
2020 	} else if (!found && op == VRRP_CONF_DELETE) {
2021 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2022 		    "configuation for %s", newconf->vvc_name);
2023 		err = VRRP_ENOTFOUND;
2024 	}
2025 
2026 	if (err != VRRP_SUCCESS)
2027 		goto done;
2028 
2029 	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2030 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2031 		    "rename file %s", newfile);
2032 		err = VRRP_EDB;
2033 	}
2034 
2035 done:
2036 	(void) fclose(fp);
2037 	(void) fclose(nfp);
2038 	(void) unlink(newfile);
2039 	return (err);
2040 }
2041 
2042 static vrrp_err_t
2043 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2044 {
2045 	vrrp_prop_t	*prop;
2046 	int		n, i;
2047 
2048 	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2049 
2050 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2051 		prop = &vrrp_prop_info_tbl[i];
2052 		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2053 		    prop->vs_propname);
2054 		if (n < 0 || n >= len)
2055 			break;
2056 		len -= n;
2057 		line += n;
2058 		n = prop->vs_propwrite(conf, line, len);
2059 		if (n < 0 || n >= len)
2060 			break;
2061 		len -= n;
2062 		line += n;
2063 	}
2064 	if (i != VRRP_PROP_INFO_TABSIZE) {
2065 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2066 		    "small", conf->vvc_name);
2067 		return (VRRP_EDB);
2068 	}
2069 	n = snprintf(line, len, "\n");
2070 	if (n < 0 || n >= len) {
2071 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2072 		    "small", conf->vvc_name);
2073 		return (VRRP_EDB);
2074 	}
2075 	return (VRRP_SUCCESS);
2076 }
2077 
2078 static vrrp_err_t
2079 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2080 {
2081 	char		*str, *token;
2082 	char		*next;
2083 	vrrp_err_t	err = VRRP_SUCCESS;
2084 	char		tmpbuf[MAXLINELEN];
2085 
2086 	str = tmpbuf;
2087 	(void) strlcpy(tmpbuf, line, MAXLINELEN);
2088 
2089 	/*
2090 	 * Skip leading spaces, blank lines, and comments.
2091 	 */
2092 	skip_whitespace(str);
2093 	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2094 		conf->vvc_vrid = VRRP_VRID_NONE;
2095 		return (VRRP_SUCCESS);
2096 	}
2097 
2098 	/*
2099 	 * Read each VR properties.
2100 	 */
2101 	for (token = strtok_r(str, " \n\t", &next); token != NULL;
2102 	    token = strtok_r(NULL, " \n\t", &next)) {
2103 		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2104 			break;
2105 	}
2106 
2107 	/* All properties read but no VRID defined */
2108 	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2109 		err = VRRP_EINVAL;
2110 
2111 	return (err);
2112 }
2113 
2114 static vrrp_err_t
2115 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2116 {
2117 	vrrp_prop_t	*prop;
2118 	char		*pstr;
2119 	int		i;
2120 
2121 	if ((pstr = strchr(str, '=')) == NULL) {
2122 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2123 		return (VRRP_EINVAL);
2124 	}
2125 
2126 	*pstr++ = '\0';
2127 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2128 		prop = &vrrp_prop_info_tbl[i];
2129 		if (strcasecmp(str, prop->vs_propname) == 0) {
2130 			if (prop->vs_propread(conf, pstr))
2131 				break;
2132 		}
2133 	}
2134 
2135 	if (i == VRRP_PROP_INFO_TABSIZE) {
2136 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2137 		return (VRRP_EINVAL);
2138 	}
2139 
2140 	return (VRRP_SUCCESS);
2141 }
2142 
2143 static boolean_t
2144 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2145 {
2146 	size_t size = sizeof (conf->vvc_name);
2147 	return (strlcpy(conf->vvc_name, str, size) < size);
2148 }
2149 
2150 static boolean_t
2151 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2152 {
2153 	conf->vvc_vrid = strtol(str, NULL, 0);
2154 	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2155 	    conf->vvc_vrid > VRRP_VRID_MAX ||
2156 	    (conf->vvc_vrid == 0 && errno != 0)));
2157 }
2158 
2159 static boolean_t
2160 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2161 {
2162 	if (strcasecmp(str, "AF_INET") == 0)
2163 		conf->vvc_af = AF_INET;
2164 	else if (strcasecmp(str, "AF_INET6") == 0)
2165 		conf->vvc_af = AF_INET6;
2166 	else
2167 		return (_B_FALSE);
2168 	return (_B_TRUE);
2169 }
2170 
2171 static boolean_t
2172 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2173 {
2174 	conf->vvc_pri = strtol(str, NULL, 0);
2175 	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2176 	    conf->vvc_pri > VRRP_PRI_OWNER ||
2177 	    (conf->vvc_pri == 0 && errno != 0)));
2178 }
2179 
2180 static boolean_t
2181 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2182 {
2183 	conf->vvc_adver_int = strtol(str, NULL, 0);
2184 	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2185 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2186 	    (conf->vvc_adver_int == 0 && errno != 0)));
2187 }
2188 
2189 static boolean_t
2190 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2191 {
2192 	if (strcasecmp(str, "true") == 0)
2193 		conf->vvc_preempt = _B_TRUE;
2194 	else if (strcasecmp(str, "false") == 0)
2195 		conf->vvc_preempt = _B_FALSE;
2196 	else
2197 		return (_B_FALSE);
2198 	return (_B_TRUE);
2199 }
2200 
2201 static boolean_t
2202 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2203 {
2204 	if (strcasecmp(str, "true") == 0)
2205 		conf->vvc_accept = _B_TRUE;
2206 	else if (strcasecmp(str, "false") == 0)
2207 		conf->vvc_accept = _B_FALSE;
2208 	else
2209 		return (_B_FALSE);
2210 	return (_B_TRUE);
2211 }
2212 
2213 static boolean_t
2214 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2215 {
2216 	if (strcasecmp(str, "enabled") == 0)
2217 		conf->vvc_enabled = _B_TRUE;
2218 	else if (strcasecmp(str, "disabled") == 0)
2219 		conf->vvc_enabled = _B_FALSE;
2220 	else
2221 		return (_B_FALSE);
2222 	return (_B_TRUE);
2223 }
2224 
2225 static boolean_t
2226 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2227 {
2228 	size_t size = sizeof (conf->vvc_link);
2229 	return (strlcpy(conf->vvc_link, str, size) < size);
2230 }
2231 
2232 static int
2233 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2234 {
2235 	return (snprintf(str, size, "%s", conf->vvc_name));
2236 }
2237 
2238 static int
2239 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2240 {
2241 	return (snprintf(str, size, "%d", conf->vvc_pri));
2242 }
2243 
2244 static int
2245 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2246 {
2247 	return (snprintf(str, size, "%d", conf->vvc_adver_int));
2248 }
2249 
2250 static int
2251 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2252 {
2253 	return (snprintf(str, size, "%s",
2254 	    conf->vvc_preempt ? "true" : "false"));
2255 }
2256 
2257 static int
2258 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2259 {
2260 	return (snprintf(str, size, "%s",
2261 	    conf->vvc_accept ? "true" : "false"));
2262 }
2263 
2264 static int
2265 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2266 {
2267 	return (snprintf(str, size, "%s",
2268 	    conf->vvc_enabled ? "enabled" : "disabled"));
2269 }
2270 
2271 static int
2272 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2273 {
2274 	return (snprintf(str, size, "%d", conf->vvc_vrid));
2275 }
2276 
2277 static int
2278 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2279 {
2280 	return (snprintf(str, size, "%s",
2281 	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2282 }
2283 
2284 static int
2285 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2286 {
2287 	return (snprintf(str, size, "%s", conf->vvc_link));
2288 }
2289 
/*
 * Return a printable name for an address family. Besides the AF_*
 * constants, the plain numbers 4 and 6 are accepted for IPv4 and IPv6.
 * Unknown values yield "AF_error". The returned string is a literal and
 * must not be modified or freed.
 */
static char *
af_str(int af)
{
	if (af == 4 || af == AF_INET)
		return ("AF_INET");

	if (af == 6 || af == AF_INET6)
		return ("AF_INET6");

	if (af == AF_UNSPEC)
		return ("AF_UNSPEC");

	return ("AF_error");
}
2302 
2303 static vrrp_err_t
2304 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2305 {
2306 	vrrp_vr_t	*vr;
2307 
2308 	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2309 
2310 	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2311 		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2312 		    " failed", conf->vvc_name);
2313 		return (VRRP_ENOMEM);
2314 	}
2315 
2316 	bzero(vr, sizeof (vrrp_vr_t));
2317 	vr->vvr_state = VRRP_STATE_NONE;
2318 	vr->vvr_timer_id = -1;
2319 	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2320 	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2321 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2322 	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2323 	return (VRRP_SUCCESS);
2324 }
2325 
2326 static void
2327 vrrpd_delete_vr(vrrp_vr_t *vr)
2328 {
2329 	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2330 	if (vr->vvr_conf.vvc_enabled)
2331 		vrrpd_disable_vr(vr, NULL, _B_FALSE);
2332 	assert(vr->vvr_state == VRRP_STATE_INIT);
2333 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2334 	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2335 	(void) free(vr);
2336 }
2337 
2338 static vrrp_err_t
2339 vrrpd_enable_vr(vrrp_vr_t *vr)
2340 {
2341 	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;
2342 
2343 	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2344 
2345 	assert(vr->vvr_conf.vvc_enabled);
2346 
2347 	/*
2348 	 * This VRRP router has been successfully enabled and start
2349 	 * participating.
2350 	 */
2351 	if (vr->vvr_state != VRRP_STATE_INIT)
2352 		return (VRRP_SUCCESS);
2353 
2354 	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2355 		/*
2356 		 * Select the primary IP address. Even if this time
2357 		 * primary IP selection failed, we will reselect the
2358 		 * primary IP address when new IP address comes up.
2359 		 */
2360 		vrrpd_reselect_primary(vr->vvr_pif);
2361 		if (vr->vvr_pif->vvi_pip == NULL) {
2362 			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2363 			    "select_primary over %s failed",
2364 			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2365 			rx_err = VRRP_ENOPRIM;
2366 		}
2367 	}
2368 
2369 	/*
2370 	 * Initialize the TX socket used for this vrrp_vr_t to send the
2371 	 * multicast packets.
2372 	 */
2373 	tx_err = vrrpd_init_txsock(vr);
2374 
2375 	/*
2376 	 * Only start the state transition if sockets for both RX and TX are
2377 	 * initialized correctly.
2378 	 */
2379 	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2380 		/*
2381 		 * Record the error information for diagnose purpose.
2382 		 */
2383 		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2384 		return (err);
2385 	}
2386 
2387 	if (vr->vvr_conf.vvc_pri == 255)
2388 		err = vrrpd_state_i2m(vr);
2389 	else
2390 		err = vrrpd_state_i2b(vr);
2391 
2392 	if (err != VRRP_SUCCESS) {
2393 		vr->vvr_err = err;
2394 		vr->vvr_pif->vvi_pip = NULL;
2395 		vrrpd_fini_txsock(vr);
2396 		vrrpd_fini_rxsock(vr);
2397 	}
2398 	return (err);
2399 }
2400 
2401 /*
2402  * Given the removed interface, see whether the given VRRP router would
2403  * be affected and stop participating the VRRP protocol.
2404  *
2405  * If intf is NULL, VR disabling request is coming from the admin.
2406  */
2407 static void
2408 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2409 {
2410 	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2411 	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
2412 	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2413 	    "interface deleted"));
2414 
2415 	/*
2416 	 * An interface is deleted, see whether this interface is the
2417 	 * physical interface or the VNIC of the given VRRP router.
2418 	 * If so, continue to disable the VRRP router.
2419 	 */
2420 	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2421 	    (intf != vr->vvr_vif)) {
2422 		return;
2423 	}
2424 
2425 	/*
2426 	 * If this is the case that the primary IP address is gone,
2427 	 * and we failed to reselect another primary IP address,
2428 	 * continue to disable the VRRP router.
2429 	 */
2430 	if (primary_addr_gone && intf != vr->vvr_pif)
2431 		return;
2432 
2433 	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2434 	    vr->vvr_conf.vvc_name);
2435 
2436 	if (vr->vvr_state == VRRP_STATE_MASTER) {
2437 		/*
2438 		 * If this router is disabled by the administrator, send
2439 		 * the zero-priority advertisement to indicate the Master
2440 		 * stops participating VRRP.
2441 		 */
2442 		if (intf == NULL)
2443 			(void) vrrpd_send_adv(vr, _B_TRUE);
2444 
2445 		vrrpd_state_m2i(vr);
2446 	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
2447 		vrrpd_state_b2i(vr);
2448 	}
2449 
2450 	/*
2451 	 * If no primary IP address can be selected, the VRRP router
2452 	 * stays at the INIT state and will become BACKUP and MASTER when
2453 	 * a primary IP address is reselected.
2454 	 */
2455 	if (primary_addr_gone) {
2456 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2457 		    "is removed", vr->vvr_conf.vvc_name);
2458 		vr->vvr_err = VRRP_ENOPRIM;
2459 	} else if (intf == NULL) {
2460 		/*
2461 		 * The VRRP router is disable by the administrator
2462 		 */
2463 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2464 		    vr->vvr_conf.vvc_name);
2465 		vr->vvr_err = VRRP_SUCCESS;
2466 		vrrpd_fini_txsock(vr);
2467 		vrrpd_fini_rxsock(vr);
2468 	} else if (intf == vr->vvr_pif) {
2469 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2470 		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2471 		vr->vvr_err = VRRP_ENOPRIM;
2472 		vrrpd_fini_rxsock(vr);
2473 	} else if (intf == vr->vvr_vif) {
2474 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2475 		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2476 		vr->vvr_err = VRRP_ENOVIRT;
2477 		vrrpd_fini_txsock(vr);
2478 	}
2479 }
2480 
2481 vrrp_err_t
2482 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2483 {
2484 	vrrp_err_t	err = VRRP_SUCCESS;
2485 
2486 	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2487 	    conf->vvc_link, conf->vvc_vrid);
2488 
2489 	assert(conf != NULL);
2490 
2491 	/*
2492 	 * Sanity check
2493 	 */
2494 	if ((strlen(conf->vvc_name) == 0) ||
2495 	    (strlen(conf->vvc_link) == 0) ||
2496 	    (conf->vvc_vrid < VRRP_VRID_MIN ||
2497 	    conf->vvc_vrid > VRRP_VRID_MAX) ||
2498 	    (conf->vvc_pri < VRRP_PRI_MIN ||
2499 	    conf->vvc_pri > VRRP_PRI_OWNER) ||
2500 	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2501 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2502 	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2503 	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2504 		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2505 		    conf->vvc_name);
2506 		return (VRRP_EINVAL);
2507 	}
2508 
2509 	if (!vrrp_valid_name(conf->vvc_name)) {
2510 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2511 		    "name", conf->vvc_name);
2512 		return (VRRP_EINVALVRNAME);
2513 	}
2514 
2515 	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2516 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2517 		    conf->vvc_name);
2518 		return (VRRP_EINSTEXIST);
2519 	}
2520 
2521 	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2522 	    conf->vvc_af) != NULL) {
2523 		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2524 		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2525 		    conf->vvc_link);
2526 		return (VRRP_EVREXIST);
2527 	}
2528 
2529 	if (updateconf && (err = vrrpd_updateconf(conf,
2530 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2531 		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2532 		    "configuration for %s", conf->vvc_name);
2533 		return (err);
2534 	}
2535 
2536 	err = vrrpd_create_vr(conf);
2537 	if (err != VRRP_SUCCESS && updateconf)
2538 		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2539 
2540 	return (err);
2541 }
2542 
/*
 * Delete the VRRP router named "vn": remove its entry from the
 * configuration file first and, only if that succeeds, destroy the
 * in-memory router.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists, or the
 * error returned by vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*target;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	target = vrrpd_lookup_vr_by_name(vn);
	if (target == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	rc = vrrpd_updateconf(&target->vvr_conf, VRRP_CONF_DELETE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_delete_vr(target);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", target->vvr_conf.vvc_name);
	return (rc);
}
2566 
2567 static vrrp_err_t
2568 vrrpd_enable(const char *vn, boolean_t updateconf)
2569 {
2570 	vrrp_vr_t		*vr;
2571 	vrrp_vr_conf_t		*conf;
2572 	uint32_t		flags;
2573 	datalink_class_t	class;
2574 	vrrp_err_t		err = VRRP_SUCCESS;
2575 
2576 	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2577 
2578 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2579 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2580 		return (VRRP_ENOTFOUND);
2581 	}
2582 
2583 	/*
2584 	 * The VR is already enabled.
2585 	 */
2586 	conf = &vr->vvr_conf;
2587 	if (conf->vvc_enabled) {
2588 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2589 		    "enabled", vn);
2590 		return (VRRP_EALREADY);
2591 	}
2592 
2593 	/*
2594 	 * Check whether the link exists.
2595 	 */
2596 	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2597 	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2598 	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2599 	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) {
2600 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2601 		    vn, conf->vvc_link);
2602 		return (VRRP_EINVALLINK);
2603 	}
2604 
2605 	/*
2606 	 * Get the associated VNIC name by the given interface/vrid/
2607 	 * address famitly.
2608 	 */
2609 	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2610 	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2611 	    sizeof (vr->vvr_vnic));
2612 	if (err != VRRP_SUCCESS) {
2613 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2614 		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2615 		    conf->vvc_link);
2616 		err = VRRP_ENOVNIC;
2617 		goto fail;
2618 	}
2619 
2620 	/*
2621 	 * Find the right VNIC, primary interface and get the list of the
2622 	 * protected IP adressses and primary IP address. Note that if
2623 	 * either interface is NULL (no IP addresses configured over the
2624 	 * interface), we will still continue and mark this VRRP router
2625 	 * as "enabled".
2626 	 */
2627 	vr->vvr_conf.vvc_enabled = _B_TRUE;
2628 	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2629 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2630 		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2631 		    "configuration for %s", vr->vvr_conf.vvc_name);
2632 		goto fail;
2633 	}
2634 
2635 	/*
2636 	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2637 	 * addresses over ether the primary interface or the VNIC yet,
2638 	 * return success in this case, the VRRP router will stay in
2639 	 * the initialized state and start to work when the IP address is
2640 	 * configured.
2641 	 */
2642 	(void) vrrpd_enable_vr(vr);
2643 	return (VRRP_SUCCESS);
2644 
2645 fail:
2646 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2647 	vr->vvr_vnic[0] = '\0';
2648 	return (err);
2649 }
2650 
/*
 * Administratively disable the VRRP router named "vn". The new state is
 * written to the configuration file first; the router is only stopped if
 * that succeeds, otherwise it stays enabled.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists,
 * VRRP_EALREADY if the router is not enabled, or the error returned by
 * vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*vr;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	vr = vrrpd_lookup_vr_by_name(vn);
	if (vr == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * Nothing to do for a router that is disabled already.
	 */
	if (!vr->vvr_conf.vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	/* Persist the disabled state; roll it back if that fails. */
	vr->vvr_conf.vvc_enabled = _B_FALSE;
	rc = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_disable_vr(vr, NULL, _B_FALSE);
		vr->vvr_vnic[0] = '\0';
		return (VRRP_SUCCESS);
	}

	vr->vvr_conf.vvc_enabled = _B_TRUE;
	vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
	    "configuration for %s", vr->vvr_conf.vvc_name);
	return (rc);
}
2685 
2686 static vrrp_err_t
2687 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2688 {
2689 	vrrp_vr_t	*vr;
2690 	vrrp_vr_conf_t	savconf;
2691 	int		pri;
2692 	boolean_t	accept, set_accept = _B_FALSE;
2693 	vrrp_err_t	err;
2694 
2695 	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2696 
2697 	if (mask == 0)
2698 		return (VRRP_SUCCESS);
2699 
2700 	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2701 		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2702 		    "VR instance: %s", conf->vvc_name);
2703 		return (VRRP_ENOTFOUND);
2704 	}
2705 
2706 	if (mask & VRRP_CONF_INTERVAL) {
2707 		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2708 		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2709 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2710 			    "adver_interval %d", conf->vvc_name,
2711 			    conf->vvc_adver_int);
2712 			return (VRRP_EINVAL);
2713 		}
2714 	}
2715 
2716 	pri = vr->vvr_conf.vvc_pri;
2717 	if (mask & VRRP_CONF_PRIORITY) {
2718 		if (conf->vvc_pri < VRRP_PRI_MIN ||
2719 		    conf->vvc_pri > VRRP_PRI_OWNER) {
2720 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2721 			    "priority %d", conf->vvc_name, conf->vvc_pri);
2722 			return (VRRP_EINVAL);
2723 		}
2724 		pri = conf->vvc_pri;
2725 	}
2726 
2727 	accept = vr->vvr_conf.vvc_accept;
2728 	if (mask & VRRP_CONF_ACCEPT)
2729 		accept = conf->vvc_accept;
2730 
2731 	if (pri == VRRP_PRI_OWNER && !accept) {
2732 		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2733 		    "true for VRRP address owner", conf->vvc_name);
2734 		return (VRRP_EINVAL);
2735 	}
2736 
2737 	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2738 		err = vrrpd_set_noaccept(vr, !accept);
2739 		if (err != VRRP_SUCCESS) {
2740 			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2741 			    "updating failed: %s", conf->vvc_name,
2742 			    vrrp_err2str(err));
2743 			return (err);
2744 		}
2745 		set_accept = _B_TRUE;
2746 	}
2747 
2748 	/*
2749 	 * Save the current configuration, so it can be restored if the
2750 	 * following fails.
2751 	 */
2752 	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2753 	if (mask & VRRP_CONF_PREEMPT)
2754 		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2755 
2756 	if (mask & VRRP_CONF_ACCEPT)
2757 		vr->vvr_conf.vvc_accept = accept;
2758 
2759 	if (mask & VRRP_CONF_PRIORITY)
2760 		vr->vvr_conf.vvc_pri = pri;
2761 
2762 	if (mask & VRRP_CONF_INTERVAL)
2763 		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2764 
2765 	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2766 	if (err != VRRP_SUCCESS) {
2767 		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2768 		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
2769 		if (set_accept)
2770 			(void) vrrpd_set_noaccept(vr, accept);
2771 		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2772 		return (err);
2773 	}
2774 
2775 	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2776 		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2777 
2778 	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2779 		vr->vvr_timeout = conf->vvc_adver_int;
2780 
2781 	return (VRRP_SUCCESS);
2782 }
2783 
2784 static void
2785 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2786     size_t *sizep)
2787 {
2788 	vrrp_vr_t	*vr;
2789 	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
2790 	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);
2791 
2792 	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2793 
2794 	ret->vrl_cnt = 0;
2795 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2796 		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2797 			continue;
2798 
2799 		if (strlen(ifname) != 0 && strcmp(ifname,
2800 		    vr->vvr_conf.vvc_link) == 0) {
2801 			continue;
2802 		}
2803 
2804 		if ((af == AF_INET || af == AF_INET6) &&
2805 		    vr->vvr_conf.vvc_af != af)
2806 			continue;
2807 
2808 		if (size < VRRP_NAME_MAX) {
2809 			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2810 			    "small to hold %d router names", ret->vrl_cnt);
2811 			*sizep = sizeof (vrrp_ret_list_t);
2812 			ret->vrl_err = VRRP_ETOOSMALL;
2813 			return;
2814 		}
2815 		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2816 		p += (strlen(vr->vvr_conf.vvc_name) + 1);
2817 		ret->vrl_cnt++;
2818 		size -= VRRP_NAME_MAX;
2819 	}
2820 
2821 	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2822 	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2823 	ret->vrl_err = VRRP_SUCCESS;
2824 }
2825 
2826 static void
2827 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2828 {
2829 	vrrp_queryinfo_t	*infop;
2830 	vrrp_vr_t		*vr;
2831 	vrrp_intf_t		*vif;
2832 	vrrp_ip_t		*ip;
2833 	struct timeval		now;
2834 	uint32_t		vipcnt = 0;
2835 	size_t			size = *sizep;
2836 
2837 	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2838 
2839 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2840 		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2841 		*sizep = sizeof (vrrp_ret_query_t);
2842 		ret->vrq_err = VRRP_ENOTFOUND;
2843 		return;
2844 	}
2845 
2846 	/*
2847 	 * Get the virtual IP list if the router is not in the INIT state.
2848 	 */
2849 	if (vr->vvr_state != VRRP_STATE_INIT) {
2850 		vif = vr->vvr_vif;
2851 		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2852 			vipcnt++;
2853 		}
2854 	}
2855 
2856 	*sizep = sizeof (vrrp_ret_query_t);
2857 	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2858 	if (*sizep > size) {
2859 		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2860 		    "%d virtual IPs", vipcnt);
2861 		*sizep = sizeof (vrrp_ret_query_t);
2862 		ret->vrq_err = VRRP_ETOOSMALL;
2863 		return;
2864 	}
2865 
2866 	(void) gettimeofday(&now, NULL);
2867 
2868 	bzero(ret, *sizep);
2869 	infop = &ret->vrq_qinfo;
2870 	(void) memcpy(&infop->show_vi,
2871 	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2872 	(void) memcpy(&infop->show_vs,
2873 	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2874 	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2875 	infop->show_vt.vt_since_last_tran = timeval_to_milli(
2876 	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2877 
2878 	if (vr->vvr_state == VRRP_STATE_INIT) {
2879 		ret->vrq_err = VRRP_SUCCESS;
2880 		return;
2881 	}
2882 
2883 	vipcnt = 0;
2884 	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2885 		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
2886 		    &ip->vip_addr, sizeof (vrrp_addr_t));
2887 	}
2888 	infop->show_va.va_vipcnt = vipcnt;
2889 
2890 	(void) memcpy(&infop->show_va.va_primary,
2891 	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2892 
2893 	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2894 
2895 	/*
2896 	 * Check whether there is a peer.
2897 	 */
2898 	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2899 	    &(vr->vvr_peer.vp_addr))) {
2900 		infop->show_vt.vt_since_last_adv = timeval_to_milli(
2901 		    timeval_delta(now, vr->vvr_peer.vp_time));
2902 	}
2903 
2904 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
2905 		infop->show_vt.vt_master_down_intv =
2906 		    MASTER_DOWN_INTERVAL_VR(vr);
2907 	}
2908 
2909 	ret->vrq_err = VRRP_SUCCESS;
2910 }
2911 
2912 /*
2913  * Build the VRRP packet (not including the IP header). Return the
2914  * payload length.
2915  *
2916  * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2917  * advertisement which is sent by the Master to indicate that it has been
2918  * stopped participating in VRRP.
2919  */
2920 static size_t
2921 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
2922 {
2923 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2924 	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
2925 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2926 	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
2927 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2928 	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
2929 	vrrp_intf_t	*vif = vr->vvr_vif;
2930 	vrrp_ip_t	*vip;
2931 	int		af = vif->vvi_af;
2932 	size_t		size = sizeof (vrrp_pkt_t);
2933 	uint16_t	rsvd_adver_int;
2934 	int		nip = 0;
2935 
2936 	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
2937 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
2938 	    vr->vvr_conf.vvc_adver_int);
2939 
2940 	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
2941 		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
2942 		    sizeof (struct in6_addr))) > buflen) {
2943 			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
2944 			    "not big enough %d", vr->vvr_conf.vvc_name, size);
2945 			return (0);
2946 		}
2947 
2948 		if (af == AF_INET)
2949 			a4[nip++] = vip->vip_addr.in4.sin_addr;
2950 		else
2951 			a6[nip++] = vip->vip_addr.in6.sin6_addr;
2952 	}
2953 
2954 	if (nip == 0) {
2955 		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
2956 		    "address", vr->vvr_conf.vvc_name);
2957 		return (0);
2958 	}
2959 
2960 	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
2961 	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
2962 	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
2963 
2964 	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
2965 	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
2966 	vp->vp_ipnum = nip;
2967 
2968 	/*
2969 	 * Set the checksum to 0 first, then caculate it.
2970 	 */
2971 	vp->vp_chksum = 0;
2972 	if (af == AF_INET) {
2973 		vp->vp_chksum = vrrp_cksum4(
2974 		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
2975 		    &vrrp_muladdr4.in4.sin_addr, size, vp);
2976 	} else {
2977 		vp->vp_chksum = vrrp_cksum6(
2978 		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
2979 		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
2980 	}
2981 
2982 	return (size);
2983 }
2984 
2985 /*
2986  * We need to build the IPv4 header on our own.
2987  */
2988 static vrrp_err_t
2989 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
2990 {
2991 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2992 	struct ip *ip = (struct ip *)buf;
2993 	size_t plen;
2994 
2995 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
2996 
2997 	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
2998 	    len - sizeof (struct ip), zero_pri)) == 0) {
2999 		return (VRRP_ETOOSMALL);
3000 	}
3001 
3002 	ip->ip_hl = sizeof (struct ip) >> 2;
3003 	ip->ip_v = IPV4_VERSION;
3004 	ip->ip_tos = 0;
3005 	plen += sizeof (struct ip);
3006 	ip->ip_len = htons(plen);
3007 	ip->ip_off = 0;
3008 	ip->ip_ttl = VRRP_IP_TTL;
3009 	ip->ip_p = IPPROTO_VRRP;
3010 	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3011 	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3012 
3013 	/*
3014 	 * The kernel will set the IP cksum and the IPv4 identification.
3015 	 */
3016 	ip->ip_id = 0;
3017 	ip->ip_sum = 0;
3018 
3019 	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3020 	    (const struct sockaddr *)&vrrp_muladdr4,
3021 	    sizeof (struct sockaddr_in))) != plen) {
3022 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3023 		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3024 		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3025 		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3026 		return (VRRP_ESYS);
3027 	}
3028 
3029 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3030 	    vr->vvr_conf.vvc_name);
3031 	return (VRRP_SUCCESS);
3032 }
3033 
/*
 * Build and send one IPv6 VRRP advertisement for the given virtual router.
 *
 * The VRRP message is built by vrrpd_build_vrrp() at the start of buf. No
 * IPv6 header is built here; instead the hop limit (VRRP_IP_TTL), the source
 * address (the primary IP address of vvr_pif) and the outgoing interface
 * index (that of vvr_vif) are passed to sendmsg() as IPV6_HOPLIMIT and
 * IPV6_PKTINFO ancillary data. The packet is sent to vrrp_muladdr6 on the
 * socket of the router's vvr_vif interface.
 *
 * Returns VRRP_ETOOSMALL if vrrpd_build_vrrp() returns 0, VRRP_ENOMEM if the
 * ancillary data buffer cannot be allocated, VRRP_ESYS if sendmsg() does not
 * send the whole message, and VRRP_SUCCESS otherwise. The ancillary data
 * buffer is freed before returning on both the failure and success paths
 * that follow its allocation.
 */
static vrrp_err_t
vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
{
	struct msghdr msg6;
	size_t hoplimit_space = 0;
	size_t pktinfo_space = 0;
	size_t bufspace = 0;
	struct in6_pktinfo *pktinfop;
	struct cmsghdr *cmsgp;
	uchar_t *cmsg_datap;
	struct iovec iov;
	size_t plen;

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);

	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
		return (VRRP_ETOOSMALL);

	msg6.msg_control = NULL;
	msg6.msg_controllen = 0;

	/*
	 * Size the ancillary data buffer: one cmsghdr plus payload for each
	 * of the two options, with _MAX_ALIGNMENT of slack added before and
	 * after each payload.
	 */
	hoplimit_space = sizeof (int);
	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
	    hoplimit_space + _MAX_ALIGNMENT;

	pktinfo_space = sizeof (struct in6_pktinfo);
	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
	    pktinfo_space + _MAX_ALIGNMENT;

	/*
	 * We need to temporarily set the msg6.msg_controllen to bufspace
	 * (we will later trim it to actual length used). This is needed because
	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
	 */
	bufspace += sizeof (struct cmsghdr);
	msg6.msg_controllen = bufspace;

	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
	if (msg6.msg_control == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
		return (VRRP_ENOMEM);
	}

	cmsgp = CMSG_FIRSTHDR(&msg6);

	/* First option: the hop limit of the outgoing packet. */
	cmsgp->cmsg_level = IPPROTO_IPV6;
	cmsgp->cmsg_type = IPV6_HOPLIMIT;
	cmsg_datap = CMSG_DATA(cmsgp);
	/* LINTED */
	*(int *)cmsg_datap = VRRP_IP_TTL;
	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
	/*
	 * NOTE(review): the result of CMSG_NXTHDR() is used without a NULL
	 * check; presumably the sizing above guarantees room for the next
	 * header -- confirm.
	 */
	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);

	/* Second option: the source address and outgoing interface. */
	cmsgp->cmsg_level = IPPROTO_IPV6;
	cmsgp->cmsg_type = IPV6_PKTINFO;
	cmsg_datap = CMSG_DATA(cmsgp);

	/* LINTED */
	pktinfop = (struct in6_pktinfo *)cmsg_datap;
	/*
	 * We don't know if pktinfop->ipi6_addr is aligned properly,
	 * therefore let's use bcopy, instead of assignment.
	 */
	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));

	/*
	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
	 */
	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
	/*
	 * Trim msg_controllen to the space actually used: the offset of
	 * where a third header would start.
	 */
	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;

	msg6.msg_name = &vrrp_muladdr6;
	msg6.msg_namelen = sizeof (struct sockaddr_in6);

	iov.iov_base = buf;
	iov.iov_len = plen;
	msg6.msg_iov = &iov;
	msg6.msg_iovlen = 1;

	/* Anything other than a complete send is treated as a failure. */
	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
	    (const struct msghdr *)&msg6, 0)) != plen) {
		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
		    strerror(errno), plen, len);
		(void) free(msg6.msg_control);
		return (VRRP_ESYS);
	}

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
	    vr->vvr_conf.vvc_name);
	(void) free(msg6.msg_control);
	return (VRRP_SUCCESS);
}
3131 
3132 /*
3133  * Send the VRRP advertisement packets.
3134  */
3135 static vrrp_err_t
3136 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3137 {
3138 	uint64_t buf[(IP_MAXPACKET + 1)/8];
3139 
3140 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3141 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3142 
3143 	assert(vr->vvr_pif->vvi_pip != NULL);
3144 
3145 	if (vr->vvr_pif->vvi_pip == NULL) {
3146 		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3147 		    "address", vr->vvr_conf.vvc_name);
3148 		return (VRRP_EINVAL);
3149 	}
3150 
3151 	if (vr->vvr_conf.vvc_af == AF_INET) {
3152 		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3153 		    sizeof (buf), zero_pri));
3154 	} else {
3155 		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3156 		    sizeof (buf), zero_pri));
3157 	}
3158 }
3159 
/*
 * Process a VRRP advertisement (vp) received from peer "from" for the
 * virtual router vr, and run the BACKUP/MASTER state machine on it.
 *
 * The advertisement is ignored when:
 * - the router's state is VRRP_STATE_INIT or lower;
 * - the sender's address equals the local primary IP address (our own
 *   message);
 * - the local priority is 255 (an error is logged in this case).
 *
 * Otherwise the peer's address, priority, advertisement interval and the
 * current time are recorded in vr, and then:
 * - in BACKUP state, the router's timer is cancelled and rescheduled with
 *   vrrp_b2m_timeout as its callback when the peer's priority is zero, or
 *   preemption is disabled, or the peer's priority is not lower than ours;
 * - in MASTER state, a peer priority of zero triggers an immediate
 *   advertisement and a restart of the vrrp_adv_timeout timer, while a peer
 *   with a higher priority (or an equal priority and a greater IP address)
 *   makes this router go through vrrpd_state_m2b().
 */
static void
vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
{
	vrrp_vr_conf_t *conf = &vr->vvr_conf;
	char		peer[INET6_ADDRSTRLEN];
	char		local[INET6_ADDRSTRLEN];
	int		addr_cmp;
	uint16_t	peer_adver_int;

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
	    _B_FALSE);
	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
	    peer);

	if (vr->vvr_state <= VRRP_STATE_INIT) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
		return;
	}

	/*
	 * The advertisement interval is carried in the low 12 bits of the
	 * 16-bit field; the remaining bits are masked off.
	 */
	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
	    local, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
	    vp->vp_prio, peer_adver_int);

	/*
	 * Compare the sender's address with our primary IP address. Zero
	 * means the message is our own; the sign is used further down to
	 * break ties between equal priorities.
	 */
	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
	    &vr->vvr_pif->vvi_pip->vip_addr);
	if (addr_cmp == 0) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
		    conf->vvc_name);
		return;
	} else if (conf->vvc_pri == vp->vp_prio) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
		    " than the local IP %s", conf->vvc_name, peer,
		    addr_cmp > 0 ? "greater" : "less", local);
	}

	/*
	 * Priority 255 is treated here as the virtual address owner, which
	 * does not act on advertisements from other routers.
	 */
	if (conf->vvc_pri == 255) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
		    "owner received advertisement from %s", conf->vvc_name,
		    peer);
		return;
	}

	/* Remember who we heard from, when, and what they advertised. */
	(void) gettimeofday(&vr->vvr_peer_time, NULL);
	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
	vr->vvr_peer_prio = vp->vp_prio;
	vr->vvr_peer_adver_int = peer_adver_int;

	if (vr->vvr_state == VRRP_STATE_BACKUP) {
		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
		    (conf->vvc_preempt == _B_FALSE ||
		    vp->vp_prio >= conf->vvc_pri)) {
			/*
			 * Restart the timer whose expiry runs
			 * vrrp_b2m_timeout.
			 */
			(void) iu_cancel_timer(vrrpd_timerq,
			    vr->vvr_timer_id, NULL);
			if (vp->vp_prio == VRRP_PRIO_ZERO) {
				/* the master stops participating in VRRP */
				vr->vvr_timeout = SKEW_TIME_VR(vr);
			} else {
				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
			}
			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
			    vr)) == -1) {
				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
				    "start vrrp_b2m_timeout(%d) failed",
				    conf->vvc_name, vr->vvr_timeout);
			} else {
				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
				    "start vrrp_b2m_timeout(%d)",
				    conf->vvc_name, vr->vvr_timeout);
			}
		}
	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
		if (vp->vp_prio == VRRP_PRIO_ZERO) {
			/*
			 * Advertise right away and restart the advertisement
			 * timer with the current vvr_timeout.
			 */
			(void) vrrpd_send_adv(vr, _B_FALSE);
			(void) iu_cancel_timer(vrrpd_timerq,
			    vr->vvr_timer_id, NULL);
			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
			    vr)) == -1) {
				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
				    "start vrrp_adv_timeout(%d) failed",
				    conf->vvc_name, vr->vvr_timeout);
			} else {
				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
				    "start vrrp_adv_timeout(%d)",
				    conf->vvc_name, vr->vvr_timeout);
			}
		} else if (vp->vp_prio > conf->vvc_pri ||
		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
			/*
			 * The peer wins: higher priority, or equal priority
			 * and a greater IP address.
			 */
			(void) vrrpd_state_m2b(vr);
		}
	} else {
		/* Only BACKUP and MASTER are expected past the INIT check. */
		assert(_B_FALSE);
	}
}
3264 
3265 static vrrp_err_t
3266 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3267     vrrp_addr_t *from)
3268 {
3269 	vrrp_vr_t	*vr;
3270 	uint8_t		vers_type;
3271 	uint16_t	saved_cksum, cksum;
3272 	char		peer[INET6_ADDRSTRLEN];
3273 
3274 	/* LINTED E_CONSTANT_CONDITION */
3275 	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3276 	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3277 	    peer);
3278 
3279 	if (len < sizeof (vrrp_pkt_t)) {
3280 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3281 		    "length %d", len);
3282 		return (VRRP_EINVAL);
3283 	}
3284 
3285 	/*
3286 	 * Verify: VRRP version number and packet type.
3287 	 */
3288 	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3289 	if (vers_type != VRRP_VERSION) {
3290 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3291 		    "version %d", pif->vvi_ifname, vers_type);
3292 		return (VRRP_EINVAL);
3293 	}
3294 
3295 	if (vp->vp_ipnum == 0) {
3296 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3297 		    pif->vvi_ifname);
3298 		return (VRRP_EINVAL);
3299 	}
3300 
3301 	if (len - sizeof (vrrp_pkt_t) !=
3302 	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3303 	    sizeof (struct in6_addr))) {
3304 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3305 		    " %d", pif->vvi_ifname, vp->vp_ipnum);
3306 		return (VRRP_EINVAL);
3307 	}
3308 
3309 	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3310 
3311 	/*
3312 	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3313 	 * order checksum value;
3314 	 */
3315 	saved_cksum = vp->vp_chksum;
3316 	vp->vp_chksum = 0;
3317 	if (pif->vvi_af == AF_INET) {
3318 		cksum = vrrp_cksum4(&from->in4.sin_addr,
3319 		    &vrrp_muladdr4.in4.sin_addr, len, vp);
3320 	} else {
3321 		cksum = vrrp_cksum6(&from->in6.sin6_addr,
3322 		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
3323 	}
3324 
3325 	if (cksum != saved_cksum) {
3326 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3327 		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3328 		    cksum, saved_cksum);
3329 		return (VRRP_EINVAL);
3330 	}
3331 
3332 	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3333 	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3334 		vrrpd_process_adv(vr, from, vp);
3335 	} else {
3336 		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3337 		    "not configured", pif->vvi_ifname, vp->vp_vrid,
3338 		    af_str(pif->vvi_af));
3339 	}
3340 	return (VRRP_SUCCESS);
3341 }
3342 
3343 /*
3344  * IPv4 socket, the IPv4 header is included.
3345  */
3346 static vrrp_err_t
3347 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3348 {
3349 	char		abuf[INET6_ADDRSTRLEN];
3350 	struct ip	*ip;
3351 
3352 	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3353 	    pif->vvi_ifname, len);
3354 
3355 	ip = (struct ip *)msgp->msg_iov->iov_base;
3356 
3357 	/* Sanity check */
3358 	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3359 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3360 		    "%d", pif->vvi_ifname, len);
3361 		return (VRRP_EINVAL);
3362 	}
3363 
3364 	assert(ip->ip_v == IPV4_VERSION);
3365 	assert(ip->ip_p == IPPROTO_VRRP);
3366 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3367 
3368 	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3369 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3370 		    "destination %s", pif->vvi_ifname,
3371 		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3372 		return (VRRP_EINVAL);
3373 	}
3374 
3375 	if (ip->ip_ttl != VRRP_IP_TTL) {
3376 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3377 		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3378 		return (VRRP_EINVAL);
3379 	}
3380 
3381 	/*
3382 	 * Note that the ip_len contains only the IP payload length.
3383 	 */
3384 	return (vrrpd_process_vrrp(pif,
3385 	    /* LINTED E_BAD_PTR_CAST_ALIGN */
3386 	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3387 	    (vrrp_addr_t *)msgp->msg_name));
3388 }
3389 
3390 /*
3391  * IPv6 socket, check the ancillary_data.
3392  */
3393 static vrrp_err_t
3394 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3395 {
3396 	struct cmsghdr		*cmsgp;
3397 	uchar_t			*cmsg_datap;
3398 	struct in6_pktinfo	*pktinfop;
3399 	char			abuf[INET6_ADDRSTRLEN];
3400 	int			ttl;
3401 
3402 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3403 	    pif->vvi_ifname, len);
3404 
3405 	/* Sanity check */
3406 	if (len < sizeof (vrrp_pkt_t)) {
3407 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3408 		    "%d", pif->vvi_ifname, len);
3409 		return (VRRP_EINVAL);
3410 	}
3411 
3412 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3413 
3414 	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3415 	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3416 		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3417 		cmsg_datap = CMSG_DATA(cmsgp);
3418 
3419 		switch (cmsgp->cmsg_type) {
3420 		case IPV6_HOPLIMIT:
3421 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3422 			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3423 				break;
3424 
3425 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3426 			    "ttl %d", pif->vvi_ifname, ttl);
3427 			return (VRRP_EINVAL);
3428 		case IPV6_PKTINFO:
3429 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3430 			pktinfop = (struct in6_pktinfo *)cmsg_datap;
3431 			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3432 			    &vrrp_muladdr6.in6.sin6_addr)) {
3433 				break;
3434 			}
3435 
3436 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3437 			    "destination %s", pif->vvi_ifname,
3438 			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3439 			    sizeof (abuf)));
3440 			return (VRRP_EINVAL);
3441 		}
3442 	}
3443 
3444 	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3445 	    msgp->msg_name));
3446 }
3447 
3448 /* ARGSUSED */
3449 static void
3450 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3451     void *arg)
3452 {
3453 	struct msghdr		msg;
3454 	vrrp_addr_t		from;
3455 	uint64_t		buf[(IP_MAXPACKET + 1)/8];
3456 	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
3457 	vrrp_intf_t		*pif = arg;
3458 	int			af = pif->vvi_af;
3459 	int			len;
3460 	struct iovec		iov;
3461 
3462 	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3463 
3464 	msg.msg_name = (struct sockaddr *)&from;
3465 	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3466 	    sizeof (struct sockaddr_in6);
3467 	iov.iov_base = (char *)buf;
3468 	iov.iov_len = sizeof (buf);
3469 	msg.msg_iov = &iov;
3470 	msg.msg_iovlen = 1;
3471 	msg.msg_control = ancillary_data;
3472 	msg.msg_controllen = sizeof (ancillary_data);
3473 
3474 	if ((len = recvmsg(s, &msg, 0)) == -1) {
3475 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3476 		    "failed: %s", pif->vvi_ifname, strerror(errno));
3477 		return;
3478 	}
3479 
3480 	/*
3481 	 * Ignore packets whose control buffers that don't fit
3482 	 */
3483 	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3484 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3485 		    "big enough", pif->vvi_ifname);
3486 		return;
3487 	}
3488 
3489 	if (af == AF_INET)
3490 		(void) vrrpd_process_adv_v4(pif, &msg, len);
3491 	else
3492 		(void) vrrpd_process_adv_v6(pif, &msg, len);
3493 }
3494 
3495 /*
3496  * Create the socket which is used to receive VRRP packets. Virtual routers
3497  * that configured on the same physical interface share the same socket.
3498  */
3499 static vrrp_err_t
3500 vrrpd_init_rxsock(vrrp_vr_t *vr)
3501 {
3502 	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
3503 	struct group_req greq;
3504 	struct sockaddr_storage *muladdr;
3505 	int af, proto;
3506 	int on = 1;
3507 	vrrp_err_t err = VRRP_SUCCESS;
3508 
3509 	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3510 
3511 	/*
3512 	 * The RX sockets may already been initialized.
3513 	 */
3514 	if ((pif = vr->vvr_pif) != NULL) {
3515 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3516 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3517 		assert(pif->vvi_sockfd != -1);
3518 		return (VRRP_SUCCESS);
3519 	}
3520 
3521 	/*
3522 	 * If no IP addresses configured on the primary interface,
3523 	 * return failure.
3524 	 */
3525 	af = vr->vvr_conf.vvc_af;
3526 	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3527 	if (pif == NULL) {
3528 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3529 		    "over %s/%s", vr->vvr_conf.vvc_name,
3530 		    vr->vvr_conf.vvc_link, af_str(af));
3531 		return (VRRP_ENOPRIM);
3532 	}
3533 
3534 	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3535 	if (pif->vvi_nvr++ == 0) {
3536 		assert(pif->vvi_sockfd < 0);
3537 		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3538 		if (pif->vvi_sockfd < 0) {
3539 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3540 			    "failed %s", vr->vvr_conf.vvc_name,
3541 			    strerror(errno));
3542 			err = VRRP_ESYS;
3543 			goto done;
3544 		}
3545 
3546 		/*
3547 		 * Join the multicast group to receive VRRP packets.
3548 		 */
3549 		if (af == AF_INET) {
3550 			muladdr = (struct sockaddr_storage *)
3551 			    (void *)&vrrp_muladdr4;
3552 		} else {
3553 			muladdr = (struct sockaddr_storage *)
3554 			    (void *)&vrrp_muladdr6;
3555 		}
3556 
3557 		greq.gr_interface = pif->vvi_ifindex;
3558 		(void) memcpy(&greq.gr_group, muladdr,
3559 		    sizeof (struct sockaddr_storage));
3560 		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3561 		    sizeof (struct group_req)) < 0) {
3562 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3563 			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3564 			    pif->vvi_ifindex, strerror(errno));
3565 			err = VRRP_ESYS;
3566 			goto done;
3567 		} else {
3568 			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3569 			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3570 			    pif->vvi_ifindex);
3571 		}
3572 
3573 		/*
3574 		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3575 		 * when a packet is received. Call setsockopt() to receive such
3576 		 * information.
3577 		 */
3578 		if (af == AF_INET6) {
3579 			/*
3580 			 * Enable receipt of destination address info
3581 			 */
3582 			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3583 			    (char *)&on, sizeof (on)) < 0) {
3584 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3585 				    "enable recvpktinfo failed: %s",
3586 				    vr->vvr_conf.vvc_name, strerror(errno));
3587 				err = VRRP_ESYS;
3588 				goto done;
3589 			}
3590 
3591 			/*
3592 			 * Enable receipt of hoplimit info
3593 			 */
3594 			if (setsockopt(pif->vvi_sockfd, proto,
3595 			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3596 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3597 				    "enable recvhoplimit failed: %s",
3598 				    vr->vvr_conf.vvc_name, strerror(errno));
3599 				err = VRRP_ESYS;
3600 				goto done;
3601 			}
3602 		}
3603 
3604 		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3605 		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3606 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3607 			    "iu_register_event() failed",
3608 			    vr->vvr_conf.vvc_name);
3609 			err = VRRP_ESYS;
3610 			goto done;
3611 		}
3612 	} else {
3613 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3614 		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3615 		    pif->vvi_nvr);
3616 		assert(IS_PRIMARY_INTF(pif));
3617 	}
3618 
3619 done:
3620 	vr->vvr_pif = pif;
3621 	if (err != VRRP_SUCCESS)
3622 		vrrpd_fini_rxsock(vr);
3623 
3624 	return (err);
3625 }
3626 
3627 /*
3628  * Delete the socket which is used to receive VRRP packets for the given
3629  * VRRP router. Since all virtual routers that configured on the same
3630  * physical interface share the same socket, the socket is only closed
3631  * when the last VRRP router share this socket is deleted.
3632  */
3633 static void
3634 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3635 {
3636 	vrrp_intf_t	*pif = vr->vvr_pif;
3637 
3638 	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3639 
3640 	if (pif == NULL)
3641 		return;
3642 
3643 	if (--pif->vvi_nvr == 0) {
3644 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3645 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3646 		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3647 		(void) close(pif->vvi_sockfd);
3648 		pif->vvi_pip = NULL;
3649 		pif->vvi_sockfd = -1;
3650 		pif->vvi_eid = -1;
3651 	} else {
3652 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3653 		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3654 	}
3655 	vr->vvr_pif = NULL;
3656 }
3657 
3658 /*
3659  * Create the socket which is used to send VRRP packets. Further, set
3660  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3661  */
3662 static vrrp_err_t
3663 vrrpd_init_txsock(vrrp_vr_t *vr)
3664 {
3665 	int		af;
3666 	vrrp_intf_t	*vif;
3667 	vrrp_err_t	err;
3668 
3669 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3670 
3671 	if (vr->vvr_vif != NULL) {
3672 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3673 		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3674 		return (VRRP_SUCCESS);
3675 	}
3676 
3677 	af = vr->vvr_conf.vvc_af;
3678 	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3679 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3680 		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3681 		return (VRRP_ENOVIRT);
3682 	}
3683 
3684 	vr->vvr_vif = vif;
3685 	if (vr->vvr_conf.vvc_af == AF_INET)
3686 		err = vrrpd_init_txsock_v4(vr);
3687 	else
3688 		err = vrrpd_init_txsock_v6(vr);
3689 
3690 	if (err != VRRP_SUCCESS)
3691 		goto done;
3692 
3693 	/*
3694 	 * The interface should start with IFF_NOACCEPT flag not set, only
3695 	 * call this function when the VRRP router requires IFF_NOACCEPT.
3696 	 */
3697 	if (!vr->vvr_conf.vvc_accept)
3698 		err = vrrpd_set_noaccept(vr, _B_TRUE);
3699 
3700 done:
3701 	if (err != VRRP_SUCCESS) {
3702 		(void) close(vif->vvi_sockfd);
3703 		vif->vvi_sockfd = -1;
3704 		vr->vvr_vif = NULL;
3705 	}
3706 
3707 	return (err);
3708 }
3709 
3710 /*
3711  * Create the IPv4 socket which is used to send VRRP packets. Note that
3712  * the destination MAC address of VRRP advertisement must be the virtual
3713  * MAC address, so we specify the output interface to be the specific VNIC.
3714  */
3715 static vrrp_err_t
3716 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3717 {
3718 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3719 	vrrp_ip_t *vip;		/* The first IP over the VNIC */
3720 	int on = 1;
3721 	char off = 0;
3722 	vrrp_err_t err = VRRP_SUCCESS;
3723 	char abuf[INET6_ADDRSTRLEN];
3724 
3725 	vif = vr->vvr_vif;
3726 	assert(vr->vvr_conf.vvc_af == AF_INET);
3727 	assert(vif != NULL);
3728 
3729 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3730 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3731 
3732 	if (vif->vvi_sockfd != -1) {
3733 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3734 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3735 		return (VRRP_SUCCESS);
3736 	}
3737 
3738 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3739 	if (vif->vvi_sockfd < 0) {
3740 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3741 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3742 		err = VRRP_ESYS;
3743 		goto done;
3744 	}
3745 
3746 	/*
3747 	 * Include the IP header, so that we can specify the IP address/ttl.
3748 	 */
3749 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3750 	    sizeof (on)) < 0) {
3751 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3752 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3753 		err = VRRP_ESYS;
3754 		goto done;
3755 	}
3756 
3757 	/*
3758 	 * Disable multicast loopback.
3759 	 */
3760 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3761 	    sizeof (char)) == -1) {
3762 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3763 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3764 		    strerror(errno));
3765 		err = VRRP_ESYS;
3766 		goto done;
3767 	}
3768 
3769 	vip = TAILQ_FIRST(&vif->vvi_iplist);
3770 	/* LINTED E_CONSTANT_CONDITION */
3771 	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3772 	    _B_FALSE);
3773 
3774 	/*
3775 	 * Set the output interface to send the VRRP packet.
3776 	 */
3777 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3778 	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3779 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3780 		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3781 		err = VRRP_ESYS;
3782 	} else {
3783 		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3784 		    "succeed", vr->vvr_conf.vvc_name, abuf);
3785 	}
3786 
3787 done:
3788 	if (err != VRRP_SUCCESS) {
3789 		(void) close(vif->vvi_sockfd);
3790 		vif->vvi_sockfd = -1;
3791 	}
3792 
3793 	return (err);
3794 }
3795 
3796 /*
3797  * Create the IPv6 socket which is used to send VRRP packets. Note that
3798  * the destination must be the virtual MAC address, so we specify the output
3799  * interface to be the specific VNIC.
3800  */
3801 static vrrp_err_t
3802 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3803 {
3804 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3805 	int off = 0, ttl = VRRP_IP_TTL;
3806 	vrrp_err_t err = VRRP_SUCCESS;
3807 
3808 	vif = vr->vvr_vif;
3809 	assert(vr->vvr_conf.vvc_af == AF_INET6);
3810 	assert(vif != NULL);
3811 
3812 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3813 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3814 
3815 	if (vif->vvi_sockfd != -1) {
3816 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3817 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3818 		return (VRRP_SUCCESS);
3819 	}
3820 
3821 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3822 	if (vif->vvi_sockfd < 0) {
3823 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3824 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3825 		err = VRRP_ESYS;
3826 		goto done;
3827 	}
3828 
3829 	/*
3830 	 * Disable multicast loopback.
3831 	 */
3832 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3833 	    &off, sizeof (int)) == -1) {
3834 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3835 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3836 		    strerror(errno));
3837 		err = VRRP_ESYS;
3838 		goto done;
3839 	}
3840 
3841 	/*
3842 	 * Set the multicast TTL.
3843 	 */
3844 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3845 	    &ttl, sizeof (int)) == -1) {
3846 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3847 		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3848 		    ttl, strerror(errno));
3849 		err = VRRP_ESYS;
3850 		goto done;
3851 	}
3852 
3853 	/*
3854 	 * Set the output interface to send the VRRP packet.
3855 	 */
3856 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3857 	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3858 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3859 		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3860 		    strerror(errno));
3861 		err = VRRP_ESYS;
3862 	} else {
3863 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3864 		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3865 	}
3866 
3867 done:
3868 	if (err != VRRP_SUCCESS) {
3869 		(void) close(vif->vvi_sockfd);
3870 		vif->vvi_sockfd = -1;
3871 	}
3872 
3873 	return (err);
3874 }
3875 
3876 /*
3877  * Delete the socket which is used to send VRRP packets. Further, clear
3878  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3879  */
3880 static void
3881 vrrpd_fini_txsock(vrrp_vr_t *vr)
3882 {
3883 	vrrp_intf_t *vif = vr->vvr_vif;
3884 
3885 	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3886 
3887 	if (vif != NULL) {
3888 		if (!vr->vvr_conf.vvc_accept)
3889 			(void) vrrpd_set_noaccept(vr, _B_FALSE);
3890 		(void) close(vif->vvi_sockfd);
3891 		vif->vvi_sockfd = -1;
3892 		vr->vvr_vif = NULL;
3893 	}
3894 }
3895 
3896 /*
3897  * Given the the pseudo header cksum value (sum), caculate the cksum with
3898  * the rest of VRRP packet.
3899  */
3900 static uint16_t
3901 in_cksum(int sum, uint16_t plen, void *p)
3902 {
3903 	int nleft;
3904 	uint16_t *w;
3905 	uint16_t answer;
3906 	uint16_t odd_byte = 0;
3907 
3908 	nleft = plen;
3909 	w = (uint16_t *)p;
3910 	while (nleft > 1) {
3911 		sum += *w++;
3912 		nleft -= 2;
3913 	}
3914 
3915 	/* mop up an odd byte, if necessary */
3916 	if (nleft == 1) {
3917 		*(uchar_t *)(&odd_byte) = *(uchar_t *)w;
3918 		sum += odd_byte;
3919 	}
3920 
3921 	/*
3922 	 * add back carry outs from top 16 bits to low 16 bits
3923 	 */
3924 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
3925 	sum += (sum >> 16);			/* add carry */
3926 	answer = ~sum;				/* truncate to 16 bits */
3927 	return (answer == 0 ? ~0 : answer);
3928 }
3929 
/*
 * Pseudo header for v4. The VRRP checksum routine for IPv4 fills in one of
 * these and adds its 16-bit words to the checksum ahead of the VRRP message
 * itself. The structure is summed exactly as it is laid out in memory, so
 * the order and the size of its fields must not be changed.
 */
struct pshv4 {
	struct in_addr	ph4_src;	/* IP source address */
	struct in_addr	ph4_dst;	/* IP destination address */
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};
3938 
3939 /*
3940  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
3941  * packet length (in the host byte order), and both IP source and destination
3942  * addresses are in the network byte order.
3943  */
3944 static uint16_t
3945 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
3946     vrrp_pkt_t *vp)
3947 {
3948 	struct pshv4 ph4;
3949 	int nleft;
3950 	uint16_t *w;
3951 	int sum = 0;
3952 
3953 	ph4.ph4_src = *src;
3954 	ph4.ph4_dst = *dst;
3955 	ph4.ph4_zero = 0;
3956 	ph4.ph4_protocol = IPPROTO_VRRP;
3957 	ph4.ph4_len = htons(plen);
3958 
3959 	/*
3960 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
3961 	 *  we add sequential 16 bit words to it, and at the end, fold
3962 	 *  back all the carry bits from the top 16 bits into the lower
3963 	 *  16 bits.
3964 	 */
3965 	nleft = sizeof (struct pshv4);
3966 	w = (uint16_t *)&ph4;
3967 	while (nleft > 0) {
3968 		sum += *w++;
3969 		nleft -= 2;
3970 	}
3971 
3972 	return (in_cksum(sum, plen, vp));
3973 }
3974 
/*
 * Pseudo header for v6. The VRRP checksum routine for IPv6 fills in one of
 * these and adds its 16-bit words to the checksum ahead of the VRRP message
 * itself. The structure is summed exactly as it is laid out in memory, so
 * the order and the size of its fields must not be changed.
 */
struct pshv6 {
	struct in6_addr	ph6_src;	/* IP source address */
	struct in6_addr	ph6_dst;	/* IP destination address */
	uint32_t	ph6_len;	/* VRRP payload len */
	uint32_t	ph6_zero : 24,	/* always zero */
			ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};
3983 
3984 /*
3985  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
3986  * packet length (in the host byte order), and both IP source and destination
3987  * addresses are in the network byte order.
3988  */
3989 static uint16_t
3990 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
3991     vrrp_pkt_t *vp)
3992 {
3993 	struct pshv6 ph6;
3994 	int nleft;
3995 	uint16_t *w;
3996 	int sum = 0;
3997 
3998 	ph6.ph6_src = *src;
3999 	ph6.ph6_dst = *dst;
4000 	ph6.ph6_zero = 0;
4001 	ph6.ph6_protocol = IPPROTO_VRRP;
4002 	ph6.ph6_len = htonl((uint32_t)plen);
4003 
4004 	/*
4005 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4006 	 *  we add sequential 16 bit words to it, and at the end, fold
4007 	 *  back all the carry bits from the top 16 bits into the lower
4008 	 *  16 bits.
4009 	 */
4010 	nleft = sizeof (struct pshv6);
4011 	w = (uint16_t *)&ph6;
4012 	while (nleft > 0) {
4013 		sum += *w++;
4014 		nleft -= 2;
4015 	}
4016 
4017 	return (in_cksum(sum, plen, vp));
4018 }
4019 
4020 vrrp_err_t
4021 vrrpd_state_i2m(vrrp_vr_t *vr)
4022 {
4023 	vrrp_err_t	err;
4024 
4025 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4026 
4027 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4028 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4029 		return (err);
4030 
4031 	(void) vrrpd_send_adv(vr, _B_FALSE);
4032 
4033 	vr->vvr_err = VRRP_SUCCESS;
4034 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4035 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4036 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4037 		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4038 		return (VRRP_ESYS);
4039 	} else {
4040 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4041 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4042 		    vr->vvr_timeout);
4043 	}
4044 	return (VRRP_SUCCESS);
4045 }
4046 
4047 vrrp_err_t
4048 vrrpd_state_i2b(vrrp_vr_t *vr)
4049 {
4050 	vrrp_err_t	err;
4051 
4052 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4053 
4054 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4055 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4056 		return (err);
4057 
4058 	/*
4059 	 * Reinitialize the Master advertisement interval to be the configured
4060 	 * value.
4061 	 */
4062 	vr->vvr_err = VRRP_SUCCESS;
4063 	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4064 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4065 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4066 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4067 		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4068 		return (VRRP_ESYS);
4069 	} else {
4070 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4071 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4072 		    vr->vvr_timeout);
4073 	}
4074 	return (VRRP_SUCCESS);
4075 }
4076 
4077 void
4078 vrrpd_state_m2i(vrrp_vr_t *vr)
4079 {
4080 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);
4081 
4082 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
4083 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4084 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4085 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4086 }
4087 
4088 void
4089 vrrpd_state_b2i(vrrp_vr_t *vr)
4090 {
4091 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);
4092 
4093 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4094 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4095 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
4096 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4097 }
4098 
4099 /* ARGSUSED */
4100 static void
4101 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4102 {
4103 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4104 
4105 	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4106 	(void) vrrpd_state_b2m(vr);
4107 }
4108 
4109 /* ARGSUSED */
4110 static void
4111 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4112 {
4113 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4114 
4115 	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4116 
4117 	(void) vrrpd_send_adv(vr, _B_FALSE);
4118 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4119 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4120 		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4121 		    vr->vvr_conf.vvc_name);
4122 	} else {
4123 		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4124 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4125 		    vr->vvr_timeout);
4126 	}
4127 }
4128 
4129 vrrp_err_t
4130 vrrpd_state_b2m(vrrp_vr_t *vr)
4131 {
4132 	vrrp_err_t	err;
4133 
4134 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4135 
4136 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4137 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4138 		return (err);
4139 	(void) vrrpd_send_adv(vr, _B_FALSE);
4140 
4141 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4142 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4143 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4144 		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4145 		    vr->vvr_conf.vvc_name);
4146 		return (VRRP_ESYS);
4147 	} else {
4148 		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4149 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4150 		    vr->vvr_timeout);
4151 	}
4152 	return (VRRP_SUCCESS);
4153 }
4154 
4155 vrrp_err_t
4156 vrrpd_state_m2b(vrrp_vr_t *vr)
4157 {
4158 	vrrp_err_t	err;
4159 
4160 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4161 
4162 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4163 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4164 		return (err);
4165 
4166 	/*
4167 	 * Cancel the adver_timer.
4168 	 */
4169 	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4170 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4171 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4172 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4173 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4174 		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4175 		    vr->vvr_conf.vvc_name);
4176 	} else {
4177 		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4178 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4179 		    vr->vvr_timeout);
4180 	}
4181 	return (VRRP_SUCCESS);
4182 }
4183 
4184 /*
4185  * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4186  * based on its access mode.
4187  */
4188 static vrrp_err_t
4189 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4190 {
4191 	vrrp_intf_t *vif = vr->vvr_vif;
4192 	uint64_t curr_flags;
4193 	struct lifreq lifr;
4194 	int s;
4195 
4196 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4197 	    vr->vvr_conf.vvc_name, on ? "on" : "off");
4198 
4199 	/*
4200 	 * Possibly no virtual address exists on this VRRP router yet.
4201 	 */
4202 	if (vif == NULL)
4203 		return (VRRP_SUCCESS);
4204 
4205 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4206 	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4207 
4208 	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4209 	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4210 	    sizeof (lifr.lifr_name));
4211 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4212 		if (errno != ENXIO && errno != ENOENT) {
4213 			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4214 			    "SIOCGLIFFLAGS on %s failed: %s",
4215 			    vif->vvi_ifname, strerror(errno));
4216 		}
4217 		return (VRRP_ESYS);
4218 	}
4219 
4220 	curr_flags = lifr.lifr_flags;
4221 	if (on)
4222 		lifr.lifr_flags |= IFF_NOACCEPT;
4223 	else
4224 		lifr.lifr_flags &= ~IFF_NOACCEPT;
4225 
4226 	if (lifr.lifr_flags != curr_flags) {
4227 		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4228 			if (errno != ENXIO && errno != ENOENT) {
4229 				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4230 				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4231 				    on ? "no_accept" : "accept",
4232 				    lifr.lifr_flags, vif->vvi_ifname,
4233 				    strerror(errno));
4234 			}
4235 			return (VRRP_ESYS);
4236 		}
4237 	}
4238 	return (VRRP_SUCCESS);
4239 }
4240 
4241 static vrrp_err_t
4242 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4243 {
4244 	vrrp_state_t	state = vif->vvi_vr_state;
4245 	struct lifreq	lifr;
4246 	char		abuf[INET6_ADDRSTRLEN];
4247 	int		af = vif->vvi_af;
4248 	uint64_t	curr_flags;
4249 	int		s;
4250 
4251 	assert(IS_VIRTUAL_INTF(vif));
4252 
4253 	/* LINTED E_CONSTANT_CONDITION */
4254 	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4255 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4256 	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4257 
4258 	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4259 	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
4260 	    sizeof (lifr.lifr_name));
4261 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4262 		if (errno != ENXIO && errno != ENOENT) {
4263 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4264 			    "SIOCGLIFFLAGS on %s/%s failed: %s",
4265 			    vif->vvi_ifname, lifr.lifr_name, abuf,
4266 			    strerror(errno));
4267 		}
4268 		return (VRRP_ESYS);
4269 	}
4270 
4271 	curr_flags = lifr.lifr_flags;
4272 	if (state == VRRP_STATE_MASTER)
4273 		lifr.lifr_flags |= IFF_UP;
4274 	else
4275 		lifr.lifr_flags &= ~IFF_UP;
4276 
4277 	if (lifr.lifr_flags == curr_flags)
4278 		return (VRRP_SUCCESS);
4279 
4280 	if (checkonly) {
4281 		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4282 		    ip->vip_lifname, abuf,
4283 		    state == VRRP_STATE_MASTER ? "down" : "up");
4284 		return (VRRP_ESYS);
4285 	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4286 		if (errno != ENXIO && errno != ENOENT) {
4287 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4288 			    "bring %s %s/%s failed: %s",
4289 			    vif->vvi_ifname, vrrp_state2str(state),
4290 			    state == VRRP_STATE_MASTER ? "up" : "down",
4291 			    ip->vip_lifname, abuf, strerror(errno));
4292 		}
4293 		return (VRRP_ESYS);
4294 	}
4295 	return (VRRP_SUCCESS);
4296 }
4297 
4298 static vrrp_err_t
4299 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4300 {
4301 	vrrp_state_t		state;
4302 	vrrp_intf_t		*vif = vr->vvr_vif;
4303 	vrrp_ip_t		*ip, *nextip;
4304 	char			abuf[INET6_ADDRSTRLEN];
4305 	vrrp_err_t		err;
4306 
4307 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4308 	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4309 	    vif->vvi_ifname, checkonly ? " checkonly" : "");
4310 
4311 	state = vr->vvr_state;
4312 	assert(vif != NULL);
4313 	assert(IS_VIRTUAL_INTF(vif));
4314 	assert(vif->vvi_vr_state != state);
4315 	vif->vvi_vr_state = state;
4316 	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4317 		nextip = TAILQ_NEXT(ip, vip_next);
4318 		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4319 		if (!checkonly && err != VRRP_SUCCESS) {
4320 			/* LINTED E_CONSTANT_CONDITION */
4321 			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4322 			    INET6_ADDRSTRLEN, _B_FALSE);
4323 			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4324 			    "%s over %s failed", abuf, vif->vvi_ifname);
4325 			vrrpd_delete_ip(vif, ip);
4326 		}
4327 	}
4328 
4329 	/*
4330 	 * The IP address is deleted when it is failed to be brought
4331 	 * up. If no IP addresses are left, delete this interface.
4332 	 */
4333 	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4334 		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4335 		    "no IP left over %s", vif->vvi_ifname);
4336 		vrrpd_delete_if(vif, _B_TRUE);
4337 		return (VRRP_ENOVIRT);
4338 	}
4339 	return (VRRP_SUCCESS);
4340 }
4341 
4342 void
4343 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4344 {
4345 	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4346 	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4347 
4348 	assert(vr->vvr_state == prev_s);
4349 	vr->vvr_state = s;
4350 	vr->vvr_prev_state = prev_s;
4351 	(void) gettimeofday(&vr->vvr_st_time, NULL);
4352 	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4353 }
4354 
4355 static int
4356 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4357 {
4358 	sysevent_id_t	eid;
4359 	nvlist_t	*nvl = NULL;
4360 
4361 	/*
4362 	 * sysevent is not supported in the non-global zone
4363 	 */
4364 	if (getzoneid() != GLOBAL_ZONEID)
4365 		return (0);
4366 
4367 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4368 		goto failed;
4369 
4370 	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4371 	    VRRP_EVENT_CUR_VERSION) != 0)
4372 		goto failed;
4373 
4374 	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4375 		goto failed;
4376 
4377 	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4378 		goto failed;
4379 
4380 	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4381 		goto failed;
4382 
4383 	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4384 	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4385 		nvlist_free(nvl);
4386 		return (0);
4387 	}
4388 
4389 failed:
4390 	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4391 	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
4392 	    vrrp_state2str(st), strerror(errno));
4393 
4394 	if (nvl != NULL)
4395 		nvlist_free(nvl);
4396 	return (-1);
4397 }
4398 
/*
 * timeval processing functions
 */

/*
 * Convert a timeval to milliseconds, rounding the sub-millisecond part to
 * the nearest millisecond (half a millisecond rounds up). tv_usec is
 * expected to be normalized, i.e. in the range [0, 1000000).
 *
 * The rounding is done with integer arithmetic: adding 0.5 after the
 * integer division by 1000 cannot round, because the fraction has already
 * been discarded by then.
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
4407 
/*
 * Return t1 - t2. When the microsecond difference is negative, one second
 * is borrowed so that tv_usec of the result is not negative for
 * normalized inputs.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval diff = t1;

	diff.tv_sec -= t2.tv_sec;
	diff.tv_usec -= t2.tv_usec;
	if (diff.tv_usec >= 0)
		return (diff);

	/* Borrow one second to bring tv_usec back into range. */
	diff.tv_usec += 1000000;
	diff.tv_sec--;
	return (diff);
}
4421 
4422 /*
4423  * print error messages to the terminal or to syslog
4424  */
4425 static void
4426 vrrp_log(int level, char *message, ...)
4427 {
4428 	va_list ap;
4429 	int log_level = -1;
4430 
4431 	va_start(ap, message);
4432 
4433 	if (vrrp_logflag == 0) {
4434 		if (level <= vrrp_debug_level) {
4435 			/*
4436 			 * VRRP_ERR goes to stderr, others go to stdout
4437 			 */
4438 			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4439 			/* LINTED: E_SEC_PRINTF_VAR_FMT */
4440 			(void) vfprintf(out, message, ap);
4441 			(void) fprintf(out, "\n");
4442 			(void) fflush(out);
4443 		}
4444 		va_end(ap);
4445 		return;
4446 	}
4447 
4448 	/*
4449 	 * translate VRRP_* to LOG_*
4450 	 */
4451 	switch (level) {
4452 	case VRRP_ERR:
4453 		log_level = LOG_ERR;
4454 		break;
4455 	case VRRP_WARNING:
4456 		log_level = LOG_WARNING;
4457 		break;
4458 	case VRRP_NOTICE:
4459 		log_level = LOG_NOTICE;
4460 		break;
4461 	case VRRP_DBG0:
4462 		log_level = LOG_INFO;
4463 		break;
4464 	default:
4465 		log_level = LOG_DEBUG;
4466 		break;
4467 	}
4468 
4469 	/* LINTED: E_SEC_PRINTF_VAR_FMT */
4470 	(void) vsyslog(log_level, message, ap);
4471 	va_end(ap);
4472 }
4473