1*dbed73cbSSangeeta Misra /*
2*dbed73cbSSangeeta Misra  * CDDL HEADER START
3*dbed73cbSSangeeta Misra  *
4*dbed73cbSSangeeta Misra  * The contents of this file are subject to the terms of the
5*dbed73cbSSangeeta Misra  * Common Development and Distribution License (the "License").
6*dbed73cbSSangeeta Misra  * You may not use this file except in compliance with the License.
7*dbed73cbSSangeeta Misra  *
8*dbed73cbSSangeeta Misra  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*dbed73cbSSangeeta Misra  * or http://www.opensolaris.org/os/licensing.
10*dbed73cbSSangeeta Misra  * See the License for the specific language governing permissions
11*dbed73cbSSangeeta Misra  * and limitations under the License.
12*dbed73cbSSangeeta Misra  *
13*dbed73cbSSangeeta Misra  * When distributing Covered Code, include this CDDL HEADER in each
14*dbed73cbSSangeeta Misra  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*dbed73cbSSangeeta Misra  * If applicable, add the following below this CDDL HEADER, with the
16*dbed73cbSSangeeta Misra  * fields enclosed by brackets "[]" replaced with your own identifying
17*dbed73cbSSangeeta Misra  * information: Portions Copyright [yyyy] [name of copyright owner]
18*dbed73cbSSangeeta Misra  *
19*dbed73cbSSangeeta Misra  * CDDL HEADER END
20*dbed73cbSSangeeta Misra  */
21*dbed73cbSSangeeta Misra 
22*dbed73cbSSangeeta Misra /*
23*dbed73cbSSangeeta Misra  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24*dbed73cbSSangeeta Misra  * Use is subject to license terms.
25*dbed73cbSSangeeta Misra  */
26*dbed73cbSSangeeta Misra 
27*dbed73cbSSangeeta Misra #include <sys/types.h>
28*dbed73cbSSangeeta Misra #include <sys/socket.h>
29*dbed73cbSSangeeta Misra #include <sys/time.h>
30*dbed73cbSSangeeta Misra 
31*dbed73cbSSangeeta Misra #include <netinet/in_systm.h>
32*dbed73cbSSangeeta Misra #include <netinet/in.h>
33*dbed73cbSSangeeta Misra #include <netinet/ip.h>
34*dbed73cbSSangeeta Misra #include <netinet/ip6.h>
35*dbed73cbSSangeeta Misra #include <arpa/inet.h>
36*dbed73cbSSangeeta Misra #include <netinet/tcp.h>
37*dbed73cbSSangeeta Misra #include <netinet/ip_icmp.h>
38*dbed73cbSSangeeta Misra #include <netinet/icmp6.h>
39*dbed73cbSSangeeta Misra #include <netinet/udp.h>
40*dbed73cbSSangeeta Misra #include <netdb.h>
41*dbed73cbSSangeeta Misra #include <unistd.h>
42*dbed73cbSSangeeta Misra #include <stdio.h>
43*dbed73cbSSangeeta Misra #include <stdlib.h>
44*dbed73cbSSangeeta Misra #include <strings.h>
45*dbed73cbSSangeeta Misra #include <errno.h>
46*dbed73cbSSangeeta Misra #include <limits.h>
47*dbed73cbSSangeeta Misra #include <signal.h>
48*dbed73cbSSangeeta Misra #include <libgen.h>
49*dbed73cbSSangeeta Misra #include <fcntl.h>
50*dbed73cbSSangeeta Misra 
/*
 * Names under which ilbd invokes this program (argv[0]).  The name selects
 * the kind of probe that is sent out.
 */
#define	PROBE_PING	"ilb_ping"
#define	PROBE_PROTO	"ilb_probe"

/* Transport protocol used by the probe, as named in argv[3]. */
#define	PROTO_TCP	"TCP"
#define	PROTO_UDP	"UDP"

/* The kinds of probe this program can send. */
enum probe_type {
	ping_probe,
	tcp_probe,
	udp_probe
};

/* Load balance mode, as named in argv[4]. */
#define	MODE_DSR	"DSR"
#define	MODE_NAT	"NAT"
#define	MODE_HALF_NAT	"HALF_NAT"

/* Parsed form of the load balance mode. */
enum lb_mode {
	dsr,
	nat,
	half_nat
};

/* How many arguments ilbd passes to the command. */
#define	PROG_ARGC	7

/* Size, in bytes, of the buffer that receives an ICMP packet. */
#define	RECV_PKT_SZ	256
76*dbed73cbSSangeeta Misra 
77*dbed73cbSSangeeta Misra /*
78*dbed73cbSSangeeta Misra  * Struct to store the probe info (most is passed in using the argv[] array to
79*dbed73cbSSangeeta Misra  * the command given by ilbd).  The argv[] contains the following.
80*dbed73cbSSangeeta Misra  *
81*dbed73cbSSangeeta Misra  * argv[0] is either PROBE_PING or PROBE_PROTO
82*dbed73cbSSangeeta Misra  * argv[1] is the VIP
83*dbed73cbSSangeeta Misra  * argv[2] is the backend server address
84*dbed73cbSSangeeta Misra  * argv[3] is the transport protocol used in the rule
85*dbed73cbSSangeeta Misra  * argv[4] is the load balance mode, "DSR", "NAT", "HALF-NAT"
86*dbed73cbSSangeeta Misra  * argv[5] is the probe port
87*dbed73cbSSangeeta Misra  * argv[6] is the probe timeout
88*dbed73cbSSangeeta Misra  *
89*dbed73cbSSangeeta Misra  * The following three fields are used in sending ICMP ECHO probe.
90*dbed73cbSSangeeta Misra  *
91*dbed73cbSSangeeta Misra  * echo_id is the ID set in the probe
92*dbed73cbSSangeeta Misra  * echo_seq is the sequence set in the probe
93*dbed73cbSSangeeta Misra  * echo_cookie is the random number data in a probe
94*dbed73cbSSangeeta Misra  * lport is the local port (in network byte order) used to send the probe
95*dbed73cbSSangeeta Misra  */
96*dbed73cbSSangeeta Misra typedef struct {
97*dbed73cbSSangeeta Misra 	enum probe_type		probe;
98*dbed73cbSSangeeta Misra 	struct in6_addr		vip;		/* argv[1] */
99*dbed73cbSSangeeta Misra 	struct in6_addr		srv_addr;	/* argv[2] */
100*dbed73cbSSangeeta Misra 	int			proto;		/* argv[3] */
101*dbed73cbSSangeeta Misra 	enum lb_mode		mode;		/* argv[4] */
102*dbed73cbSSangeeta Misra 	in_port_t		port;		/* argv[5] */
103*dbed73cbSSangeeta Misra 	uint32_t		timeout;	/* argv[6] */
104*dbed73cbSSangeeta Misra 
105*dbed73cbSSangeeta Misra 	uint16_t		echo_id;
106*dbed73cbSSangeeta Misra 	uint16_t		echo_seq;
107*dbed73cbSSangeeta Misra 	uint32_t		echo_cookie;
108*dbed73cbSSangeeta Misra 	in_port_t		lport;
109*dbed73cbSSangeeta Misra } probe_param_t;
110*dbed73cbSSangeeta Misra 
111*dbed73cbSSangeeta Misra /* Global variable to indicate whether a timeout means success. */
112*dbed73cbSSangeeta Misra static boolean_t timeout_is_good;
113*dbed73cbSSangeeta Misra 
114*dbed73cbSSangeeta Misra /* SIGALRM handler */
115*dbed73cbSSangeeta Misra /* ARGSUSED */
116*dbed73cbSSangeeta Misra static void
probe_exit(int s)117*dbed73cbSSangeeta Misra probe_exit(int s)
118*dbed73cbSSangeeta Misra {
119*dbed73cbSSangeeta Misra 	if (timeout_is_good) {
120*dbed73cbSSangeeta Misra 		(void) printf("0");
121*dbed73cbSSangeeta Misra 		exit(0);
122*dbed73cbSSangeeta Misra 	} else {
123*dbed73cbSSangeeta Misra 		(void) printf("-1");
124*dbed73cbSSangeeta Misra 		exit(255);
125*dbed73cbSSangeeta Misra 	}
126*dbed73cbSSangeeta Misra }
127*dbed73cbSSangeeta Misra 
128*dbed73cbSSangeeta Misra /*
129*dbed73cbSSangeeta Misra  * Checksum routine for Internet Protocol family headers (C Version)
130*dbed73cbSSangeeta Misra  * (copied from ping.c)
131*dbed73cbSSangeeta Misra  */
132*dbed73cbSSangeeta Misra static ushort_t
in_cksum(ushort_t * addr,int len)133*dbed73cbSSangeeta Misra in_cksum(ushort_t *addr, int len)
134*dbed73cbSSangeeta Misra {
135*dbed73cbSSangeeta Misra 	int nleft = len;
136*dbed73cbSSangeeta Misra 	ushort_t *w = addr;
137*dbed73cbSSangeeta Misra 	ushort_t answer;
138*dbed73cbSSangeeta Misra 	ushort_t odd_byte = 0;
139*dbed73cbSSangeeta Misra 	int sum = 0;
140*dbed73cbSSangeeta Misra 
141*dbed73cbSSangeeta Misra 	/*
142*dbed73cbSSangeeta Misra 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
143*dbed73cbSSangeeta Misra 	 *  we add sequential 16 bit words to it, and at the end, fold
144*dbed73cbSSangeeta Misra 	 *  back all the carry bits from the top 16 bits into the lower
145*dbed73cbSSangeeta Misra 	 *  16 bits.
146*dbed73cbSSangeeta Misra 	 */
147*dbed73cbSSangeeta Misra 	while (nleft > 1) {
148*dbed73cbSSangeeta Misra 		sum += *w++;
149*dbed73cbSSangeeta Misra 		nleft -= 2;
150*dbed73cbSSangeeta Misra 	}
151*dbed73cbSSangeeta Misra 
152*dbed73cbSSangeeta Misra 	/* mop up an odd byte, if necessary */
153*dbed73cbSSangeeta Misra 	if (nleft == 1) {
154*dbed73cbSSangeeta Misra 		*(uchar_t *)(&odd_byte) = *(uchar_t *)w;
155*dbed73cbSSangeeta Misra 		sum += odd_byte;
156*dbed73cbSSangeeta Misra 	}
157*dbed73cbSSangeeta Misra 
158*dbed73cbSSangeeta Misra 	/*
159*dbed73cbSSangeeta Misra 	 * add back carry outs from top 16 bits to low 16 bits
160*dbed73cbSSangeeta Misra 	 */
161*dbed73cbSSangeeta Misra 	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
162*dbed73cbSSangeeta Misra 	sum += (sum >> 16);			/* add carry */
163*dbed73cbSSangeeta Misra 	answer = ~sum;				/* truncate to 16 bits */
164*dbed73cbSSangeeta Misra 	return (answer);
165*dbed73cbSSangeeta Misra }
166*dbed73cbSSangeeta Misra 
167*dbed73cbSSangeeta Misra /* It is assumed that argv[] contains PROBE_ARGC arguments. */
168*dbed73cbSSangeeta Misra static boolean_t
parse_probe_param(char * argv[],probe_param_t * param)169*dbed73cbSSangeeta Misra parse_probe_param(char *argv[], probe_param_t *param)
170*dbed73cbSSangeeta Misra {
171*dbed73cbSSangeeta Misra 	int32_t port;
172*dbed73cbSSangeeta Misra 	int64_t timeout;
173*dbed73cbSSangeeta Misra 	struct in_addr v4addr;
174*dbed73cbSSangeeta Misra 
175*dbed73cbSSangeeta Misra 	if (strcmp(basename(argv[0]), PROBE_PING) == 0) {
176*dbed73cbSSangeeta Misra 		param->probe = ping_probe;
177*dbed73cbSSangeeta Misra 	} else {
178*dbed73cbSSangeeta Misra 		if (strcmp(basename(argv[0]), PROBE_PROTO) != 0)
179*dbed73cbSSangeeta Misra 			return (B_FALSE);
180*dbed73cbSSangeeta Misra 
181*dbed73cbSSangeeta Misra 		if (strcasecmp(argv[3], PROTO_TCP) == 0) {
182*dbed73cbSSangeeta Misra 			param->probe = tcp_probe;
183*dbed73cbSSangeeta Misra 			param->proto = IPPROTO_TCP;
184*dbed73cbSSangeeta Misra 		} else if (strcasecmp(argv[3], PROTO_UDP) == 0) {
185*dbed73cbSSangeeta Misra 			param->probe = udp_probe;
186*dbed73cbSSangeeta Misra 			param->proto = IPPROTO_UDP;
187*dbed73cbSSangeeta Misra 		} else {
188*dbed73cbSSangeeta Misra 			return (B_FALSE);
189*dbed73cbSSangeeta Misra 		}
190*dbed73cbSSangeeta Misra 	}
191*dbed73cbSSangeeta Misra 
192*dbed73cbSSangeeta Misra 	if (strchr(argv[1], ':') != NULL) {
193*dbed73cbSSangeeta Misra 		if (inet_pton(AF_INET6, argv[1], &param->vip) == 0)
194*dbed73cbSSangeeta Misra 			return (B_FALSE);
195*dbed73cbSSangeeta Misra 	} else if (strchr(argv[1], '.') != NULL) {
196*dbed73cbSSangeeta Misra 		if (inet_pton(AF_INET, argv[1], &v4addr) == 0)
197*dbed73cbSSangeeta Misra 			return (B_FALSE);
198*dbed73cbSSangeeta Misra 		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->vip);
199*dbed73cbSSangeeta Misra 	} else {
200*dbed73cbSSangeeta Misra 		return (B_FALSE);
201*dbed73cbSSangeeta Misra 	}
202*dbed73cbSSangeeta Misra 
203*dbed73cbSSangeeta Misra 	/*
204*dbed73cbSSangeeta Misra 	 * The address family of vip and srv_addr should be the same for
205*dbed73cbSSangeeta Misra 	 * now.  But in future, we may allow them to be different...  So
206*dbed73cbSSangeeta Misra 	 * we don't do a check here.
207*dbed73cbSSangeeta Misra 	 */
208*dbed73cbSSangeeta Misra 	if (strchr(argv[2], ':') != NULL) {
209*dbed73cbSSangeeta Misra 		if (inet_pton(AF_INET6, argv[2], &param->srv_addr) == 0)
210*dbed73cbSSangeeta Misra 			return (B_FALSE);
211*dbed73cbSSangeeta Misra 	} else if (strchr(argv[2], '.') != NULL) {
212*dbed73cbSSangeeta Misra 		if (inet_pton(AF_INET, argv[2], &v4addr) == 0)
213*dbed73cbSSangeeta Misra 			return (B_FALSE);
214*dbed73cbSSangeeta Misra 		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->srv_addr);
215*dbed73cbSSangeeta Misra 	} else {
216*dbed73cbSSangeeta Misra 		return (B_FALSE);
217*dbed73cbSSangeeta Misra 	}
218*dbed73cbSSangeeta Misra 
219*dbed73cbSSangeeta Misra 	if (strcasecmp(argv[4], MODE_DSR) == 0)
220*dbed73cbSSangeeta Misra 		param->mode = dsr;
221*dbed73cbSSangeeta Misra 	else if (strcasecmp(argv[4], MODE_NAT) == 0)
222*dbed73cbSSangeeta Misra 		param->mode = nat;
223*dbed73cbSSangeeta Misra 	else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0)
224*dbed73cbSSangeeta Misra 		param->mode = half_nat;
225*dbed73cbSSangeeta Misra 	else
226*dbed73cbSSangeeta Misra 		return (B_FALSE);
227*dbed73cbSSangeeta Misra 
228*dbed73cbSSangeeta Misra 	if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX)
229*dbed73cbSSangeeta Misra 		return (B_FALSE);
230*dbed73cbSSangeeta Misra 	param->port = port;
231*dbed73cbSSangeeta Misra 
232*dbed73cbSSangeeta Misra 	if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX)
233*dbed73cbSSangeeta Misra 		return (B_FALSE);
234*dbed73cbSSangeeta Misra 	param->timeout = timeout;
235*dbed73cbSSangeeta Misra 
236*dbed73cbSSangeeta Misra 	return (B_TRUE);
237*dbed73cbSSangeeta Misra }
238*dbed73cbSSangeeta Misra 
239*dbed73cbSSangeeta Misra /*
240*dbed73cbSSangeeta Misra  * Set up the destination address to be used to send a probe based on
241*dbed73cbSSangeeta Misra  * param.
242*dbed73cbSSangeeta Misra  */
243*dbed73cbSSangeeta Misra static int
set_sockaddr(struct sockaddr_storage * addr,socklen_t * addr_len,void ** next_hop,probe_param_t * param)244*dbed73cbSSangeeta Misra set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len,
245*dbed73cbSSangeeta Misra     void **next_hop, probe_param_t *param)
246*dbed73cbSSangeeta Misra {
247*dbed73cbSSangeeta Misra 	int af;
248*dbed73cbSSangeeta Misra 	struct in6_addr *param_addr;
249*dbed73cbSSangeeta Misra 	struct sockaddr_in *v4_addr;
250*dbed73cbSSangeeta Misra 	struct sockaddr_in6 *v6_addr;
251*dbed73cbSSangeeta Misra 	boolean_t nh = B_FALSE;
252*dbed73cbSSangeeta Misra 
253*dbed73cbSSangeeta Misra 	switch (param->mode) {
254*dbed73cbSSangeeta Misra 	case dsr:
255*dbed73cbSSangeeta Misra 		param_addr = &param->vip;
256*dbed73cbSSangeeta Misra 		nh = B_TRUE;
257*dbed73cbSSangeeta Misra 		break;
258*dbed73cbSSangeeta Misra 	case nat:
259*dbed73cbSSangeeta Misra 	case half_nat:
260*dbed73cbSSangeeta Misra 		param_addr = &param->srv_addr;
261*dbed73cbSSangeeta Misra 		break;
262*dbed73cbSSangeeta Misra 	}
263*dbed73cbSSangeeta Misra 	if (IN6_IS_ADDR_V4MAPPED(param_addr)) {
264*dbed73cbSSangeeta Misra 		af = AF_INET;
265*dbed73cbSSangeeta Misra 		v4_addr = (struct sockaddr_in *)addr;
266*dbed73cbSSangeeta Misra 		IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr);
267*dbed73cbSSangeeta Misra 		v4_addr->sin_family = AF_INET;
268*dbed73cbSSangeeta Misra 		v4_addr->sin_port = htons(param->port);
269*dbed73cbSSangeeta Misra 
270*dbed73cbSSangeeta Misra 		*addr_len = sizeof (*v4_addr);
271*dbed73cbSSangeeta Misra 	} else {
272*dbed73cbSSangeeta Misra 		af = AF_INET6;
273*dbed73cbSSangeeta Misra 		v6_addr = (struct sockaddr_in6 *)addr;
274*dbed73cbSSangeeta Misra 		v6_addr->sin6_family = AF_INET6;
275*dbed73cbSSangeeta Misra 		v6_addr->sin6_addr = *param_addr;
276*dbed73cbSSangeeta Misra 		v6_addr->sin6_port = htons(param->port);
277*dbed73cbSSangeeta Misra 		v6_addr->sin6_flowinfo = 0;
278*dbed73cbSSangeeta Misra 		v6_addr->sin6_scope_id = 0;
279*dbed73cbSSangeeta Misra 
280*dbed73cbSSangeeta Misra 		*addr_len = sizeof (*v6_addr);
281*dbed73cbSSangeeta Misra 	}
282*dbed73cbSSangeeta Misra 
283*dbed73cbSSangeeta Misra 	if (!nh) {
284*dbed73cbSSangeeta Misra 		*next_hop = NULL;
285*dbed73cbSSangeeta Misra 		return (af);
286*dbed73cbSSangeeta Misra 	}
287*dbed73cbSSangeeta Misra 
288*dbed73cbSSangeeta Misra 	if (af == AF_INET) {
289*dbed73cbSSangeeta Misra 		ipaddr_t *nh_addr;
290*dbed73cbSSangeeta Misra 
291*dbed73cbSSangeeta Misra 		nh_addr = malloc(sizeof (ipaddr_t));
292*dbed73cbSSangeeta Misra 		IN6_V4MAPPED_TO_IPADDR(&param->srv_addr, *nh_addr);
293*dbed73cbSSangeeta Misra 		*next_hop = nh_addr;
294*dbed73cbSSangeeta Misra 	} else {
295*dbed73cbSSangeeta Misra 		struct sockaddr_in6 *nh_addr;
296*dbed73cbSSangeeta Misra 
297*dbed73cbSSangeeta Misra 		nh_addr = malloc(sizeof (*nh_addr));
298*dbed73cbSSangeeta Misra 		nh_addr->sin6_family = AF_INET6;
299*dbed73cbSSangeeta Misra 		nh_addr->sin6_addr = param->srv_addr;
300*dbed73cbSSangeeta Misra 		nh_addr->sin6_flowinfo = 0;
301*dbed73cbSSangeeta Misra 		nh_addr->sin6_scope_id = 0;
302*dbed73cbSSangeeta Misra 		*next_hop = nh_addr;
303*dbed73cbSSangeeta Misra 	}
304*dbed73cbSSangeeta Misra 
305*dbed73cbSSangeeta Misra 	return (af);
306*dbed73cbSSangeeta Misra }
307*dbed73cbSSangeeta Misra 
308*dbed73cbSSangeeta Misra /*
309*dbed73cbSSangeeta Misra  * Use TCP to check if the peer server is alive.  Create a TCP socket and
310*dbed73cbSSangeeta Misra  * then call connect() to reach the peer server.  If connect() does not
311*dbed73cbSSangeeta Misra  * return within the timeout period, the SIGALRM handler will be invoked
312*dbed73cbSSangeeta Misra  * and tell ilbd that the peer server is not alive.
313*dbed73cbSSangeeta Misra  */
314*dbed73cbSSangeeta Misra static int
tcp_query(probe_param_t * param)315*dbed73cbSSangeeta Misra tcp_query(probe_param_t *param)
316*dbed73cbSSangeeta Misra {
317*dbed73cbSSangeeta Misra 	int ret;
318*dbed73cbSSangeeta Misra 	int sd, af;
319*dbed73cbSSangeeta Misra 	struct sockaddr_storage dst_addr;
320*dbed73cbSSangeeta Misra 	socklen_t dst_addr_len;
321*dbed73cbSSangeeta Misra 	void *next_hop;
322*dbed73cbSSangeeta Misra 	hrtime_t start, end;
323*dbed73cbSSangeeta Misra 	uint32_t rtt;
324*dbed73cbSSangeeta Misra 
325*dbed73cbSSangeeta Misra 	ret = 0;
326*dbed73cbSSangeeta Misra 	next_hop = NULL;
327*dbed73cbSSangeeta Misra 
328*dbed73cbSSangeeta Misra 	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
329*dbed73cbSSangeeta Misra 
330*dbed73cbSSangeeta Misra 	if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1)
331*dbed73cbSSangeeta Misra 		return (-1);
332*dbed73cbSSangeeta Misra 
333*dbed73cbSSangeeta Misra 	/* DSR mode, need to set the next hop */
334*dbed73cbSSangeeta Misra 	if (next_hop != NULL) {
335*dbed73cbSSangeeta Misra 		if (af == AF_INET) {
336*dbed73cbSSangeeta Misra 			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
337*dbed73cbSSangeeta Misra 			    sizeof (ipaddr_t)) < 0) {
338*dbed73cbSSangeeta Misra 				ret = -1;
339*dbed73cbSSangeeta Misra 				goto out;
340*dbed73cbSSangeeta Misra 			}
341*dbed73cbSSangeeta Misra 		} else {
342*dbed73cbSSangeeta Misra 			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
343*dbed73cbSSangeeta Misra 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
344*dbed73cbSSangeeta Misra 				ret = -1;
345*dbed73cbSSangeeta Misra 				goto out;
346*dbed73cbSSangeeta Misra 			}
347*dbed73cbSSangeeta Misra 		}
348*dbed73cbSSangeeta Misra 	}
349*dbed73cbSSangeeta Misra 
350*dbed73cbSSangeeta Misra 	timeout_is_good = B_FALSE;
351*dbed73cbSSangeeta Misra 	(void) alarm(param->timeout);
352*dbed73cbSSangeeta Misra 	start = gethrtime();
353*dbed73cbSSangeeta Misra 	if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) {
354*dbed73cbSSangeeta Misra 		ret = -1;
355*dbed73cbSSangeeta Misra 		goto out;
356*dbed73cbSSangeeta Misra 	}
357*dbed73cbSSangeeta Misra 	end = gethrtime();
358*dbed73cbSSangeeta Misra 
359*dbed73cbSSangeeta Misra 	rtt = (end - start) / (NANOSEC / MICROSEC);
360*dbed73cbSSangeeta Misra 	if (rtt == 0)
361*dbed73cbSSangeeta Misra 		rtt = 1;
362*dbed73cbSSangeeta Misra 	(void) printf("%u", rtt);
363*dbed73cbSSangeeta Misra 
364*dbed73cbSSangeeta Misra out:
365*dbed73cbSSangeeta Misra 	(void) close(sd);
366*dbed73cbSSangeeta Misra 	return (ret);
367*dbed73cbSSangeeta Misra }
368*dbed73cbSSangeeta Misra 
369*dbed73cbSSangeeta Misra /*
370*dbed73cbSSangeeta Misra  * Check if the ICMP packet is a port unreachable message in respnsed to
371*dbed73cbSSangeeta Misra  * our probe.  Return -1 if no, 0 if yes.
372*dbed73cbSSangeeta Misra  */
373*dbed73cbSSangeeta Misra static int
check_icmp_unreach_v4(struct icmp * icmph,probe_param_t * param)374*dbed73cbSSangeeta Misra check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param)
375*dbed73cbSSangeeta Misra {
376*dbed73cbSSangeeta Misra 	struct udphdr *udph;
377*dbed73cbSSangeeta Misra 	struct ip *iph;
378*dbed73cbSSangeeta Misra 
379*dbed73cbSSangeeta Misra 	if (icmph->icmp_type != ICMP_UNREACH)
380*dbed73cbSSangeeta Misra 		return (-1);
381*dbed73cbSSangeeta Misra 	if (icmph->icmp_code != ICMP_UNREACH_PORT)
382*dbed73cbSSangeeta Misra 		return (-1);
383*dbed73cbSSangeeta Misra 
384*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
385*dbed73cbSSangeeta Misra 	iph = (struct ip *)((char *)icmph + ICMP_MINLEN);
386*dbed73cbSSangeeta Misra 	if (iph->ip_p != IPPROTO_UDP)
387*dbed73cbSSangeeta Misra 		return (-1);
388*dbed73cbSSangeeta Misra 
389*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
390*dbed73cbSSangeeta Misra 	udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2));
391*dbed73cbSSangeeta Misra 	if (udph->uh_dport != htons(param->port))
392*dbed73cbSSangeeta Misra 		return (-1);
393*dbed73cbSSangeeta Misra 	if (udph->uh_sport != param->lport)
394*dbed73cbSSangeeta Misra 		return (-1);
395*dbed73cbSSangeeta Misra 
396*dbed73cbSSangeeta Misra 	/* All matched, it is a response to the probe we sent. */
397*dbed73cbSSangeeta Misra 	return (0);
398*dbed73cbSSangeeta Misra }
399*dbed73cbSSangeeta Misra 
400*dbed73cbSSangeeta Misra /*
401*dbed73cbSSangeeta Misra  * Check if the ICMP packet is a reply to our echo request.  Need to match
402*dbed73cbSSangeeta Misra  * the ID and sequence.
403*dbed73cbSSangeeta Misra  */
404*dbed73cbSSangeeta Misra static int
check_icmp_echo_v4(struct icmp * icmph,probe_param_t * param)405*dbed73cbSSangeeta Misra check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param)
406*dbed73cbSSangeeta Misra {
407*dbed73cbSSangeeta Misra 	uint32_t cookie;
408*dbed73cbSSangeeta Misra 	in_port_t port;
409*dbed73cbSSangeeta Misra 
410*dbed73cbSSangeeta Misra 	if (icmph->icmp_type != ICMP_ECHOREPLY)
411*dbed73cbSSangeeta Misra 		return (-1);
412*dbed73cbSSangeeta Misra 	if (icmph->icmp_id != param->echo_id)
413*dbed73cbSSangeeta Misra 		return (-1);
414*dbed73cbSSangeeta Misra 	if (icmph->icmp_seq != param->echo_seq)
415*dbed73cbSSangeeta Misra 		return (-1);
416*dbed73cbSSangeeta Misra 
417*dbed73cbSSangeeta Misra 	bcopy(icmph->icmp_data, &cookie, sizeof (cookie));
418*dbed73cbSSangeeta Misra 	if (cookie != param->echo_cookie)
419*dbed73cbSSangeeta Misra 		return (-1);
420*dbed73cbSSangeeta Misra 	bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port));
421*dbed73cbSSangeeta Misra 	if (port != param->port)
422*dbed73cbSSangeeta Misra 		return (-1);
423*dbed73cbSSangeeta Misra 
424*dbed73cbSSangeeta Misra 	/* All matched, it is a response to the echo we sent. */
425*dbed73cbSSangeeta Misra 	return (0);
426*dbed73cbSSangeeta Misra }
427*dbed73cbSSangeeta Misra 
428*dbed73cbSSangeeta Misra /* Verify if an ICMP packet is what we expect. */
429*dbed73cbSSangeeta Misra static int
check_icmp_v4(char * buf,ssize_t rcvd,probe_param_t * param)430*dbed73cbSSangeeta Misra check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param)
431*dbed73cbSSangeeta Misra {
432*dbed73cbSSangeeta Misra 	struct ip *iph;
433*dbed73cbSSangeeta Misra 	struct icmp *icmph;
434*dbed73cbSSangeeta Misra 
435*dbed73cbSSangeeta Misra 	/*
436*dbed73cbSSangeeta Misra 	 * We can dereference the length field without worry since the stack
437*dbed73cbSSangeeta Misra 	 * should not have sent up the packet if it is smaller than a normal
438*dbed73cbSSangeeta Misra 	 * ICMPv4 packet.
439*dbed73cbSSangeeta Misra 	 */
440*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
441*dbed73cbSSangeeta Misra 	iph = (struct ip *)buf;
442*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
443*dbed73cbSSangeeta Misra 	icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2));
444*dbed73cbSSangeeta Misra 
445*dbed73cbSSangeeta Misra 	/*
446*dbed73cbSSangeeta Misra 	 * If we sent an UDP probe, check if the packet is a port
447*dbed73cbSSangeeta Misra 	 * unreachable message in response to our probe.
448*dbed73cbSSangeeta Misra 	 *
449*dbed73cbSSangeeta Misra 	 * If we sent an ICMP echo request, check if the packet is a reply
450*dbed73cbSSangeeta Misra 	 * to our echo request.
451*dbed73cbSSangeeta Misra 	 */
452*dbed73cbSSangeeta Misra 	if (param->probe == udp_probe) {
453*dbed73cbSSangeeta Misra 		/* Is the packet large enough for further checking? */
454*dbed73cbSSangeeta Misra 		if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN +
455*dbed73cbSSangeeta Misra 		    sizeof (struct udphdr)) {
456*dbed73cbSSangeeta Misra 			return (-1);
457*dbed73cbSSangeeta Misra 		}
458*dbed73cbSSangeeta Misra 		return (check_icmp_unreach_v4(icmph, param));
459*dbed73cbSSangeeta Misra 	} else {
460*dbed73cbSSangeeta Misra 		if (rcvd < sizeof (struct ip) + ICMP_MINLEN)
461*dbed73cbSSangeeta Misra 			return (-1);
462*dbed73cbSSangeeta Misra 		return (check_icmp_echo_v4(icmph, param));
463*dbed73cbSSangeeta Misra 	}
464*dbed73cbSSangeeta Misra }
465*dbed73cbSSangeeta Misra 
466*dbed73cbSSangeeta Misra /*
467*dbed73cbSSangeeta Misra  * Check if the ICMPv6 packet is a port unreachable message in respnsed to
468*dbed73cbSSangeeta Misra  * our probe.  Return -1 if no, 0 if yes.
469*dbed73cbSSangeeta Misra  */
470*dbed73cbSSangeeta Misra static int
check_icmp_unreach_v6(icmp6_t * icmp6h,probe_param_t * param)471*dbed73cbSSangeeta Misra check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param)
472*dbed73cbSSangeeta Misra {
473*dbed73cbSSangeeta Misra 	ip6_t *ip6h;
474*dbed73cbSSangeeta Misra 	struct udphdr *udph;
475*dbed73cbSSangeeta Misra 
476*dbed73cbSSangeeta Misra 	if (icmp6h->icmp6_type != ICMP6_DST_UNREACH)
477*dbed73cbSSangeeta Misra 		return (-1);
478*dbed73cbSSangeeta Misra 	if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT)
479*dbed73cbSSangeeta Misra 		return (-1);
480*dbed73cbSSangeeta Misra 
481*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
482*dbed73cbSSangeeta Misra 	ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN);
483*dbed73cbSSangeeta Misra 	if (ip6h->ip6_nxt != IPPROTO_UDP)
484*dbed73cbSSangeeta Misra 		return (-1);
485*dbed73cbSSangeeta Misra 
486*dbed73cbSSangeeta Misra 	udph = (struct udphdr *)(ip6h + 1);
487*dbed73cbSSangeeta Misra 
488*dbed73cbSSangeeta Misra 	if (udph->uh_dport != htons(param->port))
489*dbed73cbSSangeeta Misra 		return (-1);
490*dbed73cbSSangeeta Misra 	if (udph->uh_sport != param->lport)
491*dbed73cbSSangeeta Misra 		return (-1);
492*dbed73cbSSangeeta Misra 
493*dbed73cbSSangeeta Misra 	return (0);
494*dbed73cbSSangeeta Misra }
495*dbed73cbSSangeeta Misra 
496*dbed73cbSSangeeta Misra /*
497*dbed73cbSSangeeta Misra  * Check if the ICMPv6 packet is a reply to our echo request.  Need to match
498*dbed73cbSSangeeta Misra  * the ID and sequence.
499*dbed73cbSSangeeta Misra  */
500*dbed73cbSSangeeta Misra static int
check_icmp_echo_v6(icmp6_t * icmp6h,probe_param_t * param)501*dbed73cbSSangeeta Misra check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param)
502*dbed73cbSSangeeta Misra {
503*dbed73cbSSangeeta Misra 	char *tmp;
504*dbed73cbSSangeeta Misra 	uint32_t cookie;
505*dbed73cbSSangeeta Misra 	in_port_t port;
506*dbed73cbSSangeeta Misra 
507*dbed73cbSSangeeta Misra 	if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY)
508*dbed73cbSSangeeta Misra 		return (-1);
509*dbed73cbSSangeeta Misra 	if (icmp6h->icmp6_id != param->echo_id)
510*dbed73cbSSangeeta Misra 		return (-1);
511*dbed73cbSSangeeta Misra 	if (icmp6h->icmp6_seq != param->echo_seq)
512*dbed73cbSSangeeta Misra 		return (-1);
513*dbed73cbSSangeeta Misra 	tmp = (char *)icmp6h + ICMP6_MINLEN;
514*dbed73cbSSangeeta Misra 	bcopy(tmp, &cookie, sizeof (cookie));
515*dbed73cbSSangeeta Misra 	if (cookie != param->echo_cookie)
516*dbed73cbSSangeeta Misra 		return (-1);
517*dbed73cbSSangeeta Misra 	tmp += sizeof (cookie);
518*dbed73cbSSangeeta Misra 	bcopy(tmp, &port, sizeof (port));
519*dbed73cbSSangeeta Misra 	if (port != param->port)
520*dbed73cbSSangeeta Misra 		return (-1);
521*dbed73cbSSangeeta Misra 
522*dbed73cbSSangeeta Misra 	/* All matched, it is a response to the echo we sent. */
523*dbed73cbSSangeeta Misra 	return (0);
524*dbed73cbSSangeeta Misra }
525*dbed73cbSSangeeta Misra 
526*dbed73cbSSangeeta Misra /* Verify if an ICMPv6 packet is what we expect. */
527*dbed73cbSSangeeta Misra static int
check_icmp_v6(char * buf,ssize_t rcvd,probe_param_t * param)528*dbed73cbSSangeeta Misra check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param)
529*dbed73cbSSangeeta Misra {
530*dbed73cbSSangeeta Misra 	icmp6_t *icmp6h;
531*dbed73cbSSangeeta Misra 
532*dbed73cbSSangeeta Misra 	/* LINTED E_BAD_PTR_CAST_ALIGN */
533*dbed73cbSSangeeta Misra 	icmp6h = (icmp6_t *)(buf);
534*dbed73cbSSangeeta Misra 
535*dbed73cbSSangeeta Misra 	/*
536*dbed73cbSSangeeta Misra 	 * If we sent an UDP probe, check if the packet is a port
537*dbed73cbSSangeeta Misra 	 * unreachable message.
538*dbed73cbSSangeeta Misra 	 *
539*dbed73cbSSangeeta Misra 	 * If we sent an ICMPv6 echo request, check if the packet is a reply.
540*dbed73cbSSangeeta Misra 	 */
541*dbed73cbSSangeeta Misra 	if (param->probe == udp_probe) {
542*dbed73cbSSangeeta Misra 		/* Is the packet large enough for further checking? */
543*dbed73cbSSangeeta Misra 		if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN +
544*dbed73cbSSangeeta Misra 		    sizeof (struct udphdr)) {
545*dbed73cbSSangeeta Misra 			return (-1);
546*dbed73cbSSangeeta Misra 		}
547*dbed73cbSSangeeta Misra 		return (check_icmp_unreach_v6(icmp6h, param));
548*dbed73cbSSangeeta Misra 	} else {
549*dbed73cbSSangeeta Misra 		if (rcvd < ICMP6_MINLEN)
550*dbed73cbSSangeeta Misra 			return (-1);
551*dbed73cbSSangeeta Misra 		return (check_icmp_echo_v6(icmp6h, param));
552*dbed73cbSSangeeta Misra 	}
553*dbed73cbSSangeeta Misra }
554*dbed73cbSSangeeta Misra 
555*dbed73cbSSangeeta Misra /*
556*dbed73cbSSangeeta Misra  * Wait for an ICMP reply indefinitely.  If we get what we expect, return 0.
557*dbed73cbSSangeeta Misra  * If an error happnes, return -1.
558*dbed73cbSSangeeta Misra  */
559*dbed73cbSSangeeta Misra static int
wait_icmp_reply(int af,int recv_sd,struct sockaddr_storage * exp_from,probe_param_t * param)560*dbed73cbSSangeeta Misra wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from,
561*dbed73cbSSangeeta Misra     probe_param_t *param)
562*dbed73cbSSangeeta Misra {
563*dbed73cbSSangeeta Misra 	char buf[RECV_PKT_SZ];
564*dbed73cbSSangeeta Misra 	socklen_t from_len;
565*dbed73cbSSangeeta Misra 	ssize_t rcvd;
566*dbed73cbSSangeeta Misra 	int ret;
567*dbed73cbSSangeeta Misra 
568*dbed73cbSSangeeta Misra 	for (;;) {
569*dbed73cbSSangeeta Misra 		if (af == AF_INET) {
570*dbed73cbSSangeeta Misra 			struct sockaddr_in v4_from;
571*dbed73cbSSangeeta Misra 
572*dbed73cbSSangeeta Misra 			from_len = sizeof (v4_from);
573*dbed73cbSSangeeta Misra 			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
574*dbed73cbSSangeeta Misra 			    (struct sockaddr *)&v4_from, &from_len)) < 0) {
575*dbed73cbSSangeeta Misra 				ret = -1;
576*dbed73cbSSangeeta Misra 				break;
577*dbed73cbSSangeeta Misra 			}
578*dbed73cbSSangeeta Misra 
579*dbed73cbSSangeeta Misra 			/* Packet not from our peer, ignore it. */
580*dbed73cbSSangeeta Misra 			if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr)
581*dbed73cbSSangeeta Misra 			    != v4_from.sin_addr.s_addr) {
582*dbed73cbSSangeeta Misra 				continue;
583*dbed73cbSSangeeta Misra 			}
584*dbed73cbSSangeeta Misra 			if (check_icmp_v4(buf, rcvd, param) == 0) {
585*dbed73cbSSangeeta Misra 				ret = 0;
586*dbed73cbSSangeeta Misra 				break;
587*dbed73cbSSangeeta Misra 			}
588*dbed73cbSSangeeta Misra 		} else {
589*dbed73cbSSangeeta Misra 			struct sockaddr_in6 v6_from;
590*dbed73cbSSangeeta Misra 
591*dbed73cbSSangeeta Misra 			from_len = sizeof (struct sockaddr_in6);
592*dbed73cbSSangeeta Misra 			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
593*dbed73cbSSangeeta Misra 			    (struct sockaddr *)&v6_from, &from_len)) < 0) {
594*dbed73cbSSangeeta Misra 				ret = -1;
595*dbed73cbSSangeeta Misra 				break;
596*dbed73cbSSangeeta Misra 			}
597*dbed73cbSSangeeta Misra 
598*dbed73cbSSangeeta Misra 			if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr),
599*dbed73cbSSangeeta Misra 			    &((struct sockaddr_in6 *)exp_from)->sin6_addr)) {
600*dbed73cbSSangeeta Misra 				continue;
601*dbed73cbSSangeeta Misra 			}
602*dbed73cbSSangeeta Misra 			if (check_icmp_v6(buf, rcvd, param) == 0) {
603*dbed73cbSSangeeta Misra 				ret = 0;
604*dbed73cbSSangeeta Misra 				break;
605*dbed73cbSSangeeta Misra 			}
606*dbed73cbSSangeeta Misra 		}
607*dbed73cbSSangeeta Misra 	}
608*dbed73cbSSangeeta Misra 	return (ret);
609*dbed73cbSSangeeta Misra }
610*dbed73cbSSangeeta Misra 
/*
 * Return the local port used (network byte order) in a socket.
 *
 * sd is the socket to query.  On success the port is stored in *lport,
 * still in network byte order, and 0 is returned.  If getsockname() fails,
 * or the socket's address family is neither AF_INET nor AF_INET6 (so the
 * address has no port to report), -1 is returned and *lport is left
 * untouched.
 */
static int
get_lport(int sd, in_port_t *lport)
{
	struct sockaddr_storage addr;
	socklen_t addr_sz = sizeof (addr);

	if (getsockname(sd, (struct sockaddr *)&addr, &addr_sz) != 0)
		return (-1);

	switch (addr.ss_family) {
	case AF_INET:
		*lport = ((struct sockaddr_in *)&addr)->sin_port;
		break;
	case AF_INET6:
		*lport = ((struct sockaddr_in6 *)&addr)->sin6_port;
		break;
	default:
		/* Do not read a port out of an unrelated address layout. */
		return (-1);
	}
	return (0);
}
627*dbed73cbSSangeeta Misra 
628*dbed73cbSSangeeta Misra /*
629*dbed73cbSSangeeta Misra  * Use UDP to check if the peer server is alive.  Send a 0 length UDP packet
630*dbed73cbSSangeeta Misra  * to the peer server.  If there is no one listening, the peer IP stack
631*dbed73cbSSangeeta Misra  * should send back a port unreachable ICMP(v4/v6) packet.  If the peer
632*dbed73cbSSangeeta Misra  * server is alive, there should be no response.  So if we get SIGALRM,
633*dbed73cbSSangeeta Misra  * the peer is alive.
634*dbed73cbSSangeeta Misra  */
635*dbed73cbSSangeeta Misra static int
udp_query(probe_param_t * param)636*dbed73cbSSangeeta Misra udp_query(probe_param_t *param)
637*dbed73cbSSangeeta Misra {
638*dbed73cbSSangeeta Misra 	int ret;
639*dbed73cbSSangeeta Misra 	int send_sd, recv_sd, af;
640*dbed73cbSSangeeta Misra 	struct sockaddr_storage dst_addr;
641*dbed73cbSSangeeta Misra 	socklen_t addr_len;
642*dbed73cbSSangeeta Misra 	void *next_hop;
643*dbed73cbSSangeeta Misra 	char buf[1];
644*dbed73cbSSangeeta Misra 	struct itimerval timeout;
645*dbed73cbSSangeeta Misra 	uint64_t tm;
646*dbed73cbSSangeeta Misra 
647*dbed73cbSSangeeta Misra 	ret = 0;
648*dbed73cbSSangeeta Misra 	next_hop = NULL;
649*dbed73cbSSangeeta Misra 
650*dbed73cbSSangeeta Misra 	af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param);
651*dbed73cbSSangeeta Misra 
652*dbed73cbSSangeeta Misra 	if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1)
653*dbed73cbSSangeeta Misra 		return (-1);
654*dbed73cbSSangeeta Misra 	if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
655*dbed73cbSSangeeta Misra 	    IPPROTO_ICMPV6)) == -1) {
656*dbed73cbSSangeeta Misra 		return (-1);
657*dbed73cbSSangeeta Misra 	}
658*dbed73cbSSangeeta Misra 
659*dbed73cbSSangeeta Misra 	/* DSR mode, need to set the next hop */
660*dbed73cbSSangeeta Misra 	if (next_hop != NULL) {
661*dbed73cbSSangeeta Misra 		if (af == AF_INET) {
662*dbed73cbSSangeeta Misra 			if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP,
663*dbed73cbSSangeeta Misra 			    next_hop, sizeof (ipaddr_t)) < 0) {
664*dbed73cbSSangeeta Misra 				ret = -1;
665*dbed73cbSSangeeta Misra 				goto out;
666*dbed73cbSSangeeta Misra 			}
667*dbed73cbSSangeeta Misra 		} else {
668*dbed73cbSSangeeta Misra 			if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP,
669*dbed73cbSSangeeta Misra 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
670*dbed73cbSSangeeta Misra 				ret = -1;
671*dbed73cbSSangeeta Misra 				goto out;
672*dbed73cbSSangeeta Misra 			}
673*dbed73cbSSangeeta Misra 		}
674*dbed73cbSSangeeta Misra 	}
675*dbed73cbSSangeeta Misra 
676*dbed73cbSSangeeta Misra 	/*
677*dbed73cbSSangeeta Misra 	 * If ilbd asks us to wait at most t, we will wait for at most
678*dbed73cbSSangeeta Misra 	 * t', which is 3/4 of t.  If we wait for too long, ilbd may
679*dbed73cbSSangeeta Misra 	 * timeout and kill us.
680*dbed73cbSSangeeta Misra 	 */
681*dbed73cbSSangeeta Misra 	timeout.it_interval.tv_sec = 0;
682*dbed73cbSSangeeta Misra 	timeout.it_interval.tv_usec = 0;
683*dbed73cbSSangeeta Misra 	tm = (param->timeout * MICROSEC >> 2) * 3;
684*dbed73cbSSangeeta Misra 	if (tm > MICROSEC) {
685*dbed73cbSSangeeta Misra 		timeout.it_value.tv_sec = tm / MICROSEC;
686*dbed73cbSSangeeta Misra 		timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec *
687*dbed73cbSSangeeta Misra 		    MICROSEC);
688*dbed73cbSSangeeta Misra 	} else {
689*dbed73cbSSangeeta Misra 		timeout.it_value.tv_sec = 0;
690*dbed73cbSSangeeta Misra 		timeout.it_value.tv_usec = tm;
691*dbed73cbSSangeeta Misra 	}
692*dbed73cbSSangeeta Misra 	timeout_is_good = B_TRUE;
693*dbed73cbSSangeeta Misra 	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
694*dbed73cbSSangeeta Misra 		ret = -1;
695*dbed73cbSSangeeta Misra 		goto out;
696*dbed73cbSSangeeta Misra 	}
697*dbed73cbSSangeeta Misra 
698*dbed73cbSSangeeta Misra 	if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr,
699*dbed73cbSSangeeta Misra 	    addr_len) != 0) {
700*dbed73cbSSangeeta Misra 		ret = -1;
701*dbed73cbSSangeeta Misra 		goto out;
702*dbed73cbSSangeeta Misra 	}
703*dbed73cbSSangeeta Misra 	if ((ret = get_lport(send_sd, &param->lport)) != 0)
704*dbed73cbSSangeeta Misra 		goto out;
705*dbed73cbSSangeeta Misra 
706*dbed73cbSSangeeta Misra 	/*
707*dbed73cbSSangeeta Misra 	 * If the server app is listening, we should not get back a
708*dbed73cbSSangeeta Misra 	 * response.  So if wait_icmp_reply() returns, either there
709*dbed73cbSSangeeta Misra 	 * is an error or we get back something.
710*dbed73cbSSangeeta Misra 	 */
711*dbed73cbSSangeeta Misra 	(void) wait_icmp_reply(af, recv_sd, &dst_addr, param);
712*dbed73cbSSangeeta Misra 	ret = -1;
713*dbed73cbSSangeeta Misra 
714*dbed73cbSSangeeta Misra out:
715*dbed73cbSSangeeta Misra 	(void) close(send_sd);
716*dbed73cbSSangeeta Misra 	(void) close(recv_sd);
717*dbed73cbSSangeeta Misra 	return (ret);
718*dbed73cbSSangeeta Misra }
719*dbed73cbSSangeeta Misra 
/*
 * Length of the ping packet sent to a server, counted in uint32_t units
 * (not bytes).  After the ICMP_MINLEN bytes of ICMP header there is room
 * for two uint32_t words of payload: a cookie (random number) followed by
 * the target port, with the last two bytes left as padding.  The cookie
 * and port are used to match a reply to its request, since many such ping
 * packets can be sent to different servers from the same source address
 * and using the same VIP.
 */
#define	PING_PKT_LEN \
	((ICMP_MINLEN + sizeof (uint32_t[2])) / sizeof (uint32_t))
730*dbed73cbSSangeeta Misra 
/*
 * Produce a 32 bit random number, preferably taken from the pseudo random
 * number device /dev/urandom.  If the device cannot be opened, or does not
 * hand back a full uint32_t, (uint32_t)gethrtime() is returned instead as
 * a back up.
 */
static uint32_t
get_random(void)
{
	uint32_t value;
	ssize_t nread;
	int urandom_fd;

	urandom_fd = open("/dev/urandom", O_RDONLY);
	if (urandom_fd == -1)
		return ((uint32_t)gethrtime());

	/* Anything other than a complete read falls back to the clock. */
	nread = read(urandom_fd, &value, sizeof (value));
	if (nread != sizeof (value))
		value = (uint32_t)gethrtime();

	(void) close(urandom_fd);
	return (value);
}
751*dbed73cbSSangeeta Misra 
752*dbed73cbSSangeeta Misra /*
753*dbed73cbSSangeeta Misra  * Use ICMP(v4/v6) echo request to check if the peer server machine is
754*dbed73cbSSangeeta Misra  * reachable.  Send a echo request and expect to get back a echo reply.
755*dbed73cbSSangeeta Misra  */
756*dbed73cbSSangeeta Misra static int
ping_query(probe_param_t * param)757*dbed73cbSSangeeta Misra ping_query(probe_param_t *param)
758*dbed73cbSSangeeta Misra {
759*dbed73cbSSangeeta Misra 	int ret;
760*dbed73cbSSangeeta Misra 	int sd, af;
761*dbed73cbSSangeeta Misra 	struct sockaddr_storage dst_addr;
762*dbed73cbSSangeeta Misra 	socklen_t dst_addr_len;
763*dbed73cbSSangeeta Misra 	void *next_hop;
764*dbed73cbSSangeeta Misra 	hrtime_t start, end;
765*dbed73cbSSangeeta Misra 	uint32_t rtt;
766*dbed73cbSSangeeta Misra 	uint32_t buf[PING_PKT_LEN];
767*dbed73cbSSangeeta Misra 	struct icmp *icmph;
768*dbed73cbSSangeeta Misra 
769*dbed73cbSSangeeta Misra 	ret = 0;
770*dbed73cbSSangeeta Misra 	next_hop = NULL;
771*dbed73cbSSangeeta Misra 
772*dbed73cbSSangeeta Misra 	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
773*dbed73cbSSangeeta Misra 
774*dbed73cbSSangeeta Misra 	if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
775*dbed73cbSSangeeta Misra 	    IPPROTO_ICMPV6)) == -1) {
776*dbed73cbSSangeeta Misra 		return (-1);
777*dbed73cbSSangeeta Misra 	}
778*dbed73cbSSangeeta Misra 
779*dbed73cbSSangeeta Misra 	/* DSR mode, need to set the next hop */
780*dbed73cbSSangeeta Misra 	if (next_hop != NULL) {
781*dbed73cbSSangeeta Misra 		if (af == AF_INET) {
782*dbed73cbSSangeeta Misra 			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
783*dbed73cbSSangeeta Misra 			    sizeof (ipaddr_t)) < 0) {
784*dbed73cbSSangeeta Misra 				ret = -1;
785*dbed73cbSSangeeta Misra 				goto out;
786*dbed73cbSSangeeta Misra 			}
787*dbed73cbSSangeeta Misra 		} else {
788*dbed73cbSSangeeta Misra 			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
789*dbed73cbSSangeeta Misra 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
790*dbed73cbSSangeeta Misra 				ret = -1;
791*dbed73cbSSangeeta Misra 				goto out;
792*dbed73cbSSangeeta Misra 			}
793*dbed73cbSSangeeta Misra 		}
794*dbed73cbSSangeeta Misra 	}
795*dbed73cbSSangeeta Misra 
796*dbed73cbSSangeeta Misra 	bzero(buf, sizeof (buf));
797*dbed73cbSSangeeta Misra 	icmph = (struct icmp *)buf;
798*dbed73cbSSangeeta Misra 	icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST;
799*dbed73cbSSangeeta Misra 	icmph->icmp_code = 0;
800*dbed73cbSSangeeta Misra 	icmph->icmp_cksum = 0;
801*dbed73cbSSangeeta Misra 	icmph->icmp_id = htons(gethrtime() % USHRT_MAX);
802*dbed73cbSSangeeta Misra 	icmph->icmp_seq = htons(gethrtime() % USHRT_MAX);
803*dbed73cbSSangeeta Misra 
804*dbed73cbSSangeeta Misra 	param->echo_cookie = get_random();
805*dbed73cbSSangeeta Misra 	bcopy(&param->echo_cookie, icmph->icmp_data,
806*dbed73cbSSangeeta Misra 	    sizeof (param->echo_cookie));
807*dbed73cbSSangeeta Misra 	bcopy(&param->port, icmph->icmp_data + sizeof (param->echo_cookie),
808*dbed73cbSSangeeta Misra 	    sizeof (param->port));
809*dbed73cbSSangeeta Misra 	icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf));
810*dbed73cbSSangeeta Misra 	param->echo_id = icmph->icmp_id;
811