1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/time.h>
30 
31 #include <netinet/in_systm.h>
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip6.h>
35 #include <arpa/inet.h>
36 #include <netinet/tcp.h>
37 #include <netinet/ip_icmp.h>
38 #include <netinet/icmp6.h>
39 #include <netinet/udp.h>
40 #include <netdb.h>
41 #include <unistd.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <strings.h>
45 #include <errno.h>
46 #include <limits.h>
47 #include <signal.h>
48 #include <libgen.h>
49 #include <fcntl.h>
50 
/*
 * The following values are what ilbd will set argv[0] to.  This determines
 * what type of probe to send out: PROBE_PING selects an ICMP echo probe,
 * PROBE_PROTO selects a TCP or UDP probe depending on argv[3].
 */
#define	PROBE_PING	"ilb_ping"
#define	PROBE_PROTO	"ilb_probe"

/*
 * The transport protocol to use in the probe.  Value of argv[3].  It is
 * compared case-insensitively and only consulted for PROBE_PROTO.
 */
#define	PROTO_TCP	"TCP"
#define	PROTO_UDP	"UDP"

/* Kind of probe to run, derived from argv[0] and argv[3]. */
enum probe_type { ping_probe, tcp_probe, udp_probe };

/* Load balance mode.  Value of argv[4], compared case-insensitively. */
#define	MODE_DSR	"DSR"
#define	MODE_NAT	"NAT"
#define	MODE_HALF_NAT	"HALF_NAT"

/* Parsed form of the load balance mode string. */
enum lb_mode { dsr, nat, half_nat };

/* Number of arguments to the command from ilbd (argv[0] through argv[6]). */
#define	PROG_ARGC	7

/* Size (in bytes) of buffer used to receive ICMP packet */
#define	RECV_PKT_SZ	256
76 
/*
 * Struct to store the probe info (most is passed in using the argv[] array to
 * the command given by ilbd).  The argv[] contains the following.
 *
 * argv[0] is either PROBE_PING or PROBE_PROTO
 * argv[1] is the VIP
 * argv[2] is the backend server address
 * argv[3] is the transport protocol used in the rule
 * argv[4] is the load balance mode, "DSR", "NAT", "HALF_NAT"
 * argv[5] is the probe port
 * argv[6] is the probe timeout
 *
 * vip and srv_addr are always stored as IPv6 addresses; an IPv4 address is
 * stored in its IPv4-mapped form.  port is kept in host byte order and is
 * converted with htons() where it is placed in a socket address.  timeout
 * is handed to alarm(), i.e. it is a number of seconds.
 *
 * The following fields are filled in while a probe is being sent and are
 * used to match the reply.
 *
 * echo_id is the ID set in the ICMP echo probe (as it appears in the packet)
 * echo_seq is the sequence set in the ICMP echo probe (as it appears in the
 * packet)
 * echo_cookie is the random number data in an ICMP echo probe
 * lport is the local port (in network byte order) used to send the UDP probe
 */
typedef struct {
	enum probe_type		probe;
	struct in6_addr		vip;		/* argv[1] */
	struct in6_addr		srv_addr;	/* argv[2] */
	int			proto;		/* argv[3] */
	enum lb_mode		mode;		/* argv[4] */
	in_port_t		port;		/* argv[5] */
	uint32_t		timeout;	/* argv[6] */

	uint16_t		echo_id;
	uint16_t		echo_seq;
	uint32_t		echo_cookie;
	in_port_t		lport;
} probe_param_t;
110 
/*
 * Indicates whether a timeout means success.  It is written by the probe
 * routines and read from the SIGALRM handler, so it has to be a
 * volatile sig_atomic_t.
 */
static volatile sig_atomic_t timeout_is_good;

/*
 * SIGALRM handler.  Reports the outcome of a probe whose timer expired and
 * terminates the process: "0" with exit status 0 when a timeout counts as
 * success, "-1" with exit status 255 otherwise.
 *
 * Only async-signal-safe functions may be called here, so the result is
 * emitted with write() and the process is ended with _exit() instead of
 * going through stdio and exit().
 */
/* ARGSUSED */
static void
probe_exit(int s)
{
	if (timeout_is_good) {
		(void) write(STDOUT_FILENO, "0", 1);
		_exit(0);
	}

	(void) write(STDOUT_FILENO, "-1", 2);
	_exit(255);
}
127 
/*
 * Checksum routine for Internet Protocol family headers (C Version)
 * (derived from ping.c)
 *
 * addr points to the data to be summed and len is its length in bytes.
 * The data is added up as a sequence of 16 bit words in memory order; a
 * trailing odd byte is treated as the first byte of a final word whose
 * second byte is zero.  The return value is the one's complement of the
 * folded 16 bit sum.
 */
static ushort_t
in_cksum(ushort_t *addr, int len)
{
	int nleft = len;
	ushort_t *w = addr;
	ushort_t odd_byte = 0;
	uint64_t sum = 0;

	/*
	 * A 64 bit unsigned accumulator cannot overflow for any length
	 * that fits in an int, so no carry is ever lost.
	 */
	while (nleft > 1) {
		sum += *w++;
		nleft -= 2;
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		*(uchar_t *)(&odd_byte) = *(uchar_t *)w;
		sum += odd_byte;
	}

	/* fold the carries back in until the sum fits in 16 bits */
	while ((sum >> 16) != 0)
		sum = (sum >> 16) + (sum & 0xffff);

	return ((ushort_t)~sum);
}
166 
167 /* It is assumed that argv[] contains PROBE_ARGC arguments. */
168 static boolean_t
parse_probe_param(char * argv[],probe_param_t * param)169 parse_probe_param(char *argv[], probe_param_t *param)
170 {
171 	int32_t port;
172 	int64_t timeout;
173 	struct in_addr v4addr;
174 
175 	if (strcmp(basename(argv[0]), PROBE_PING) == 0) {
176 		param->probe = ping_probe;
177 	} else {
178 		if (strcmp(basename(argv[0]), PROBE_PROTO) != 0)
179 			return (B_FALSE);
180 
181 		if (strcasecmp(argv[3], PROTO_TCP) == 0) {
182 			param->probe = tcp_probe;
183 			param->proto = IPPROTO_TCP;
184 		} else if (strcasecmp(argv[3], PROTO_UDP) == 0) {
185 			param->probe = udp_probe;
186 			param->proto = IPPROTO_UDP;
187 		} else {
188 			return (B_FALSE);
189 		}
190 	}
191 
192 	if (strchr(argv[1], ':') != NULL) {
193 		if (inet_pton(AF_INET6, argv[1], &param->vip) == 0)
194 			return (B_FALSE);
195 	} else if (strchr(argv[1], '.') != NULL) {
196 		if (inet_pton(AF_INET, argv[1], &v4addr) == 0)
197 			return (B_FALSE);
198 		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->vip);
199 	} else {
200 		return (B_FALSE);
201 	}
202 
203 	/*
204 	 * The address family of vip and srv_addr should be the same for
205 	 * now.  But in future, we may allow them to be different...  So
206 	 * we don't do a check here.
207 	 */
208 	if (strchr(argv[2], ':') != NULL) {
209 		if (inet_pton(AF_INET6, argv[2], &param->srv_addr) == 0)
210 			return (B_FALSE);
211 	} else if (strchr(argv[2], '.') != NULL) {
212 		if (inet_pton(AF_INET, argv[2], &v4addr) == 0)
213 			return (B_FALSE);
214 		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->srv_addr);
215 	} else {
216 		return (B_FALSE);
217 	}
218 
219 	if (strcasecmp(argv[4], MODE_DSR) == 0)
220 		param->mode = dsr;
221 	else if (strcasecmp(argv[4], MODE_NAT) == 0)
222 		param->mode = nat;
223 	else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0)
224 		param->mode = half_nat;
225 	else
226 		return (B_FALSE);
227 
228 	if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX)
229 		return (B_FALSE);
230 	param->port = port;
231 
232 	if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX)
233 		return (B_FALSE);
234 	param->timeout = timeout;
235 
236 	return (B_TRUE);
237 }
238 
239 /*
240  * Set up the destination address to be used to send a probe based on
241  * param.
242  */
243 static int
set_sockaddr(struct sockaddr_storage * addr,socklen_t * addr_len,void ** next_hop,probe_param_t * param)244 set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len,
245     void **next_hop, probe_param_t *param)
246 {
247 	int af;
248 	struct in6_addr *param_addr;
249 	struct sockaddr_in *v4_addr;
250 	struct sockaddr_in6 *v6_addr;
251 	boolean_t nh = B_FALSE;
252 
253 	switch (param->mode) {
254 	case dsr:
255 		param_addr = &param->vip;
256 		nh = B_TRUE;
257 		break;
258 	case nat:
259 	case half_nat:
260 		param_addr = &param->srv_addr;
261 		break;
262 	}
263 	if (IN6_IS_ADDR_V4MAPPED(param_addr)) {
264 		af = AF_INET;
265 		v4_addr = (struct sockaddr_in *)addr;
266 		IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr);
267 		v4_addr->sin_family = AF_INET;
268 		v4_addr->sin_port = htons(param->port);
269 
270 		*addr_len = sizeof (*v4_addr);
271 	} else {
272 		af = AF_INET6;
273 		v6_addr = (struct sockaddr_in6 *)addr;
274 		v6_addr->sin6_family = AF_INET6;
275 		v6_addr->sin6_addr = *param_addr;
276 		v6_addr->sin6_port = htons(param->port);
277 		v6_addr->sin6_flowinfo = 0;
278 		v6_addr->sin6_scope_id = 0;
279 
280 		*addr_len = sizeof (*v6_addr);
281 	}
282 
283 	if (!nh) {
284 		*next_hop = NULL;
285 		return (af);
286 	}
287 
288 	if (af == AF_INET) {
289 		ipaddr_t *nh_addr;
290 
291 		nh_addr = malloc(sizeof (ipaddr_t));
292 		IN6_V4MAPPED_TO_IPADDR(&param->srv_addr, *nh_addr);
293 		*next_hop = nh_addr;
294 	} else {
295 		struct sockaddr_in6 *nh_addr;
296 
297 		nh_addr = malloc(sizeof (*nh_addr));
298 		nh_addr->sin6_family = AF_INET6;
299 		nh_addr->sin6_addr = param->srv_addr;
300 		nh_addr->sin6_flowinfo = 0;
301 		nh_addr->sin6_scope_id = 0;
302 		*next_hop = nh_addr;
303 	}
304 
305 	return (af);
306 }
307 
308 /*
309  * Use TCP to check if the peer server is alive.  Create a TCP socket and
310  * then call connect() to reach the peer server.  If connect() does not
311  * return within the timeout period, the SIGALRM handler will be invoked
312  * and tell ilbd that the peer server is not alive.
313  */
314 static int
tcp_query(probe_param_t * param)315 tcp_query(probe_param_t *param)
316 {
317 	int ret;
318 	int sd, af;
319 	struct sockaddr_storage dst_addr;
320 	socklen_t dst_addr_len;
321 	void *next_hop;
322 	hrtime_t start, end;
323 	uint32_t rtt;
324 
325 	ret = 0;
326 	next_hop = NULL;
327 
328 	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
329 
330 	if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1)
331 		return (-1);
332 
333 	/* DSR mode, need to set the next hop */
334 	if (next_hop != NULL) {
335 		if (af == AF_INET) {
336 			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
337 			    sizeof (ipaddr_t)) < 0) {
338 				ret = -1;
339 				goto out;
340 			}
341 		} else {
342 			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
343 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
344 				ret = -1;
345 				goto out;
346 			}
347 		}
348 	}
349 
350 	timeout_is_good = B_FALSE;
351 	(void) alarm(param->timeout);
352 	start = gethrtime();
353 	if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) {
354 		ret = -1;
355 		goto out;
356 	}
357 	end = gethrtime();
358 
359 	rtt = (end - start) / (NANOSEC / MICROSEC);
360 	if (rtt == 0)
361 		rtt = 1;
362 	(void) printf("%u", rtt);
363 
364 out:
365 	(void) close(sd);
366 	return (ret);
367 }
368 
369 /*
370  * Check if the ICMP packet is a port unreachable message in respnsed to
371  * our probe.  Return -1 if no, 0 if yes.
372  */
373 static int
check_icmp_unreach_v4(struct icmp * icmph,probe_param_t * param)374 check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param)
375 {
376 	struct udphdr *udph;
377 	struct ip *iph;
378 
379 	if (icmph->icmp_type != ICMP_UNREACH)
380 		return (-1);
381 	if (icmph->icmp_code != ICMP_UNREACH_PORT)
382 		return (-1);
383 
384 	/* LINTED E_BAD_PTR_CAST_ALIGN */
385 	iph = (struct ip *)((char *)icmph + ICMP_MINLEN);
386 	if (iph->ip_p != IPPROTO_UDP)
387 		return (-1);
388 
389 	/* LINTED E_BAD_PTR_CAST_ALIGN */
390 	udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2));
391 	if (udph->uh_dport != htons(param->port))
392 		return (-1);
393 	if (udph->uh_sport != param->lport)
394 		return (-1);
395 
396 	/* All matched, it is a response to the probe we sent. */
397 	return (0);
398 }
399 
400 /*
401  * Check if the ICMP packet is a reply to our echo request.  Need to match
402  * the ID and sequence.
403  */
404 static int
check_icmp_echo_v4(struct icmp * icmph,probe_param_t * param)405 check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param)
406 {
407 	uint32_t cookie;
408 	in_port_t port;
409 
410 	if (icmph->icmp_type != ICMP_ECHOREPLY)
411 		return (-1);
412 	if (icmph->icmp_id != param->echo_id)
413 		return (-1);
414 	if (icmph->icmp_seq != param->echo_seq)
415 		return (-1);
416 
417 	bcopy(icmph->icmp_data, &cookie, sizeof (cookie));
418 	if (cookie != param->echo_cookie)
419 		return (-1);
420 	bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port));
421 	if (port != param->port)
422 		return (-1);
423 
424 	/* All matched, it is a response to the echo we sent. */
425 	return (0);
426 }
427 
428 /* Verify if an ICMP packet is what we expect. */
429 static int
check_icmp_v4(char * buf,ssize_t rcvd,probe_param_t * param)430 check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param)
431 {
432 	struct ip *iph;
433 	struct icmp *icmph;
434 
435 	/*
436 	 * We can dereference the length field without worry since the stack
437 	 * should not have sent up the packet if it is smaller than a normal
438 	 * ICMPv4 packet.
439 	 */
440 	/* LINTED E_BAD_PTR_CAST_ALIGN */
441 	iph = (struct ip *)buf;
442 	/* LINTED E_BAD_PTR_CAST_ALIGN */
443 	icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2));
444 
445 	/*
446 	 * If we sent an UDP probe, check if the packet is a port
447 	 * unreachable message in response to our probe.
448 	 *
449 	 * If we sent an ICMP echo request, check if the packet is a reply
450 	 * to our echo request.
451 	 */
452 	if (param->probe == udp_probe) {
453 		/* Is the packet large enough for further checking? */
454 		if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN +
455 		    sizeof (struct udphdr)) {
456 			return (-1);
457 		}
458 		return (check_icmp_unreach_v4(icmph, param));
459 	} else {
460 		if (rcvd < sizeof (struct ip) + ICMP_MINLEN)
461 			return (-1);
462 		return (check_icmp_echo_v4(icmph, param));
463 	}
464 }
465 
466 /*
467  * Check if the ICMPv6 packet is a port unreachable message in respnsed to
468  * our probe.  Return -1 if no, 0 if yes.
469  */
470 static int
check_icmp_unreach_v6(icmp6_t * icmp6h,probe_param_t * param)471 check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param)
472 {
473 	ip6_t *ip6h;
474 	struct udphdr *udph;
475 
476 	if (icmp6h->icmp6_type != ICMP6_DST_UNREACH)
477 		return (-1);
478 	if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT)
479 		return (-1);
480 
481 	/* LINTED E_BAD_PTR_CAST_ALIGN */
482 	ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN);
483 	if (ip6h->ip6_nxt != IPPROTO_UDP)
484 		return (-1);
485 
486 	udph = (struct udphdr *)(ip6h + 1);
487 
488 	if (udph->uh_dport != htons(param->port))
489 		return (-1);
490 	if (udph->uh_sport != param->lport)
491 		return (-1);
492 
493 	return (0);
494 }
495 
496 /*
497  * Check if the ICMPv6 packet is a reply to our echo request.  Need to match
498  * the ID and sequence.
499  */
500 static int
check_icmp_echo_v6(icmp6_t * icmp6h,probe_param_t * param)501 check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param)
502 {
503 	char *tmp;
504 	uint32_t cookie;
505 	in_port_t port;
506 
507 	if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY)
508 		return (-1);
509 	if (icmp6h->icmp6_id != param->echo_id)
510 		return (-1);
511 	if (icmp6h->icmp6_seq != param->echo_seq)
512 		return (-1);
513 	tmp = (char *)icmp6h + ICMP6_MINLEN;
514 	bcopy(tmp, &cookie, sizeof (cookie));
515 	if (cookie != param->echo_cookie)
516 		return (-1);
517 	tmp += sizeof (cookie);
518 	bcopy(tmp, &port, sizeof (port));
519 	if (port != param->port)
520 		return (-1);
521 
522 	/* All matched, it is a response to the echo we sent. */
523 	return (0);
524 }
525 
526 /* Verify if an ICMPv6 packet is what we expect. */
527 static int
check_icmp_v6(char * buf,ssize_t rcvd,probe_param_t * param)528 check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param)
529 {
530 	icmp6_t *icmp6h;
531 
532 	/* LINTED E_BAD_PTR_CAST_ALIGN */
533 	icmp6h = (icmp6_t *)(buf);
534 
535 	/*
536 	 * If we sent an UDP probe, check if the packet is a port
537 	 * unreachable message.
538 	 *
539 	 * If we sent an ICMPv6 echo request, check if the packet is a reply.
540 	 */
541 	if (param->probe == udp_probe) {
542 		/* Is the packet large enough for further checking? */
543 		if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN +
544 		    sizeof (struct udphdr)) {
545 			return (-1);
546 		}
547 		return (check_icmp_unreach_v6(icmp6h, param));
548 	} else {
549 		if (rcvd < ICMP6_MINLEN)
550 			return (-1);
551 		return (check_icmp_echo_v6(icmp6h, param));
552 	}
553 }
554 
555 /*
556  * Wait for an ICMP reply indefinitely.  If we get what we expect, return 0.
557  * If an error happnes, return -1.
558  */
559 static int
wait_icmp_reply(int af,int recv_sd,struct sockaddr_storage * exp_from,probe_param_t * param)560 wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from,
561     probe_param_t *param)
562 {
563 	char buf[RECV_PKT_SZ];
564 	socklen_t from_len;
565 	ssize_t rcvd;
566 	int ret;
567 
568 	for (;;) {
569 		if (af == AF_INET) {
570 			struct sockaddr_in v4_from;
571 
572 			from_len = sizeof (v4_from);
573 			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
574 			    (struct sockaddr *)&v4_from, &from_len)) < 0) {
575 				ret = -1;
576 				break;
577 			}
578 
579 			/* Packet not from our peer, ignore it. */
580 			if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr)
581 			    != v4_from.sin_addr.s_addr) {
582 				continue;
583 			}
584 			if (check_icmp_v4(buf, rcvd, param) == 0) {
585 				ret = 0;
586 				break;
587 			}
588 		} else {
589 			struct sockaddr_in6 v6_from;
590 
591 			from_len = sizeof (struct sockaddr_in6);
592 			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
593 			    (struct sockaddr *)&v6_from, &from_len)) < 0) {
594 				ret = -1;
595 				break;
596 			}
597 
598 			if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr),
599 			    &((struct sockaddr_in6 *)exp_from)->sin6_addr)) {
600 				continue;
601 			}
602 			if (check_icmp_v6(buf, rcvd, param) == 0) {
603 				ret = 0;
604 				break;
605 			}
606 		}
607 	}
608 	return (ret);
609 }
610 
/*
 * Look up the local port a socket is bound to and store it, in network
 * byte order, in *lport.  Return 0 on success, or -1 (leaving *lport
 * alone) if the local address of the socket cannot be obtained.
 */
static int
get_lport(int sd, in_port_t *lport)
{
	struct sockaddr_storage local;
	socklen_t local_len = sizeof (local);

	if (getsockname(sd, (struct sockaddr *)&local, &local_len) != 0)
		return (-1);

	switch (local.ss_family) {
	case AF_INET:
		*lport = ((struct sockaddr_in *)&local)->sin_port;
		break;
	default:
		/* Anything that is not IPv4 is treated as IPv6. */
		*lport = ((struct sockaddr_in6 *)&local)->sin6_port;
		break;
	}
	return (0);
}
627 
628 /*
629  * Use UDP to check if the peer server is alive.  Send a 0 length UDP packet
630  * to the peer server.  If there is no one listening, the peer IP stack
631  * should send back a port unreachable ICMP(v4/v6) packet.  If the peer
632  * server is alive, there should be no response.  So if we get SIGALRM,
633  * the peer is alive.
634  */
635 static int
udp_query(probe_param_t * param)636 udp_query(probe_param_t *param)
637 {
638 	int ret;
639 	int send_sd, recv_sd, af;
640 	struct sockaddr_storage dst_addr;
641 	socklen_t addr_len;
642 	void *next_hop;
643 	char buf[1];
644 	struct itimerval timeout;
645 	uint64_t tm;
646 
647 	ret = 0;
648 	next_hop = NULL;
649 
650 	af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param);
651 
652 	if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1)
653 		return (-1);
654 	if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
655 	    IPPROTO_ICMPV6)) == -1) {
656 		return (-1);
657 	}
658 
659 	/* DSR mode, need to set the next hop */
660 	if (next_hop != NULL) {
661 		if (af == AF_INET) {
662 			if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP,
663 			    next_hop, sizeof (ipaddr_t)) < 0) {
664 				ret = -1;
665 				goto out;
666 			}
667 		} else {
668 			if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP,
669 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
670 				ret = -1;
671 				goto out;
672 			}
673 		}
674 	}
675 
676 	/*
677 	 * If ilbd asks us to wait at most t, we will wait for at most
678 	 * t', which is 3/4 of t.  If we wait for too long, ilbd may
679 	 * timeout and kill us.
680 	 */
681 	timeout.it_interval.tv_sec = 0;
682 	timeout.it_interval.tv_usec = 0;
683 	tm = (param->timeout * MICROSEC >> 2) * 3;
684 	if (tm > MICROSEC) {
685 		timeout.it_value.tv_sec = tm / MICROSEC;
686 		timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec *
687 		    MICROSEC);
688 	} else {
689 		timeout.it_value.tv_sec = 0;
690 		timeout.it_value.tv_usec = tm;
691 	}
692 	timeout_is_good = B_TRUE;
693 	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
694 		ret = -1;
695 		goto out;
696 	}
697 
698 	if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr,
699 	    addr_len) != 0) {
700 		ret = -1;
701 		goto out;
702 	}
703 	if ((ret = get_lport(send_sd, &param->lport)) != 0)
704 		goto out;
705 
706 	/*
707 	 * If the server app is listening, we should not get back a
708 	 * response.  So if wait_icmp_reply() returns, either there
709 	 * is an error or we get back something.
710 	 */
711 	(void) wait_icmp_reply(af, recv_sd, &dst_addr, param);
712 	ret = -1;
713 
714 out:
715 	(void) close(send_sd);
716 	(void) close(recv_sd);
717 	return (ret);
718 }
719 
/*
 * Size (in uint32_t) of the ping packet to be sent to server.  It consists
 * of the ICMP header (ICMP_MINLEN bytes) followed by two uint32_t of data,
 * which hold a cookie (random number) + the target port.  The cookie and
 * port are used for matching ping request since there can be many such ping
 * packets sent to different servers from the same source address and using
 * the same VIP.  The last two bytes are for padding.
 */
#define	PING_PKT_LEN \
	((ICMP_MINLEN + 2 * sizeof (uint32_t)) / sizeof (uint32_t))
730 
/*
 * Produce a 32 bit random number.  The preferred source is the pseudo
 * random number device /dev/urandom; if it cannot be opened, or does not
 * hand back a full uint32_t, (uint32_t)gethrtime() is used instead.
 */
static uint32_t
get_random(void)
{
	uint32_t val;
	int rfd;

	rfd = open("/dev/urandom", O_RDONLY);
	if (rfd != -1) {
		ssize_t got;

		got = read(rfd, &val, sizeof (val));
		(void) close(rfd);
		if (got == sizeof (val))
			return (val);
	}

	/* Back up plan: the low 32 bits of the high resolution time. */
	return ((uint32_t)gethrtime());
}
751 
752 /*
753  * Use ICMP(v4/v6) echo request to check if the peer server machine is
754  * reachable.  Send a echo request and expect to get back a echo reply.
755  */
756 static int
ping_query(probe_param_t * param)757 ping_query(probe_param_t *param)
758 {
759 	int ret;
760 	int sd, af;
761 	struct sockaddr_storage dst_addr;
762 	socklen_t dst_addr_len;
763 	void *next_hop;
764 	hrtime_t start, end;
765 	uint32_t rtt;
766 	uint32_t buf[PING_PKT_LEN];
767 	struct icmp *icmph;
768 
769 	ret = 0;
770 	next_hop = NULL;
771 
772 	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
773 
774 	if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
775 	    IPPROTO_ICMPV6)) == -1) {
776 		return (-1);
777 	}
778 
779 	/* DSR mode, need to set the next hop */
780 	if (next_hop != NULL) {
781 		if (af == AF_INET) {
782 			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
783 			    sizeof (ipaddr_t)) < 0) {
784 				ret = -1;
785 				goto out;
786 			}
787 		} else {
788 			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
789 			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
790 				ret = -1;
791 				goto out;
792 			}
793 		}
794 	}
795 
796 	bzero(buf, sizeof (buf));
797 	icmph = (struct icmp *)buf;
798 	icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST;
799 	icmph->icmp_code = 0;
800 	icmph->icmp_cksum = 0;
801 	icmph->icmp_id = htons(gethrtime() % USHRT_MAX);
802 	icmph->icmp_seq = htons(gethrtime() % USHRT_MAX);
803 
804 	param->echo_cookie = get_random();
805 	bcopy(&param->echo_cookie, icmph->icmp_data,
806 	    sizeof (param->echo_cookie));
807 	bcopy(&param->port, icmph->icmp_data + sizeof (param->echo_cookie),
808 	    sizeof (param->port));
809 	icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf));
810 	param->echo_id = icmph->icmp_id;
811 	param->echo_seq = icmph->icmp_seq;
812 
813 	timeout_is_good = B_FALSE;
814 	(void) alarm(param->timeout);
815 	start = gethrtime();
816 	if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr,
817 	    dst_addr_len) != sizeof (buf)) {
818 		ret = -1;
819 		goto out;
820 	}
821 	if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) {
822 		ret = -1;
823 		goto out;
824 	}
825 	end = gethrtime();
826 
827 	rtt = (end - start) / (NANOSEC / MICROSEC);
828 	if (rtt == 0)
829 		rtt = 1;
830 	(void) printf("%u", rtt);
831 
832 out:
833 	(void) close(sd);
834 	return (ret);
835 }
836 
837 int
main(int argc,char * argv[])838 main(int argc, char *argv[])
839 {
840 	probe_param_t param;
841 	int ret;
842 
843 	/* ilbd should pass in PROG_ARGC parameters. */
844 	if (argc != PROG_ARGC) {
845 		(void) printf("-1");
846 		return (-1);
847 	}
848 
849 	if (signal(SIGALRM, probe_exit) == SIG_ERR) {
850 		(void) printf("-1");
851 		return (-1);
852 	}
853 
854 	if (!parse_probe_param(argv, &param)) {
855 		(void) printf("-1");
856 		return (-1);
857 	}
858 
859 	switch (param.probe) {
860 	case ping_probe:
861 		ret = ping_query(&param);
862 		break;
863 	case tcp_probe:
864 		ret = tcp_query(&param);
865 		break;
866 	case udp_probe:
867 		ret = udp_query(&param);
868 		break;
869 	}
870 
871 	if (ret == -1)
872 		(void) printf("-1");
873 
874 	return (ret);
875 }
876