1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/socket.h>
29#include <sys/time.h>
30
31#include <netinet/in_systm.h>
32#include <netinet/in.h>
33#include <netinet/ip.h>
34#include <netinet/ip6.h>
35#include <arpa/inet.h>
36#include <netinet/tcp.h>
37#include <netinet/ip_icmp.h>
38#include <netinet/icmp6.h>
39#include <netinet/udp.h>
40#include <netdb.h>
41#include <unistd.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <strings.h>
45#include <errno.h>
46#include <limits.h>
47#include <signal.h>
48#include <libgen.h>
49#include <fcntl.h>
50
/*
 * The following values are what ilbd will set argv[0] to.  This determines
 * what type of probe to send out: PROBE_PING selects the ICMP echo probe,
 * PROBE_PROTO selects a TCP or UDP probe depending on argv[3].  Only the
 * base name of argv[0] is compared against these values.
 */
#define	PROBE_PING	"ilb_ping"
#define	PROBE_PROTO	"ilb_probe"

/*
 * The transport protocol to use in the probe.  Value of argv[3], compared
 * without regard to case.
 */
#define	PROTO_TCP	"TCP"
#define	PROTO_UDP	"UDP"

/* Kind of probe to run, derived from argv[0] and argv[3]. */
enum probe_type { ping_probe, tcp_probe, udp_probe };

/* Load balance mode.  Value of argv[4], compared without regard to case. */
#define	MODE_DSR	"DSR"
#define	MODE_NAT	"NAT"
#define	MODE_HALF_NAT	"HALF_NAT"

/* Parsed form of the load balance mode string. */
enum lb_mode { dsr, nat, half_nat };

/* Number of arguments to the command from ilbd, counting argv[0]. */
#define	PROG_ARGC	7

/* Size (in bytes) of buffer used to receive ICMP packet */
#define	RECV_PKT_SZ	256
76
/*
 * Struct to store the probe info (most is passed in using the argv[] array to
 * the command given by ilbd).  The argv[] contains the following.
 *
 * argv[0] is either PROBE_PING or PROBE_PROTO
 * argv[1] is the VIP
 * argv[2] is the backend server address
 * argv[3] is the transport protocol used in the rule
 * argv[4] is the load balance mode: "DSR", "NAT" or "HALF_NAT"
 * argv[5] is the probe port
 * argv[6] is the probe timeout
 *
 * Both addresses are kept as IPv6 addresses; an IPv4 address is stored in
 * its IPv4-mapped form.
 *
 * The following three fields are used in sending ICMP ECHO probe.
 *
 * echo_id is the ID set in the probe
 * echo_seq is the sequence set in the probe
 * echo_cookie is the random number data in a probe
 *
 * The following field is used in sending UDP probe.
 *
 * lport is the local port (in network byte order) used to send the probe
 */
typedef struct {
	enum probe_type		probe;		/* from argv[0] and argv[3] */
	struct in6_addr		vip;		/* argv[1] */
	struct in6_addr		srv_addr;	/* argv[2] */
	int			proto;		/* argv[3], IPPROTO_TCP/UDP */
	enum lb_mode		mode;		/* argv[4] */
	in_port_t		port;		/* argv[5], host byte order */
	uint32_t		timeout;	/* argv[6], in seconds */

	uint16_t		echo_id;	/* as placed in the packet */
	uint16_t		echo_seq;	/* as placed in the packet */
	uint32_t		echo_cookie;
	in_port_t		lport;
} probe_param_t;
110
/*
 * Global variable to indicate whether a timeout means success.  A non-zero
 * value (B_TRUE) means the SIGALRM handler reports success, zero (B_FALSE)
 * means it reports failure.
 *
 * The flag is written by the probe routines and read from the SIGALRM
 * handler, so it is a volatile sig_atomic_t: that is the only object type
 * the C standard lets a signal handler share with the main flow of control.
 */
static volatile sig_atomic_t timeout_is_good;
113
114/* SIGALRM handler */
115/* ARGSUSED */
116static void
117probe_exit(int s)
118{
119	if (timeout_is_good) {
120		(void) printf("0");
121		exit(0);
122	} else {
123		(void) printf("-1");
124		exit(255);
125	}
126}
127
/*
 * Internet checksum (one's complement of the one's complement sum) over
 * len bytes starting at addr.  (C version, derived from ping.c)
 *
 * addr points to the data, read as native 16 bit words; len is the number
 * of bytes to cover and may be odd.  Returns the 16 bit checksum.
 */
static ushort_t
in_cksum(ushort_t *addr, int len)
{
	ushort_t *wordp;
	ushort_t tail = 0;
	int remaining;
	int acc = 0;

	/* Accumulate all the complete 16 bit words. */
	for (wordp = addr, remaining = len; remaining > 1; remaining -= 2)
		acc += *wordp++;

	/*
	 * A trailing odd byte is treated as the first byte of a 16 bit
	 * word whose second byte is zero.
	 */
	if (remaining == 1) {
		*(uchar_t *)&tail = *(uchar_t *)wordp;
		acc += tail;
	}

	/*
	 * Fold the carries that collected in the upper half back into the
	 * lower 16 bits; the second step picks up a carry produced by the
	 * first one.
	 */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;

	/* The conversion to ushort_t keeps the low 16 bits only. */
	return ((ushort_t)~acc);
}
166
167/* It is assumed that argv[] contains PROBE_ARGC arguments. */
168static boolean_t
169parse_probe_param(char *argv[], probe_param_t *param)
170{
171	int32_t port;
172	int64_t timeout;
173	struct in_addr v4addr;
174
175	if (strcmp(basename(argv[0]), PROBE_PING) == 0) {
176		param->probe = ping_probe;
177	} else {
178		if (strcmp(basename(argv[0]), PROBE_PROTO) != 0)
179			return (B_FALSE);
180
181		if (strcasecmp(argv[3], PROTO_TCP) == 0) {
182			param->probe = tcp_probe;
183			param->proto = IPPROTO_TCP;
184		} else if (strcasecmp(argv[3], PROTO_UDP) == 0) {
185			param->probe = udp_probe;
186			param->proto = IPPROTO_UDP;
187		} else {
188			return (B_FALSE);
189		}
190	}
191
192	if (strchr(argv[1], ':') != NULL) {
193		if (inet_pton(AF_INET6, argv[1], &param->vip) == 0)
194			return (B_FALSE);
195	} else if (strchr(argv[1], '.') != NULL) {
196		if (inet_pton(AF_INET, argv[1], &v4addr) == 0)
197			return (B_FALSE);
198		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->vip);
199	} else {
200		return (B_FALSE);
201	}
202
203	/*
204	 * The address family of vip and srv_addr should be the same for
205	 * now.  But in future, we may allow them to be different...  So
206	 * we don't do a check here.
207	 */
208	if (strchr(argv[2], ':') != NULL) {
209		if (inet_pton(AF_INET6, argv[2], &param->srv_addr) == 0)
210			return (B_FALSE);
211	} else if (strchr(argv[2], '.') != NULL) {
212		if (inet_pton(AF_INET, argv[2], &v4addr) == 0)
213			return (B_FALSE);
214		IN6_INADDR_TO_V4MAPPED(&v4addr, &param->srv_addr);
215	} else {
216		return (B_FALSE);
217	}
218
219	if (strcasecmp(argv[4], MODE_DSR) == 0)
220		param->mode = dsr;
221	else if (strcasecmp(argv[4], MODE_NAT) == 0)
222		param->mode = nat;
223	else if (strcasecmp(argv[4], MODE_HALF_NAT) == 0)
224		param->mode = half_nat;
225	else
226		return (B_FALSE);
227
228	if ((port = atoi(argv[5])) <= 0 || port > USHRT_MAX)
229		return (B_FALSE);
230	param->port = port;
231
232	if ((timeout = strtoll(argv[6], NULL, 10)) <= 0 || timeout > UINT_MAX)
233		return (B_FALSE);
234	param->timeout = timeout;
235
236	return (B_TRUE);
237}
238
239/*
240 * Set up the destination address to be used to send a probe based on
241 * param.
242 */
243static int
244set_sockaddr(struct sockaddr_storage *addr, socklen_t *addr_len,
245    void **next_hop, probe_param_t *param)
246{
247	int af;
248	struct in6_addr *param_addr;
249	struct sockaddr_in *v4_addr;
250	struct sockaddr_in6 *v6_addr;
251	boolean_t nh = B_FALSE;
252
253	switch (param->mode) {
254	case dsr:
255		param_addr = &param->vip;
256		nh = B_TRUE;
257		break;
258	case nat:
259	case half_nat:
260		param_addr = &param->srv_addr;
261		break;
262	}
263	if (IN6_IS_ADDR_V4MAPPED(param_addr)) {
264		af = AF_INET;
265		v4_addr = (struct sockaddr_in *)addr;
266		IN6_V4MAPPED_TO_INADDR(param_addr, &v4_addr->sin_addr);
267		v4_addr->sin_family = AF_INET;
268		v4_addr->sin_port = htons(param->port);
269
270		*addr_len = sizeof (*v4_addr);
271	} else {
272		af = AF_INET6;
273		v6_addr = (struct sockaddr_in6 *)addr;
274		v6_addr->sin6_family = AF_INET6;
275		v6_addr->sin6_addr = *param_addr;
276		v6_addr->sin6_port = htons(param->port);
277		v6_addr->sin6_flowinfo = 0;
278		v6_addr->sin6_scope_id = 0;
279
280		*addr_len = sizeof (*v6_addr);
281	}
282
283	if (!nh) {
284		*next_hop = NULL;
285		return (af);
286	}
287
288	if (af == AF_INET) {
289		ipaddr_t *nh_addr;
290
291		nh_addr = malloc(sizeof (ipaddr_t));
292		IN6_V4MAPPED_TO_IPADDR(&param->srv_addr, *nh_addr);
293		*next_hop = nh_addr;
294	} else {
295		struct sockaddr_in6 *nh_addr;
296
297		nh_addr = malloc(sizeof (*nh_addr));
298		nh_addr->sin6_family = AF_INET6;
299		nh_addr->sin6_addr = param->srv_addr;
300		nh_addr->sin6_flowinfo = 0;
301		nh_addr->sin6_scope_id = 0;
302		*next_hop = nh_addr;
303	}
304
305	return (af);
306}
307
308/*
309 * Use TCP to check if the peer server is alive.  Create a TCP socket and
310 * then call connect() to reach the peer server.  If connect() does not
311 * return within the timeout period, the SIGALRM handler will be invoked
312 * and tell ilbd that the peer server is not alive.
313 */
314static int
315tcp_query(probe_param_t *param)
316{
317	int ret;
318	int sd, af;
319	struct sockaddr_storage dst_addr;
320	socklen_t dst_addr_len;
321	void *next_hop;
322	hrtime_t start, end;
323	uint32_t rtt;
324
325	ret = 0;
326	next_hop = NULL;
327
328	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
329
330	if ((sd = socket(af, SOCK_STREAM, param->proto)) == -1)
331		return (-1);
332
333	/* DSR mode, need to set the next hop */
334	if (next_hop != NULL) {
335		if (af == AF_INET) {
336			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
337			    sizeof (ipaddr_t)) < 0) {
338				ret = -1;
339				goto out;
340			}
341		} else {
342			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
343			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
344				ret = -1;
345				goto out;
346			}
347		}
348	}
349
350	timeout_is_good = B_FALSE;
351	(void) alarm(param->timeout);
352	start = gethrtime();
353	if (connect(sd, (struct sockaddr *)&dst_addr, dst_addr_len) != 0) {
354		ret = -1;
355		goto out;
356	}
357	end = gethrtime();
358
359	rtt = (end - start) / (NANOSEC / MICROSEC);
360	if (rtt == 0)
361		rtt = 1;
362	(void) printf("%u", rtt);
363
364out:
365	(void) close(sd);
366	return (ret);
367}
368
369/*
370 * Check if the ICMP packet is a port unreachable message in respnsed to
371 * our probe.  Return -1 if no, 0 if yes.
372 */
373static int
374check_icmp_unreach_v4(struct icmp *icmph, probe_param_t *param)
375{
376	struct udphdr *udph;
377	struct ip *iph;
378
379	if (icmph->icmp_type != ICMP_UNREACH)
380		return (-1);
381	if (icmph->icmp_code != ICMP_UNREACH_PORT)
382		return (-1);
383
384	/* LINTED E_BAD_PTR_CAST_ALIGN */
385	iph = (struct ip *)((char *)icmph + ICMP_MINLEN);
386	if (iph->ip_p != IPPROTO_UDP)
387		return (-1);
388
389	/* LINTED E_BAD_PTR_CAST_ALIGN */
390	udph = (struct udphdr *)((char *)iph + (iph->ip_hl << 2));
391	if (udph->uh_dport != htons(param->port))
392		return (-1);
393	if (udph->uh_sport != param->lport)
394		return (-1);
395
396	/* All matched, it is a response to the probe we sent. */
397	return (0);
398}
399
400/*
401 * Check if the ICMP packet is a reply to our echo request.  Need to match
402 * the ID and sequence.
403 */
404static int
405check_icmp_echo_v4(struct icmp *icmph, probe_param_t *param)
406{
407	uint32_t cookie;
408	in_port_t port;
409
410	if (icmph->icmp_type != ICMP_ECHOREPLY)
411		return (-1);
412	if (icmph->icmp_id != param->echo_id)
413		return (-1);
414	if (icmph->icmp_seq != param->echo_seq)
415		return (-1);
416
417	bcopy(icmph->icmp_data, &cookie, sizeof (cookie));
418	if (cookie != param->echo_cookie)
419		return (-1);
420	bcopy(icmph->icmp_data + sizeof (cookie), &port, sizeof (port));
421	if (port != param->port)
422		return (-1);
423
424	/* All matched, it is a response to the echo we sent. */
425	return (0);
426}
427
428/* Verify if an ICMP packet is what we expect. */
429static int
430check_icmp_v4(char *buf, ssize_t rcvd, probe_param_t *param)
431{
432	struct ip *iph;
433	struct icmp *icmph;
434
435	/*
436	 * We can dereference the length field without worry since the stack
437	 * should not have sent up the packet if it is smaller than a normal
438	 * ICMPv4 packet.
439	 */
440	/* LINTED E_BAD_PTR_CAST_ALIGN */
441	iph = (struct ip *)buf;
442	/* LINTED E_BAD_PTR_CAST_ALIGN */
443	icmph = (struct icmp *)((char *)iph + (iph->ip_hl << 2));
444
445	/*
446	 * If we sent an UDP probe, check if the packet is a port
447	 * unreachable message in response to our probe.
448	 *
449	 * If we sent an ICMP echo request, check if the packet is a reply
450	 * to our echo request.
451	 */
452	if (param->probe == udp_probe) {
453		/* Is the packet large enough for further checking? */
454		if (rcvd < 2 * sizeof (struct ip) + ICMP_MINLEN +
455		    sizeof (struct udphdr)) {
456			return (-1);
457		}
458		return (check_icmp_unreach_v4(icmph, param));
459	} else {
460		if (rcvd < sizeof (struct ip) + ICMP_MINLEN)
461			return (-1);
462		return (check_icmp_echo_v4(icmph, param));
463	}
464}
465
466/*
467 * Check if the ICMPv6 packet is a port unreachable message in respnsed to
468 * our probe.  Return -1 if no, 0 if yes.
469 */
470static int
471check_icmp_unreach_v6(icmp6_t *icmp6h, probe_param_t *param)
472{
473	ip6_t *ip6h;
474	struct udphdr *udph;
475
476	if (icmp6h->icmp6_type != ICMP6_DST_UNREACH)
477		return (-1);
478	if (icmp6h->icmp6_code != ICMP6_DST_UNREACH_NOPORT)
479		return (-1);
480
481	/* LINTED E_BAD_PTR_CAST_ALIGN */
482	ip6h = (ip6_t *)((char *)icmp6h + ICMP6_MINLEN);
483	if (ip6h->ip6_nxt != IPPROTO_UDP)
484		return (-1);
485
486	udph = (struct udphdr *)(ip6h + 1);
487
488	if (udph->uh_dport != htons(param->port))
489		return (-1);
490	if (udph->uh_sport != param->lport)
491		return (-1);
492
493	return (0);
494}
495
496/*
497 * Check if the ICMPv6 packet is a reply to our echo request.  Need to match
498 * the ID and sequence.
499 */
500static int
501check_icmp_echo_v6(icmp6_t *icmp6h, probe_param_t *param)
502{
503	char *tmp;
504	uint32_t cookie;
505	in_port_t port;
506
507	if (icmp6h->icmp6_type != ICMP6_ECHO_REPLY)
508		return (-1);
509	if (icmp6h->icmp6_id != param->echo_id)
510		return (-1);
511	if (icmp6h->icmp6_seq != param->echo_seq)
512		return (-1);
513	tmp = (char *)icmp6h + ICMP6_MINLEN;
514	bcopy(tmp, &cookie, sizeof (cookie));
515	if (cookie != param->echo_cookie)
516		return (-1);
517	tmp += sizeof (cookie);
518	bcopy(tmp, &port, sizeof (port));
519	if (port != param->port)
520		return (-1);
521
522	/* All matched, it is a response to the echo we sent. */
523	return (0);
524}
525
526/* Verify if an ICMPv6 packet is what we expect. */
527static int
528check_icmp_v6(char *buf, ssize_t rcvd, probe_param_t *param)
529{
530	icmp6_t *icmp6h;
531
532	/* LINTED E_BAD_PTR_CAST_ALIGN */
533	icmp6h = (icmp6_t *)(buf);
534
535	/*
536	 * If we sent an UDP probe, check if the packet is a port
537	 * unreachable message.
538	 *
539	 * If we sent an ICMPv6 echo request, check if the packet is a reply.
540	 */
541	if (param->probe == udp_probe) {
542		/* Is the packet large enough for further checking? */
543		if (rcvd < sizeof (ip6_t) + ICMP6_MINLEN +
544		    sizeof (struct udphdr)) {
545			return (-1);
546		}
547		return (check_icmp_unreach_v6(icmp6h, param));
548	} else {
549		if (rcvd < ICMP6_MINLEN)
550			return (-1);
551		return (check_icmp_echo_v6(icmp6h, param));
552	}
553}
554
555/*
556 * Wait for an ICMP reply indefinitely.  If we get what we expect, return 0.
557 * If an error happnes, return -1.
558 */
559static int
560wait_icmp_reply(int af, int recv_sd, struct sockaddr_storage *exp_from,
561    probe_param_t *param)
562{
563	char buf[RECV_PKT_SZ];
564	socklen_t from_len;
565	ssize_t rcvd;
566	int ret;
567
568	for (;;) {
569		if (af == AF_INET) {
570			struct sockaddr_in v4_from;
571
572			from_len = sizeof (v4_from);
573			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
574			    (struct sockaddr *)&v4_from, &from_len)) < 0) {
575				ret = -1;
576				break;
577			}
578
579			/* Packet not from our peer, ignore it. */
580			if ((((struct sockaddr_in *)exp_from)->sin_addr.s_addr)
581			    != v4_from.sin_addr.s_addr) {
582				continue;
583			}
584			if (check_icmp_v4(buf, rcvd, param) == 0) {
585				ret = 0;
586				break;
587			}
588		} else {
589			struct sockaddr_in6 v6_from;
590
591			from_len = sizeof (struct sockaddr_in6);
592			if ((rcvd = recvfrom(recv_sd, buf, RECV_PKT_SZ, 0,
593			    (struct sockaddr *)&v6_from, &from_len)) < 0) {
594				ret = -1;
595				break;
596			}
597
598			if (!IN6_ARE_ADDR_EQUAL(&(v6_from.sin6_addr),
599			    &((struct sockaddr_in6 *)exp_from)->sin6_addr)) {
600				continue;
601			}
602			if (check_icmp_v6(buf, rcvd, param) == 0) {
603				ret = 0;
604				break;
605			}
606		}
607	}
608	return (ret);
609}
610
/*
 * Find the local port used in a socket.
 *
 * sd is the socket; on success *lport is set to its local port, in network
 * byte order, and 0 is returned.  If the local address of the socket cannot
 * be obtained, -1 is returned and *lport is left alone.
 */
static int
get_lport(int sd, in_port_t *lport)
{
	struct sockaddr_storage local;
	socklen_t local_len = sizeof (local);

	if (getsockname(sd, (struct sockaddr *)&local, &local_len) != 0)
		return (-1);

	/* Anything which is not AF_INET is read as an IPv6 address. */
	*lport = (local.ss_family == AF_INET) ?
	    ((struct sockaddr_in *)&local)->sin_port :
	    ((struct sockaddr_in6 *)&local)->sin6_port;
	return (0);
}
627
628/*
629 * Use UDP to check if the peer server is alive.  Send a 0 length UDP packet
630 * to the peer server.  If there is no one listening, the peer IP stack
631 * should send back a port unreachable ICMP(v4/v6) packet.  If the peer
632 * server is alive, there should be no response.  So if we get SIGALRM,
633 * the peer is alive.
634 */
635static int
636udp_query(probe_param_t *param)
637{
638	int ret;
639	int send_sd, recv_sd, af;
640	struct sockaddr_storage dst_addr;
641	socklen_t addr_len;
642	void *next_hop;
643	char buf[1];
644	struct itimerval timeout;
645	uint64_t tm;
646
647	ret = 0;
648	next_hop = NULL;
649
650	af = set_sockaddr(&dst_addr, &addr_len, &next_hop, param);
651
652	if ((send_sd = socket(af, SOCK_DGRAM, param->proto)) == -1)
653		return (-1);
654	if ((recv_sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
655	    IPPROTO_ICMPV6)) == -1) {
656		return (-1);
657	}
658
659	/* DSR mode, need to set the next hop */
660	if (next_hop != NULL) {
661		if (af == AF_INET) {
662			if (setsockopt(send_sd, IPPROTO_IP, IP_NEXTHOP,
663			    next_hop, sizeof (ipaddr_t)) < 0) {
664				ret = -1;
665				goto out;
666			}
667		} else {
668			if (setsockopt(send_sd, IPPROTO_IPV6, IPV6_NEXTHOP,
669			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
670				ret = -1;
671				goto out;
672			}
673		}
674	}
675
676	/*
677	 * If ilbd asks us to wait at most t, we will wait for at most
678	 * t', which is 3/4 of t.  If we wait for too long, ilbd may
679	 * timeout and kill us.
680	 */
681	timeout.it_interval.tv_sec = 0;
682	timeout.it_interval.tv_usec = 0;
683	tm = (param->timeout * MICROSEC >> 2) * 3;
684	if (tm > MICROSEC) {
685		timeout.it_value.tv_sec = tm / MICROSEC;
686		timeout.it_value.tv_usec = tm - (timeout.it_value.tv_sec *
687		    MICROSEC);
688	} else {
689		timeout.it_value.tv_sec = 0;
690		timeout.it_value.tv_usec = tm;
691	}
692	timeout_is_good = B_TRUE;
693	if (setitimer(ITIMER_REAL, &timeout, NULL) != 0) {
694		ret = -1;
695		goto out;
696	}
697
698	if (sendto(send_sd, buf, 0, 0, (struct sockaddr *)&dst_addr,
699	    addr_len) != 0) {
700		ret = -1;
701		goto out;
702	}
703	if ((ret = get_lport(send_sd, &param->lport)) != 0)
704		goto out;
705
706	/*
707	 * If the server app is listening, we should not get back a
708	 * response.  So if wait_icmp_reply() returns, either there
709	 * is an error or we get back something.
710	 */
711	(void) wait_icmp_reply(af, recv_sd, &dst_addr, param);
712	ret = -1;
713
714out:
715	(void) close(send_sd);
716	(void) close(recv_sd);
717	return (ret);
718}
719
/*
 * Size (in uint32_t) of the ping packet to be sent to server.  It includes
 * the ICMP header (ICMP_MINLEN bytes) followed by two uint32_t of data
 * holding a cookie (random number) + the target port.  The cookie and port
 * are used for matching ping request since there can be many such ping
 * packets sent to different servers from the same source address and using
 * the same VIP.  The last two bytes are for padding.
 */
#define	PING_PKT_LEN \
	((ICMP_MINLEN + 2 * sizeof (uint32_t)) / sizeof (uint32_t))
730
731/*
732 * Try to get a random number from the pseudo random number device
733 * /dev/urandom.  If there is any error, return (uint32_t)gethrtime()
734 * as a back up.
735 */
736static uint32_t
737get_random(void)
738{
739	int fd;
740	uint32_t num;
741
742	if ((fd = open("/dev/urandom", O_RDONLY)) == -1)
743		return ((uint32_t)gethrtime());
744
745	if (read(fd, &num, sizeof (num)) != sizeof (num))
746		num = ((uint32_t)gethrtime());
747
748	(void) close(fd);
749	return (num);
750}
751
752/*
753 * Use ICMP(v4/v6) echo request to check if the peer server machine is
754 * reachable.  Send a echo request and expect to get back a echo reply.
755 */
756static int
757ping_query(probe_param_t *param)
758{
759	int ret;
760	int sd, af;
761	struct sockaddr_storage dst_addr;
762	socklen_t dst_addr_len;
763	void *next_hop;
764	hrtime_t start, end;
765	uint32_t rtt;
766	uint32_t buf[PING_PKT_LEN];
767	struct icmp *icmph;
768
769	ret = 0;
770	next_hop = NULL;
771
772	af = set_sockaddr(&dst_addr, &dst_addr_len, &next_hop, param);
773
774	if ((sd = socket(af, SOCK_RAW, (af == AF_INET) ? IPPROTO_ICMP :
775	    IPPROTO_ICMPV6)) == -1) {
776		return (-1);
777	}
778
779	/* DSR mode, need to set the next hop */
780	if (next_hop != NULL) {
781		if (af == AF_INET) {
782			if (setsockopt(sd, IPPROTO_IP, IP_NEXTHOP, next_hop,
783			    sizeof (ipaddr_t)) < 0) {
784				ret = -1;
785				goto out;
786			}
787		} else {
788			if (setsockopt(sd, IPPROTO_IPV6, IPV6_NEXTHOP,
789			    next_hop, sizeof (struct sockaddr_in6)) < 0) {
790				ret = -1;
791				goto out;
792			}
793		}
794	}
795
796	bzero(buf, sizeof (buf));
797	icmph = (struct icmp *)buf;
798	icmph->icmp_type = af == AF_INET ? ICMP_ECHO : ICMP6_ECHO_REQUEST;
799	icmph->icmp_code = 0;
800	icmph->icmp_cksum = 0;
801	icmph->icmp_id = htons(gethrtime() % USHRT_MAX);
802	icmph->icmp_seq = htons(gethrtime() % USHRT_MAX);
803
804	param->echo_cookie = get_random();
805	bcopy(&param->echo_cookie, icmph->icmp_data,
806	    sizeof (param->echo_cookie));
807	bcopy(&param->port, icmph->icmp_data + sizeof (param->echo_cookie),
808	    sizeof (param->port));
809	icmph->icmp_cksum = in_cksum((ushort_t *)buf, sizeof (buf));
810	param->echo_id = icmph->icmp_id;
811	param->echo_seq = icmph->icmp_seq;
812
813	timeout_is_good = B_FALSE;
814	(void) alarm(param->timeout);
815	start = gethrtime();
816	if (sendto(sd, buf, sizeof (buf), 0, (struct sockaddr *)&dst_addr,
817	    dst_addr_len) != sizeof (buf)) {
818		ret = -1;
819		goto out;
820	}
821	if (wait_icmp_reply(af, sd, &dst_addr, param) != 0) {
822		ret = -1;
823		goto out;
824	}
825	end = gethrtime();
826
827	rtt = (end - start) / (NANOSEC / MICROSEC);
828	if (rtt == 0)
829		rtt = 1;
830	(void) printf("%u", rtt);
831
832out:
833	(void) close(sd);
834	return (ret);
835}
836
837int
838main(int argc, char *argv[])
839{
840	probe_param_t param;
841	int ret;
842
843	/* ilbd should pass in PROG_ARGC parameters. */
844	if (argc != PROG_ARGC) {
845		(void) printf("-1");
846		return (-1);
847	}
848
849	if (signal(SIGALRM, probe_exit) == SIG_ERR) {
850		(void) printf("-1");
851		return (-1);
852	}
853
854	if (!parse_probe_param(argv, &param)) {
855		(void) printf("-1");
856		return (-1);
857	}
858
859	switch (param.probe) {
860	case ping_probe:
861		ret = ping_query(&param);
862		break;
863	case tcp_probe:
864		ret = tcp_query(&param);
865		break;
866	case udp_probe:
867		ret = udp_query(&param);
868		break;
869	}
870
871	if (ret == -1)
872		(void) printf("-1");
873
874	return (ret);
875}
876