1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * The core of ilbd daemon is a single-threaded event loop using
29  * event completion framework; it receives requests from client using
30  * the libilb functions, handles timeouts, initiates health checks, and
31  * populates the kernel state.
32  *
33  * The daemon has the following privileges (in addition to the basic ones):
34  *
35  * 	PRIV_PROC_OWNER, PRIV_NET_ICMPACCESS,
36  *	PRIV_SYS_IP_CONFIG, PRIV_PROC_AUDIT
37  *
38  * The aforementioned  privileges will be specified in the SMF manifest.
39  *
40  * AF_UNIX socket is used for IPC between libilb and this daemon as
41  * both processes will run on the same machine.
42  *
43  * To do health check, the daemon will create a timer for every health
44  * check probe. Each of these timers will be  associated with the
45  * event port. When a timer goes off, the daemon will initiate a
46  * pipe to a separate process to execute the specific health check
47  * probe. This new process will run with the same user-id as that of
48  * ilbd daemon and will inherit all the privileges from the ilbd
49  * daemon parent process except the following:
50  *
51  * PRIV_PROC_OWNER, PRIV_PROC_AUDIT
52  *
 * All health checks will be implemented as external methods
54  * (binary or script). The following arguments will be passed
55  * to external methods:
56  *
57  *	$1	VIP (literal IPv4 or IPv6 address)
58  *	$2	Server IP (literal IPv4 or IPv6 address)
59  *	$3	Protocol (UDP, TCP as a string)
60  *	$4	The load balance mode, "DSR", "NAT", "HALF_NAT"
61  *	$5	Numeric port range
62  *	$6	maximum time (in seconds) the method
63  * should wait before returning failure. If the method runs for
64  * longer, it may be killed, and the test considered failed.
65  *
 * Upon success, a health check method should print the RTT
 * it finds to its STDOUT for ilbd to consume.  The implicit unit
68  * is microseconds but only the number needs to be printed.  If it
69  * cannot find the RTT, it should print 0.  If the method decides
70  * that the server is dead, it should print -1 to its STDOUT.
71  *
 * By default, a user-supplied health check probe process will
 * also run with the same set of privileges as ILB's built-in
 * probes.  If the administrator has a user-supplied health check
 * program that requires a larger privilege set, he/she will have
 * to implement a setuid program.
77  *
78  * Each health check will have a timeout, such that if the health
79  * check process is hung, it will be killed after the timeout interval
80  * and the daemon will notify the kernel ILB engine of the server's
81  * unresponsiveness, so that load distribution can be appropriately
82  * adjusted.  If on the other hand the health check is successful
83  * the timeout timer is cancelled.
84  */
85 
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <strings.h>
89 #include <libgen.h>
90 #include <fcntl.h>
91 #include <stddef.h>
92 #include <signal.h>
93 #include <port.h>
94 #include <ctype.h>
95 #include <sys/types.h>
96 #include <sys/wait.h>
97 #include <sys/stat.h>
98 #include <sys/note.h>
99 #include <sys/resource.h>
100 #include <unistd.h>
101 #include <sys/socket.h>
102 #include <errno.h>
103 #include <ucred.h>
104 #include <priv_utils.h>
105 #include <net/if.h>
106 #include <libilb.h>
107 #include <assert.h>
108 #include <inet/ilb.h>
109 #include <libintl.h>
110 #include <fcntl.h>
111 #include <rpcsvc/daemon_utils.h>
112 #include "libilb_impl.h"
113 #include "ilbd.h"
114 
115 /*
116  * NOTE: The following needs to be kept up to date.
117  */
118 #define	ILBD_VERSION	"1.0"
119 #define	ILBD_COPYRIGHT	\
120 	"Copyright 2009 Sun Microsystems, Inc.  All rights reserved.\n" \
121 	"Use is subject to license terms.\n"
122 
123 /*
124  * Global reply buffer to client request.  Note that ilbd is single threaded,
125  * so a global buffer is OK.  If ilbd becomes multi-threaded, this needs to
126  * be changed.
127  */
128 static uint32_t reply_buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
129 
130 static void
131 ilbd_free_cli(ilbd_client_t *cli)
132 {
133 	(void) close(cli->cli_sd);
134 	if (cli->cli_cmd == ILBD_SHOW_NAT)
135 		ilbd_show_nat_cleanup();
136 	if (cli->cli_cmd == ILBD_SHOW_PERSIST)
137 		ilbd_show_sticky_cleanup();
138 	if (cli->cli_saved_reply != NULL)
139 		free(cli->cli_saved_reply);
140 	free(cli->cli_pw_buf);
141 	free(cli);
142 }
143 
144 static void
145 ilbd_reset_kernel_state(void)
146 {
147 	ilb_status_t	rc;
148 	ilb_name_cmd_t	kcmd;
149 
150 	kcmd.cmd = ILB_DESTROY_RULE;
151 	kcmd.flags = ILB_RULE_ALLRULES;
152 	kcmd.name[0] = '\0';
153 
154 	rc = do_ioctl(&kcmd, 0);
155 	if (rc != ILB_STATUS_OK)
156 		logdebug("ilbd_reset_kernel_state: do_ioctl failed: %s",
157 		    strerror(errno));
158 }
159 
160 /* Signal handler to do clean up. */
161 /* ARGSUSED */
162 static void
163 ilbd_cleanup(int sig)
164 {
165 	(void) remove(SOCKET_PATH);
166 	ilbd_reset_kernel_state();
167 	exit(0);
168 }
169 
170 /*
171  * Create a socket and return it to caller.  If there is a failure, this
172  * function calls exit(2).  Hence it always returns a valid listener socket.
173  *
174  * Note that this function is called before ilbd becomes a daemon.  So
175  * we call perror(3C) to print out error message directly so that SMF can
176  * catch them.
177  */
178 static int
179 ilbd_create_client_socket(void)
180 {
181 	int			s;
182 	mode_t			omask;
183 	struct sockaddr_un	sa;
184 	int			sobufsz;
185 
186 	s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
187 	if (s == -1) {
188 		perror("ilbd_create_client_socket: socket to"
189 		    " client failed");
190 		exit(errno);
191 	}
192 	if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) {
193 		perror("ilbd_create_client_socket: fcntl(FD_CLOEXEC)");
194 		exit(errno);
195 	}
196 
197 	sobufsz = ILBD_MSG_SIZE;
198 	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz,
199 	    sizeof (sobufsz)) != 0) {
200 		perror("ilbd_creat_client_socket: setsockopt(SO_SNDBUF) "
201 		    "failed");
202 		exit(errno);
203 	}
204 	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz,
205 	    sizeof (sobufsz)) != 0) {
206 		perror("ilbd_creat_client_socket: setsockopt(SO_RCVBUF) "
207 		    "failed");
208 		exit(errno);
209 	}
210 
211 	/*
212 	 * since everybody can talk to us, we need to open up permissions
213 	 * we check peer privileges on a per-operation basis.
214 	 * This is no security issue as long as we're single-threaded.
215 	 */
216 	omask = umask(0);
217 
218 	/* just in case we didn't clean up properly after last exit */
219 	(void) remove(SOCKET_PATH);
220 
221 	bzero(&sa, sizeof (sa));
222 	sa.sun_family = AF_UNIX;
223 	(void) strlcpy(sa.sun_path, SOCKET_PATH, sizeof (sa.sun_path));
224 
225 	if (bind(s, (struct sockaddr *)&sa, sizeof (sa)) != 0) {
226 		perror("ilbd_create_client_socket(): bind to client"
227 		    " socket failed");
228 		exit(errno);
229 	}
230 
231 	/* re-instate old umask */
232 	(void) umask(omask);
233 
234 #define	QLEN	16
235 
236 	if (listen(s, QLEN) != 0) {
237 		perror("ilbd_create_client_socket: listen to client"
238 		    " socket failed");
239 		exit(errno);
240 	}
241 
242 	(void) signal(SIGHUP, SIG_IGN);
243 	(void) signal(SIGPIPE, SIG_IGN);
244 	(void) signal(SIGSTOP, SIG_IGN);
245 	(void) signal(SIGTSTP, SIG_IGN);
246 	(void) signal(SIGTTIN, SIG_IGN);
247 	(void) signal(SIGTTOU, SIG_IGN);
248 
249 	(void) signal(SIGINT, ilbd_cleanup);
250 	(void) signal(SIGTERM, ilbd_cleanup);
251 	(void) signal(SIGQUIT, ilbd_cleanup);
252 
253 	return (s);
254 }
255 
256 /*
257  * Return the minimum size of a given request.  The returned size does not
258  * include the variable part of a request.
259  */
260 static size_t
261 ilbd_cmd_size(const ilb_comm_t *ic)
262 {
263 	size_t cmd_sz;
264 
265 	cmd_sz = sizeof (*ic);
266 	switch (ic->ic_cmd) {
267 	case ILBD_RETRIEVE_SG_NAMES:
268 	case ILBD_RETRIEVE_RULE_NAMES:
269 	case ILBD_RETRIEVE_HC_NAMES:
270 	case ILBD_CMD_OK:
271 		break;
272 	case ILBD_CMD_ERROR:
273 		cmd_sz += sizeof (ilb_status_t);
274 		break;
275 	case ILBD_RETRIEVE_SG_HOSTS:
276 	case ILBD_CREATE_SERVERGROUP:
277 	case ILBD_DESTROY_SERVERGROUP:
278 	case ILBD_DESTROY_RULE:
279 	case ILBD_ENABLE_RULE:
280 	case ILBD_DISABLE_RULE:
281 	case ILBD_RETRIEVE_RULE:
282 	case ILBD_DESTROY_HC:
283 	case ILBD_GET_HC_INFO:
284 	case ILBD_GET_HC_SRVS:
285 		cmd_sz += sizeof (ilbd_name_t);
286 		break;
287 	case ILBD_ENABLE_SERVER:
288 	case ILBD_DISABLE_SERVER:
289 	case ILBD_ADD_SERVER_TO_GROUP:
290 	case ILBD_REM_SERVER_FROM_GROUP:
291 		cmd_sz += sizeof (ilb_sg_info_t);
292 		break;
293 	case ILBD_SRV_ADDR2ID:
294 	case ILBD_SRV_ID2ADDR:
295 		cmd_sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t);
296 		break;
297 	case ILBD_CREATE_RULE:
298 		cmd_sz += sizeof (ilb_rule_info_t);
299 		break;
300 	case ILBD_CREATE_HC:
301 		cmd_sz += sizeof (ilb_hc_info_t);
302 		break;
303 	case ILBD_SHOW_NAT:
304 	case ILBD_SHOW_PERSIST:
305 		cmd_sz += sizeof (ilb_show_info_t);
306 		break;
307 	}
308 
309 	return (cmd_sz);
310 }
311 
312 /*
313  * Given a request and its size, check that the size is big enough to
314  * contain the variable part of a request.
315  */
316 static ilb_status_t
317 ilbd_check_req_size(ilb_comm_t *ic, size_t ic_sz)
318 {
319 	ilb_status_t rc = ILB_STATUS_OK;
320 	ilb_sg_info_t *sg_info;
321 	ilbd_namelist_t *nlist;
322 
323 	switch (ic->ic_cmd) {
324 	case ILBD_CREATE_SERVERGROUP:
325 	case ILBD_ENABLE_SERVER:
326 	case ILBD_DISABLE_SERVER:
327 	case ILBD_ADD_SERVER_TO_GROUP:
328 	case ILBD_REM_SERVER_FROM_GROUP:
329 		sg_info = (ilb_sg_info_t *)&ic->ic_data;
330 
331 		if (ic_sz < ilbd_cmd_size(ic) + sg_info->sg_srvcount *
332 		    sizeof (ilb_sg_srv_t)) {
333 			rc = ILB_STATUS_EINVAL;
334 		}
335 		break;
336 	case ILBD_ENABLE_RULE:
337 	case ILBD_DISABLE_RULE:
338 	case ILBD_DESTROY_RULE:
339 		nlist = (ilbd_namelist_t *)&ic->ic_data;
340 
341 		if (ic_sz < ilbd_cmd_size(ic) + nlist->ilbl_count *
342 		    sizeof (ilbd_name_t)) {
343 			rc = ILB_STATUS_EINVAL;
344 		}
345 		break;
346 	}
347 	return (rc);
348 }
349 
350 /*
351  * this function *relies* on a complete message/data struct
352  * being passed in (currently via the SOCK_SEQPACKET socket type).
353  *
354  * Note that the size of ip is at most ILBD_MSG_SIZE.
355  */
356 static ilb_status_t
357 consume_common_struct(ilb_comm_t *ic, size_t ic_sz, ilbd_client_t *cli,
358     int ev_port)
359 {
360 	ilb_status_t	rc;
361 	struct passwd	*ps;
362 	size_t		rbufsz;
363 	ssize_t		ret;
364 	boolean_t	standard_reply = B_TRUE;
365 	ilbd_name_t	name;
366 
367 	/*
368 	 * cli_ev must be overridden during handling of individual commands,
369 	 * if there's a special need; otherwise, leave this for
370 	 * the "default" case
371 	 */
372 	cli->cli_ev = ILBD_EVENT_REQ;
373 
374 	ps = &cli->cli_pw;
375 	rbufsz = ILBD_MSG_SIZE;
376 
377 	/* Sanity check on the size of the static part of a request. */
378 	if (ic_sz < ilbd_cmd_size(ic)) {
379 		rc = ILB_STATUS_EINVAL;
380 		goto out;
381 	}
382 
383 	switch (ic->ic_cmd) {
384 	case ILBD_CREATE_SERVERGROUP: {
385 		ilb_sg_info_t sg_info;
386 
387 		/*
388 		 * ilbd_create_sg() only needs the sg_name field.  But it
389 		 * takes in a ilb_sg_info_t because it is used as a callback
390 		 * in ilbd_walk_sg_pgs().
391 		 */
392 		(void) strlcpy(sg_info.sg_name, (char *)&(ic->ic_data),
393 		    sizeof (sg_info.sg_name));
394 		rc = ilbd_create_sg(&sg_info, ev_port, ps,
395 		    cli->cli_peer_ucredp);
396 		break;
397 	}
398 
399 	case ILBD_DESTROY_SERVERGROUP:
400 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
401 		rc = ilbd_destroy_sg(name, ps, cli->cli_peer_ucredp);
402 		break;
403 
404 	case ILBD_ADD_SERVER_TO_GROUP:
405 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
406 			break;
407 		rc = ilbd_add_server_to_group((ilb_sg_info_t *)&ic->ic_data,
408 		    ev_port, ps, cli->cli_peer_ucredp);
409 		break;
410 
411 	case ILBD_REM_SERVER_FROM_GROUP:
412 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
413 			break;
414 		rc = ilbd_rem_server_from_group((ilb_sg_info_t *)&ic->ic_data,
415 		    ev_port, ps, cli->cli_peer_ucredp);
416 		break;
417 
418 	case ILBD_ENABLE_SERVER:
419 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
420 			break;
421 		rc = ilbd_enable_server((ilb_sg_info_t *)&ic->ic_data, ps,
422 		    cli->cli_peer_ucredp);
423 		break;
424 
425 	case ILBD_DISABLE_SERVER:
426 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
427 			break;
428 		rc = ilbd_disable_server((ilb_sg_info_t *)&ic->ic_data, ps,
429 		    cli->cli_peer_ucredp);
430 		break;
431 
432 	case ILBD_SRV_ADDR2ID:
433 		rc = ilbd_address_to_srvID((ilb_sg_info_t *)&ic->ic_data,
434 		    reply_buf, &rbufsz);
435 		if (rc == ILB_STATUS_OK)
436 			standard_reply = B_FALSE;
437 		break;
438 
439 	case ILBD_SRV_ID2ADDR:
440 		rc = ilbd_srvID_to_address((ilb_sg_info_t *)&ic->ic_data,
441 		    reply_buf, &rbufsz);
442 		if (rc == ILB_STATUS_OK)
443 			standard_reply = B_FALSE;
444 		break;
445 
446 	case ILBD_RETRIEVE_SG_HOSTS:
447 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
448 		rc = ilbd_retrieve_sg_hosts(name, reply_buf, &rbufsz);
449 		if (rc == ILB_STATUS_OK)
450 			standard_reply = B_FALSE;
451 		break;
452 
453 	case ILBD_RETRIEVE_SG_NAMES:
454 	case ILBD_RETRIEVE_RULE_NAMES:
455 	case ILBD_RETRIEVE_HC_NAMES:
456 		rc = ilbd_retrieve_names(ic->ic_cmd, reply_buf, &rbufsz);
457 		if (rc == ILB_STATUS_OK)
458 			standard_reply = B_FALSE;
459 		break;
460 
461 	case ILBD_CREATE_RULE:
462 		rc = ilbd_create_rule((ilb_rule_info_t *)&ic->ic_data, ev_port,
463 		    ps, cli->cli_peer_ucredp);
464 		break;
465 
466 	case ILBD_DESTROY_RULE:
467 		/* Copy the name to ensure that name is NULL terminated. */
468 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
469 		rc = ilbd_destroy_rule(name, ps, cli->cli_peer_ucredp);
470 		break;
471 
472 	case ILBD_ENABLE_RULE:
473 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
474 		rc = ilbd_enable_rule(name, ps, cli->cli_peer_ucredp);
475 		break;
476 
477 	case ILBD_DISABLE_RULE:
478 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
479 		rc = ilbd_disable_rule(name, ps, cli->cli_peer_ucredp);
480 		break;
481 
482 	case ILBD_RETRIEVE_RULE:
483 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
484 		rc = ilbd_retrieve_rule(name, reply_buf, &rbufsz);
485 		if (rc == ILB_STATUS_OK)
486 			standard_reply = B_FALSE;
487 		break;
488 
489 	case ILBD_CREATE_HC:
490 		rc = ilbd_create_hc((ilb_hc_info_t *)&ic->ic_data, ev_port, ps,
491 		    cli->cli_peer_ucredp);
492 		break;
493 
494 	case ILBD_DESTROY_HC:
495 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
496 		rc = ilbd_destroy_hc(name, ps, cli->cli_peer_ucredp);
497 		break;
498 
499 	case ILBD_GET_HC_INFO:
500 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
501 		rc = ilbd_get_hc_info(name, reply_buf, &rbufsz);
502 		if (rc == ILB_STATUS_OK)
503 			standard_reply = B_FALSE;
504 		break;
505 
506 	case ILBD_GET_HC_SRVS:
507 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
508 		rc = ilbd_get_hc_srvs(name, reply_buf, &rbufsz);
509 		if (rc == ILB_STATUS_OK)
510 			standard_reply = B_FALSE;
511 		break;
512 
513 	case ILBD_SHOW_NAT:
514 		rc = ilbd_show_nat(cli, ic, reply_buf, &rbufsz);
515 		if (rc == ILB_STATUS_OK)
516 			standard_reply = B_FALSE;
517 		break;
518 
519 	case ILBD_SHOW_PERSIST:
520 		rc = ilbd_show_sticky(cli, ic, reply_buf, &rbufsz);
521 		if (rc == ILB_STATUS_OK)
522 			standard_reply = B_FALSE;
523 		break;
524 
525 	default:
526 		logdebug("consume_common_struct: unknown command");
527 		rc = ILB_STATUS_INVAL_CMD;
528 		break;
529 	}
530 
531 out:
532 	/*
533 	 * The message exchange is always in pairs, request/response.  If
534 	 * a transaction requires multiple exchanges, the client will send
535 	 * in multiple requests to get multiple responses.  The show-nat and
536 	 * show-persist request are examples of this.  The end of transaction
537 	 * is marked with ic_flags set to ILB_COMM_END.
538 	 */
539 
540 	/* This is the standard reply. */
541 	if (standard_reply) {
542 		if (rc == ILB_STATUS_OK)
543 			ilbd_reply_ok(reply_buf, &rbufsz);
544 		else
545 			ilbd_reply_err(reply_buf, &rbufsz, rc);
546 	}
547 
548 	if ((ret = send(cli->cli_sd, reply_buf, rbufsz, 0)) != rbufsz) {
549 		if (ret == -1) {
550 			if (errno != EWOULDBLOCK) {
551 				logdebug("consume_common_struct: send: %s",
552 				    strerror(errno));
553 				rc = ILB_STATUS_SEND;
554 				goto err_out;
555 			}
556 			/*
557 			 * The reply is blocked, save the reply.  handle_req()
558 			 * will associate the event port for the re-send.
559 			 */
560 			assert(cli->cli_saved_reply == NULL);
561 			if ((cli->cli_saved_reply = malloc(rbufsz)) == NULL) {
562 				/*
563 				 * Set the error to ILB_STATUS_SEND so that
564 				 * handle_req() will free the client.
565 				 */
566 				logdebug("consume_common_struct: failure to "
567 				    "allocate memory to save reply");
568 				rc = ILB_STATUS_SEND;
569 				goto err_out;
570 			}
571 			bcopy(reply_buf, cli->cli_saved_reply, rbufsz);
572 			cli->cli_saved_size = rbufsz;
573 			return (ILB_STATUS_EWOULDBLOCK);
574 		}
575 	}
576 err_out:
577 	return (rc);
578 }
579 
580 /*
581  * Accept a new client request.  A struct ilbd_client_t is allocated to
582  * store the client info.  The accepted socket is port_associate() with
583  * the given port.  And the allocated ilbd_client_t struct is passed as
584  * the user pointer.
585  */
586 static void
587 new_req(int ev_port, int listener, void *ev_obj)
588 {
589 	struct sockaddr	sa;
590 	int		sa_len;
591 	int		new_sd;
592 	int		sflags;
593 	ilbd_client_t	*cli;
594 	int		res;
595 	uid_t		uid;
596 
597 	sa_len = sizeof (sa);
598 	if ((new_sd = accept(listener, &sa, &sa_len)) == -1) {
599 		/* don't log if we're out of file descriptors */
600 		if (errno != EINTR && errno != EMFILE)
601 			logperror("new_req: accept failed");
602 		goto done;
603 	}
604 
605 	/* Set the new socket to be non-blocking. */
606 	if ((sflags = fcntl(new_sd, F_GETFL, 0)) == -1) {
607 		logperror("new_req: fcntl(F_GETFL)");
608 		goto clean_up;
609 	}
610 	if (fcntl(new_sd, F_SETFL, sflags | O_NONBLOCK) == -1) {
611 		logperror("new_req: fcntl(F_SETFL)");
612 		goto clean_up;
613 	}
614 	if (fcntl(new_sd, F_SETFD, FD_CLOEXEC) == -1) {
615 		logperror("new_req: fcntl(FD_CLOEXEC)");
616 		goto clean_up;
617 	}
618 	if ((cli = calloc(1, sizeof (ilbd_client_t))) == NULL) {
619 		logerr("new_req: malloc(ilbd_client_t)");
620 		goto clean_up;
621 	}
622 	res = getpeerucred(new_sd, &cli->cli_peer_ucredp);
623 	if (res == -1) {
624 		logperror("new_req: getpeerucred failed");
625 		goto clean_up;
626 	}
627 	if ((uid = ucred_getruid(cli->cli_peer_ucredp)) == (uid_t)-1) {
628 		logperror("new_req: ucred_getruid failed");
629 		goto clean_up;
630 	}
631 	cli->cli_pw_bufsz = (size_t)sysconf(_SC_GETPW_R_SIZE_MAX);
632 	if ((cli->cli_pw_buf = malloc(cli->cli_pw_bufsz)) == NULL) {
633 		free(cli);
634 		logerr("new_req: malloc(cli_pw_buf)");
635 		goto clean_up;
636 	}
637 	if (getpwuid_r(uid, &cli->cli_pw, cli->cli_pw_buf,
638 	    cli->cli_pw_bufsz) == NULL) {
639 		free(cli->cli_pw_buf);
640 		free(cli);
641 		logperror("new_req: invalid user");
642 		goto clean_up;
643 	}
644 	cli->cli_ev = ILBD_EVENT_REQ;
645 	cli->cli_sd = new_sd;
646 	cli->cli_cmd = ILBD_BAD_CMD;
647 	cli->cli_saved_reply = NULL;
648 	cli->cli_saved_size = 0;
649 	if (port_associate(ev_port, PORT_SOURCE_FD, new_sd, POLLRDNORM,
650 	    cli) == -1) {
651 		logperror("new_req: port_associate(cli) failed");
652 		free(cli->cli_pw_buf);
653 		free(cli);
654 clean_up:
655 		(void) close(new_sd);
656 	}
657 
658 done:
659 	/* Re-associate the listener with the event port. */
660 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
661 	    ev_obj) == -1) {
662 		logperror("new_req: port_associate(listener) failed");
663 		exit(1);
664 	}
665 }
666 
667 static void
668 handle_req(int ev_port, ilbd_event_t event, ilbd_client_t *cli)
669 {
670 	/* All request should be smaller than ILBD_MSG_SIZE */
671 	union {
672 		ilb_comm_t	ic;
673 		uint32_t	buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
674 	} ic_u;
675 	int	rc = ILB_STATUS_OK;
676 	ssize_t	r;
677 
678 	if (event == ILBD_EVENT_REQ) {
679 		/*
680 		 * Something is wrong with the client since there is a
681 		 * pending reply, the client should not send us another
682 		 * request.  Kill this client.
683 		 */
684 		if (cli->cli_saved_reply != NULL) {
685 			logerr("handle_req: misbehaving client, more than one "
686 			    "outstanding request");
687 			rc = ILB_STATUS_INTERNAL;
688 			goto err_out;
689 		}
690 
691 		/*
692 		 * Our socket is message based so we should be able
693 		 * to get the request in one single read.
694 		 */
695 		r = recv(cli->cli_sd, (void *)ic_u.buf, sizeof (ic_u.buf), 0);
696 		if (r < 0) {
697 			if (errno != EINTR) {
698 				logperror("handle_req: read failed");
699 				rc = ILB_STATUS_READ;
700 				goto err_out;
701 			}
702 			/*
703 			 * If interrupted, just re-associate the cli_sd
704 			 * with the port.
705 			 */
706 			goto done;
707 		}
708 		cli->cli_cmd = ic_u.ic.ic_cmd;
709 
710 		rc = consume_common_struct(&ic_u.ic, r, cli, ev_port);
711 		if (rc == ILB_STATUS_EWOULDBLOCK)
712 			goto blocked;
713 		/* Fatal error communicating with client, free it. */
714 		if (rc == ILB_STATUS_SEND)
715 			goto err_out;
716 	} else {
717 		assert(event == ILBD_EVENT_REP_OK);
718 		assert(cli->cli_saved_reply != NULL);
719 
720 		/*
721 		 * The reply to client was previously blocked, we will
722 		 * send again.
723 		 */
724 		if (send(cli->cli_sd, cli->cli_saved_reply,
725 		    cli->cli_saved_size, 0) != cli->cli_saved_size) {
726 			if (errno != EWOULDBLOCK) {
727 				logdebug("handle_req: send: %s",
728 				    strerror(errno));
729 				rc = ILB_STATUS_SEND;
730 				goto err_out;
731 			}
732 			goto blocked;
733 		}
734 		free(cli->cli_saved_reply);
735 		cli->cli_saved_reply = NULL;
736 		cli->cli_saved_size = 0;
737 	}
738 done:
739 	/* Re-associate with the event port for more requests. */
740 	cli->cli_ev = ILBD_EVENT_REQ;
741 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd,
742 	    POLLRDNORM, cli) == -1) {
743 		logperror("handle_req: port_associate(POLLRDNORM)");
744 		rc = ILB_STATUS_INTERNAL;
745 		goto err_out;
746 	}
747 	return;
748 
749 blocked:
750 	/* Re-associate with the event port. */
751 	cli->cli_ev = ILBD_EVENT_REP_OK;
752 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, POLLWRNORM,
753 	    cli) == -1) {
754 		logperror("handle_req: port_associate(POLLWRNORM)");
755 		rc = ILB_STATUS_INTERNAL;
756 		goto err_out;
757 	}
758 	return;
759 
760 err_out:
761 	ilbd_free_cli(cli);
762 }
763 
764 static void
765 i_ilbd_read_config(int ev_port)
766 {
767 	logdebug("i_ilbd_read_config: port %d", ev_port);
768 	(void) ilbd_walk_sg_pgs(ilbd_create_sg, &ev_port, NULL);
769 	(void) ilbd_walk_hc_pgs(ilbd_create_hc, &ev_port, NULL);
770 	(void) ilbd_walk_rule_pgs(ilbd_create_rule, &ev_port, NULL);
771 }
772 
773 /*
774  * main event loop for ilbd
775  * asserts that argument 'listener' is a server socket ready to accept() on.
776  */
777 static void
778 main_loop(int listener)
779 {
780 	port_event_t		p_ev;
781 	int			ev_port, ev_port_obj;
782 	ilbd_event_obj_t	ev_obj;
783 	ilbd_timer_event_obj_t	timer_ev_obj;
784 
785 	ev_port = port_create();
786 	if (ev_port == -1) {
787 		logperror("main_loop: port_create failed");
788 		exit(-1);
789 	}
790 	ilbd_hc_timer_init(ev_port, &timer_ev_obj);
791 
792 	ev_obj.ev = ILBD_EVENT_NEW_REQ;
793 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
794 	    &ev_obj) == -1) {
795 		logperror("main_loop: port_associate failed");
796 		exit(1);
797 	}
798 
799 	i_ilbd_read_config(ev_port);
800 	ilbd_hc_timer_update(&timer_ev_obj);
801 
802 	_NOTE(CONSTCOND)
803 	while (B_TRUE) {
804 		int r;
805 		ilbd_event_t event;
806 		ilbd_client_t *cli;
807 
808 		r = port_get(ev_port, &p_ev, NULL);
809 		if (r == -1) {
810 			if (errno == EINTR)
811 				continue;
812 			logperror("main_loop: port_get failed");
813 			break;
814 		}
815 
816 		ev_port_obj = p_ev.portev_object;
817 		event = ((ilbd_event_obj_t *)p_ev.portev_user)->ev;
818 
819 		switch (event) {
820 		case ILBD_EVENT_TIMER:
821 			ilbd_hc_timeout();
822 			break;
823 
824 		case ILBD_EVENT_PROBE:
825 			ilbd_hc_probe_return(ev_port, ev_port_obj,
826 			    p_ev.portev_events,
827 			    (ilbd_hc_probe_event_t *)p_ev.portev_user);
828 			break;
829 
830 		case ILBD_EVENT_NEW_REQ:
831 			assert(ev_port_obj == listener);
832 			/*
833 			 * An error happens in the listener.  Exit
834 			 * for now....
835 			 */
836 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
837 				logerr("main_loop: listener error");
838 				exit(1);
839 			}
840 			new_req(ev_port, ev_port_obj, &ev_obj);
841 			break;
842 
843 		case ILBD_EVENT_REP_OK:
844 		case ILBD_EVENT_REQ:
845 			cli = (ilbd_client_t *)p_ev.portev_user;
846 			assert(ev_port_obj == cli->cli_sd);
847 
848 			/*
849 			 * An error happens in the newly accepted
850 			 * client request.  Clean up the client.
851 			 * this also happens when client closes socket,
852 			 * so not necessarily a reason for alarm
853 			 */
854 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
855 				ilbd_free_cli(cli);
856 				break;
857 			}
858 
859 			handle_req(ev_port, event, cli);
860 			break;
861 
862 		default:
863 			logerr("main_loop: unknown event %d", event);
864 			exit(EXIT_FAILURE);
865 			break;
866 		}
867 
868 		ilbd_hc_timer_update(&timer_ev_obj);
869 	}
870 }
871 
/*
 * Run the three list set-up routines: server groups, rules, then
 * health checks.
 */
static void
i_ilbd_setup_lists(void)
{
	i_setup_sg_hlist();
	i_setup_rule_hlist();
	i_ilbd_setup_hc_list();
}
879 
/*
 * Print the (localized) usage line for program 'name' on stderr and
 * exit with status 1.  Intended for use during startup only; it does
 * not return.
 */
static void
Usage(char *name)
{
	const char *fmt = gettext("Usage: %s [-d|--debug]\n");

	(void) fprintf(stderr, fmt, name);
	exit(1);
}
890 
891 static void
892 print_version(char *name)
893 {
894 	(void) printf("%s %s\n", basename(name), ILBD_VERSION);
895 	(void) printf(gettext(ILBD_COPYRIGHT));
896 	exit(0);
897 }
898 
/*
 * Increase the file descriptor limit for handling a lot of health check
 * processes (each requires a pipe): the soft RLIMIT_NOFILE limit is
 * raised to the current hard limit.
 *
 * Note that this function is called before ilbd becomes a daemon.  So
 * we call perror(3C) to print out error message directly so that SMF
 * can catch them.  On failure the process exits with the errno of the
 * failing call; errno is saved first since perror() may change it.
 */
static void
set_rlim(void)
{
	struct rlimit	rl;
	int		err;

	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
		err = errno;
		perror("ilbd: getrlimit");
		exit(err);
	}

	rl.rlim_cur = rl.rlim_max;
	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
		err = errno;
		perror("ilbd: setrlimit");
		exit(err);
	}
}
922 
923 int
924 main(int argc, char **argv)
925 {
926 	int	s;
927 	int	c;
928 
929 	(void) setlocale(LC_ALL, "");
930 #if !defined(TEXT_DOMAIN)
931 #define	TEXT_DOMAIN "SYS_TEST"
932 #endif
933 	static const char daemon_dir[] = DAEMON_DIR;
934 
935 	(void) textdomain(TEXT_DOMAIN);
936 
937 	while ((c = getopt(argc, argv, ":V?d(debug)")) != -1) {
938 		switch ((char)c) {
939 		case '?': Usage(argv[0]);
940 			/* not reached */
941 			break;
942 		case 'V': print_version(argv[0]);
943 			/* not reached */
944 			break;
945 		case 'd': ilbd_enable_debug();
946 			break;
947 		default: Usage(argv[0]);
948 			/* not reached */
949 			break;
950 		}
951 	}
952 
953 	/*
954 	 * Whenever the daemon starts, it needs to start with a clean
955 	 * slate in the kernel. We need sys_ip_config privilege for
956 	 * this.
957 	 */
958 	ilbd_reset_kernel_state();
959 
960 	/* Increase the limit on the number of file descriptors. */
961 	set_rlim();
962 
963 	/*
964 	 * ilbd daemon starts off as root, just so it can create
965 	 * /var/run/daemon if one does not exist. After that is done
966 	 * the daemon switches to "daemon" uid. This is similar to what
967 	 * rpcbind does.
968 	 */
969 	if (mkdir(daemon_dir, DAEMON_DIR_MODE) == 0 || errno == EEXIST) {
970 		(void) chmod(daemon_dir, DAEMON_DIR_MODE);
971 		(void) chown(daemon_dir, DAEMON_UID, DAEMON_GID);
972 	} else {
973 		perror("main: mkdir failed");
974 		exit(errno);
975 	}
976 	/*
977 	 * Now lets switch ilbd as uid = daemon, gid = daemon with a
978 	 * trimmed down privilege set
979 	 */
980 	if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS,
981 	    DAEMON_UID, DAEMON_GID, PRIV_PROC_OWNER, PRIV_PROC_AUDIT,
982 	    PRIV_NET_ICMPACCESS, PRIV_SYS_IP_CONFIG, NULL) == -1) {
983 		(void) fprintf(stderr, "Insufficient privileges\n");
984 		exit(EXIT_FAILURE);
985 	}
986 
987 	/*
988 	 * Opens a PF_UNIX socket to the client. No privilege needed
989 	 * for this.
990 	 */
991 	s = ilbd_create_client_socket();
992 
993 	/*
994 	 * Daemonify if ilbd is not running with -d option
995 	 * Need proc_fork privilege for this
996 	 */
997 	if (!is_debugging_on()) {
998 		logdebug("daemonizing...");
999 		if (daemon(0, 0) != 0) {
1000 			logperror("daemon failed");
1001 			exit(EXIT_FAILURE);
1002 		}
1003 	}
1004 	(void) priv_set(PRIV_OFF, PRIV_INHERITABLE, PRIV_PROC_OWNER,
1005 	    PRIV_PROC_AUDIT, NULL);
1006 
1007 	/* if daemonified then set up syslog */
1008 	if (!is_debugging_on())
1009 		openlog("ilbd", LOG_PID, LOG_DAEMON);
1010 
1011 	i_ilbd_setup_lists();
1012 
1013 	main_loop(s);
1014 
1015 	/*
1016 	 * if we come here, then we experienced an error or a shutdown
1017 	 * indicator, so clean up after ourselves.
1018 	 */
1019 	logdebug("main(): terminating");
1020 
1021 	(void) remove(SOCKET_PATH);
1022 	ilbd_reset_kernel_state();
1023 
1024 	return (0);
1025 }
1026