xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision bbf21555)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 DEY Storage Systems, Inc.
24  * Copyright (c) 2014 Gary Mills
25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
28  */
29 
30 /*
31  * zlogin provides three types of login which allow users in the global
32  * zone to access non-global zones.
33  *
34  * - "interactive login" is similar to rlogin(1); for example, the user could
35  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
36  *   granted a new pty (which is then shoved into the zone), and an I/O
37  *   loop between parent and child processes takes care of the interactive
38  *   session.  In this mode, login(1) (and its -c option, which means
39  *   "already authenticated") is employed to take care of the initialization
40  *   of the user's session.
41  *
42  * - "non-interactive login" is similar to su(8); the user could issue
43  *   'zlogin my-zone ls -l' and the command would be run as specified.
44  *   In this mode, zlogin sets up pipes as the communication channel, and
45  *   'su' is used to do the login setup work.
46  *
47  * - "console login" is the equivalent to accessing the tip line for a
48  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
49  *   In this mode, zlogin contacts the zoneadmd process via unix domain
50  *   socket.  If zoneadmd is not running, it starts it.  This allows the
51  *   console to be available anytime the zone is installed, regardless of
52  *   whether it is running.
53  */
54 
55 #include <sys/socket.h>
56 #include <sys/termios.h>
57 #include <sys/utsname.h>
58 #include <sys/stat.h>
59 #include <sys/types.h>
60 #include <sys/contract/process.h>
61 #include <sys/ctfs.h>
62 #include <sys/brand.h>
63 #include <sys/wait.h>
64 #include <alloca.h>
65 #include <assert.h>
66 #include <ctype.h>
67 #include <paths.h>
68 #include <door.h>
69 #include <errno.h>
70 #include <nss_dbdefs.h>
71 #include <poll.h>
72 #include <priv.h>
73 #include <pwd.h>
74 #include <unistd.h>
75 #include <utmpx.h>
76 #include <sac.h>
77 #include <signal.h>
78 #include <stdarg.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <strings.h>
83 #include <stropts.h>
84 #include <wait.h>
85 #include <zone.h>
86 #include <fcntl.h>
87 #include <libdevinfo.h>
88 #include <libintl.h>
89 #include <locale.h>
90 #include <libzonecfg.h>
91 #include <libcontract.h>
92 #include <libbrand.h>
93 #include <auth_list.h>
94 #include <auth_attr.h>
95 #include <secdb.h>
96 
97 static int masterfd;
98 static struct termios save_termios;
99 static struct termios effective_termios;
100 static int save_fd;
101 static struct winsize winsize;
102 static volatile int dead;
103 static volatile pid_t child_pid = -1;
104 static int interactive = 0;
105 static priv_set_t *dropprivs;
106 
107 static int nocmdchar = 0;
108 static int failsafe = 0;
109 static int disconnect = 0;
110 static char cmdchar = '~';
111 static int quiet = 0;
112 
113 static int pollerr = 0;
114 
115 static const char *pname;
116 static char *username;
117 
118 /*
119  * When forced_login is true, the user is not prompted
120  * for an authentication password in the target zone.
121  */
122 static boolean_t forced_login = B_FALSE;
123 
124 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
125 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
126 #endif
127 
128 #define	SUPATH	"/usr/bin/su"
129 #define	FAILSAFESHELL	"/sbin/sh"
130 #define	DEFAULTSHELL	"/sbin/sh"
131 #define	DEF_PATH	"/usr/sbin:/usr/bin"
132 
133 #define	CLUSTER_BRAND_NAME	"cluster"
134 
135 /*
136  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
137  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
138  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
139  * also chosen in conjunction with the HI_WATER setting to make sure we
140  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
141  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
142  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
143  * is less than HI_WATER data already in the pipe.
144  */
145 #define	ZLOGIN_BUFSIZ	8192
146 #define	ZLOGIN_RDBUFSIZ	1024
147 #define	HI_WATER	8192
148 
149 /*
150  * See canonify() below.  CANONIFY_LEN is the maximum length that a
151  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
152  */
153 #define	CANONIFY_LEN 5
154 
155 static void
usage(void)156 usage(void)
157 {
158 	(void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
159 	    "[-l user] zonename [command [args ...] ]\n"), pname);
160 	exit(2);
161 }
162 
163 static const char *
getpname(const char * arg0)164 getpname(const char *arg0)
165 {
166 	const char *p = strrchr(arg0, '/');
167 
168 	if (p == NULL)
169 		p = arg0;
170 	else
171 		p++;
172 
173 	pname = p;
174 	return (p);
175 }
176 
177 static void
zerror(const char * fmt,...)178 zerror(const char *fmt, ...)
179 {
180 	va_list alist;
181 
182 	(void) fprintf(stderr, "%s: ", pname);
183 	va_start(alist, fmt);
184 	(void) vfprintf(stderr, fmt, alist);
185 	va_end(alist);
186 	(void) fprintf(stderr, "\n");
187 }
188 
189 static void
zperror(const char * str)190 zperror(const char *str)
191 {
192 	const char *estr;
193 
194 	if ((estr = strerror(errno)) != NULL)
195 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
196 	else
197 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
198 }
199 
200 /*
201  * The first part of our privilege dropping scheme needs to be called before
202  * fork(), since we must have it for security; we don't want to be surprised
203  * later that we couldn't allocate the privset.
204  */
205 static int
prefork_dropprivs()206 prefork_dropprivs()
207 {
208 	if ((dropprivs = priv_allocset()) == NULL)
209 		return (1);
210 
211 	priv_basicset(dropprivs);
212 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
213 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
214 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
215 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
216 
217 	/*
218 	 * We need to keep the basic privilege PROC_SESSION and all unknown
219 	 * basic privileges as well as the privileges PROC_ZONE and
220 	 * PROC_OWNER in order to query session information and
221 	 * send signals.
222 	 */
223 	if (interactive == 0) {
224 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
225 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
226 	} else {
227 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
228 	}
229 
230 	return (0);
231 }
232 
233 /*
234  * The second part of the privilege drop.  We are paranoid about being attacked
235  * by the zone, so we drop all privileges.  This should prevent a compromise
236  * which gets us to fork(), exec(), symlink(), etc.
237  */
238 static void
postfork_dropprivs()239 postfork_dropprivs()
240 {
241 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
242 		zperror(gettext("Warning: could not set permitted privileges"));
243 	}
244 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
245 		zperror(gettext("Warning: could not set limit privileges"));
246 	}
247 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
248 		zperror(gettext("Warning: could not set inheritable "
249 		    "privileges"));
250 	}
251 }
252 
253 /*
254  * Create the unix domain socket and call the zoneadmd server; handshake
255  * with it to determine whether it will allow us to connect.
256  */
257 static int
get_console_master(const char * zname)258 get_console_master(const char *zname)
259 {
260 	int sockfd = -1;
261 	struct sockaddr_un servaddr;
262 	char clientid[MAXPATHLEN];
263 	char handshake[MAXPATHLEN], c;
264 	int msglen;
265 	int i = 0, err = 0;
266 
267 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
268 		zperror(gettext("could not create socket"));
269 		return (-1);
270 	}
271 
272 	bzero(&servaddr, sizeof (servaddr));
273 	servaddr.sun_family = AF_UNIX;
274 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
275 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
276 
277 	if (connect(sockfd, (struct sockaddr *)&servaddr,
278 	    sizeof (servaddr)) == -1) {
279 		zperror(gettext("Could not connect to zone console"));
280 		goto bad;
281 	}
282 	masterfd = sockfd;
283 
284 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
285 	    getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
286 
287 	if (msglen >= sizeof (clientid) || msglen < 0) {
288 		zerror("protocol error");
289 		goto bad;
290 	}
291 
292 	if (write(masterfd, clientid, msglen) != msglen) {
293 		zerror("protocol error");
294 		goto bad;
295 	}
296 
297 	bzero(handshake, sizeof (handshake));
298 
299 	/*
300 	 * Take care not to accumulate more than our fill, and leave room for
301 	 * the NUL at the end.
302 	 */
303 	while ((err = read(masterfd, &c, 1)) == 1) {
304 		if (i >= (sizeof (handshake) - 1))
305 			break;
306 		if (c == '\n')
307 			break;
308 		handshake[i] = c;
309 		i++;
310 	}
311 
312 	/*
313 	 * If something went wrong during the handshake we bail; perhaps
314 	 * the server died off.
315 	 */
316 	if (err == -1) {
317 		zperror(gettext("Could not connect to zone console"));
318 		goto bad;
319 	}
320 
321 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
322 		return (0);
323 
324 	zerror(gettext("Console is already in use by process ID %s."),
325 	    handshake);
326 bad:
327 	(void) close(sockfd);
328 	masterfd = -1;
329 	return (-1);
330 }
331 
332 
333 /*
334  * Routines to handle pty creation upon zone entry and to shuttle I/O back
335  * and forth between the two terminals.  We also compute and store the
336  * name of the slave terminal associated with the master side.
337  */
338 static int
get_master_pty()339 get_master_pty()
340 {
341 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
342 		zperror(gettext("failed to obtain a pseudo-tty"));
343 		return (-1);
344 	}
345 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
346 		zperror(gettext("failed to get terminal settings from stdin"));
347 		return (-1);
348 	}
349 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
350 
351 	return (0);
352 }
353 
354 /*
355  * This is a bit tricky; normally a pts device will belong to the zone it
356  * is granted to.  But in the case of "entering" a zone, we need to establish
357  * the pty before entering the zone so that we can vector I/O to and from it
358  * from the global zone.
359  *
360  * We use the zonept() call to let the ptm driver know what we are up to;
361  * the only other hairy bit is the setting of zoneslavename (which happens
362  * above, in get_master_pty()).
363  */
364 static int
init_slave_pty(zoneid_t zoneid,char * devroot)365 init_slave_pty(zoneid_t zoneid, char *devroot)
366 {
367 	int slavefd = -1;
368 	char *slavename, zoneslavename[MAXPATHLEN];
369 
370 	/*
371 	 * Set slave permissions, zone the pts, then unlock it.
372 	 */
373 	if (grantpt(masterfd) != 0) {
374 		zperror(gettext("grantpt failed"));
375 		return (-1);
376 	}
377 
378 	if (unlockpt(masterfd) != 0) {
379 		zperror(gettext("unlockpt failed"));
380 		return (-1);
381 	}
382 
383 	/*
384 	 * We must open the slave side before zoning this pty; otherwise
385 	 * the kernel would refuse us the open-- zoning a pty makes it
386 	 * inaccessible to the global zone.  Note we are trying to open
387 	 * the device node via the $ZONEROOT/dev path for this pty.
388 	 *
389 	 * Later we'll close the slave out when once we've opened it again
390 	 * from within the target zone.  Blarg.
391 	 */
392 	if ((slavename = ptsname(masterfd)) == NULL) {
393 		zperror(gettext("failed to get name for pseudo-tty"));
394 		return (-1);
395 	}
396 
397 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
398 	    devroot, slavename);
399 
400 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
401 		zerror(gettext("failed to open %s: %s"), zoneslavename,
402 		    strerror(errno));
403 		return (-1);
404 	}
405 
406 	/*
407 	 * Push hardware emulation (ptem), line discipline (ldterm),
408 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
409 	 */
410 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
411 		zperror(gettext("failed to push ptem module"));
412 		if (!failsafe)
413 			goto bad;
414 	}
415 
416 	/*
417 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
418 	 * this prior to entering the zone so that we can detect any errors
419 	 * early, and so that we can set the anchor from the global zone.
420 	 */
421 	if (ioctl(slavefd, I_ANCHOR) == -1) {
422 		zperror(gettext("failed to set stream anchor"));
423 		if (!failsafe)
424 			goto bad;
425 	}
426 
427 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
428 		zperror(gettext("failed to push ldterm module"));
429 		if (!failsafe)
430 			goto bad;
431 	}
432 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
433 		zperror(gettext("failed to push ttcompat module"));
434 		if (!failsafe)
435 			goto bad;
436 	}
437 
438 	/*
439 	 * Propagate terminal settings from the external term to the new one.
440 	 */
441 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
442 		zperror(gettext("failed to set terminal settings"));
443 		if (!failsafe)
444 			goto bad;
445 	}
446 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
447 
448 	if (zonept(masterfd, zoneid) != 0) {
449 		zperror(gettext("could not set zoneid of pty"));
450 		goto bad;
451 	}
452 
453 	return (slavefd);
454 
455 bad:
456 	(void) close(slavefd);
457 	return (-1);
458 }
459 
460 /*
461  * Place terminal into raw mode.
462  */
463 static int
set_tty_rawmode(int fd)464 set_tty_rawmode(int fd)
465 {
466 	struct termios term;
467 	if (tcgetattr(fd, &term) < 0) {
468 		zperror(gettext("failed to get user terminal settings"));
469 		return (-1);
470 	}
471 
472 	/* Stash for later, so we can revert back to previous mode */
473 	save_termios = term;
474 	save_fd = fd;
475 
476 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
477 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
478 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
479 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
480 	/* disable output post-processing */
481 	term.c_oflag &= ~OPOST;
482 	/* disable canonical mode, signal chars, echo & extended functions */
483 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
484 
485 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
486 	term.c_cc[VTIME] = 0;
487 
488 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
489 		zperror(gettext("failed to set user terminal to raw mode"));
490 		return (-1);
491 	}
492 
493 	/*
494 	 * We need to know the value of VEOF so that we can properly process for
495 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
496 	 * because VMIN overloads the same array slot in non-canonical mode.
497 	 * Stupid @&^%!
498 	 *
499 	 * So here we construct the "effective" termios from the current
500 	 * terminal settings, and the corrected VEOF and VEOL settings.
501 	 */
502 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
503 		zperror(gettext("failed to get user terminal settings"));
504 		return (-1);
505 	}
506 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
507 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
508 
509 	return (0);
510 }
511 
512 /*
513  * Copy terminal window size from our terminal to the pts.
514  */
515 /*ARGSUSED*/
516 static void
sigwinch(int s)517 sigwinch(int s)
518 {
519 	struct winsize ws;
520 
521 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
522 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
523 }
524 
525 static volatile int close_on_sig = -1;
526 
527 static void
528 /*ARGSUSED*/
sigcld(int s)529 sigcld(int s)
530 {
531 	int status;
532 	pid_t pid;
533 
534 	/*
535 	 * Peek at the exit status.  If this isn't the process we cared
536 	 * about, then just reap it.
537 	 */
538 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
539 		if (pid == child_pid &&
540 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
541 			dead = 1;
542 			if (close_on_sig != -1) {
543 				(void) write(close_on_sig, "a", 1);
544 				(void) close(close_on_sig);
545 				close_on_sig = -1;
546 			}
547 		} else {
548 			(void) waitpid(pid, &status, WNOHANG);
549 		}
550 	}
551 }
552 
553 /*
554  * Some signals (currently, SIGINT) must be forwarded on to the process
555  * group of the child process.
556  */
557 static void
sig_forward(int s)558 sig_forward(int s)
559 {
560 	if (child_pid != -1) {
561 		(void) sigsend(P_PGID, child_pid, s);
562 	}
563 }
564 
565 /*
566  * reset terminal settings for global environment
567  */
568 static void
reset_tty()569 reset_tty()
570 {
571 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
572 }
573 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *	printable characters	-> the character itself
 *	0 through 31		-> caret notation, ^@ through ^_
 *	anything else		-> backslash plus three octal digits
 *
 * cc must have room for at least 5 bytes (backslash, three octal digits,
 * NUL).  The result is always NUL-terminated.
 *
 * The byte is handled as an unsigned char throughout: passing a negative
 * char to isprint() is undefined, and shifting a negative value would
 * produce octal digits that do not describe the byte (e.g. \777 for 0xff).
 */
static void
canonify(char c, char *cc)
{
	const unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = (char)(uc + '@');
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = (char)(((uc >> 6) & 7) + '0');
		cc[2] = (char)(((uc >> 3) & 7) + '0');
		cc[3] = (char)((uc & 7) + '0');
		cc[4] = '\0';
	}
}
596 
597 /*
598  * process_user_input watches the input stream for the escape sequence for
599  * 'quit' (by default, tilde-period).  Because we might be fed just one
600  * keystroke at a time, state associated with the user input (are we at the
601  * beginning of the line?  are we locally echoing the next character?) is
602  * maintained by beginning_of_line and local_echo across calls to the routine.
603  * If the write to outfd fails, we'll try to read from infd in an attempt
604  * to prevent deadlock between the two processes.
605  *
606  * This routine returns -1 when the 'quit' escape sequence has been issued,
607  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
608  */
609 static int
process_user_input(int outfd,int infd)610 process_user_input(int outfd, int infd)
611 {
612 	static boolean_t beginning_of_line = B_TRUE;
613 	static boolean_t local_echo = B_FALSE;
614 	char ibuf[ZLOGIN_BUFSIZ];
615 	int nbytes;
616 	char *buf = ibuf;
617 
618 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
619 	if (nbytes == -1 && (errno != EINTR || dead))
620 		return (-1);
621 
622 	if (nbytes == -1)	/* The read was interrupted. */
623 		return (0);
624 
625 	/* 0 read means EOF, close the pipe to the child */
626 	if (nbytes == 0)
627 		return (1);
628 
629 	for (char c = *buf; nbytes > 0; c = *buf, --nbytes) {
630 		buf++;
631 		if (beginning_of_line && !nocmdchar) {
632 			beginning_of_line = B_FALSE;
633 			if (c == cmdchar) {
634 				local_echo = B_TRUE;
635 				continue;
636 			}
637 		} else if (local_echo) {
638 			local_echo = B_FALSE;
639 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
640 				char cc[CANONIFY_LEN];
641 
642 				canonify(c, cc);
643 				(void) write(STDOUT_FILENO, &cmdchar, 1);
644 				(void) write(STDOUT_FILENO, cc, strlen(cc));
645 				return (-1);
646 			}
647 		}
648 retry:
649 		if (write(outfd, &c, 1) <= 0) {
650 			/*
651 			 * Since the fd we are writing to is opened with
652 			 * O_NONBLOCK it is possible to get EAGAIN if the
653 			 * pipe is full.  One way this could happen is if we
654 			 * are writing a lot of data into the pipe in this loop
655 			 * and the application on the other end is echoing that
656 			 * data back out to its stdout.  The output pipe can
657 			 * fill up since we are stuck here in this loop and not
658 			 * draining the other pipe.  We can try to read some of
659 			 * the data to see if we can drain the pipe so that the
660 			 * application can continue to make progress.  The read
661 			 * is non-blocking so we won't hang here.  We also wait
662 			 * a bit before retrying since there could be other
663 			 * reasons why the pipe is full and we don't want to
664 			 * continuously retry.
665 			 */
666 			if (errno == EAGAIN) {
667 				struct timespec rqtp;
668 				int ln;
669 				char obuf[ZLOGIN_BUFSIZ];
670 
671 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
672 					(void) write(STDOUT_FILENO, obuf, ln);
673 
674 				/* sleep for 10 milliseconds */
675 				rqtp.tv_sec = 0;
676 				rqtp.tv_nsec = MSEC2NSEC(10);
677 				(void) nanosleep(&rqtp, NULL);
678 				if (!dead)
679 					goto retry;
680 			}
681 
682 			return (-1);
683 		}
684 		beginning_of_line = (c == '\r' || c == '\n' ||
685 		    c == effective_termios.c_cc[VKILL] ||
686 		    c == effective_termios.c_cc[VEOL] ||
687 		    c == effective_termios.c_cc[VSUSP] ||
688 		    c == effective_termios.c_cc[VINTR]);
689 	}
690 	return (0);
691 }
692 
693 /*
694  * This function prevents deadlock between zlogin and the application in the
695  * zone that it is talking to.  This can happen when we read from zlogin's
696  * stdin and write the data down the pipe to the application.  If the pipe
697  * is full, we'll block in the write.  Because zlogin could be blocked in
698  * the write, it would never read the application's stdout/stderr so the
699  * application can then block on those writes (when the pipe fills up).  If the
700  * the application gets blocked this way, it can never get around to reading
701  * its stdin so that zlogin can unblock from its write.  Once in this state,
702  * the two processes are deadlocked.
703  *
704  * To prevent this, we want to verify that we can write into the pipe before we
705  * read from our stdin.  If the pipe already is pretty full, we bypass the read
706  * for now.  We'll circle back here again after the poll() so that we can
707  * try again.  When this function is called, we already know there is data
708  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
709  * stdin is EOF, and 0 if everything is ok (even though we might not have
710  * read/written any data into the pipe on this iteration).
711  */
712 static int
process_raw_input(int stdin_fd,int appin_fd)713 process_raw_input(int stdin_fd, int appin_fd)
714 {
715 	int cc;
716 	struct stat64 sb;
717 	char ibuf[ZLOGIN_RDBUFSIZ];
718 
719 	/* Check how much data is already in the pipe */
720 	if (fstat64(appin_fd, &sb) == -1) {
721 		perror("stat failed");
722 		return (-1);
723 	}
724 
725 	if (dead)
726 		return (-1);
727 
728 	/*
729 	 * The pipe already has a lot of data in it,  don't write any more
730 	 * right now.
731 	 */
732 	if (sb.st_size >= HI_WATER)
733 		return (0);
734 
735 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
736 	if (cc == -1 && (errno != EINTR || dead))
737 		return (-1);
738 
739 	if (cc == -1)	/* The read was interrupted. */
740 		return (0);
741 
742 	/* 0 read means EOF, close the pipe to the child */
743 	if (cc == 0)
744 		return (1);
745 
746 	/*
747 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
748 	 * data *to*.
749 	 */
750 	if (write(stdin_fd, ibuf, cc) == -1)
751 		return (-1);
752 
753 	return (0);
754 }
755 
756 /*
757  * Write the output from the application running in the zone.  We can get
758  * a signal during the write (usually it would be SIGCHLD when the application
759  * has exited) so we loop to make sure we have written all of the data we read.
760  */
761 static int
process_output(int in_fd,int out_fd)762 process_output(int in_fd, int out_fd)
763 {
764 	int wrote = 0;
765 	int cc;
766 	char ibuf[ZLOGIN_BUFSIZ];
767 
768 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
769 	if (cc == -1 && (errno != EINTR || dead))
770 		return (-1);
771 	if (cc == 0)
772 		return (-1);	/* EOF */
773 	if (cc == -1)	/* The read was interrupted. */
774 		return (0);
775 
776 	do {
777 		int len;
778 
779 		len = write(out_fd, ibuf + wrote, cc - wrote);
780 		if (len == -1 && errno != EINTR)
781 			return (-1);
782 		if (len != -1)
783 			wrote += len;
784 	} while (wrote < cc);
785 
786 	return (0);
787 }
788 
789 /*
790  * This is the main I/O loop, and is shared across all zlogin modes.
791  * Parameters:
792  *	stdin_fd:  The fd representing 'stdin' for the slave side; input to
793  *		   the zone will be written here.
794  *
795  *	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
796  *		   we're running non-interactive); used in process_raw_input
797  *		   to ensure we don't fill up the application's stdin pipe.
798  *
799  *	stdout_fd: The fd representing 'stdout' for the slave side; output
800  *		   from the zone will arrive here.
801  *
802  *	stderr_fd: The fd representing 'stderr' for the slave side; output
803  *		   from the zone will arrive here.
804  *
805  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
806  *		   be performed on the input coming from STDIN.
807  *
808  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
809  * mode supplies a stderr).
810  *
811  */
812 static void
doio(int stdin_fd,int appin_fd,int stdout_fd,int stderr_fd,int sig_fd,boolean_t raw_mode)813 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
814     boolean_t raw_mode)
815 {
816 	struct pollfd pollfds[4];
817 	char ibuf[ZLOGIN_BUFSIZ];
818 	int cc, ret;
819 
820 	/* read from stdout of zone and write to stdout of global zone */
821 	pollfds[0].fd = stdout_fd;
822 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
823 
824 	/* read from stderr of zone and write to stderr of global zone */
825 	pollfds[1].fd = stderr_fd;
826 	pollfds[1].events = pollfds[0].events;
827 
828 	/* read from stdin of global zone and write to stdin of zone */
829 	pollfds[2].fd = STDIN_FILENO;
830 	pollfds[2].events = pollfds[0].events;
831 
832 	/* read from signalling pipe so we know when child dies */
833 	pollfds[3].fd = sig_fd;
834 	pollfds[3].events = pollfds[0].events;
835 
836 	for (;;) {
837 		pollfds[0].revents = pollfds[1].revents =
838 		    pollfds[2].revents = pollfds[3].revents = 0;
839 
840 		if (dead)
841 			break;
842 
843 		/*
844 		 * There is a race condition here where we can receive the
845 		 * child death signal, set the dead flag, but since we have
846 		 * passed the test above, we would go into poll and hang.
847 		 * To avoid this we use the sig_fd as an additional poll fd.
848 		 * The signal handler writes into the other end of this pipe
849 		 * when the child dies so that the poll will always see that
850 		 * input and proceed.  We just loop around at that point and
851 		 * then notice the dead flag.
852 		 */
853 
854 		ret = poll(pollfds,
855 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
856 
857 		if (ret == -1 && errno != EINTR) {
858 			perror("poll failed");
859 			break;
860 		}
861 
862 		if (errno == EINTR && dead) {
863 			break;
864 		}
865 
866 		/* event from master side stdout */
867 		if (pollfds[0].revents) {
868 			if (pollfds[0].revents &
869 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
870 				if (process_output(stdout_fd, STDOUT_FILENO)
871 				    != 0)
872 					break;
873 			} else {
874 				pollerr = pollfds[0].revents;
875 				break;
876 			}
877 		}
878 
879 		/* event from master side stderr */
880 		if (pollfds[1].revents) {
881 			if (pollfds[1].revents &
882 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
883 				if (process_output(stderr_fd, STDERR_FILENO)
884 				    != 0)
885 					break;
886 			} else {
887 				pollerr = pollfds[1].revents;
888 				break;
889 			}
890 		}
891 
892 		/* event from user STDIN side */
893 		if (pollfds[2].revents) {
894 			if (pollfds[2].revents &
895 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
896 				/*
897 				 * stdin fd is stdin of the target; so,
898 				 * the thing we'll write the user data *to*.
899 				 *
900 				 * Also, unlike on the output side, we
901 				 * close the pipe on a zero-length message.
902 				 */
903 				int res;
904 
905 				if (raw_mode)
906 					res = process_raw_input(stdin_fd,
907 					    appin_fd);
908 				else
909 					res = process_user_input(stdin_fd,
910 					    stdout_fd);
911 
912 				if (res < 0)
913 					break;
914 				if (res > 0) {
915 					/* EOF (close) child's stdin_fd */
916 					pollfds[2].fd = -1;
917 					while ((res = close(stdin_fd)) != 0 &&
918 					    errno == EINTR)
919 						;
920 					if (res != 0)
921 						break;
922 				}
923 
924 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
925 				/*
926 				 * It's OK to get a POLLHUP on STDIN-- it
927 				 * always happens if you do:
928 				 *
929 				 * echo foo | zlogin <zone> <command>
930 				 *
931 				 * We reset fd to -1 in this case to clear
932 				 * the condition and close the pipe (EOF) to
933 				 * the other side in order to wrap things up.
934 				 */
935 				int res;
936 
937 				pollfds[2].fd = -1;
938 				while ((res = close(stdin_fd)) != 0 &&
939 				    errno == EINTR)
940 					;
941 				if (res != 0)
942 					break;
943 			} else {
944 				pollerr = pollfds[2].revents;
945 				break;
946 			}
947 		}
948 	}
949 
950 	/*
951 	 * We are in the midst of dying, but try to poll with a short
952 	 * timeout to see if we can catch the last bit of I/O from the
953 	 * children.
954 	 */
955 retry:
956 	pollfds[0].revents = pollfds[1].revents = 0;
957 	(void) poll(pollfds, 2, 100);
958 	if (pollfds[0].revents &
959 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
960 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
961 			(void) write(STDOUT_FILENO, ibuf, cc);
962 			goto retry;
963 		}
964 	}
965 	if (pollfds[1].revents &
966 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
967 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
968 			(void) write(STDERR_FILENO, ibuf, cc);
969 			goto retry;
970 		}
971 	}
972 }
973 
974 /*
975  * Fetch the user_cmd brand hook for getting a user's passwd(5) entry.
976  */
977 static const char *
zone_get_user_cmd(brand_handle_t bh,const char * login,char * user_cmd,size_t len)978 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
979     size_t len)
980 {
981 	bzero(user_cmd, sizeof (user_cmd));
982 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
983 		return (NULL);
984 
985 	return (user_cmd);
986 }
987 
988 /* From libc */
989 extern int str2passwd(const char *, int, void *, char *, int);
990 
991 /*
992  * exec() the user_cmd brand hook, and convert the output string to a
993  * struct passwd.  This is to be called after zone_enter().
994  *
995  */
996 static struct passwd *
zone_get_user_pw(const char * user_cmd,struct passwd * pwent,char * pwbuf,int pwbuflen)997 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
998     int pwbuflen)
999 {
1000 	char pwline[NSS_BUFLEN_PASSWD];
1001 	char *cin = NULL;
1002 	FILE *fin;
1003 	int status;
1004 
1005 	assert(getzoneid() != GLOBAL_ZONEID);
1006 
1007 	if ((fin = popen(user_cmd, "r")) == NULL)
1008 		return (NULL);
1009 
1010 	while (cin == NULL && !feof(fin))
1011 		cin = fgets(pwline, sizeof (pwline), fin);
1012 
1013 	if (cin == NULL) {
1014 		(void) pclose(fin);
1015 		return (NULL);
1016 	}
1017 
1018 	status = pclose(fin);
1019 	if (!WIFEXITED(status))
1020 		return (NULL);
1021 	if (WEXITSTATUS(status) != 0)
1022 		return (NULL);
1023 
1024 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1025 		return (pwent);
1026 	else
1027 		return (NULL);
1028 }
1029 
1030 static char **
zone_login_cmd(brand_handle_t bh,const char * login)1031 zone_login_cmd(brand_handle_t bh, const char *login)
1032 {
1033 	static char result_buf[ARG_MAX];
1034 	char **new_argv, *ptr, *lasts;
1035 	int n, a;
1036 
1037 	/* Get the login command for the target zone. */
1038 	bzero(result_buf, sizeof (result_buf));
1039 
1040 	if (forced_login) {
1041 		if (brand_get_forcedlogin_cmd(bh, login,
1042 		    result_buf, sizeof (result_buf)) != 0)
1043 			return (NULL);
1044 	} else {
1045 		if (brand_get_login_cmd(bh, login,
1046 		    result_buf, sizeof (result_buf)) != 0)
1047 			return (NULL);
1048 	}
1049 
1050 	/*
1051 	 * We got back a string that we'd like to execute.  But since
1052 	 * we're not doing the execution via a shell we'll need to convert
1053 	 * the exec string to an array of strings.  We'll do that here
1054 	 * but we're going to be very simplistic about it and break stuff
1055 	 * up based on spaces.  We're not even going to support any kind
1056 	 * of quoting or escape characters.  It's truly amazing that
1057 	 * there is no library function in OpenSolaris to do this for us.
1058 	 */
1059 
1060 	/*
1061 	 * Be paranoid.  Since we're deliniating based on spaces make
1062 	 * sure there are no adjacent spaces.
1063 	 */
1064 	if (strstr(result_buf, "  ") != NULL)
1065 		return (NULL);
1066 
1067 	/* Remove any trailing whitespace.  */
1068 	n = strlen(result_buf);
1069 	if (result_buf[n - 1] == ' ')
1070 		result_buf[n - 1] = '\0';
1071 
1072 	/* Count how many elements there are in the exec string. */
1073 	ptr = result_buf;
1074 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1075 		;
1076 
1077 	/* Allocate the argv array that we're going to return. */
1078 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1079 		return (NULL);
1080 
1081 	/* Tokenize the exec string and return. */
1082 	a = 0;
1083 	new_argv[a++] = result_buf;
1084 	if (n > 2) {
1085 		(void) strtok_r(result_buf, " ", &lasts);
1086 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1087 			;
1088 	} else {
1089 		new_argv[a++] = NULL;
1090 	}
1091 	assert(n == a);
1092 	return (new_argv);
1093 }
1094 
1095 /*
1096  * Prepare argv array for exec'd process; if we're passing commands to the
1097  * new process, then use su(8) to do the invocation.  Otherwise, use
1098  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1099  * login that we're coming from another zone, and to disregard its CONSOLE
1100  * checks).
1101  */
1102 static char **
prep_args(brand_handle_t bh,const char * login,char ** argv)1103 prep_args(brand_handle_t bh, const char *login, char **argv)
1104 {
1105 	int argc = 0, a = 0, i, n = -1;
1106 	char **new_argv;
1107 
1108 	if (argv != NULL) {
1109 		size_t subshell_len = 1;
1110 		char *subshell;
1111 
1112 		while (argv[argc] != NULL)
1113 			argc++;
1114 
1115 		for (i = 0; i < argc; i++) {
1116 			subshell_len += strlen(argv[i]) + 1;
1117 		}
1118 		if ((subshell = calloc(1, subshell_len)) == NULL)
1119 			return (NULL);
1120 
1121 		for (i = 0; i < argc; i++) {
1122 			(void) strcat(subshell, argv[i]);
1123 			(void) strcat(subshell, " ");
1124 		}
1125 
1126 		if (failsafe) {
1127 			n = 4;
1128 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1129 				return (NULL);
1130 
1131 			new_argv[a++] = FAILSAFESHELL;
1132 		} else {
1133 			n = 5;
1134 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1135 				return (NULL);
1136 
1137 			new_argv[a++] = SUPATH;
1138 			if (strcmp(login, "root") != 0) {
1139 				new_argv[a++] = "-";
1140 				n++;
1141 			}
1142 			new_argv[a++] = (char *)login;
1143 		}
1144 		new_argv[a++] = "-c";
1145 		new_argv[a++] = subshell;
1146 		new_argv[a++] = NULL;
1147 		assert(a == n);
1148 	} else {
1149 		if (failsafe) {
1150 			n = 2;
1151 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1152 				return (NULL);
1153 			new_argv[a++] = FAILSAFESHELL;
1154 			new_argv[a++] = NULL;
1155 			assert(n == a);
1156 		} else {
1157 			new_argv = zone_login_cmd(bh, login);
1158 		}
1159 	}
1160 
1161 	return (new_argv);
1162 }
1163 
/*
 * Helper routine for prep_env below: builds a freshly malloc'd
 * "name=value" string.  Returns NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
	size_t name_len = strlen(name);
	size_t value_len = strlen(value);
	char *entry;

	/* room for name, '=', value and the terminating NUL */
	entry = malloc(name_len + value_len + 2);
	if (entry == NULL)
		return (NULL);

	(void) memcpy(entry, name, name_len);
	entry[name_len] = '=';
	/* value_len + 1 brings the NUL terminator along */
	(void) memcpy(entry + name_len + 1, value, value_len + 1);
	return (entry);
}
1179 
1180 /*
1181  * Prepare envp array for exec'd process.
1182  */
1183 static char **
prep_env()1184 prep_env()
1185 {
1186 	int e = 0, size = 1;
1187 	char **new_env, *estr;
1188 	char *term = getenv("TERM");
1189 
1190 	size++;	/* for $PATH */
1191 	if (term != NULL)
1192 		size++;
1193 
1194 	/*
1195 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1196 	 * We also set $SHELL, since neither login nor su will be around to do
1197 	 * it.
1198 	 */
1199 	if (failsafe)
1200 		size += 2;
1201 
1202 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1203 		return (NULL);
1204 
1205 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1206 		return (NULL);
1207 	new_env[e++] = estr;
1208 
1209 	if (term != NULL) {
1210 		if ((estr = add_env("TERM", term)) == NULL)
1211 			return (NULL);
1212 		new_env[e++] = estr;
1213 	}
1214 
1215 	if (failsafe) {
1216 		if ((estr = add_env("HOME", "/")) == NULL)
1217 			return (NULL);
1218 		new_env[e++] = estr;
1219 
1220 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1221 			return (NULL);
1222 		new_env[e++] = estr;
1223 	}
1224 
1225 	new_env[e++] = NULL;
1226 
1227 	assert(e == size);
1228 
1229 	return (new_env);
1230 }
1231 
1232 /*
1233  * Finish the preparation of the envp array for exec'd non-interactive
1234  * zlogins.  This is called in the child process *after* we zone_enter(), since
1235  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1236  * etc.  We need only do this in the non-interactive, mode, since otherwise
1237  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1238  * additional ways in which the command could fail, and we'd prefer to avoid
1239  * that.
1240  */
1241 static char **
prep_env_noninteractive(const char * user_cmd,char ** env)1242 prep_env_noninteractive(const char *user_cmd, char **env)
1243 {
1244 	size_t size;
1245 	char **new_env;
1246 	int e, i;
1247 	char *estr;
1248 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1249 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1250 	struct passwd pwent;
1251 	struct passwd *pw = NULL;
1252 
1253 	assert(env != NULL);
1254 	assert(failsafe == 0);
1255 
1256 	/*
1257 	 * Exec the "user_cmd" brand hook to get a pwent for the
1258 	 * login user.  If this fails, HOME will be set to "/", SHELL
1259 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1260 	 * SUPATH <login> -c <cmd>.
1261 	 */
1262 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1263 
1264 	/*
1265 	 * Get existing envp size.
1266 	 */
1267 	for (size = 0; env[size] != NULL; size++)
1268 		;
1269 
1270 	e = size;
1271 
1272 	/*
1273 	 * Finish filling out the environment; we duplicate the environment
1274 	 * setup described in login(1), for lack of a better precedent.
1275 	 */
1276 	if (pw != NULL)
1277 		size += 3;	/* LOGNAME, HOME, MAIL */
1278 	else
1279 		size += 1;	/* HOME */
1280 
1281 	size++;	/* always fill in SHELL */
1282 	size++; /* terminating NULL */
1283 
1284 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1285 		goto malloc_fail;
1286 
1287 	/*
1288 	 * Copy existing elements of env into new_env.
1289 	 */
1290 	for (i = 0; env[i] != NULL; i++) {
1291 		if ((new_env[i] = strdup(env[i])) == NULL)
1292 			goto malloc_fail;
1293 	}
1294 	assert(e == i);
1295 
1296 	if (pw != NULL) {
1297 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1298 			goto malloc_fail;
1299 		new_env[e++] = estr;
1300 
1301 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1302 			goto malloc_fail;
1303 		new_env[e++] = estr;
1304 
1305 		if (chdir(pw->pw_dir) != 0)
1306 			zerror(gettext("Could not chdir to home directory "
1307 			    "%s: %s"), pw->pw_dir, strerror(errno));
1308 
1309 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1310 		    pw->pw_name);
1311 		if ((estr = add_env("MAIL", varmail)) == NULL)
1312 			goto malloc_fail;
1313 		new_env[e++] = estr;
1314 	} else {
1315 		if ((estr = add_env("HOME", "/")) == NULL)
1316 			goto malloc_fail;
1317 		new_env[e++] = estr;
1318 	}
1319 
1320 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1321 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1322 			goto malloc_fail;
1323 		new_env[e++] = estr;
1324 	} else {
1325 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1326 			goto malloc_fail;
1327 		new_env[e++] = estr;
1328 	}
1329 
1330 	new_env[e++] = NULL;	/* add terminating NULL */
1331 
1332 	assert(e == size);
1333 	return (new_env);
1334 
1335 malloc_fail:
1336 	zperror(gettext("failed to allocate memory for process environment"));
1337 	return (NULL);
1338 }
1339 
/*
 * Close 'fd' unless it is the descriptor that 'slavefd' points to.
 * The result of close() is ignored; always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep_fd = *(int *)slavefd;

	if (fd == keep_fd)
		return (0);

	(void) close(fd);
	return (0);
}
1347 
1348 static void
set_cmdchar(char * cmdcharstr)1349 set_cmdchar(char *cmdcharstr)
1350 {
1351 	char c;
1352 	long lc;
1353 
1354 	if ((c = *cmdcharstr) != '\\') {
1355 		cmdchar = c;
1356 		return;
1357 	}
1358 
1359 	c = cmdcharstr[1];
1360 	if (c == '\0' || c == '\\') {
1361 		cmdchar = '\\';
1362 		return;
1363 	}
1364 
1365 	if (c < '0' || c > '7') {
1366 		zerror(gettext("Unrecognized escape character option %s"),
1367 		    cmdcharstr);
1368 		usage();
1369 	}
1370 
1371 	lc = strtol(cmdcharstr + 1, NULL, 8);
1372 	if (lc < 0 || lc > 255) {
1373 		zerror(gettext("Octal escape character '%s' too large"),
1374 		    cmdcharstr);
1375 		usage();
1376 	}
1377 	cmdchar = (char)lc;
1378 }
1379 
1380 static int
setup_utmpx(char * slavename)1381 setup_utmpx(char *slavename)
1382 {
1383 	struct utmpx ut;
1384 
1385 	bzero(&ut, sizeof (ut));
1386 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1387 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1388 	ut.ut_pid = getpid();
1389 	ut.ut_id[0] = 'z';
1390 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1391 	ut.ut_type = LOGIN_PROCESS;
1392 	(void) time(&ut.ut_tv.tv_sec);
1393 
1394 	if (makeutx(&ut) == NULL) {
1395 		zerror(gettext("makeutx failed"));
1396 		return (-1);
1397 	}
1398 	return (0);
1399 }
1400 
/*
 * Counterpart of grab_lock_file(): closes the lock file descriptor.
 * Any error from close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1406 
1407 static int
grab_lock_file(const char * zone_name,int * lockfd)1408 grab_lock_file(const char *zone_name, int *lockfd)
1409 {
1410 	char pathbuf[PATH_MAX];
1411 	struct flock flock;
1412 
1413 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1414 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1415 		    strerror(errno));
1416 		return (-1);
1417 	}
1418 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1419 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1420 	    ZONES_TMPDIR, zone_name);
1421 
1422 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1423 		zerror(gettext("could not open %s: %s"), pathbuf,
1424 		    strerror(errno));
1425 		return (-1);
1426 	}
1427 	/*
1428 	 * Lock the file to synchronize with other zoneadmds
1429 	 */
1430 	flock.l_type = F_WRLCK;
1431 	flock.l_whence = SEEK_SET;
1432 	flock.l_start = (off_t)0;
1433 	flock.l_len = (off_t)0;
1434 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1435 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1436 		    strerror(errno));
1437 		release_lock_file(*lockfd);
1438 		return (-1);
1439 	}
1440 	return (Z_OK);
1441 }
1442 
1443 static int
start_zoneadmd(const char * zone_name)1444 start_zoneadmd(const char *zone_name)
1445 {
1446 	pid_t retval;
1447 	int pstatus = 0, error = -1, lockfd, doorfd;
1448 	struct door_info info;
1449 	char doorpath[MAXPATHLEN];
1450 
1451 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1452 
1453 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1454 		return (-1);
1455 	/*
1456 	 * We must do the door check with the lock held.  Otherwise, we
1457 	 * might race against another zoneadm/zlogin process and wind
1458 	 * up with two processes trying to start zoneadmd at the same
1459 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1460 	 * to be as seamless as is practical, from a user perspective.
1461 	 */
1462 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1463 		if (errno != ENOENT) {
1464 			zerror("failed to open %s: %s", doorpath,
1465 			    strerror(errno));
1466 			goto out;
1467 		}
1468 	} else {
1469 		/*
1470 		 * Seems to be working ok.
1471 		 */
1472 		if (door_info(doorfd, &info) == 0 &&
1473 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1474 			error = 0;
1475 			goto out;
1476 		}
1477 	}
1478 
1479 	if ((child_pid = fork()) == -1) {
1480 		zperror(gettext("could not fork"));
1481 		goto out;
1482 	} else if (child_pid == 0) {
1483 		/* child process */
1484 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1485 		    zone_name, NULL);
1486 		zperror(gettext("could not exec zoneadmd"));
1487 		_exit(1);
1488 	}
1489 
1490 	/* parent process */
1491 	do {
1492 		retval = waitpid(child_pid, &pstatus, 0);
1493 	} while (retval != child_pid);
1494 	if (WIFSIGNALED(pstatus) ||
1495 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1496 		zerror(gettext("could not start %s"), "zoneadmd");
1497 		goto out;
1498 	}
1499 	error = 0;
1500 out:
1501 	release_lock_file(lockfd);
1502 	(void) close(doorfd);
1503 	return (error);
1504 }
1505 
1506 static int
init_template(void)1507 init_template(void)
1508 {
1509 	int fd;
1510 	int err = 0;
1511 
1512 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1513 	if (fd == -1)
1514 		return (-1);
1515 
1516 	/*
1517 	 * zlogin doesn't do anything with the contract.
1518 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1519 	 */
1520 	err |= ct_tmpl_set_critical(fd, 0);
1521 	err |= ct_tmpl_set_informative(fd, 0);
1522 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1523 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1524 	if (err || ct_tmpl_activate(fd)) {
1525 		(void) close(fd);
1526 		return (-1);
1527 	}
1528 
1529 	return (fd);
1530 }
1531 
1532 static int
noninteractive_login(char * zonename,const char * user_cmd,zoneid_t zoneid,char ** new_args,char ** new_env)1533 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1534     char **new_args, char **new_env)
1535 {
1536 	pid_t retval;
1537 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1538 	int child_status;
1539 	int tmpl_fd;
1540 	sigset_t block_cld;
1541 
1542 	if ((tmpl_fd = init_template()) == -1) {
1543 		reset_tty();
1544 		zperror(gettext("could not create contract"));
1545 		return (1);
1546 	}
1547 
1548 	if (pipe(stdin_pipe) != 0) {
1549 		zperror(gettext("could not create STDIN pipe"));
1550 		return (1);
1551 	}
1552 	/*
1553 	 * When the user types ^D, we get a zero length message on STDIN.
1554 	 * We need to echo that down the pipe to send it to the other side;
1555 	 * but by default, pipes don't propagate zero-length messages.  We
1556 	 * toggle that behavior off using I_SWROPT.  See streamio(4I).
1557 	 */
1558 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1559 		zperror(gettext("could not configure STDIN pipe"));
1560 		return (1);
1561 
1562 	}
1563 	if (pipe(stdout_pipe) != 0) {
1564 		zperror(gettext("could not create STDOUT pipe"));
1565 		return (1);
1566 	}
1567 	if (pipe(stderr_pipe) != 0) {
1568 		zperror(gettext("could not create STDERR pipe"));
1569 		return (1);
1570 	}
1571 
1572 	if (pipe(dead_child_pipe) != 0) {
1573 		zperror(gettext("could not create signalling pipe"));
1574 		return (1);
1575 	}
1576 	close_on_sig = dead_child_pipe[0];
1577 
1578 	/*
1579 	 * If any of the pipe FD's winds up being less than STDERR, then we
1580 	 * have a mess on our hands-- and we are lacking some of the I/O
1581 	 * streams we would expect anyway.  So we bail.
1582 	 */
1583 	if (stdin_pipe[0] <= STDERR_FILENO ||
1584 	    stdin_pipe[1] <= STDERR_FILENO ||
1585 	    stdout_pipe[0] <= STDERR_FILENO ||
1586 	    stdout_pipe[1] <= STDERR_FILENO ||
1587 	    stderr_pipe[0] <= STDERR_FILENO ||
1588 	    stderr_pipe[1] <= STDERR_FILENO ||
1589 	    dead_child_pipe[0] <= STDERR_FILENO ||
1590 	    dead_child_pipe[1] <= STDERR_FILENO) {
1591 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1592 		return (1);
1593 	}
1594 
1595 	if (prefork_dropprivs() != 0) {
1596 		zperror(gettext("could not allocate privilege set"));
1597 		return (1);
1598 	}
1599 
1600 	(void) sigset(SIGCLD, sigcld);
1601 	(void) sigemptyset(&block_cld);
1602 	(void) sigaddset(&block_cld, SIGCLD);
1603 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1604 
1605 	if ((child_pid = fork()) == -1) {
1606 		(void) ct_tmpl_clear(tmpl_fd);
1607 		(void) close(tmpl_fd);
1608 		zperror(gettext("could not fork"));
1609 		return (1);
1610 	} else if (child_pid == 0) { /* child process */
1611 		(void) ct_tmpl_clear(tmpl_fd);
1612 
1613 		/*
1614 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1615 		 */
1616 		(void) close(STDIN_FILENO);
1617 		(void) close(STDOUT_FILENO);
1618 		(void) close(STDERR_FILENO);
1619 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1620 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1621 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1622 		(void) closefrom(STDERR_FILENO + 1);
1623 
1624 		(void) sigset(SIGCLD, SIG_DFL);
1625 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1626 		/*
1627 		 * In case any of stdin, stdout or stderr are streams,
1628 		 * anchor them to prevent malicious I_POPs.
1629 		 */
1630 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1631 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1632 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1633 
1634 		if (zone_enter(zoneid) == -1) {
1635 			zerror(gettext("could not enter zone %s: %s"),
1636 			    zonename, strerror(errno));
1637 			_exit(1);
1638 		}
1639 
1640 		/*
1641 		 * For non-native zones, tell libc where it can find locale
1642 		 * specific getttext() messages.
1643 		 */
1644 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1645 			(void) bindtextdomain(TEXT_DOMAIN,
1646 			    "/.SUNWnative/usr/lib/locale");
1647 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1648 			(void) bindtextdomain(TEXT_DOMAIN,
1649 			    "/native/usr/lib/locale");
1650 
1651 		if (!failsafe)
1652 			new_env = prep_env_noninteractive(user_cmd, new_env);
1653 
1654 		if (new_env == NULL) {
1655 			_exit(1);
1656 		}
1657 
1658 		/*
1659 		 * Move into a new process group; the zone_enter will have
1660 		 * placed us into zsched's session, and we want to be in
1661 		 * a unique process group.
1662 		 */
1663 		(void) setpgid(getpid(), getpid());
1664 
1665 		/*
1666 		 * The child needs to run as root to
1667 		 * execute the su program.
1668 		 */
1669 		if (setuid(0) == -1) {
1670 			zperror(gettext("insufficient privilege"));
1671 			return (1);
1672 		}
1673 
1674 		(void) execve(new_args[0], new_args, new_env);
1675 		zperror(gettext("exec failure"));
1676 		_exit(1);
1677 	}
1678 	/* parent */
1679 
1680 	/* close pipe sides written by child */
1681 	(void) close(stdout_pipe[1]);
1682 	(void) close(stderr_pipe[1]);
1683 
1684 	(void) sigset(SIGINT, sig_forward);
1685 
1686 	postfork_dropprivs();
1687 
1688 	(void) ct_tmpl_clear(tmpl_fd);
1689 	(void) close(tmpl_fd);
1690 
1691 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1692 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1693 	    dead_child_pipe[1], B_TRUE);
1694 	do {
1695 		retval = waitpid(child_pid, &child_status, 0);
1696 		if (retval == -1) {
1697 			child_status = 0;
1698 		}
1699 	} while (retval != child_pid && errno != ECHILD);
1700 
1701 	return (WEXITSTATUS(child_status));
1702 }
1703 
/*
 * Look up the user name that belongs to our real user ID.
 *
 * Authorizations are checked to restrict access based on the
 * requested operation and zone name.  It is assumed that the
 * program is running with all privileges, but that the real
 * user ID is that of the user or role on whose behalf we are
 * operating.  The name returned here is what the subsequent
 * authorization checks use.
 *
 * Returns the pw_name member of the entry returned by getpwuid(); if no
 * entry is found an error is reported and the process exits with
 * status 1.
 */
static char *
get_username()
{
	struct passwd *pwd;

	pwd = getpwuid(getuid());
	if (pwd == NULL) {
		zerror(gettext("could not get user name."));
		_exit(1);
	}

	return (pwd->pw_name);
}
1726 
1727 int
main(int argc,char ** argv)1728 main(int argc, char **argv)
1729 {
1730 	int arg, console = 0;
1731 	zoneid_t zoneid;
1732 	zone_state_t st;
1733 	char *login = "root";
1734 	int lflag = 0;
1735 	int nflag = 0;
1736 	char *zonename = NULL;
1737 	char **proc_args = NULL;
1738 	char **new_args, **new_env;
1739 	sigset_t block_cld;
1740 	char devroot[MAXPATHLEN];
1741 	char *slavename, slaveshortname[MAXPATHLEN];
1742 	priv_set_t *privset;
1743 	int tmpl_fd;
1744 	char zonebrand[MAXNAMELEN];
1745 	char default_brand[MAXNAMELEN];
1746 	struct stat sb;
1747 	char kernzone[ZONENAME_MAX];
1748 	brand_handle_t bh;
1749 	char user_cmd[MAXPATHLEN];
1750 	char authname[MAXAUTHS];
1751 
1752 	(void) setlocale(LC_ALL, "");
1753 	(void) textdomain(TEXT_DOMAIN);
1754 
1755 	(void) getpname(argv[0]);
1756 	username = get_username();
1757 
1758 	while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1759 		switch (arg) {
1760 		case 'C':
1761 			console = 1;
1762 			break;
1763 		case 'E':
1764 			nocmdchar = 1;
1765 			break;
1766 		case 'R':	/* undocumented */
1767 			if (*optarg != '/') {
1768 				zerror(gettext("root path must be absolute."));
1769 				exit(2);
1770 			}
1771 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1772 				zerror(
1773 				    gettext("root path must be a directory."));
1774 				exit(2);
1775 			}
1776 			zonecfg_set_root(optarg);
1777 			break;
1778 		case 'Q':
1779 			quiet = 1;
1780 			break;
1781 		case 'S':
1782 			failsafe = 1;
1783 			break;
1784 		case 'd':
1785 			disconnect = 1;
1786 			break;
1787 		case 'e':
1788 			set_cmdchar(optarg);
1789 			break;
1790 		case 'l':
1791 			login = optarg;
1792 			lflag = 1;
1793 			break;
1794 		case 'n':
1795 			nflag = 1;
1796 			break;
1797 		default:
1798 			usage();
1799 		}
1800 	}
1801 
1802 	if (console != 0) {
1803 
1804 		if (lflag != 0) {
1805 			zerror(gettext(
1806 			    "-l may not be specified for console login"));
1807 			usage();
1808 		}
1809 
1810 		if (nflag != 0) {
1811 			zerror(gettext(
1812 			    "-n may not be specified for console login"));
1813 			usage();
1814 		}
1815 
1816 		if (failsafe != 0) {
1817 			zerror(gettext(
1818 			    "-S may not be specified for console login"));
1819 			usage();
1820 		}
1821 
1822 		if (zonecfg_in_alt_root()) {
1823 			zerror(gettext(
1824 			    "-R may not be specified for console login"));
1825 			exit(2);
1826 		}
1827 
1828 	}
1829 
1830 	if (failsafe != 0 && lflag != 0) {
1831 		zerror(gettext("-l may not be specified for failsafe login"));
1832 		usage();
1833 	}
1834 
1835 	if (!console && disconnect != 0) {
1836 		zerror(gettext(
1837 		    "-d may only be specified with console login"));
1838 		usage();
1839 	}
1840 
1841 	if (optind == (argc - 1)) {
1842 		/*
1843 		 * zone name, no process name; this should be an interactive
1844 		 * as long as STDIN is really a tty.
1845 		 */
1846 		if (nflag != 0) {
1847 			zerror(gettext(
1848 			    "-n may not be specified for interactive login"));
1849 			usage();
1850 		}
1851 		if (isatty(STDIN_FILENO))
1852 			interactive = 1;
1853 		zonename = argv[optind];
1854 	} else if (optind < (argc - 1)) {
1855 		if (console) {
1856 			zerror(gettext("Commands may not be specified for "
1857 			    "console login."));
1858 			usage();
1859 		}
1860 		/* zone name and process name, and possibly some args */
1861 		zonename = argv[optind];
1862 		proc_args = &argv[optind + 1];
1863 		interactive = 0;
1864 	} else {
1865 		usage();
1866 	}
1867 
1868 	if (getzoneid() != GLOBAL_ZONEID) {
1869 		zerror(gettext("'%s' may only be used from the global zone"),
1870 		    pname);
1871 		return (1);
1872 	}
1873 
1874 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1875 		zerror(gettext("'%s' not applicable to the global zone"),
1876 		    pname);
1877 		return (1);
1878 	}
1879 
1880 	if (zone_get_state(zonename, &st) != Z_OK) {
1881 		zerror(gettext("zone '%s' unknown"), zonename);
1882 		return (1);
1883 	}
1884 
1885 	if (st < ZONE_STATE_INSTALLED) {
1886 		zerror(gettext("cannot login to a zone which is '%s'"),
1887 		    zone_state_str(st));
1888 		return (1);
1889 	}
1890 
1891 	/*
1892 	 * In both console and non-console cases, we require all privs.
1893 	 * In the console case, because we may need to startup zoneadmd.
1894 	 * In the non-console case in order to do zone_enter(2), zonept()
1895 	 * and other tasks.
1896 	 */
1897 
1898 	if ((privset = priv_allocset()) == NULL) {
1899 		zperror(gettext("priv_allocset failed"));
1900 		return (1);
1901 	}
1902 
1903 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1904 		zperror(gettext("getppriv failed"));
1905 		priv_freeset(privset);
1906 		return (1);
1907 	}
1908 
1909 	if (priv_isfullset(privset) == B_FALSE) {
1910 		zerror(gettext("You lack sufficient privilege to run "
1911 		    "this command (all privs required)"));
1912 		priv_freeset(privset);
1913 		return (1);
1914 	}
1915 	priv_freeset(privset);
1916 
1917 	/*
1918 	 * Check if user is authorized for requested usage of the zone
1919 	 */
1920 
1921 	(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1922 	    ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1923 	if (chkauthattr(authname, username) == 0) {
1924 		if (console) {
1925 			zerror(gettext("%s is not authorized for console "
1926 			    "access to  %s zone."),
1927 			    username, zonename);
1928 			return (1);
1929 		} else {
1930 			(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1931 			    ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1932 			if (failsafe || !interactive) {
1933 				zerror(gettext("%s is not authorized for  "
1934 				    "failsafe or non-interactive login "
1935 				    "to  %s zone."), username, zonename);
1936 				return (1);
1937 			} else if (chkauthattr(authname, username) == 0) {
1938 				zerror(gettext("%s is not authorized "
1939 				    " to login to %s zone."),
1940 				    username, zonename);
1941 				return (1);
1942 			}
1943 		}
1944 	} else {
1945 		forced_login = B_TRUE;
1946 	}
1947 
1948 	/*
1949 	 * The console is a separate case from the rest of the code; handle
1950 	 * it first.
1951 	 */
1952 	if (console) {
1953 		/*
1954 		 * Ensure that zoneadmd for this zone is running.
1955 		 */
1956 		if (start_zoneadmd(zonename) == -1)
1957 			return (1);
1958 
1959 		/*
1960 		 * Make contact with zoneadmd.
1961 		 */
1962 		if (get_console_master(zonename) == -1)
1963 			return (1);
1964 
1965 		if (!quiet)
1966 			(void) printf(
1967 			    gettext("[Connected to zone '%s' console]\n"),
1968 			    zonename);
1969 
1970 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1971 			reset_tty();
1972 			zperror(gettext("failed to set stdin pty to raw mode"));
1973 			return (1);
1974 		}
1975 
1976 		(void) sigset(SIGWINCH, sigwinch);
1977 		(void) sigwinch(0);
1978 
1979 		/*
1980 		 * Run the I/O loop until we get disconnected.
1981 		 */
1982 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1983 		reset_tty();
1984 		if (!quiet)
1985 			(void) printf(
1986 			    gettext("\n[Connection to zone '%s' console "
1987 			    "closed]\n"), zonename);
1988 
1989 		return (0);
1990 	}
1991 
1992 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1993 		zerror(gettext("login allowed only to running zones "
1994 		    "(%s is '%s')."), zonename, zone_state_str(st));
1995 		return (1);
1996 	}
1997 
1998 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1999 	if (zonecfg_in_alt_root()) {
2000 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
2001 
2002 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2003 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2004 			zerror(gettext("cannot find scratch zone %s"),
2005 			    zonename);
2006 			if (fp != NULL)
2007 				zonecfg_close_scratch(fp);
2008 			return (1);
2009 		}
2010 		zonecfg_close_scratch(fp);
2011 	}
2012 
2013 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2014 		zerror(gettext("failed to get zoneid for zone '%s'"),
2015 		    zonename);
2016 		return (1);
2017 	}
2018 
2019 	/*
2020 	 * We need the zone root path only if we are setting up a pty.
2021 	 */
2022 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2023 		zerror(gettext("could not get dev path for zone %s"),
2024 		    zonename);
2025 		return (1);
2026 	}
2027 
2028 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2029 		zerror(gettext("could not get brand for zone %s"), zonename);
2030 		return (1);
2031 	}
2032 	/*
2033 	 * In the alternate root environment, the only supported
2034 	 * operations are mount and unmount.  In this case, just treat
2035 	 * the zone as native if it is cluster.  Cluster zones can be
2036 	 * native for the purpose of LU or upgrade, and the cluster
2037 	 * brand may not exist in the miniroot (such as in net install
2038 	 * upgrade).
2039 	 */
2040 	if (zonecfg_default_brand(default_brand,
2041 	    sizeof (default_brand)) != Z_OK) {
2042 		zerror(gettext("unable to determine default brand"));
2043 		return (1);
2044 	}
2045 	if (zonecfg_in_alt_root() &&
2046 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2047 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2048 	}
2049 
2050 	if ((bh = brand_open(zonebrand)) == NULL) {
2051 		zerror(gettext("could not open brand for zone %s"), zonename);
2052 		return (1);
2053 	}
2054 
2055 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2056 		zperror(gettext("could not assemble new arguments"));
2057 		brand_close(bh);
2058 		return (1);
2059 	}
2060 	/*
2061 	 * Get the brand specific user_cmd.  This command is used to get
2062 	 * a passwd(5) entry for login.
2063 	 */
2064 	if (!interactive && !failsafe) {
2065 		if (zone_get_user_cmd(bh, login, user_cmd,
2066 		    sizeof (user_cmd)) == NULL) {
2067 			zerror(gettext("could not get user_cmd for zone %s"),
2068 			    zonename);
2069 			brand_close(bh);
2070 			return (1);
2071 		}
2072 	}
2073 	brand_close(bh);
2074 
2075 	if ((new_env = prep_env()) == NULL) {
2076 		zperror(gettext("could not assemble new environment"));
2077 		return (1);
2078 	}
2079 
2080 	if (!interactive) {
2081 		if (nflag) {
2082 			int nfd;
2083 
2084 			if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2085 				zperror(gettext("failed to open null device"));
2086 				return (1);
2087 			}
2088 			if (nfd != STDIN_FILENO) {
2089 				if (dup2(nfd, STDIN_FILENO) < 0) {
2090 					zperror(gettext(
2091 					    "failed to dup2 null device"));
2092 					return (1);
2093 				}
2094 				(void) close(nfd);
2095 			}
2096 			/* /dev/null is now standard input */
2097 		}
2098 		return (noninteractive_login(zonename, user_cmd, zoneid,
2099 		    new_args, new_env));
2100 	}
2101 
2102 	if (zonecfg_in_alt_root()) {
2103 		zerror(gettext("cannot use interactive login with scratch "
2104 		    "zone"));
2105 		return (1);
2106 	}
2107 
2108 	/*
2109 	 * Things are more complex in interactive mode; we get the
2110 	 * master side of the pty, then place the user's terminal into
2111 	 * raw mode.
2112 	 */
2113 	if (get_master_pty() == -1) {
2114 		zerror(gettext("could not setup master pty device"));
2115 		return (1);
2116 	}
2117 
2118 	/*
2119 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2120 	 */
2121 	if ((slavename = ptsname(masterfd)) == NULL) {
2122 		zperror(gettext("failed to get name for pseudo-tty"));
2123 		return (1);
2124 	}
2125 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2126 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2127 		    sizeof (slaveshortname));
2128 	else
2129 		(void) strlcpy(slaveshortname, slavename,
2130 		    sizeof (slaveshortname));
2131 
2132 	if (!quiet)
2133 		(void) printf(gettext("[Connected to zone '%s' %s]\n"),
2134 		    zonename, slaveshortname);
2135 
2136 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
2137 		reset_tty();
2138 		zperror(gettext("failed to set stdin pty to raw mode"));
2139 		return (1);
2140 	}
2141 
2142 	if (prefork_dropprivs() != 0) {
2143 		reset_tty();
2144 		zperror(gettext("could not allocate privilege set"));
2145 		return (1);
2146 	}
2147 
2148 	/*
2149 	 * We must mask SIGCLD until after we have coped with the fork
2150 	 * sufficiently to deal with it; otherwise we can race and receive the
2151 	 * signal before child_pid has been initialized (yes, this really
2152 	 * happens).
2153 	 */
2154 	(void) sigset(SIGCLD, sigcld);
2155 	(void) sigemptyset(&block_cld);
2156 	(void) sigaddset(&block_cld, SIGCLD);
2157 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2158 
2159 	/*
2160 	 * We activate the contract template at the last minute to
2161 	 * avoid intermediate functions that could be using fork(2)
2162 	 * internally.
2163 	 */
2164 	if ((tmpl_fd = init_template()) == -1) {
2165 		reset_tty();
2166 		zperror(gettext("could not create contract"));
2167 		return (1);
2168 	}
2169 
2170 	if ((child_pid = fork()) == -1) {
2171 		(void) ct_tmpl_clear(tmpl_fd);
2172 		reset_tty();
2173 		zperror(gettext("could not fork"));
2174 		return (1);
2175 	} else if (child_pid == 0) { /* child process */
2176 		int slavefd, newslave;
2177 
2178 		(void) ct_tmpl_clear(tmpl_fd);
2179 		(void) close(tmpl_fd);
2180 
2181 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2182 
2183 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2184 			return (1);
2185 
2186 		/*
2187 		 * Close all fds except for the slave pty.
2188 		 */
2189 		(void) fdwalk(close_func, &slavefd);
2190 
2191 		/*
2192 		 * Temporarily dup slavefd to stderr; that way if we have
2193 		 * to print out that zone_enter failed, the output will
2194 		 * have somewhere to go.
2195 		 */
2196 		if (slavefd != STDERR_FILENO)
2197 			(void) dup2(slavefd, STDERR_FILENO);
2198 
2199 		if (zone_enter(zoneid) == -1) {
2200 			zerror(gettext("could not enter zone %s: %s"),
2201 			    zonename, strerror(errno));
2202 			return (1);
2203 		}
2204 
2205 		if (slavefd != STDERR_FILENO)
2206 			(void) close(STDERR_FILENO);
2207 
2208 		/*
2209 		 * We take pains to get this process into a new process
2210 		 * group, and subsequently a new session.  In this way,
2211 		 * we'll have a session which doesn't yet have a controlling
2212 		 * terminal.  When we open the slave, it will become the
2213 		 * controlling terminal; no PIDs concerning pgrps or sids
2214 		 * will leak inappropriately into the zone.
2215 		 */
2216 		(void) setpgrp();
2217 
2218 		/*
2219 		 * We need the slave pty to be referenced from the zone's
2220 		 * /dev in order to ensure that the devt's, etc are all
2221 		 * correct.  Otherwise we break ttyname and the like.
2222 		 */
2223 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2224 			(void) close(slavefd);
2225 			return (1);
2226 		}
2227 		(void) close(slavefd);
2228 		slavefd = newslave;
2229 
2230 		/*
2231 		 * dup the slave to the various FDs, so that when the
2232 		 * spawned process does a write/read it maps to the slave
2233 		 * pty.
2234 		 */
2235 		(void) dup2(slavefd, STDIN_FILENO);
2236 		(void) dup2(slavefd, STDOUT_FILENO);
2237 		(void) dup2(slavefd, STDERR_FILENO);
2238 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2239 		    slavefd != STDERR_FILENO) {
2240 			(void) close(slavefd);
2241 		}
2242 
2243 		/*
2244 		 * In failsafe mode, we don't use login(1), so don't try
2245 		 * setting up a utmpx entry.
2246 		 */
2247 		if (!failsafe)
2248 			if (setup_utmpx(slaveshortname) == -1)
2249 				return (1);
2250 
2251 		/*
2252 		 * The child needs to run as root to
2253 		 * execute the brand's login program.
2254 		 */
2255 		if (setuid(0) == -1) {
2256 			zperror(gettext("insufficient privilege"));
2257 			return (1);
2258 		}
2259 
2260 		(void) execve(new_args[0], new_args, new_env);
2261 		zperror(gettext("exec failure"));
2262 		return (1);
2263 	}
2264 
2265 	(void) ct_tmpl_clear(tmpl_fd);
2266 	(void) close(tmpl_fd);
2267 
2268 	/*
2269 	 * The rest is only for the parent process.
2270 	 */
2271 	(void) sigset(SIGWINCH, sigwinch);
2272 
2273 	postfork_dropprivs();
2274 
2275 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2276 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2277 
2278 	reset_tty();
2279 	if (!quiet)
2280 		(void) fprintf(stderr,
2281 		    gettext("\n[Connection to zone '%s' %s closed]\n"),
2282 		    zonename, slaveshortname);
2283 
2284 	if (pollerr != 0) {
2285 		(void) fprintf(stderr, gettext("Error: connection closed due "
2286 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2287 		return (1);
2288 	}
2289 
2290 	return (0);
2291 }
2292