1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2013 DEY Storage Systems, Inc.
  24  * Copyright (c) 2014 Gary Mills
  25  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  26  */
  27 
  28 /*
  29  * zlogin provides three types of login which allow users in the global
  30  * zone to access non-global zones.
  31  *
  32  * - "interactive login" is similar to rlogin(1); for example, the user could
  33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
  34  *   granted a new pty (which is then shoved into the zone), and an I/O
  35  *   loop between parent and child processes takes care of the interactive
  36  *   session.  In this mode, login(1) (and its -c option, which means
  37  *   "already authenticated") is employed to take care of the initialization
  38  *   of the user's session.
  39  *
  40  * - "non-interactive login" is similar to su(1M); the user could issue
  41  *   'zlogin my-zone ls -l' and the command would be run as specified.
  42  *   In this mode, zlogin sets up pipes as the communication channel, and
  43  *   'su' is used to do the login setup work.
  44  *
  45  * - "console login" is the equivalent to accessing the tip line for a
  46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
  47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
  48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
  49  *   console to be available anytime the zone is installed, regardless of
  50  *   whether it is running.
  51  */
  52 
  53 #include <sys/socket.h>
  54 #include <sys/termios.h>
  55 #include <sys/utsname.h>
  56 #include <sys/stat.h>
  57 #include <sys/types.h>
  58 #include <sys/contract/process.h>
  59 #include <sys/ctfs.h>
  60 #include <sys/brand.h>
  61 #include <sys/wait.h>
  62 #include <alloca.h>
  63 #include <assert.h>
  64 #include <ctype.h>
  65 #include <paths.h>
  66 #include <door.h>
  67 #include <errno.h>
  68 #include <nss_dbdefs.h>
  69 #include <poll.h>
  70 #include <priv.h>
  71 #include <pwd.h>
  72 #include <unistd.h>
  73 #include <utmpx.h>
  74 #include <sac.h>
  75 #include <signal.h>
  76 #include <stdarg.h>
  77 #include <stdio.h>
  78 #include <stdlib.h>
  79 #include <string.h>
  80 #include <strings.h>
  81 #include <stropts.h>
  82 #include <wait.h>
  83 #include <zone.h>
  84 #include <fcntl.h>
  85 #include <libdevinfo.h>
  86 #include <libintl.h>
  87 #include <locale.h>
  88 #include <libzonecfg.h>
  89 #include <libcontract.h>
  90 #include <libbrand.h>
  91 #include <auth_list.h>
  92 #include <auth_attr.h>
  93 #include <secdb.h>
  94 
/*
 * Master side of the I/O channel into the zone: the /dev/ptmx descriptor
 * opened by get_master_pty(), or the console socket connected by
 * get_console_master() (which resets it to -1 on failure).
 */
static int masterfd;
/*
 * Terminal settings captured from the user's terminal (get_master_pty(),
 * set_tty_rawmode()); reset_tty() restores them on save_fd.
 */
static struct termios save_termios;
/*
 * Raw-mode settings as read back in set_tty_rawmode(), with the original
 * VEOF and VEOL values put back; process_user_input() consults it.
 */
static struct termios effective_termios;
/* Descriptor whose settings were stashed in save_termios. */
static int save_fd;
/* Window size read from stdin in get_master_pty(); applied to the slave pty. */
static struct winsize winsize;
/* Set to 1 by sigcld() once the watched child has exited or been killed. */
static volatile int dead;
/* Child watched by sigcld() and signalled by sig_forward(); -1 when unset. */
static volatile pid_t child_pid = -1;
/* Selects which privileges prefork_dropprivs() keeps; assigned elsewhere. */
static int interactive = 0;
/* Privilege set built by prefork_dropprivs(), applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;

/* Nonzero: process_user_input() does not look for the command character. */
static int nocmdchar = 0;
/* Nonzero: init_slave_pty() reports but tolerates pty setup failures. */
static int failsafe = 0;
/* Escape character recognized at the beginning of a line. */
static char cmdchar = '~';
/* NOTE(review): no use visible in this part of the file; presumably -Q. */
static int quiet = 0;

/* doio() records unexpected poll revents here. */
static int pollerr = 0;

/* Program name, set by getpname(); prefixes zerror()/zperror() output. */
static const char *pname;
/* NOTE(review): no use visible in this part of the file; presumably -l user. */
static char *username;

/*
 * When forced_login is true, the user is not prompted
 * for an authentication password in the target zone.
 */
static boolean_t forced_login = B_FALSE;

#if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
#define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
#endif

/* Paths and defaults; their users are not visible in this part of the file. */
#define SUPATH  "/usr/bin/su"
#define FAILSAFESHELL   "/sbin/sh"
#define DEFAULTSHELL    "/sbin/sh"
#define DEF_PATH        "/usr/sbin:/usr/bin"

#define CLUSTER_BRAND_NAME      "cluster"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define ZLOGIN_BUFSIZ   8192
#define ZLOGIN_RDBUFSIZ 1024
#define HI_WATER        8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define CANONIFY_LEN 5
 151 
 152 static void
 153 usage(void)
 154 {
 155         (void) fprintf(stderr, gettext("usage: %s [ -nQCES ] [ -e cmdchar ] "
 156             "[-l user] zonename [command [args ...] ]\n"), pname);
 157         exit(2);
 158 }
 159 
 160 static const char *
 161 getpname(const char *arg0)
 162 {
 163         const char *p = strrchr(arg0, '/');
 164 
 165         if (p == NULL)
 166                 p = arg0;
 167         else
 168                 p++;
 169 
 170         pname = p;
 171         return (p);
 172 }
 173 
 174 static void
 175 zerror(const char *fmt, ...)
 176 {
 177         va_list alist;
 178 
 179         (void) fprintf(stderr, "%s: ", pname);
 180         va_start(alist, fmt);
 181         (void) vfprintf(stderr, fmt, alist);
 182         va_end(alist);
 183         (void) fprintf(stderr, "\n");
 184 }
 185 
 186 static void
 187 zperror(const char *str)
 188 {
 189         const char *estr;
 190 
 191         if ((estr = strerror(errno)) != NULL)
 192                 (void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
 193         else
 194                 (void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
 195 }
 196 
 197 /*
 198  * The first part of our privilege dropping scheme needs to be called before
 199  * fork(), since we must have it for security; we don't want to be surprised
 200  * later that we couldn't allocate the privset.
 201  */
 202 static int
 203 prefork_dropprivs()
 204 {
 205         if ((dropprivs = priv_allocset()) == NULL)
 206                 return (1);
 207 
 208         priv_basicset(dropprivs);
 209         (void) priv_delset(dropprivs, PRIV_PROC_INFO);
 210         (void) priv_delset(dropprivs, PRIV_PROC_FORK);
 211         (void) priv_delset(dropprivs, PRIV_PROC_EXEC);
 212         (void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
 213 
 214         /*
 215          * We need to keep the basic privilege PROC_SESSION and all unknown
 216          * basic privileges as well as the privileges PROC_ZONE and
 217          * PROC_OWNER in order to query session information and
 218          * send signals.
 219          */
 220         if (interactive == 0) {
 221                 (void) priv_addset(dropprivs, PRIV_PROC_ZONE);
 222                 (void) priv_addset(dropprivs, PRIV_PROC_OWNER);
 223         } else {
 224                 (void) priv_delset(dropprivs, PRIV_PROC_SESSION);
 225         }
 226 
 227         return (0);
 228 }
 229 
 230 /*
 231  * The second part of the privilege drop.  We are paranoid about being attacked
 232  * by the zone, so we drop all privileges.  This should prevent a compromise
 233  * which gets us to fork(), exec(), symlink(), etc.
 234  */
 235 static void
 236 postfork_dropprivs()
 237 {
 238         if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
 239                 zperror(gettext("Warning: could not set permitted privileges"));
 240         }
 241         if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
 242                 zperror(gettext("Warning: could not set limit privileges"));
 243         }
 244         if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
 245                 zperror(gettext("Warning: could not set inheritable "
 246                     "privileges"));
 247         }
 248 }
 249 
 250 /*
 251  * Create the unix domain socket and call the zoneadmd server; handshake
 252  * with it to determine whether it will allow us to connect.
 253  */
 254 static int
 255 get_console_master(const char *zname)
 256 {
 257         int sockfd = -1;
 258         struct sockaddr_un servaddr;
 259         char clientid[MAXPATHLEN];
 260         char handshake[MAXPATHLEN], c;
 261         int msglen;
 262         int i = 0, err = 0;
 263 
 264         if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
 265                 zperror(gettext("could not create socket"));
 266                 return (-1);
 267         }
 268 
 269         bzero(&servaddr, sizeof (servaddr));
 270         servaddr.sun_family = AF_UNIX;
 271         (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
 272             "%s/%s.console_sock", ZONES_TMPDIR, zname);
 273 
 274         if (connect(sockfd, (struct sockaddr *)&servaddr,
 275             sizeof (servaddr)) == -1) {
 276                 zperror(gettext("Could not connect to zone console"));
 277                 goto bad;
 278         }
 279         masterfd = sockfd;
 280 
 281         msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
 282             getpid(), setlocale(LC_MESSAGES, NULL));
 283 
 284         if (msglen >= sizeof (clientid) || msglen < 0) {
 285                 zerror("protocol error");
 286                 goto bad;
 287         }
 288 
 289         if (write(masterfd, clientid, msglen) != msglen) {
 290                 zerror("protocol error");
 291                 goto bad;
 292         }
 293 
 294         bzero(handshake, sizeof (handshake));
 295 
 296         /*
 297          * Take care not to accumulate more than our fill, and leave room for
 298          * the NUL at the end.
 299          */
 300         while ((err = read(masterfd, &c, 1)) == 1) {
 301                 if (i >= (sizeof (handshake) - 1))
 302                         break;
 303                 if (c == '\n')
 304                         break;
 305                 handshake[i] = c;
 306                 i++;
 307         }
 308 
 309         /*
 310          * If something went wrong during the handshake we bail; perhaps
 311          * the server died off.
 312          */
 313         if (err == -1) {
 314                 zperror(gettext("Could not connect to zone console"));
 315                 goto bad;
 316         }
 317 
 318         if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
 319                 return (0);
 320 
 321         zerror(gettext("Console is already in use by process ID %s."),
 322             handshake);
 323 bad:
 324         (void) close(sockfd);
 325         masterfd = -1;
 326         return (-1);
 327 }
 328 
 329 
 330 /*
 331  * Routines to handle pty creation upon zone entry and to shuttle I/O back
 332  * and forth between the two terminals.  We also compute and store the
 333  * name of the slave terminal associated with the master side.
 334  */
 335 static int
 336 get_master_pty()
 337 {
 338         if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
 339                 zperror(gettext("failed to obtain a pseudo-tty"));
 340                 return (-1);
 341         }
 342         if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
 343                 zperror(gettext("failed to get terminal settings from stdin"));
 344                 return (-1);
 345         }
 346         (void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
 347 
 348         return (0);
 349 }
 350 
 351 /*
 352  * This is a bit tricky; normally a pts device will belong to the zone it
 353  * is granted to.  But in the case of "entering" a zone, we need to establish
 354  * the pty before entering the zone so that we can vector I/O to and from it
 355  * from the global zone.
 356  *
 357  * We use the zonept() call to let the ptm driver know what we are up to;
 358  * the only other hairy bit is the setting of zoneslavename (which happens
 359  * above, in get_master_pty()).
 360  */
 361 static int
 362 init_slave_pty(zoneid_t zoneid, char *devroot)
 363 {
 364         int slavefd = -1;
 365         char *slavename, zoneslavename[MAXPATHLEN];
 366 
 367         /*
 368          * Set slave permissions, zone the pts, then unlock it.
 369          */
 370         if (grantpt(masterfd) != 0) {
 371                 zperror(gettext("grantpt failed"));
 372                 return (-1);
 373         }
 374 
 375         if (unlockpt(masterfd) != 0) {
 376                 zperror(gettext("unlockpt failed"));
 377                 return (-1);
 378         }
 379 
 380         /*
 381          * We must open the slave side before zoning this pty; otherwise
 382          * the kernel would refuse us the open-- zoning a pty makes it
 383          * inaccessible to the global zone.  Note we are trying to open
 384          * the device node via the $ZONEROOT/dev path for this pty.
 385          *
 386          * Later we'll close the slave out when once we've opened it again
 387          * from within the target zone.  Blarg.
 388          */
 389         if ((slavename = ptsname(masterfd)) == NULL) {
 390                 zperror(gettext("failed to get name for pseudo-tty"));
 391                 return (-1);
 392         }
 393 
 394         (void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
 395             devroot, slavename);
 396 
 397         if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
 398                 zerror(gettext("failed to open %s: %s"), zoneslavename,
 399                     strerror(errno));
 400                 return (-1);
 401         }
 402 
 403         /*
 404          * Push hardware emulation (ptem), line discipline (ldterm),
 405          * and V7/4BSD/Xenix compatibility (ttcompat) modules.
 406          */
 407         if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
 408                 zperror(gettext("failed to push ptem module"));
 409                 if (!failsafe)
 410                         goto bad;
 411         }
 412 
 413         /*
 414          * Anchor the stream to prevent malicious I_POPs; we prefer to do
 415          * this prior to entering the zone so that we can detect any errors
 416          * early, and so that we can set the anchor from the global zone.
 417          */
 418         if (ioctl(slavefd, I_ANCHOR) == -1) {
 419                 zperror(gettext("failed to set stream anchor"));
 420                 if (!failsafe)
 421                         goto bad;
 422         }
 423 
 424         if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
 425                 zperror(gettext("failed to push ldterm module"));
 426                 if (!failsafe)
 427                         goto bad;
 428         }
 429         if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
 430                 zperror(gettext("failed to push ttcompat module"));
 431                 if (!failsafe)
 432                         goto bad;
 433         }
 434 
 435         /*
 436          * Propagate terminal settings from the external term to the new one.
 437          */
 438         if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
 439                 zperror(gettext("failed to set terminal settings"));
 440                 if (!failsafe)
 441                         goto bad;
 442         }
 443         (void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
 444 
 445         if (zonept(masterfd, zoneid) != 0) {
 446                 zperror(gettext("could not set zoneid of pty"));
 447                 goto bad;
 448         }
 449 
 450         return (slavefd);
 451 
 452 bad:
 453         (void) close(slavefd);
 454         return (-1);
 455 }
 456 
 457 /*
 458  * Place terminal into raw mode.
 459  */
 460 static int
 461 set_tty_rawmode(int fd)
 462 {
 463         struct termios term;
 464         if (tcgetattr(fd, &term) < 0) {
 465                 zperror(gettext("failed to get user terminal settings"));
 466                 return (-1);
 467         }
 468 
 469         /* Stash for later, so we can revert back to previous mode */
 470         save_termios = term;
 471         save_fd = fd;
 472 
 473         /* disable 8->7 bit strip, start/stop, enable any char to restart */
 474         term.c_iflag &= ~(ISTRIP|IXON|IXANY);
 475         /* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
 476         term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
 477         /* disable output post-processing */
 478         term.c_oflag &= ~OPOST;
 479         /* disable canonical mode, signal chars, echo & extended functions */
 480         term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
 481 
 482         term.c_cc[VMIN] = 1;    /* byte-at-a-time */
 483         term.c_cc[VTIME] = 0;
 484 
 485         if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
 486                 zperror(gettext("failed to set user terminal to raw mode"));
 487                 return (-1);
 488         }
 489 
 490         /*
 491          * We need to know the value of VEOF so that we can properly process for
 492          * client-side ~<EOF>.  But we have obliterated VEOF in term,
 493          * because VMIN overloads the same array slot in non-canonical mode.
 494          * Stupid @&^%!
 495          *
 496          * So here we construct the "effective" termios from the current
 497          * terminal settings, and the corrected VEOF and VEOL settings.
 498          */
 499         if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
 500                 zperror(gettext("failed to get user terminal settings"));
 501                 return (-1);
 502         }
 503         effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
 504         effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
 505 
 506         return (0);
 507 }
 508 
 509 /*
 510  * Copy terminal window size from our terminal to the pts.
 511  */
 512 /*ARGSUSED*/
 513 static void
 514 sigwinch(int s)
 515 {
 516         struct winsize ws;
 517 
 518         if (ioctl(0, TIOCGWINSZ, &ws) == 0)
 519                 (void) ioctl(masterfd, TIOCSWINSZ, &ws);
 520 }
 521 
 522 static volatile int close_on_sig = -1;
 523 
 524 static void
 525 /*ARGSUSED*/
 526 sigcld(int s)
 527 {
 528         int status;
 529         pid_t pid;
 530 
 531         /*
 532          * Peek at the exit status.  If this isn't the process we cared
 533          * about, then just reap it.
 534          */
 535         if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
 536                 if (pid == child_pid &&
 537                     (WIFEXITED(status) || WIFSIGNALED(status))) {
 538                         dead = 1;
 539                         if (close_on_sig != -1) {
 540                                 (void) write(close_on_sig, "a", 1);
 541                                 (void) close(close_on_sig);
 542                                 close_on_sig = -1;
 543                         }
 544                 } else {
 545                         (void) waitpid(pid, &status, WNOHANG);
 546                 }
 547         }
 548 }
 549 
 550 /*
 551  * Some signals (currently, SIGINT) must be forwarded on to the process
 552  * group of the child process.
 553  */
 554 static void
 555 sig_forward(int s)
 556 {
 557         if (child_pid != -1) {
 558                 (void) sigsend(P_PGID, child_pid, s);
 559         }
 560 }
 561 
 562 /*
 563  * reset terminal settings for global environment
 564  */
 565 static void
 566 reset_tty()
 567 {
 568         (void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
 569 }
 570 
 571 /*
 572  * Convert character to printable representation, for display with locally
 573  * echoed command characters (like when we need to display ~^D)
 574  */
 575 static void
 576 canonify(char c, char *cc)
 577 {
 578         if (isprint(c)) {
 579                 cc[0] = c;
 580                 cc[1] = '\0';
 581         } else if (c >= 0 && c <= 31) {   /* ^@ through ^_ */
 582                 cc[0] = '^';
 583                 cc[1] = c + '@';
 584                 cc[2] = '\0';
 585         } else {
 586                 cc[0] = '\\';
 587                 cc[1] = ((c >> 6) & 7) + '0';
 588                 cc[2] = ((c >> 3) & 7) + '0';
 589                 cc[3] = (c & 7) + '0';
 590                 cc[4] = '\0';
 591         }
 592 }
 593 
 594 /*
 595  * process_user_input watches the input stream for the escape sequence for
 596  * 'quit' (by default, tilde-period).  Because we might be fed just one
 597  * keystroke at a time, state associated with the user input (are we at the
 598  * beginning of the line?  are we locally echoing the next character?) is
 599  * maintained by beginning_of_line and local_echo across calls to the routine.
 600  * If the write to outfd fails, we'll try to read from infd in an attempt
 601  * to prevent deadlock between the two processes.
 602  *
 603  * This routine returns -1 when the 'quit' escape sequence has been issued,
 604  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
 605  */
 606 static int
 607 process_user_input(int outfd, int infd)
 608 {
 609         static boolean_t beginning_of_line = B_TRUE;
 610         static boolean_t local_echo = B_FALSE;
 611         char ibuf[ZLOGIN_BUFSIZ];
 612         int nbytes;
 613         char *buf = ibuf;
 614         char c = *buf;
 615 
 616         nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 617         if (nbytes == -1 && (errno != EINTR || dead))
 618                 return (-1);
 619 
 620         if (nbytes == -1)       /* The read was interrupted. */
 621                 return (0);
 622 
 623         /* 0 read means EOF, close the pipe to the child */
 624         if (nbytes == 0)
 625                 return (1);
 626 
 627         for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
 628                 buf++;
 629                 if (beginning_of_line && !nocmdchar) {
 630                         beginning_of_line = B_FALSE;
 631                         if (c == cmdchar) {
 632                                 local_echo = B_TRUE;
 633                                 continue;
 634                         }
 635                 } else if (local_echo) {
 636                         local_echo = B_FALSE;
 637                         if (c == '.' || c == effective_termios.c_cc[VEOF]) {
 638                                 char cc[CANONIFY_LEN];
 639 
 640                                 canonify(c, cc);
 641                                 (void) write(STDOUT_FILENO, &cmdchar, 1);
 642                                 (void) write(STDOUT_FILENO, cc, strlen(cc));
 643                                 return (-1);
 644                         }
 645                 }
 646 retry:
 647                 if (write(outfd, &c, 1) <= 0) {
 648                         /*
 649                          * Since the fd we are writing to is opened with
 650                          * O_NONBLOCK it is possible to get EAGAIN if the
 651                          * pipe is full.  One way this could happen is if we
 652                          * are writing a lot of data into the pipe in this loop
 653                          * and the application on the other end is echoing that
 654                          * data back out to its stdout.  The output pipe can
 655                          * fill up since we are stuck here in this loop and not
 656                          * draining the other pipe.  We can try to read some of
 657                          * the data to see if we can drain the pipe so that the
 658                          * application can continue to make progress.  The read
 659                          * is non-blocking so we won't hang here.  We also wait
 660                          * a bit before retrying since there could be other
 661                          * reasons why the pipe is full and we don't want to
 662                          * continuously retry.
 663                          */
 664                         if (errno == EAGAIN) {
 665                                 struct timespec rqtp;
 666                                 int ln;
 667                                 char obuf[ZLOGIN_BUFSIZ];
 668 
 669                                 if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
 670                                         (void) write(STDOUT_FILENO, obuf, ln);
 671 
 672                                 /* sleep for 10 milliseconds */
 673                                 rqtp.tv_sec = 0;
 674                                 rqtp.tv_nsec = MSEC2NSEC(10);
 675                                 (void) nanosleep(&rqtp, NULL);
 676                                 if (!dead)
 677                                         goto retry;
 678                         }
 679 
 680                         return (-1);
 681                 }
 682                 beginning_of_line = (c == '\r' || c == '\n' ||
 683                     c == effective_termios.c_cc[VKILL] ||
 684                     c == effective_termios.c_cc[VEOL] ||
 685                     c == effective_termios.c_cc[VSUSP] ||
 686                     c == effective_termios.c_cc[VINTR]);
 687         }
 688         return (0);
 689 }
 690 
 691 /*
 692  * This function prevents deadlock between zlogin and the application in the
 693  * zone that it is talking to.  This can happen when we read from zlogin's
 694  * stdin and write the data down the pipe to the application.  If the pipe
 695  * is full, we'll block in the write.  Because zlogin could be blocked in
 696  * the write, it would never read the application's stdout/stderr so the
 697  * application can then block on those writes (when the pipe fills up).  If the
 698  * the application gets blocked this way, it can never get around to reading
 699  * its stdin so that zlogin can unblock from its write.  Once in this state,
 700  * the two processes are deadlocked.
 701  *
 702  * To prevent this, we want to verify that we can write into the pipe before we
 703  * read from our stdin.  If the pipe already is pretty full, we bypass the read
 704  * for now.  We'll circle back here again after the poll() so that we can
 705  * try again.  When this function is called, we already know there is data
 706  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
 707  * stdin is EOF, and 0 if everything is ok (even though we might not have
 708  * read/written any data into the pipe on this iteration).
 709  */
 710 static int
 711 process_raw_input(int stdin_fd, int appin_fd)
 712 {
 713         int cc;
 714         struct stat64 sb;
 715         char ibuf[ZLOGIN_RDBUFSIZ];
 716 
 717         /* Check how much data is already in the pipe */
 718         if (fstat64(appin_fd, &sb) == -1) {
 719                 perror("stat failed");
 720                 return (-1);
 721         }
 722 
 723         if (dead)
 724                 return (-1);
 725 
 726         /*
 727          * The pipe already has a lot of data in it,  don't write any more
 728          * right now.
 729          */
 730         if (sb.st_size >= HI_WATER)
 731                 return (0);
 732 
 733         cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
 734         if (cc == -1 && (errno != EINTR || dead))
 735                 return (-1);
 736 
 737         if (cc == -1)   /* The read was interrupted. */
 738                 return (0);
 739 
 740         /* 0 read means EOF, close the pipe to the child */
 741         if (cc == 0)
 742                 return (1);
 743 
 744         /*
 745          * stdin_fd is stdin of the target; so, the thing we'll write the user
 746          * data *to*.
 747          */
 748         if (write(stdin_fd, ibuf, cc) == -1)
 749                 return (-1);
 750 
 751         return (0);
 752 }
 753 
 754 /*
 755  * Write the output from the application running in the zone.  We can get
 756  * a signal during the write (usually it would be SIGCHLD when the application
 757  * has exited) so we loop to make sure we have written all of the data we read.
 758  */
 759 static int
 760 process_output(int in_fd, int out_fd)
 761 {
 762         int wrote = 0;
 763         int cc;
 764         char ibuf[ZLOGIN_BUFSIZ];
 765 
 766         cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
 767         if (cc == -1 && (errno != EINTR || dead))
 768                 return (-1);
 769         if (cc == 0)    /* EOF */
 770                 return (-1);
 771         if (cc == -1)   /* The read was interrupted. */
 772                 return (0);
 773 
 774         do {
 775                 int len;
 776 
 777                 len = write(out_fd, ibuf + wrote, cc - wrote);
 778                 if (len == -1 && errno != EINTR)
 779                         return (-1);
 780                 if (len != -1)
 781                         wrote += len;
 782         } while (wrote < cc);
 783 
 784         return (0);
 785 }
 786 
 787 /*
 788  * This is the main I/O loop, and is shared across all zlogin modes.
 789  * Parameters:
 790  *      stdin_fd:  The fd representing 'stdin' for the slave side; input to
 791  *                 the zone will be written here.
 792  *
 793  *      appin_fd:  The fd representing the other end of the 'stdin' pipe (when
 794  *                 we're running non-interactive); used in process_raw_input
 795  *                 to ensure we don't fill up the application's stdin pipe.
 796  *
 797  *      stdout_fd: The fd representing 'stdout' for the slave side; output
 798  *                 from the zone will arrive here.
 799  *
 800  *      stderr_fd: The fd representing 'stderr' for the slave side; output
 801  *                 from the zone will arrive here.
 802  *
 803  *      raw_mode:  If TRUE, then no processing (for example, for '~.') will
 804  *                 be performed on the input coming from STDIN.
 805  *
 806  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
 807  * mode supplies a stderr).
 808  *
 809  */
 810 static void
 811 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
 812     boolean_t raw_mode)
 813 {
 814         struct pollfd pollfds[4];
 815         char ibuf[ZLOGIN_BUFSIZ];
 816         int cc, ret;
 817 
 818         /* read from stdout of zone and write to stdout of global zone */
 819         pollfds[0].fd = stdout_fd;
 820         pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
 821 
 822         /* read from stderr of zone and write to stderr of global zone */
 823         pollfds[1].fd = stderr_fd;
 824         pollfds[1].events = pollfds[0].events;
 825 
 826         /* read from stdin of global zone and write to stdin of zone */
 827         pollfds[2].fd = STDIN_FILENO;
 828         pollfds[2].events = pollfds[0].events;
 829 
 830         /* read from signalling pipe so we know when child dies */
 831         pollfds[3].fd = sig_fd;
 832         pollfds[3].events = pollfds[0].events;
 833 
 834         for (;;) {
 835                 pollfds[0].revents = pollfds[1].revents =
 836                     pollfds[2].revents = pollfds[3].revents = 0;
 837 
 838                 if (dead)
 839                         break;
 840 
 841                 /*
 842                  * There is a race condition here where we can receive the
 843                  * child death signal, set the dead flag, but since we have
 844                  * passed the test above, we would go into poll and hang.
 845                  * To avoid this we use the sig_fd as an additional poll fd.
 846                  * The signal handler writes into the other end of this pipe
 847                  * when the child dies so that the poll will always see that
 848                  * input and proceed.  We just loop around at that point and
 849                  * then notice the dead flag.
 850                  */
 851 
 852                 ret = poll(pollfds,
 853                     sizeof (pollfds) / sizeof (struct pollfd), -1);
 854 
 855                 if (ret == -1 && errno != EINTR) {
 856                         perror("poll failed");
 857                         break;
 858                 }
 859 
 860                 if (errno == EINTR && dead) {
 861                         break;
 862                 }
 863 
 864                 /* event from master side stdout */
 865                 if (pollfds[0].revents) {
 866                         if (pollfds[0].revents &
 867                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 868                                 if (process_output(stdout_fd, STDOUT_FILENO)
 869                                     != 0)
 870                                         break;
 871                         } else {
 872                                 pollerr = pollfds[0].revents;
 873                                 break;
 874                         }
 875                 }
 876 
 877                 /* event from master side stderr */
 878                 if (pollfds[1].revents) {
 879                         if (pollfds[1].revents &
 880                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 881                                 if (process_output(stderr_fd, STDERR_FILENO)
 882                                     != 0)
 883                                         break;
 884                         } else {
 885                                 pollerr = pollfds[1].revents;
 886                                 break;
 887                         }
 888                 }
 889 
 890                 /* event from user STDIN side */
 891                 if (pollfds[2].revents) {
 892                         if (pollfds[2].revents &
 893                             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 894                                 /*
 895                                  * stdin fd is stdin of the target; so,
 896                                  * the thing we'll write the user data *to*.
 897                                  *
 898                                  * Also, unlike on the output side, we
 899                                  * close the pipe on a zero-length message.
 900                                  */
 901                                 int res;
 902 
 903                                 if (raw_mode)
 904                                         res = process_raw_input(stdin_fd,
 905                                             appin_fd);
 906                                 else
 907                                         res = process_user_input(stdin_fd,
 908                                             stdout_fd);
 909 
 910                                 if (res < 0)
 911                                         break;
 912                                 if (res > 0) {
 913                                         /* EOF (close) child's stdin_fd */
 914                                         pollfds[2].fd = -1;
 915                                         while ((res = close(stdin_fd)) != 0 &&
 916                                             errno == EINTR)
 917                                                 ;
 918                                         if (res != 0)
 919                                                 break;
 920                                 }
 921 
 922                         } else if (raw_mode && pollfds[2].revents & POLLHUP) {
 923                                 /*
 924                                  * It's OK to get a POLLHUP on STDIN-- it
 925                                  * always happens if you do:
 926                                  *
 927                                  * echo foo | zlogin <zone> <command>
 928                                  *
 929                                  * We reset fd to -1 in this case to clear
 930                                  * the condition and close the pipe (EOF) to
 931                                  * the other side in order to wrap things up.
 932                                  */
 933                                 int res;
 934 
 935                                 pollfds[2].fd = -1;
 936                                 while ((res = close(stdin_fd)) != 0 &&
 937                                     errno == EINTR)
 938                                         ;
 939                                 if (res != 0)
 940                                         break;
 941                         } else {
 942                                 pollerr = pollfds[2].revents;
 943                                 break;
 944                         }
 945                 }
 946         }
 947 
 948         /*
 949          * We are in the midst of dying, but try to poll with a short
 950          * timeout to see if we can catch the last bit of I/O from the
 951          * children.
 952          */
 953 retry:
 954         pollfds[0].revents = pollfds[1].revents = 0;
 955         (void) poll(pollfds, 2, 100);
 956         if (pollfds[0].revents &
 957             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 958                 if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 959                         (void) write(STDOUT_FILENO, ibuf, cc);
 960                         goto retry;
 961                 }
 962         }
 963         if (pollfds[1].revents &
 964             (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
 965                 if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
 966                         (void) write(STDERR_FILENO, ibuf, cc);
 967                         goto retry;
 968                 }
 969         }
 970 }
 971 
 972 /*
 973  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
 974  */
 975 static const char *
 976 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
 977     size_t len)
 978 {
 979         bzero(user_cmd, sizeof (user_cmd));
 980         if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
 981                 return (NULL);
 982 
 983         return (user_cmd);
 984 }
 985 
 986 /* From libc */
 987 extern int str2passwd(const char *, int, void *, char *, int);
 988 
 989 /*
 990  * exec() the user_cmd brand hook, and convert the output string to a
 991  * struct passwd.  This is to be called after zone_enter().
 992  *
 993  */
 994 static struct passwd *
 995 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
 996     int pwbuflen)
 997 {
 998         char pwline[NSS_BUFLEN_PASSWD];
 999         char *cin = NULL;
1000         FILE *fin;
1001         int status;
1002 
1003         assert(getzoneid() != GLOBAL_ZONEID);
1004 
1005         if ((fin = popen(user_cmd, "r")) == NULL)
1006                 return (NULL);
1007 
1008         while (cin == NULL && !feof(fin))
1009                 cin = fgets(pwline, sizeof (pwline), fin);
1010 
1011         if (cin == NULL) {
1012                 (void) pclose(fin);
1013                 return (NULL);
1014         }
1015 
1016         status = pclose(fin);
1017         if (!WIFEXITED(status))
1018                 return (NULL);
1019         if (WEXITSTATUS(status) != 0)
1020                 return (NULL);
1021 
1022         if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1023                 return (pwent);
1024         else
1025                 return (NULL);
1026 }
1027 
1028 static char **
1029 zone_login_cmd(brand_handle_t bh, const char *login)
1030 {
1031         static char result_buf[ARG_MAX];
1032         char **new_argv, *ptr, *lasts;
1033         int n, a;
1034 
1035         /* Get the login command for the target zone. */
1036         bzero(result_buf, sizeof (result_buf));
1037 
1038         if (forced_login) {
1039                 if (brand_get_forcedlogin_cmd(bh, login,
1040                     result_buf, sizeof (result_buf)) != 0)
1041                         return (NULL);
1042         } else {
1043                 if (brand_get_login_cmd(bh, login,
1044                     result_buf, sizeof (result_buf)) != 0)
1045                         return (NULL);
1046         }
1047 
1048         /*
1049          * We got back a string that we'd like to execute.  But since
1050          * we're not doing the execution via a shell we'll need to convert
1051          * the exec string to an array of strings.  We'll do that here
1052          * but we're going to be very simplistic about it and break stuff
1053          * up based on spaces.  We're not even going to support any kind
1054          * of quoting or escape characters.  It's truly amazing that
1055          * there is no library function in OpenSolaris to do this for us.
1056          */
1057 
1058         /*
1059          * Be paranoid.  Since we're deliniating based on spaces make
1060          * sure there are no adjacent spaces.
1061          */
1062         if (strstr(result_buf, "  ") != NULL)
1063                 return (NULL);
1064 
1065         /* Remove any trailing whitespace.  */
1066         n = strlen(result_buf);
1067         if (result_buf[n - 1] == ' ')
1068                 result_buf[n - 1] = '\0';
1069 
1070         /* Count how many elements there are in the exec string. */
1071         ptr = result_buf;
1072         for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1073                 ;
1074 
1075         /* Allocate the argv array that we're going to return. */
1076         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1077                 return (NULL);
1078 
1079         /* Tokenize the exec string and return. */
1080         a = 0;
1081         new_argv[a++] = result_buf;
1082         if (n > 2) {
1083                 (void) strtok_r(result_buf, " ", &lasts);
1084                 while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1085                         ;
1086         } else {
1087                 new_argv[a++] = NULL;
1088         }
1089         assert(n == a);
1090         return (new_argv);
1091 }
1092 
1093 /*
1094  * Prepare argv array for exec'd process; if we're passing commands to the
1095  * new process, then use su(1M) to do the invocation.  Otherwise, use
1096  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1097  * login that we're coming from another zone, and to disregard its CONSOLE
1098  * checks).
1099  */
1100 static char **
1101 prep_args(brand_handle_t bh, const char *login, char **argv)
1102 {
1103         int argc = 0, a = 0, i, n = -1;
1104         char **new_argv;
1105 
1106         if (argv != NULL) {
1107                 size_t subshell_len = 1;
1108                 char *subshell;
1109 
1110                 while (argv[argc] != NULL)
1111                         argc++;
1112 
1113                 for (i = 0; i < argc; i++) {
1114                         subshell_len += strlen(argv[i]) + 1;
1115                 }
1116                 if ((subshell = calloc(1, subshell_len)) == NULL)
1117                         return (NULL);
1118 
1119                 for (i = 0; i < argc; i++) {
1120                         (void) strcat(subshell, argv[i]);
1121                         (void) strcat(subshell, " ");
1122                 }
1123 
1124                 if (failsafe) {
1125                         n = 4;
1126                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1127                                 return (NULL);
1128 
1129                         new_argv[a++] = FAILSAFESHELL;
1130                 } else {
1131                         n = 5;
1132                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1133                                 return (NULL);
1134 
1135                         new_argv[a++] = SUPATH;
1136                         if (strcmp(login, "root") != 0) {
1137                                 new_argv[a++] = "-";
1138                                 n++;
1139                         }
1140                         new_argv[a++] = (char *)login;
1141                 }
1142                 new_argv[a++] = "-c";
1143                 new_argv[a++] = subshell;
1144                 new_argv[a++] = NULL;
1145                 assert(a == n);
1146         } else {
1147                 if (failsafe) {
1148                         n = 2;
1149                         if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1150                                 return (NULL);
1151                         new_argv[a++] = FAILSAFESHELL;
1152                         new_argv[a++] = NULL;
1153                         assert(n == a);
1154                 } else {
1155                         new_argv = zone_login_cmd(bh, login);
1156                 }
1157         }
1158 
1159         return (new_argv);
1160 }
1161 
/*
 * Helper routine for prep_env below.
 *
 * Builds a freshly malloc'd "name=value" string.  Returns NULL if the
 * allocation fails.
 */
static char *
add_env(char *name, char *value)
{
        size_t namelen = strlen(name);
        size_t vallen = strlen(value);
        size_t total = namelen + vallen + 2;    /* '=' and the NUL */
        char *entry = malloc(total);

        if (entry != NULL)
                (void) snprintf(entry, total, "%s=%s", name, value);

        return (entry);
}
1177 
1178 /*
1179  * Prepare envp array for exec'd process.
1180  */
1181 static char **
1182 prep_env()
1183 {
1184         int e = 0, size = 1;
1185         char **new_env, *estr;
1186         char *term = getenv("TERM");
1187 
1188         size++; /* for $PATH */
1189         if (term != NULL)
1190                 size++;
1191 
1192         /*
1193          * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1194          * We also set $SHELL, since neither login nor su will be around to do
1195          * it.
1196          */
1197         if (failsafe)
1198                 size += 2;
1199 
1200         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1201                 return (NULL);
1202 
1203         if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1204                 return (NULL);
1205         new_env[e++] = estr;
1206 
1207         if (term != NULL) {
1208                 if ((estr = add_env("TERM", term)) == NULL)
1209                         return (NULL);
1210                 new_env[e++] = estr;
1211         }
1212 
1213         if (failsafe) {
1214                 if ((estr = add_env("HOME", "/")) == NULL)
1215                         return (NULL);
1216                 new_env[e++] = estr;
1217 
1218                 if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1219                         return (NULL);
1220                 new_env[e++] = estr;
1221         }
1222 
1223         new_env[e++] = NULL;
1224 
1225         assert(e == size);
1226 
1227         return (new_env);
1228 }
1229 
1230 /*
1231  * Finish the preparation of the envp array for exec'd non-interactive
1232  * zlogins.  This is called in the child process *after* we zone_enter(), since
1233  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1234  * etc.  We need only do this in the non-interactive, mode, since otherwise
1235  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1236  * additional ways in which the command could fail, and we'd prefer to avoid
1237  * that.
1238  */
1239 static char **
1240 prep_env_noninteractive(const char *user_cmd, char **env)
1241 {
1242         size_t size;
1243         char **new_env;
1244         int e, i;
1245         char *estr;
1246         char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1247         char pwbuf[NSS_BUFLEN_PASSWD + 1];
1248         struct passwd pwent;
1249         struct passwd *pw = NULL;
1250 
1251         assert(env != NULL);
1252         assert(failsafe == 0);
1253 
1254         /*
1255          * Exec the "user_cmd" brand hook to get a pwent for the
1256          * login user.  If this fails, HOME will be set to "/", SHELL
1257          * will be set to $DEFAULTSHELL, and we will continue to exec
1258          * SUPATH <login> -c <cmd>.
1259          */
1260         pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1261 
1262         /*
1263          * Get existing envp size.
1264          */
1265         for (size = 0; env[size] != NULL; size++)
1266                 ;
1267 
1268         e = size;
1269 
1270         /*
1271          * Finish filling out the environment; we duplicate the environment
1272          * setup described in login(1), for lack of a better precedent.
1273          */
1274         if (pw != NULL)
1275                 size += 3;      /* LOGNAME, HOME, MAIL */
1276         else
1277                 size += 1;      /* HOME */
1278 
1279         size++; /* always fill in SHELL */
1280         size++; /* terminating NULL */
1281 
1282         if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1283                 goto malloc_fail;
1284 
1285         /*
1286          * Copy existing elements of env into new_env.
1287          */
1288         for (i = 0; env[i] != NULL; i++) {
1289                 if ((new_env[i] = strdup(env[i])) == NULL)
1290                         goto malloc_fail;
1291         }
1292         assert(e == i);
1293 
1294         if (pw != NULL) {
1295                 if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1296                         goto malloc_fail;
1297                 new_env[e++] = estr;
1298 
1299                 if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1300                         goto malloc_fail;
1301                 new_env[e++] = estr;
1302 
1303                 if (chdir(pw->pw_dir) != 0)
1304                         zerror(gettext("Could not chdir to home directory "
1305                             "%s: %s"), pw->pw_dir, strerror(errno));
1306 
1307                 (void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1308                     pw->pw_name);
1309                 if ((estr = add_env("MAIL", varmail)) == NULL)
1310                         goto malloc_fail;
1311                 new_env[e++] = estr;
1312         } else {
1313                 if ((estr = add_env("HOME", "/")) == NULL)
1314                         goto malloc_fail;
1315                 new_env[e++] = estr;
1316         }
1317 
1318         if (pw != NULL && strlen(pw->pw_shell) > 0) {
1319                 if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1320                         goto malloc_fail;
1321                 new_env[e++] = estr;
1322         } else {
1323                 if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1324                         goto malloc_fail;
1325                 new_env[e++] = estr;
1326         }
1327 
1328         new_env[e++] = NULL;    /* add terminating NULL */
1329 
1330         assert(e == size);
1331         return (new_env);
1332 
1333 malloc_fail:
1334         zperror(gettext("failed to allocate memory for process environment"));
1335         return (NULL);
1336 }
1337 
/*
 * Close 'fd' unless it is the descriptor that 'slavefd' points to.
 * Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
        int keep_fd = *(int *)slavefd;

        if (fd == keep_fd)
                return (0);

        (void) close(fd);
        return (0);
}
1345 
1346 static void
1347 set_cmdchar(char *cmdcharstr)
1348 {
1349         char c;
1350         long lc;
1351 
1352         if ((c = *cmdcharstr) != '\\') {
1353                 cmdchar = c;
1354                 return;
1355         }
1356 
1357         c = cmdcharstr[1];
1358         if (c == '\0' || c == '\\') {
1359                 cmdchar = '\\';
1360                 return;
1361         }
1362 
1363         if (c < '0' || c > '7') {
1364                 zerror(gettext("Unrecognized escape character option %s"),
1365                     cmdcharstr);
1366                 usage();
1367         }
1368 
1369         lc = strtol(cmdcharstr + 1, NULL, 8);
1370         if (lc < 0 || lc > 255) {
1371                 zerror(gettext("Octal escape character '%s' too large"),
1372                     cmdcharstr);
1373                 usage();
1374         }
1375         cmdchar = (char)lc;
1376 }
1377 
1378 static int
1379 setup_utmpx(char *slavename)
1380 {
1381         struct utmpx ut;
1382 
1383         bzero(&ut, sizeof (ut));
1384         (void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1385         (void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1386         ut.ut_pid = getpid();
1387         ut.ut_id[0] = 'z';
1388         ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1389         ut.ut_type = LOGIN_PROCESS;
1390         (void) time(&ut.ut_tv.tv_sec);
1391 
1392         if (makeutx(&ut) == NULL) {
1393                 zerror(gettext("makeutx failed"));
1394                 return (-1);
1395         }
1396         return (0);
1397 }
1398 
/*
 * Give up the lock file by closing its descriptor.  The result of close()
 * is deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
        (void) close(lockfd);
}
1404 
1405 static int
1406 grab_lock_file(const char *zone_name, int *lockfd)
1407 {
1408         char pathbuf[PATH_MAX];
1409         struct flock flock;
1410 
1411         if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1412                 zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1413                     strerror(errno));
1414                 return (-1);
1415         }
1416         (void) chmod(ZONES_TMPDIR, S_IRWXU);
1417         (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1418             ZONES_TMPDIR, zone_name);
1419 
1420         if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1421                 zerror(gettext("could not open %s: %s"), pathbuf,
1422                     strerror(errno));
1423                 return (-1);
1424         }
1425         /*
1426          * Lock the file to synchronize with other zoneadmds
1427          */
1428         flock.l_type = F_WRLCK;
1429         flock.l_whence = SEEK_SET;
1430         flock.l_start = (off_t)0;
1431         flock.l_len = (off_t)0;
1432         if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1433                 zerror(gettext("unable to lock %s: %s"), pathbuf,
1434                     strerror(errno));
1435                 release_lock_file(*lockfd);
1436                 return (-1);
1437         }
1438         return (Z_OK);
1439 }
1440 
1441 static int
1442 start_zoneadmd(const char *zone_name)
1443 {
1444         pid_t retval;
1445         int pstatus = 0, error = -1, lockfd, doorfd;
1446         struct door_info info;
1447         char doorpath[MAXPATHLEN];
1448 
1449         (void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1450 
1451         if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1452                 return (-1);
1453         /*
1454          * We must do the door check with the lock held.  Otherwise, we
1455          * might race against another zoneadm/zlogin process and wind
1456          * up with two processes trying to start zoneadmd at the same
1457          * time.  zoneadmd will detect this, and fail, but we prefer this
1458          * to be as seamless as is practical, from a user perspective.
1459          */
1460         if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1461                 if (errno != ENOENT) {
1462                         zerror("failed to open %s: %s", doorpath,
1463                             strerror(errno));
1464                         goto out;
1465                 }
1466         } else {
1467                 /*
1468                  * Seems to be working ok.
1469                  */
1470                 if (door_info(doorfd, &info) == 0 &&
1471                     ((info.di_attributes & DOOR_REVOKED) == 0)) {
1472                         error = 0;
1473                         goto out;
1474                 }
1475         }
1476 
1477         if ((child_pid = fork()) == -1) {
1478                 zperror(gettext("could not fork"));
1479                 goto out;
1480         } else if (child_pid == 0) {
1481                 /* child process */
1482                 (void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1483                     zone_name, NULL);
1484                 zperror(gettext("could not exec zoneadmd"));
1485                 _exit(1);
1486         }
1487 
1488         /* parent process */
1489         do {
1490                 retval = waitpid(child_pid, &pstatus, 0);
1491         } while (retval != child_pid);
1492         if (WIFSIGNALED(pstatus) ||
1493             (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1494                 zerror(gettext("could not start %s"), "zoneadmd");
1495                 goto out;
1496         }
1497         error = 0;
1498 out:
1499         release_lock_file(lockfd);
1500         (void) close(doorfd);
1501         return (error);
1502 }
1503 
1504 static int
1505 init_template(void)
1506 {
1507         int fd;
1508         int err = 0;
1509 
1510         fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1511         if (fd == -1)
1512                 return (-1);
1513 
1514         /*
1515          * zlogin doesn't do anything with the contract.
1516          * Deliver no events, don't inherit, and allow it to be orphaned.
1517          */
1518         err |= ct_tmpl_set_critical(fd, 0);
1519         err |= ct_tmpl_set_informative(fd, 0);
1520         err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1521         err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1522         if (err || ct_tmpl_activate(fd)) {
1523                 (void) close(fd);
1524                 return (-1);
1525         }
1526 
1527         return (fd);
1528 }
1529 
1530 static int
1531 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1532     char **new_args, char **new_env)
1533 {
1534         pid_t retval;
1535         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1536         int child_status;
1537         int tmpl_fd;
1538         sigset_t block_cld;
1539 
1540         if ((tmpl_fd = init_template()) == -1) {
1541                 reset_tty();
1542                 zperror(gettext("could not create contract"));
1543                 return (1);
1544         }
1545 
1546         if (pipe(stdin_pipe) != 0) {
1547                 zperror(gettext("could not create STDIN pipe"));
1548                 return (1);
1549         }
1550         /*
1551          * When the user types ^D, we get a zero length message on STDIN.
1552          * We need to echo that down the pipe to send it to the other side;
1553          * but by default, pipes don't propagate zero-length messages.  We
1554          * toggle that behavior off using I_SWROPT.  See streamio(7i).
1555          */
1556         if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1557                 zperror(gettext("could not configure STDIN pipe"));
1558                 return (1);
1559 
1560         }
1561         if (pipe(stdout_pipe) != 0) {
1562                 zperror(gettext("could not create STDOUT pipe"));
1563                 return (1);
1564         }
1565         if (pipe(stderr_pipe) != 0) {
1566                 zperror(gettext("could not create STDERR pipe"));
1567                 return (1);
1568         }
1569 
1570         if (pipe(dead_child_pipe) != 0) {
1571                 zperror(gettext("could not create signalling pipe"));
1572                 return (1);
1573         }
1574         close_on_sig = dead_child_pipe[0];
1575 
1576         /*
1577          * If any of the pipe FD's winds up being less than STDERR, then we
1578          * have a mess on our hands-- and we are lacking some of the I/O
1579          * streams we would expect anyway.  So we bail.
1580          */
1581         if (stdin_pipe[0] <= STDERR_FILENO ||
1582             stdin_pipe[1] <= STDERR_FILENO ||
1583             stdout_pipe[0] <= STDERR_FILENO ||
1584             stdout_pipe[1] <= STDERR_FILENO ||
1585             stderr_pipe[0] <= STDERR_FILENO ||
1586             stderr_pipe[1] <= STDERR_FILENO ||
1587             dead_child_pipe[0] <= STDERR_FILENO ||
1588             dead_child_pipe[1] <= STDERR_FILENO) {
1589                 zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1590                 return (1);
1591         }
1592 
1593         if (prefork_dropprivs() != 0) {
1594                 zperror(gettext("could not allocate privilege set"));
1595                 return (1);
1596         }
1597 
1598         (void) sigset(SIGCLD, sigcld);
1599         (void) sigemptyset(&block_cld);
1600         (void) sigaddset(&block_cld, SIGCLD);
1601         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1602 
1603         if ((child_pid = fork()) == -1) {
1604                 (void) ct_tmpl_clear(tmpl_fd);
1605                 (void) close(tmpl_fd);
1606                 zperror(gettext("could not fork"));
1607                 return (1);
1608         } else if (child_pid == 0) { /* child process */
1609                 (void) ct_tmpl_clear(tmpl_fd);
1610 
1611                 /*
1612                  * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1613                  */
1614                 (void) close(STDIN_FILENO);
1615                 (void) close(STDOUT_FILENO);
1616                 (void) close(STDERR_FILENO);
1617                 (void) dup2(stdin_pipe[1], STDIN_FILENO);
1618                 (void) dup2(stdout_pipe[1], STDOUT_FILENO);
1619                 (void) dup2(stderr_pipe[1], STDERR_FILENO);
1620                 (void) closefrom(STDERR_FILENO + 1);
1621 
1622                 (void) sigset(SIGCLD, SIG_DFL);
1623                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1624                 /*
1625                  * In case any of stdin, stdout or stderr are streams,
1626                  * anchor them to prevent malicious I_POPs.
1627                  */
1628                 (void) ioctl(STDIN_FILENO, I_ANCHOR);
1629                 (void) ioctl(STDOUT_FILENO, I_ANCHOR);
1630                 (void) ioctl(STDERR_FILENO, I_ANCHOR);
1631 
1632                 if (zone_enter(zoneid) == -1) {
1633                         zerror(gettext("could not enter zone %s: %s"),
1634                             zonename, strerror(errno));
1635                         _exit(1);
1636                 }
1637 
1638                 /*
1639                  * For non-native zones, tell libc where it can find locale
1640                  * specific getttext() messages.
1641                  */
1642                 if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1643                         (void) bindtextdomain(TEXT_DOMAIN,
1644                             "/.SUNWnative/usr/lib/locale");
1645                 else if (access("/native/usr/lib/locale", R_OK) == 0)
1646                         (void) bindtextdomain(TEXT_DOMAIN,
1647                             "/native/usr/lib/locale");
1648 
1649                 if (!failsafe)
1650                         new_env = prep_env_noninteractive(user_cmd, new_env);
1651 
1652                 if (new_env == NULL) {
1653                         _exit(1);
1654                 }
1655 
1656                 /*
1657                  * Move into a new process group; the zone_enter will have
1658                  * placed us into zsched's session, and we want to be in
1659                  * a unique process group.
1660                  */
1661                 (void) setpgid(getpid(), getpid());
1662 
1663                 /*
1664                  * The child needs to run as root to
1665                  * execute the su program.
1666                  */
1667                 if (setuid(0) == -1) {
1668                         zperror(gettext("insufficient privilege"));
1669                         return (1);
1670                 }
1671 
1672                 (void) execve(new_args[0], new_args, new_env);
1673                 zperror(gettext("exec failure"));
1674                 _exit(1);
1675         }
1676         /* parent */
1677 
1678         /* close pipe sides written by child */
1679         (void) close(stdout_pipe[1]);
1680         (void) close(stderr_pipe[1]);
1681 
1682         (void) sigset(SIGINT, sig_forward);
1683 
1684         postfork_dropprivs();
1685 
1686         (void) ct_tmpl_clear(tmpl_fd);
1687         (void) close(tmpl_fd);
1688 
1689         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1690         doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1691             dead_child_pipe[1], B_TRUE);
1692         do {
1693                 retval = waitpid(child_pid, &child_status, 0);
1694                 if (retval == -1) {
1695                         child_status = 0;
1696                 }
1697         } while (retval != child_pid && errno != ECHILD);
1698 
1699         return (WEXITSTATUS(child_status));
1700 }
1701 
/*
 * Return the login name that belongs to the real user ID of this process.
 *
 * Authorizations are checked to restrict access based on the requested
 * operation and zone name.  It is assumed that the program is running
 * with all privileges, but that the real user ID is that of the user or
 * role on whose behalf we are operating.  The name returned here is the
 * one used for those subsequent authorization checks.
 *
 * getpwuid() returns a pointer into static storage which a later
 * getpw*() call is allowed to overwrite, so the name is copied with
 * strdup().  The copy is meant to live for the rest of the process and
 * is never freed by the caller.
 *
 * This function never returns NULL: if the password entry cannot be
 * found or the copy cannot be allocated, a message is printed and the
 * process terminates with status 1.
 */
static char *
get_username(void)
{
        struct passwd *nptr;
        char *name;

        if ((nptr = getpwuid(getuid())) == NULL ||
            (name = strdup(nptr->pw_name)) == NULL) {
                zerror(gettext("could not get user name."));
                _exit(1);
        }
        return (name);
}
1724 
1725 int
1726 main(int argc, char **argv)
1727 {
1728         int arg, console = 0;
1729         zoneid_t zoneid;
1730         zone_state_t st;
1731         char *login = "root";
1732         int lflag = 0;
1733         int nflag = 0;
1734         char *zonename = NULL;
1735         char **proc_args = NULL;
1736         char **new_args, **new_env;
1737         sigset_t block_cld;
1738         char devroot[MAXPATHLEN];
1739         char *slavename, slaveshortname[MAXPATHLEN];
1740         priv_set_t *privset;
1741         int tmpl_fd;
1742         char zonebrand[MAXNAMELEN];
1743         char default_brand[MAXNAMELEN];
1744         struct stat sb;
1745         char kernzone[ZONENAME_MAX];
1746         brand_handle_t bh;
1747         char user_cmd[MAXPATHLEN];
1748         char authname[MAXAUTHS];
1749 
1750         (void) setlocale(LC_ALL, "");
1751         (void) textdomain(TEXT_DOMAIN);
1752 
1753         (void) getpname(argv[0]);
1754         username = get_username();
1755 
1756         while ((arg = getopt(argc, argv, "nECR:Se:l:Q")) != EOF) {
1757                 switch (arg) {
1758                 case 'C':
1759                         console = 1;
1760                         break;
1761                 case 'E':
1762                         nocmdchar = 1;
1763                         break;
1764                 case 'R':       /* undocumented */
1765                         if (*optarg != '/') {
1766                                 zerror(gettext("root path must be absolute."));
1767                                 exit(2);
1768                         }
1769                         if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1770                                 zerror(
1771                                     gettext("root path must be a directory."));
1772                                 exit(2);
1773                         }
1774                         zonecfg_set_root(optarg);
1775                         break;
1776                 case 'Q':
1777                         quiet = 1;
1778                         break;
1779                 case 'S':
1780                         failsafe = 1;
1781                         break;
1782                 case 'e':
1783                         set_cmdchar(optarg);
1784                         break;
1785                 case 'l':
1786                         login = optarg;
1787                         lflag = 1;
1788                         break;
1789                 case 'n':
1790                         nflag = 1;
1791                         break;
1792                 default:
1793                         usage();
1794                 }
1795         }
1796 
1797         if (console != 0) {
1798 
1799                 if (lflag != 0) {
1800                         zerror(gettext(
1801                             "-l may not be specified for console login"));
1802                         usage();
1803                 }
1804 
1805                 if (nflag != 0) {
1806                         zerror(gettext(
1807                             "-n may not be specified for console login"));
1808                         usage();
1809                 }
1810 
1811                 if (failsafe != 0) {
1812                         zerror(gettext(
1813                             "-S may not be specified for console login"));
1814                         usage();
1815                 }
1816 
1817                 if (zonecfg_in_alt_root()) {
1818                         zerror(gettext(
1819                             "-R may not be specified for console login"));
1820                         exit(2);
1821                 }
1822 
1823         }
1824 
1825         if (failsafe != 0 && lflag != 0) {
1826                 zerror(gettext("-l may not be specified for failsafe login"));
1827                 usage();
1828         }
1829 
1830         if (optind == (argc - 1)) {
1831                 /*
1832                  * zone name, no process name; this should be an interactive
1833                  * as long as STDIN is really a tty.
1834                  */
1835                 if (nflag != 0) {
1836                         zerror(gettext(
1837                             "-n may not be specified for interactive login"));
1838                         usage();
1839                 }
1840                 if (isatty(STDIN_FILENO))
1841                         interactive = 1;
1842                 zonename = argv[optind];
1843         } else if (optind < (argc - 1)) {
1844                 if (console) {
1845                         zerror(gettext("Commands may not be specified for "
1846                             "console login."));
1847                         usage();
1848                 }
1849                 /* zone name and process name, and possibly some args */
1850                 zonename = argv[optind];
1851                 proc_args = &argv[optind + 1];
1852                 interactive = 0;
1853         } else {
1854                 usage();
1855         }
1856 
1857         if (getzoneid() != GLOBAL_ZONEID) {
1858                 zerror(gettext("'%s' may only be used from the global zone"),
1859                     pname);
1860                 return (1);
1861         }
1862 
1863         if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1864                 zerror(gettext("'%s' not applicable to the global zone"),
1865                     pname);
1866                 return (1);
1867         }
1868 
1869         if (zone_get_state(zonename, &st) != Z_OK) {
1870                 zerror(gettext("zone '%s' unknown"), zonename);
1871                 return (1);
1872         }
1873 
1874         if (st < ZONE_STATE_INSTALLED) {
1875                 zerror(gettext("cannot login to a zone which is '%s'"),
1876                     zone_state_str(st));
1877                 return (1);
1878         }
1879 
1880         /*
1881          * In both console and non-console cases, we require all privs.
1882          * In the console case, because we may need to startup zoneadmd.
1883          * In the non-console case in order to do zone_enter(2), zonept()
1884          * and other tasks.
1885          */
1886 
1887         if ((privset = priv_allocset()) == NULL) {
1888                 zperror(gettext("priv_allocset failed"));
1889                 return (1);
1890         }
1891 
1892         if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1893                 zperror(gettext("getppriv failed"));
1894                 priv_freeset(privset);
1895                 return (1);
1896         }
1897 
1898         if (priv_isfullset(privset) == B_FALSE) {
1899                 zerror(gettext("You lack sufficient privilege to run "
1900                     "this command (all privs required)"));
1901                 priv_freeset(privset);
1902                 return (1);
1903         }
1904         priv_freeset(privset);
1905 
1906         /*
1907          * Check if user is authorized for requested usage of the zone
1908          */
1909 
1910         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1911             ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1912         if (chkauthattr(authname, username) == 0) {
1913                 if (console) {
1914                         zerror(gettext("%s is not authorized for console "
1915                             "access to  %s zone."),
1916                             username, zonename);
1917                         return (1);
1918                 } else {
1919                         (void) snprintf(authname, MAXAUTHS, "%s%s%s",
1920                             ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1921                         if (failsafe || !interactive) {
1922                                 zerror(gettext("%s is not authorized for  "
1923                                     "failsafe or non-interactive login "
1924                                     "to  %s zone."), username, zonename);
1925                                 return (1);
1926                         } else if (chkauthattr(authname, username) == 0) {
1927                                 zerror(gettext("%s is not authorized "
1928                                     " to login to %s zone."),
1929                                     username, zonename);
1930                                 return (1);
1931                         }
1932                 }
1933         } else {
1934                 forced_login = B_TRUE;
1935         }
1936 
1937         /*
1938          * The console is a separate case from the rest of the code; handle
1939          * it first.
1940          */
1941         if (console) {
1942                 /*
1943                  * Ensure that zoneadmd for this zone is running.
1944                  */
1945                 if (start_zoneadmd(zonename) == -1)
1946                         return (1);
1947 
1948                 /*
1949                  * Make contact with zoneadmd.
1950                  */
1951                 if (get_console_master(zonename) == -1)
1952                         return (1);
1953 
1954                 if (!quiet)
1955                         (void) printf(
1956                             gettext("[Connected to zone '%s' console]\n"),
1957                             zonename);
1958 
1959                 if (set_tty_rawmode(STDIN_FILENO) == -1) {
1960                         reset_tty();
1961                         zperror(gettext("failed to set stdin pty to raw mode"));
1962                         return (1);
1963                 }
1964 
1965                 (void) sigset(SIGWINCH, sigwinch);
1966                 (void) sigwinch(0);
1967 
1968                 /*
1969                  * Run the I/O loop until we get disconnected.
1970                  */
1971                 doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1972                 reset_tty();
1973                 if (!quiet)
1974                         (void) printf(
1975                             gettext("\n[Connection to zone '%s' console "
1976                             "closed]\n"), zonename);
1977 
1978                 return (0);
1979         }
1980 
1981         if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1982                 zerror(gettext("login allowed only to running zones "
1983                     "(%s is '%s')."), zonename, zone_state_str(st));
1984                 return (1);
1985         }
1986 
1987         (void) strlcpy(kernzone, zonename, sizeof (kernzone));
1988         if (zonecfg_in_alt_root()) {
1989                 FILE *fp = zonecfg_open_scratch("", B_FALSE);
1990 
1991                 if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1992                     zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1993                         zerror(gettext("cannot find scratch zone %s"),
1994                             zonename);
1995                         if (fp != NULL)
1996                                 zonecfg_close_scratch(fp);
1997                         return (1);
1998                 }
1999                 zonecfg_close_scratch(fp);
2000         }
2001 
2002         if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2003                 zerror(gettext("failed to get zoneid for zone '%s'"),
2004                     zonename);
2005                 return (1);
2006         }
2007 
2008         /*
2009          * We need the zone root path only if we are setting up a pty.
2010          */
2011         if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2012                 zerror(gettext("could not get dev path for zone %s"),
2013                     zonename);
2014                 return (1);
2015         }
2016 
2017         if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2018                 zerror(gettext("could not get brand for zone %s"), zonename);
2019                 return (1);
2020         }
2021         /*
2022          * In the alternate root environment, the only supported
2023          * operations are mount and unmount.  In this case, just treat
2024          * the zone as native if it is cluster.  Cluster zones can be
2025          * native for the purpose of LU or upgrade, and the cluster
2026          * brand may not exist in the miniroot (such as in net install
2027          * upgrade).
2028          */
2029         if (zonecfg_default_brand(default_brand,
2030             sizeof (default_brand)) != Z_OK) {
2031                 zerror(gettext("unable to determine default brand"));
2032                 return (1);
2033         }
2034         if (zonecfg_in_alt_root() &&
2035             strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2036                 (void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2037         }
2038 
2039         if ((bh = brand_open(zonebrand)) == NULL) {
2040                 zerror(gettext("could not open brand for zone %s"), zonename);
2041                 return (1);
2042         }
2043 
2044         if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2045                 zperror(gettext("could not assemble new arguments"));
2046                 brand_close(bh);
2047                 return (1);
2048         }
2049         /*
2050          * Get the brand specific user_cmd.  This command is used to get
2051          * a passwd(4) entry for login.
2052          */
2053         if (!interactive && !failsafe) {
2054                 if (zone_get_user_cmd(bh, login, user_cmd,
2055                     sizeof (user_cmd)) == NULL) {
2056                         zerror(gettext("could not get user_cmd for zone %s"),
2057                             zonename);
2058                         brand_close(bh);
2059                         return (1);
2060                 }
2061         }
2062         brand_close(bh);
2063 
2064         if ((new_env = prep_env()) == NULL) {
2065                 zperror(gettext("could not assemble new environment"));
2066                 return (1);
2067         }
2068 
2069         if (!interactive) {
2070                 if (nflag) {
2071                         int nfd;
2072 
2073                         if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2074                                 zperror(gettext("failed to open null device"));
2075                                 return (1);
2076                         }
2077                         if (nfd != STDIN_FILENO) {
2078                                 if (dup2(nfd, STDIN_FILENO) < 0) {
2079                                         zperror(gettext(
2080                                             "failed to dup2 null device"));
2081                                         return (1);
2082                                 }
2083                                 (void) close(nfd);
2084                         }
2085                         /* /dev/null is now standard input */
2086                 }
2087                 return (noninteractive_login(zonename, user_cmd, zoneid,
2088                     new_args, new_env));
2089         }
2090 
2091         if (zonecfg_in_alt_root()) {
2092                 zerror(gettext("cannot use interactive login with scratch "
2093                     "zone"));
2094                 return (1);
2095         }
2096 
2097         /*
2098          * Things are more complex in interactive mode; we get the
2099          * master side of the pty, then place the user's terminal into
2100          * raw mode.
2101          */
2102         if (get_master_pty() == -1) {
2103                 zerror(gettext("could not setup master pty device"));
2104                 return (1);
2105         }
2106 
2107         /*
2108          * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2109          */
2110         if ((slavename = ptsname(masterfd)) == NULL) {
2111                 zperror(gettext("failed to get name for pseudo-tty"));
2112                 return (1);
2113         }
2114         if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2115                 (void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2116                     sizeof (slaveshortname));
2117         else
2118                 (void) strlcpy(slaveshortname, slavename,
2119                     sizeof (slaveshortname));
2120 
2121         if (!quiet)
2122                 (void) printf(gettext("[Connected to zone '%s' %s]\n"),
2123                     zonename, slaveshortname);
2124 
2125         if (set_tty_rawmode(STDIN_FILENO) == -1) {
2126                 reset_tty();
2127                 zperror(gettext("failed to set stdin pty to raw mode"));
2128                 return (1);
2129         }
2130 
2131         if (prefork_dropprivs() != 0) {
2132                 reset_tty();
2133                 zperror(gettext("could not allocate privilege set"));
2134                 return (1);
2135         }
2136 
2137         /*
2138          * We must mask SIGCLD until after we have coped with the fork
2139          * sufficiently to deal with it; otherwise we can race and receive the
2140          * signal before child_pid has been initialized (yes, this really
2141          * happens).
2142          */
2143         (void) sigset(SIGCLD, sigcld);
2144         (void) sigemptyset(&block_cld);
2145         (void) sigaddset(&block_cld, SIGCLD);
2146         (void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2147 
2148         /*
2149          * We activate the contract template at the last minute to
2150          * avoid intermediate functions that could be using fork(2)
2151          * internally.
2152          */
2153         if ((tmpl_fd = init_template()) == -1) {
2154                 reset_tty();
2155                 zperror(gettext("could not create contract"));
2156                 return (1);
2157         }
2158 
2159         if ((child_pid = fork()) == -1) {
2160                 (void) ct_tmpl_clear(tmpl_fd);
2161                 reset_tty();
2162                 zperror(gettext("could not fork"));
2163                 return (1);
2164         } else if (child_pid == 0) { /* child process */
2165                 int slavefd, newslave;
2166 
2167                 (void) ct_tmpl_clear(tmpl_fd);
2168                 (void) close(tmpl_fd);
2169 
2170                 (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2171 
2172                 if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2173                         return (1);
2174 
2175                 /*
2176                  * Close all fds except for the slave pty.
2177                  */
2178                 (void) fdwalk(close_func, &slavefd);
2179 
2180                 /*
2181                  * Temporarily dup slavefd to stderr; that way if we have
2182                  * to print out that zone_enter failed, the output will
2183                  * have somewhere to go.
2184                  */
2185                 if (slavefd != STDERR_FILENO)
2186                         (void) dup2(slavefd, STDERR_FILENO);
2187 
2188                 if (zone_enter(zoneid) == -1) {
2189                         zerror(gettext("could not enter zone %s: %s"),
2190                             zonename, strerror(errno));
2191                         return (1);
2192                 }
2193 
2194                 if (slavefd != STDERR_FILENO)
2195                         (void) close(STDERR_FILENO);
2196 
2197                 /*
2198                  * We take pains to get this process into a new process
2199                  * group, and subsequently a new session.  In this way,
2200                  * we'll have a session which doesn't yet have a controlling
2201                  * terminal.  When we open the slave, it will become the
2202                  * controlling terminal; no PIDs concerning pgrps or sids
2203                  * will leak inappropriately into the zone.
2204                  */
2205                 (void) setpgrp();
2206 
2207                 /*
2208                  * We need the slave pty to be referenced from the zone's
2209                  * /dev in order to ensure that the devt's, etc are all
2210                  * correct.  Otherwise we break ttyname and the like.
2211                  */
2212                 if ((newslave = open(slavename, O_RDWR)) == -1) {
2213                         (void) close(slavefd);
2214                         return (1);
2215                 }
2216                 (void) close(slavefd);
2217                 slavefd = newslave;
2218 
2219                 /*
2220                  * dup the slave to the various FDs, so that when the
2221                  * spawned process does a write/read it maps to the slave
2222                  * pty.
2223                  */
2224                 (void) dup2(slavefd, STDIN_FILENO);
2225                 (void) dup2(slavefd, STDOUT_FILENO);
2226                 (void) dup2(slavefd, STDERR_FILENO);
2227                 if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2228                     slavefd != STDERR_FILENO) {
2229                         (void) close(slavefd);
2230                 }
2231 
2232                 /*
2233                  * In failsafe mode, we don't use login(1), so don't try
2234                  * setting up a utmpx entry.
2235                  */
2236                 if (!failsafe)
2237                         if (setup_utmpx(slaveshortname) == -1)
2238                                 return (1);
2239 
2240                 /*
2241                  * The child needs to run as root to
2242                  * execute the brand's login program.
2243                  */
2244                 if (setuid(0) == -1) {
2245                         zperror(gettext("insufficient privilege"));
2246                         return (1);
2247                 }
2248 
2249                 (void) execve(new_args[0], new_args, new_env);
2250                 zperror(gettext("exec failure"));
2251                 return (1);
2252         }
2253 
2254         (void) ct_tmpl_clear(tmpl_fd);
2255         (void) close(tmpl_fd);
2256 
2257         /*
2258          * The rest is only for the parent process.
2259          */
2260         (void) sigset(SIGWINCH, sigwinch);
2261 
2262         postfork_dropprivs();
2263 
2264         (void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2265         doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2266 
2267         reset_tty();
2268         if (!quiet)
2269                 (void) fprintf(stderr,
2270                     gettext("\n[Connection to zone '%s' %s closed]\n"),
2271                     zonename, slaveshortname);
2272 
2273         if (pollerr != 0) {
2274                 (void) fprintf(stderr, gettext("Error: connection closed due "
2275                     "to unexpected pollevents=0x%x.\n"), pollerr);
2276                 return (1);
2277         }
2278 
2279         return (0);
2280 }