1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  */
  27 
  28 /*
  29  * This file contains the functions used to support the ZFS integration
  30  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
  31  * file system creation and destruction.
  32  */
  33 
  34 #include <stdio.h>
  35 #include <errno.h>
  36 #include <unistd.h>
  37 #include <string.h>
  38 #include <locale.h>
  39 #include <libintl.h>
  40 #include <sys/stat.h>
  41 #include <sys/statvfs.h>
  42 #include <libgen.h>
  43 #include <libzonecfg.h>
  44 #include <sys/mnttab.h>
  45 #include <libzfs.h>
  46 #include <sys/mntent.h>
  47 #include <values.h>
  48 #include <strings.h>
  49 #include <assert.h>
  50 
  51 #include "zoneadm.h"
  52 
/*
 * libzfs library handle used by the ZFS calls in this file (zfs_iter_root(),
 * zfs_open(), zfs_snapshot(), libzfs_error_description()).  It is not
 * initialized here; presumably the caller sets it up before any of these
 * functions run -- TODO confirm against the rest of zoneadm.
 */
libzfs_handle_t *g_zfs;
  54 
/*
 * Callback state for match_mountpoint(): the mount point being searched for
 * and, once found, the open handle of the file system mounted there.
 */
typedef struct zfs_mount_data {
        char            *match_name;    /* mount point path to look for */
        zfs_handle_t    *match_handle;  /* open handle of the match, or NULL */
} zfs_mount_data_t;
  59 
/*
 * Callback state shared by get_snap_max() and rename_snap() while scanning
 * and renumbering the automatically generated "<dataset>@SUNWzone<N>"
 * snapshots.
 */
typedef struct zfs_snapshot_data {
        char    *match_name;    /* name prefix to match: <dataset>@SUNWzone */
        int     len;            /* strlen of match_name */
        int     max;            /* highest number seen after the prefix; */
                                /* rename_snap() uses it as the next number */
        int     num;            /* number of matching snapshots counted */
        int     cntr;           /* number of snapshots renamed so far */
} zfs_snapshot_data_t;
  67 
/*
 * Callback state shared by find_clone() and get_direct_clone() while
 * searching a dataset's snapshots for the clone to promote.
 */
typedef struct clone_data {
        zfs_handle_t    *clone_zhp;     /* open handle of clone to promote */
        time_t          origin_creation; /* creation time of that clone's */
                                        /* origin snapshot */
        const char      *snapshot;      /* name of the snapshot currently */
                                        /* being examined by find_clone() */
} clone_data_t;
  73 
  74 /*
  75  * A ZFS file system iterator call-back function which returns the
  76  * zfs_handle_t for a ZFS file system on the specified mount point.
  77  */
  78 static int
  79 match_mountpoint(zfs_handle_t *zhp, void *data)
  80 {
  81         int                     res;
  82         zfs_mount_data_t        *cbp;
  83         char                    mp[ZFS_MAXPROPLEN];
  84 
  85         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
  86                 zfs_close(zhp);
  87                 return (0);
  88         }
  89 
  90         /* First check if the dataset is mounted. */
  91         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
  92             0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
  93                 zfs_close(zhp);
  94                 return (0);
  95         }
  96 
  97         /* Now check mount point. */
  98         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
  99             0, B_FALSE) != 0) {
 100                 zfs_close(zhp);
 101                 return (0);
 102         }
 103 
 104         cbp = (zfs_mount_data_t *)data;
 105 
 106         if (strcmp(mp, "legacy") == 0) {
 107                 /* If legacy, must look in mnttab for mountpoint. */
 108                 FILE            *fp;
 109                 struct mnttab   entry;
 110                 const char      *nm;
 111 
 112                 nm = zfs_get_name(zhp);
 113                 if ((fp = fopen(MNTTAB, "r")) == NULL) {
 114                         zfs_close(zhp);
 115                         return (0);
 116                 }
 117 
 118                 while (getmntent(fp, &entry) == 0) {
 119                         if (strcmp(nm, entry.mnt_special) == 0) {
 120                                 if (strcmp(entry.mnt_mountp, cbp->match_name)
 121                                     == 0) {
 122                                         (void) fclose(fp);
 123                                         cbp->match_handle = zhp;
 124                                         return (1);
 125                                 }
 126                                 break;
 127                         }
 128                 }
 129                 (void) fclose(fp);
 130 
 131         } else if (strcmp(mp, cbp->match_name) == 0) {
 132                 cbp->match_handle = zhp;
 133                 return (1);
 134         }
 135 
 136         /* Iterate over any nested datasets. */
 137         res = zfs_iter_filesystems(zhp, match_mountpoint, data);
 138         zfs_close(zhp);
 139         return (res);
 140 }
 141 
 142 /*
 143  * Get ZFS handle for the specified mount point.
 144  */
 145 static zfs_handle_t *
 146 mount2zhandle(char *mountpoint)
 147 {
 148         zfs_mount_data_t        cb;
 149 
 150         cb.match_name = mountpoint;
 151         cb.match_handle = NULL;
 152         (void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
 153         return (cb.match_handle);
 154 }
 155 
 156 /*
 157  * Check if there is already a file system (zfs or any other type) mounted on
 158  * path.
 159  */
 160 static boolean_t
 161 is_mountpnt(char *path)
 162 {
 163         FILE            *fp;
 164         struct mnttab   entry;
 165 
 166         if ((fp = fopen(MNTTAB, "r")) == NULL)
 167                 return (B_FALSE);
 168 
 169         while (getmntent(fp, &entry) == 0) {
 170                 if (strcmp(path, entry.mnt_mountp) == 0) {
 171                         (void) fclose(fp);
 172                         return (B_TRUE);
 173                 }
 174         }
 175 
 176         (void) fclose(fp);
 177         return (B_FALSE);
 178 }
 179 
 180 /*
 181  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
 182  */
 183 static int
 184 pre_snapshot(char *presnapbuf)
 185 {
 186         int status;
 187 
 188         /* No brand-specific handler */
 189         if (presnapbuf[0] == '\0')
 190                 return (Z_OK);
 191 
 192         /* Run the hook */
 193         status = do_subproc(presnapbuf);
 194         if ((status = subproc_status(gettext("brand-specific presnapshot"),
 195             status, B_FALSE)) != ZONE_SUBPROC_OK)
 196                 return (Z_ERR);
 197 
 198         return (Z_OK);
 199 }
 200 
 201 /*
 202  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
 203  */
 204 static int
 205 post_snapshot(char *postsnapbuf)
 206 {
 207         int status;
 208 
 209         /* No brand-specific handler */
 210         if (postsnapbuf[0] == '\0')
 211                 return (Z_OK);
 212 
 213         /* Run the hook */
 214         status = do_subproc(postsnapbuf);
 215         if ((status = subproc_status(gettext("brand-specific postsnapshot"),
 216             status, B_FALSE)) != ZONE_SUBPROC_OK)
 217                 return (Z_ERR);
 218 
 219         return (Z_OK);
 220 }
 221 
 222 /*
 223  * This is a ZFS snapshot iterator call-back function which returns the
 224  * highest number of SUNWzone snapshots that have been taken.
 225  */
 226 static int
 227 get_snap_max(zfs_handle_t *zhp, void *data)
 228 {
 229         int                     res;
 230         zfs_snapshot_data_t     *cbp;
 231 
 232         if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
 233                 zfs_close(zhp);
 234                 return (0);
 235         }
 236 
 237         cbp = (zfs_snapshot_data_t *)data;
 238 
 239         if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
 240                 char    *nump;
 241                 int     num;
 242 
 243                 cbp->num++;
 244                 nump = (char *)(zfs_get_name(zhp) + cbp->len);
 245                 num = atoi(nump);
 246                 if (num > cbp->max)
 247                         cbp->max = num;
 248         }
 249 
 250         res = zfs_iter_snapshots(zhp, get_snap_max, data);
 251         zfs_close(zhp);
 252         return (res);
 253 }
 254 
 255 /*
 256  * Take a ZFS snapshot to be used for cloning the zone.
 257  */
 258 static int
 259 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
 260     char *presnapbuf, char *postsnapbuf)
 261 {
 262         int                     res;
 263         char                    template[ZFS_MAXNAMELEN];
 264         zfs_snapshot_data_t     cb;
 265 
 266         /*
 267          * First we need to figure out the next available name for the
 268          * zone snapshot.  Look through the list of zones snapshots for
 269          * this file system to determine the maximum snapshot name.
 270          */
 271         if (snprintf(template, sizeof (template), "%s@SUNWzone",
 272             zfs_get_name(zhp)) >=  sizeof (template))
 273                 return (Z_ERR);
 274 
 275         cb.match_name = template;
 276         cb.len = strlen(template);
 277         cb.max = 0;
 278 
 279         if (zfs_iter_snapshots(zhp, get_snap_max, &cb) != 0)
 280                 return (Z_ERR);
 281 
 282         cb.max++;
 283 
 284         if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
 285             zfs_get_name(zhp), cb.max) >= snap_size)
 286                 return (Z_ERR);
 287 
 288         if (pre_snapshot(presnapbuf) != Z_OK)
 289                 return (Z_ERR);
 290         res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
 291         if (post_snapshot(postsnapbuf) != Z_OK)
 292                 return (Z_ERR);
 293 
 294         if (res != 0)
 295                 return (Z_ERR);
 296         return (Z_OK);
 297 }
 298 
 299 /*
 300  * We are using an explicit snapshot from some earlier point in time so
 301  * we need to validate it.  Run the brand specific hook.
 302  */
 303 static int
 304 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
 305 {
 306         int status;
 307         char cmdbuf[MAXPATHLEN];
 308 
 309         /* No brand-specific handler */
 310         if (validsnapbuf[0] == '\0')
 311                 return (Z_OK);
 312 
 313         /* pass args - snapshot_name & snap_path */
 314         if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
 315             snapshot_name, snap_path) >= sizeof (cmdbuf)) {
 316                 zerror("Command line too long");
 317                 return (Z_ERR);
 318         }
 319 
 320         /* Run the hook */
 321         status = do_subproc(cmdbuf);
 322         if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
 323             status, B_FALSE)) != ZONE_SUBPROC_OK)
 324                 return (Z_ERR);
 325 
 326         return (Z_OK);
 327 }
 328 
 329 /*
 330  * Remove the sw inventory file from inside this zonepath that we picked up out
 331  * of the snapshot.
 332  */
 333 static int
 334 clean_out_clone()
 335 {
 336         int err;
 337         zone_dochandle_t handle;
 338 
 339         if ((handle = zonecfg_init_handle()) == NULL) {
 340                 zperror(cmd_to_str(CMD_CLONE), B_TRUE);
 341                 return (Z_ERR);
 342         }
 343 
 344         if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
 345                 errno = err;
 346                 zperror(cmd_to_str(CMD_CLONE), B_TRUE);
 347                 zonecfg_fini_handle(handle);
 348                 return (Z_ERR);
 349         }
 350 
 351         zonecfg_rm_detached(handle, B_FALSE);
 352         zonecfg_fini_handle(handle);
 353 
 354         return (Z_OK);
 355 }
 356 
 357 /*
 358  * Make a ZFS clone on zonepath from snapshot_name.
 359  */
 360 static int
 361 clone_snap(char *snapshot_name, char *zonepath)
 362 {
 363         int             res = Z_OK;
 364         int             err;
 365         zfs_handle_t    *zhp;
 366         zfs_handle_t    *clone;
 367         nvlist_t        *props = NULL;
 368 
 369         if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
 370                 return (Z_NO_ENTRY);
 371 
 372         (void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
 373 
 374         /*
 375          * We turn off zfs SHARENFS and SHARESMB properties on the
 376          * zoneroot dataset in order to prevent the GZ from sharing
 377          * NGZ data by accident.
 378          */
 379         if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
 380             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
 381             "off") != 0) ||
 382             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
 383             "off") != 0)) {
 384                 if (props != NULL)
 385                         nvlist_free(props);
 386                 (void) fprintf(stderr, gettext("could not create ZFS clone "
 387                     "%s: out of memory\n"), zonepath);
 388                 return (Z_ERR);
 389         }
 390 
 391         err = zfs_clone(zhp, zonepath, props);
 392         zfs_close(zhp);
 393 
 394         nvlist_free(props);
 395 
 396         if (err != 0)
 397                 return (Z_ERR);
 398 
 399         /* create the mountpoint if necessary */
 400         if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
 401                 return (Z_ERR);
 402 
 403         /*
 404          * The clone has been created so we need to print a diagnostic
 405          * message if one of the following steps fails for some reason.
 406          */
 407         if (zfs_mount(clone, NULL, 0) != 0) {
 408                 (void) fprintf(stderr, gettext("could not mount ZFS clone "
 409                     "%s\n"), zfs_get_name(clone));
 410                 res = Z_ERR;
 411 
 412         } else if (clean_out_clone() != Z_OK) {
 413                 (void) fprintf(stderr, gettext("could not remove the "
 414                     "software inventory from ZFS clone %s\n"),
 415                     zfs_get_name(clone));
 416                 res = Z_ERR;
 417         }
 418 
 419         zfs_close(clone);
 420         return (res);
 421 }
 422 
 423 /*
 424  * This function takes a zonepath and attempts to determine what the ZFS
 425  * file system name (not mountpoint) should be for that path.  We do not
 426  * assume that zonepath is an existing directory or ZFS fs since we use
 427  * this function as part of the process of creating a new ZFS fs or clone.
 428  *
 429  * The way this works is that we look at the parent directory of the zonepath
 430  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
 431  * append the last component of the zonepath to generate the ZFS name for the
 432  * zonepath.  This matches the algorithm that ZFS uses for automatically
 433  * mounting a new fs after it is created.
 434  *
 435  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
 436  * all of the complexity that a user could possibly configure with arbitrary
 437  * mounts since there is no way to generate a ZFS name from a random path in
 438  * the file system.  We only try to handle the automatic mounts that ZFS does
 439  * for each file system.  ZFS restricts this so that a new fs must be created
 440  * in an existing parent ZFS fs.  It then automatically mounts the new fs
 441  * directly under the mountpoint for the parent fs using the last component
 442  * of the name as the mountpoint directory.
 443  *
 444  * For example:
 445  *    Name                      Mountpoint
 446  *    space/eng/dev/test/zone1  /project1/eng/dev/test/zone1
 447  *
 448  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
 449  * Z_ERR.
 450  */
 451 static int
 452 path2name(char *zonepath, char *zfs_name, int len)
 453 {
 454         int             res;
 455         char            *bnm, *dnm, *dname, *bname;
 456         zfs_handle_t    *zhp;
 457         struct stat     stbuf;
 458 
 459         /*
 460          * We need two tmp strings to handle paths directly in / (e.g. /foo)
 461          * since dirname will overwrite the first char after "/" in this case.
 462          */
 463         if ((bnm = strdup(zonepath)) == NULL)
 464                 return (Z_ERR);
 465 
 466         if ((dnm = strdup(zonepath)) == NULL) {
 467                 free(bnm);
 468                 return (Z_ERR);
 469         }
 470 
 471         bname = basename(bnm);
 472         dname = dirname(dnm);
 473 
 474         /*
 475          * This is a quick test to save iterating over all of the zfs datasets
 476          * on the system (which can be a lot).  If the parent dir is not in a
 477          * ZFS fs, then we're done.
 478          */
 479         if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
 480             strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
 481                 free(bnm);
 482                 free(dnm);
 483                 return (Z_ERR);
 484         }
 485 
 486         /* See if the parent directory is its own ZFS dataset. */
 487         if ((zhp = mount2zhandle(dname)) == NULL) {
 488                 /*
 489                  * The parent is not a ZFS dataset so we can't automatically
 490                  * create a dataset on the given path.
 491                  */
 492                 free(bnm);
 493                 free(dnm);
 494                 return (Z_ERR);
 495         }
 496 
 497         res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
 498 
 499         free(bnm);
 500         free(dnm);
 501         zfs_close(zhp);
 502         if (res >= len)
 503                 return (Z_ERR);
 504 
 505         return (Z_OK);
 506 }
 507 
/*
 * A ZFS file system iterator call-back function used to determine if the
 * file system has dependents (snapshots & clones).
 *
 * Being called at all means a dependent exists, so the handle is simply
 * closed and 1 is returned, which ends the iteration.  The data argument is
 * not used.
 */
/* ARGSUSED */
static int
has_dependent(zfs_handle_t *zhp, void *data)
{
        zfs_close(zhp);
        return (1);
}
 519 
 520 /*
 521  * Given a snapshot name, get the file system path where the snapshot lives.
 522  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
 523  * pl/zones/z1@SUNWzone1 would have a path of
 524  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
 525  */
 526 static int
 527 snap2path(char *snap_name, char *path, int len)
 528 {
 529         char            *p;
 530         zfs_handle_t    *zhp;
 531         char            mp[ZFS_MAXPROPLEN];
 532 
 533         if ((p = strrchr(snap_name, '@')) == NULL)
 534                 return (Z_ERR);
 535 
 536         /* Get the file system name from the snap_name. */
 537         *p = '\0';
 538         zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
 539         *p = '@';
 540         if (zhp == NULL)
 541                 return (Z_ERR);
 542 
 543         /* Get the file system mount point. */
 544         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
 545             0, B_FALSE) != 0) {
 546                 zfs_close(zhp);
 547                 return (Z_ERR);
 548         }
 549         zfs_close(zhp);
 550 
 551         p++;
 552         if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
 553                 return (Z_ERR);
 554 
 555         return (Z_OK);
 556 }
 557 
 558 /*
 559  * This callback function is used to iterate through a snapshot's dependencies
 560  * to find a filesystem that is a direct clone of the snapshot being iterated.
 561  */
 562 static int
 563 get_direct_clone(zfs_handle_t *zhp, void *data)
 564 {
 565         clone_data_t    *cd = data;
 566         char            origin[ZFS_MAXNAMELEN];
 567         char            ds_path[ZFS_MAXNAMELEN];
 568 
 569         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
 570                 zfs_close(zhp);
 571                 return (0);
 572         }
 573 
 574         (void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
 575 
 576         /* Make sure this is a direct clone of the snapshot we're iterating. */
 577         if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
 578             NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
 579                 zfs_close(zhp);
 580                 return (0);
 581         }
 582 
 583         if (cd->clone_zhp != NULL)
 584                 zfs_close(cd->clone_zhp);
 585 
 586         cd->clone_zhp = zhp;
 587         return (1);
 588 }
 589 
 590 /*
 591  * A ZFS file system iterator call-back function used to determine the clone
 592  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
 593  * that has a clone.  If found, it returns a reference to that clone in the
 594  * callback data.
 595  */
 596 static int
 597 find_clone(zfs_handle_t *zhp, void *data)
 598 {
 599         clone_data_t    *cd = data;
 600         time_t          snap_creation;
 601         int             zret = 0;
 602 
 603         /* If snapshot has no clones, skip it */
 604         if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
 605                 zfs_close(zhp);
 606                 return (0);
 607         }
 608 
 609         cd->snapshot = zfs_get_name(zhp);
 610 
 611         /* Get the creation time of this snapshot */
 612         snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
 613 
 614         /*
 615          * If this snapshot's creation time is greater than (i.e. younger than)
 616          * the current youngest snapshot found, iterate this snapshot to
 617          * get the right clone.
 618          */
 619         if (snap_creation >= cd->origin_creation) {
 620                 /*
 621                  * Iterate the dependents of this snapshot to find a clone
 622                  * that's a direct dependent.
 623                  */
 624                 if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
 625                     cd)) == -1) {
 626                         zfs_close(zhp);
 627                         return (1);
 628                 } else if (zret == 1) {
 629                         /*
 630                          * Found a clone, update the origin_creation time
 631                          * in the callback data.
 632                          */
 633                         cd->origin_creation = snap_creation;
 634                 }
 635         }
 636 
 637         zfs_close(zhp);
 638         return (0);
 639 }
 640 
 641 /*
 642  * A ZFS file system iterator call-back function used to remove standalone
 643  * snapshots.
 644  */
 645 /* ARGSUSED */
 646 static int
 647 rm_snap(zfs_handle_t *zhp, void *data)
 648 {
 649         /* If snapshot has clones, something is wrong */
 650         if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
 651                 zfs_close(zhp);
 652                 return (1);
 653         }
 654 
 655         if (zfs_unmount(zhp, NULL, 0) == 0) {
 656                 (void) zfs_destroy(zhp, B_FALSE);
 657         }
 658 
 659         zfs_close(zhp);
 660         return (0);
 661 }
 662 
 663 /*
 664  * A ZFS snapshot iterator call-back function which renames snapshots.
 665  */
 666 static int
 667 rename_snap(zfs_handle_t *zhp, void *data)
 668 {
 669         int                     res;
 670         zfs_snapshot_data_t     *cbp;
 671         char                    template[ZFS_MAXNAMELEN];
 672 
 673         cbp = (zfs_snapshot_data_t *)data;
 674 
 675         /*
 676          * When renaming snapshots with the iterator, the iterator can see
 677          * the same snapshot after we've renamed up in the namespace.  To
 678          * prevent this we check the count for the number of snapshots we have
 679          * to rename and stop at that point.
 680          */
 681         if (cbp->cntr >= cbp->num) {
 682                 zfs_close(zhp);
 683                 return (0);
 684         }
 685 
 686         if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
 687                 zfs_close(zhp);
 688                 return (0);
 689         }
 690 
 691         /* Only rename the snapshots we automatically generate when we clone. */
 692         if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
 693                 zfs_close(zhp);
 694                 return (0);
 695         }
 696 
 697         (void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
 698             cbp->max++);
 699 
 700         res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
 701         if (res != 0)
 702                 (void) fprintf(stderr, gettext("failed to rename snapshot %s "
 703                     "to %s: %s\n"), zfs_get_name(zhp), template,
 704                     libzfs_error_description(g_zfs));
 705 
 706         cbp->cntr++;
 707 
 708         zfs_close(zhp);
 709         return (res);
 710 }
 711 
 712 /*
 713  * Rename the source dataset's snapshots that are automatically generated when
 714  * we clone a zone so that there won't be a name collision when we promote the
 715  * cloned dataset.  Once the snapshots have been renamed, then promote the
 716  * clone.
 717  *
 718  * The snapshot rename process gets the highest number on the snapshot names
 719  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
 720  * and clone datasets, then renames the source dataset snapshots starting at
 721  * the next number.
 722  */
 723 static int
 724 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
 725 {
 726         zfs_snapshot_data_t     sd;
 727         char                    nm[ZFS_MAXNAMELEN];
 728         char                    template[ZFS_MAXNAMELEN];
 729 
 730         (void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
 731         /*
 732          * Start by getting the clone's snapshot max which we use
 733          * during the rename of the original dataset's snapshots.
 734          */
 735         (void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
 736         sd.match_name = template;
 737         sd.len = strlen(template);
 738         sd.max = 0;
 739 
 740         if (zfs_iter_snapshots(cln_zhp, get_snap_max, &sd) != 0)
 741                 return (Z_ERR);
 742 
 743         /*
 744          * Now make sure the source's snapshot max is at least as high as
 745          * the clone's snapshot max.
 746          */
 747         (void) snprintf(template, sizeof (template), "%s@SUNWzone",
 748             zfs_get_name(src_zhp));
 749         sd.match_name = template;
 750         sd.len = strlen(template);
 751         sd.num = 0;
 752 
 753         if (zfs_iter_snapshots(src_zhp, get_snap_max, &sd) != 0)
 754                 return (Z_ERR);
 755 
 756         /*
 757          * Now rename the source dataset's snapshots so there's no
 758          * conflict when we promote the clone.
 759          */
 760         sd.max++;
 761         sd.cntr = 0;
 762         if (zfs_iter_snapshots(src_zhp, rename_snap, &sd) != 0)
 763                 return (Z_ERR);
 764 
 765         /* close and reopen the clone dataset to get the latest info */
 766         zfs_close(cln_zhp);
 767         if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
 768                 return (Z_ERR);
 769 
 770         if (zfs_promote(cln_zhp) != 0) {
 771                 (void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
 772                     nm, libzfs_error_description(g_zfs));
 773                 return (Z_ERR);
 774         }
 775 
 776         zfs_close(cln_zhp);
 777         return (Z_OK);
 778 }
 779 
 780 /*
 781  * Promote the youngest clone.  That clone will then become the origin of all
 782  * of the other clones that were hanging off of the source dataset.
 783  */
 784 int
 785 promote_all_clones(zfs_handle_t *zhp)
 786 {
 787         clone_data_t    cd;
 788         char            nm[ZFS_MAXNAMELEN];
 789 
 790         cd.clone_zhp = NULL;
 791         cd.origin_creation = 0;
 792         cd.snapshot = NULL;
 793 
 794         if (zfs_iter_snapshots(zhp, find_clone, &cd) != 0) {
 795                 zfs_close(zhp);
 796                 return (Z_ERR);
 797         }
 798 
 799         /* Nothing to promote. */
 800         if (cd.clone_zhp == NULL)
 801                 return (Z_OK);
 802 
 803         /* Found the youngest clone to promote.  Promote it. */
 804         if (promote_clone(zhp, cd.clone_zhp) != 0) {
 805                 zfs_close(cd.clone_zhp);
 806                 zfs_close(zhp);
 807                 return (Z_ERR);
 808         }
 809 
 810         /* close and reopen the main dataset to get the latest info */
 811         (void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
 812         zfs_close(zhp);
 813         if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
 814                 return (Z_ERR);
 815 
 816         return (Z_OK);
 817 }
 818 
 819 /*
 820  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
 821  * possible, or by copying the data from the snapshot to the zonepath.
 822  */
 823 int
 824 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
 825 {
 826         int     err = Z_OK;
 827         char    clone_name[MAXPATHLEN];
 828         char    snap_path[MAXPATHLEN];
 829 
 830         if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
 831                 (void) fprintf(stderr, gettext("unable to find path for %s.\n"),
 832                     snap_name);
 833                 return (Z_ERR);
 834         }
 835 
 836         if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
 837                 return (Z_NO_ENTRY);
 838 
 839         /*
 840          * The zonepath cannot be ZFS cloned, try to copy the data from
 841          * within the snapshot to the zonepath.
 842          */
 843         if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
 844                 if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
 845                         if (clean_out_clone() != Z_OK)
 846                                 (void) fprintf(stderr,
 847                                     gettext("could not remove the "
 848                                     "software inventory from %s\n"), zonepath);
 849 
 850                 return (err);
 851         }
 852 
 853         if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
 854                 if (err != Z_NO_ENTRY) {
 855                         /*
 856                          * Cloning the snapshot failed.  Fall back to trying
 857                          * to install the zone by copying from the snapshot.
 858                          */
 859                         if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
 860                                 if (clean_out_clone() != Z_OK)
 861                                         (void) fprintf(stderr,
 862                                             gettext("could not remove the "
 863                                             "software inventory from %s\n"),
 864                                             zonepath);
 865                 } else {
 866                         /*
 867                          * The snapshot is unusable for some reason so restore
 868                          * the zone state to configured since we were unable to
 869                          * actually do anything about getting the zone
 870                          * installed.
 871                          */
 872                         int tmp;
 873 
 874                         if ((tmp = zone_set_state(target_zone,
 875                             ZONE_STATE_CONFIGURED)) != Z_OK) {
 876                                 errno = tmp;
 877                                 zperror2(target_zone,
 878                                     gettext("could not set state"));
 879                         }
 880                 }
 881         }
 882 
 883         return (err);
 884 }
 885 
 886 /*
 887  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
 888  */
 889 int
 890 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
 891     char *postsnapbuf)
 892 {
 893         zfs_handle_t    *zhp;
 894         char            clone_name[MAXPATHLEN];
 895         char            snap_name[MAXPATHLEN];
 896 
 897         /*
 898          * Try to get a zfs handle for the source_zonepath.  If this fails
 899          * the source_zonepath is not ZFS so return an error.
 900          */
 901         if ((zhp = mount2zhandle(source_zonepath)) == NULL)
 902                 return (Z_ERR);
 903 
 904         /*
 905          * Check if there is a file system already mounted on zonepath.  If so,
 906          * we can't clone to the path so we should fall back to copying.
 907          */
 908         if (is_mountpnt(zonepath)) {
 909                 zfs_close(zhp);
 910                 (void) fprintf(stderr,
 911                     gettext("A file system is already mounted on %s,\n"
 912                     "preventing use of a ZFS clone.\n"), zonepath);
 913                 return (Z_ERR);
 914         }
 915 
 916         /*
 917          * Instead of using path2name to get the clone name from the zonepath,
 918          * we could generate a name from the source zone ZFS name.  However,
 919          * this would mean we would create the clone under the ZFS fs of the
 920          * source instead of what the zonepath says.  For example,
 921          *
 922          * source_zonepath              zonepath
 923          * /pl/zones/dev/z1             /pl/zones/deploy/z2
 924          *
 925          * We don't want the clone to be under "dev", we want it under
 926          * "deploy", so that we can leverage the normal attribute inheritance
 927          * that ZFS provides in the fs hierarchy.
 928          */
 929         if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
 930                 zfs_close(zhp);
 931                 return (Z_ERR);
 932         }
 933 
 934         if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
 935             postsnapbuf) != Z_OK) {
 936                 zfs_close(zhp);
 937                 return (Z_ERR);
 938         }
 939         zfs_close(zhp);
 940 
 941         if (clone_snap(snap_name, clone_name) != Z_OK) {
 942                 /* Clean up the snapshot we just took. */
 943                 if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
 944                     != NULL) {
 945                         if (zfs_unmount(zhp, NULL, 0) == 0)
 946                                 (void) zfs_destroy(zhp, B_FALSE);
 947                         zfs_close(zhp);
 948                 }
 949 
 950                 return (Z_ERR);
 951         }
 952 
 953         (void) printf(gettext("Instead of copying, a ZFS clone has been "
 954             "created for this zone.\n"));
 955 
 956         return (Z_OK);
 957 }
 958 
 959 /*
 960  * Attempt to create a ZFS file system for the specified zonepath.
 961  * We either will successfully create a ZFS file system and get it mounted
 962  * on the zonepath or we don't.  The caller doesn't care since a regular
 963  * directory is used for the zonepath if no ZFS file system is mounted there.
 964  */
 965 void
 966 create_zfs_zonepath(char *zonepath)
 967 {
 968         zfs_handle_t    *zhp;
 969         char            zfs_name[MAXPATHLEN];
 970         nvlist_t        *props = NULL;
 971 
 972         if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
 973                 return;
 974 
 975         /* Check if the dataset already exists. */
 976         if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
 977                 zfs_close(zhp);
 978                 return;
 979         }
 980 
 981         /*
 982          * We turn off zfs SHARENFS and SHARESMB properties on the
 983          * zoneroot dataset in order to prevent the GZ from sharing
 984          * NGZ data by accident.
 985          */
 986         if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
 987             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
 988             "off") != 0) ||
 989             (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
 990             "off") != 0)) {
 991                 if (props != NULL)
 992                         nvlist_free(props);
 993                 (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
 994                     "out of memory\n"), zfs_name);
 995         }
 996 
 997         if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
 998             (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
 999                 (void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1000                     "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1001                 nvlist_free(props);
1002                 return;
1003         }
1004 
1005         nvlist_free(props);
1006 
1007         if (zfs_mount(zhp, NULL, 0) != 0) {
1008                 (void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1009                     "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1010                 (void) zfs_destroy(zhp, B_FALSE);
1011         } else {
1012                 if (chmod(zonepath, S_IRWXU) != 0) {
1013                         (void) fprintf(stderr, gettext("file system %s "
1014                             "successfully created, but chmod %o failed: %s\n"),
1015                             zfs_name, S_IRWXU, strerror(errno));
1016                         (void) destroy_zfs(zonepath);
1017                 } else {
1018                         (void) printf(gettext("A ZFS file system has been "
1019                             "created for this zone.\n"));
1020                 }
1021         }
1022 
1023         zfs_close(zhp);
1024 }
1025 
1026 /*
1027  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1028  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1029  * which means the caller should clean up the zonepath in the traditional
1030  * way.
1031  */
1032 int
1033 destroy_zfs(char *zonepath)
1034 {
1035         zfs_handle_t    *zhp;
1036         boolean_t       is_clone = B_FALSE;
1037         char            origin[ZFS_MAXPROPLEN];
1038 
1039         if ((zhp = mount2zhandle(zonepath)) == NULL)
1040                 return (Z_ERR);
1041 
1042         if (promote_all_clones(zhp) != 0)
1043                 return (Z_ERR);
1044 
1045         /* Now cleanup any snapshots remaining. */
1046         if (zfs_iter_snapshots(zhp, rm_snap, NULL) != 0) {
1047                 zfs_close(zhp);
1048                 return (Z_ERR);
1049         }
1050 
1051         /*
1052          * We can't destroy the file system if it has still has dependents.
1053          * There shouldn't be any at this point, but we'll double check.
1054          */
1055         if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1056                 (void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1057                     "dataset still has dependents\n"), zfs_get_name(zhp));
1058                 zfs_close(zhp);
1059                 return (Z_ERR);
1060         }
1061 
1062         /*
1063          * This might be a clone.  Try to get the snapshot so we can attempt
1064          * to destroy that as well.
1065          */
1066         if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1067             NULL, 0, B_FALSE) == 0)
1068                 is_clone = B_TRUE;
1069 
1070         if (zfs_unmount(zhp, NULL, 0) != 0) {
1071                 (void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1072                     zfs_get_name(zhp), libzfs_error_description(g_zfs));
1073                 zfs_close(zhp);
1074                 return (Z_ERR);
1075         }
1076 
1077         if (zfs_destroy(zhp, B_FALSE) != 0) {
1078                 /*
1079                  * If the destroy fails for some reason, try to remount
1080                  * the file system so that we can use "rm -rf" to clean up
1081                  * instead.
1082                  */
1083                 (void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1084                     zfs_get_name(zhp), libzfs_error_description(g_zfs));
1085                 (void) zfs_mount(zhp, NULL, 0);
1086                 zfs_close(zhp);
1087                 return (Z_ERR);
1088         }
1089 
1090         /*
1091          * If the zone has ever been moved then the mountpoint dir will not be
1092          * cleaned up by the zfs_destroy().  To handle this case try to clean
1093          * it up now but don't worry if it fails, that will be normal.
1094          */
1095         (void) rmdir(zonepath);
1096 
1097         (void) printf(gettext("The ZFS file system for this zone has been "
1098             "destroyed.\n"));
1099 
1100         if (is_clone) {
1101                 zfs_handle_t    *ohp;
1102 
1103                 /*
1104                  * Try to clean up the snapshot that the clone was taken from.
1105                  */
1106                 if ((ohp = zfs_open(g_zfs, origin,
1107                     ZFS_TYPE_SNAPSHOT)) != NULL) {
1108                         if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1109                             NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1110                                 (void) zfs_destroy(ohp, B_FALSE);
1111                         zfs_close(ohp);
1112                 }
1113         }
1114 
1115         zfs_close(zhp);
1116         return (Z_OK);
1117 }
1118 
1119 /*
1120  * Return true if the path is its own zfs file system.  We determine this
1121  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1122  * if it is a different fs.
1123  */
1124 boolean_t
1125 is_zonepath_zfs(char *zonepath)
1126 {
1127         int res;
1128         char *path;
1129         char *parent;
1130         struct statvfs64 buf1, buf2;
1131 
1132         if (statvfs64(zonepath, &buf1) != 0)
1133                 return (B_FALSE);
1134 
1135         if (strcmp(buf1.f_basetype, "zfs") != 0)
1136                 return (B_FALSE);
1137 
1138         if ((path = strdup(zonepath)) == NULL)
1139                 return (B_FALSE);
1140 
1141         parent = dirname(path);
1142         res = statvfs64(parent, &buf2);
1143         free(path);
1144 
1145         if (res != 0)
1146                 return (B_FALSE);
1147 
1148         if (buf1.f_fsid == buf2.f_fsid)
1149                 return (B_FALSE);
1150 
1151         return (B_TRUE);
1152 }
1153 
1154 /*
1155  * Implement the fast move of a ZFS file system by simply updating the
1156  * mountpoint.  Since it is file system already, we don't have the
1157  * issue of cross-file system copying.
1158  */
1159 int
1160 move_zfs(char *zonepath, char *new_zonepath)
1161 {
1162         int             ret = Z_ERR;
1163         zfs_handle_t    *zhp;
1164 
1165         if ((zhp = mount2zhandle(zonepath)) == NULL)
1166                 return (Z_ERR);
1167 
1168         if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1169             new_zonepath) == 0) {
1170                 /*
1171                  * Clean up the old mount point.  We ignore any failure since
1172                  * the zone is already successfully mounted on the new path.
1173                  */
1174                 (void) rmdir(zonepath);
1175                 ret = Z_OK;
1176         }
1177 
1178         zfs_close(zhp);
1179 
1180         return (ret);
1181 }
1182 
1183 /*
1184  * Validate that the given dataset exists on the system, and that neither it nor
1185  * its children are zvols.
1186  *
1187  * Note that we don't do anything with the 'zoned' property here.  All
1188  * management is done in zoneadmd when the zone is actually rebooted.  This
1189  * allows us to automatically set the zoned property even when a zone is
1190  * rebooted by the administrator.
1191  */
1192 int
1193 verify_datasets(zone_dochandle_t handle)
1194 {
1195         int return_code = Z_OK;
1196         struct zone_dstab dstab;
1197         zfs_handle_t *zhp;
1198         char propbuf[ZFS_MAXPROPLEN];
1199         char source[ZFS_MAXNAMELEN];
1200         zprop_source_t srctype;
1201 
1202         if (zonecfg_setdsent(handle) != Z_OK) {
1203                 /*
1204                  * TRANSLATION_NOTE
1205                  * zfs and dataset are literals that should not be translated.
1206                  */
1207                 (void) fprintf(stderr, gettext("could not verify zfs datasets: "
1208                     "unable to enumerate datasets\n"));
1209                 return (Z_ERR);
1210         }
1211 
1212         while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1213 
1214                 if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1215                     ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1216                         (void) fprintf(stderr, gettext("could not verify zfs "
1217                             "dataset %s: %s\n"), dstab.zone_dataset_name,
1218                             libzfs_error_description(g_zfs));
1219                         return_code = Z_ERR;
1220                         continue;
1221                 }
1222 
1223                 if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1224                     sizeof (propbuf), &srctype, source,
1225                     sizeof (source), 0) == 0 &&
1226                     (srctype == ZPROP_SRC_INHERITED)) {
1227                         (void) fprintf(stderr, gettext("could not verify zfs "
1228                             "dataset %s: mountpoint cannot be inherited\n"),
1229                             dstab.zone_dataset_name);
1230                         return_code = Z_ERR;
1231                         zfs_close(zhp);
1232                         continue;
1233                 }
1234 
1235                 zfs_close(zhp);
1236         }
1237         (void) zonecfg_enddsent(handle);
1238 
1239         return (return_code);
1240 }
1241 
1242 /*
1243  * Verify that the ZFS dataset exists, and its mountpoint
1244  * property is set to "legacy".
1245  */
1246 int
1247 verify_fs_zfs(struct zone_fstab *fstab)
1248 {
1249         zfs_handle_t *zhp;
1250         char propbuf[ZFS_MAXPROPLEN];
1251 
1252         if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1253             ZFS_TYPE_DATASET)) == NULL) {
1254                 (void) fprintf(stderr, gettext("could not verify fs %s: "
1255                     "could not access zfs dataset '%s'\n"),
1256                     fstab->zone_fs_dir, fstab->zone_fs_special);
1257                 return (Z_ERR);
1258         }
1259 
1260         if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1261                 (void) fprintf(stderr, gettext("cannot verify fs %s: "
1262                     "'%s' is not a file system\n"),
1263                     fstab->zone_fs_dir, fstab->zone_fs_special);
1264                 zfs_close(zhp);
1265                 return (Z_ERR);
1266         }
1267 
1268         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1269             NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1270                 (void) fprintf(stderr, gettext("could not verify fs %s: "
1271                     "zfs '%s' mountpoint is not \"legacy\"\n"),
1272                     fstab->zone_fs_dir, fstab->zone_fs_special);
1273                 zfs_close(zhp);
1274                 return (Z_ERR);
1275         }
1276 
1277         zfs_close(zhp);
1278         return (Z_OK);
1279 }
1280 
1281 /*
1282  * Destroy the specified mnttab structure that was created by mnttab_dup().
1283  * NOTE: The structure's mnt_time field isn't freed.
1284  */
1285 static void
1286 mnttab_destroy(struct mnttab *tabp)
1287 {
1288         assert(tabp != NULL);
1289 
1290         free(tabp->mnt_mountp);
1291         free(tabp->mnt_special);
1292         free(tabp->mnt_fstype);
1293         free(tabp->mnt_mntopts);
1294         free(tabp);
1295 }
1296 
1297 /*
1298  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1299  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1300  * structure or NULL if an error occurred.  If an error occurs, then this
1301  * function sets errno to reflect the error.  mnttab structures created by
1302  * this function should be destroyed via mnttab_destroy().
1303  */
1304 static struct mnttab *
1305 mnttab_dup(const struct mnttab *srcp)
1306 {
1307         struct mnttab *retval;
1308 
1309         assert(srcp != NULL);
1310 
1311         retval = (struct mnttab *)calloc(1, sizeof (*retval));
1312         if (retval == NULL) {
1313                 errno = ENOMEM;
1314                 return (NULL);
1315         }
1316         if (srcp->mnt_special != NULL) {
1317                 retval->mnt_special = strdup(srcp->mnt_special);
1318                 if (retval->mnt_special == NULL)
1319                         goto err;
1320         }
1321         if (srcp->mnt_fstype != NULL) {
1322                 retval->mnt_fstype = strdup(srcp->mnt_fstype);
1323                 if (retval->mnt_fstype == NULL)
1324                         goto err;
1325         }
1326         retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1327         if (retval->mnt_mntopts == NULL)
1328                 goto err;
1329         if (srcp->mnt_mntopts != NULL) {
1330                 if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1331                     MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1332                     sizeof (char)) {
1333                         mnttab_destroy(retval);
1334                         errno = EOVERFLOW; /* similar to mount(2) behavior */
1335                         return (NULL);
1336                 }
1337         } else {
1338                 retval->mnt_mntopts[0] = '\0';
1339         }
1340         return (retval);
1341 
1342 err:
1343         mnttab_destroy(retval);
1344         errno = ENOMEM;
1345         return (NULL);
1346 }
1347 
1348 /*
1349  * Determine whether the specified ZFS dataset's mountpoint property is set
1350  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1351  * then the string pointer to which the mountpoint argument points is assigned
1352  * a dynamically-allocated string containing the dataset's mountpoint
1353  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1354  * error occurs, then the string pointer to which the mountpoint argument
1355  * points isn't modified.
1356  *
1357  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1358  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1359  * appropriate error code.
1360  */
1361 static boolean_t
1362 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1363 {
1364         zfs_handle_t *zhp;
1365         char propbuf[ZFS_MAXPROPLEN];
1366 
1367         assert(dataset_name != NULL);
1368         assert(mountpoint != NULL);
1369 
1370         if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1371                 errno = EINVAL;
1372                 return (B_FALSE);
1373         }
1374         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1375             NULL, NULL, 0, 0) != 0) {
1376                 zfs_close(zhp);
1377                 errno = EINVAL;
1378                 return (B_FALSE);
1379         }
1380         zfs_close(zhp);
1381         if (strcmp(propbuf, "legacy") != 0) {
1382                 if ((*mountpoint = strdup(propbuf)) == NULL) {
1383                         errno = ENOMEM;
1384                         return (B_FALSE);
1385                 }
1386         }
1387         return (B_TRUE);
1388 }
1389 
1390 
1391 /*
1392  * This zonecfg_find_mounts() callback records information about mounts of
1393  * interest in a zonepath.  It also tallies the number of zone
1394  * root overlay mounts and the number of unexpected mounts found.
1395  * This function outputs errors using zerror() if it finds unexpected
1396  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1397  *
1398  * This function returns zero on success and a nonzero value on failure.
1399  */
1400 static int
1401 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1402 {
1403         zone_mounts_t *mounts;
1404         const char *zone_mount_dir;
1405 
1406         assert(mountp != NULL);
1407         assert(cookiep != NULL);
1408 
1409         mounts = (zone_mounts_t *)cookiep;
1410         zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1411         if (strcmp(zone_mount_dir, "/root") == 0) {
1412                 /*
1413                  * Check for an overlay mount.  If we already detected a /root
1414                  * mount, then the current mount must be an overlay mount.
1415                  */
1416                 if (mounts->root_mnttab != NULL) {
1417                         mounts->num_root_overlay_mounts++;
1418                         return (0);
1419                 }
1420 
1421                 /*
1422                  * Store the root mount's mnttab information in the
1423                  * zone_mounts_t structure for future use.
1424                  */
1425                 if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1426                         zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1427                         return (-1);
1428                 }
1429 
1430                 /*
1431                  * Determine if the filesystem is a ZFS filesystem with a
1432                  * non-legacy mountpoint.  If it is, then set the root
1433                  * filesystem's mnttab's mnt_mountp field to a non-NULL
1434                  * value, which will serve as a flag to indicate this special
1435                  * condition.
1436                  */
1437                 if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1438                     get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1439                     &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1440                         zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1441                         return (-1);
1442                 }
1443         } else {
1444                 /*
1445                  * An unexpected mount was found.  Notify the user.
1446                  */
1447                 if (mounts->num_unexpected_mounts == 0)
1448                         zerror(gettext("These file systems are mounted on "
1449                             "subdirectories of %s.\n"), mounts->zonepath);
1450                 mounts->num_unexpected_mounts++;
1451                 (void) zfm_print(mountp, NULL);
1452         }
1453         return (0);
1454 }
1455 
1456 /*
1457  * Initialize the specified zone_mounts_t structure for the given zonepath.
1458  * If this function succeeds, it returns zero and the specified zone_mounts_t
1459  * structure contains information about mounts in the specified zonepath.
1460  * The function returns a nonzero value if it fails.  The zone_mounts_t
1461  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1462  * function fails.
1463  */
1464 int
1465 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1466 {
1467         assert(mounts != NULL);
1468         assert(zonepath != NULL);
1469 
1470         bzero(mounts, sizeof (*mounts));
1471         if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1472                 zerror(gettext("the process ran out of memory while checking "
1473                     "for mounts in zonepath %s."), zonepath);
1474                 return (-1);
1475         }
1476         mounts->zonepath_len = strlen(zonepath);
1477         if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1478             -1) {
1479                 zerror(gettext("an error occurred while checking for mounts "
1480                     "in zonepath %s."), zonepath);
1481                 zone_mounts_destroy(mounts);
1482                 return (-1);
1483         }
1484         return (0);
1485 }
1486 
1487 /*
1488  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1489  * This function doesn't free the memory occupied by the structure itself
1490  * (i.e., it doesn't free the parameter).
1491  */
1492 void
1493 zone_mounts_destroy(zone_mounts_t *mounts)
1494 {
1495         assert(mounts != NULL);
1496 
1497         free(mounts->zonepath);
1498         if (mounts->root_mnttab != NULL)
1499                 mnttab_destroy(mounts->root_mnttab);
1500 }
1501 
1502 /*
1503  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1504  * prior to the move) using the specified zonepath.  mounts should refer to
1505  * the zone_mounts_t structure describing the zone's mount information.
1506  *
1507  * This function returns zero if the mount succeeds and a nonzero value
1508  * if it doesn't.
1509  */
1510 int
1511 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1512 {
1513         char zoneroot[MAXPATHLEN];
1514         struct mnttab *mtab;
1515         int flags;
1516 
1517         assert(mounts != NULL);
1518         assert(zonepath != NULL);
1519 
1520         /*
1521          * If there isn't a root filesystem, then don't do anything.
1522          */
1523         mtab = mounts->root_mnttab;
1524         if (mtab == NULL)
1525                 return (0);
1526 
1527         /*
1528          * Determine the root filesystem's new mountpoint.
1529          */
1530         if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1531             sizeof (zoneroot)) {
1532                 zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1533                 return (-1);
1534         }
1535 
1536         /*
1537          * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1538          * mnt_mountp field is non-NULL), then make the filesystem's new
1539          * mount point its mountpoint property and mount the filesystem.
1540          */
1541         if (mtab->mnt_mountp != NULL) {
1542                 zfs_handle_t *zhp;
1543 
1544                 if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1545                     ZFS_TYPE_DATASET)) == NULL) {
1546                         zerror(gettext("could not get ZFS handle for the zone's"
1547                             " root filesystem"));
1548                         return (-1);
1549                 }
1550                 if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1551                     zoneroot) != 0) {
1552                         zerror(gettext("could not modify zone's root "
1553                             "filesystem's mountpoint property"));
1554                         zfs_close(zhp);
1555                         return (-1);
1556                 }
1557                 if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1558                         zerror(gettext("unable to mount zone root %s: %s"),
1559                             zoneroot, libzfs_error_description(g_zfs));
1560                         if (zfs_prop_set(zhp,
1561                             zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1562                             mtab->mnt_mountp) != 0)
1563                                 zerror(gettext("unable to restore zone's root "
1564                                     "filesystem's mountpoint property"));
1565                         zfs_close(zhp);
1566                         return (-1);
1567                 }
1568                 zfs_close(zhp);
1569                 return (0);
1570         }
1571 
1572         /*
1573          * The root filesystem is either a legacy-mounted ZFS filesystem or
1574          * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1575          */
1576         if (mtab->mnt_mntopts != NULL)
1577                 flags = MS_OPTIONSTR;
1578         else
1579                 flags = 0;
1580         if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1581             mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1582                 flags = errno;
1583                 zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1584                     strerror(flags));
1585                 return (-1);
1586         }
1587         return (0);
1588 }
1589 
1590 /*
1591  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1592  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1593  * describing the zone's mount information.  If force is B_TRUE, then if the
1594  * unmount fails, then the function will try to forcibly unmount the zone's root
1595  * filesystem.
1596  *
1597  * This function returns zero if the unmount (forced or otherwise) succeeds;
1598  * otherwise, it returns a nonzero value.
1599  */
1600 int
1601 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1602     boolean_t force)
1603 {
1604         char zoneroot[MAXPATHLEN];
1605         struct mnttab *mtab;
1606         int err;
1607 
1608         assert(mounts != NULL);
1609         assert(zonepath != NULL);
1610 
1611         /*
1612          * If there isn't a root filesystem, then don't do anything.
1613          */
1614         mtab = mounts->root_mnttab;
1615         if (mtab == NULL)
1616                 return (0);
1617 
1618         /*
1619          * Determine the root filesystem's mountpoint.
1620          */
1621         if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1622             sizeof (zoneroot)) {
1623                 zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1624                 return (-1);
1625         }
1626 
1627         /*
1628          * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1629          * the filesystem via libzfs.
1630          */
1631         if (mtab->mnt_mountp != NULL) {
1632                 zfs_handle_t *zhp;
1633 
1634                 if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1635                     ZFS_TYPE_DATASET)) == NULL) {
1636                         zerror(gettext("could not get ZFS handle for the zone's"
1637                             " root filesystem"));
1638                         return (-1);
1639                 }
1640                 if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1641                         if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1642                             0) {
1643                                 zfs_close(zhp);
1644                                 return (0);
1645                         }
1646                         zerror(gettext("unable to unmount zone root %s: %s"),
1647                             zoneroot, libzfs_error_description(g_zfs));
1648                         zfs_close(zhp);
1649                         return (-1);
1650                 }
1651                 zfs_close(zhp);
1652                 return (0);
1653         }
1654 
1655         /*
1656          * Use umount(2) to unmount the root filesystem.  If this fails, then
1657          * forcibly unmount it if the force flag is set.
1658          */
1659         if (umount(zoneroot) != 0) {
1660                 if (force && umount2(zoneroot, MS_FORCE) == 0)
1661                         return (0);
1662                 err = errno;
1663                 zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1664                     strerror(err));
1665                 return (-1);
1666         }
1667         return (0);
1668 }
1669 
1670 int
1671 init_zfs(void)
1672 {
1673         if ((g_zfs = libzfs_init()) == NULL) {
1674                 (void) fprintf(stderr, gettext("failed to initialize ZFS "
1675                     "library\n"));
1676                 return (Z_ERR);
1677         }
1678 
1679         return (Z_OK);
1680 }