diff options
Diffstat (limited to 'usr/src/lib/brand/jcommon')
-rw-r--r-- | usr/src/lib/brand/jcommon/Makefile | 28 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/README.smf | 107 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/cattach | 63 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/cdetach | 50 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/cinstall | 183 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/cuninstall | 74 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/libhooks.ksh | 86 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/poststate | 34 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/prestate | 34 | ||||
-rwxr-xr-x | usr/src/lib/brand/jcommon/query | 52 | ||||
-rw-r--r-- | usr/src/lib/brand/jcommon/statechange | 906 |
11 files changed, 1617 insertions, 0 deletions
diff --git a/usr/src/lib/brand/jcommon/Makefile b/usr/src/lib/brand/jcommon/Makefile new file mode 100644 index 0000000000..59bc0de644 --- /dev/null +++ b/usr/src/lib/brand/jcommon/Makefile @@ -0,0 +1,28 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# Copyright 2020 Joyent, Inc. +# + +PROGS = cattach cdetach cinstall cuninstall libhooks.ksh \ + poststate prestate query statechange +BRAND = jcommon +FILEMODE = 0444 + +include ../Makefile.brand + +clean: + +clobber: + $(RM) $(ROOTPROGS) + +all: + +install: $(ROOTPROGS) diff --git a/usr/src/lib/brand/jcommon/README.smf b/usr/src/lib/brand/jcommon/README.smf new file mode 100644 index 0000000000..64cd3ade4f --- /dev/null +++ b/usr/src/lib/brand/jcommon/README.smf @@ -0,0 +1,107 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2015 Joyent, Inc. +# + +There are a few rather brand specific issues related to how SMF works inside of +them. This README describes how the different pieces interact and where they are +defined. Note that this README only applies to the Joyent-style sparse zones. It +does not apply to the KVM brand or the traditional ipkg and S10 branded zones. + +# +# Per-brand manifests file +# + +Each brand has a file called `manifests`. This file lists the set of manifests +that the brand cares about being available to the zone. It is formatted as: + +<manifest_name> <enabled | disabled> + +Examples are: + +network/smb/client.xml disabled +network/ssh.xml enabled + +The use of enabled or disabled determines the default disposition of the service +when it is imported. + +This list is used in various places throughout the rest of the system for +determining what shows up in the SMF repositories by default and what shows up +in /lib/svc/manifest. + +# +# Files in /lib/svc/manifest +# + +/lib/svc/manifest is part of the sparse filesystem that gets placed into every +zone. Unlike /usr, /lib/svc/manifest is brand specific. The zones service +(svc:/system/zones:default) is responsible for creating the per-brand +/lib/svc/manifest directories and they live in /zones/manifests/<brand-name>. +This brand specific directory is lofs-mounted read-only into each zone. + +The presence of the enabled and disabled option in the brand's manifests file +determine whether or not the service is enabled by default when imported. The +xml file is changed to match the setting. + +# +# Initial SMF repositories +# + +SMF in the minimal brand works differently than it does in the normal Joyent +Brand when it comes to specifying the initial services that are inside of the +dataset and what files are in the SMF repository. + +SMF has the notion of a `seed repository`. This repository is the initial one +that is used or copied for new zones. This repository contains various services +already imported, whether or not they are enabled or disabled, and the various +service properties. + +The traditional `joyent` brand gets this from the dataset itself. In other words, +the database is already populated with the proper SMF state. + +In the `joyent-minimal` brand we handle this differently. We want to be able to +reuse the datasets that exist but not be stuck with their rather large seed +repositories that contain many things which are harmful in the minimal context, +particularly manifest import (both the early and normal kind). To handle this +the joyent-minimal brand defines a seed repository of its own that gets +installed at zone creation time and replaces any existing repository. + +This seed repository is generated using the `svc.configd-native` and +`svccfg-native` binaries. Every manifest listed in the brand's manifests file is +included. + +Manifests available to the `joyent` brand not imported into the database are +available for manual import in /lib/svc/manifest. With the minimal brand, only +the bare minimum number of manifests should be imported. + +# +# Using non-imported manifests +# + +To use one of the manifests that exists but hasn't been imported is pretty easy. +At some point in time after the initial creation of the zone (during the first +boot setup script for example), you can import the service. For example, if you +were going to import the cron service you would run: + +svccfg import /lib/svc/manifest/system/cron.xml + +Next, you need to potentially enable the service depending on the default +disposition of the service. You enable the service by running: + +svcadm enable -s <service> + +Adding the `-s` flag causes the enabling to be synchronous. If you do not +include the flag then it will poke svc.startd to enable the service and return. +If the service is already enabled by default, then this is safe to run and it +won't change anything. It is safer to just always enable or disable the service +after importing it based on your needs. diff --git a/usr/src/lib/brand/jcommon/cattach b/usr/src/lib/brand/jcommon/cattach new file mode 100644 index 0000000000..4f2e65ad83 --- /dev/null +++ b/usr/src/lib/brand/jcommon/cattach @@ -0,0 +1,63 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +. /usr/lib/brand/shared/common.ksh + +ZONENAME="" +ZONEPATH="" +# Default to 10GB diskset quota +ZQUOTA=10 + +while getopts "R:t:U:q:z:" opt +do + case "$opt" in + R) ZONEPATH="$OPTARG";; + q) ZQUOTA="$OPTARG";; + z) ZONENAME="$OPTARG";; + *) printf "$m_usage\n" + exit $ZONE_SUBPROC_USAGE;; + esac +done +shift OPTIND-1 + +if [[ -z $ZONEPATH || -z $ZONENAME ]]; then + print -u2 "Brand error: No zone path or name" + exit $ZONE_SUBPROC_USAGE +fi + +# The dataset quota must be a number. +case $ZQUOTA in *[!0-9]*) + print -u2 "Brand error: The quota $ZQUOTA is not a number" + exit $ZONE_SUBPROC_USAGE;; +esac + +ZROOT=$ZONEPATH/root + +# Get the dataset of the parent directory of the zonepath. +dname=${ZONEPATH%/*} +bname=${ZONEPATH##*/} +PDS_NAME=`mount | nawk -v p=$dname '{if ($1 == p) print $3}'` +[ -z "$PDS_NAME" ] && \ + print -u2 "Brand error: missing parent ZFS dataset for $dname" + +jcommon_attach_hook + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/jcommon/cdetach b/usr/src/lib/brand/jcommon/cdetach new file mode 100644 index 0000000000..4b67b31fc5 --- /dev/null +++ b/usr/src/lib/brand/jcommon/cdetach @@ -0,0 +1,50 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +. /usr/lib/brand/shared/common.ksh + +ZONENAME="" +ZONEPATH="" + +while getopts "R:t:U:z:" opt +do + case "$opt" in + R) ZONEPATH="$OPTARG";; + z) ZONENAME="$OPTARG";; + *) printf "$m_usage\n" + exit $ZONE_SUBPROC_USAGE;; + esac +done +shift OPTIND-1 + +if [[ -z $ZONEPATH || -z $ZONENAME ]]; then + print -u2 "Brand error: No zone path or name" + exit $ZONE_SUBPROC_USAGE +fi + +# +# We just need a brand hook so that we can bypass the mount checking +# that zoneadm does. This is needed because we have the cores dataset +# mounted under {zonepath}/root. +# +cp /etc/zones/${ZONENAME}.xml ${ZONEPATH}/SUNWdetached.xml + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/jcommon/cinstall b/usr/src/lib/brand/jcommon/cinstall new file mode 100644 index 0000000000..98b9322b5f --- /dev/null +++ b/usr/src/lib/brand/jcommon/cinstall @@ -0,0 +1,183 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2016, Joyent, Inc. All rights reserved. +# Use is subject to license terms. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +. /usr/lib/brand/shared/common.ksh + +REPROVISONING="" +ZONENAME="" +ZONEPATH="" +# Default to 10GB diskset quota +ZQUOTA=10 +ZVOL_NAME=`/usr/bin/uuid -v 4` + +# +# The following are the list of features that a corresponding brand may +# enable. If they wish to do so, then they must set the following flags +# to values such that [[ -z $flag ]] is true. The following are the +# currently supported flags: +# +# o jst_reprovision - Brand supports reprovision +# o jst_tmplopt - Template image optional +# + +function fixup_images +{ + # New imgadm renames the dataset's snapshot at import to @final for us + # and when it exists, we use that. However, when it does not exist we + # still use the old method of creating a zones/<uuid>@<uuid> snapshot + # so we can support old datasets. + exists=$(zfs list -Ho name ${PDS_NAME}/${TMPLZONE}@final 2>&1) + if [[ $? == 0 && ${exists} == "${PDS_NAME}/${TMPLZONE}@final" ]]; then + zfs clone -o devices=off -F ${QUOTA_ARG} ${PDS_NAME}/${TMPLZONE}@final \ + ${PDS_NAME}/${bname} || fatal "failed to clone zone dataset" + elif [[ ${exists} =~ "dataset does not exist" ]]; then + zfs snapshot ${PDS_NAME}/${TMPLZONE}@${bname} + zfs clone -o devices=off -F ${QUOTA_ARG} ${PDS_NAME}/${TMPLZONE}@${bname} \ + ${PDS_NAME}/${bname} || fatal "failed to clone zone dataset" + else + fatal "Unable to determine snapshot for ${PDS_NAME}/${TMPLZONE}" + fi +} + +while getopts "rR:t:U:q:z:" opt +do + case "$opt" in + r) + set -x + if [[ -z "$jst_reprovision" ]]; then + print -u2 "unsupported reprovision requested" + exit $ZONE_SUBPROC_USAGE + fi + REPROVISIONING="true" + ;; + R) ZONEPATH="$OPTARG";; + t) TMPLZONE="$OPTARG";; + # UUID is only used in the postinstall script + U) UUID="$OPTARG";; + q) ZQUOTA="$OPTARG";; + z) ZONENAME="$OPTARG";; + *) printf "$m_usage\n" + exit $ZONE_SUBPROC_USAGE;; + esac +done +shift OPTIND-1 + +# +# IMPORTANT: all actions below need to consider reprovision. If the action +# modifies files in the zoneroot itself, it should be run on reprovision +# any other changes to the zone or dataset should not be done on reprovision. +# + +if [[ -z ${REPROVISIONING} \ + && -n $(zonecfg -z "${ZONENAME}" info attr name=transition \ + | grep "value: receiving:") ]]; then + + # Here we're doing an install for a received zone, the dataset should have + # already been created. + exit $ZONE_SUBPROC_OK +fi + +if [[ -z $ZONEPATH || -z $ZONENAME ]]; then + print -u2 "Brand error: No zone path or name" + exit $ZONE_SUBPROC_USAGE +fi + +if [[ -z ${REPROVISIONING} ]]; then + # The install may requires a template zone. + if [[ -z $TMPLZONE && -z "$jst_tmplopt" ]]; then + print -u2 "Brand error: a zone template is required" + exit $ZONE_SUBPROC_USAGE + fi + + # The dataset quota must be a number. + case $ZQUOTA in *[!0-9]*) + print -u2 "Brand error: The quota $ZQUOTA is not a number" + exit $ZONE_SUBPROC_USAGE;; + esac +fi + +ZROOT=$ZONEPATH/root + +if [[ -z ${REPROVISIONING} ]]; then + # Get the dataset of the parent directory of the zonepath. + dname=${ZONEPATH%/*} + bname=${ZONEPATH##*/} + PDS_NAME=`mount | nawk -v p=$dname '{if ($1 == p) print $3}'` + [ -z "$PDS_NAME" ] && \ + print -u2 "Brand error: missing parent ZFS dataset for $dname" + + # We expect that zoneadm was invoked with '-x nodataset', so it won't have + # created the dataset. + + QUOTA_ARG= + if [[ ${ZQUOTA} != "0" ]]; then + QUOTA_ARG="-o quota=${ZQUOTA}g" + fi + + # + # Some zone brands (KVM) optionally have a top level dataset. If + # they don't have one, we need to set the quota on the data + # disk. Otherwise, we set it on the normal top level dataset. + # + if [[ -z ${TMPLZONE} ]]; then + zfs set quota=${ZQUOTA}g ${PDS_NAME}/${bname} + else + fixup_images + fi + +fi + +# The rest should be run when REPROVISIONING is set as well. + +# Make sure zoneinit is setup to use -o xtrace, this handles old datasets where +# is not yet enabled by default. +if [[ -f ${ZROOT}/root/zoneinit && \ + -z $(grep "^set -o xtrace" ${ZROOT}/root/zoneinit) ]]; then + sed -i "" -e "s/^#set -o xtrace/set -o xtrace/" ${ZROOT}/root/zoneinit +fi + +if [ ! -d ${ZONEPATH}/config ]; then + mkdir -p ${ZONEPATH}/config + chmod 755 ${ZONEPATH}/config +fi + +if [ ! -d ${ZROOT}/tmp ]; then + mkdir -p ${ZROOT}/tmp + chmod 1777 ${ZROOT}/tmp +fi + +# make /var/svc for the 'provisioning file' +if [ ! -d ${ZROOT}/var/svc ]; then + mkdir -p ${ZROOT}/var/svc + chmod 0755 ${ZROOT}/var/svc +fi + +jcommon_attach_hook + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/jcommon/cuninstall b/usr/src/lib/brand/jcommon/cuninstall new file mode 100644 index 0000000000..9be35bc7a9 --- /dev/null +++ b/usr/src/lib/brand/jcommon/cuninstall @@ -0,0 +1,74 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2016 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +. /usr/lib/brand/shared/common.ksh + +ZONENAME="" +ZONEPATH="" + +while getopts "FR:z:" opt +do + case "$opt" in + F) ;; + R) ZONEPATH="$OPTARG";; + z) ZONENAME="$OPTARG";; + *) printf "$m_usage\n" + exit $ZONE_SUBPROC_USAGE;; + esac +done +shift OPTIND-1 + +if [[ -z $ZONEPATH || -z $ZONENAME ]]; then + print -u2 "Brand error: No zone path or name" + exit $ZONE_SUBPROC_USAGE +fi + +# Get the dataset of the parent directory of the zonepath. +dname=${ZONEPATH%/*} +bname=${ZONEPATH##*/} +PDS_NAME=`mount | nawk -v p=$dname '{if ($1 == p) print $3}'` +if [[ -z "$PDS_NAME" ]]; then + print -u2 "Brand error: missing parent ZFS dataset for $dname" + exit $ZONE_SUBPROC_USAGE +fi + +# check if the origin is not an @final dataset, if not, we'll destroy it +ORIGIN=`zfs get -H -ovalue origin $PDS_NAME/$bname | grep -v "@final$"` + +zfs destroy -rF $PDS_NAME/cores/$bname +zfs destroy -rF $PDS_NAME/$bname +if [[ $? != 0 ]]; then + echo "processes in zone: " + fuser ${ZONEPATH} + # Since we are destroying the zone, we don't want to leave a zoneroot behind + # just because something couldn't be umounted. So we'll also force the + # umount with the 'f' option here. + zfs destroy -rfF $PDS_NAME/$bname +fi + +[[ -n ${ORIGIN} && ${ORIGIN} != "-" ]] && zfs destroy -F $ORIGIN + +rm -rf $ZONEPATH +rm -rf /var/zonecontrol/${ZONENAME} + +jcommon_uninstall_hook + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/jcommon/libhooks.ksh b/usr/src/lib/brand/jcommon/libhooks.ksh new file mode 100644 index 0000000000..913d81899b --- /dev/null +++ b/usr/src/lib/brand/jcommon/libhooks.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +# +# This file contains various hooks that are used by more than a single +# brand. This file should be included by the brand-specific files. +# + +jattach_kvm_final_setup() +{ + ZRAM=$(zonecfg -z ${ZONENAME} info attr name=ram | \ + grep "value: " | cut -d ':' -f2 | tr -d ' ') + + if [[ -z ${ZRAM} ]]; then + echo "Unable to find RAM value for KVM VM" + exit $ZONE_SUBPROC_FATAL + fi + + # 100G unless the VM has 80G or more DRAM, in which case: DRAM + 20G. + CORE_QUOTA=102400 + if [[ ${ZRAM} -gt 81920 ]]; then + CORE_QUOTA=$((${ZRAM} + 20480)) + fi + + # The cores quota exists to control run-away zones. As such we make it + # such that it will protect the system from a single run-away, but + # still allow us to get most cores. + rm -rf $ZONEPATH/cores + zfs create -o quota=${CORE_QUOTA}m -o mountpoint=/${PDS_NAME}/$bname/cores \ + ${PDS_NAME}/cores/$bname +} + +jattach_zone_final_setup() +{ + if [[ -z ${REPROVISIONING} ]]; then + # The cores quota exists to control run-away zones. As such we make it + # such that it will protect the system from a single run-away, but + # still allow us to get most cores. 100G seems good enough based on + # samples from JPC. + rm -rf $ZONEPATH/cores + CORE_QUOTA=102400 + zfs create -o quota=${CORE_QUOTA}m -o mountpoint=/${PDS_NAME}/$bname/cores \ + ${PDS_NAME}/cores/$bname + + chmod 700 $ZONEPATH + fi + + egrep -s "netcfg:" $ZROOT/etc/passwd + if (( $? != 0 )); then + echo "netcfg:x:17:65:Network Configuration Admin:/:" \ + >> $ZROOT/etc/passwd + echo "netcfg:*LK*:::::::" >> $ZROOT/etc/shadow + fi + egrep -s "netadm:" $ZROOT/etc/group + (( $? != 0 )) && echo "netadm::65:" >> $ZROOT/etc/group + + # /etc/svc/profile needs to be a directory with some contents which we + # can get from the template. The early manifest import svc + # (lib/svc/method/manifest-import) copies some symlinks from the + # template's var/svc/profile dir and we need to make sure those are + # pointing at the right files and not left dangling. + ZPROFILE=$ZROOT/etc/svc/profile + if [ ! -d $ZPROFILE ]; then + mkdir $ZPROFILE + cp -p $ZROOT/var/svc/profile/generic_limited_net.xml $ZPROFILE + cp -p $ZROOT/var/svc/profile/inetd_generic.xml $ZPROFILE + cp -p $ZROOT/var/svc/profile/ns_dns.xml $ZPROFILE + cp -p $ZROOT/var/svc/profile/platform_none.xml $ZPROFILE + fi + + touch $ZROOT/var/log/courier.log +} diff --git a/usr/src/lib/brand/jcommon/poststate b/usr/src/lib/brand/jcommon/poststate new file mode 100644 index 0000000000..4e4b1207ed --- /dev/null +++ b/usr/src/lib/brand/jcommon/poststate @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +if [[ -n $_ZONEADMD_brand_debug ]]; then + logfile=/var/log/zone_bh.$1 + date >>$logfile + echo "zone $1 post-state-change $3 $4" >>$logfile + ksh -x /usr/lib/brand/$ps_brand/statechange "post" $@ 2>>$logfile + res=$? + echo "zone $1 post-state-change result $res" >>$logfile +else + /usr/lib/brand/$ps_brand/statechange "post" $@ + res=$? +fi + +exit $res diff --git a/usr/src/lib/brand/jcommon/prestate b/usr/src/lib/brand/jcommon/prestate new file mode 100644 index 0000000000..a61af89752 --- /dev/null +++ b/usr/src/lib/brand/jcommon/prestate @@ -0,0 +1,34 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2014 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +if [[ -n $_ZONEADMD_brand_debug ]]; then + logfile=/var/log/zone_bh.$1 + date >>$logfile + echo "zone $1 pre-state-change $3 $4" >>$logfile + ksh -x /usr/lib/brand/$ps_brand/statechange "pre" $@ 2>>$logfile + res=$? + echo "zone $1 pre-state-change result $res" >>$logfile +else + /usr/lib/brand/$ps_brand/statechange "pre" $@ + res=$? +fi + +exit $res diff --git a/usr/src/lib/brand/jcommon/query b/usr/src/lib/brand/jcommon/query new file mode 100755 index 0000000000..70ea39d8e2 --- /dev/null +++ b/usr/src/lib/brand/jcommon/query @@ -0,0 +1,52 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. +# + +PATH=/usr/bin:/usr/sbin +export PATH + +. /usr/lib/brand/shared/common.ksh + +zonename=$1 +zonepath=$2 +cmd=$3 + +if [ $3 == "env" ]; then + # + # zoneadmd reads one (arbitrary length) line of input from the query + # hook. If there is more than one environment variable to pass back, + # delimit each one with tabs. zoneadmd will split the line at the tabs + # and set each key/value pair in its environment. + # + # Currently, only _ZONEADMD_ZPOOL is used to set the %P substitution + # for the brand configuration. + # + entry=$(svccfg -s smartdc/init listprop '*/zpool') + if [ -n "$entry" ]; then + val=${entry##* * } + [ -n "$val" ] && echo "_ZONEADMD_ZPOOL=/${val}\c" + fi +fi + +exit $ZONE_SUBPROC_OK diff --git a/usr/src/lib/brand/jcommon/statechange b/usr/src/lib/brand/jcommon/statechange new file mode 100644 index 0000000000..58eae5ab5a --- /dev/null +++ b/usr/src/lib/brand/jcommon/statechange @@ -0,0 +1,906 @@ +#!/bin/ksh -p +# +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2018 Joyent, Inc. All rights reserved. +# + +unset LD_LIBRARY_PATH +PATH=/usr/bin:/usr/sbin +export PATH + +. /lib/sdc/config.sh + +# subcommand: +# pre +# post + +# state +# ZONE_STATE_CONFIGURED 0 (script will never see this) +# ZONE_STATE_INCOMPLETE 1 (script will never see this) +# ZONE_STATE_INSTALLED 2 +# ZONE_STATE_READY 3 +# ZONE_STATE_RUNNING 4 +# ZONE_STATE_SHUTTING_DOWN 5 +# ZONE_STATE_DOWN 6 +# ZONE_STATE_MOUNTED 7 + +# cmd +# +# ready 0 +# boot 1 +# forceboot 2 +# reboot 3 +# halt 4 +# uninstalling 5 +# mount 6 +# forcemount 7 +# unmount 8 + +subcommand=$1 +ZONENAME=$2 +ZONEPATH=$3 +state=$4 +cmd=$5 + +VNDADM=/usr/sbin/vndadm +SNAPSHOT_DIR=root/checkpoints +OVERLAY_RULES=/var/run/smartdc/networking/overlay_rules.json +DEFAULT_MTU=1500 + +# +# The following are the list of features that a corresponding brand may +# enable. If they wish to do so, then they must set the following flags +# to values such that [[ -z $flag ]] is true. The following are the +# currently supported flags: +# +# o jst_vrrp - Enables vrrp +# o jst_ufpromisc - Supports unfiltered promiscuous mode +# o jst_createvnd - Create vnd devices +# o jst_simplefs - Only setup lastbooted in the FS +# o jst_showsnap - Show snapshots in the FS +# +# In addition, the brand must also specify the following parameters: +# +# o jst_mdatapath - The path the metadata socket is expected in the zone +# + +get_boolean_nic_property() +{ + bool_val=$(eval echo \$_ZONECFG_net_${1}_${2}) + if [[ "${bool_val}" == "1" ]] || [[ "${bool_val}" == "true" ]]; then + echo "true" + else + echo "false" + fi +} + +nm2prefix() +{ + prefix=0 + numparts=0 + OLDIFS=$IFS + IFS=. + for digit in $1 ; do + (( numparts+=1 )) + case $digit in + 255) + (( prefix+=8 )) + ;; + 254) + (( prefix+=7 )) + ;; + 252) + (( prefix+=6 )) + ;; + 248) + (( prefix+=5 )) + ;; + 240) + (( prefix+=4 )) + ;; + 224) + (( prefix+=3 )) + ;; + 192) + (( prefix+=2 )) + ;; + 128) + (( prefix+=1 )) + ;; + 0);; + *) + echo "Invalid digit in netmask: $digit" 1>&2; + IFS=$OLDIFS + return 1;; + esac + done + if [[ $numparts -ne 4 ]]; then + echo "Too many parts in the IP address" 1>&2; + IFS=$OLDIFS + return 1 + fi + IFS=$OLDIFS + echo "$prefix" +} + +# +# Set up the vnic(s) for the zone. +# +setup_net() +{ + typeset tmp overlay tag id rule + for nic in $_ZONECFG_net_resources + do + # Get simplified versions of the network config. variables. + address=$(eval echo \$_ZONECFG_net_${nic}_address) + # If address set, must be a shared stack zone + [[ -n $address ]] && exit 0 + + global_nic=$(eval echo \$_ZONECFG_net_${nic}_global_nic) + # If no global-nic, must be a dedicated physical NIC instead + # of a vnic + [[ -z $global_nic ]] && continue + + dhcp_server=$(get_boolean_nic_property ${nic} dhcp_server) + mac_addr=$(eval echo \$_ZONECFG_net_${nic}_mac_addr) + vlan_id=$(eval echo \$_ZONECFG_net_${nic}_vlan_id) + blocked_outgoing_ports=$(eval \ + echo \$_ZONECFG_net_${nic}_blocked_outgoing_ports) + zone_ips=$(eval echo \$_ZONECFG_net_${nic}_ips) + zone_ip=$(eval echo \$_ZONECFG_net_${nic}_ip) + zone_netmask=$(eval echo \$_ZONECFG_net_${nic}_netmask) + allow_dhcp_spoof=$(get_boolean_nic_property ${nic} allow_dhcp_spoofing) + allow_ip_spoof=$(get_boolean_nic_property ${nic} allow_ip_spoofing) + allow_mac_spoof=$(get_boolean_nic_property ${nic} allow_mac_spoofing) + allow_restricted_traffic=$(get_boolean_nic_property ${nic} \ + allow_restricted_traffic) + allow_unfiltered_promisc=$(get_boolean_nic_property ${nic} \ + allow_unfiltered_promisc) + allowed_ips=$(eval echo \$_ZONECFG_net_${nic}_allowed_ips) + allowed_dhcp_cids=$(eval echo \$_ZONECFG_net_${nic}_allowed_dhcp_cids) + vrid=$(eval echo \$_ZONECFG_net_${nic}_vrrp_vrid) + vrrp_primary_ip=$(eval \ + echo \$_ZONECFG_net_${nic}_vrrp_primary_ip) + mtu=$(eval echo \$_ZONECFG_net_${nic}_mtu) + isoverlay= + + # If we don't have our zone_ips, it may be because the configuration + # was made on an older platform. In that case, use the "ip" and + # "netmask" properties for this NIC, and save it as "ips". + if [[ -z $zone_ips && -n $zone_ip ]]; then + [[ -n $zone_netmask ]] && + zone_ip=$zone_ip/`nm2prefix $zone_netmask` + + zone_ips=$zone_ip + fi + + + + orig_global=$global_nic + + # + # The nic tag for a device (the zonecfg global_nic) can come in + # one of a few forms. It may: + # + # 1) Be a traditional tag which refers to a physical device or + # aggregation to create a VNIC over. The source of this + # mapping is sysinfo. + # + # 2) It can be the name of an etherstub. The source of these is + # from dladm show-etherstub + # + # 3) It can take the form of an overlay device rule. An overlay + # device rule is an invalid DLPI device and invalid nic tag. + # It has the form of <name>/<number>. For example, + # sdc_sdn/23. That refers to the overlay rule sdc_sdn. If we + # have an overlay rule, we may need to dynamically create the + # overlay device if it doesn't exist. + # + # To handle these cases, we first check if it's an overlay + # device, and then if not, check the other cases. + # + + tmp=$(echo $orig_global | sed -E 's_[a-zA-Z_0-9]+/[0-9]+__') + if [[ -n "$tmp" ]]; then + + # + # We only need sysinfo if we get here, and we only need to load it + # once. Loading is about the same cost as looking up a single + # value. + # + if [[ -z $SYSINFO_LOADED ]]; then + load_sdc_sysinfo + SYSINFO_LOADED="LOADED" + fi + global_nic=$(eval echo \$SYSINFO_NIC_${orig_global}) + + # If the global nic is specified as a device or etherstub name + # rather than a tag. + if [[ -z $global_nic ]]; then + echo "$(dladm show-phys -p -o LINK) $(dladm show-etherstub -p -o LINK)" \ + | egrep "(^| )${orig_global}( |$)" > /dev/null + (( $? == 0 )) && global_nic=${orig_global} + fi + else + isoverlay="true" + tag=${orig_global%/*} + num=${orig_global#*/} + global_nic="$tag$num" + rule=$(json -f $OVERLAY_RULES $tag) + if [[ $? -ne 0 || -z "$rule" ]]; then + logger -p daemon.err "zone $ZONENAME had tag " \ + "$tag which indicates an overlay rule, " \ + "no corresponding overlay rule found." + exit 1 + fi + fi + + # For backwards compatibility with the other parts of the + # system, check if this zone already has this vnic setup. + # If so, move on to the next vnic. + dladm show-vnic -p -o LINK -z $ZONENAME $nic >/dev/null 2>&1 + (( $? == 0 )) && continue + + if [[ -z $global_nic ]]; then + echo "undefined VNIC $nic " \ + "(global NIC $orig_global)" + logger -p daemon.err "zone $ZONENAME " \ + "undefined VNIC $nic (global NIC $orig_global)" + exit 1 + fi + + # + # If we have an overlay device, do we need to create it, or does + # it already exist? + # + if [[ -n "$isoverlay" ]]; then + if ! dladm show-overlay $global_nic 2>/dev/null; then + dladm create-overlay $rule -v $num $global_nic + if [[ $? -ne 0 ]]; then + # If creation fails, ALSO check + # for existence again, in case + # someone beat us to it. + if ! dladm show-overlay $global_nic \ + 2> /dev/null; then + logger -p daemon.err "zone $ZONENAME " \ + "failed to create overlay device " \ + "$global_nic with command " \ + "'dladm create-overlay $rule -v " \ + "$num $global_nic" + exit 1 + fi + fi + fi + fi + + + # + # Create the vnic. + # + + opt_str="-p " + + # + # Traditionally we created VNICs without ever specifying + # the MTU. In the world before we supported any kind of + # jumbo frames, this is fine, because it would always + # match the physical which was 1500 by default for + # almost all of our devices. However, when we added + # support for mtu in nictagadm and changing it in boot + # up, we didn't properly assert the default MTU. This + # has led to VMs potentially getting the wrong MTU and + # ending up using jumbo frames when the network is + # expecting 1500 byte frames. Marx Brothers-esque comedy + # and despair ensues. + # + # Thus we always assert that if no MTU is specified by + # the VM, then we go back to the traditional 'default' + # value which is 1500. + # + if [[ -z "$mtu" ]]; then + mtu=$DEFAULT_MTU + fi + + opt_str="$opt_str mtu=$mtu," + + # + # Always append the zone as the last property. This is + # to work around the fact that once we associate it with + # a zone, the zone will have a hold on the device and + # we'll not be able to delete it if a create fails due + # to a bad property (say an invalid MTU). Note if we + # have other properties, it is their responsibility to + # put a trailing comma on it. + # + opt_str="${opt_str}zone=$ZONENAME" + + if [[ -n "$jst_vrrp" && -n $vrid ]]; then + # MAC addresses for VRRP vnics are determined by the VRID + mac_addr="vrrp" + opt_str="$opt_str -V $vrid -A inet" + fi + + [[ -n $mac_addr ]] && opt_str="$opt_str -m $mac_addr" + + [[ -n $vlan_id && $vlan_id != 0 ]] && \ + opt_str="$opt_str -v $vlan_id" + + + # + # Creating a VNIC in a zone is a multi-step process internally. + # This means there is a short window where the VNIC exists in + # the global zone and that could lead to a race condition if + # two zones boot at the same time with the same VNIC name. Use + # a temp. name to create the VNIC then rename it to have the + # correct name. + # + tname=tmp$$0 + dout=`dladm create-vnic -t -l $global_nic $opt_str $tname 2>&1` + if (( $? != 0 )); then + printf "error creating VNIC %s (global NIC %s)\n" \ + "$nic" "$orig_global" + printf "msg: %s\n" "$dout" + printf "Failed cmd: dladm create-vnic %s" \ + "-t -l $global_nic $opt_str $tname" + logger -p daemon.err "zone $ZONENAME error creating " \ + "VNIC $nic (global NIC $orig_global $global_nic)" + logger -p daemon.err "msg: $dout" + logger -p daemon.err "Failed cmd: dladm create-vnic " \ + "-t -l $global_nic $opt_str $tname" + + # Show more info if dup MAC addr. + echo $dout | egrep -s "MAC address is already in use" + if (( $? == 0 )); then + entry=`dladm show-vnic -olink,macaddress,zone \ + | nawk -v addr=$mac_addr '{ + if ($2 == addr) + print $0 + }'` + if [[ -n $entry ]]; then + print -f "LINK\tMACADDRESS\tZONE\n" + print -f "%s\n" "$entry" + fi + fi + exit 1 + fi + dladm rename-link -z $ZONENAME $tname $nic + if (( $? != 0 )); then + echo "error renaming VNIC $tname $nic" + logger -p daemon.err "zone $ZONENAME error renaming " \ + "VNIC $tname $nic" + exit 1 + fi + + if [[ -z $mac_addr ]]; then + # There was no assigned mac address + + # Get newly assigned mac address. + mac_addr=$(dladm show-vnic -z $ZONENAME -p -o \ + MACADDRESS ${nic}) + + # Save newly assigned mac address + [[ -n $mac_addr ]] && zonecfg -z $ZONENAME \ + "select net physical=$nic; " \ + "set mac-addr=$mac_addr; end; exit" + fi + + # Set up antispoof options + + if [[ $dhcp_server == "true" ]] || [[ $allow_dhcp_spoof == "true" ]]; then + enable_dhcp="true" + # This needs to be off for dhcp server zones + allow_ip_spoof="true" + else + enable_dhcp="false" + fi + + comma="" + spoof_opts="" + if [[ $allow_mac_spoof != "true" ]]; then + spoof_opts="${spoof_opts}${comma}mac-nospoof" + comma="," + fi + if [[ $allow_ip_spoof != "true" ]]; then + spoof_opts="${spoof_opts}${comma}ip-nospoof" + comma="," + fi + if [[ $allow_restricted_traffic != "true" ]]; then + spoof_opts="${spoof_opts}${comma}restricted" + comma="," + fi + if [[ ${enable_dhcp} == "false" ]]; then + spoof_opts="${spoof_opts}${comma}dhcp-nospoof" + comma="," + fi + + if [[ -n ${spoof_opts} ]]; then + dladm set-linkprop -t -z $ZONENAME -p \ + "protection=${spoof_opts}" ${nic} + if (( $? != 0 )); then + echo "error setting VNIC protection $nic $spoof_opts" + logger -p daemon.err "zone $ZONENAME error setting " \ + "VNIC protection $nic $spoof_opts" + exit 1 + fi + fi + + # If we aren't using IP spoofing, we'll need to set the allowed-ips + # property on the NIC so that the zone will be able to ifconfig the + # proper addresses. + if [[ $allow_ip_spoof != "true" ]]; then + unset allowed_ip_map + typeset -A allowed_ip_map + + dynamic_methods="" + separator="" + OLDIFS=$IFS + IFS=, + + for zone_ip in $zone_ips; do + # For each static IP available, add it to the list. + if [[ $zone_ip == "dhcp" ]]; then + dynamic_methods+="${separator}dhcpv4" + separator="," + elif [[ $zone_ip == "addrconf" ]]; then + dynamic_methods+="${separator}addrconf" + separator="," + else + clean_ip=`printf "%s" "${zone_ip}" | sed 's|^\([^/]*\)/.*|\1|'` + allowed_ip_map[${clean_ip}]=true + fi + done + + # If any additional IPs have been specified (for example, older + # VMs set up for IPv6 before vmadm gained support), add them to + # the list. + for allowed_ip in $allowed_ips; do + allowed_ip_map[${allowed_ip}]=true + done + IFS=$OLDIFS + + # If we're using VRRP and have the IP, add it to the list. + if [[ -n "$jst_vrrp" && -n $vrrp_primary_ip ]]; then + allowed_ip_map[${vrrp_primary_ip}]=true + fi + + allowed_ip_list="" + + separator="" + for allowed_ip in ${!allowed_ip_map[@]}; do + allowed_ip_list+="${separator}${allowed_ip}" + separator="," + done + + # Set the allowed-ips property on the NIC + if [[ -n ${allowed_ip_list} ]] && + ! dladm set-linkprop -t -z $ZONENAME \ + -p "allowed-ips=${allowed_ip_list}" ${nic}; then + log_and_exit \ + "error setting VNIC allowed-ips $nic $allowed_ip_list" + fi + + # Set the dynamic-methods property on the NIC + if [[ -n ${dynamic_methods} ]] && + ! dladm set-linkprop -t -z $ZONENAME \ + -p "dynamic-methods=${dynamic_methods}" ${nic}; then + log_and_exit \ + "error setting VNIC dynamic-methods $nic $dynamic_methods" + fi + fi + + if [[ "$enable_dhcp" != "true" ]] && [[ -n "$allowed_dhcp_cids" ]] && + ! dladm set-linkprop -p "allowed-dhcp-cids=${allowed_dhcp_cids}" \ + -t -z $ZONENAME $nic; then + log_and_exit \ + "error setting VNIC allowed-dhcp-cids $nic $allowed_dhcp_cids" + fi + + if [[ "$enable_dhcp" != "true" ]] && [[ -z "$allowed_dhcp_cids" ]] && + [[ "$zone_ips" == *dhcp* || "$zone_ips" == *addrconf* ]] && + ! dladm set-linkprop -p "allow-all-dhcp-cids=true" \ + -t -z $ZONENAME $nic; then + log_and_exit "error setting VNIC allow-all-dhcp-cids $nic" + fi + + if [[ -n "$jst_ufpromisc" && ${allow_unfiltered_promisc} == "true" ]]; then + dladm set-linkprop -t -z $ZONENAME -p "promisc-filtered=off" ${nic} + fi + + if [[ -n $blocked_outgoing_ports ]]; then + OLDIFS=$IFS + IFS=, + for port in $blocked_outgoing_ports; do + # br='block remote'. Flow names should be < 31 + # chars in length so that they get unique + # kstats. + # Use the VNIC mac addr. to generate a unique + # name. + mac_addr=`dladm show-vnic -z $ZONENAME -p \ + -o MACADDRESS $nic | tr ':' '_'` + flowadm add-flow -t -l $nic -z $ZONENAME \ + -a transport=tcp,remote_port=$port \ + -p maxbw=0 f${mac_addr}_br_${port} + if (( $? != 0 )); then + echo "error adding flow " \ + "$nic f${mac_addr}_br_${port}" + logger -p daemon.err "zone $ZONENAME " \ + "error adding flow " \ + "$nic f${mac_addr}_br_${port}" + exit 1 + fi + done + IFS=$OLDIFS + fi + + if [[ -n "$jst_createvnd" ]]; then + # + # At this point we should go ahead and set up + # the vnd interface for this datalink. + # + $VNDADM create -z $ZONENAME $nic + if [[ $? -ne 0 ]]; then + echo "failed to create vnd device" + exit 1 + fi + fi + done +} + +# +# Log a message, then exit +# +log_and_exit() +{ + echo "$1" + logger -p daemon.err "zone $ZONENAME $1" + exit 1 +} + +# +# Set up the firewall for the zone. +# +setup_fw() +{ + ipf_conf=$ZONEPATH/config/ipf.conf + ipf6_conf=$ZONEPATH/config/ipf6.conf + if [ -e $ipf_conf ]; then + echo "starting firewall ($ipf_conf)" + /usr/sbin/ipf -GE $ZONENAME + if (( $? != 0 )); then + log_and_exit "error enabling ipfilter" + fi + + /usr/sbin/ipf -GFa $ZONENAME + if (( $? != 0 )); then + log_and_exit "error flushing ipfilter (IPv4)" + fi + + /usr/sbin/ipf -6GFa $ZONENAME + if (( $? != 0 )); then + log_and_exit "error flushing ipfilter (IPv6)" + fi + + /usr/sbin/ipf -Gf $ipf_conf $ZONENAME + if (( $? != 0 )); then + log_and_exit "error loading ipfilter config for IPv4" + fi + + if [[ -e $ipf6_conf ]] && + ! /usr/sbin/ipf -6Gf $ipf6_conf $ZONENAME; then + log_and_exit "error loading ipfilter config for IPv6" + fi + + /usr/sbin/ipf -Gy $ZONENAME + if (( $? != 0 )); then + log_and_exit "error syncing ipfilter interfaces" + fi + fi +} + +# +# We're readying the zone. Make sure the per-zone writable +# directories exist so that we can lofs mount them. We do this here, +# instead of in the install script, since this list has evolved and +# there are already zones out there in the installed state. +# +setup_fs() +{ + # create directory for metadata socket + mkdir -m755 -p /var/zonecontrol/${ZONENAME} + + uname -v > $ZONEPATH/lastbooted + [[ -n "$jst_simplefs" ]] && return + + [ ! -d $ZONEPATH/site ] && mkdir -m755 $ZONEPATH/site + [ ! -d $ZONEPATH/local ] && mkdir -m755 $ZONEPATH/local + [ ! -d $ZONEPATH/$SNAPSHOT_DIR ] && mkdir -m755 $ZONEPATH/$SNAPSHOT_DIR + if [ ! -d $ZONEPATH/ccs ]; then + mkdir -m755 $ZONEPATH/ccs + (cd /usr/ccs; tar cbf 512 - *) | \ + (cd $ZONEPATH/ccs; tar xbf 512 -) + fi + +} + +setup_snapshots() +{ + # + # Because the top-level directory of each ZFS snapshot contains some + # internal information, mount the /root directory of each snapshot + # separately. + # + for snap in $(ls -1 $ZONEPATH/.zfs/snapshot); do + snapdir=$ZONEPATH/$SNAPSHOT_DIR/$(echo ${snap} | sed -e "s/^vmsnap-//") + mkdir -p ${snapdir} + mount -F lofs -o ro,setuid,nodevices \ + $ZONEPATH/.zfs/snapshot/${snap}/root ${snapdir} + done +} + +# +# If the zone has a CPU cap, calculate the CPU baseline and set it so we can +# track when we're bursting. There are many ways that the baseline can be +# calculated based on the other settings in the zones (e.g. a simple way would +# be as a precentage of the cap). +# +# For SmartMachines, our CPU baseline is calculated off of the system's +# provisionable memory and the memory cap of the zone. We assume that 83% of +# the system's memory is usable by zones (the rest is for the OS) and we assume +# that the zone memory cap is set so that we're proportional to how many zones +# we can provision on the system (i.e. we don't overprovision memory). Using +# these assumptions, we calculate the proportion of CPU for the zone based on +# its proportion of memory. Thus, the zone's CPU baseline is calculated using: +# ((zone capped memsize in MB) * 100) / (MB/core). +# Uncapped zones have no baseline (i.e. infrastructure zones). +# +# Remember that the cpu-cap rctl and the baseline are expressed in units of +# a percent of a CPU, so 100 is 1 full CPU. +# +setup_cpu_baseline() +{ + # A brand can override the setup of bursting. + [ -n "$NO_BURSTING" ] && return + + # If there is already a baseline, don't set one heuristically + curr_base=`prctl -P -n zone.cpu-baseline -i zone $ZONENAME | nawk '{ + if ($2 == "privileged") print $3 + }'` + [ -n "$curr_base" ] && return + + # Get current cap and convert from zonecfg format into rctl format + cap=`zonecfg -z $ZONENAME info capped-cpu | nawk '{ + if ($1 == "[ncpus:") print (substr($2, 1, length($2) - 1) * 100) + }'` + [ -z "$cap" ] && return + + # Get zone's memory cap in MB times 100 + zmem=`zonecfg -z $ZONENAME info capped-memory | nawk '{ + if ($1 == "[physical:") { + val = substr($2, 1, length($2) - 2) + units = substr($2, length($2) - 1, 1) + + # convert GB to MB + if (units == "G") + val *= 1024 + print (val * 100) + } + }'` + [ -z "$zmem" ] && return + + # Get system's total memory in MB + smem=`prtconf -m` + # provisionable memory is 83% of total memory (bash can't do floats) + prov_mem=$((($smem * 83) / 100)) + nprocs=`psrinfo -v | \ + nawk '/virtual processor/ {cnt++} END {print cnt}'` + + mb_per_core=$(($prov_mem / $nprocs)) + + baseline=$(($zmem / $mb_per_core)) + [[ $baseline == 0 ]] && baseline=1 + [[ $baseline -gt $cap ]] && baseline=$cap + + prctl -n zone.cpu-baseline -v $baseline -t priv -i zone $ZONENAME +} + +cleanup_snapshots() +{ + # + # Each ZFS snapshot is mounted separately, so find all mounted + # snapshots for this zone, and unmount them. + # + snaps=$(ls -1 $ZONEPATH/$SNAPSHOT_DIR) + + for snap in ${snaps}; do + snapdir=$ZONEPATH/$SNAPSHOT_DIR/$(echo ${snap} | sed -e "s/^vmsnap-//") + umount ${snapdir} + rmdir ${snapdir} + done +} + +# +# We're halting the zone, perform network cleanup. +# +cleanup_net() +{ + # Cleanup any flows that were setup. + for nic in $_ZONECFG_net_resources + do + flowadm remove-flow -t -z $ZONENAME -l $nic + if (( $? != 0 )); then + echo "error removing flows for $nic" + logger -p daemon.err "zone $ZONENAME " \ + "error removing flows for $nic" + fi + done +} + +id_gz_sockholder() +{ + echo "searching for GZ process holding socket $1" + logger -p daemon.err "zone $ZONENAME " \ + "searching for GZ process holding socket $1" + + pid=`(cd /proc; + for i in *; + do + pfiles $i 2>/dev/null | egrep -s "AF_UNIX $1"; + [ $? == 0 ] && echo "$i"; + done)` + + [ -z "$pid" ] && return + + echo "Error: GZ process $pid holding socket $1 blocking shutdown" + logger -p daemon.err "Error: zone $ZONENAME:" \ + "GZ process $pid holding socket $1 blocking shutdown" +} + +# zonadmd unable to unmount the given path, try to cleanup so unmount can +# succeed. +cleanup_mount() +{ + echo "attempting to cleanup mount $1" + logger -p daemon.err "zone $ZONENAME attempting to cleanup mount $1" + + fnd_procs=0 + for i in `fuser -c $1 2>/dev/null` + do + fnd_procs=1 + + pty=`ps -otty -p $i | \ + nawk '{if ($1 != "TT" && $1 != "?") print $0}'` + + if [ -n "$pty" ]; then + echo "shell process $i blocking zone" \ + "$ZONENAME shutdown, killing the process" | wall + echo "killing GZ user shell $i under $1" + logger -p daemon.err "zone $ZONENAME:" \ + "killing GZ user shell $i under $1" + kill -9 $i + else + echo "Error: GZ process $i under $1 blocking shutdown" + logger -p daemon.err "Error: zone $ZONENAME:" \ + "GZ process $i under $1 blocking shutdown" + + local args="pargs: `pargs $i`" + echo "$args" + logger -p daemon.err "$args" + + local tree="ptree: `ptree $i`" + echo "$tree" + logger -p daemon.err "$tree" + fi + done + + if [ $fnd_procs -eq 1 ]; then + # Exit out to give the zoneadmd umount a chance to suceed now. + # Zoneadmd will give us another shot if it still can't umount. + sleep 1 + exit 0 + fi + + # Processes which are injected into a zone and then open a file as a + # socket end-point will show in pfiles with the path relative to the + # zone's root. For example, a zone with its root at /zones/foo/root and + # an open socket as /zones/foo/root/var/run/x will show up in a pfiles + # search as /var/run/x. This is a problem since we have no way to + # narrow down which process is the culprit. + # + # Because the socket doesn't have enough information for us to tie to + # the specific GZ process, we hardcode to id things we know will open + # sockets into the zone: + # $jst_mdatapath/metadata.sock + # /var/run/.smartdc-amon.sock + + ZVR=$ZONEPATH/root/var/run + [ -S $ZVR/smartdc/metadata.sock ] && + id_gz_sockholder $jst_mdatapath/metadata.sock + + [ -S $ZVR/.smartdc-amon.sock ] && + id_gz_sockholder /var/run/.smartdc-amon.sock +} + +function fix_forced_attrs { + typeset attr + + for attr in ${!FORCED_ATTRS[@]}; do + typeset nval=${FORCED_ATTRS["$attr"]} + typeset -n envvar=_ZONECFG_attr_${attr//-/_} + typeset cval=$envvar + + if [[ $cval == $nval ]]; then + # In most cases, $nval and $cval will be the same and + # nothing needs to be done. This includes the case where + # $nval and $cval are "". + continue + elif [[ -z $nval ]]; then + logger -p daemon.error "[zone $ZONENAME]" \ + "Illegal value for attr '$attr': '$cval'." \ + "Removing attr '$attr'" + zonecfg -z "$ZONENAME" "remove -F attr name=$attr" + + unset ${!envvar} + else + logger -p daemon.error "[zone $ZONENAME]" \ + "Illegal value for attr '$attr': '$cval'." \ + "Setting to '$nval'" + zonecfg -z "$ZONENAME" "remove -F attr name=$attr;" \ + "add attr; set type=string;" \ + "set name=$attr; set value=\"$nval\"; end;" + + export ${!envvar}="$nval" + fi + done +} + +# +# Main +# + +case $subcommand in +pre) + case $cmd in + 0) # pre-ready + fix_forced_attrs + setup_fs + ;; + 4) # pre-halt + [[ -n "$jst_showsnap" ]] && cleanup_snapshots + cleanup_net + ;; + esac + ;; +post) + case $cmd in + 0) # post-ready + [[ -n "$jst_showsnap" ]] && setup_snapshots + setup_net + setup_fw + ;; + 1) # post-boot + # We can't set a rctl until we have a process in the zone to + # grab + setup_cpu_baseline + ;; + 8) # post-unmount + # Zone halt is hung unmounting, try to recover + if [[ $state == 6 ]]; then + cleanup_mount "$6" + fi + ;; + esac + ;; +esac + +exit 0 |