2 files changed, 363 insertions, 1 deletions
diff --git a/usr/src/cmd/svc/startd/startd.c b/usr/src/cmd/svc/startd/startd.c
index 6e3ea9876b..c5307879e2 100644
--- a/usr/src/cmd/svc/startd/startd.c
+++ b/usr/src/cmd/svc/startd/startd.c
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
  */
 
 /*
@@ -42,6 +42,137 @@
  * engine commands by executing methods, updating the repository, and sending
  * feedback (mostly state updates) to the graph engine.
  *
+ * Overview of the SMF Architecture
+ *
+ * There are a few different components that make up SMF and are responsible
+ * for different pieces of functionality that are used:
+ *
+ * svc.startd(1M): A daemon that is in charge of starting, stopping, and
+ *     restarting services and instances.
+ * svc.configd(1M): A daemon that manages the repository that stores
+ *     information, property groups, and state of the different services and
+ *     instances.
+ * libscf(3LIB): A C library that provides the glue for communicating,
+ *     accessing, and updating information about services and instances.
+ * svccfg(1M): A utility to add and remove services as well as change the
+ *     properties associated with different services and instances.
+ * svcadm(1M): A utility to control the different instances of a service. You
+ *     can use this to enable and disable them among some other useful things.
+ * svcs(1): A utility that reports on the status of various services on the
+ *     system.
+ *
+ * The following block diagram explains how these components communicate:
+ *
+ * The SMF Block Diagram
+ *                                                       Repository
+ *   This attempts to show       +---------+             +--------+
+ *   the relations between       |         |     SQL     |        |
+ *   the different pieces        | configd |<----------->| SQLite |
+ *   that make SMF work and      |         | Transaction |        |
+ *   users/administrators        +---------+             +--------+
+ *   call into.                   ^      ^
+ *                                |      |
+ *                   door_call(3C)|      | door_call(3C)
+ *                                |      |
+ *                                v      v
+ *      +----------+     +--------+      +--------+      +----------+
+ *      |          |     |        |      |        |      |  svccfg  |
+ *      |  startd  |<--->| libscf |      | libscf |<---->|  svcadm  |
+ *      |          |     | (3LIB) |      | (3LIB) |      |   svcs   |
+ *      +----------+     +--------+      +--------+      +----------+
+ *        ^      ^
+ *        |      | fork(2)/exec(2)
+ *        |      | libcontract(3LIB)
+ *        v      v                           Various System/User services
+ *       +-------------------------------------------------------------------+
+ *       | system/filesystem/local:default      system/coreadm:default       |
+ *       | network/loopback:default             system/zones:default         |
+ *       | milestone/multi-user:default         system/cron:default          |
+ *       | system/console-login:default         network/ssh:default          |
+ *       | system/pfexec:default                system/svc/restarter:default |
+ *       +-------------------------------------------------------------------+
+ *
+ * Chatting with Configd and Sharing Repository Information
+ *
+ * As you run commands with svcs, svccfg, and svcadm, they are all creating a
+ * libscf handle to communicate with configd. As calls are made via libscf they
+ * ultimately go and talk to configd to get information. However, how we
+ * actually are talking to configd is not as straightforward as it appears.
+ *
+ * When configd starts up it creates a door located at
+ * /etc/svc/volatile/repository_door. This door runs the routine called
+ * main_switcher() from usr/src/cmd/svc/configd/maindoor.c. When you first
+ * invoke svc(cfg|s|adm), one of the first things that occurs is creating a
+ * scf_handle_t and binding it to configd by calling scf_handle_bind(). This
+ * function makes a door call to configd and gets returned a new file
+ * descriptor. This file descriptor is itself another door which calls into
+ * configd's client_switcher(). This is the door that is actually used when
+ * getting and fetching properties, and many other useful things.
+ *
+ * svc.startd needs a way to notice the changes that occur to the repository.
+ * For example, if you enabled a service that was not previously running, it's
+ * up to startd to notice that this has happened, check dependencies, and
+ * eventually start up the service. The way it gets these notifications is via
+ * a thread whose sole purpose in life is to call _scf_notify_wait(). This
+ * function acts like poll(2) but for changes that occur in the repository.
+ * Once this thread gets the event, it dispatches the event appropriately.
+ *
+ * The Events of svc.startd
+ *
+ * svc.startd has to handle a lot of complexity. Understanding how you go from
+ * getting the notification that a service was enabled to actually enabling it
+ * is not obvious from a cursory glance. The first thing to keep in mind is
+ * that startd maintains a graph of all the related services and instances so
+ * it can keep track of what is enabled, what dependencies exist, etc. all so
+ * that it can answer the question of what is affected by a change. Internally
+ * there are a lot of different queues for events, threads to process these
+ * queues, and different paths to have events enter these queues. What follows
+ * is a diagram that attempts to explain some of those paths, though it's
+ * important to note that for some of these pieces, such as the graph and
+ * vertex events, there are many additional ways and code paths these threads
+ * and functions can take. And yes, restarter_event_enqueue() is not the same
+ * thing as restarter_queue_event().
+ *
+ *   Threads/Functions                 Queues                  Threads/Functions
+ *
+ * called by various
+ *     +----------------+             +-------+                  +-------------+
+ * --->| graph_protocol | graph_event | graph |   graph_event_   | graph_event |
+ * --->| _send_event()  |------------>| event |----------------->| _thread     |
+ *     +----------------+ _enqueue()  | queue |   dequeue()      +-------------+
+ *                                    +-------+                         |
+ *  _scf_notify_wait()                               vertex_send_event()|
+ *  |                                                                   v
+ *  |  +------------------+                              +--------------------+
+ *  +->| repository_event | vertex_send_event()          | restarter_protocol |
+ *     | _thread          |----------------------------->| _send_event()      |
+ *     +------------------+                              +--------------------+
+ *                                                          |    | out to other
+ *                restarter_                     restarter_ |    | restarters
+ *                event_dequeue() +-----------+  event_     |    | not startd
+ *               +----------------| restarter |<------------+    +------------->
+ *               v                |   event   |  enqueue()
+ *      +-----------------+       |   queue   |             +------------------>
+ *      | restarter_event |       +-----------+             |+----------------->
+ *      | _thread         |                                 ||+---------------->
+ *      +-----------------+                                 ||| start/stop inst
+ *               |               +--------------+       +--------------------+
+ *               |               |   instance   |       | restarter_process_ |
+ *               +-------------->|    event     |------>| events             |
+ *                restarter_     |    queue     |       | per-instance lwp   |
+ *                queue_event()  +--------------+       +--------------------+
+ *                                                          ||| various funcs
+ *                                                          ||| controlling
+ *                                                          ||| instance state
+ *                                                          ||+--------------->
+ *                                                          |+---------------->
+ *                                                          +----------------->
+ *
+ * What's important to take away is that there is a queue for each instance on
+ * the system that handles events related to dealing directly with that
+ * instance and that events can be added to it because of changes to properties
+ * that are made to configd and acted upon asynchronously by startd.
+ *
  * Error handling
  *
  * In general, when svc.startd runs out of memory it reattempts a few times,
diff --git a/usr/src/cmd/svc/svccfg/svccfg_libscf.c b/usr/src/cmd/svc/svccfg/svccfg_libscf.c
index 5a96e5eac4..df7f7af209 100644
--- a/usr/src/cmd/svc/svccfg/svccfg_libscf.c
+++ b/usr/src/cmd/svc/svccfg/svccfg_libscf.c
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
  * Copyright 2012 Milan Jurik. All rights reserved.
  */
 
@@ -44,6 +45,7 @@
 #include <stdarg.h>
 #include <string.h>
 #include <strings.h>
+#include <time.h>
 #include <unistd.h>
 #include <wait.h>
 #include <poll.h>
@@ -241,6 +243,9 @@ static const char *emsg_dpt_no_dep;
 static int li_only = 0;
 static int no_refresh = 0;
 
+/* ns to sleep between polls for the restarter pg and its state property */
+static uint64_t pg_timeout = 100 * (NANOSEC / MILLISEC);
+
 /* import globals, to minimize allocations */
 static scf_scope_t *imp_scope = NULL;
 static scf_service_t *imp_svc = NULL, *imp_tsvc = NULL;
@@ -6751,6 +6756,203 @@ connaborted:
 }
 
 /*
+ * When an instance is imported we end up telling configd about it. Once we tell
+ * configd about these changes, startd eventually notices. If this is a new
+ * instance, the manifest may not specify the SCF_PG_RESTARTER (restarter)
+ * property group. However, many of the other tools expect that this property
+ * group exists and has certain values.
+ *
+ * These values are added asynchronously by startd. We should not return from
+ * this routine until we can verify that the property group we need is there.
+ *
+ * Before we go ahead and verify this, we have to ask ourselves an important
+ * question: Is the early manifest service currently running?  Because if it is
+ * running and it has invoked us, then the service will never get a restarter
+ * property because svc.startd is blocked on EMI finishing before it lets itself
+ * fully connect to svc.configd. Of course, this means that this race condition
+ * is in fact impossible to 100% eliminate.
+ *
+ * svc.startd makes sure that EMI only runs once and has succeeded by checking
+ * the state of the EMI instance. If it is online it bails out and makes sure
+ * that it doesn't run again. In this case, we're going to do something similar,
+ * only if the state is online, then we're going to actually verify. EMI always
+ * has to be present, but it can be explicitly disabled to reduce the amount of
+ * damage it can cause. If EMI has been disabled then we no longer have to worry
+ * about the implicit race condition and can go ahead and check things. If EMI
+ * is in some state that isn't online or disabled and isn't running, then we
+ * assume that things are rather bad and we're not going to get in your way,
+ * even if the rest of SMF does.
+ *
+ * Returns 0 on success or returns an errno.
+ */
+#ifndef NATIVE_BUILD
+static int
+lscf_instance_verify(scf_scope_t *scope, entity_t *svc, entity_t *inst)
+{
+	int ret, err;
+	struct timespec ts;
+	char *emi_state;
+
+	/*
+	 * smf_get_state does not distinguish between its different failure
+	 * modes: memory allocation failures and SMF internal failures.
+	 */
+	if ((emi_state = smf_get_state(SCF_INSTANCE_EMI)) == NULL)
+		return (EAGAIN);
+
+	/*
+	 * Skip verification unless EMI is online or disabled (see above).
+	 */
+	if (strcmp(emi_state, SCF_STATE_STRING_ONLINE) != 0 &&
+	    strcmp(emi_state, SCF_STATE_STRING_DISABLED) != 0) {
+		warn(gettext("Not validating instance %s:%s because EMI's "
+		    "state is %s\n"), svc->sc_name, inst->sc_name, emi_state);
+		free(emi_state);
+		return (0);
+	}
+
+	free(emi_state);
+
+	/*
+	 * First we have to look up the service.
+	 */
+	if ((ret = scf_scope_get_service(scope, svc->sc_name, imp_svc)) != 0) {
+		ret = scf_error();
+		warn(gettext("Failed to look up service: %s\n"), svc->sc_name);
+		return (ret);
+	}
+
+	/*
+	 * We should always be able to get the instance. It should already
+	 * exist because we just created it or got it. There probably is a
+	 * slim chance that someone may have come in and deleted it though from
+	 * under us.
+	 */
+	if ((ret = scf_service_get_instance(imp_svc, inst->sc_name, imp_inst))
+	    != 0) {
+		ret = scf_error();
+		warn(gettext("Failed to verify instance: %s\n"), inst->sc_name);
+		switch (ret) {
+		case SCF_ERROR_DELETED:
+			err = ENODEV;
+			break;
+		case SCF_ERROR_CONNECTION_BROKEN:
+			warn(gettext("Lost repository connection\n"));
+			err = ECONNABORTED;
+			break;
+		case SCF_ERROR_NOT_FOUND:
+			warn(gettext("Instance \"%s\" disappeared out from "
+			    "under us.\n"), inst->sc_name);
+			err = ENOENT;
+			break;
+		default:
+			bad_error("scf_service_get_instance", ret);
+		}
+
+		return (err);
+	}
+
+	/*
+	 * An astute observer may want to use _scf_wait_pg which would notify us
+	 * of a property group change; unfortunately that does not work if the
+	 * property group in question does not exist. So instead we have to
+	 * manually poll, sleeping pg_timeout ns between attempts.
+	 */
+	while ((ret = scf_instance_get_pg(imp_inst, SCF_PG_RESTARTER, imp_pg))
+	    != SCF_SUCCESS) {
+		ret = scf_error();
+		if (ret != SCF_ERROR_NOT_FOUND) {
+			warn(gettext("Failed to get restarter property "
+			    "group for instance: %s\n"), inst->sc_name);
+			switch (ret) {
+			case SCF_ERROR_DELETED:
+				err = ENODEV;
+				break;
+			case SCF_ERROR_CONNECTION_BROKEN:
+				warn(gettext("Lost repository connection\n"));
+				err = ECONNABORTED;
+				break;
+			default:
+				bad_error("scf_instance_get_pg", ret);
+			}
+
+			return (err);
+		}
+
+		ts.tv_sec = pg_timeout / NANOSEC;
+		ts.tv_nsec = pg_timeout % NANOSEC;
+
+		(void) nanosleep(&ts, NULL);
+	}
+
+	/*
+	 * svcadm also expects that the SCF_PROPERTY_STATE property is present.
+	 * So in addition to the property group being present, we need to wait
+	 * for the property to be there in some form.
+	 *
+	 * Note that a property group is a frozen snapshot in time. To properly
+	 * get beyond this, you have to refresh the property group each time.
+	 */
+	while ((ret = scf_pg_get_property(imp_pg, SCF_PROPERTY_STATE,
+	    imp_prop)) != 0) {
+		ret = scf_error();
+		if (ret != SCF_ERROR_NOT_FOUND) {
+			warn(gettext("Failed to get property %s from the "
+			    "restarter property group of instance %s\n"),
+			    SCF_PROPERTY_STATE, inst->sc_name);
+			switch (ret) {
+			case SCF_ERROR_CONNECTION_BROKEN:
+				warn(gettext("Lost repository connection\n"));
+				err = ECONNABORTED;
+				break;
+			case SCF_ERROR_DELETED:
+				err = ENODEV;
+				break;
+			default:
+				bad_error("scf_pg_get_property", ret);
+			}
+
+			return (err);
+		}
+
+		ts.tv_sec = pg_timeout / NANOSEC;
+		ts.tv_nsec = pg_timeout % NANOSEC;
+
+		(void) nanosleep(&ts, NULL);
+
+		ret = scf_instance_get_pg(imp_inst, SCF_PG_RESTARTER, imp_pg);
+		if (ret != SCF_SUCCESS) {
+			ret = scf_error();
+			warn(gettext("Failed to get restarter property "
+			    "group for instance: %s\n"), inst->sc_name);
+			switch (ret) {
+			case SCF_ERROR_DELETED:
+				err = ENODEV;
+				break;
+			case SCF_ERROR_CONNECTION_BROKEN:
+				warn(gettext("Lost repository connection\n"));
+				err = ECONNABORTED;
+				break;
+			default:
+				bad_error("scf_instance_get_pg", ret);
+			}
+
+			return (err);
+		}
+	}
+
+	/*
+	 * We don't have to free the property groups or other values that we got
+	 * because we stored them in global variables that are allocated and
+	 * freed by the routines that call into these functions. Unless of
+	 * course the rest of the code here that we are basing this on is
+	 * mistaken.
+	 */
+	return (0);
+}
+#endif
+
+/*
  * If the service is missing, create it, import its properties, and import the
  * instances.  Since the service is brand new, it should be empty, and if we
  * run into any existing entities (SCF_ERROR_EXISTS), abort.
@@ -8122,7 +8324,36 @@ lscf_bundle_import(bundle_t *bndl, const char *filename, uint_t flags)
 				goto progress;
 
 		result = 0;
+
+		/*
+		 * This snippet of code assumes that we are running svccfg as we
+		 * normally do -- with svc.startd running. Of course, that is
+		 * not actually the case all the time because we also use a
+		 * variant of svc.configd and svccfg which are only meant to
+		 * run during the build process. During this time we have no
+		 * svc.startd, so this check would hang the build process.
+		 */
+#ifndef NATIVE_BUILD
+		/*
+		 * Verify that the restarter group is present
+		 */
+		for (svc = uu_list_first(bndl->sc_bundle_services);
+		    svc != NULL;
+		    svc = uu_list_next(bndl->sc_bundle_services, svc)) {
+
+			insts = svc->sc_u.sc_service.sc_service_instances;
+
+			for (inst = uu_list_first(insts);
+			    inst != NULL;
+			    inst = uu_list_next(insts, inst)) {
+				if (lscf_instance_verify(imp_scope, svc,
+				    inst) != 0)
+					goto progress;
+			}
+		}
+#endif
 		goto out;
+
 	}
 
 	if (uu_error() != UU_ERROR_CALLBACK_FAILED)