3 files changed, 94 insertions, 47 deletions
diff --git a/usr/src/cmd/svc/startd/graph.c b/usr/src/cmd/svc/startd/graph.c
index c831c99301..5a4e933220 100644
--- a/usr/src/cmd/svc/startd/graph.c
+++ b/usr/src/cmd/svc/startd/graph.c
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
  * Copyright (c) 2015, Syneto S.R.L. All rights reserved.
  */
 
@@ -142,6 +143,8 @@
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
 #include <fm/libfmevent.h>
 #include <libscf.h>
 #include <libscf_priv.h>
@@ -4876,6 +4879,20 @@ vertex_subgraph_dependencies_shutdown(scf_handle_t *h, graph_vertex_t *v,
 	was_up = up_state(old_state);
 	now_up = up_state(v->gv_state);
 
+	if (halting != -1 && old_state == RESTARTER_STATE_DISABLED &&
+	    v->gv_state != RESTARTER_STATE_DISABLED) {
+		/*
+		 * We're halting and we have a svc which is transitioning to
+		 * offline in parallel. This leads to a race condition where
+		 * gt_enter_offline might re-enable the svc after we disabled
+		 * it. Since we're halting, we want to ensure no svc ever
+		 * transitions out of the disabled state. In this case, modify
+		 * the flags to keep us on the halting path.
+		 */
+		was_up = 0;
+		now_up = 0;
+	}
+
 	if (!was_up && now_up) {
 		++non_subgraph_svcs;
 	} else if (was_up && !now_up) {
@@ -6828,6 +6845,7 @@ repository_event_thread(void *unused)
 	char *fmri = startd_alloc(max_scf_fmri_size);
 	char *pg_name = startd_alloc(max_scf_value_size);
 	int r;
+	int fd;
 
 	h = libscf_handle_create_bound_loop();
 
@@ -6850,6 +6868,14 @@ retry:
 		goto retry;
 	}
 
+	if ((fd = open("/etc/svc/volatile/startd.ready", O_RDONLY | O_CREAT,
+	    S_IRUSR)) < 0) {
+		log_error(LOG_WARNING, "Couldn't create startd.ready file: "
+		    "errno %d\n", errno);
+	} else {
+		(void) close(fd);
+	}
+
 	/*CONSTCOND*/
 	while (1) {
 		ssize_t res;
diff --git a/usr/src/cmd/svc/startd/method.c b/usr/src/cmd/svc/startd/method.c
index cc9ce6768c..c3cd0144c1 100644
--- a/usr/src/cmd/svc/startd/method.c
+++ b/usr/src/cmd/svc/startd/method.c
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Joyent Inc.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
  */
 
 /*
@@ -100,34 +100,18 @@ static uint_t method_events[] = {
  * method_record_start(restarter_inst_t *)
  *   Record a service start for rate limiting.  Place the current time
  *   in the circular array of instance starts.
+ *
+ *   Save the critical_failure_period and critical_failure_allowed with either
+ *   the defaults or the svc properties startd/critical_failure_count and
+ *   startd/critical_failure_period.
+ *   ri_crit_fail_allowed is capped at RINST_START_TIMES.
  */
 static void
 method_record_start(restarter_inst_t *inst)
 {
-	int index = inst->ri_start_index++ % RINST_START_TIMES;
-
-	inst->ri_start_time[index] = gethrtime();
-}
-
-/*
- * method_rate_critical(restarter_inst_t *)
- *    Return true if the average start interval is less than the permitted
- *    interval.  The implicit interval defaults to RINST_FAILURE_RATE_NS and
- *    RINST_START_TIMES but may be overridden with the svc properties
- *    startd/critical_failure_count and startd/critical_failure_period
- *    which represent the number of failures to consider and the amount of
- *    time in seconds in which that number may occur, respectively. Note that
- *    this time is measured as of the transition to 'enabled' rather than wall
- *    clock time.
- *    Implicit success if insufficient measurements for an average exist.
- */
-int
-method_rate_critical(restarter_inst_t *inst)
-{
+	int index;
+	uint_t critical_failure_allowed = RINST_START_TIMES;
 	hrtime_t critical_failure_period;
-	uint_t critical_failure_count = RINST_START_TIMES;
-	uint_t n = inst->ri_start_index;
-	hrtime_t avg_ns = 0;
 	uint64_t scf_fr, scf_st;
 	scf_propvec_t *prop = NULL;
 	scf_propvec_t restart_critical[] = {
@@ -151,17 +135,48 @@ method_rate_critical(restarter_inst_t *inst)
 		 * in seconds but tracked in ns
 		 */
 		critical_failure_period = (hrtime_t)scf_fr * NANOSEC;
-		critical_failure_count = (uint_t)scf_st;
+		critical_failure_allowed = (uint_t)scf_st;
+
+		if (critical_failure_allowed > RINST_START_TIMES)
+			critical_failure_allowed = RINST_START_TIMES;
+		if (critical_failure_allowed < 1)
+			critical_failure_allowed = 1;
+
 	}
-	if (inst->ri_start_index < critical_failure_count)
+
+	inst->ri_crit_fail_allowed = critical_failure_allowed;
+	inst->ri_crit_fail_period = critical_failure_period;
+
+	index = inst->ri_start_index++ % critical_failure_allowed;
+	inst->ri_start_time[index] = gethrtime();
+}
+
+/*
+ * method_rate_critical(restarter_inst_t *)
+ *    Return true if the number of failures within the interval
+ *    ri_crit_fail_period exceeds ri_crit_fail_allowed. The allowed failure
+ *    count defaults to RINST_START_TIMES and the implicit interval defaults
+ *    to RINST_FAILURE_RATE_NS but may be overridden with the svc properties
+ *    startd/critical_failure_count and startd/critical_failure_period which
+ *    represent the acceptable number of failures and the amount of time in
+ *    seconds in which that number may occur, respectively. Note that this time
+ *    is measured as of the transition to 'enabled' rather than wall clock
+ *    time. Not critical if ri_crit_fail_allowed is 0 or too few starts exist.
+ */
+int
+method_rate_critical(restarter_inst_t *inst)
+{
+	uint_t n = inst->ri_start_index;
+	uint_t fail_allowed = inst->ri_crit_fail_allowed;
+	hrtime_t diff_ns;
+
+	if (fail_allowed == 0 || n < fail_allowed)
 		return (0);
 
-	avg_ns =
-	    (inst->ri_start_time[(n - 1) % critical_failure_count] -
-	    inst->ri_start_time[n % critical_failure_count]) /
-	    (critical_failure_count - 1);
+	diff_ns = inst->ri_start_time[(n - 1) % fail_allowed] -
+	    inst->ri_start_time[n % fail_allowed];
 
-	return (avg_ns < critical_failure_period);
+	return (diff_ns < inst->ri_crit_fail_period);
 }
 
 /*
@@ -989,7 +1004,8 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
 			goto contract_out;
 		}
 
-		if (!WIFEXITED(ret_status)) {
+		if (!WIFEXITED(ret_status) &&
+		    WEXITSTATUS(ret_status) != SMF_EXIT_NODAEMON) {
 			/*
 			 * If method didn't exit itself (it was killed by an
 			 * external entity, etc.), consider the entire
@@ -1018,7 +1034,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
 		}
 
 		*exit_code = WEXITSTATUS(ret_status);
-		if (*exit_code != 0) {
+		if (*exit_code != 0 && *exit_code != SMF_EXIT_NODAEMON) {
 			log_error(LOG_WARNING,
 			    "%s: Method \"%s\" failed with exit status %d.\n",
 			    inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
@@ -1027,6 +1043,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
 		log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
 		    "%d.", mname, *exit_code);
 
+		/* Note: we will take this path for SMF_EXIT_NODAEMON */
 		if (*exit_code != 0)
 			goto contract_out;
 
@@ -1073,7 +1090,10 @@ assured_kill:
 	}
 
 contract_out:
-	/* Abandon contracts for transient methods & methods that fail. */
+	/*
+	 * Abandon contracts for transient methods, methods that exit with
+	 * SMF_EXIT_NODAEMON & methods that fail.
+	 */
 	transient = method_is_transient(inst, type);
 	if ((transient || *exit_code != 0 || result != 0) &&
 	    (restarter_is_kill_method(method) < 0))
@@ -1169,7 +1189,7 @@ retry:
 
 	r = method_run(&inst, info->sf_method_type, &exit_code);
 
-	if (r == 0 && exit_code == 0) {
+	if (r == 0 && (exit_code == 0 || exit_code == SMF_EXIT_NODAEMON)) {
 		/* Success! */
 		assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
 
@@ -1187,6 +1207,12 @@ retry:
 			else
 				method_remove_contract(inst, B_TRUE, B_TRUE);
 		}
+
+		/*
+		 * For methods that exit with SMF_EXIT_NODAEMON, we already
+		 * called method_remove_contract in method_run.
+		 */
+
 		/*
 		 * We don't care whether the handle was rebound because this is
 		 * the last thing we do with it.
diff --git a/usr/src/cmd/svc/startd/startd.h b/usr/src/cmd/svc/startd/startd.h
index c1062e45e0..df3e98a27b 100644
--- a/usr/src/cmd/svc/startd/startd.h
+++ b/usr/src/cmd/svc/startd/startd.h
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
  */
 
 #ifndef	_STARTD_H
@@ -82,16 +82,9 @@ extern "C" {
 
 #endif
 
-#ifndef NDEBUG
-#define	bad_error(func, err)	{					\
-	(void) fprintf(stderr, "%s:%d: %s() failed with unexpected "	\
-	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err)); \
-	abort();							\
-}
-#else
-#define	bad_error(func, err)	abort()
-#endif
-
+#define	bad_error(func, err)	/* no trailing ';': caller adds it */	\
+	uu_panic("%s:%d: %s() failed with unexpected "		\
+	    "error %d.  Aborting.\n", __FILE__, __LINE__, (func), (err))
 
 #define	min(a, b)	(((a) < (b)) ? (a) : (b))
 
@@ -405,7 +398,7 @@ typedef enum {
 
 #define	RINST_RETAKE_MASK	0x0f000000
 
-#define	RINST_START_TIMES	5		/* failures to consider */
+#define	RINST_START_TIMES	10		/* up to 10 fails to consider */
 #define	RINST_FAILURE_RATE_NS	600000000000LL	/* 1 failure/10 minutes */
 #define	RINST_WT_SVC_FAILURE_RATE_NS	NANOSEC	/* 1 failure/second */
 
@@ -427,6 +420,8 @@ typedef struct restarter_inst {
 
 	hrtime_t		ri_start_time[RINST_START_TIMES];
 	uint_t			ri_start_index;	/* times started */
+	uint_t			ri_crit_fail_allowed;
+	hrtime_t		ri_crit_fail_period;
 
 	uu_list_node_t		ri_link;
 	pthread_mutex_t		ri_lock;