diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/svc.startd/startd.c | 20 | ||||
-rw-r--r-- | usr/src/cmd/svc/startd/graph.c | 300 | ||||
-rw-r--r-- | usr/src/cmd/svc/startd/log.c | 2 | ||||
-rw-r--r-- | usr/src/cmd/svc/startd/startd.h | 4 | ||||
-rw-r--r-- | usr/src/cmd/svc/startd/transition.c | 50 |
5 files changed, 272 insertions, 104 deletions
diff --git a/usr/src/cmd/mdb/common/modules/svc.startd/startd.c b/usr/src/cmd/mdb/common/modules/svc.startd/startd.c index 53fc3d0956..1f163db4a0 100644 --- a/usr/src/cmd/mdb/common/modules/svc.startd/startd.c +++ b/usr/src/cmd/mdb/common/modules/svc.startd/startd.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -279,18 +278,18 @@ pr_vertex(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) static int logbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { - size_t logbuf_sz; + GElf_Sym sym; char *buf; char *cp; - if (mdb_readvar(&logbuf_sz, "logbuf_sz") == -1) { - mdb_warn("failed to read 'logbuf_sz'\n"); + if (mdb_lookup_by_name("logbuf", &sym) == -1) { + mdb_warn("The 'logbuf' symbol is missing.\n"); return (DCMD_ERR); } - buf = mdb_alloc(logbuf_sz, UM_SLEEP | UM_GC); + buf = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC); - if (mdb_readsym(buf, logbuf_sz, "logbuf") == -1) { + if (mdb_vread(buf, sym.st_size, sym.st_value) == -1) { mdb_warn("failed to read 'logbuf'\n"); return (DCMD_ERR); } @@ -301,7 +300,8 @@ logbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) /* Empty */ return (DCMD_OK); - if (cp >= buf + logbuf_sz || strchr(cp + 1, '\0') >= buf + logbuf_sz) { + if (cp >= buf + sym.st_size || + strchr(cp + 1, '\0') >= buf + sym.st_size) { mdb_warn("'logbuf' is corrupt\n"); return (DCMD_ERR); } diff --git a/usr/src/cmd/svc/startd/graph.c b/usr/src/cmd/svc/startd/graph.c index 5be29451f1..5e84afe6fa 100644 --- a/usr/src/cmd/svc/startd/graph.c +++ b/usr/src/cmd/svc/startd/graph.c @@ -126,6 +126,14 @@ #define VERTEX_REMOVED 0 /* vertex has been freed */ #define VERTEX_INUSE 1 /* vertex is still in use */ +/* + * Services in these states are not considered 'down' by the + * milestone/shutdown code. + */ +#define up_state(state) ((state) == RESTARTER_STATE_ONLINE || \ + (state) == RESTARTER_STATE_DEGRADED || \ + (state) == RESTARTER_STATE_OFFLINE) + static uu_list_pool_t *graph_edge_pool, *graph_vertex_pool; static uu_list_t *dgraph; static pthread_mutex_t dgraph_lock; @@ -2973,6 +2981,14 @@ init_state: */ if (milestone == MILESTONE_NONE || !(v->gv_flags & GV_INSUBGRAPH)) { + /* + * This might seem unjustified after the milestone + * transition has completed (non_subgraph_svcs == 0), + * but it's important because when we boot to + * a milestone, we set the milestone before populating + * the graph, and all of the new non-subgraph services + * need to be disabled here. + */ switch (err = libscf_set_enable_ovr(inst, 0)) { case 0: break; @@ -3852,11 +3868,11 @@ dgraph_refresh_instance(graph_vertex_t *v, scf_instance_t *inst) } /* - * Returns 1 if any instances which directly depend on the passed instance - * (or it's service) are running. + * Returns true only if none of this service's dependents are 'up' -- online, + * degraded, or offline. */ static int -has_running_nonsubgraph_dependents(graph_vertex_t *v) +is_nonsubgraph_leaf(graph_vertex_t *v) { graph_vertex_t *vv; graph_edge_t *e; @@ -3869,71 +3885,69 @@ has_running_nonsubgraph_dependents(graph_vertex_t *v) vv = e->ge_vertex; if (vv->gv_type == GVT_INST) { - if (inst_running(vv) && - ((vv->gv_flags & GV_INSUBGRAPH) == 0)) - return (1); + if ((vv->gv_flags & GV_CONFIGURED) == 0) + continue; + + if (vv->gv_flags & GV_INSUBGRAPH) + continue; + + if (up_state(vv->gv_state)) + return (0); } else { /* * For dependency group or service vertices, keep * traversing to see if instances are running. */ - if (has_running_nonsubgraph_dependents(vv)) - return (1); + if (!is_nonsubgraph_leaf(vv)) + return (0); } } - return (0); + + return (1); } /* - * For the dependency, disable the instance which makes up the dependency if - * it is not in the subgraph and running. If the dependency instance is in - * the subgraph or it is not running, continue by disabling all of its - * non-subgraph dependencies. + * Disable v temporarily. Attempt to do this by setting its enabled override + * property in the repository. If that fails, send a _DISABLE command. + * Returns 0 on success and ECONNABORTED if the repository connection is + * broken. */ -static void -disable_nonsubgraph_dependencies(graph_vertex_t *v, void *arg) +static int +disable_service_temporarily(graph_vertex_t *v, scf_handle_t *h) { - int r; - scf_handle_t *h = (scf_handle_t *)arg; - scf_instance_t *inst = NULL; + const char * const emsg = "Could not temporarily disable %s because " + "%s. Will stop service anyways. Repository status for the " + "service may be inaccurate.\n"; + const char * const emsg_cbroken = + "the repository connection was broken"; - assert(PTHREAD_MUTEX_HELD(&dgraph_lock)); - - /* Continue recursing non-inst nodes */ - if (v->gv_type != GVT_INST) - goto recurse; - - /* - * For instances that are in the subgraph or already not running, - * skip and attempt to disable their non-dependencies. - */ - if ((v->gv_flags & GV_INSUBGRAPH) || (!inst_running(v))) - goto recurse; - - /* - * If not all this instance's dependents have stopped - * running, do not disable. - */ - if (has_running_nonsubgraph_dependents(v)) - return; + scf_instance_t *inst; + int r; inst = scf_instance_create(h); if (inst == NULL) { - log_error(LOG_WARNING, "Unable to gracefully disable instance:" - " %s due to lack of resources\n", v->gv_name); - goto disable; + char buf[100]; + + (void) snprintf(buf, sizeof (buf), + "scf_instance_create() failed (%s)", + scf_strerror(scf_error())); + log_error(LOG_WARNING, emsg, v->gv_name, buf); + + graph_enable_by_vertex(v, 0, 0); + return (0); } -again: + r = scf_handle_decode_fmri(h, v->gv_name, NULL, NULL, inst, NULL, NULL, SCF_DECODE_FMRI_EXACT); if (r != 0) { switch (scf_error()) { case SCF_ERROR_CONNECTION_BROKEN: - libscf_handle_rebind(h); - goto again; + log_error(LOG_WARNING, emsg, v->gv_name, emsg_cbroken); + graph_enable_by_vertex(v, 0, 0); + return (ECONNABORTED); case SCF_ERROR_NOT_FOUND: - goto recurse; + return (0); case SCF_ERROR_HANDLE_MISMATCH: case SCF_ERROR_INVALID_ARGUMENT: @@ -3944,33 +3958,84 @@ again: scf_error()); } } + r = libscf_set_enable_ovr(inst, 0); switch (r) { case 0: scf_instance_destroy(inst); - return; + return (0); + case ECANCELED: scf_instance_destroy(inst); - goto recurse; + return (0); + case ECONNABORTED: - libscf_handle_rebind(h); - goto again; + log_error(LOG_WARNING, emsg, v->gv_name, emsg_cbroken); + graph_enable_by_vertex(v, 0, 0); + return (ECONNABORTED); + case EPERM: + log_error(LOG_WARNING, emsg, v->gv_name, + "the repository denied permission"); + graph_enable_by_vertex(v, 0, 0); + return (0); + case EROFS: - log_error(LOG_WARNING, - "Could not set %s/%s for %s: %s.\n", - SCF_PG_GENERAL_OVR, SCF_PROPERTY_ENABLED, - v->gv_name, strerror(r)); - goto disable; + log_error(LOG_WARNING, emsg, v->gv_name, + "the repository is read-only"); + graph_enable_by_vertex(v, 0, 0); + return (0); + default: bad_error("libscf_set_enable_ovr", r); + /* NOTREACHED */ } -disable: - graph_enable_by_vertex(v, 0, 0); +} + +/* + * Of the transitive instance dependencies of v, disable those which are not + * in the subgraph and which are leaves (i.e., have no dependents which are + * "up"). + */ +static void +disable_nonsubgraph_leaves(graph_vertex_t *v, void *arg) +{ + assert(PTHREAD_MUTEX_HELD(&dgraph_lock)); + + /* If v isn't an instance, recurse on its dependencies. */ + if (v->gv_type != GVT_INST) + goto recurse; + + if ((v->gv_flags & GV_CONFIGURED) == 0) + /* + * Unconfigured instances should have no dependencies, but in + * case they ever get them, + */ + goto recurse; + + /* + * If v is in the subgraph, so should all of its dependencies, so do + * nothing. + */ + if (v->gv_flags & GV_INSUBGRAPH) + return; + + /* If v isn't a leaf because it's already down, recurse. */ + if (!up_state(v->gv_state)) + goto recurse; + + /* If v is disabled but not down yet, be patient. */ + if ((v->gv_flags & GV_ENABLED) == 0) + return; + + /* If v is a leaf, disable it. */ + if (is_nonsubgraph_leaf(v)) + (void) disable_service_temporarily(v, (scf_handle_t *)arg); + return; + recurse: - graph_walk_dependencies(v, disable_nonsubgraph_dependencies, - arg); + graph_walk_dependencies(v, disable_nonsubgraph_leaves, arg); } /* @@ -4001,6 +4066,8 @@ dgraph_set_instance_state(scf_handle_t *h, const char *inst_name, return (ENOENT); } + assert(v->gv_type == GVT_INST); + switch (state) { case RESTARTER_STATE_UNINIT: case RESTARTER_STATE_DISABLED: @@ -4028,23 +4095,45 @@ dgraph_set_instance_state(scf_handle_t *h, const char *inst_name, } /* - * If this is a service shutdown and we're in the middle of a subgraph - * shutdown, we need to check if either we're the last service to go - * and should kickoff system shutdown, or if we should disable other - * services. + * Handle state changes during milestone shutdown. See + * dgraph_set_milestone(). If the repository connection is broken, + * ECONNABORTED will be returned, though a _DISABLE command will be sent for + * the vertex anyway. */ -void +int vertex_subgraph_dependencies_shutdown(scf_handle_t *h, graph_vertex_t *v, - int was_running) + restarter_instance_state_t old_state) { - int up_or_down; + int was_up, now_up; + int ret = 0; + + assert(v->gv_type == GVT_INST); - up_or_down = was_running ^ inst_running(v); + /* Don't care if we're not going to a milestone. */ + if (milestone == NULL) + return (0); + + /* Don't care if we already finished coming down. */ + if (non_subgraph_svcs == 0) + return (0); - if (up_or_down && milestone != NULL && !inst_running(v) && - ((v->gv_flags & GV_INSUBGRAPH) == 0 || - milestone == MILESTONE_NONE)) { + /* Don't care if the service is in the subgraph. */ + if (v->gv_flags & GV_INSUBGRAPH) + return (0); + + /* + * Update non_subgraph_svcs. It is the number of non-subgraph + * services which are in online, degraded, or offline. + */ + + was_up = up_state(old_state); + now_up = up_state(v->gv_state); + + if (!was_up && now_up) { + ++non_subgraph_svcs; + } else if (was_up && !now_up) { --non_subgraph_svcs; + if (non_subgraph_svcs == 0) { if (halting != -1) { do_uadmin(); @@ -4052,11 +4141,37 @@ vertex_subgraph_dependencies_shutdown(scf_handle_t *h, graph_vertex_t *v, (void) startd_thread_create(single_user_thread, NULL); } - } else { - graph_walk_dependencies(v, - disable_nonsubgraph_dependencies, (void *)h); + return (0); + } + } + + /* If this service is a leaf, it should be disabled. */ + if ((v->gv_flags & GV_ENABLED) && is_nonsubgraph_leaf(v)) { + int r; + + r = disable_service_temporarily(v, h); + switch (r) { + case 0: + break; + + case ECONNABORTED: + ret = ECONNABORTED; + break; + + default: + bad_error("disable_service_temporarily", r); } } + + /* + * If the service just came down, propagate the disable to the newly + * exposed leaves. + */ + if (was_up && !now_up) + graph_walk_dependencies(v, disable_nonsubgraph_leaves, + (void *)h); + + return (ret); } /* @@ -4605,11 +4720,30 @@ mark_subgraph(graph_edge_t *e, void *arg) } /* - * "Restrict" the graph to dependencies of fmri. We implement it by walking - * all services, override-disabling those which are not descendents of the - * instance, and removing any enable-override for the rest. milestone is set - * to the vertex which represents fmri so that the other graph operations may - * act appropriately. + * Bring down all services which are not dependencies of fmri. The + * dependencies of fmri (direct & indirect) will constitute the "subgraph", + * and will have the GV_INSUBGRAPH flag set. The rest must be brought down, + * which means the state is "disabled", "maintenance", or "uninitialized". We + * could consider "offline" to be down, and refrain from sending start + * commands for such services, but that's not strictly necessary, so we'll + * decline to intrude on the state machine. It would probably confuse users + * anyway. + * + * The services should be brought down in reverse-dependency order, so we + * can't do it all at once here. We initiate by override-disabling the leaves + * of the dependency tree -- those services which are up but have no + * dependents which are up. When they come down, + * vertex_subgraph_dependencies_shutdown() will override-disable the newly + * exposed leaves. Perseverance will ensure completion. + * + * Sometimes we need to take action when the transition is complete, like + * start sulogin or halt the system. To tell when we're done, we initialize + * non_subgraph_svcs here to be the number of services which need to come + * down. As each does, we decrement the counter. When it hits zero, we take + * the appropriate action. See vertex_subgraph_dependencies_shutdown(). + * + * In case we're coming up, we also remove any enable-overrides for the + * services which are dependencies of fmri. * * If norepository is true, the function will not change the repository. * @@ -4762,10 +4896,20 @@ again: } else { assert(isnone || (v->gv_flags & GV_INSUBGRAPH) == 0); - if (inst_running(v)) + /* + * Services which are up need to come down before + * we're done, but we can only disable the leaves + * here. + */ + + if (up_state(v->gv_state)) ++non_subgraph_svcs; - if (has_running_nonsubgraph_dependents(v)) + /* If it's already disabled, don't bother. */ + if ((v->gv_flags & GV_ENABLED) == 0) + continue; + + if (!is_nonsubgraph_leaf(v)) continue; r = libscf_set_enable_ovr(inst, 0); diff --git a/usr/src/cmd/svc/startd/log.c b/usr/src/cmd/svc/startd/log.c index 80e46d2f40..46beb9a69a 100644 --- a/usr/src/cmd/svc/startd/log.c +++ b/usr/src/cmd/svc/startd/log.c @@ -133,8 +133,6 @@ static FILE *logfile = NULL; * null, go until the second, and then go back to the beginning until the * first null. Or use ::startd_log in mdb. */ -/* LINTED unused */ -static const size_t logbuf_sz = LOGBUF_SZ; /* For mdb */ static char logbuf[LOGBUF_SZ] = ""; static pthread_mutex_t logbuf_mutex = PTHREAD_MUTEX_INITIALIZER; #endif diff --git a/usr/src/cmd/svc/startd/startd.h b/usr/src/cmd/svc/startd/startd.h index c049785db0..2c6fe5650f 100644 --- a/usr/src/cmd/svc/startd/startd.h +++ b/usr/src/cmd/svc/startd/startd.h @@ -589,8 +589,8 @@ void graph_enable_by_vertex(graph_vertex_t *, int, int); int refresh_vertex(graph_vertex_t *, scf_instance_t *); void vertex_send_event(graph_vertex_t *, restarter_event_type_t); void graph_start_if_satisfied(graph_vertex_t *); -void vertex_subgraph_dependencies_shutdown(scf_handle_t *h, - graph_vertex_t *v, int was_running); +int vertex_subgraph_dependencies_shutdown(scf_handle_t *, graph_vertex_t *, + restarter_instance_state_t); void graph_transition_sulogin(restarter_instance_state_t, restarter_instance_state_t); void graph_transition_propagate(graph_vertex_t *, propagate_event_t, diff --git a/usr/src/cmd/svc/startd/transition.c b/usr/src/cmd/svc/startd/transition.c index 94a52e05d7..66fb7d8cdb 100644 --- a/usr/src/cmd/svc/startd/transition.c +++ b/usr/src/cmd/svc/startd/transition.c @@ -35,7 +35,7 @@ * All functions are called with dgraph_lock held. * * The start action for this state machine is not explicit. The states - * (ONLINE and DEGRADED) which needs to know when they're entering the state + * (ONLINE and DEGRADED) which need to know when they're entering the state * due to a daemon restart implement this understanding by checking for * transition from uninitialized. In the future, this would likely be better * as an explicit start action instead of relying on an overloaded transition. @@ -64,8 +64,6 @@ gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v, int err; scf_instance_t *inst; - vertex_subgraph_dependencies_shutdown(h, v, gt_running(old_state)); - /* Initialize instance by refreshing it. */ err = libscf_fmri_get_instance(h, v->gv_name, &inst); @@ -103,12 +101,11 @@ gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v, return (0); } +/* ARGSUSED */ static int gt_enter_maint(scf_handle_t *h, graph_vertex_t *v, restarter_instance_state_t old_state, restarter_error_t rerr) { - vertex_subgraph_dependencies_shutdown(h, v, gt_running(old_state)); - /* * If the service was running, propagate a stop event. If the * service was not running the maintenance transition may satisfy @@ -131,12 +128,11 @@ gt_enter_maint(scf_handle_t *h, graph_vertex_t *v, return (0); } +/* ARGSUSED */ static int gt_enter_offline(scf_handle_t *h, graph_vertex_t *v, restarter_instance_state_t old_state, restarter_error_t rerr) { - vertex_subgraph_dependencies_shutdown(h, v, gt_running(old_state)); - /* * If the instance should be enabled, see if we can start it. * Otherwise send a disable command. @@ -160,12 +156,11 @@ gt_enter_offline(scf_handle_t *h, graph_vertex_t *v, return (0); } +/* ARGSUSED */ static int gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v, restarter_instance_state_t old_state, restarter_error_t rerr) { - vertex_subgraph_dependencies_shutdown(h, v, gt_running(old_state)); - /* * If the instance should be disabled, no problem. Otherwise, * send an enable command, which should result in the instance @@ -300,7 +295,8 @@ int gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr, restarter_instance_state_t old_state) { - int err = 0; + int err; + int lost_repository = 0; /* * If there's a common set of work to be done on exit from the @@ -308,6 +304,19 @@ gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr, * now there's no such work, so there are no gt_exit functions. */ + err = vertex_subgraph_dependencies_shutdown(h, v, old_state); + switch (err) { + case 0: + break; + + case ECONNABORTED: + lost_repository = 1; + break; + + default: + bad_error("vertex_subgraph_dependencies_shutdown", err); + } + /* * Now call the appropriate gt_enter function for the new state. */ @@ -339,11 +348,28 @@ gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr, default: /* Shouldn't be in an invalid state. */ #ifndef NDEBUG - uu_warn("%s:%d: Uncaught case %d.\n", __FILE__, __LINE__, + uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__, v->gv_state); #endif abort(); } - return (err); + switch (err) { + case 0: + break; + + case ECONNABORTED: + lost_repository = 1; + break; + + default: +#ifndef NDEBUG + uu_warn("%s:%d: " + "gt_enter_%s() failed with unexpected error %d.\n", + __FILE__, __LINE__, instance_state_str[v->gv_state], err); +#endif + abort(); + } + + return (lost_repository ? ECONNABORTED : 0); } |