diff options
Diffstat (limited to 'usr/src/cmd/rcm_daemon')
26 files changed, 24947 insertions, 0 deletions
diff --git a/usr/src/cmd/rcm_daemon/Makefile b/usr/src/cmd/rcm_daemon/Makefile new file mode 100644 index 0000000000..6145ba954b --- /dev/null +++ b/usr/src/cmd/rcm_daemon/Makefile @@ -0,0 +1,78 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1999-2001 by Sun Microsystems, Inc. +# All rights reserved. +# +# cmd/rcm_daemon/Makefile +# + +include ../Makefile.cmd + +# +# One for each ISA. 
+# +SUBDIRS= $(MACH) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +_msg := TARGET= _msg +lint := TARGET= lint +install_h := TARGET= install_h + +#definitions for install_h target +HDRS= rcm_module.h +ROOTHDRDIR= $(ROOT)/usr/include +ROOTHDRS= $(HDRS:%=$(ROOTHDRDIR)/%) +CHECKHDRS= $(HDRS:%.h=common/%.check) +$(ROOTHDRS) := FILEMODE = 644 + +.KEEP_STATE: + +install: install_h $(SUBDIRS) + +all lint _msg: install_h $(SUBDIRS) +clean clobber: $(SUBDIRS) + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +install_h: $(ROOTHDRS) + +check: $(CHECKHDRS) + +$(ROOTHDRDIR)/%: common/% + $(INS.file) + +$(ROOTHDRDIR): + $(INS.dir) + +common/%.check: common/%.h + $(DOT_H_CHECK) + +FRC: + +#include ../Makefile.targ diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com new file mode 100644 index 0000000000..8dd9114270 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/Makefile.com @@ -0,0 +1,205 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. 
+# +# ident "%Z%%M% %I% %E% SMI" +# + +include ../../Makefile.cmd + +COMMON = ../common +CMDINETCOMMON = $(SRC)/cmd/cmd-inet/common + +RCM_SRC = \ + $(COMMON)/rcm_event.c \ + $(COMMON)/rcm_main.c \ + $(COMMON)/rcm_impl.c \ + $(COMMON)/rcm_subr.c \ + $(COMMON)/rcm_lock.c \ + $(COMMON)/rcm_script.c + +RCM_OBJ = \ + rcm_event.o \ + rcm_main.o \ + rcm_impl.o \ + rcm_subr.o \ + rcm_lock.o \ + rcm_script.o + +COMMON_MOD_SRC = \ + $(COMMON)/filesys_rcm.c \ + $(COMMON)/dump_rcm.c \ + $(COMMON)/swap_rcm.c \ + $(COMMON)/network_rcm.c \ + $(COMMON)/ip_rcm.c \ + $(COMMON)/cluster_rcm.c \ + $(COMMON)/pool_rcm.c \ + $(COMMON)/mpxio_rcm.c \ + $(COMMON)/ip_anon_rcm.c \ + $(COMMON)/svm_rcm.c \ + $(COMMON)/mac_rcm.c + +sparc_MOD_SRC = $(COMMON)/ttymux_rcm.c + +COMMON_MOD_OBJ = \ + filesys_rcm.o \ + dump_rcm.o \ + swap_rcm.o \ + network_rcm.o \ + ip_rcm.o \ + cluster_rcm.o \ + pool_rcm.o \ + mpxio_rcm.o \ + ip_anon_rcm.o \ + svm_rcm.o \ + mac_rcm.o + +sparc_MOD_OBJ = ttymux_rcm.o + +RCM_DAEMON = rcm_daemon + +COMMON_RCM_MODS = \ + SUNW_filesys_rcm.so \ + SUNW_dump_rcm.so \ + SUNW_swap_rcm.so \ + SUNW_network_rcm.so \ + SUNW_ip_rcm.so \ + SUNW_cluster_rcm.so \ + SUNW_pool_rcm.so \ + SUNW_mpxio_rcm.so \ + SUNW_ip_anon_rcm.so \ + SUNW_svm_rcm.so \ + SUNW_mac_rcm.so + +sparc_RCM_MODS = SUNW_ttymux_rcm.so + +RCM_DIR = rcm +MOD_DIR = modules +SCRIPT_DIR = scripts + +CLOBBERFILES = $(COMMON_RCM_MODS) $($(MACH)_RCM_MODS) $(RCM_DAEMON) + +LINT_MODULES = $(COMMON_MOD_SRC:.c=.ln) $($(MACH)_MOD_SRC:.c=.ln) + +CPPFLAGS += -I.. 
-I $(CMDINETCOMMON) +CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_REENTRANT +CFLAGS += $(CCVERBOSE) $(C_PICFLAGS) + +LINTFLAGS += -u -erroff=E_FUNC_ARG_UNUSED + +LDLIBS_MODULES = +SUNW_pool_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lpool +SUNW_svm_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lmeta +SUNW_mac_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm -llaadm +SUNW_ip_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm -ldlpi +SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldlpi + +LDLIBS += -lgen -lelf -lcmd -lrcm -lnvpair -ldevinfo -lnsl -lsocket -lrt + +SRCS = $(RCM_SRC) $(COMMON_MOD_SRC) + +POFILES = $(SRCS:.c=.po) +POFILE = prcm_daemon.po + +# install specifics + +ROOTLIB_RCM = $(ROOTLIB)/$(RCM_DIR) +ROOTLIB_RCM_MOD = $(ROOTLIB_RCM)/$(MOD_DIR) +ROOTLIB_RCM_DAEMON = $(RCM_DAEMON:%=$(ROOTLIB_RCM)/%) +ROOTLIB_RCM_MODULES = $(COMMON_RCM_MODS:%=$(ROOTLIB_RCM_MOD)/%) \ + $($(MACH)_RCM_MODS:%=$(ROOTLIB_RCM_MOD)/%) +ROOTLIB_RCM_SCRIPT = $(ROOTLIB_RCM)/$(SCRIPT_DIR) +ROOTETC_RCM = $(ROOTETC)/$(RCM_DIR) +ROOTETC_RCM_SCRIPT = $(ROOTETC_RCM)/$(SCRIPT_DIR) + +all := TARGET= all +install := TARGET= install +clean := TARGET= clean +clobber := TARGET= clobber +lint := TARGET= lint + +$(ROOTETC_RCM) := GROUP = sys +$(ROOTETC_RCM_SCRIPT) := GROUP = sys + +.KEEP_STATE: + +all: $(RCM_DAEMON) $(COMMON_RCM_MODS) $($(MACH)_RCM_MODS) + +install: all \ + $(ROOTLIB_RCM) \ + $(ROOTLIB_RCM_DAEMON) \ + $(ROOTLIB_RCM_MOD) \ + $(ROOTLIB_RCM_MODULES) \ + $(ROOTLIB_RCM_SCRIPT) \ + $(ROOTETC_RCM) \ + $(ROOTETC_RCM_SCRIPT) + +clean: + $(RM) $(RCM_OBJ) $(COMMON_MOD_OBJ) $($(MACH)_MOD_OBJ) $(POFILES) + +lint: $(RCM_DAEMON).ln $(LINT_MODULES) + +$(RCM_DAEMON).ln: FRC + $(LINT.c) $(RCM_SRC) $(LDLIBS) + +%.ln: FRC + $(LINT.c) $(RCM_SRC) $(@:.ln=.c) $(LDLIBS) + +FRC: + +include ../../Makefile.targ + +$(POFILE): $(POFILES) + $(RM) $@; cat $(POFILES) > $@ + +$(RCM_DAEMON): $(RCM_OBJ) + $(LINK.c) -o $@ $< $(RCM_OBJ) $(LDLIBS) + $(POST_PROCESS) + +SUNW_%.so: %.o + $(LINK.c) -o $@ 
$(GSHARED) -h $@ $< $(LDLIBS_MODULES) + +%.o: $(COMMON)/%.c + $(COMPILE.c) -o $@ $< + +$(ROOTLIB_RCM): + $(INS.dir) + +$(ROOTLIB_RCM)/%: % + $(INS.file) + +$(ROOTLIB_RCM_MOD): + $(INS.dir) + +$(ROOTLIB_RCM_MOD)/%: % + $(INS.file) + +$(ROOTLIB_RCM_SCRIPT): + $(INS.dir) + +$(ROOTETC_RCM): + $(INS.dir) + +$(ROOTETC_RCM_SCRIPT): + $(INS.dir) diff --git a/usr/src/cmd/rcm_daemon/common/cluster_rcm.c b/usr/src/cmd/rcm_daemon/common/cluster_rcm.c new file mode 100644 index 0000000000..1d79d93f8e --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/cluster_rcm.c @@ -0,0 +1,220 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2001, 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * RCM module for managing the OS Quiesce event (SUNW_OS) in a + * clustered environment. 
+ */ + +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <sys/param.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/cladm.h> +#include "rcm_module.h" + +#define SUNW_OS "SUNW_OS" +#define OS_USAGE gettext("Sun Cluster") +#define OS_SUSPEND_ERR gettext("OS cannot be quiesced on clustered nodes") +#define OS_OFFLINE_ERR gettext("Invalid operation: OS cannot be offlined") +#define OS_REMOVE_ERR gettext("Invalid operation: OS cannot be removed") + +static int cluster_register(rcm_handle_t *); +static int cluster_unregister(rcm_handle_t *); +static int cluster_getinfo(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); +static int cluster_suspend(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); +static int cluster_resume(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int cluster_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int cluster_online(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int cluster_remove(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + +static int cluster_SUNW_os_registered = 0; + +static struct rcm_mod_ops cluster_ops = +{ + RCM_MOD_OPS_VERSION, + cluster_register, + cluster_unregister, + cluster_getinfo, + cluster_suspend, + cluster_resume, + cluster_offline, + cluster_online, + cluster_remove, + NULL, + NULL, + NULL +}; + +struct rcm_mod_ops * +rcm_mod_init() +{ + return (&cluster_ops); +} + +const char * +rcm_mod_info() +{ + return (gettext("RCM Cluster module %I%")); +} + +int +rcm_mod_fini() +{ + return (RCM_SUCCESS); +} + +static int +cluster_register(rcm_handle_t *hdl) +{ + int bootflags; + + if (cluster_SUNW_os_registered) + return (RCM_SUCCESS); + + if (_cladm(CL_INITIALIZE, CL_GET_BOOTFLAG, 
&bootflags) != 0) { + rcm_log_message(RCM_ERROR, + gettext("unable to check cluster status\n")); + return (RCM_FAILURE); + } + + /* attempt to determine if we are in cluster mode */ + + if (bootflags & CLUSTER_BOOTED) { + if (rcm_register_interest(hdl, SUNW_OS, 0, NULL) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + gettext("failed to register\n")); + return (RCM_FAILURE); + } else { + cluster_SUNW_os_registered = 1; + rcm_log_message(RCM_DEBUG, "registered " SUNW_OS + "\n"); + } + } + + return (RCM_SUCCESS); +} + +static int +cluster_unregister(rcm_handle_t *hdl) +{ + + if (cluster_SUNW_os_registered) { + if (rcm_unregister_interest(hdl, SUNW_OS, 0) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + gettext("failed to unregister")); + } + cluster_SUNW_os_registered = 0; + } + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +cluster_getinfo(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **infostr, char **errstr, nvlist_t *props, rcm_info_t **dependent) +{ + + assert(rsrcname != NULL && infostr != NULL); + + if ((*infostr = strdup(OS_USAGE)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +cluster_suspend(rcm_handle_t *hdl, char *rsrcname, id_t id, + timespec_t *interval, uint_t flags, char **errstr, + rcm_info_t **dependent) +{ + if ((*errstr = strdup(OS_SUSPEND_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +cluster_resume(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + return (RCM_SUCCESS); +} + +/* + * Offline of the OS resource is never permitted: the request is + * always rejected, whether or not RCM_FORCE is set in flags. 
+ */ +/*ARGSUSED*/ +static int +cluster_offline(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + if ((*errstr = strdup(OS_OFFLINE_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +cluster_online(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +cluster_remove(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + if ((*errstr = strdup(OS_REMOVE_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} diff --git a/usr/src/cmd/rcm_daemon/common/dump_rcm.c b/usr/src/cmd/rcm_daemon/common/dump_rcm.c new file mode 100644 index 0000000000..d2adbf76e4 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/dump_rcm.c @@ -0,0 +1,608 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2000 by Sun Microsystems, Inc. + * All rights reserved. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * RCM module for managing dump device during dynamic + * reconfiguration. + */ +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <sys/dumpadm.h> +#include <sys/param.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "rcm_module.h" + +/* cache flags */ +#define DUMP_CACHE_NEW 0x01 +#define DUMP_CACHE_STALE 0x02 +#define DUMP_CACHE_OFFLINED 0x04 + +#define DUMPADM "/usr/sbin/dumpadm -d " +#define DUMPADM_SWAP DUMPADM"swap" + +typedef struct dump_conf { + char device[MAXPATHLEN]; + int conf_flags; /* defs in <sys/dumpadm.h> */ + int cache_flags; + struct dump_conf *next; + struct dump_conf *prev; +} dump_conf_t; + +/* + * Registration cache. + * + * N.B. Although we currently only support a single + * dump device, the cache is multi-entry since there + * may be multiple outstanding registrations. 
+ */ +static dump_conf_t *cache; +static mutex_t cache_lock; + +static int dump_register(rcm_handle_t *); +static int dump_unregister(rcm_handle_t *); +static int dump_getinfo(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); +static int dump_suspend(rcm_handle_t *, char *, id_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int dump_resume(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int dump_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int dump_online(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int dump_remove(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + +static int alloc_usage(char **, int); +static void cache_insert(dump_conf_t *); +static dump_conf_t *cache_lookup(char *); +static void cache_remove(dump_conf_t *); +static dump_conf_t *dump_conf_alloc(void); +static int dump_configure(dump_conf_t *, char **); +static int dump_relocate(dump_conf_t *, char **); +static void free_cache(void); +static void log_cmd_status(int); +static int update_cache(rcm_handle_t *); + +static struct rcm_mod_ops dump_ops = +{ + RCM_MOD_OPS_VERSION, + dump_register, + dump_unregister, + dump_getinfo, + dump_suspend, + dump_resume, + dump_offline, + dump_online, + dump_remove, + NULL, + NULL, + NULL +}; + +struct rcm_mod_ops * +rcm_mod_init() +{ + return (&dump_ops); +} + +const char * +rcm_mod_info() +{ + return ("RCM Dump module %I%"); +} + +int +rcm_mod_fini() +{ + free_cache(); + (void) mutex_destroy(&cache_lock); + + return (RCM_SUCCESS); +} + +static int +dump_register(rcm_handle_t *hdl) +{ + return (update_cache(hdl)); +} + +static int +dump_unregister(rcm_handle_t *hdl) +{ + dump_conf_t *dc; + + (void) mutex_lock(&cache_lock); + while ((dc = cache) != NULL) { + cache = cache->next; + (void) rcm_unregister_interest(hdl, dc->device, 0); + free(dc); + } + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + 
+/*ARGSUSED*/ +static int +dump_getinfo(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **infostr, char **errstr, nvlist_t *props, rcm_info_t **dependent) +{ + dump_conf_t *dc; + int conf_flags; + + assert(rsrcname != NULL && infostr != NULL); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + rcm_log_message(RCM_ERROR, "unknown resource: %s\n", + rsrcname); + return (RCM_FAILURE); + } + conf_flags = dc->conf_flags; + (void) mutex_unlock(&cache_lock); + + return ((alloc_usage(infostr, conf_flags) == 0) ? + RCM_SUCCESS : RCM_FAILURE); +} + +/* + * Relocate dump device to maintain availability during suspension. + * Fail request if unable to relocate. + */ +/*ARGSUSED*/ +static int +dump_suspend(rcm_handle_t *hdl, char *rsrcname, id_t id, timespec_t *interval, + uint_t flags, char **errstr, rcm_info_t **dependent) +{ + dump_conf_t *dc; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + if (flags & RCM_QUERY) + return (RCM_SUCCESS); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = dump_relocate(dc, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/*ARGSUSED*/ +static int +dump_resume(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + dump_conf_t *dc; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = dump_configure(dc, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/* + * By default, reject offline. If offline request is + * forced, attempt to relocate the dump device. 
+ */ +/*ARGSUSED*/ +static int +dump_offline(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + dump_conf_t *dc; + int conf_flags; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + if ((flags & RCM_FORCE) && (flags & RCM_QUERY)) + return (RCM_SUCCESS); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + if (flags & RCM_FORCE) { + rv = dump_relocate(dc, errstr); + (void) mutex_unlock(&cache_lock); + return (rv); + } + + /* default */ + conf_flags = dc->conf_flags; + (void) mutex_unlock(&cache_lock); + (void) alloc_usage(errstr, conf_flags); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +dump_online(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + dump_conf_t *dc; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = dump_configure(dc, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/*ARGSUSED*/ +static int +dump_remove(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + dump_conf_t *dc; + + assert(rsrcname != NULL && errstr != NULL); + + (void) mutex_lock(&cache_lock); + if ((dc = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + cache_remove(dc); + free(dc); + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/* + * For dedicated dump devices, invoke dumpadm(1M) + * to relocate dump to swap. For dump device on + * swap, this is a no-op as the RCM swap module + * will relocate by invoking swap(1M). + * + * Call with cache_lock held. 
+ */ +static int +dump_relocate(dump_conf_t *dc, char **errstr) +{ + int stat; + + /* + * This state may get out of sync for a dump device on swap, + * since we will not know if the swap module succeeds. + * Worst case is we end up invoking dumpadm to configure + * the same device during a rollback. + */ + dc->cache_flags |= DUMP_CACHE_OFFLINED; + + /* RCM swap module will handle non-dedicated */ + if (!(dc->conf_flags & DUMP_EXCL)) + return (RCM_SUCCESS); + + rcm_log_message(RCM_TRACE1, "%s\n", DUMPADM_SWAP); + if ((stat = rcm_exec_cmd(DUMPADM_SWAP)) != 0) { + log_cmd_status(stat); + *errstr = strdup(gettext("unable to relocate dump device")); + dc->cache_flags &= ~DUMP_CACHE_OFFLINED; + return (RCM_FAILURE); + } + + return (RCM_SUCCESS); +} + +/* + * (Re)Configure dump device. + * Call with cache_lock held. + */ +static int +dump_configure(dump_conf_t *dc, char **errstr) +{ + char cmd[sizeof (DUMPADM) + MAXPATHLEN]; + int stat; + + assert(dc != NULL && dc->device != NULL); + + /* minor optimization */ + if (!(dc->cache_flags & DUMP_CACHE_OFFLINED)) + return (RCM_SUCCESS); + + (void) snprintf(cmd, sizeof (cmd), "%s%s", DUMPADM, dc->device); + rcm_log_message(RCM_TRACE1, "%s\n", cmd); + if ((stat = rcm_exec_cmd(cmd)) != 0) { + log_cmd_status(stat); + *errstr = strdup(gettext("unable to configure dump device")); + return (RCM_FAILURE); + } + dc->cache_flags &= ~DUMP_CACHE_OFFLINED; + + return (RCM_SUCCESS); +} + +/* + * Returns current dump configuration + */ +static dump_conf_t * +dump_conf_alloc(void) +{ + dump_conf_t *dc; + struct stat sbuf; + int fd; + char *err; + + if ((dc = calloc(1, sizeof (*dc))) == NULL) { + rcm_log_message(RCM_ERROR, "calloc failure\n"); + return (NULL); + } + + if ((fd = open("/dev/dump", O_RDONLY)) == -1) { + /* + * Suppress reporting if no logical link. 
+ */ + if (stat("/dev/dump", &sbuf) == 0 && + (fd = open("/dev/dump", O_RDONLY)) == -1) { + rcm_log_message(RCM_ERROR, + "failed to open /dev/dump: %s\n", + ((err = strerror(errno)) == NULL) ? "" : err); + } + + if (fd == -1) { + free(dc); + return (NULL); + } + } + + if (ioctl(fd, DIOCGETDEV, dc->device) == -1) { + if (errno == ENODEV) { + dc->device[0] = '\0'; + } else { + rcm_log_message(RCM_ERROR, "ioctl: %s\n", + ((err = strerror(errno)) == NULL) ? "" : err); + (void) close(fd); + free(dc); + return (NULL); + } + } + + if (dc->device[0] != '\0') { + if ((dc->conf_flags = ioctl(fd, DIOCGETCONF, 0)) == -1) { + rcm_log_message(RCM_ERROR, "ioctl: %s\n", + ((err = strerror(errno)) == NULL) ? "" : err); + (void) close(fd); + free(dc); + return (NULL); + } + } + (void) close(fd); + + return (dc); +} + +static int +update_cache(rcm_handle_t *hdl) +{ + dump_conf_t *ent, *curr_dump, *tmp; + int rv = RCM_SUCCESS; + + if ((curr_dump = dump_conf_alloc()) == NULL) + return (RCM_FAILURE); + + (void) mutex_lock(&cache_lock); + + /* + * pass 1 - mark all current registrations stale + */ + for (ent = cache; ent != NULL; ent = ent->next) { + ent->cache_flags |= DUMP_CACHE_STALE; + } + + /* + * update current dump conf + */ + if (curr_dump->device[0] == '\0') { + free(curr_dump); + } else if ((ent = cache_lookup(curr_dump->device)) != NULL) { + ent->cache_flags &= ~DUMP_CACHE_STALE; + ent->conf_flags = curr_dump->conf_flags; + free(curr_dump); + } else { + curr_dump->cache_flags |= DUMP_CACHE_NEW; + cache_insert(curr_dump); + } + + /* + * pass 2 - register, unregister, or no-op based on cache flags + */ + ent = cache; + while (ent != NULL) { + if (ent->cache_flags & DUMP_CACHE_OFFLINED) { + ent = ent->next; + continue; + } + + if (ent->cache_flags & DUMP_CACHE_STALE) { + if (rcm_unregister_interest(hdl, ent->device, 0) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, "failed to " + "unregister %s\n", ent->device); + } + tmp = ent; + ent = ent->next; + cache_remove(tmp); + 
free(tmp); + continue; + } + + if (!(ent->cache_flags & DUMP_CACHE_NEW)) { + ent = ent->next; + continue; + } + + if (rcm_register_interest(hdl, ent->device, 0, NULL) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, "failed to register " + "%s\n", ent->device); + rv = RCM_FAILURE; + } else { + rcm_log_message(RCM_DEBUG, "registered %s\n", + ent->device); + ent->cache_flags &= ~DUMP_CACHE_NEW; + } + ent = ent->next; + } + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/* + * Call with cache_lock held. + */ +static dump_conf_t * +cache_lookup(char *rsrc) +{ + dump_conf_t *dc; + + for (dc = cache; dc != NULL; dc = dc->next) { + if (strcmp(rsrc, dc->device) == 0) { + return (dc); + } + } + return (NULL); +} + +/* + * Link to front of list. + * Call with cache_lock held. + */ +static void +cache_insert(dump_conf_t *ent) +{ + ent->next = cache; + if (ent->next) + ent->next->prev = ent; + ent->prev = NULL; + cache = ent; +} + +/* + * Call with cache_lock held. + */ +static void +cache_remove(dump_conf_t *ent) +{ + if (ent->next != NULL) { + ent->next->prev = ent->prev; + } + if (ent->prev != NULL) { + ent->prev->next = ent->next; + } else { + cache = ent->next; + } + ent->next = NULL; + ent->prev = NULL; +} + +static void +free_cache(void) +{ + dump_conf_t *dc; + + (void) mutex_lock(&cache_lock); + while ((dc = cache) != NULL) { + cache = cache->next; + free(dc); + } + (void) mutex_unlock(&cache_lock); +} + +static int +alloc_usage(char **cpp, int conf_flags) +{ + /* simplifies message translation */ + if (conf_flags & DUMP_EXCL) { + *cpp = strdup(gettext("dump device (dedicated)")); + } else { + *cpp = strdup(gettext("dump device (swap)")); + } + + if (*cpp == NULL) { + rcm_log_message(RCM_ERROR, "strdup failure\n"); + return (-1); + } + return (0); +} + +static void +log_cmd_status(int stat) +{ + char *err; + + if (stat == -1) { + rcm_log_message(RCM_ERROR, "wait: %s\n", + ((err = strerror(errno)) == NULL) ? 
"" : err); + } else if (WIFEXITED(stat)) { + rcm_log_message(RCM_ERROR, "exit status: %d\n", + WEXITSTATUS(stat)); + } else { + rcm_log_message(RCM_ERROR, "wait status: %d\n", stat); + } +} diff --git a/usr/src/cmd/rcm_daemon/common/filesys_rcm.c b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c new file mode 100644 index 0000000000..2103ba99ae --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c @@ -0,0 +1,1203 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This module adds support to the RCM framework for mounted filesystems. 
+ * + * The module provides this functionality: + * 1) reports device usage for mounted filesystems + * 2) prevents offline operations for mounted resources + * 3) prevents suspend operations (unless forced) of those filesystems + * deemed critical for the continued operation of the OS + * 4) propagates RCM operations from mounted resources to the consumers + * of files within the mounted filesystems + */ + +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <synch.h> +#include <libintl.h> +#include <errno.h> +#include <sys/mnttab.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/utssys.h> + +#include "rcm_module.h" + +/* Definitions */ + +#define HASH_DEFAULT 4 +#define HASH_THRESHOLD 256 + +#define OPT_IGNORE "ignore" + +#define MSG_HDR_STD gettext("mounted filesystem") +#define MSG_HDR_STD_MULTI gettext("mounted filesystems") +#define MSG_HDR_CRIT gettext("cannot suspend filesystem") +#define MSG_HDR_CRIT_MULTI gettext("cannot suspend filesystems") +#define MSG_SEPARATOR gettext(", ") +#define MSG_FAIL_USAGE gettext("failed to construct usage string.") +#define MSG_FAIL_DEPENDENTS gettext("failed while calling dependents.") +#define MSG_FAIL_REMOVE gettext("filesystems cannot be removed.") +#define MSG_FAIL_INTERNAL gettext("internal processing failure.") + +typedef struct hashentry { + int n_mounts; + char *special; + char **mountps; + struct hashentry *next; +} hashentry_t; + +typedef struct { + time_t timestamp; + uint32_t hash_size; + hashentry_t **mounts; +} cache_t; + +/* Forward Declarations */ + +/* module interface routines */ +static int mnt_register(rcm_handle_t *); +static int mnt_unregister(rcm_handle_t *); +static int mnt_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **, + nvlist_t *, rcm_info_t **); +static int mnt_suspend(rcm_handle_t *, char *, id_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int mnt_resume(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int 
mnt_offline(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mnt_online(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mnt_remove(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); + +/* cache functions */ +static cache_t *cache_create(); +static int cache_insert(cache_t *, struct mnttab *); +static int cache_sync(rcm_handle_t *, cache_t **); +static hashentry_t *cache_lookup(cache_t *, char *); +static void free_cache(cache_t **); +static void free_entry(hashentry_t **); +static void free_list(char **); + +/* miscellaneous functions */ +static uint32_t hash(uint32_t, char *); +static void register_rsrc(rcm_handle_t *, char *); +static void unregister_rsrc(rcm_handle_t *, char *); +static char *create_message(char *, char *, char **); +static int detect_critical_failure(char **, uint_t, char **); +static int is_critical(char *); +static int use_cache(char *, char **, char ***); +static void prune_dependents(char **, char *); +static char **create_dependents(hashentry_t *); + +/* Module-Private data */ + +static struct rcm_mod_ops mnt_ops = +{ + RCM_MOD_OPS_VERSION, + mnt_register, + mnt_unregister, + mnt_getinfo, + mnt_suspend, + mnt_resume, + mnt_offline, + mnt_online, + mnt_remove +}; + +static cache_t *mnt_cache; +static mutex_t cache_lock; + +/* Module Interface Routines */ + +/* + * rcm_mod_init() + * + * Called when module is loaded. Returns the ops vector. + */ +struct rcm_mod_ops * +rcm_mod_init() +{ + return (&mnt_ops); +} + +/* + * rcm_mod_info() + * + * Returns a string identifying this module. + */ +const char * +rcm_mod_info() +{ + return ("File system module %I%"); +} + +/* + * rcm_mod_fini() + * + * Called when module is unloaded. Frees up all used memory. + * + * Locking: the cache is locked for the duration of this function. 
+ */ +int +rcm_mod_fini() +{ + (void) mutex_lock(&cache_lock); + free_cache(&mnt_cache); + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/* + * mnt_register() + * + * Called to synchronize the module's registrations. Results in the + * construction of a new cache, destruction of any old cache data, + * and a full synchronization of the module's registrations. + * + * Locking: the cache is locked for the duration of this function. + */ +int +mnt_register(rcm_handle_t *hd) +{ + assert(hd != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: register()\n"); + + (void) mutex_lock(&cache_lock); + + /* cache_sync() does all of the necessary work */ + if (cache_sync(hd, &mnt_cache) < 0) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to synchronize cache (%s).\n", + strerror(errno)); + (void) mutex_unlock(&cache_lock); + return (RCM_FAILURE); + } + + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/* + * mnt_unregister() + * + * Manually walk through the cache, unregistering all the special + * files and mount points. + * + * Locking: the cache is locked throughout the execution of this + * routine because it reads and modifies cache links continuously. + */ +int +mnt_unregister(rcm_handle_t *hd) +{ + uint32_t index; + hashentry_t *entry; + + assert(hd != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: unregister()\n"); + + (void) mutex_lock(&cache_lock); + + /* Unregister everything in the cache */ + if (mnt_cache) { + for (index = 0; index < mnt_cache->hash_size; index++) { + for (entry = mnt_cache->mounts[index]; entry != NULL; + entry = entry->next) { + unregister_rsrc(hd, entry->special); + } + } + } + + /* Destroy the cache */ + free_cache(&mnt_cache); + + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/* + * mnt_offline() + * + * Filesystem resources cannot be offlined. Always returns failure. + * Since no real action is taken, QUERY or not doesn't matter. 
+ */ +int +mnt_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **dependent_info) +{ + char **dependents; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: offline(%s)\n", rsrc); + + /* Retrieve necessary info from the cache */ + if (use_cache(rsrc, errorp, &dependents) < 0) + return (RCM_FAILURE); + + /* Convert the gathered dependents into an error message */ + *errorp = create_message(MSG_HDR_STD, MSG_HDR_STD_MULTI, dependents); + if (*errorp == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to construct offline message (%s).\n", + strerror(errno)); + } + free_list(dependents); + + return (RCM_FAILURE); +} + +/* + * mnt_online() + * + * Filesystem resources aren't offlined, so there's really nothing to do + * here. + */ +int +mnt_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, + rcm_info_t **dependent_reason) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: online(%s)\n", rsrc); + + return (RCM_SUCCESS); +} + +/* + * mnt_getinfo() + * + * Report how a given resource is in use by this module. And also + * possibly include dependent consumers of the mounted filesystems. 
+ */ +int +mnt_getinfo(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **usagep, + char **errorp, nvlist_t *props, rcm_info_t **depend_info) +{ + int rv = RCM_SUCCESS; + char **dependents; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(usagep != NULL); + assert(errorp != NULL); + assert(props != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: getinfo(%s)\n", rsrc); + + /* Retrieve necessary info from the cache */ + if (use_cache(rsrc, errorp, &dependents) < 0) + return (RCM_FAILURE); + + /* Convert the gathered dependents into a usage message */ + *usagep = create_message(MSG_HDR_STD, MSG_HDR_STD_MULTI, dependents); + if (*usagep == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to construct usage message (%s).\n", + strerror(errno)); + *errorp = strdup(MSG_FAIL_USAGE); + free_list(dependents); + return (RCM_FAILURE); + } + + /* Recurse on dependents if necessary */ + if ((flag & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) { + prune_dependents(dependents, rsrc); + if (dependents[0] != NULL) { + if ((rv = rcm_get_info_list(hd, dependents, flag, + depend_info)) != RCM_SUCCESS) { + *errorp = strdup(MSG_FAIL_DEPENDENTS); + } + } + } + + /* Free up info retrieved from the cache */ + free_list(dependents); + + return (rv); +} + +/* + * mnt_suspend() + * + * Notify all dependents that the resource is being suspended. + * Since no real action is taken, QUERY or not doesn't matter. 
+ */ +int +mnt_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, + uint_t flag, char **errorp, rcm_info_t **depend_info) +{ + int rv = RCM_SUCCESS; + char **dependents; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(interval != NULL); + assert(errorp != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: suspend(%s)\n", rsrc); + + /* Retrieve necessary info from the cache */ + if (use_cache(rsrc, errorp, &dependents) < 0) + return (RCM_FAILURE); + + /* Unforced suspensions fail if any of the dependents are critical */ + if (detect_critical_failure(errorp, flag, dependents)) { + free_list(dependents); + return (RCM_FAILURE); + } + + /* Recurse on dependents if necessary */ + if ((flag & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) { + prune_dependents(dependents, rsrc); + if (dependents[0] != NULL) + if ((rv = rcm_request_suspend_list(hd, dependents, flag, + interval, depend_info)) != RCM_SUCCESS) { + *errorp = strdup(MSG_FAIL_DEPENDENTS); + } + } + free_list(dependents); + + return (rv); +} + +/* + * mnt_resume() + * + * Resume all the dependents of a suspended filesystem. 
+ */ +int +mnt_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, + rcm_info_t **depend_info) +{ + int rv = RCM_SUCCESS; + char **dependents; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: resume(%s)\n", rsrc); + + /* Retrieve necessary info from the cache */ + if (use_cache(rsrc, errorp, &dependents) < 0) + return (RCM_FAILURE); + + /* Recurse on dependents if necessary */ + if ((flag & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) { + prune_dependents(dependents, rsrc); + if (dependents[0] != NULL) { + if ((rv = rcm_notify_resume_list(hd, dependents, flag, + depend_info)) != RCM_SUCCESS) { + *errorp = strdup(MSG_FAIL_DEPENDENTS); + } + } + } + free_list(dependents); + + return (rv); +} + +/* + * mnt_remove() + * + * Remove should never be called since offline always fails. + * + * Return failure and log the mistake if a remove is ever received for a + * mounted filesystem resource. + */ +int +mnt_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, + rcm_info_t **depend_info) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + rcm_log_message(RCM_TRACE1, "FILESYS: remove(%s)\n", rsrc); + + /* Log the mistake */ + rcm_log_message(RCM_ERROR, "FILESYS: invalid remove of \"%s\"\n", rsrc); + *errorp = strdup(MSG_FAIL_REMOVE); + + return (RCM_FAILURE); +} + +/* + * Cache management routines + */ + +/* + * cache_create() + * + * This routine constructs a new cache of the current mnttab file. + * + * Locking: the cache must be locked prior to calling this function. + * + * Return Values: NULL with errno set on failure, new cache point on + * success. 
+ */ +static cache_t * +cache_create() +{ + FILE *fp; + cache_t *cache; + int i; + uint32_t size; + struct stat st; + struct mnttab mt; + + /* + * To keep the hash table relatively sparse, default values are + * used for smaller mnttab files and these values are scaled up + * as a fraction of the total mnttab file size for larger ones. + */ + if (stat(MNTTAB, &st) < 0) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to stat \"%s\" (%s).\n", MNTTAB, + strerror(errno)); + errno = EBADF; + return (NULL); + } + if (st.st_size > HASH_THRESHOLD) { + size = st.st_size / HASH_THRESHOLD; + for (i = 0; size > 1; i++, size >>= 1); + for (; i > -1; i--, size <<= 1); + } else { + size = HASH_DEFAULT; + } + + /* Allocate a new empty cache */ + if ((cache = (cache_t *)calloc(1, sizeof (cache_t))) == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate cache (%s).\n", + strerror(errno)); + errno = ENOMEM; + return (NULL); + } + cache->hash_size = size; + cache->timestamp = st.st_mtime; + + /* Allocate an empty hash table for the registered special devices */ + cache->mounts = (hashentry_t **)calloc(size, sizeof (hashentry_t *)); + if (cache->mounts == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate mount table (%s).\n", + strerror(errno)); + free_cache(&cache); + errno = ENOMEM; + return (NULL); + } + + /* Open the mnttab file */ + if ((fp = fopen(MNTTAB, "r")) == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to open \"%s\" (%s).\n", MNTTAB, + strerror(errno)); + free_cache(&cache); + errno = EIO; + return (NULL); + } + + /* Insert each mnttab entry into the cache */ + while (getmntent(fp, &mt) == 0) { + + /* Well, not each entry... 
some are meant to be ignored */ + if ((mt.mnt_mntopts != NULL) && + (hasmntopt(&mt, OPT_IGNORE) != NULL)) + continue; + + if (cache_insert(cache, &mt) < 0) { + rcm_log_message(RCM_ERROR, + "FILESYS: cache insertion failure (%s).\n", + strerror(errno)); + free_cache(&cache); + (void) fclose(fp); + errno = EFAULT; + return (NULL); + } + } + + /* Close the mnttab file */ + (void) fclose(fp); + + return (cache); +} + +/* + * free_cache() + * + * Free up all the memory associated with a cache. + * + * Locking: the cache must be locked before calling this function. + */ +static void +free_cache(cache_t **cachep) +{ + uint32_t index; + hashentry_t *entry; + hashentry_t *entry_tmp; + + /* Do nothing with empty caches */ + if ((cachep == NULL) || (*cachep == NULL)) + return; + + if ((*cachep)->mounts) { + /* Walk through the hashtable, emptying it */ + for (index = 0; index < (*cachep)->hash_size; index++) { + entry = (*cachep)->mounts[index]; + while (entry) { + entry_tmp = entry->next; + free_entry(&entry); + entry = entry_tmp; + } + } + free((*cachep)->mounts); + } + + free(*cachep); + *cachep = NULL; +} + +/* + * free_entry() + * + * Free up memory associated with a hashtable entry. + * + * Locking: the cache must be locked before calling this function. + */ +static void +free_entry(hashentry_t **entryp) +{ + if (entryp) { + if (*entryp) { + if ((*entryp)->special) + free((*entryp)->special); + free_list((*entryp)->mountps); + free(*entryp); + } + *entryp = NULL; + } +} + +/* + * free_list() + * + * Free up memory associated with a null terminated list of names. + */ +static void +free_list(char **list) +{ + int i; + + if (list) { + for (i = 0; list[i] != NULL; i++) + free(list[i]); + free(list); + } +} + +/* + * cache_sync() + * + * Resynchronize the mnttab cache with the mnttab file. + * + * Locking: the cache must be locked before calling this function. + * + * Return Values: -1 with errno set on failure, 0 on success. 
+ */ +static int +cache_sync(rcm_handle_t *hd, cache_t **cachep) +{ + uint32_t index; + cache_t *new_cache; + cache_t *old_cache; + hashentry_t *entry; + struct stat st; + + /* Only accept valid arguments */ + if ((hd == NULL) || (cachep == NULL)) { + rcm_log_message(RCM_ERROR, + "FILESYS: invalid arguments to cache_sync().\n"); + errno = EINVAL; + return (-1); + } + + /* Do nothing if there's already an up-to-date cache */ + old_cache = *cachep; + if (old_cache) { + if (stat(MNTTAB, &st) == 0) { + if (old_cache->timestamp >= st.st_mtime) { + return (0); + } + } else { + rcm_log_message(RCM_WARNING, + "FILESYS: failed to stat \"%s\", cache is stale " + "(%s).\n", MNTTAB, strerror(errno)); + errno = EIO; + return (-1); + } + } + + /* Create a new cache based on the new mnttab file. */ + if ((new_cache = cache_create()) == NULL) { + rcm_log_message(RCM_WARNING, + "FILESYS: failed creating cache, cache is stale (%s).\n", + strerror(errno)); + errno = EIO; + return (-1); + } + + /* Register any specials found in the new cache but not the old one */ + for (index = 0; index < new_cache->hash_size; index++) { + for (entry = new_cache->mounts[index]; entry != NULL; + entry = entry->next) { + if (cache_lookup(old_cache, entry->special) == NULL) { + register_rsrc(hd, entry->special); + } + } + } + + /* Pass the new cache pointer to the calling function */ + *cachep = new_cache; + + /* If there wasn't an old cache, return successfully now */ + if (old_cache == NULL) + return (0); + + /* + * If there was an old cache, then unregister whatever specials it + * contains that aren't in the new cache. And then destroy the old + * cache. 
+ */ + for (index = 0; index < old_cache->hash_size; index++) { + for (entry = old_cache->mounts[index]; entry != NULL; + entry = entry->next) { + if (cache_lookup(new_cache, entry->special) == NULL) { + unregister_rsrc(hd, entry->special); + } + } + } + free_cache(&old_cache); + + return (0); +} + +/* + * cache_insert() + * + * Given a cache and a mnttab entry, this routine inserts that entry in + * the cache. The mnttab entry's special device is added to the 'mounts' + * hashtable of the cache, and the entry's mountp value is added to the + * list of associated mountpoints for the corresponding hashtable entry. + * + * Locking: the cache must be locked before calling this function. + * + * Return Values: -1 with errno set on failure, 0 on success. + */ +static int +cache_insert(cache_t *cache, struct mnttab *mt) +{ + uint32_t index; + hashentry_t *entry; + char **mountps; + + /* Only accept valid arguments */ + if ((cache == NULL) || + (cache->mounts == NULL) || + (mt == NULL) || + (mt->mnt_special == NULL) || + (mt->mnt_mountp == NULL)) { + errno = EINVAL; + return (-1); + } + + /* + * Disregard any non-loopback mounts whose special device names + * don't begin with "/dev". + */ + if ((strncmp(mt->mnt_special, "/dev", strlen("/dev")) != 0) && + (strcmp(mt->mnt_fstype, "lofs") != 0)) + return (0); + + /* + * Find the special device's entry in the mounts hashtable, allocating + * a new entry if necessary. 
+ */ + index = hash(cache->hash_size, mt->mnt_special); + for (entry = cache->mounts[index]; entry != NULL; entry = entry->next) { + if (strcmp(entry->special, mt->mnt_special) == 0) + break; + } + if (entry == NULL) { + entry = (hashentry_t *)calloc(1, sizeof (hashentry_t)); + if ((entry == NULL) || + ((entry->special = strdup(mt->mnt_special)) == NULL)) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate special device name " + "(%s).\n", strerror(errno)); + free_entry(&entry); + errno = ENOMEM; + return (-1); + } + entry->next = cache->mounts[index]; + cache->mounts[index] = entry; + } + + /* + * Keep entries in the list of mounts unique, so exit early if the + * mount is already in the list. + */ + for (index = 0; index < entry->n_mounts; index++) { + if (strcmp(entry->mountps[index], mt->mnt_mountp) == 0) + return (0); + } + + /* + * Add this mountpoint to the list of mounts associated with the + * special device. + */ + mountps = (char **)realloc(entry->mountps, + (entry->n_mounts + 2) * sizeof (char *)); + if ((mountps == NULL) || + ((mountps[entry->n_mounts] = strdup(mt->mnt_mountp)) == NULL)) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate mountpoint name (%s).\n", + strerror(errno)); + if (entry->n_mounts == 0) { + cache->mounts[index] = entry->next; + free_entry(&entry); + } + errno = ENOMEM; + return (-1); + } + mountps[entry->n_mounts + 1] = NULL; + entry->n_mounts++; + entry->mountps = mountps; + + return (0); +} + +/* + * cache_lookup() + * + * Searches the cached table of mounts for a special device entry. + * + * Locking: the cache must be locked before calling this function. + * + * Return Value: NULL with errno set if failure, pointer to existing + * cache entry when successful. 
+ */ +static hashentry_t * +cache_lookup(cache_t *cache, char *rsrc) +{ + uint32_t index; + hashentry_t *entry; + + /* Only accept valid arguments */ + if ((cache == NULL) || (cache->mounts == NULL) || (rsrc == NULL)) { + errno = EINVAL; + return (NULL); + } + + /* Search the cached mounts table for the resource's entry */ + index = hash(cache->hash_size, rsrc); + if (cache->mounts[index]) { + for (entry = cache->mounts[index]; entry != NULL; + entry = entry->next) { + if (strcmp(entry->special, rsrc) == 0) + return (entry); + } + } + + errno = ENOENT; + return (NULL); +} + +/* + * Miscellaneous Functions + */ + +/* + * hash() + * + * A naive hashing function that converts a string 's' to an index in a + * hash table of size 'h'. It seems to spread entries around well enough. + */ +static uint32_t +hash(uint32_t h, char *s) +{ + uint32_t sum = 0; + unsigned char *byte; + + if ((byte = (unsigned char *)s) != NULL) { + while (*byte) { + sum += 0x3F & (uint32_t)*byte; + byte++; + } + } + + return (sum % h); +} + +/* + * register_rsrc() + * + * Registers for any given resource, unless it's "/". + */ +static void +register_rsrc(rcm_handle_t *hd, char *rsrc) +{ + /* Only accept valid arguments */ + if ((hd == NULL) || (rsrc == NULL)) + return; + + /* + * Register any resource other than "/" or "/devices" + */ + if ((strcmp(rsrc, "/") != 0) && (strcmp(rsrc, "/devices") != 0)) { + rcm_log_message(RCM_DEBUG, "FILESYS: registering %s\n", rsrc); + if (rcm_register_interest(hd, rsrc, 0, NULL) != RCM_SUCCESS) { + rcm_log_message(RCM_WARNING, + "FILESYS: failed to register %s\n", rsrc); + } + } + +} + +/* + * unregister_rsrc() + * + * Unregister a resource. This does a little filtering since we know + * "/" can't be registered, so we never bother unregistering for it. 
+ */ +static void +unregister_rsrc(rcm_handle_t *hd, char *rsrc) +{ + assert(hd != NULL); + assert(rsrc != NULL); + + /* Unregister any resource other than "/" */ + if (strcmp(rsrc, "/") != 0) { + rcm_log_message(RCM_DEBUG, "FILESYS: unregistering %s\n", rsrc); + (void) rcm_unregister_interest(hd, rsrc, 0); + } +} + +/* + * create_message() + * + * Given some header strings and a list of dependent names, this + * constructs a single string. If there's only one dependent, the + * string consists of the first header and the only dependent appended + * to the end of the string enclosed in quotemarks. If there are + * multiple dependents, then the string uses the second header and the + * full list of dependents is appended at the end as a comma separated + * list of names enclosed in quotemarks. + */ +static char * +create_message(char *header, char *header_multi, char **dependents) +{ + int i; + size_t len; + int ndependents; + char *msg_buf; + char *msg_header; + char *separator = MSG_SEPARATOR; + + assert(header != NULL); + assert(header_multi != NULL); + assert(dependents != NULL); + + /* Count the number of dependents */ + for (ndependents = 0; dependents[ndependents] != NULL; ndependents++); + + /* If there are no dependents, fail */ + if (ndependents == 0) { + errno = ENOENT; + return (NULL); + } + + /* Pick the appropriate header to use based on amount of dependents */ + if (ndependents == 1) { + msg_header = header; + } else { + msg_header = header_multi; + } + + /* Compute the size required for the message buffer */ + len = strlen(msg_header) + 2; /* +2 for the space and a NULL */ + for (i = 0; dependents[i] != NULL; i++) + len += strlen(dependents[i]) + 2; /* +2 for quotemarks */ + len += strlen(separator) * (ndependents - 1); + + /* Allocate the message buffer */ + if ((msg_buf = (char *)calloc(len, sizeof (char))) == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate message buffer (%s).\n", + strerror(errno)); + errno = ENOMEM; + return 
(NULL); + } + + /* Fill in the message buffer */ + (void) snprintf(msg_buf, len, "%s ", msg_header); + for (i = 0; dependents[i] != NULL; i++) { + (void) strlcat(msg_buf, "\"", len); + (void) strlcat(msg_buf, dependents[i], len); + (void) strlcat(msg_buf, "\"", len); + if ((i + 1) < ndependents) + (void) strlcat(msg_buf, separator, len); + } + + return (msg_buf); +} + +/* + * create_dependents() + * + * Creates a copy of the list of dependent mounts associated with a + * given hashtable entry from the cache. + * + * Return Values: NULL with errno set on failure, the resulting list of + * dependent resources when successful. + */ +static char ** +create_dependents(hashentry_t *entry) +{ + int i; + char **dependents; + + if (entry == NULL) { + errno = EINVAL; + return (NULL); + } + + if (entry->n_mounts == 0) { + errno = ENOENT; + return (NULL); + } + + /* Allocate space for the full dependency list */ + dependents = (char **)calloc(entry->n_mounts + 1, sizeof (char *)); + if (dependents == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate dependents (%s).\n", + strerror(errno)); + errno = ENOMEM; + return (NULL); + } + + /* Copy all the dependent names into the new list of dependents */ + for (i = 0; i < entry->n_mounts; i++) { + if ((dependents[i] = strdup(entry->mountps[i])) == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed to allocate dependent \"%s\" " + "(%s).\n", entry->mountps[i], strerror(errno)); + free_list(dependents); + errno = ENOMEM; + return (NULL); + } + } + + return (dependents); +} + +/* + * detect_critical_failure() + * + * Given a list of dependents, a place to store an error message, and + * the flags associated with an operation, this function detects whether + * or not the operation should fail due to the presence of any critical + * filesystem resources. When a failure is detected, an appropriate + * error message is constructed and passed back to the caller. This is + * called during a suspend request operation. 
+ * + * Return Values: 0 when a critical resource failure shouldn't prevent + * the operation, and 1 when such a failure condition does exist. + */ +static int +detect_critical_failure(char **errorp, uint_t flags, char **dependents) +{ + int i; + int n_critical; + char *tmp; + + /* Do nothing if the operation is forced or there are no dependents */ + if ((errorp == NULL) || (flags & RCM_FORCE) || (dependents == NULL)) + return (0); + + /* + * Count how many of the dependents are critical, and shift the + * critical resources to the head of the list. + */ + if (dependents) { + for (i = 0, n_critical = 0; dependents[i] != NULL; i++) { + if (is_critical(dependents[i])) { + if (n_critical != i) { + tmp = dependents[n_critical]; + dependents[n_critical] = dependents[i]; + dependents[i] = tmp; + } + n_critical++; + } + } + } + + /* If no criticals were found, do nothing and return */ + if (n_critical == 0) + return (0); + + /* + * Criticals were found. Prune the list appropriately and construct + * an error message. + */ + + /* Prune non-criticals out of the list */ + for (i = n_critical; dependents[i] != NULL; i++) { + free(dependents[i]); + dependents[i] = NULL; + } + + /* Construct the critical resource error message */ + *errorp = create_message(MSG_HDR_CRIT, MSG_HDR_CRIT_MULTI, dependents); + + return (1); +} + +/* + * is_critical() + * + * Test a resource to determine if it's critical to the system and thus + * cannot be suspended. + * + * Return Values: 1 if the named resource is critical, 0 if not. 
+ */ +static int +is_critical(char *rsrc) +{ + assert(rsrc != NULL); + + if ((strcmp(rsrc, "/") == 0) || + (strcmp(rsrc, "/usr") == 0) || + (strcmp(rsrc, "/usr/lib") == 0) || + (strcmp(rsrc, "/usr/bin") == 0) || + (strcmp(rsrc, "/tmp") == 0) || + (strcmp(rsrc, "/var") == 0) || + (strcmp(rsrc, "/var/run") == 0) || + (strcmp(rsrc, "/etc") == 0) || + (strcmp(rsrc, "/etc/mnttab") == 0) || + (strcmp(rsrc, "/sbin") == 0)) + return (1); + + return (0); +} + +/* + * use_cache() + * + * This routine handles all the tasks necessary to lookup a resource + * in the cache and extract a separate list of dependents for that + * entry. If an error occurs while doing this, an appropriate error + * message is passed back to the caller. + * + * Locking: the cache is locked for the whole duration of this function. + */ +static int +use_cache(char *rsrc, char **errorp, char ***dependentsp) +{ + hashentry_t *entry; + + (void) mutex_lock(&cache_lock); + if ((entry = cache_lookup(mnt_cache, rsrc)) == NULL) { + rcm_log_message(RCM_ERROR, + "FILESYS: failed looking up \"%s\" in cache (%s).\n", + rsrc, strerror(errno)); + *errorp = strdup(MSG_FAIL_INTERNAL); + (void) mutex_unlock(&cache_lock); + return (-1); + } + *dependentsp = create_dependents(entry); + (void) mutex_unlock(&cache_lock); + + return (0); +} + +/* + * prune_dependents() + * + * Before calling back into RCM with a list of dependents, the list + * must be cleaned up a little. To avoid infinite recursion, "/" and + * the named resource must be pruned out of the list. + */ +static void +prune_dependents(char **dependents, char *rsrc) +{ + int i; + int n; + + if (dependents) { + + /* Set 'n' to the total length of the list */ + for (n = 0; dependents[n] != NULL; n++); + + /* + * Move offending dependents to the tail of the list and + * then truncate the list. 
+ */ + for (i = 0; dependents[i] != NULL; i++) { + if ((strcmp(dependents[i], rsrc) == 0) || + (strcmp(dependents[i], "/") == 0)) { + free(dependents[i]); + dependents[i] = dependents[n - 1]; + dependents[n] = NULL; + i--; + n--; + } + } + } +} diff --git a/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c new file mode 100644 index 0000000000..cb35558622 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/ip_anon_rcm.c @@ -0,0 +1,901 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2002-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * RCM module to prevent plumbed IP addresses from being removed. 
+ */ + + +#include <stdlib.h> +#include <ctype.h> +#include <memory.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <thread.h> +#include <synch.h> +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <sys/param.h> +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/cladm.h> +#include <sys/file.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/time.h> +#include <net/if.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <netinet/ip6.h> +#include <inet/ip.h> +#include <inet/ip6.h> + +#include "rcm_module.h" +#include "ifaddrlist.h" + +#define SUNW_IP "SUNW_ip/" +#define IP_REG_SIZE (9 + INET6_ADDRSTRLEN) +#define IP_ANON_USAGE gettext("Plumbed IP Address") +#define IP_SUSPEND_ERR gettext("Plumbed IP Addresses cannot be suspended") +#define IP_OFFLINE_ERR gettext("Invalid operation: IP cannot be offlined") +#define IP_REMOVE_ERR gettext("Invalid operation: IP cannot be removed") +#define IP_REG_FAIL gettext("Registration Failed") +#define IP_NO_CLUSTER gettext("Could not read cluster network addresses") + +#define IP_FLAG_NEW 0x00 +#define IP_FLAG_REG 0x01 +#define IP_FLAG_CL 0x02 +#define IP_FLAG_IGNORE 0x04 +#define IP_FLAG_DELETE 0x08 + +static int ip_anon_register(rcm_handle_t *); +static int ip_anon_unregister(rcm_handle_t *); +static int ip_anon_getinfo(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); +static int ip_anon_suspend(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); +static int ip_anon_resume(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_anon_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_anon_online(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_anon_remove(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + +static int 
exclude_ipv4(cladm_netaddrs_t exclude_addrs, + ipaddr_t address); +static int exclude_ipv6(cladm_netaddrs_t exclude_addrs, + uint32_t address[4]); + + +typedef struct ip_status { + int flags; + char device[IP_REG_SIZE]; + struct ip_status *next; +} ip_status_t; + +static ip_status_t *findreg(char *reg); +static ip_status_t *addreg(char *reg); +static int deletereg(ip_status_t *entry); + +static ip_status_t *ip_list = NULL; +static mutex_t ip_list_lock; + +static struct rcm_mod_ops ip_anon_ops = +{ + RCM_MOD_OPS_VERSION, + ip_anon_register, + ip_anon_unregister, + ip_anon_getinfo, + ip_anon_suspend, + ip_anon_resume, + ip_anon_offline, + ip_anon_online, + ip_anon_remove, + NULL, + NULL, + NULL +}; + +struct rcm_mod_ops * +rcm_mod_init() +{ + return (&ip_anon_ops); +} + +const char * +rcm_mod_info() +{ + return ("RCM IP address module %I%"); +} + +int +rcm_mod_fini() +{ + ip_status_t *tlist; + + /* free the registration list */ + + (void) mutex_lock(&ip_list_lock); + while (ip_list != NULL) { + tlist = ip_list->next; + free(ip_list); + ip_list = tlist; + } + (void) mutex_unlock(&ip_list_lock); + + (void) mutex_destroy(&ip_list_lock); + return (RCM_SUCCESS); +} + +static int +ip_anon_register(rcm_handle_t *hdl) +{ + int bootflags; + struct ifaddrlist *al = NULL, *al6 = NULL; + char errbuf[ERRBUFSIZE] = "", errbuf6[ERRBUFSIZE] = ""; + char treg[IP_REG_SIZE], tstr[IP_REG_SIZE]; + cladm_netaddrs_t exclude_addrs; + int num_ifs, num_ifs6, i, ret; + uint32_t num_exclude_addrs = 0; + ip_status_t *tlist, *tentry; + + (void) mutex_lock(&ip_list_lock); + + rcm_log_message(RCM_DEBUG, "ip_anon: registration refresh.\n"); + + exclude_addrs.cladm_num_netaddrs = 0; + + if (_cladm(CL_INITIALIZE, CL_GET_BOOTFLAG, &bootflags) != 0) { + rcm_log_message(RCM_ERROR, + gettext("unable to check cluster status\n")); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + rcm_log_message(RCM_DEBUG, + "ip_anon: cladm bootflags=%d\n", bootflags); + + if (bootflags == 3) { + + /* 
build the exclusion list */ + + if ((ret = _cladm(CL_CONFIG, CL_GET_NUM_NETADDRS, + &num_exclude_addrs)) == 0) { + exclude_addrs.cladm_num_netaddrs = num_exclude_addrs; + + if (num_exclude_addrs == 0) + rcm_log_message(RCM_DEBUG, + "ip_anon: no addresses excluded\n"); + else { + if ((exclude_addrs.cladm_netaddrs_array = + malloc(sizeof (cladm_netaddr_entry_t) * + (num_exclude_addrs))) == NULL) { + rcm_log_message(RCM_ERROR, + gettext("out of memory\n")); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + if ((ret = _cladm(CL_CONFIG, + CL_GET_NETADDRS, &exclude_addrs)) + != 0) { + rcm_log_message(RCM_ERROR, + IP_NO_CLUSTER); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + } + + } else { + if ((ret != 0) && (errno == EINVAL)) { + rcm_log_message(RCM_DEBUG, + "no _cladm() backend to get addrs\n"); + } else { + rcm_log_message(RCM_ERROR, IP_NO_CLUSTER); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + } + rcm_log_message(RCM_DEBUG, + "cladm returned %d errno=%d\n", ret, errno); + + rcm_log_message(RCM_DEBUG, + "ip_anon: num exclude addrs: %d\n", + exclude_addrs.cladm_num_netaddrs); + + /* print the exclusion list for debugging purposes */ + + for (i = 0; i < exclude_addrs.cladm_num_netaddrs; i++) { + (void) strcpy(treg, "<UNKNOWN>"); + (void) strcpy(tstr, "<UNKNOWN>"); + if (exclude_addrs.cladm_netaddrs_array[i].\ + cl_ipversion == IPV4_VERSION) { + (void) inet_ntop(AF_INET, + &exclude_addrs.cladm_netaddrs_array[i]. + cl_ipv_un.cl_ipv4.ipv4_netaddr, + treg, INET_ADDRSTRLEN); + + (void) inet_ntop(AF_INET, + &exclude_addrs.cladm_netaddrs_array[i]. + cl_ipv_un.cl_ipv4.ipv4_netmask, + tstr, INET_ADDRSTRLEN); + } + + if (exclude_addrs.cladm_netaddrs_array[i].\ + cl_ipversion == IPV6_VERSION) { + (void) inet_ntop(AF_INET6, + &exclude_addrs.cladm_netaddrs_array[i]. + cl_ipv_un.cl_ipv6.ipv6_netaddr, + treg, INET6_ADDRSTRLEN); + + (void) inet_ntop(AF_INET6, + &exclude_addrs.cladm_netaddrs_array[i]. 
+ cl_ipv_un.cl_ipv6.ipv6_netmask, + tstr, INET6_ADDRSTRLEN); + } + rcm_log_message(RCM_DEBUG, "IPV%d: %s %s\n", + exclude_addrs.cladm_netaddrs_array[i]. + cl_ipversion, treg, tstr); + + } + } + + + + /* obtain a list of all IPv4 and IPv6 addresses in the system */ + + rcm_log_message(RCM_DEBUG, + "ip_anon: obtaining list of IPv4 addresses.\n"); + num_ifs = ifaddrlist(&al, AF_INET, errbuf); + + if ((num_ifs == -1) && (errno != 0) && (errno != ENOENT) && + (errno != EINVAL)) { + rcm_log_message(RCM_ERROR, + gettext("cannot get IPv4 address list errno=%d (%s)\n"), + errno, errbuf); + free(al); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + rcm_log_message(RCM_DEBUG, + "ip_anon: obtaining list of IPv6 addresses.\n"); + + num_ifs6 = ifaddrlist(&al6, AF_INET6, errbuf6); + if ((num_ifs6 == -1) && (errno != 0) && (errno != ENOENT) && + (errno != EINVAL)) { + rcm_log_message(RCM_ERROR, + gettext("cannot get IPv6 address list errno=%d (%s)\n"), + errno, errbuf6); + free(al); + free(al6); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + /* check the state of outstanding registrations against the list */ + + rcm_log_message(RCM_DEBUG, + "ip_anon: checking outstanding registrations.\n"); + + tlist = ip_list; + while (tlist != NULL) { + tlist->flags |= IP_FLAG_DELETE; + tlist = tlist->next; + } + + /* IPv4 */ + + rcm_log_message(RCM_DEBUG, "ip_anon: checking IPv4 addresses.\n"); + + for (i = 0; i < num_ifs; i++) { + (void) inet_ntop(AF_INET, &al[i].addr.addr, tstr, + INET_ADDRSTRLEN); + (void) strcpy(treg, SUNW_IP); + (void) strcat(treg, tstr); + + if ((tlist = findreg(treg)) == NULL) + tlist = addreg(treg); + else + tlist->flags &= (~IP_FLAG_DELETE); + + if (tlist == NULL) { + rcm_log_message(RCM_ERROR, + gettext("out of memory\n")); + free(al); + free(al6); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + if (exclude_ipv4(exclude_addrs, al[i].addr.addr.s_addr)) + tlist->flags |= IP_FLAG_CL; + } + + /* IPv6 */ + + 
rcm_log_message(RCM_DEBUG, "ip_anon: checking IPv6 addresses.\n"); + + for (i = 0; i < num_ifs6; i++) { + (void) inet_ntop(AF_INET6, &al6[i].addr.addr, tstr, + INET6_ADDRSTRLEN); + (void) strcpy(treg, SUNW_IP); + (void) strcat(treg, tstr); + + if ((tlist = findreg(treg)) == NULL) + tlist = addreg(treg); + else + tlist->flags &= (~IP_FLAG_DELETE); + + if (tlist == NULL) { + rcm_log_message(RCM_ERROR, + gettext("out of memory\n")); + free(al); + free(al6); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } + + if (exclude_ipv6(exclude_addrs, al6[i].addr.addr6._S6_un.\ + _S6_u32)) + tlist->flags |= IP_FLAG_CL; + } + + rcm_log_message(RCM_DEBUG, "ip_anon: updating reg. state.\n"); + + /* examine the list of ip address registrations and their state */ + + tlist = ip_list; + while (tlist != NULL) { + tentry = tlist; + tlist = tlist->next; + + if (tentry->flags & IP_FLAG_DELETE) { + if (tentry->flags & IP_FLAG_REG) { + rcm_log_message(RCM_DEBUG, + "ip_anon: unregistering interest in %s\n", + tentry->device); + if (rcm_unregister_interest(hdl, + tentry->device, 0) != 0) { + rcm_log_message(RCM_ERROR, + gettext("failed to unregister")); + } + } + (void) deletereg(tentry); + } else if (!(tentry->flags & IP_FLAG_IGNORE)) { + /* + * If the registration is not a clustered devices and + * not already registered, then RCM doesn't + * currently know about it. 
+ */ + if (!(tentry->flags & IP_FLAG_CL) && + !(tentry->flags & IP_FLAG_REG)) { + tentry->flags |= IP_FLAG_REG; + rcm_log_message(RCM_DEBUG, + "ip_anon: registering interest in %s\n", + tentry->device); + if (rcm_register_interest(hdl, + tentry->device, 0, NULL) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + IP_REG_FAIL); + free(al); + free(al6); + (void) mutex_unlock(&ip_list_lock); + return (RCM_FAILURE); + } else { + rcm_log_message(RCM_DEBUG, + "ip_anon: registered %s\n", + tentry->device); + } + } + + /* + * If the entry is registered and clustered, then + * the configuration has been changed and it + * should be unregistered. + */ + if ((tentry->flags & IP_FLAG_REG) & + (tentry->flags & IP_FLAG_CL)) { + rcm_log_message(RCM_DEBUG, + "ip_anon: unregistering in %s\n", + tentry->device); + if (rcm_unregister_interest(hdl, + tentry->device, 0) != 0) { + rcm_log_message(RCM_ERROR, + gettext("failed to unregister")); + } + tentry->flags &= (~IP_FLAG_REG); + } + } + } + + tlist = ip_list; + while (tlist != NULL) { + rcm_log_message(RCM_DEBUG, "ip_anon: %s (%Xh)\n", + tlist->device, tlist->flags); + tlist = tlist->next; + } + rcm_log_message(RCM_DEBUG, "ip_anon: registration complete.\n"); + + free(al); + free(al6); + (void) mutex_unlock(&ip_list_lock); + return (RCM_SUCCESS); +} + +static int +ip_anon_unregister(rcm_handle_t *hdl) +{ + ip_status_t *tlist; + + (void) mutex_lock(&ip_list_lock); + + tlist = ip_list; + while (tlist != NULL) { + if ((tlist->flags & IP_FLAG_REG)) { + if (rcm_unregister_interest(hdl, + tlist->device, 0) != 0) { + rcm_log_message(RCM_ERROR, + gettext("failed to unregister")); + } + tlist->flags &= (~IP_FLAG_REG); + } + tlist = tlist->next; + } + + (void) mutex_unlock(&ip_list_lock); + + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +ip_anon_getinfo(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **infostr, char **errstr, nvlist_t *props, rcm_info_t **dependent) +{ + + assert(rsrcname != NULL && infostr != NULL); + 
+ if ((*infostr = strdup(IP_ANON_USAGE)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +ip_anon_suspend(rcm_handle_t *hdl, char *rsrcname, id_t id, + timespec_t *interval, uint_t flags, char **errstr, + rcm_info_t **dependent) +{ + if ((*errstr = strdup(IP_SUSPEND_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +ip_anon_resume(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +ip_anon_offline(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + if ((*errstr = strdup(IP_OFFLINE_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +ip_anon_online(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +ip_anon_remove(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + if ((*errstr = strdup(IP_REMOVE_ERR)) == NULL) + rcm_log_message(RCM_ERROR, gettext("strdup failure\n")); + + return (RCM_FAILURE); +} + +/* + * Call with ip_list_lock held. 
+ */ + +static ip_status_t * +findreg(char *reg) +{ + ip_status_t *tlist; + int done; + + tlist = ip_list; + done = 0; + while ((tlist != NULL) && (!done)) { + if (strcmp(tlist->device, reg) == 0) + done = 1; + else + tlist = tlist->next; + } + + return (tlist); +} + +static ip_status_t * +addreg(char *reg) +{ + ip_status_t *tlist, *tentry; + + tentry = (ip_status_t *)malloc(sizeof (ip_status_t)); + if (tentry == NULL) + return (tentry); + + tentry->flags = IP_FLAG_NEW; + tentry->next = NULL; + (void) strcpy(tentry->device, reg); + + if (ip_list == NULL) + ip_list = tentry; + else { + tlist = ip_list; + while (tlist->next != NULL) + tlist = tlist->next; + tlist->next = tentry; + } + + return (tentry); +} + +static int +deletereg(ip_status_t *entry) +{ + ip_status_t *tlist; + + if (entry == NULL) + return (-1); + + if (entry == ip_list) { + ip_list = ip_list->next; + free(entry); + } else { + tlist = ip_list; + while ((tlist->next != NULL) && (tlist->next != entry)) + tlist = tlist->next; + + if (tlist->next != entry) + return (-1); + tlist->next = entry->next; + free(entry); + } + return (0); +} + +static int +exclude_ipv4(cladm_netaddrs_t exclude_addrs, ipaddr_t address) +{ + int i; + char taddr[IP_REG_SIZE], tmask[IP_REG_SIZE], tmatch[IP_REG_SIZE]; + ipaddr_t ipv4_netaddr, ipv4_netmask; + + (void) inet_ntop(AF_INET, &address, taddr, INET_ADDRSTRLEN); + + rcm_log_message(RCM_DEBUG, "ip_anon: exclude_ipv4 (%s, %d)\n", + taddr, exclude_addrs.cladm_num_netaddrs); + /* + * If this falls in the exclusion list, the IP_FLAG_CL + * bit should be set for the adapter. 
+ */ + for (i = 0; i < exclude_addrs.cladm_num_netaddrs; i++) { + if (exclude_addrs.cladm_netaddrs_array[i].\ + cl_ipversion == IPV4_VERSION) { + + ipv4_netaddr = exclude_addrs.\ + cladm_netaddrs_array[i].cl_ipv_un.cl_ipv4.\ + ipv4_netaddr; + ipv4_netmask = exclude_addrs.\ + cladm_netaddrs_array[i].cl_ipv_un.cl_ipv4.\ + ipv4_netmask; + + (void) inet_ntop(AF_INET, &ipv4_netaddr, tmatch, + INET_ADDRSTRLEN); + (void) inet_ntop(AF_INET, &ipv4_netmask, tmask, + INET_ADDRSTRLEN); + + if ((address & ipv4_netmask) == ipv4_netaddr) { + rcm_log_message(RCM_DEBUG, + "ip_anon: matched %s:%s => %s\n", + taddr, tmask, tmatch); + return (1); + } + } + } + rcm_log_message(RCM_DEBUG, "ip_anon: no match for %s\n", + taddr); + return (0); +} + +static int +exclude_ipv6(cladm_netaddrs_t exclude_addrs, uint32_t address[4]) +{ + int i, j, numequal; + uint32_t addr[4], ipv6_netaddr[4], ipv6_netmask[4]; + char taddr[IP_REG_SIZE], tmask[IP_REG_SIZE], tmatch[IP_REG_SIZE]; + + (void) inet_ntop(AF_INET6, address, taddr, INET6_ADDRSTRLEN); + + /* + * If this falls in the exclusion list, the IP_FLAG_CL + * bit should be set for the adapter. 
+ */ + + for (i = 0; i < exclude_addrs.cladm_num_netaddrs; i++) { + if (exclude_addrs.cladm_netaddrs_array[i].\ + cl_ipversion == IPV6_VERSION) { + numequal = 0; + for (j = 0; j < 4; j++) { + ipv6_netaddr[j] = exclude_addrs.\ + cladm_netaddrs_array[i].\ + cl_ipv_un.cl_ipv6.ipv6_netaddr[j]; + + ipv6_netmask[j] = exclude_addrs.\ + cladm_netaddrs_array[i].\ + cl_ipv_un.cl_ipv6.ipv6_netmask[j]; + + addr[j] = address[j] & ipv6_netmask[j]; + if (addr[j] == ipv6_netaddr[j]) + numequal++; + } + + (void) inet_ntop(AF_INET6, ipv6_netaddr, tmatch, + INET6_ADDRSTRLEN); + (void) inet_ntop(AF_INET6, ipv6_netmask, tmask, + INET6_ADDRSTRLEN); + + if (numequal == 4) + return (1); + } + } + rcm_log_message(RCM_DEBUG, "ip_anon: no match for %s\n", + taddr); + return (0); +} + +/* + * The code below is taken from usr/src/cmd/cmd-inet/common/ifaddrlist.c + */ + +/* + * Copyright (c) 1997 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Computer Systems + * Engineering Group at Lawrence Berkeley Laboratory. + * 4. Neither the name of the University nor of the Laboratory may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Construct the interface list with given address family. + * If it fails, returns -1 and an error message in *errbuf; + * otherwise, returns number of interfaces, and the interface list in *ipaddrp. 
+ */ +int +ifaddrlist(struct ifaddrlist **ipaddrp, int family, char *errbuf) +{ + int fd; + struct lifreq *lifrp, *lifend; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + struct ifaddrlist *al; + struct lifconf lifc; + struct lifreq *ibuf, lifr; + char device[LIFNAMSIZ + 1]; + struct ifaddrlist *ifaddrlist; + struct lifnum lifn; + int lifc_flags = 0; + int count; + + if (family != AF_INET && family != AF_INET6) { + (void) sprintf(errbuf, "invalid address family"); + return (-1); + } + + fd = socket(family, SOCK_DGRAM, 0); + if (fd < 0) { + (void) snprintf(errbuf, ERRBUFSIZE, "socket: %s", + strerror(errno)); + return (-1); + } + + /* determine the number of interfaces */ + lifn.lifn_family = family; + lifn.lifn_flags = lifc_flags; + if (ioctl(fd, SIOCGLIFNUM, &lifn) < 0) { + (void) snprintf(errbuf, ERRBUFSIZE, "SIOCGLIFNUM: %s", + strerror(errno)); + (void) close(fd); + return (-1); + } + + /* allocate memory for the determined number of interfaces */ + ifaddrlist = calloc((size_t)lifn.lifn_count, + (size_t)sizeof (struct ifaddrlist)); + if (ifaddrlist == NULL) { + (void) snprintf(errbuf, ERRBUFSIZE, "calloc: %s", + strerror(errno)); + (void) close(fd); + return (-1); + } + + ibuf = calloc((size_t)lifn.lifn_count, (size_t)sizeof (struct lifreq)); + if (ibuf == NULL) { + (void) snprintf(errbuf, ERRBUFSIZE, "calloc: %s", + strerror(errno)); + free(ifaddrlist); + (void) close(fd); + return (-1); + } + + /* pull out the interface list from the kernel */ + lifc.lifc_family = family; + lifc.lifc_len = (int)(lifn.lifn_count * sizeof (struct lifreq)); + lifc.lifc_buf = (caddr_t)ibuf; + lifc.lifc_flags = lifc_flags; + + if (ioctl(fd, SIOCGLIFCONF, (char *)&lifc) < 0 || + lifc.lifc_len < sizeof (struct lifreq)) { + (void) snprintf(errbuf, ERRBUFSIZE, "SIOCGLIFCONF: %s", + strerror(errno)); + free(ifaddrlist); + free(ibuf); + (void) close(fd); + return (-1); + } + + lifrp = ibuf; + /*LINTED*/ + lifend = (struct lifreq *)((char *)ibuf + lifc.lifc_len); + + al = 
ifaddrlist; + count = 0; + + /* let's populate the interface entries in the ifaddrlist */ + for (; lifrp < lifend; lifrp++) { + /* + * Need a template to preserve address info that is + * used below to locate the next entry. (Otherwise, + * SIOCGLIFFLAGS stomps over it because the requests + * are returned in a union.) + */ + (void) strncpy(lifr.lifr_name, lifrp->lifr_name, + sizeof (lifr.lifr_name)); + if (ioctl(fd, SIOCGLIFFLAGS, (char *)&lifr) < 0) { + if (errno == ENXIO) { + continue; + } + (void) snprintf(errbuf, ERRBUFSIZE, + "SIOCGLIFFLAGS: %.*s: %s", + (int)sizeof (lifr.lifr_name), lifr.lifr_name, + strerror(errno)); + free(ifaddrlist); + free(ibuf); + (void) close(fd); + return (-1); + } + + al->flags = lifr.lifr_flags; + + /* get the interface address */ + (void) strncpy(device, lifr.lifr_name, sizeof (device)); + device[sizeof (device) - 1] = '\0'; + if (ioctl(fd, SIOCGLIFADDR, (char *)&lifr) < 0) { + (void) snprintf(errbuf, ERRBUFSIZE, + "SIOCGLIFADDR: %s: %s", device, strerror(errno)); + free(ifaddrlist); + free(ibuf); + (void) close(fd); + return (-1); + } + + if (family == AF_INET) { + sin = (struct sockaddr_in *)&lifr.lifr_addr; + al->addr.addr = sin->sin_addr; + } else { + sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr; + al->addr.addr6 = sin6->sin6_addr; + } + + (void) strncpy(al->device, device, sizeof (device)); + + /* get the interface index */ + if (ioctl(fd, SIOCGLIFINDEX, (char *)&lifr) < 0) { + (void) snprintf(errbuf, ERRBUFSIZE, + "SIOCGLIFADDR: %s: %s", device, strerror(errno)); + free(ifaddrlist); + free(ibuf); + (void) close(fd); + return (-1); + } + + al->index = lifr.lifr_index; + + ++al; + ++count; + } + + free(ibuf); + (void) close(fd); + + *ipaddrp = ifaddrlist; + + return (count); +} diff --git a/usr/src/cmd/rcm_daemon/common/ip_rcm.c b/usr/src/cmd/rcm_daemon/common/ip_rcm.c new file mode 100644 index 0000000000..acc3e49c77 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/ip_rcm.c @@ -0,0 +1,3934 @@ +/* + * CDDL HEADER START + * + 
* The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This RCM module adds support to the RCM framework for IP managed + * interfaces. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include <synch.h> +#include <libintl.h> +#include <errno.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <arpa/inet.h> +#include <sys/dlpi.h> +#include <stropts.h> +#include <strings.h> +#include <libdevinfo.h> +#include <sys/systeminfo.h> +#include <netdb.h> +#include <libdladm.h> +#include <libdlpi.h> + +#include <ipmp_mpathd.h> +#include "rcm_module.h" + +/* + * Definitions + */ +#ifndef lint +#define _(x) gettext(x) +#else +#define _(x) x +#endif + +/* Some generic well-knowns and defaults used in this module */ +#define SLASH_DEV "/dev" /* /dev directory */ + +#define IP_DEV_NAME "/dev/ip" /* IPV4 ip device */ +#define IP6_DEV_NAME "/dev/ip6" /* IPV6 ip device */ +#define IP_MOD_NAME "ip" /* ip module */ +#define ARP_MOD_NAME "arp" /* arp module */ +#define UDP_DEV_NAME "/dev/udp" /* IPv4 udp device */ +#define UDP6_DEV_NAME "/dev/udp6" /* IPv6 udp device */ +#define IP_MAX_MODS 9 /* max modules pushed on intr */ +#define MAX_RECONFIG_SIZE 1024 /* Max. reconfig string size */ + +#define RCM_NET_PREFIX "SUNW_network" /* RCM network name prefix */ +#define RCM_NET_RESOURCE_MAX (13 + LIFNAMSIZ) /* RCM_NET_PREFIX+LIFNAMSIZ */ + +#define RCM_STR_SUNW_IP "SUNW_ip/" /* IP address export prefix */ +#define RCM_SIZE_SUNW_IP 9 /* strlen("SUNW_ip/") + 1 */ + +#define MAXINTSTR 11 /* max integer string len */ + +/* ifconfig(1M) */ +#define USR_SBIN_IFCONFIG "/usr/sbin/ifconfig" /* ifconfig command */ +#define CFGFILE_FMT_IPV4 "/etc/hostname." /* IPV4 config file */ +#define CFGFILE_FMT_IPV6 "/etc/hostname6." 
/* IPV6 config file */ +#define CFG_CMDS_STD " netmask + broadcast + up" /* Normal config string */ +#define CONFIG_AF_INET 0x1 /* Post-configure IPv4 */ +#define CONFIG_AF_INET6 0x2 /* Post-configure IPv6 */ +#define MAXLINE 1024 /* Max. line length */ +#define MAXARGS 512 /* Max. args in ifconfig cmd */ + +/* Physical interface flags mask */ +#define RCM_PIF_FLAGS (IFF_OFFLINE | IFF_INACTIVE | IFF_FAILED | \ + IFF_STANDBY) + +/* Some useful macros */ +#ifndef MAX +#define MAX(a, b) (((a) > (b))?(a):(b)) +#endif /* MAX */ + +#ifndef ISSPACE +#define ISSPACE(c) ((c) == ' ' || (c) == '\t') +#endif + +#ifndef ISEOL +#define ISEOL(c) ((c) == '\n' || (c) == '\r' || (c) == '\0') +#endif + +#ifndef STREQ +#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0) +#endif + +#ifndef ADDSPACE +#define ADDSPACE(a) ((void) strcat((a), " ")) +#endif + +/* Interface Cache state flags */ +#define CACHE_IF_STALE 0x1 /* stale cached data */ +#define CACHE_IF_NEW 0x2 /* new cached interface */ +#define CACHE_IF_OFFLINED 0x4 /* interface offlined */ +#define CACHE_IF_UPDATED 0x8 /* interface props. updated */ +#define CACHE_IF_IGNORE 0x10 /* state held elsewhere */ + +/* Network Cache lookup options */ +#define CACHE_NO_REFRESH 0x1 /* cache refresh not needed */ +#define CACHE_REFRESH 0x2 /* refresh cache */ + +/* RCM IPMP Module specific property definitions */ +#define RCM_IPMP_MIN_REDUNDANCY 1 /* default min. redundancy */ + +/* in.mpathd(1M) specifics */ +#define MPATHD_MAX_RETRIES 5 /* Max. offline retries */ +#define MPATHD_OK 0 /* failback success from mpathd */ +#define MPATHD_FAILBACK_TIME 180 /* Time in secs. 
for mpathd failback */ + +/* Stream module operations */ +#define MOD_INSERT 0 /* Insert a mid-stream module */ +#define MOD_REMOVE 1 /* Remove a mid-stream module */ +#define MOD_CHECK 2 /* Check mid-stream module safety */ + +/* VLAN format support */ +#define VLAN_MAX_PPA_ALLOWED 1000 +#define VLAN_GET_VID(ppa) (ppa / VLAN_MAX_PPA_ALLOWED) +#define VLAN_GET_PPA(ppa) (ppa % VLAN_MAX_PPA_ALLOWED) + +/* devfsadm attach nvpair values */ +#define PROP_NV_DDI_NETWORK "ddi_network" + +/* + * in.mpathd(1M) message passing formats + */ +typedef struct mpathd_cmd { + uint32_t cmd_command; /* message command */ + char cmd_ifname[LIFNAMSIZ]; /* this interface name */ + char cmd_movetoif[LIFNAMSIZ]; /* move to interface */ + uint32_t cmd_min_red; /* min. redundancy */ +/* Message passing values for MI_SETOINDEX */ +#define from_lifname cmd_ifname /* current logical interface */ +#define to_pifname cmd_movetoif /* new physical interface */ +#define addr_family cmd_min_red /* address family */ +} mpathd_cmd_t; + +/* This is needed since mpathd checks message size for offline */ +typedef struct mpathd_unoffline { + uint32_t cmd_command; /* offline / undo offline */ + char cmd_ifname[LIFNAMSIZ]; /* this interface name */ +} mpathd_unoffline_t; + +typedef struct mpathd_response { + uint32_t resp_sys_errno; /* system errno */ + uint32_t resp_mpathd_err; /* mpathd error information */ +} mpathd_response_t; + +/* + * IP module data types + */ + +/* Physical interface representation */ +typedef struct ip_pif { + char pi_ifname[LIFNAMSIZ+1]; /* interface name */ + char pi_ifindex; /* Interface index */ + char pi_grpname[LIFNAMSIZ+1]; /* IPMP group name */ + int pi_style; /* DLPI provider style */ + int pi_ppa; /* Phys. 
point of attachment */ + struct ip_lif *pi_lifs; /* ptr to logical interfaces */ +} ip_pif_t; + +/* Logical interface representation */ +typedef struct ip_lif +{ + struct ip_lif *li_next; /* ptr to next lif */ + struct ip_lif *li_prev; /* previous next ptr */ + ip_pif_t *li_pif; /* back ptr to phy int */ + ushort_t li_ifnum; /* interface number */ + union { + sa_family_t family; + struct sockaddr_storage storage; + struct sockaddr_in ip4; /* IPv4 */ + struct sockaddr_in6 ip6; /* IPv6 */ + } li_addr; + uint64_t li_ifflags; /* current IFF_* flags */ + uint64_t li_oldflags; /* flags prior to offline */ + int li_modcnt; /* # of modules */ + char *li_modules[IP_MAX_MODS]; /* module list pushed */ + char *li_reconfig; /* Reconfiguration string */ + int32_t li_cachestate; /* cache state flags */ +} ip_lif_t; + +/* Cache element */ +typedef struct ip_cache +{ + struct ip_cache *ip_next; /* next cached resource */ + struct ip_cache *ip_prev; /* prev cached resource */ + char *ip_resource; /* resource name */ + ip_pif_t *ip_pif; /* ptr to phy int */ + int32_t ip_ifred; /* min. redundancy */ + int ip_cachestate; /* cache state flags */ +} ip_cache_t; + +/* + * Global cache for network interfaces + */ +static ip_cache_t cache_head; +static ip_cache_t cache_tail; +static mutex_t cache_lock; +static int events_registered = 0; + +/* + * Global NIC list to be configured after DR-attach + */ +#define NIL_NULL ((struct ni_list *)0) + +struct net_interface { + char *type; /* Name of type of interface (le, ie, etc.) */ + char *name; /* Qualified name of interface (le0, ie0, etc.) 
*/ +}; + +struct ni_list { + struct net_interface *nifp; + struct ni_list *next; +}; + +static mutex_t nil_lock; /* NIC list lock */ +static int num_ni = 0; /* Global new interface count */ +static struct ni_list *nil_head = NIL_NULL; /* Global new if list */ + +struct devfs_minor_data { + int32_t minor_type; + char *minor_name; + char *minor_node_type; +}; + +/* + * RCM module interface prototypes + */ +static int ip_register(rcm_handle_t *); +static int ip_unregister(rcm_handle_t *); +static int ip_get_info(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); +static int ip_suspend(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); +static int ip_resume(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_undo_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_remove(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int ip_notify_event(rcm_handle_t *, char *, id_t, uint_t, + char **, nvlist_t *, rcm_info_t **); + +/* Module private routines */ +static void free_cache(); +static int update_cache(rcm_handle_t *); +static void cache_remove(ip_cache_t *); +static ip_cache_t *cache_lookup(rcm_handle_t *, char *, char); +static void free_node(ip_cache_t *); +static void cache_insert(ip_cache_t *); +static char *ip_usage(ip_cache_t *); +static int update_pif(rcm_handle_t *, int, int, struct lifreq *); +static int ip_ipmp_offline(ip_cache_t *, ip_cache_t *); +static int ip_ipmp_undo_offline(ip_cache_t *); +static int if_cfginfo(ip_cache_t *, uint_t); +static int if_unplumb(ip_cache_t *); +static int if_replumb(ip_cache_t *); +static void ip_log_err(ip_cache_t *, char **, char *); +static int getdlpi_style(char *); +static char *get_physical_resource(char *); +static int get_ppa(char *); +static void clr_cfg_state(ip_pif_t *); +/*LINTED*/ 
+static int if_change_flags(ip_pif_t *, uint64_t, boolean_t); +static uint64_t if_get_flags(ip_pif_t *); +static int mpathd_send_cmd(mpathd_cmd_t *); +static int connect_to_mpathd(int); +/*LINTED*/ +static int get_lun(char *); +/*LINTED*/ +static void dump_node(ip_cache_t *); +static int modop(char *, char *, int, char); +static int get_modlist(char *, ip_lif_t *); +static int ip_domux2fd(int *, int *, struct lifreq *); +static int ip_plink(int, int, struct lifreq *); +static int ip_onlinelist(rcm_handle_t *, ip_cache_t *, char **, uint_t, + rcm_info_t **); +static int ip_offlinelist(rcm_handle_t *, ip_cache_t *, char **, uint_t, + rcm_info_t **); +static char **ip_get_addrlist(ip_cache_t *); +static void ip_free_addrlist(char **); +static void ip_consumer_notify(rcm_handle_t *, char *, char **, uint_t, + rcm_info_t **); + +static int process_nvlist(nvlist_t *); +static void process_minor(char *, char *, int32_t, struct devfs_minor_data *); +static int if_configure(char *); +static int isgrouped(char *); +static int if_ipmp_config(char *, int, int); +static int if_mpathd_configure(char *, char *, int, int); +static char *get_mpathd_dest(char *, int); +static int if_getcount(int); +static void tokenize(char *, char **, char *, int *); + + +/* Module-Private data */ +static struct rcm_mod_ops ip_ops = +{ + RCM_MOD_OPS_VERSION, + ip_register, + ip_unregister, + ip_get_info, + ip_suspend, + ip_resume, + ip_offline, + ip_undo_offline, + ip_remove, + NULL, + NULL, + ip_notify_event +}; + +/* + * rcm_mod_init() - Update registrations, and return the ops structure. 
/*
 * rcm_mod_init() - Set up the empty interface cache and the module locks,
 * and return the ops structure.
 *
 * The cache is a doubly linked list bracketed by the cache_head and
 * cache_tail sentinels; an empty cache has the two pointing at each other.
 */
struct rcm_mod_ops *
rcm_mod_init(void)
{
	rcm_log_message(RCM_TRACE1, "IP: mod_init\n");

	cache_head.ip_next = &cache_tail;
	cache_head.ip_prev = NULL;
	cache_tail.ip_prev = &cache_head;
	cache_tail.ip_next = NULL;

	/* The second argument is an integer lock type, not a pointer. */
	(void) mutex_init(&cache_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&nil_lock, USYNC_THREAD, NULL);

	/* Return the ops vectors */
	return (&ip_ops);
}

/*
 * rcm_mod_info() - Return a string describing this module.
 */
const char *
rcm_mod_info(void)
{
	rcm_log_message(RCM_TRACE1, "IP: mod_info\n");

	return ("IP Multipathing module version %I%");
}

/*
 * rcm_mod_fini() - Destroy the network interfaces cache and the locks
 * created by rcm_mod_init().  Always returns RCM_SUCCESS.
 */
int
rcm_mod_fini(void)
{
	rcm_log_message(RCM_TRACE1, "IP: mod_fini\n");

	free_cache();
	(void) mutex_destroy(&nil_lock);
	(void) mutex_destroy(&cache_lock);
	return (RCM_SUCCESS);
}

/*
 * ip_register() - Make sure the cache is properly sync'ed, and its
 * registrations are in order.
 *
 * Also registers, once, for RCM_RESOURCE_NETWORK_NEW events so that the
 * module is notified about newly attached resources.  Returns RCM_FAILURE
 * when update_cache() or the event registration fails, RCM_SUCCESS
 * otherwise.
 */
static int
ip_register(rcm_handle_t *hd)
{
	rcm_log_message(RCM_TRACE1, "IP: register\n");

	/* Guard against bad arguments */
	assert(hd != NULL);

	if (update_cache(hd) < 0)
		return (RCM_FAILURE);

	/*
	 * Need to register interest in all new resources
	 * getting attached, so we get attach event notifications
	 */
	if (events_registered)
		return (RCM_SUCCESS);

	if (rcm_register_event(hd, RCM_RESOURCE_NETWORK_NEW, 0, NULL)
	    != RCM_SUCCESS) {
		rcm_log_message(RCM_ERROR,
		    _("IP: failed to register %s\n"),
		    RCM_RESOURCE_NETWORK_NEW);
		return (RCM_FAILURE);
	}

	rcm_log_message(RCM_DEBUG, "IP: registered %s\n",
	    RCM_RESOURCE_NETWORK_NEW);
	events_registered++;

	return (RCM_SUCCESS);
}
/*
 * ip_offline() - Offline an interface.
 *
 * Looks the resource up in the cache (refreshing it) and then:
 *   - for a non-IPMP interface without RCM_FORCE, asks the address
 *     consumers through ip_offlinelist() and fails if they object;
 *   - refuses an IPMP interface that has IFF_NOFAILOVER set;
 *   - returns success at this point for an RCM_QUERY request;
 *   - marks the node CACHE_IF_IGNORE and returns success when
 *     if_cfginfo() reports a failure;
 *   - unplumbs a standalone interface directly;
 *   - for an IPMP interface, asks in.mpathd to fail over, falls back to
 *     consumer approval if that fails, then unplumbs; a failed unplumb
 *     triggers a request to undo the offline.
 *
 * An unrecognized resource is logged but reported as RCM_SUCCESS.
 * Returns RCM_SUCCESS or RCM_FAILURE; failure descriptions are passed to
 * ip_log_err() together with errorp.
 */
static int
ip_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
    char **errorp, rcm_info_t **depend_info)
{
	ip_cache_t	*node;
	ip_pif_t	*pif;
	int		detachable = 0;
	int		nofailover = 0;
	int		ipmp = 0;
	int		rv = RCM_FAILURE;

	rcm_log_message(RCM_TRACE1, "IP: offline(%s)\n", rsrc);

	/* Guard against bad arguments */
	assert(hd != NULL);
	assert(rsrc != NULL);
	assert(id == (id_t)0);
	assert(errorp != NULL);
	assert(depend_info != NULL);

	/* Lock the cache and lookup the resource */
	(void) mutex_lock(&cache_lock);
	node = cache_lookup(hd, rsrc, CACHE_REFRESH);
	if (node == NULL) {
		ip_log_err(node, errorp, "Unrecognized resource");
		errno = ENOENT;
		rv = RCM_SUCCESS;
		goto out;
	}

	pif = node->ip_pif;

	/* Establish default detachability criteria */
	if (flags & RCM_FORCE)
		detachable++;

	/* Check if the interface is an IPMP grouped interface */
	if (strcmp(pif->pi_grpname, "") != 0)
		ipmp++;

	if (if_get_flags(pif) & IFF_NOFAILOVER)
		nofailover++;

	/*
	 * Even if the interface is not in an IPMP group, it's possible that
	 * it's still okay to offline it as long as there are higher-level
	 * failover mechanisms for the addresses it owns (e.g., clustering).
	 * In this case, ip_offlinelist() will return RCM_SUCCESS, and we
	 * charge on.
	 */
	if (!ipmp && !detachable) {
		/* Inform consumers of IP addresses being offlined */
		if (ip_offlinelist(hd, node, errorp, flags, depend_info) !=
		    RCM_SUCCESS) {
			ip_log_err(node, errorp,
			    "Device consumers prohibit offline");
			goto out;
		}
		rcm_log_message(RCM_DEBUG,
		    "IP: consumers agree on detach");
	}

	/*
	 * Cannot remove an IPMP interface if IFF_NOFAILOVER is set.
	 */
	if (ipmp && nofailover) {
		/* Interface is part of an IPMP group, and cannot failover */
		ip_log_err(node, errorp, "Failover disabled");
		errno = EBUSY;
		goto out;
	}

	/* Check if it's a query */
	if (flags & RCM_QUERY) {
		rcm_log_message(RCM_TRACE1, "IP: offline query success(%s)\n",
		    rsrc);
		rv = RCM_SUCCESS;
		goto out;
	}

	/* Check detachability, save configuration if detachable */
	if (if_cfginfo(node, (flags & RCM_FORCE)) < 0) {
		node->ip_cachestate |= CACHE_IF_IGNORE;
		rcm_log_message(RCM_TRACE1, "IP: Ignoring node(%s)\n", rsrc);
		rv = RCM_SUCCESS;
		goto out;
	}

	/* standalone detachable device */
	if (!ipmp) {
		if (if_unplumb(node) < 0) {
			ip_log_err(node, errorp,
			    "Failed to unplumb the device");
			errno = EIO;
			goto out;
		}

		node->ip_cachestate |= CACHE_IF_OFFLINED;
		rcm_log_message(RCM_TRACE1, "IP: Offline success(%s)\n", rsrc);
		rv = RCM_SUCCESS;
		goto out;
	}

	/*
	 * This is an IPMP interface that can be failed over.
	 * Request in.mpathd(1M) to failover the physical interface.
	 */

	/* Failover to "any", let mpathd determine best failover candidate */
	if (ip_ipmp_offline(node, NULL) < 0) {
		ip_log_err(node, errorp, "in.mpathd failover failed");
		/*
		 * Odds are that in.mpathd(1M) could not offline the device
		 * because it was the last interface in the group.  However,
		 * it's possible that it's still okay to offline it as long as
		 * there are higher-level failover mechanisms for the
		 * addresses it owns (e.g., clustering).  In this case,
		 * ip_offlinelist() will return RCM_SUCCESS, and we charge on.
		 *
		 * TODO: change ip_ipmp_offline() to return the actual failure
		 * from in.mpathd so that we can verify that it did indeed
		 * fail with IPMP_EMINRED.
		 */
		if (!detachable) {
			/* Inform consumers of IP addresses being offlined */
			if (ip_offlinelist(hd, node, errorp, flags,
			    depend_info) != RCM_SUCCESS) {
				ip_log_err(node, errorp,
				    "Device consumers prohibit offline");
				errno = EBUSY;
				goto out;
			}
			rcm_log_message(RCM_DEBUG,
			    "IP: consumers agree on detach");
		}
	}

	if (if_unplumb(node) < 0) {
		rcm_log_message(RCM_ERROR,
		    _("IP: Unplumb failed (%s)\n"),
		    pif->pi_ifname);

		/* Request mpathd to undo the offline */
		if (ip_ipmp_undo_offline(node) < 0) {
			ip_log_err(node, errorp, "Undo offline failed");
			goto out;
		}

		/*
		 * Report the unplumb failure the same way the standalone
		 * path does, so the failure is not returned without a
		 * description.
		 */
		ip_log_err(node, errorp, "Failed to unplumb the device");
		errno = EIO;
		goto out;
	}

	node->ip_cachestate |= CACHE_IF_OFFLINED;
	rcm_log_message(RCM_TRACE1, "IP: offline success(%s)\n", rsrc);
	rv = RCM_SUCCESS;

out:
	(void) mutex_unlock(&cache_lock);
	return (rv);
}
nic + 1 : rsrc; + + (void) mutex_lock(&cache_lock); + node = cache_lookup(hd, rsrc, CACHE_NO_REFRESH); + + if (node == NULL) { + ip_log_err(node, errorp, "No such device"); + (void) mutex_unlock(&cache_lock); + errno = ENOENT; + return (RCM_FAILURE); + } + + /* Check if no attempt should be made to online the device here */ + if (node->ip_cachestate & CACHE_IF_IGNORE) { + node->ip_cachestate &= ~(CACHE_IF_IGNORE); + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + /* Check if the interface was previously offlined */ + if (!(node->ip_cachestate & CACHE_IF_OFFLINED)) { + ip_log_err(node, errorp, "Device not offlined"); + (void) mutex_unlock(&cache_lock); + errno = ENOTSUP; + return (RCM_FAILURE); + } + + if (if_replumb(node) == -1) { + /* re-plumb failed */ + ip_log_err(node, errorp, "Replumb failed"); + (void) mutex_unlock(&cache_lock); + errno = EIO; + return (RCM_FAILURE); + + } + + /* Inform consumers about IP addresses being un-offlined */ + (void) ip_onlinelist(hd, node, errorp, flags, depend_info); + + node->ip_cachestate &= ~(CACHE_IF_OFFLINED); + rcm_log_message(RCM_TRACE1, "IP: online success(%s)\n", rsrc); + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * ip_get_info() - Gather usage information for this resource. 
+ */ +/*ARGSUSED*/ +int +ip_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **usagep, char **errorp, nvlist_t *props, rcm_info_t **depend_info) +{ + ip_cache_t *node; + char *infostr; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(usagep != NULL); + assert(errorp != NULL); + assert(depend_info != NULL); + + rcm_log_message(RCM_TRACE1, "IP: get_info(%s)\n", rsrc); + + (void) mutex_lock(&cache_lock); + node = cache_lookup(hd, rsrc, CACHE_REFRESH); + if (!node) { + rcm_log_message(RCM_INFO, + _("IP: get_info(%s) unrecognized resource\n"), rsrc); + (void) mutex_unlock(&cache_lock); + errno = ENOENT; + return (RCM_FAILURE); + } + + infostr = ip_usage(node); + + if (infostr == NULL) { + /* most likely malloc failure */ + rcm_log_message(RCM_ERROR, + _("IP: get_info(%s) malloc failure\n"), rsrc); + (void) mutex_unlock(&cache_lock); + errno = ENOMEM; + *errorp = NULL; + return (RCM_FAILURE); + } + + /* Set client/role properties */ + (void) nvlist_add_string(props, RCM_CLIENT_NAME, "IP"); + + /* Set usage property, infostr will be freed by caller */ + *usagep = infostr; + + rcm_log_message(RCM_TRACE1, "IP: get_info(%s) info = %s \n", + rsrc, infostr); + + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * ip_suspend() - Nothing to do, always okay + */ +/*ARGSUSED*/ +static int +ip_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, + uint_t flags, char **errorp, rcm_info_t **depend_info) +{ + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(interval != NULL); + assert(errorp != NULL); + assert(depend_info != NULL); + + rcm_log_message(RCM_TRACE1, "IP: suspend(%s)\n", rsrc); + return (RCM_SUCCESS); +} + +/* + * ip_resume() - Nothing to do, always okay + */ +/*ARGSUSED*/ +static int +ip_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t ** 
depend_info) +{ + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + assert(depend_info != NULL); + + rcm_log_message(RCM_TRACE1, "IP: resume(%s)\n", rsrc); + + return (RCM_SUCCESS); +} + +/* + * ip_remove() - remove a resource from cache + */ +/*ARGSUSED*/ +static int +ip_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **depend_info) +{ + ip_cache_t *node; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + assert(depend_info != NULL); + + rcm_log_message(RCM_TRACE1, "IP: remove(%s)\n", rsrc); + + (void) mutex_lock(&cache_lock); + node = cache_lookup(hd, rsrc, CACHE_NO_REFRESH); + if (!node) { + rcm_log_message(RCM_INFO, + _("IP: remove(%s) unrecognized resource\n"), rsrc); + (void) mutex_unlock(&cache_lock); + errno = ENOENT; + return (RCM_FAILURE); + } + + /* remove the cached entry for the resource */ + cache_remove(node); + + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * ip_notify_event - Project private implementation to receive new resource + * events. It intercepts all new resource events. If the + * new resource is a network resource, pass up a notify + * for it too. The new resource need not be cached, since + * it is done at register again. 
+ */ +/*ARGSUSED*/ +static int +ip_notify_event(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, nvlist_t *nvl, rcm_info_t **depend_info) +{ + struct ni_list *nilp, *onilp; + struct net_interface *nip; + int n; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(nvl != NULL); + + rcm_log_message(RCM_TRACE1, "IP: notify_event(%s)\n", rsrc); + + if (!STREQ(rsrc, RCM_RESOURCE_NETWORK_NEW)) { + rcm_log_message(RCM_INFO, + _("IP: unrecognized event for %s\n"), rsrc); + ip_log_err(NULL, errorp, "unrecognized event"); + errno = EINVAL; + return (RCM_FAILURE); + } + + /* Update cache to reflect latest interfaces */ + if (update_cache(hd) < 0) { + rcm_log_message(RCM_ERROR, _("IP: update_cache failed\n")); + ip_log_err(NULL, errorp, "Private Cache update failed"); + return (RCM_FAILURE); + } + + /* Process the nvlist for the event */ + if (process_nvlist(nvl) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: Error processing resource attributes(%s)\n"), rsrc); + rcm_log_message(RCM_WARNING, + _("IP: One or more devices may not be configured.\n")); + ip_log_err(NULL, errorp, "Error processing device properties"); + /* Continue processing interfaces that were valid */ + } + + (void) mutex_lock(&nil_lock); + + /* Configure all new interfaces found */ + for (nilp = nil_head, n = 0; n < num_ni; nilp = nilp->next, n++) { + nip = nilp->nifp; + if (if_configure(nip->name) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Configuration failed (%s)\n"), nip->name); + ip_log_err(NULL, errorp, + "Failed configuring one or more IP addresses"); + /* continue configuring rest of the interfaces */ + } + } + + /* Notify all IP address consumers and clean up interface list */ + for (nilp = nil_head; nilp; ) { + nip = nilp->nifp; + if (nip != (struct net_interface *)0) { + if (nip->name != 0) { + ip_consumer_notify(hd, nip->name, errorp, flags, + depend_info); + free(nip->name); + } + if (nip->type != 0) + free(nip->type); + free((char *)nip); + } 
+ + onilp = nilp; + nilp = nilp->next; + free((char *)onilp); + } + + num_ni = 0; /* reset new if count */ + nil_head = NIL_NULL; /* reset list head */ + + (void) mutex_unlock(&nil_lock); + + rcm_log_message(RCM_TRACE1, + "IP: notify_event: device configuration complete\n"); + + return (RCM_SUCCESS); +} + +/* + * ip_usage - Determine the usage of a device. Call with cache_lock held. + * The returned buffer is owned by caller, and the caller + * must free it up when done. + */ +static char * +ip_usage(ip_cache_t *node) +{ + ip_lif_t *lif; + int numifs; + char *buf; + char *nic; + const char *fmt; + char *sep; + char addrstr[INET6_ADDRSTRLEN]; + int offline = 0; + size_t bufsz; + + rcm_log_message(RCM_TRACE2, "IP: usage(%s)\n", node->ip_resource); + + nic = strchr(node->ip_resource, '/'); + nic = nic ? nic + 1 : node->ip_resource; + + /* TRANSLATION_NOTE: separator used between IP addresses */ + sep = _(", "); + + numifs = 0; + for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) { + if (lif->li_ifflags & IFF_UP) { + numifs++; + } + } + + if (node->ip_cachestate & CACHE_IF_OFFLINED) { + offline++; + } + + if (!offline && numifs) { + fmt = _("%1$s hosts IP addresses: "); + } else if (offline) { + fmt = _("%1$s offlined"); + } else { + fmt = _("%1$s plumbed but down"); + } + + /* space for addresses and separators, plus message */ + bufsz = ((numifs * (INET6_ADDRSTRLEN + strlen(sep))) + + strlen(fmt) + strlen(nic) + 1); + if ((buf = malloc(bufsz)) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: usage(%s) malloc failure(%s)\n"), + node->ip_resource, strerror(errno)); + return (NULL); + } + bzero(buf, bufsz); + (void) sprintf(buf, fmt, nic); + + if (offline || (numifs == 0)) { /* Nothing else to do */ + rcm_log_message(RCM_TRACE2, "IP: usage (%s) info = %s\n", + node->ip_resource, buf); + + return (buf); + } + + for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) { + + void *addr; + int af; + + if (!(lif->li_ifflags & IFF_UP)) { + /* ignore 
interfaces not up */ + continue; + } + af = lif->li_addr.family; + if (af == AF_INET6) { + addr = &lif->li_addr.ip6.sin6_addr; + } else if (af == AF_INET) { + addr = &lif->li_addr.ip4.sin_addr; + } else { + rcm_log_message(RCM_DEBUG, + "IP: unknown addr family %d, assuming AF_INET\n", + af); + af = AF_INET; + addr = &lif->li_addr.ip4.sin_addr; + } + if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: inet_ntop: %s\n"), strerror(errno)); + continue; + } + rcm_log_message(RCM_DEBUG, "IP addr := %s\n", addrstr); + + (void) strcat(buf, addrstr); + numifs--; + if (numifs > 0) { + (void) strcat(buf, ", "); + } + } + + rcm_log_message(RCM_TRACE2, "IP: usage (%s) info = %s\n", + node->ip_resource, buf); + + return (buf); +} + +/* + * Cache management routines, all cache management functions should be + * be called with cache_lock held. + */ + +/* + * cache_lookup() - Get a cache node for a resource. Supports VLAN interfaces. + * Call with cache lock held. + * + * This ensures that the cache is consistent with the system state and + * returns a pointer to the cache element corresponding to the resource. 
+ */ +static ip_cache_t * +cache_lookup(rcm_handle_t *hd, char *rsrc, char options) +{ + ip_cache_t *probe; + char *resource; /* physical resource */ + + rcm_log_message(RCM_TRACE2, "IP: cache lookup(%s)\n", rsrc); + + if ((options & CACHE_REFRESH) && (hd != NULL)) { + /* drop lock since update locks cache again */ + (void) mutex_unlock(&cache_lock); + (void) update_cache(hd); + (void) mutex_lock(&cache_lock); + } + + if ((resource = get_physical_resource(rsrc)) == NULL) { + errno = ENOENT; + return (NULL); + } + + probe = cache_head.ip_next; + while (probe != &cache_tail) { + if (probe->ip_resource && + STREQ(resource, probe->ip_resource)) { + rcm_log_message(RCM_TRACE2, + "IP: cache lookup success(%s)\n", rsrc); + free(resource); + return (probe); + } + probe = probe->ip_next; + } + free(resource); + return (NULL); +} + +/* + * free_node - Free a node from the cache + * Call with cache_lock held. + */ +static void +free_node(ip_cache_t *node) +{ + ip_pif_t *pif; + ip_lif_t *lif, *tmplif; + + if (node) { + if (node->ip_resource) { + free(node->ip_resource); + } + + /* free the pif */ + pif = node->ip_pif; + if (pif) { + /* free logical interfaces */ + lif = pif->pi_lifs; + while (lif) { + tmplif = lif->li_next; + free(lif); + lif = tmplif; + } + free(pif); + } + free(node); + } +} + +/* + * cache_insert - Insert a resource node in cache + * Call with the cache_lock held. + */ +static void +cache_insert(ip_cache_t *node) +{ + /* insert at the head for best performance */ + node->ip_next = cache_head.ip_next; + node->ip_prev = &cache_head; + + node->ip_next->ip_prev = node; + node->ip_prev->ip_next = node; +} + +/* + * cache_remove() - Remove a resource node from cache. + * Call with the cache_lock held. 
+ */ +static void +cache_remove(ip_cache_t *node) +{ + node->ip_next->ip_prev = node->ip_prev; + node->ip_prev->ip_next = node->ip_next; + node->ip_next = NULL; + node->ip_prev = NULL; +} + +/* + * update_pif() - Update physical interface properties + * Call with cache_lock held + */ +/*ARGSUSED*/ +static int +update_pif(rcm_handle_t *hd, int af, int sock, struct lifreq *lifr) +{ + char ifname[RCM_NET_RESOURCE_MAX]; + ushort_t ifnumber = 0; + int ppa; + char *cp; + ip_cache_t *probe; + ip_pif_t pif; + ip_pif_t *probepif; + ip_lif_t *probelif; + struct lifreq lifreq; + struct sockaddr_storage ifaddr; + uint64_t ifflags; + int lif_listed = 0; + + rcm_log_message(RCM_TRACE1, "IP: update_pif(%s)\n", lifr->lifr_name); + + /* Determine the interface name and lun number */ + (void) memcpy(&ifname, lifr->lifr_name, sizeof (ifname)); + ifname[sizeof (ifname) - 1] = '\0'; + + /* remove LIF component */ + cp = strchr(ifname, ':'); + if (cp) { + *cp = 0; + cp++; + ifnumber = atoi(cp); + } + + (void) memcpy(&pif.pi_ifname, &ifname, sizeof (pif.pi_ifname)); + pif.pi_ifname[sizeof (pif.pi_ifname) - 1] = '\0'; + + /* Determine DLPI style */ + if (getdlpi_style(ifname) == DL_STYLE1) { + pif.pi_ppa = 0; + pif.pi_style = DL_STYLE1; + rcm_log_message(RCM_DEBUG, "IP: DLPI style1 (%s)\n", ifname); + } else { /* DLPI style 2 */ + /* Determine the ppa */ + if ((ppa = get_ppa(ifname)) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: get_ppa(%s): %s\n"), + ifname, strerror(ENXIO)); + return (-1); + } + + pif.pi_style = DL_STYLE2; + pif.pi_ppa = ppa; + rcm_log_message(RCM_DEBUG, "IP: DLPI style2 (%s)\n", ifname); + } + + /* Get the interface flags */ + (void) strcpy(lifreq.lifr_name, lifr->lifr_name); + if (ioctl(sock, SIOCGLIFFLAGS, (char *)&lifreq) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFFLAGS(%s): %s\n"), + ifname, strerror(errno)); + return (-1); + } + (void) memcpy(&ifflags, &lifreq.lifr_flags, sizeof (ifflags)); + + /* Ignore loopback and multipoint interfaces */ + if 
(!(ifflags & IFF_MULTICAST) || + (ifflags & IFF_LOOPBACK)) { + rcm_log_message(RCM_TRACE3, "IP: if ignored (%s)\n", ifname); + return (0); + } + + /* Get the interface group name for this interface */ + if (ioctl(sock, SIOCGLIFGROUPNAME, (char *)&lifreq) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFGROUPNAME(%s): %s\n"), + lifreq.lifr_name, strerror(errno)); + return (-1); + } + + /* copy the group name */ + (void) memcpy(&pif.pi_grpname, &lifreq.lifr_groupname, + sizeof (pif.pi_grpname)); + pif.pi_grpname[sizeof (pif.pi_grpname) - 1] = '\0'; + + /* Get the interface address for this interface */ + if (ioctl(sock, SIOCGLIFADDR, (char *)&lifreq) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFADDR(%s): %s\n"), + lifreq.lifr_name, strerror(errno)); + return (-1); + } + (void) memcpy(&ifaddr, &lifreq.lifr_addr, sizeof (ifaddr)); + + /* Get the interface index */ + if (ioctl(sock, SIOCGLIFINDEX, (char *)&lifreq) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFINDEX(%s): %s\n"), + lifreq.lifr_name, strerror(errno)); + return (-1); + } + pif.pi_ifindex = lifreq.lifr_index; + + /* Search for the interface in our cache */ + (void) snprintf(ifname, sizeof (ifname), "%s/%s", RCM_NET_PREFIX, + pif.pi_ifname); + + probe = cache_lookup(hd, ifname, CACHE_NO_REFRESH); + if (probe != NULL) { + probe->ip_cachestate &= ~(CACHE_IF_STALE); + } else { + if ((probe = calloc(1, sizeof (ip_cache_t))) == NULL) { + /* malloc errors are bad */ + rcm_log_message(RCM_ERROR, _("IP: calloc: %s\n"), + strerror(errno)); + return (-1); + } + + probe->ip_resource = get_physical_resource(ifname); + if (!probe->ip_resource) { + rcm_log_message(RCM_ERROR, _("IP: strdup: %s\n"), + strerror(errno)); + free(probe); + return (-1); + } + + probe->ip_pif = NULL; + probe->ip_ifred = RCM_IPMP_MIN_REDUNDANCY; + probe->ip_cachestate |= CACHE_IF_NEW; + + cache_insert(probe); + } + + probepif = probe->ip_pif; + if (probepif != NULL) { + /* Check if lifs need to be updated */ + probelif = 
probepif->pi_lifs; + while (probelif != NULL) { + if ((probelif->li_ifnum == ifnumber) && + (probelif->li_addr.family == ifaddr.ss_family)) { + + rcm_log_message(RCM_TRACE2, + "IP: refreshing lifs for %s, ifnum=%d\n", + pif.pi_ifname, probelif->li_ifnum); + + /* refresh lif properties */ + (void) memcpy(&probelif->li_addr, &ifaddr, + sizeof (probelif->li_addr)); + + probelif->li_ifflags = ifflags; + + lif_listed++; + probe->ip_cachestate |= CACHE_IF_UPDATED; + probelif->li_cachestate &= ~(CACHE_IF_STALE); + break; + } + probelif = probelif->li_next; + } + } + + if (probepif == NULL) { + if ((probepif = calloc(1, sizeof (ip_pif_t))) == NULL) { + rcm_log_message(RCM_ERROR, _("IP: malloc: %s\n"), + strerror(errno)); + if (probe->ip_pif == NULL) { + /* we created it, so clean it up */ + free(probe); + } + return (-1); + } + + probe->ip_pif = probepif; + + probe->ip_cachestate |= CACHE_IF_UPDATED; + + /* Save interface name */ + (void) memcpy(&probepif->pi_ifname, &pif.pi_ifname, + sizeof (pif.pi_ifname)); + } + + /* save pif properties */ + probepif->pi_ifindex = pif.pi_ifindex; + (void) memcpy(&probepif->pi_grpname, &pif.pi_grpname, + sizeof (pif.pi_grpname)); + probepif->pi_style = pif.pi_style; + probepif->pi_ppa = pif.pi_ppa; + + /* add lif, if this is a lif and it is not in cache */ + if (!lif_listed) { + rcm_log_message(RCM_TRACE2, "IP: adding lifs to %s\n", + pif.pi_ifname); + + if ((probelif = calloc(1, sizeof (ip_lif_t))) == NULL) { + rcm_log_message(RCM_ERROR, _("IP: malloc: %s\n"), + strerror(errno)); + return (-1); + } + + /* save lif properties */ + (void) memcpy(&probelif->li_addr, &ifaddr, + sizeof (probelif->li_addr)); + + probelif->li_ifnum = ifnumber; + probelif->li_ifflags = ifflags; + + /* insert us at the head of the lif list */ + probelif->li_next = probepif->pi_lifs; + if (probelif->li_next != NULL) { + probelif->li_next->li_prev = probelif; + } + probelif->li_prev = NULL; + probelif->li_pif = probepif; + + probepif->pi_lifs = probelif; + 
probelif->li_cachestate = CACHE_IF_NEW; + + probe->ip_cachestate |= CACHE_IF_UPDATED; + } + + rcm_log_message(RCM_TRACE3, "IP: update_pif: (%s) success\n", + probe->ip_resource); + + return (0); +} + +/* + * update_ipifs() - Determine all network interfaces in the system + * Call with cache_lock held + */ +static int +update_ipifs(rcm_handle_t *hd, int af) +{ + int sock; + char *buf; + struct lifnum lifn; + struct lifconf lifc; + struct lifreq *lifrp; + int i; + + rcm_log_message(RCM_TRACE2, "IP: update_ipifs\n"); + + if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: failure opening %s socket: %s\n"), + af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno)); + return (-1); + } + + lifn.lifn_family = af; + lifn.lifn_flags = 0; + if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCLGIFNUM failed: %s\n"), + strerror(errno)); + (void) close(sock); + return (-1); + } + + if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) { + rcm_log_message(RCM_ERROR, _("IP: calloc: %s\n"), + strerror(errno)); + (void) close(sock); + return (-1); + } + + lifc.lifc_family = af; + lifc.lifc_flags = 0; + lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count; + lifc.lifc_buf = buf; + + if (ioctl(sock, SIOCGLIFCONF, (char *)&lifc) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFCONF failed: %s\n"), + strerror(errno)); + free(buf); + (void) close(sock); + return (-1); + } + + /* now we need to search for active interfaces */ + lifrp = lifc.lifc_req; + for (i = 0; i < lifn.lifn_count; i++) { + (void) update_pif(hd, af, sock, lifrp); + lifrp++; + } + + free(buf); + (void) close(sock); + return (0); +} + +/* + * update_cache() - Update cache with latest interface info + */ +static int +update_cache(rcm_handle_t *hd) +{ + ip_cache_t *probe; + struct ip_lif *lif; + struct ip_lif *nextlif; + int rv; + int i; + + rcm_log_message(RCM_TRACE2, "IP: update_cache\n"); + + (void) mutex_lock(&cache_lock); + + 
/* first we walk the entire cache, marking each entry stale */ + probe = cache_head.ip_next; + while (probe != &cache_tail) { + probe->ip_cachestate |= CACHE_IF_STALE; + if ((probe->ip_pif != NULL) && + ((lif = probe->ip_pif->pi_lifs) != NULL)) { + while (lif != NULL) { + lif->li_cachestate |= CACHE_IF_STALE; + lif = lif->li_next; + } + } + probe = probe->ip_next; + } + + rcm_log_message(RCM_TRACE2, "IP: scanning IPv4 interfaces\n"); + if (update_ipifs(hd, AF_INET) < 0) { + (void) mutex_unlock(&cache_lock); + return (-1); + } + + rcm_log_message(RCM_TRACE2, "IP: scanning IPv6 interfaces\n"); + if (update_ipifs(hd, AF_INET6) < 0) { + (void) mutex_unlock(&cache_lock); + return (-1); + } + + probe = cache_head.ip_next; + /* unregister devices that are not offlined and still in cache */ + while (probe != &cache_tail) { + ip_cache_t *freeit; + if ((probe->ip_pif != NULL) && + ((lif = probe->ip_pif->pi_lifs) != NULL)) { + /* clear stale lifs */ + while (lif != NULL) { + if (lif->li_cachestate & CACHE_IF_STALE) { + nextlif = lif->li_next; + if (lif->li_prev != NULL) + lif->li_prev->li_next = nextlif; + if (nextlif != NULL) + nextlif->li_prev = lif->li_prev; + if (probe->ip_pif->pi_lifs == lif) + probe->ip_pif->pi_lifs = + nextlif; + for (i = 0; i < IP_MAX_MODS; i++) { + free(lif->li_modules[i]); + } + free(lif->li_reconfig); + free(lif); + lif = nextlif; + } else { + lif = lif->li_next; + } + } + } + if ((probe->ip_cachestate & CACHE_IF_STALE) && + !(probe->ip_cachestate & CACHE_IF_OFFLINED)) { + (void) rcm_unregister_interest(hd, probe->ip_resource, + 0); + rcm_log_message(RCM_DEBUG, "IP: unregistered %s\n", + probe->ip_resource); + freeit = probe; + probe = probe->ip_next; + cache_remove(freeit); + free_node(freeit); + continue; + } + + if (!(probe->ip_cachestate & CACHE_IF_NEW)) { + probe = probe->ip_next; + continue; + } + + rv = rcm_register_interest(hd, probe->ip_resource, 0, NULL); + if (rv != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + _("IP: failed to register 
%s\n"), + probe->ip_resource); + (void) mutex_unlock(&cache_lock); + return (-1); + } else { + rcm_log_message(RCM_DEBUG, "IP: registered %s\n", + probe->ip_resource); + probe->ip_cachestate &= ~(CACHE_IF_NEW); + } + probe = probe->ip_next; + } + + (void) mutex_unlock(&cache_lock); + return (0); +} + +/* + * free_cache() - Empty the cache + */ +static void +free_cache() +{ + ip_cache_t *probe; + + rcm_log_message(RCM_TRACE2, "IP: free_cache\n"); + + (void) mutex_lock(&cache_lock); + probe = cache_head.ip_next; + while (probe != &cache_tail) { + cache_remove(probe); + free_node(probe); + probe = cache_head.ip_next; + } + (void) mutex_unlock(&cache_lock); +} + +/* + * ip_log_err() - RCM error log wrapper + */ +static void +ip_log_err(ip_cache_t *node, char **errorp, char *errmsg) +{ + char *nic = NULL; + int len; + const char *errfmt; + char *error; + + if ((node != NULL) && (node->ip_pif != NULL) && + (node->ip_pif->pi_ifname != NULL)) { + nic = strrchr(node->ip_pif->pi_ifname, '/'); + nic = nic ? 
nic + 1 : node->ip_pif->pi_ifname; + } + + if (errorp != NULL) + *errorp = NULL; + + if (nic == NULL) { + rcm_log_message(RCM_ERROR, _("IP: %s\n"), errmsg); + errfmt = _("IP: %s"); + len = strlen(errfmt) + strlen(errmsg) + 1; + if (error = (char *)calloc(1, len)) { + (void) sprintf(error, errfmt, errmsg); + } + } else { + rcm_log_message(RCM_ERROR, _("IP: %s(%s)\n"), errmsg, nic); + errfmt = _("IP: %s(%s)"); + len = strlen(errfmt) + strlen(errmsg) + strlen(nic) + 1; + if (error = (char *)calloc(1, len)) { + (void) sprintf(error, errfmt, errmsg, nic); + } + } + + if (errorp != NULL) + *errorp = error; +} + + +/* + * if_cfginfo() - Save off the config info for all interfaces + */ +static int +if_cfginfo(ip_cache_t *node, uint_t force) +{ + ip_lif_t *lif; + ip_pif_t *pif; + int i; + FILE *fp; + char syscmd[MAX_RECONFIG_SIZE + LIFNAMSIZ]; + char buf[MAX_RECONFIG_SIZE]; + + rcm_log_message(RCM_TRACE2, "IP: if_cfginfo(%s)\n", node->ip_resource); + + pif = node->ip_pif; + lif = pif->pi_lifs; + + while (lif != NULL) { + /* Make a list of modules pushed and save */ + if (lif->li_ifnum == 0) { /* physical instance */ + if (get_modlist(pif->pi_ifname, lif) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: get modlist error (%s) %s\n"), + pif->pi_ifname, strerror(errno)); + (void) clr_cfg_state(pif); + return (-1); + } + + if (!force) { + /* Look if unknown modules have been inserted */ + for (i = (lif->li_modcnt - 2); i > 0; i--) { + if (modop(pif->pi_ifname, + lif->li_modules[i], + i, MOD_CHECK) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: module %s@%d\n"), + lif->li_modules[i], i); + (void) clr_cfg_state(pif); + return (-1); + } + } + } + + /* Last module is the device driver, so ignore that */ + for (i = (lif->li_modcnt - 2); i > 0; i--) { + rcm_log_message(RCM_TRACE2, + "IP: modremove Pos = %d, Module = %s \n", + i, lif->li_modules[i]); + if (modop(pif->pi_ifname, lif->li_modules[i], + i, MOD_REMOVE) == -1) { + while (i != (lif->li_modcnt - 2)) { + if 
(modop(pif->pi_ifname, + lif->li_modules[i], + i, MOD_INSERT) == -1) { + /* Gross error */ + rcm_log_message( + RCM_ERROR, + _("IP: if_cfginfo" + "(%s) %s\n"), + pif->pi_ifname, + strerror(errno)); + clr_cfg_state(pif); + return (-1); + } + i++; + } + rcm_log_message( + RCM_ERROR, + _("IP: if_cfginfo(%s): modremove " + "%s failed: %s\n"), pif->pi_ifname, + lif->li_modules[i], + strerror(errno)); + clr_cfg_state(pif); + return (-1); + } + } + } + + /* Save reconfiguration information */ + if (lif->li_ifflags & IFF_IPV4) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s:%d configinfo\n", USR_SBIN_IFCONFIG, + pif->pi_ifname, lif->li_ifnum); + } else if (lif->li_ifflags & IFF_IPV6) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s:%d inet6 configinfo\n", USR_SBIN_IFCONFIG, + pif->pi_ifname, lif->li_ifnum); + } + rcm_log_message(RCM_TRACE2, "IP: %s\n", syscmd); + + /* open a pipe to retrieve reconfiguration info */ + if ((fp = popen(syscmd, "r")) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: ifconfig configinfo error (%s:%d) %s\n"), + pif->pi_ifname, lif->li_ifnum, strerror(errno)); + (void) clr_cfg_state(pif); + return (-1); + } + bzero(buf, MAX_RECONFIG_SIZE); + + if (fgets(buf, MAX_RECONFIG_SIZE, fp) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: ifconfig configinfo error (%s:%d) %s\n"), + pif->pi_ifname, lif->li_ifnum, strerror(errno)); + (void) pclose(fp); + (void) clr_cfg_state(pif); + return (-1); + } + (void) pclose(fp); + + lif->li_reconfig = malloc(strlen(buf)+1); + if (lif->li_reconfig == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: malloc error (%s) %s\n"), + pif->pi_ifname, strerror(errno)); + (void) clr_cfg_state(pif); + return (-1); + } + (void) strcpy(lif->li_reconfig, buf); + rcm_log_message(RCM_DEBUG, + "IP: if_cfginfo: reconfig string(%s:%d) = %s\n", + pif->pi_ifname, lif->li_ifnum, lif->li_reconfig); + + lif = lif->li_next; + } + + return (0); +} + +/* + * if_unplumb() - Unplumb the interface + * Save off the modlist, ifconfig options 
and unplumb. + * Fail, if an unknown module lives between IP and driver and + * force is not set + * Call with cache_lock held + */ +static int +if_unplumb(ip_cache_t *node) +{ + ip_lif_t *lif; + ip_pif_t *pif; + int ipv4 = 0, ipv6 = 0; + char syscmd[MAX_RECONFIG_SIZE + LIFNAMSIZ]; + + rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s)\n", node->ip_resource); + + pif = node->ip_pif; + lif = pif->pi_lifs; + + while (lif != NULL) { + if (lif->li_ifflags & IFF_IPV4) { + ipv4++; + } else if (lif->li_ifflags & IFF_IPV6) { + ipv6++; + } else { + /* Unlikely case */ + rcm_log_message(RCM_DEBUG, + _("IP: Unplumb ignored (%s:%d)\n"), + pif->pi_ifname, lif->li_ifnum); + lif = lif->li_next; + continue; + } + lif = lif->li_next; + } + + /* Unplumb the physical interface */ + if (ipv4) { + rcm_log_message(RCM_TRACE2, + "IP: if_unplumb: ifconfig %s unplumb\n", pif->pi_ifname); + (void) snprintf(syscmd, sizeof (syscmd), "%s %s unplumb\n", + USR_SBIN_IFCONFIG, pif->pi_ifname); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot unplumb (%s) %s\n"), + pif->pi_ifname, strerror(errno)); + return (-1); + } + } + if (ipv6) { + rcm_log_message(RCM_TRACE2, + "IP: if_unplumb: ifconfig %s inet6 unplumb\n", + pif->pi_ifname); + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s inet6 unplumb\n", USR_SBIN_IFCONFIG, pif->pi_ifname); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot unplumb (%s) %s\n"), + pif->pi_ifname, strerror(errno)); + return (-1); + } + } + rcm_log_message(RCM_TRACE2, "IP: if_unplumb(%s) success\n", + node->ip_resource); + + return (0); +} + +/* + * if_replumb() - Undo previous unplumb i.e. 
plumb back the physical interface + * instances and the logical interfaces in order, restoring + * all ifconfig options + * Call with cache_lock held + */ +static int +if_replumb(ip_cache_t *node) +{ + ip_lif_t *lif; + ip_pif_t *pif; + int i; + char syscmd[LIFNAMSIZ+MAXPATHLEN]; /* must be big enough */ + int max_ipv4 = 0, max_ipv6 = 0; + + rcm_log_message(RCM_TRACE2, "IP: if_replumb(%s)\n", node->ip_resource); + + /* + * Be extra careful about bringing up the interfaces in the + * correct order: + * - First plumb in the physical interface instances + * - modinsert the necessary modules@pos + * - Next, add the logical interfaces being careful about + * the order, (follow the cached interface number li_ifnum order) + */ + + pif = node->ip_pif; + lif = pif->pi_lifs; + + /* + * Make a first pass to plumb in physical interfaces and get a count + * of the max logical interfaces + */ + while (lif != NULL) { + if (lif->li_ifflags & IFF_IPV4) { + if (lif->li_ifnum > max_ipv4) { + max_ipv4 = lif->li_ifnum; + } + } else if (lif->li_ifflags & IFF_IPV6) { + if (lif->li_ifnum > max_ipv6) { + max_ipv6 = lif->li_ifnum; + } + } else { + /* Unlikely case */ + rcm_log_message(RCM_DEBUG, + _("IP: Re-plumb ignored (%s:%d)\n"), + pif->pi_ifname, lif->li_ifnum); + lif = lif->li_next; + continue; + } + + if (lif->li_ifnum == 0) { /* physical interface instance */ + if ((lif->li_ifflags & IFF_NOFAILOVER) || + (strcmp(pif->pi_grpname, "") == 0)) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s\n", USR_SBIN_IFCONFIG, + lif->li_reconfig); + } else if (lif->li_ifflags & IFF_IPV4) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s inet plumb group %s\n", + USR_SBIN_IFCONFIG, + pif->pi_ifname, pif->pi_grpname); + } else if (lif->li_ifflags & IFF_IPV6) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s inet6 plumb group %s\n", + USR_SBIN_IFCONFIG, + pif->pi_ifname, pif->pi_grpname); + } + + rcm_log_message(RCM_TRACE2, + "IP: if_replumb: %s\n", syscmd); + if (rcm_exec_cmd(syscmd) != 0) 
{ + rcm_log_message(RCM_ERROR, + _("IP: Cannot plumb (%s) %s\n"), + pif->pi_ifname, strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE2, + "IP: if_replumb: Modcnt = %d\n", lif->li_modcnt); + /* modinsert modules in order, ignore driver(last) */ + for (i = 0; i < (lif->li_modcnt - 1); i++) { + rcm_log_message(RCM_TRACE2, + "IP: modinsert: Pos = %d Mod = %s\n", + i, lif->li_modules[i]); + if (modop(pif->pi_ifname, lif->li_modules[i], i, + MOD_INSERT) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: modinsert error(%s)\n"), + pif->pi_ifname); + return (-1); + } + } + } + + lif = lif->li_next; + } + + /* Now, add all the logical interfaces in the correct order */ + for (i = 1; i <= MAX(max_ipv6, max_ipv4); i++) { + /* reset lif through every iteration */ + lif = pif->pi_lifs; + while (lif != NULL) { + if (((lif->li_ifflags & IFF_NOFAILOVER) || + (strcmp(pif->pi_grpname, "") == 0)) && + (lif->li_ifnum == i)) { + /* Plumb in the logical interface */ + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s\n", USR_SBIN_IFCONFIG, + lif->li_reconfig); + rcm_log_message(RCM_TRACE2, + "IP: if_replumb: %s\n", syscmd); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot addif (%s:%d) " + "%s\n"), + pif->pi_ifname, i, strerror(errno)); + return (-1); + } + } + lif = lif->li_next; + } + } + + rcm_log_message(RCM_TRACE2, "IP: if_replumb(%s) success \n", + node->ip_resource); + + return (0); +} + +/* + * clr_cfg_state() - Cleanup after errors in unplumb + */ +static void +clr_cfg_state(ip_pif_t *pif) +{ + ip_lif_t *lif; + int i; + + lif = pif->pi_lifs; + + while (lif != NULL) { + lif->li_modcnt = 0; + free(lif->li_reconfig); + lif->li_reconfig = NULL; + for (i = 0; i < IP_MAX_MODS; i++) { + free(lif->li_modules[i]); + lif->li_modules[i] = NULL; + } + lif = lif->li_next; + } +} + +/* + * ip_ipmp_offline() - Failover from if_from to if_to using a + * minimum redudancy of min_red. This uses IPMPs + * "offline" mechanism to achieve the failover. 
+ */ +static int +ip_ipmp_offline(ip_cache_t *if_from, ip_cache_t *if_to) +{ + mpathd_cmd_t mpdcmd; + + if ((if_from == NULL) || (if_from->ip_pif == NULL) || + (if_from->ip_pif->pi_ifname == NULL)) { + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline\n"); + + mpdcmd.cmd_command = MI_OFFLINE; + (void) strcpy(mpdcmd.cmd_ifname, if_from->ip_pif->pi_ifname); + + if ((if_to != NULL) && (if_to->ip_pif != NULL) && + (if_to->ip_pif->pi_ifname != NULL)) { + rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(%s)\n", + if_from->ip_pif->pi_ifname, if_to->ip_pif->pi_ifname); + (void) strncpy(mpdcmd.cmd_movetoif, if_to->ip_pif->pi_ifname, + sizeof (mpdcmd.cmd_movetoif)); + mpdcmd.cmd_movetoif[sizeof (mpdcmd.cmd_movetoif) - 1] = '\0'; + } else { + rcm_log_message(RCM_TRACE1, "IP: ip_ipmp_offline (%s)->(any)\n", + if_from->ip_pif->pi_ifname); + (void) strcpy(mpdcmd.cmd_movetoif, ""); /* signifies any */ + } + mpdcmd.cmd_min_red = if_from->ip_ifred; + + if (mpathd_send_cmd(&mpdcmd) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd offline error: %s\n"), + strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: ipmp offline success\n"); + return (0); +} + +/* + * ip_ipmp_undo_offline() - Undo prior offline of the interface. + * This uses IPMPs "undo offline" feature. + */ +static int +ip_ipmp_undo_offline(ip_cache_t *node) +{ + mpathd_cmd_t mpdcmd; + + mpdcmd.cmd_command = MI_UNDO_OFFLINE; + (void) strcpy(mpdcmd.cmd_ifname, node->ip_pif->pi_ifname); + + if (mpathd_send_cmd(&mpdcmd) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd error: %s\n"), + strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: ipmp undo offline success\n"); + return (0); +} + +/* + * is_virtual() - Determine whether the specified device is a virtual + * device managed by dld. 
+ */ +static boolean_t +is_virtual(char *ifname) +{ + dladm_attr_t attr; + + return (dladm_info(ifname, &attr) == 0); +} + +/* + * getdlpi_style() - Determine the DLPI provider style of the interface + */ +static int +getdlpi_style(char *ifname) +{ + int local_fd; + char devname[RCM_NET_RESOURCE_MAX]; + + (void) snprintf(devname, sizeof (devname), "%s/%s", SLASH_DEV, ifname); + + /* First try DLPI style 1 */ + if ((local_fd = open(devname, O_RDWR)) != -1) { + (void) close(local_fd); + return (DL_STYLE1); + } + + return (DL_STYLE2); +} + +/* + * get_ppa() - Determine the ppa for an interface, DLPI style 2 only + */ +static int +get_ppa(char *rsrc) +{ + int i; + uint_t p = 0; + unsigned int m = 1; + + i = strlen(rsrc) - 1; + while (i >= 0 && '0' <= rsrc[i] && rsrc[i] <= '9') { + p += (rsrc[i] - '0')*m; + m *= 10; + i--; + } + if (m == 1) { + return (-1); + } + return (is_virtual(rsrc) ? p : VLAN_GET_PPA(p)); /* VLAN support */ +} + +/* + * get_lun() - Determine the logical interface number + */ +static int +get_lun(char *rsrc) +{ + char resource[RCM_NET_RESOURCE_MAX]; + char *cp; + + (void) strcpy(resource, rsrc); + + /* remove LIF component */ + cp = strchr(resource, ':'); + if (cp) { + cp++; + return (atoi(cp)); + } + + return (0); +} + +/* + * get_physical_resource() - Determine the actual physical interface name. + * Supports VLAN interfaces. + * Caller must free the malloced space for the string. 
+ */ +static char * +get_physical_resource(char *rsrc) +{ + char *resource; + char *ifname; + char *nic; + int ppa; + + resource = (char *)malloc(strlen(rsrc) + 1); + if (resource == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: malloc error(%s): %s\n"), strerror(errno), rsrc); + return (NULL); + } + (void) strcpy(resource, rsrc); + + /* remove LIF component if any */ + nic = strchr(resource, ':'); + if (nic) { + *nic = 0; + } + + ppa = get_ppa(resource); + + /* Determine device name */ + nic = resource; + while (nic++) { + if (('0' <= *nic) && (*nic <= '9')) { + *nic = 0; + break; + } + } + + ifname = (char *)malloc(RCM_NET_RESOURCE_MAX); + if (ifname == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: malloc error(%s): %s\n"), strerror(errno), rsrc); + free(resource); + return (NULL); + } + + if (ppa < 0) { + /* This could be a network group */ + (void) snprintf(ifname, RCM_NET_RESOURCE_MAX, "%s", resource); + } else { + (void) snprintf(ifname, RCM_NET_RESOURCE_MAX, "%s%d", + resource, ppa); + } + + free(resource); + return (ifname); +} + +/* + * if_change_flags() - set/clear the flag specified for the physical interface + * Call with cache_lock held + */ +static int +if_change_flags(ip_pif_t *pif, uint64_t flags, boolean_t set) +{ + int sock; + struct lifreq lifr; + ip_lif_t *lif; + + if (!(flags & RCM_PIF_FLAGS)) { + rcm_log_message(RCM_DEBUG, + "IP: if_change_flags: Not a physical interface flag\n"); + return (-1); + } + + /* IPv4 is ok, since we only manipulate physical interface flags */ + if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { + rcm_log_message(RCM_ERROR, + _("IPv4 socket open: %s\n"), strerror(errno)); + return (-1); + } + + /* + * Get the current flags from the kernel, and set/clear the + * desired phyint flags. 
+ */ + (void) strncpy(lifr.lifr_name, pif->pi_ifname, sizeof (lifr.lifr_name)); + lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0'; + if (ioctl(sock, SIOCGLIFFLAGS, (char *)&lifr) < 0) { + rcm_log_message(RCM_DEBUG, + "ioctl SIOCGLIFFLAGS: %s\n", strerror(errno)); + (void) close(sock); + return (-1); + } + if (set) + lifr.lifr_flags |= flags; + else + lifr.lifr_flags &= ~flags; + if (ioctl(sock, SIOCSLIFFLAGS, (char *)&lifr) < 0) { + rcm_log_message(RCM_DEBUG, + "ioctl SIOCSLIFFLAGS: %s\n", strerror(errno)); + (void) close(sock); + return (-1); + } + + (void) close(sock); + + /* Keep cached flags consistent. */ + for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) { + if (lif->li_ifnum == 0) { + if (set) + lif->li_ifflags |= flags; + else + lif->li_ifflags &= ~flags; + } + } + + return (0); +} + +/* + * if_get_flags() - Return the cached physical interface flags + * Call with cache_lock held + */ +static uint64_t +if_get_flags(ip_pif_t *pif) +{ + ip_lif_t *lif; + + for (lif = pif->pi_lifs; lif != NULL; lif = lif->li_next) { + if (lif->li_ifnum == 0) { + return (lif->li_ifflags & RCM_PIF_FLAGS); + } + } + return (0); +} + +/* + * mpathd_send_cmd() - Sends the command to in.mpathd. 
+ */ +static int +mpathd_send_cmd(mpathd_cmd_t *mpd) +{ + mpathd_unoffline_t mpc; + struct mpathd_response mpr; + int i; + int s; + + rcm_log_message(RCM_TRACE1, "IP: mpathd_send_cmd \n"); + + for (i = 0; i < MPATHD_MAX_RETRIES; i++) { + s = connect_to_mpathd(AF_INET); + if (s == -1) { + s = connect_to_mpathd(AF_INET6); + if (s == -1) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot talk to mpathd\n")); + return (-1); + } + } + switch (mpd->cmd_command) { + case MI_OFFLINE : + rcm_log_message(RCM_TRACE1, "IP: MI_OFFLINE: " + "(%s)->(%s) redundancy = %d\n", mpd->cmd_ifname, + mpd->cmd_movetoif, mpd->cmd_min_red); + + if (write(s, mpd, sizeof (mpathd_cmd_t)) != + sizeof (mpathd_cmd_t)) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd write: %s\n"), + strerror(errno)); + (void) close(s); + return (-1); + } + break; + + case MI_SETOINDEX : + rcm_log_message(RCM_TRACE1, "IP: MI_SETOINDEX: " + "(%s)->(%s) family = %d\n", mpd->from_lifname, + mpd->to_pifname, mpd->addr_family); + + if (write(s, mpd, sizeof (mpathd_cmd_t)) != + sizeof (mpathd_cmd_t)) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd write: %s\n"), + strerror(errno)); + (void) close(s); + return (-1); + } + break; + + case MI_UNDO_OFFLINE: + /* mpathd checks for exact size of the message */ + mpc.cmd_command = mpd->cmd_command; + (void) strcpy(mpc.cmd_ifname, mpd->cmd_ifname); + + rcm_log_message(RCM_TRACE1, "IP: MI_UNDO_OFFLINE: " + "(%s)\n", mpd->cmd_ifname); + + if (write(s, &mpc, sizeof (mpathd_unoffline_t)) != + sizeof (mpathd_unoffline_t)) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd write: %s\n"), + strerror(errno)); + (void) close(s); + return (-1); + } + break; + default : + rcm_log_message(RCM_ERROR, + _("IP: unsupported mpathd command\n")); + (void) close(s); + return (-1); + } + + bzero(&mpr, sizeof (struct mpathd_response)); + /* Read the result from mpathd */ + if (read(s, &mpr, sizeof (struct mpathd_response)) != + sizeof (struct mpathd_response)) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd 
read : %s\n"), strerror(errno)); + (void) close(s); + return (-1); + } + + (void) close(s); + if (mpr.resp_mpathd_err == 0) { + rcm_log_message(RCM_TRACE1, + "IP: mpathd_send_cmd success\n"); + return (0); /* Successful */ + } + + if (mpr.resp_mpathd_err == MPATHD_SYS_ERROR) { + if (mpr.resp_sys_errno == EAGAIN) { + (void) sleep(1); + rcm_log_message(RCM_DEBUG, + _("IP: mpathd retrying\n")); + continue; /* Retry */ + } + errno = mpr.resp_sys_errno; + rcm_log_message(RCM_WARNING, + _("IP: mpathd_send_cmd error: %s\n"), + strerror(errno)); + } else if (mpr.resp_mpathd_err == MPATHD_MIN_RED_ERROR) { + errno = EIO; + rcm_log_message(RCM_ERROR, _("IP: in.mpathd(1M): " + "Minimum redundancy not met\n")); + } else { + rcm_log_message(RCM_ERROR, + _("IP: mpathd_send_cmd error\n")); + } + /* retry */ + } + + rcm_log_message(RCM_ERROR, + _("IP: mpathd_send_cmd failed %d retries\n"), MPATHD_MAX_RETRIES); + return (-1); +} + +/* + * Returns -1 on failure. Returns the socket file descriptor on + * success. + */ +static int +connect_to_mpathd(int family) +{ + int s; + struct sockaddr_storage ss; + struct sockaddr_in *sin = (struct sockaddr_in *)&ss; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss; + struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + int addrlen; + int ret; + int on; + + rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd\n"); + + s = socket(family, SOCK_STREAM, 0); + if (s < 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd socket: %s\n"), strerror(errno)); + return (-1); + } + bzero((char *)&ss, sizeof (ss)); + ss.ss_family = family; + /* + * Need to bind to a privelged port. For non-root, this + * will fail. in.mpathd verifies that only commands coming + * from priveleged ports succeed so that the ordinary user + * can't issue offline commands. 
+ */ + on = 1; + if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on, + sizeof (on)) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd setsockopt: TCP_ANONPRIVBIND: %s\n"), + strerror(errno)); + return (-1); + } + switch (family) { + case AF_INET: + sin->sin_port = 0; + sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addrlen = sizeof (struct sockaddr_in); + break; + case AF_INET6: + sin6->sin6_port = 0; + sin6->sin6_addr = loopback_addr; + addrlen = sizeof (struct sockaddr_in6); + break; + } + ret = bind(s, (struct sockaddr *)&ss, addrlen); + if (ret != 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd bind: %s\n"), strerror(errno)); + return (-1); + } + switch (family) { + case AF_INET: + sin->sin_port = htons(MPATHD_PORT); + break; + case AF_INET6: + sin6->sin6_port = htons(MPATHD_PORT); + break; + } + ret = connect(s, (struct sockaddr *)&ss, addrlen); + if (ret != 0) { + if (errno == ECONNREFUSED) { + /* in.mpathd is not running, start it */ + if (rcm_exec_cmd(MPATHD_PATH) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd exec: %s\n"), + strerror(errno)); + return (-1); + } + ret = connect(s, (struct sockaddr *)&ss, addrlen); + } + if (ret != 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd connect: %s\n"), strerror(errno)); + return (-1); + } + } + on = 0; + if (setsockopt(s, IPPROTO_TCP, TCP_ANONPRIVBIND, &on, + sizeof (on)) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: mpathd setsockopt TCP_ANONPRIVBIND: %s\n"), + strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: connect_to_mpathd success\n"); + + return (s); +} + +/* + * modop() - Remove/insert a module + */ +static int +modop(char *name, char *arg, int pos, char op) +{ + char syscmd[LIFNAMSIZ+MAXPATHLEN]; /* must be big enough */ + + rcm_log_message(RCM_TRACE1, "IP: modop(%s)\n", name); + + /* Nothing to do with "ip", "arp" */ + if ((arg == NULL) || (strcmp(arg, "") == 0) || + STREQ(arg, IP_MOD_NAME) || STREQ(arg, ARP_MOD_NAME)) { + rcm_log_message(RCM_TRACE1, "IP: modop 
success\n"); + return (0); + } + + if (op == MOD_CHECK) { + /* + * No known good modules (yet) apart from ip and arp + * which are handled above + */ + return (-1); + } + + if (op == MOD_REMOVE) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s modremove %s@%d\n", USR_SBIN_IFCONFIG, name, arg, + pos); + } else if (op == MOD_INSERT) { + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s modinsert %s@%d\n", USR_SBIN_IFCONFIG, name, arg, + pos); + } else { + rcm_log_message(RCM_ERROR, + _("IP: modop(%s): unknown operation\n"), name); + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: modop(%s): %s\n", name, syscmd); + if (rcm_exec_cmd(syscmd) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: modop(%s): %s\n"), name, strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE1, "IP: modop success\n"); + return (0); +} + +/* + * get_modlist() - return a list of pushed mid-stream modules + * Required memory is malloced to construct the list, + * Caller must free this memory list + * Call with cache_lock held + */ +static int +get_modlist(char *name, ip_lif_t *lif) +{ + int udp_fd; + int fd; + int i; + int num_mods; + struct lifreq lifr; + struct str_list strlist; + + rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s)\n", name); + + (void) strncpy(lifr.lifr_name, name, sizeof (lifr.lifr_name)); + lifr.lifr_flags = lif->li_ifflags; + if (ip_domux2fd(&udp_fd, &fd, &lifr) < 0) { + rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd(%s)\n"), name); + return (-1); + } + + if ((num_mods = ioctl(fd, I_LIST, NULL)) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: get_modlist(%s): I_LIST(%s) \n"), + name, strerror(errno)); + (void) ip_plink(udp_fd, fd, &lifr); + return (-1); + } + + strlist.sl_nmods = num_mods; + strlist.sl_modlist = malloc(sizeof (struct str_mlist) * num_mods); + + if (strlist.sl_modlist == NULL) { + rcm_log_message(RCM_ERROR, _("IP: get_modlist(%s): %s\n"), + name, strerror(errno)); + (void) ip_plink(udp_fd, fd, &lifr); + return (-1); + } + + if (ioctl(fd, 
I_LIST, (caddr_t)&strlist) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: get_modlist(%s): I_LIST error: %s\n"), + name, strerror(errno)); + (void) ip_plink(udp_fd, fd, &lifr); + return (-1); + } + + for (i = 0; i < strlist.sl_nmods; i++) { + lif->li_modules[i] = + malloc(strlen(strlist.sl_modlist[i].l_name)+1); + if (lif->li_modules[i] == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: get_modlist(%s): %s\n"), + name, strerror(errno)); + (void) ip_plink(udp_fd, fd, &lifr); + return (-1); + } + (void) strcpy(lif->li_modules[i], strlist.sl_modlist[i].l_name); + } + + lif->li_modcnt = strlist.sl_nmods; + free(strlist.sl_modlist); + + rcm_log_message(RCM_TRACE1, "IP: getmodlist(%s) success\n", name); + return (ip_plink(udp_fd, fd, &lifr)); +} + +/* + * ip_domux2fd() - Helper function for mod*() functions + * Stolen from ifconfig.c + */ +static int +ip_domux2fd(int *udp_fd, int *fd, struct lifreq *lifr) +{ + int ip_fd; + char *udp_dev_name; + char *ip_dev_name; + + if (lifr->lifr_flags & IFF_IPV6) { + udp_dev_name = UDP6_DEV_NAME; + ip_dev_name = IP6_DEV_NAME; + } else { + udp_dev_name = UDP_DEV_NAME; + ip_dev_name = IP_DEV_NAME; + } + + if ((ip_fd = open(ip_dev_name, O_RDWR)) < 0) { + rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: open(%s) %s\n"), + ip_dev_name, strerror(errno)); + return (-1); + } + if ((*udp_fd = open(udp_dev_name, O_RDWR)) < 0) { + rcm_log_message(RCM_ERROR, _("IP: ip_domux2fd: open(%s) %s\n"), + udp_dev_name, strerror(errno)); + (void) close(ip_fd); + return (-1); + } + if (ioctl(ip_fd, SIOCGLIFMUXID, (caddr_t)lifr) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: ip_domux2fd: SIOCGLIFMUXID(%s): %s\n"), + ip_dev_name, strerror(errno)); + (void) close(*udp_fd); + (void) close(ip_fd); + return (-1); + } + + rcm_log_message(RCM_TRACE2, + "IP: ip_domux2fd: ARP_muxid %d IP_muxid %d\n", + lifr->lifr_arp_muxid, lifr->lifr_ip_muxid); + + if ((*fd = ioctl(*udp_fd, _I_MUXID2FD, lifr->lifr_ip_muxid)) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: ip_domux2fd: 
_I_MUXID2FD(%s): %s\n"), + udp_dev_name, strerror(errno)); + (void) close(*udp_fd); + (void) close(ip_fd); + return (-1); + } + if (ioctl(*udp_fd, I_PUNLINK, lifr->lifr_ip_muxid) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: ip_domux2fd: I_PUNLINK(%s): %s\n"), + udp_dev_name, strerror(errno)); + (void) close(*udp_fd); + (void) close(ip_fd); + return (-1); + } + + /* Note: udp_fd is closed in ip_plink below */ + (void) close(ip_fd); + return (0); +} + +/* + * ip_plink() - Helper function for mod*() functions. + * Stolen from ifconfig.c + */ +static int +ip_plink(int udp_fd, int fd, struct lifreq *lifr) +{ + int mux_id; + + if ((mux_id = ioctl(udp_fd, I_PLINK, fd)) < 0) { + rcm_log_message(RCM_ERROR, _("IP: ip_plink I_PLINK(%s): %s\n"), + UDP_DEV_NAME, strerror(errno)); + (void) close(udp_fd); + (void) close(fd); + return (-1); + } + + lifr->lifr_ip_muxid = mux_id; + if (ioctl(udp_fd, SIOCSLIFMUXID, (caddr_t)lifr) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: ip_plink SIOCSLIFMUXID(%s): %s\n"), + UDP_DEV_NAME, strerror(errno)); + (void) close(udp_fd); + (void) close(fd); + return (-1); + } + + (void) close(udp_fd); + (void) close(fd); + return (0); +} + +/* + * ip_onlinelist() + * + * Notify online to IP address consumers. + */ +static int +ip_onlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags, + rcm_info_t **depend_info) +{ + char **addrlist; + int ret = RCM_SUCCESS; + + rcm_log_message(RCM_TRACE2, "IP: ip_onlinelist\n"); + + addrlist = ip_get_addrlist(node); + if (addrlist == NULL || addrlist[0] == NULL) { + rcm_log_message(RCM_TRACE2, "IP: ip_onlinelist none\n"); + ip_free_addrlist(addrlist); + return (ret); + } + + ret = rcm_notify_online_list(hd, addrlist, 0, depend_info); + + ip_free_addrlist(addrlist); + rcm_log_message(RCM_TRACE2, "IP: ip_onlinelist done\n"); + return (ret); +} + +/* + * ip_offlinelist() + * + * Offline IP address consumers. 
+ */ +static int +ip_offlinelist(rcm_handle_t *hd, ip_cache_t *node, char **errorp, uint_t flags, + rcm_info_t **depend_info) +{ + char **addrlist; + int ret = RCM_SUCCESS; + + rcm_log_message(RCM_TRACE2, "IP: ip_offlinelist\n"); + + addrlist = ip_get_addrlist(node); + if (addrlist == NULL || addrlist[0] == NULL) { + rcm_log_message(RCM_TRACE2, "IP: ip_offlinelist none\n"); + ip_free_addrlist(addrlist); + return (RCM_SUCCESS); + } + + if ((ret = rcm_request_offline_list(hd, addrlist, flags, depend_info)) + != RCM_SUCCESS) { + if (ret == RCM_FAILURE) + (void) rcm_notify_online_list(hd, addrlist, 0, NULL); + + ret = RCM_FAILURE; + } + + ip_free_addrlist(addrlist); + rcm_log_message(RCM_TRACE2, "IP: ip_offlinelist done\n"); + return (ret); +} + +/* + * ip_get_addrlist() - Compile list of IP addresses hosted on this NIC (node) + * This routine malloc() required memeory for the list + * Returns list on success, NULL if failed + * Call with cache_lock held. + */ +static char ** +ip_get_addrlist(ip_cache_t *node) +{ + ip_lif_t *lif; + char **addrlist = NULL; + int numifs; + char addrstr[INET6_ADDRSTRLEN]; + void *addr; + int af; + int i; + + rcm_log_message(RCM_TRACE2, "IP: ip_get_addrlist(%s)\n", + node->ip_resource); + + numifs = 0; + for (lif = node->ip_pif->pi_lifs; lif != NULL; lif = lif->li_next) { + numifs++; + } + + /* + * Allocate space for resource names list; add 1 and use calloc() + * so that the list is NULL-terminated. 
+ */ + if ((addrlist = calloc(numifs + 1, sizeof (char *))) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: ip_get_addrlist(%s) malloc failure(%s)\n"), + node->ip_resource, strerror(errno)); + return (NULL); + } + + for (lif = node->ip_pif->pi_lifs, i = 0; lif != NULL; + lif = lif->li_next, i++) { + + af = lif->li_addr.family; + if (af == AF_INET6) { + addr = &lif->li_addr.ip6.sin6_addr; + } else if (af == AF_INET) { + addr = &lif->li_addr.ip4.sin_addr; + } else { + rcm_log_message(RCM_DEBUG, + "IP: unknown addr family %d, assuming AF_INET\n", + af); + af = AF_INET; + addr = &lif->li_addr.ip4.sin_addr; + } + if (inet_ntop(af, addr, addrstr, INET6_ADDRSTRLEN) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: inet_ntop: %s\n"), strerror(errno)); + ip_free_addrlist(addrlist); + return (NULL); + } + + if ((addrlist[i] = malloc(strlen(addrstr) + RCM_SIZE_SUNW_IP)) + == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: ip_get_addrlist(%s) malloc failure(%s)\n"), + node->ip_resource, strerror(errno)); + ip_free_addrlist(addrlist); + return (NULL); + } + (void) strcpy(addrlist[i], RCM_STR_SUNW_IP); /* SUNW_ip/ */ + (void) strcat(addrlist[i], addrstr); /* SUNW_ip/<address> */ + + rcm_log_message(RCM_DEBUG, "Anon Address: %s\n", addrlist[i]); + } + + rcm_log_message(RCM_TRACE2, "IP: get_addrlist (%s) done\n", + node->ip_resource); + + return (addrlist); +} + +static void +ip_free_addrlist(char **addrlist) +{ + int i; + + if (addrlist == NULL) + return; + + for (i = 0; addrlist[i] != NULL; i++) + free(addrlist[i]); + free(addrlist); +} + +/* + * ip_consumer_notify() - Notify consumers of IP addresses coming back online. 
+ */ + +static void +ip_consumer_notify(rcm_handle_t *hd, char *ifinst, char **errorp, uint_t flags, + rcm_info_t **depend_info) +{ + char ifname[LIFNAMSIZ + 1]; + char cached_name[RCM_NET_RESOURCE_MAX]; + ip_cache_t *node; + char *cp; + + rcm_log_message(RCM_TRACE1, "IP: ip_consumer_notify(%s)\n", ifinst); + + if (ifinst == NULL) + return; + + (void) memcpy(&ifname, ifinst, sizeof (ifname)); + ifname[sizeof (ifname) - 1] = '\0'; + + /* remove LIF component */ + cp = strchr(ifname, ':'); + if (cp) { + *cp = 0; + } + + /* Check for the interface in the cache */ + (void) snprintf(cached_name, sizeof (cached_name), "%s/%s", + RCM_NET_PREFIX, ifname); + + (void) mutex_lock(&cache_lock); + if ((node = cache_lookup(hd, cached_name, CACHE_REFRESH)) == NULL) { + rcm_log_message(RCM_TRACE1, "IP: Skipping interface(%s) \n", + ifname); + (void) mutex_unlock(&cache_lock); + return; + } + /* + * Inform anonymous consumers about IP addresses being + * onlined + */ + (void) ip_onlinelist(hd, node, errorp, flags, depend_info); + + (void) mutex_unlock(&cache_lock); + + rcm_log_message(RCM_TRACE2, "IP: ip_consumer_notify success\n"); + return; + +} +/* + * process_nvlist() - Determine network interfaces on a new attach by + * processing the nvlist + */ +/*ARGSUSED*/ +static int +process_nvlist(nvlist_t *nvl) +{ + nvpair_t *nvp = NULL; + char *driver_name; + char *devfs_path; + int32_t instance; + char *minor_byte_array; /* packed nvlist of minor_data */ + uint_t nminor; /* # of minor nodes */ + struct devfs_minor_data *mdata; + nvlist_t *mnvl; + nvpair_t *mnvp = NULL; + + rcm_log_message(RCM_TRACE1, "IP: process_nvlist\n"); + + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + /* Get driver name */ + if (STREQ(nvpair_name(nvp), RCM_NV_DRIVER_NAME)) { + if (nvpair_value_string(nvp, &driver_name) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get driver name\n")); + return (-1); + } + } + /* Get instance */ + if (STREQ(nvpair_name(nvp), RCM_NV_INSTANCE)) { + if 
(nvpair_value_int32(nvp, &instance) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get device instance\n")); + return (-1); + } + } + /* Get devfs_path */ + if (STREQ(nvpair_name(nvp), RCM_NV_DEVFS_PATH)) { + if (nvpair_value_string(nvp, &devfs_path) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get device path\n")); + return (-1); + } + } + /* Get minor data */ + if (STREQ(nvpair_name(nvp), RCM_NV_MINOR_DATA)) { + if (nvpair_value_byte_array(nvp, + (uchar_t **)&minor_byte_array, &nminor) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get device minor data\n")); + return (-1); + } + if (nvlist_unpack(minor_byte_array, + nminor, &mnvl, 0) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get minor node data\n")); + return (-1); + } + mdata = (struct devfs_minor_data *)calloc(1, + sizeof (struct devfs_minor_data)); + if (mdata == NULL) { + rcm_log_message(RCM_WARNING, + _("IP: calloc error(%s)\n"), + strerror(errno)); + nvlist_free(mnvl); + return (-1); + } + /* Enumerate minor node data */ + while ((mnvp = nvlist_next_nvpair(mnvl, mnvp)) != + NULL) { + /* Get minor type */ + if (STREQ(nvpair_name(mnvp), + RCM_NV_MINOR_TYPE)) { + if (nvpair_value_int32(mnvp, + &mdata->minor_type) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get minor " + "type \n")); + nvlist_free(mnvl); + return (-1); + } + } + /* Get minor name */ + if (STREQ(nvpair_name(mnvp), + RCM_NV_MINOR_NAME)) { + if (nvpair_value_string(mnvp, + &mdata->minor_name) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get minor " + "name \n")); + nvlist_free(mnvl); + return (-1); + } + } + /* Get minor node type */ + if (STREQ(nvpair_name(mnvp), + RCM_NV_MINOR_NODE_TYPE)) { + if (nvpair_value_string(mnvp, + &mdata->minor_node_type) != 0) { + rcm_log_message(RCM_WARNING, + _("IP: cannot get minor " + "node type \n")); + nvlist_free(mnvl); + return (-1); + } + } + } + (void) process_minor(devfs_path, driver_name, instance, + mdata); + nvlist_free(mnvl); + } + } + + 
rcm_log_message(RCM_TRACE1, "IP: process_nvlist success\n"); + return (0); +} + +static void +process_minor(char *devfs_path, char *name, int instance, + struct devfs_minor_data *mdata) +{ + struct net_interface *nip; + struct ni_list *nilp; + struct ni_list *p; + struct ni_list **pp; + char *cname; + size_t cnamelen; + char dev_name[MAXPATHLEN]; + boolean_t virtual = B_FALSE; + + rcm_log_message(RCM_TRACE1, "IP: process_minor\n"); + + if ((mdata->minor_node_type != NULL) && + !STREQ(mdata->minor_node_type, PROP_NV_DDI_NETWORK)) { + /* Process network devices only */ + return; + } + + rcm_log_message(RCM_TRACE1, "IP: Examining %s (%s)\n", + devfs_path, mdata->minor_name); + + /* + * Virtual DDI_NT_NET nodes created by dld are exposed by devfs + * for non-VLAN as well as VLANs. Determine if we're dealing + * with a virtual device. + */ + if (strncmp("/pseudo", devfs_path, strlen("/pseudo")) == 0) { + rcm_log_message(RCM_TRACE1, "IP: pseudo node %s (%s)\n", + devfs_path, mdata->minor_name); + if (strcmp(name, "dld") == 0) { + if (dlpi_if_parse(mdata->minor_name, dev_name, + &instance) < 0 || instance < 0) { + /* dld always also creates a style-2 */ + rcm_log_message(RCM_DEBUG, "IP: ignoring " + "\"%s\" (style 1)\n", devfs_path); + return; + } + name = dev_name; + virtual = B_TRUE; + rcm_log_message(RCM_TRACE1, "IP: virtual datalink " + "%s%d\n", name, instance); + } + } + + /* Sanity check, instances > 999 are illegal */ + if (!virtual && instance > 999) { + errno = EINVAL; + rcm_log_message(RCM_ERROR, _("IP: invalid instance %d(%s)\n"), + instance, strerror(errno)); + return; + } + + /* Now, let's add the node to the interface list */ + if ((nip = malloc(sizeof (struct net_interface))) == NULL) { + rcm_log_message(RCM_ERROR, _("IP: malloc failure(%s)\n"), + strerror(errno)); + return; + } + (void) memset(nip, 0, sizeof (struct net_interface)); + + cnamelen = strlen(name) + 1; + /* Set NIC type */ + if ((nip->type = (char *)malloc(cnamelen)) == NULL) { + free(nip); + 
rcm_log_message(RCM_ERROR, _("IP: malloc failure(%s)\n"), + strerror(errno)); + return; + } + (void) memcpy(nip->type, name, cnamelen); + + cnamelen += MAXINTSTR; + if ((cname = (char *)malloc(cnamelen)) == NULL) { + free(nip->type); + free(nip); + rcm_log_message(RCM_ERROR, _("IP: malloc failure(%s)\n"), + strerror(errno)); + return; + } + (void) snprintf(cname, cnamelen, "%s%d", name, instance); + + rcm_log_message(RCM_TRACE1, "IP: Found SUNW_network/%s%d\n", name, + instance); + + /* Set NIC name */ + if ((nip->name = strdup(cname)) == NULL) { + free(nip->type); + free(nip); + free(cname); + rcm_log_message(RCM_ERROR, _("IP: strdup failure(%s)\n"), + strerror(errno)); + return; + } + free(cname); + + /* Add new interface to the list */ + (void) mutex_lock(&nil_lock); + for (pp = &nil_head; (p = *pp) != NULL; pp = &(p->next)) { + cname = p->nifp->name; + if (strcmp(cname, nip->name) == 0) + break; + } + + if (p != NULL) { + (void) mutex_unlock(&nil_lock); + free(nip->name); + free(nip->type); + free(nip); + rcm_log_message(RCM_TRACE1, "IP: secondary node - ignoring\n"); + return; + } + + if ((nilp = malloc(sizeof (struct ni_list))) == NULL) { + (void) mutex_unlock(&nil_lock); + free(nip->name); + free(nip->type); + free(nip); + rcm_log_message(RCM_ERROR, _("IP: malloc failure(%s)\n"), + strerror(errno)); + return; + } + + nilp->nifp = nip; + nilp->next = NULL; + *pp = nilp; + + num_ni++; /* Increment interface count */ + + (void) mutex_unlock(&nil_lock); + rcm_log_message(RCM_TRACE1, "IP: added new node\n"); +} + +/* + * if_configure() - Configure a physical interface after attach + */ +static int +if_configure(char *ifinst) +{ + char cfgfile[MAXPATHLEN]; + char ifname[LIFNAMSIZ + 1]; + char cached_name[RCM_NET_RESOURCE_MAX]; + struct stat statbuf; + ip_cache_t *node; + char *cp; + int af = 0; + int ipmp = 0; + + if (ifinst == NULL) + return (0); + + rcm_log_message(RCM_TRACE1, "IP: if_configure(%s)\n", ifinst); + + /* + * Check if the interface is already 
configured + */ + + (void) memcpy(&ifname, ifinst, sizeof (ifname)); + ifname[sizeof (ifname) - 1] = '\0'; + + /* remove LIF component */ + cp = strchr(ifname, ':'); + if (cp) { + *cp = 0; + } + + /* Check for the interface in the cache */ + (void) snprintf(cached_name, sizeof (cached_name), "%s/%s", + RCM_NET_PREFIX, ifname); + + /* Check if the interface is new or was previously offlined */ + (void) mutex_lock(&cache_lock); + if (((node = cache_lookup(NULL, cached_name, CACHE_REFRESH)) != NULL) && + (!(node->ip_cachestate & CACHE_IF_OFFLINED))) { + rcm_log_message(RCM_TRACE1, + "IP: Skipping configured interface(%s) \n", ifname); + (void) mutex_unlock(&cache_lock); + return (0); + } + (void) mutex_unlock(&cache_lock); + + /* Scan IPv4 configuration first */ + (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4, ifinst); + cfgfile[MAXPATHLEN - 1] = '\0'; + + rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile); + if (stat(cfgfile, &statbuf) == 0) { + af |= CONFIG_AF_INET; + if (isgrouped(cfgfile)) { + ipmp++; + } + } + + /* Scan IPv6 configuration details */ + (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6, ifinst); + cfgfile[MAXPATHLEN - 1] = '\0'; + rcm_log_message(RCM_TRACE1, "IP: Scanning %s\n", cfgfile); + if (stat(cfgfile, &statbuf) == 0) { + af |= CONFIG_AF_INET6; + if ((ipmp == 0) && isgrouped(cfgfile)) { + ipmp++; + } + } + + if (af & CONFIG_AF_INET) { + if (if_ipmp_config(ifinst, CONFIG_AF_INET, ipmp) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: IPv4 Post-attach failed (%s)\n"), ifinst); + return (-1); + } + } + + if (af & CONFIG_AF_INET6) { + if (if_ipmp_config(ifinst, CONFIG_AF_INET6, ipmp) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: IPv6 Post-attach failed(%s)\n"), ifinst); + return (-1); + } + } + + rcm_log_message(RCM_TRACE1, "IP: if_configure(%s) success\n", ifinst); + + return (0); + +} + +/* + * isgrouped() - Scans the given config file to see if this is a grouped + * interface + * Returns non-zero if true; 0 if 
false + */ +static int +isgrouped(char *cfgfile) +{ + FILE *fp; + struct stat statb; + char *buf = NULL; + char *tokens[MAXARGS]; /* token pointers */ + char tspace[MAXLINE]; /* token space */ + int ntok; + int group = 0; + + if (cfgfile == NULL) + return (0); + + rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s)\n", cfgfile); + + if (stat(cfgfile, &statb) != 0) { + rcm_log_message(RCM_TRACE1, + _("IP: No config file(%s)\n"), cfgfile); + return (0); + } + + /* + * We also ignore single-byte config files because the file should + * always be newline-terminated, so we know there's nothing of + * interest. Further, a single-byte file would cause the fgets() loop + * below to spin forever. + */ + if (statb.st_size <= 1) { + rcm_log_message(RCM_TRACE1, + _("IP: Empty config file(%s)\n"), cfgfile); + return (0); + } + + if ((fp = fopen(cfgfile, "r")) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot open configuration file(%s): %s\n"), cfgfile, + strerror(errno)); + return (0); + } + + if ((buf = calloc(1, statb.st_size)) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: calloc failure(%s): %s\n"), cfgfile, + strerror(errno)); + (void) fclose(fp); + return (0); + } + + while (fgets(buf, statb.st_size, fp) != NULL) { + if (*buf == '\0') + continue; + + tokenize(buf, tokens, tspace, &ntok); + while (ntok) { + if (STREQ("group", tokens[ntok - 1])) { + if (tokens[ntok] != NULL) { + group++; + } + } + ntok--; + } + } + + free(buf); + + (void) fclose(fp); + + if (group <= 0) { + rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) non-grouped\n", + cfgfile); + return (0); + } else { + rcm_log_message(RCM_TRACE1, "IP: isgrouped(%s) grouped\n", + cfgfile); + return (1); + } +} + + +/* + * if_ipmp_config() - Configure an interface instance as specified by the + * address family af and if it is grouped (ipmp). 
+ */ +static int +if_ipmp_config(char *ifinst, int af, int ipmp) +{ + char cfgfile[MAXPATHLEN]; /* configuration file */ + FILE *fp; + struct stat statb; + char *buf; + char *tokens[MAXARGS]; /* list of config attributes */ + char tspace[MAXLINE]; /* token space */ + char syscmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1]; + char grpcmd[MAX_RECONFIG_SIZE + MAXPATHLEN + 1]; + char fstr[8]; /* address family string inet or inet6 */ + int nofailover = 0; + int newattach = 0; + int cmdvalid = 0; + int ntok; + int n; + int stdif = 0; + + if (ifinst == NULL) + return (0); + + rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) ipmp = %d\n", + ifinst, ipmp); + + if (af & CONFIG_AF_INET) { + (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV4, + ifinst); + (void) strcpy(fstr, "inet"); + } else if (af & CONFIG_AF_INET6) { + (void) snprintf(cfgfile, MAXPATHLEN, "%s%s", CFGFILE_FMT_IPV6, + ifinst); + (void) strcpy(fstr, "inet6"); + } else { + return (0); /* nothing to do */ + } + + cfgfile[MAXPATHLEN - 1] = '\0'; + grpcmd[0] = '\0'; + + if (stat(cfgfile, &statb) != 0) { + rcm_log_message(RCM_TRACE1, + _("IP: No config file(%s)\n"), ifinst); + return (0); + } + + /* Config file exists, plumb in the physical interface */ + if (af & CONFIG_AF_INET6) { + if (if_getcount(AF_INET6) == 0) { + /* + * Configure software loopback driver if this is the + * first IPv6 interface plumbed + */ + newattach++; + (void) snprintf(syscmd, sizeof (syscmd), + "%s lo0 %s plumb ::1 up", USR_SBIN_IFCONFIG, fstr); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot plumb (%s) %s\n"), + ifinst, strerror(errno)); + return (-1); + } + } + (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb up", + USR_SBIN_IFCONFIG, ifinst, fstr); + } else { + (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s plumb ", + USR_SBIN_IFCONFIG, ifinst, fstr); + if (if_getcount(AF_INET) == 0) { + newattach++; + } + } + rcm_log_message(RCM_TRACE1, "IP: Exec: %s\n", syscmd); + + if 
(rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Cannot plumb (%s) %s\n"), ifinst, strerror(errno)); + return (-1); + } + + /* Check if config file is empty, if so, nothing else to do */ + if (statb.st_size == 0) { + rcm_log_message(RCM_TRACE1, + _("IP: Zero size config file(%s)\n"), ifinst); + return (0); + } + + if ((fp = fopen(cfgfile, "r")) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: Open error(%s): %s\n"), cfgfile, strerror(errno)); + return (-1); + } + + if ((buf = calloc(1, statb.st_size)) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: calloc(%s): %s\n"), ifinst, strerror(errno)); + (void) fclose(fp); + return (-1); + } + + /* a single line with one token implies a classical if */ + if (fgets(buf, statb.st_size, fp) != NULL) { + tokenize(buf, tokens, tspace, &ntok); + if (ntok == 1) { + rcm_log_message(RCM_TRACE1, "IP: Standard interface\n"); + stdif++; + } + } + if (fseek(fp, 0L, SEEK_SET) == -1) { + rcm_log_message(RCM_ERROR, _("IP: fseek: %s\n"), + strerror(errno)); + return (-1); + } + + /* + * Process the config command + * This loop also handles multiple logical interfaces that may + * be configured on a single line + */ + while (fgets(buf, statb.st_size, fp) != NULL) { + nofailover = 0; + cmdvalid = 0; + + if (*buf == '\0') + continue; + + tokenize(buf, tokens, tspace, &ntok); + if (ntok <= 0) + continue; + + /* Reset the config command */ + (void) snprintf(syscmd, sizeof (syscmd), "%s %s %s ", + USR_SBIN_IFCONFIG, ifinst, fstr); + + /* No parsing if this is first interface of its kind */ + if (newattach) { + (void) strcat(syscmd, buf); + /* Classic if */ + if ((af & CONFIG_AF_INET) && (stdif == 1)) { + (void) strcat(syscmd, CFG_CMDS_STD); + } + rcm_log_message(RCM_TRACE1, "IP: New: %s\n", syscmd); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: Error: %s (%s): %s\n"), + syscmd, ifinst, strerror(errno)); + } + continue; + } + + /* Parse the tokens to determine nature of the interface */ + for (n = 0; 
n < ntok; n++) { + /* Handle pathological failover cases */ + if (STREQ("-failover", tokens[n])) + nofailover++; + if (STREQ("failover", tokens[n])) + nofailover--; + + /* group attribute requires special processing */ + if (STREQ("group", tokens[n])) { + if (tokens[n + 1] != NULL) { + (void) snprintf(grpcmd, sizeof (grpcmd), + "%s %s %s %s %s", USR_SBIN_IFCONFIG, + ifinst, fstr, + tokens[n], tokens[n + 1]); + n++; /* skip next token */ + continue; + } + } + + /* Execute buffered command ? */ + if (STREQ("set", tokens[n]) || + STREQ("addif", tokens[n]) || + STREQ("removeif", tokens[n]) || + (n == (ntok -1))) { + + /* config command complete ? */ + if (n == (ntok -1)) { + ADDSPACE(syscmd); + (void) strcat(syscmd, tokens[n]); + cmdvalid++; + } + + if (!cmdvalid) { + ADDSPACE(syscmd); + (void) strcat(syscmd, tokens[n]); + cmdvalid++; + continue; + } + /* Classic if ? */ + if ((af & CONFIG_AF_INET) && (stdif == 1)) { + (void) strcat(syscmd, CFG_CMDS_STD); + } + + if (nofailover > 0) { + rcm_log_message(RCM_TRACE1, + "IP: Interim exec: %s\n", syscmd); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: %s fail(%s): %s\n"), + syscmd, ifinst, + strerror(errno)); + } + } else { + /* Have mpathd configure the address */ + if (if_mpathd_configure(syscmd, ifinst, + af, ipmp) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: %s fail(%s): %s\n"), + syscmd, ifinst, + strerror(errno)); + } + } + + /* Reset config command */ + (void) snprintf(syscmd, sizeof (syscmd), + "%s %s %s ", USR_SBIN_IFCONFIG, ifinst, + fstr); + nofailover = 0; + cmdvalid = 0; + } + /* + * Note: No explicit command validation is required + * since ifconfig to does it for us + */ + ADDSPACE(syscmd); + (void) strcat(syscmd, tokens[n]); + cmdvalid++; + } + } + + free(buf); + (void) fclose(fp); + + /* + * The group name needs to be set after all the test/nofailover + * addresses have been configured. 
Otherwise, if IPMP detects that the + * interface is failed, the addresses will be moved to a working + * interface before the '-failover' flag can be set. + */ + if (grpcmd[0] != '\0') { + rcm_log_message(RCM_TRACE1, "IP: set group name: %s\n", grpcmd); + if (rcm_exec_cmd(grpcmd) != 0) { + rcm_log_message(RCM_ERROR, _("IP: %s fail(%s): %s\n"), + grpcmd, ifinst, strerror(errno)); + } + } + + rcm_log_message(RCM_TRACE1, "IP: if_ipmp_config(%s) success\n", ifinst); + + return (0); +} + +/* + * if_mpathd_configure() - Determine configuration disposition of the interface + */ +static int +if_mpathd_configure(char *syscmd, char *ifinst, int af, int ipmp) +{ + char *tokens[MAXARGS]; + char tspace[MAXLINE]; + int ntok; + char *addr; + char *from_lifname; + mpathd_cmd_t mpdcmd; + int n; + + rcm_log_message(RCM_TRACE1, "IP: if_mpathd_configure(%s): %s\n", + ifinst, syscmd); + + tokenize(syscmd, tokens, tspace, &ntok); + if (ntok <= 0) + return (0); + + addr = tokens[3]; /* by default, third token is valid address */ + for (n = 0; n < ntok; n++) { + if (STREQ("set", tokens[n]) || + STREQ("addif", tokens[n])) { + addr = tokens[n+1]; + if (addr == NULL) { /* invalid format */ + return (-1); + } else + break; + } + } + + /* Check std. 
commands or no failed over address */ + if (STREQ("removeif", addr) || STREQ("group", addr) || + ((from_lifname = get_mpathd_dest(addr, af)) == NULL)) { + rcm_log_message(RCM_TRACE1, + "IP: No failed-over host, exec %s\n", syscmd); + if (rcm_exec_cmd(syscmd) != 0) { + rcm_log_message(RCM_ERROR, + _("IP: %s failed(%s): %s\n"), + syscmd, ifinst, strerror(errno)); + return (-1); + } + return (0); + } + + /* Check for non-IPMP failover scenarios */ + if ((ipmp <= 0) && (from_lifname != NULL)) { + /* Address already hosted on another NIC, return */ + rcm_log_message(RCM_TRACE1, + "IP: Non-IPMP failed-over host(%s): %s\n", + ifinst, addr); + return (0); + } + + /* + * Valid failed-over host; have mpathd set the original index + */ + mpdcmd.cmd_command = MI_SETOINDEX; + (void) strcpy(mpdcmd.from_lifname, from_lifname); + (void) strcpy(mpdcmd.to_pifname, ifinst); + if (af & CONFIG_AF_INET6) { + mpdcmd.addr_family = AF_INET6; + } else { + mpdcmd.addr_family = AF_INET; + } + + /* Send command to in.mpathd(1M) */ + rcm_log_message(RCM_TRACE1, + "IP: Attempting setoindex from (%s) to (%s) ....\n", + from_lifname, ifinst); + + if (mpathd_send_cmd(&mpdcmd) < 0) { + rcm_log_message(RCM_TRACE1, + _("IP: mpathd set original index unsuccessful: %s\n"), + strerror(errno)); + return (-1); + } + + rcm_log_message(RCM_TRACE1, + "IP: setoindex success (%s) to (%s)\n", + from_lifname, ifinst); + + return (0); +} + +/* + * get_mpathd_addr() - Return current destination for lif; caller is + * responsible to free memory allocated for address + */ +static char * +get_mpathd_dest(char *addr, int family) +{ + int sock; + char *buf; + struct lifnum lifn; + struct lifconf lifc; + struct lifreq *lifrp; + sa_family_t af = AF_INET; /* IPv4 by default */ + int i; + struct lifreq lifreq; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; + struct hostent *hp; + char *ifname = NULL; + char *prefix = NULL; + char addrstr[INET6_ADDRSTRLEN]; + char ifaddr[INET6_ADDRSTRLEN]; + int err; + + if (addr == 
NULL) { + return (NULL); + } + + rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s)\n", addr); + + if (family & CONFIG_AF_INET6) { + af = AF_INET6; + } else { + af = AF_INET; + } + + if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: failure opening %s socket: %s\n"), + af == AF_INET6 ? "IPv6" : "IPv4", strerror(errno)); + return (NULL); + } + + lifn.lifn_family = af; + lifn.lifn_flags = 0; + if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCLGIFNUM failed: %s\n"), + strerror(errno)); + (void) close(sock); + return (NULL); + } + + if ((buf = calloc(lifn.lifn_count, sizeof (struct lifreq))) == NULL) { + rcm_log_message(RCM_ERROR, _("IP: calloc: %s\n"), + strerror(errno)); + (void) close(sock); + return (NULL); + } + + lifc.lifc_family = af; + lifc.lifc_flags = 0; + lifc.lifc_len = sizeof (struct lifreq) * lifn.lifn_count; + lifc.lifc_buf = buf; + + if (ioctl(sock, SIOCGLIFCONF, (char *)&lifc) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFCONF failed: %s\n"), + strerror(errno)); + free(buf); + (void) close(sock); + return (NULL); + } + + /* Filter out prefix address from netmask */ + (void) strcpy(ifaddr, addr); + if ((prefix = strchr(ifaddr, '/')) != NULL) { + *prefix = '\0'; /* We care about the address part only */ + } + + /* Check for aliases */ + hp = getipnodebyname(ifaddr, af, AI_DEFAULT, &err); + if (hp) { + if (inet_ntop(af, (void *)hp->h_addr_list[0], + ifaddr, sizeof (ifaddr)) == NULL) { + /* Restore original address and use it */ + (void) strcpy(ifaddr, addr); + if ((prefix = strchr(ifaddr, '/')) != NULL) { + *prefix = '\0'; + } + } + freehostent(hp); + } + rcm_log_message(RCM_TRACE2, "IP: ifaddr(%s) = %s\n", + addr, ifaddr); + + /* now search the interfaces */ + lifrp = lifc.lifc_req; + for (i = 0; i < lifn.lifn_count; i++, lifrp++) { + (void) strcpy(lifreq.lifr_name, lifrp->lifr_name); + /* Get the interface address for this interface */ + if (ioctl(sock, 
SIOCGLIFADDR, (char *)&lifreq) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCGLIFADDR: %s\n"), strerror(errno)); + free(buf); + (void) close(sock); + return (NULL); + } + + if (af == AF_INET6) { + sin6 = (struct sockaddr_in6 *)&lifreq.lifr_addr; + if (inet_ntop(AF_INET6, (void *)&sin6->sin6_addr, + addrstr, sizeof (addrstr)) == NULL) { + continue; + } + } else { + sin = (struct sockaddr_in *)&lifreq.lifr_addr; + if (inet_ntop(AF_INET, (void *)&sin->sin_addr, + addrstr, sizeof (addrstr)) == NULL) { + continue; + } + } + + if (STREQ(addrstr, ifaddr)) { + /* Allocate memory to hold interface name */ + if ((ifname = (char *)malloc(LIFNAMSIZ)) == NULL) { + rcm_log_message(RCM_ERROR, + _("IP: malloc: %s\n"), strerror(errno)); + free(buf); + (void) close(sock); + return (NULL); + } + + /* Copy the interface name */ + /* + * (void) memcpy(ifname, lifrp->lifr_name, + * sizeof (ifname)); + * ifname[sizeof (ifname) - 1] = '\0'; + */ + (void) strcpy(ifname, lifrp->lifr_name); + break; + } + } + + (void) close(sock); + free(buf); + + if (ifname == NULL) + rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): none\n", + addr); + else + rcm_log_message(RCM_TRACE2, "IP: get_mpathd_dest(%s): %s\n", + addr, ifname); + + return (ifname); +} + +static int +if_getcount(int af) +{ + int sock; + struct lifnum lifn; + + rcm_log_message(RCM_TRACE1, "IP: if_getcount\n"); + + if ((sock = socket(af, SOCK_DGRAM, 0)) == -1) { + rcm_log_message(RCM_ERROR, + _("IP: failure opening %s socket: %s\n"), + af == AF_INET6 ? 
"IPv6" : "IPv4", strerror(errno)); + return (-1); + } + + lifn.lifn_family = af; + lifn.lifn_flags = 0; + if (ioctl(sock, SIOCGLIFNUM, (char *)&lifn) < 0) { + rcm_log_message(RCM_ERROR, + _("IP: SIOCLGIFNUM failed: %s\n"), + strerror(errno)); + (void) close(sock); + return (-1); + } + (void) close(sock); + + rcm_log_message(RCM_TRACE1, "IP: if_getcount success: %d\n", + lifn.lifn_count); + + return (lifn.lifn_count); +} + +/* + * tokenize() - turn a command line into tokens; caller is responsible to + * provide enough memory to hold all tokens + */ +static void +tokenize(char *line, char **tokens, char *tspace, int *ntok) +{ + char *cp; + char *sp; + + sp = tspace; + cp = line; + for (*ntok = 0; *ntok < MAXARGS; (*ntok)++) { + tokens[*ntok] = sp; + while (ISSPACE(*cp)) + cp++; + if (ISEOL(*cp)) + break; + do { + *sp++ = *cp++; + } while (!ISSPACE(*cp) && !ISEOL(*cp)); + + *sp++ = '\0'; + } +} + +#ifdef RCM_IPMP_DEBUG + +static void +dump_node(ip_cache_t *node) +{ + ip_pif_t *pif; + ip_lif_t *lif; + + pif = node->ip_pif; + + rcm_log_message(RCM_TRACE1, "Node dump:\n"); + rcm_log_message(RCM_TRACE1, "resource = %s\t cache flags = 0x%x\n", + node->ip_resource, node->ip_cachestate); + rcm_log_message(RCM_TRACE1, "ifname = %s\t ifindex = %d\n", + pif->pi_ifname, pif->pi_ifindex); + rcm_log_message(RCM_TRACE1, "groupname = %s\t PPA = %d\n", + pif->pi_grpname, pif->pi_ppa); + if (pif->pi_style == DL_STYLE1) { + rcm_log_message(RCM_TRACE1, "Provider = DLPI style 1\n"); + } + if (pif->pi_style == DL_STYLE2) { + rcm_log_message(RCM_TRACE1, "Provider = DLPI style 2\n"); + }; + + lif = pif->pi_lifs; + + if (lif == NULL) { + rcm_log_message(RCM_TRACE1, "No lifs hosted on this device.\n"); + return; + } + + rcm_log_message(RCM_TRACE1, + "Logical interfaces hosted on this device - \n"); + while (lif != NULL) { + rcm_log_message(RCM_TRACE1, "\t ifnum = %d \t ifflags = 0x%x", + lif->li_ifnum, lif->li_ifflags); + if (lif->li_addr.family == AF_INET) + rcm_log_message(RCM_TRACE1, "\t 
Family = IPv4"); + else if (lif->li_addr.family == AF_INET6) + rcm_log_message(RCM_TRACE1, "\t Family = IPv6"); + else rcm_log_message(RCM_TRACE1, "\t Family = <Unknown>"); + rcm_log_message(RCM_TRACE1, "\n"); + + lif = lif->li_next; + } +} + +#else /* !RCM_IPMP_DEBUG */ + +/*ARGSUSED*/ +static void +dump_node(ip_cache_t *node) +{ + /* do nothing */ +} + +#endif /* RCM_IPMP_DEBUG */ diff --git a/usr/src/cmd/rcm_daemon/common/mac_rcm.c b/usr/src/cmd/rcm_daemon/common/mac_rcm.c new file mode 100644 index 0000000000..bfb90c1019 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/mac_rcm.c @@ -0,0 +1,1446 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This RCM module adds support to the RCM framework for datalinks + * managed by dladm(1M). 
+ */ +#include <alloca.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include <synch.h> +#include <libintl.h> +#include <errno.h> +#include <libdevinfo.h> +#include <sys/types.h> +#include <libdladm.h> +#include <liblaadm.h> +#include <net/if.h> +#include "rcm_module.h" + +#define _KERNEL +#include <sys/sysmacros.h> +#undef _KERNEL + +#define CACHE_STALE 1 /* flags */ +#define CACHE_NEW 2 /* flags */ + +typedef enum mac_op { + MAC_OP_SUSPEND = 0, + MAC_OP_OFFLINE = 1, + MAC_OP_ONLINE = 2, + MAC_OP_REMOVE = 3, + MAC_OP_RESUME = 4 +} mac_op_t; + +char *mac_op_str[] = { + "SUSPEND", + "OFFLINE", + "ONLINE", + "REMOVE", + "RESUME" +}; + +/* devfsadm post-attach nvpair values */ +#define PROP_NV_DDI_MAC "ddi_mac" + +typedef struct mac_cache { + char *resource; + char *driver; + int instance; + int flags; + struct mac_cache *next; + struct mac_cache *prev; +} mac_cache_t; + +static mac_cache_t cache_head; +static mac_cache_t cache_tail; +static mutex_t cache_lock; +static int events_registered = 0; + +struct devfs_minor_data { + int32_t minor_type; + char *minor_name; + char *minor_node_type; +}; + +/* module interface routines */ +static int mac_register(rcm_handle_t *); +static int mac_unregister(rcm_handle_t *); +static int mac_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, + char **, nvlist_t *, rcm_info_t **); +static int mac_suspend(rcm_handle_t *, char *, id_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int mac_resume(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mac_offline(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mac_online(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mac_remove(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mac_notify_event(rcm_handle_t *, char *, id_t, uint_t, + char **, nvlist_t *, rcm_info_t **); + +/* module private routines */ 
+static void free_cache(void); +static void update_cache(rcm_handle_t *hd); +static int devfs_entry(di_node_t node, di_minor_t minor, void *arg); +static void cache_remove(mac_cache_t *node); +static mac_cache_t *cache_lookup(const char *resource); +static void free_node(mac_cache_t *); +static void cache_insert(mac_cache_t *); +static int process_nvlist(nvlist_t *); + +/* + * Module-Private data + */ +static struct rcm_mod_ops mac_ops = { + RCM_MOD_OPS_VERSION, + mac_register, + mac_unregister, + mac_getinfo, + mac_suspend, + mac_resume, + mac_offline, + mac_online, + mac_remove, + NULL, /* request_capacity_change */ + NULL, /* notify_capacity_change */ + mac_notify_event +}; + +/* + * Module Interface Routines + */ + +/* + * rcm_mod_init() + * + * Update registrations, and return the ops structure. + */ +struct rcm_mod_ops * +rcm_mod_init() +{ + cache_head.next = &cache_tail; + cache_head.prev = NULL; + cache_tail.prev = &cache_head; + cache_tail.next = NULL; + (void) mutex_init(&cache_lock, NULL, NULL); + + /* Return the ops vectors */ + return (&mac_ops); +} + +/* + * rcm_mod_info() + * + * Return a string describing this module. + */ +const char * +rcm_mod_info() +{ + return ("Network namespace module %I%"); +} + +/* + * rcm_mod_fini() + * + * Destroy the cache. + */ +int +rcm_mod_fini() +{ + free_cache(); + (void) mutex_destroy(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * mac_register() + * + * Make sure the cache is properly sync'ed, and its registrations + * are in order. + * + * Locking: the cache is locked by update_cache, and is held + * throughout update_cache's execution because it reads and + * possibly modifies cache links continuously. 
+ */ +static int +mac_register(rcm_handle_t *hd) +{ + if (!events_registered) { + if (rcm_register_event(hd, RCM_RESOURCE_MAC_NEW, 0, NULL) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + gettext("MAC: failed to register for events %s\n"), + RCM_RESOURCE_MAC_NEW); + return (RCM_FAILURE); + } else { + rcm_log_message(RCM_TRACE1, "MAC: registered " + " for events %s\n", RCM_RESOURCE_MAC_NEW); + events_registered++; + } + } + update_cache(hd); + return (RCM_SUCCESS); +} + +/* + * mac_unregister() + * + * Manually walk through the cache, unregistering all the networks. + * + * Locking: the cache is locked throughout the execution of this routine + * because it reads and modifies cache links continuously. + */ +static int +mac_unregister(rcm_handle_t *hd) +{ + mac_cache_t *probe; + + /* Walk the cache, unregistering everything */ + (void) mutex_lock(&cache_lock); + probe = cache_head.next; + while (probe != &cache_tail) { + (void) rcm_unregister_interest(hd, probe->resource, 0); + cache_remove(probe); + free_node(probe); + probe = cache_head.next; + } + (void) mutex_unlock(&cache_lock); + if (events_registered) { + (void) rcm_unregister_event(hd, RCM_RESOURCE_MAC_NEW, 0); + events_registered--; + } + return (RCM_SUCCESS); +} + +typedef struct mac_dl_walker_state { + char *ws_dev_name; + uint_t ws_n_datalinks; + char **ws_datalink; + char **ws_paths; +} mac_dl_walker_state_t; + +/* + * Adds a datalink of the specified name to the list hanging off + * the specified state. Invoked by mac_dl_walker_db() and mac_dl_walker(). 
+ */ +static void +mac_add_datalink(mac_dl_walker_state_t *state, const char *name) +{ + char dl_path[MAXPATHLEN]; + + (void) snprintf(dl_path, sizeof (dl_path), "/devices/pseudo/dld@0:%s", + name); + rcm_log_message(RCM_DEBUG, "MAC: found datalink \"%s\"\n", dl_path); + + state->ws_n_datalinks++; + + state->ws_datalink = realloc(state->ws_datalink, + (state->ws_n_datalinks + 1) * sizeof (char *)); + if (state->ws_datalink == NULL) + return; + state->ws_datalink[state->ws_n_datalinks-1] = strdup(name); + state->ws_datalink[state->ws_n_datalinks] = NULL; + + state->ws_paths = realloc(state->ws_paths, + (state->ws_n_datalinks + 1) * sizeof (char *)); + if (state->ws_paths == NULL) + return; + state->ws_paths[state->ws_n_datalinks-1] = strdup(dl_path); + state->ws_paths[state->ws_n_datalinks] = NULL; +} + +/* + * Invoked for each DDI_NT_NET node found by the dladm library. + */ +static +void +mac_dl_walker(void *arg, const char *name) +{ + dladm_attr_t dl_attr; + mac_dl_walker_state_t *state = (mac_dl_walker_state_t *)arg; + + rcm_log_message(RCM_DEBUG, "MAC: walker: DDI_NT_NET \"%s\"\n", name); + + if ((state->ws_datalink == NULL) || (state->ws_paths == NULL)) + return; + + if (dladm_info(name, &dl_attr) < 0) { + rcm_log_message(RCM_DEBUG, "MAC: dladm_info failed " + "(legacy)\n"); + return; + } + + /* + * We have a virtual data link that is defined on top + * of a MAC port. Ignore it unless the MAC port was + * registered by the device being acted upon. + */ + rcm_log_message(RCM_DEBUG, "MAC: rsrc \"%s\" matches link \"%s\"?\n", + state->ws_dev_name, dl_attr.da_dev); + if (strcmp(state->ws_dev_name, dl_attr.da_dev) != 0) { + rcm_log_message(RCM_DEBUG, "MAC: no match\n"); + return; + } + + mac_add_datalink(state, name); +} + +/* + * Allocate and return a list of strings containing the virtual + * data links that are currently configured on top of a device. 
+ */ +static int +mac_list_datalinks(char *dev_name, char ***paths, char ***names) +{ + mac_dl_walker_state_t dl_state; + + /* + * Use the instance and driver from the cache node to find + * matching mac ports. + */ + dl_state.ws_dev_name = dev_name; + dl_state.ws_n_datalinks = 0; + + dl_state.ws_datalink = calloc(1, sizeof (char *)); + dl_state.ws_paths = calloc(1, sizeof (char *)); + if ((dl_state.ws_datalink == NULL) || (dl_state.ws_paths == NULL)) { + rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure")); + goto bail; + } + + (void) dladm_walk(mac_dl_walker, &dl_state); + + if ((dl_state.ws_datalink == NULL) || (dl_state.ws_paths == NULL)) { + rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure")); + goto bail; + } + + if (paths != NULL) + *paths = dl_state.ws_paths; + if (names != NULL) + *names = dl_state.ws_datalink; + + return (dl_state.ws_n_datalinks); + +bail: + free(dl_state.ws_datalink); + free(dl_state.ws_paths); + return (-1); +} + +/* + * Invoked for each virtual datalink defined in database. + */ +static void +mac_dl_walker_db(void *arg, const char *name, dladm_attr_t *dl_attr) +{ + mac_dl_walker_state_t *state = arg; + + rcm_log_message(RCM_DEBUG, "MAC: DB walker: \"%s\"\n", name); + + if ((state->ws_datalink == NULL) || (state->ws_paths == NULL)) + return; + + /* + * We have a virtual data link that is defined on top + * of a MAC port. Ignore it unless the MAC port was + * registered by the device being acted upon. + */ + rcm_log_message(RCM_DEBUG, "MAC: DB rsrc \"%s\" matches link " + "\"%s\"?\n", state->ws_dev_name, dl_attr->da_dev); + if (strcmp(state->ws_dev_name, dl_attr->da_dev) != 0) { + rcm_log_message(RCM_DEBUG, "MAC: no match\n"); + return; + } + + mac_add_datalink(state, name); +} + +/* + * Allocate and return a list of strings containing the virtual + * data links that are configured on top of a device. 
+ */ +static int +mac_list_datalinks_db(char *dev_name, char ***paths, char ***names) +{ + mac_dl_walker_state_t dl_state; + + /* + * Use the instance and driver from the cache node to find + * matching mac ports. + */ + dl_state.ws_dev_name = dev_name; + dl_state.ws_n_datalinks = 0; + + dl_state.ws_datalink = calloc(1, sizeof (char *)); + dl_state.ws_paths = calloc(1, sizeof (char *)); + if ((dl_state.ws_datalink == NULL) || (dl_state.ws_paths == NULL)) { + rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure")); + goto bail; + } + + (void) dladm_db_walk(mac_dl_walker_db, &dl_state); + + if ((dl_state.ws_datalink == NULL) || (dl_state.ws_paths == NULL)) { + rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure")); + goto bail; + } + + if (paths != NULL) + *paths = dl_state.ws_paths; + if (names != NULL) + *names = dl_state.ws_datalink; + + return (dl_state.ws_n_datalinks); + +bail: + free(dl_state.ws_datalink); + free(dl_state.ws_paths); + return (-1); +} + +/* + * Link aggregation walker state. + */ +typedef struct mac_aggr_walker_state { + char *as_dev_name; + uint_t as_n_aggr; + uint32_t *as_aggr; +} mac_aggr_walker_state_t; + +/* + * Link aggregation walker. + */ +static int +mac_list_aggr_walker(void *arg, laadm_grp_attr_sys_t *grp) +{ + mac_aggr_walker_state_t *state = (mac_aggr_walker_state_t *)arg; + laadm_port_attr_sys_t *port; + int i, j; + + rcm_log_message(RCM_TRACE1, "MAC: aggr sys walker: key %u\n", + grp->lg_key); + + if (state->as_aggr == NULL) + return (0); + + /* + * Add an entry for each aggregated MAC port that was registered + * by the device being acted upon by RCM. + */ + for (i = 0; i < grp->lg_nports; i++) { + port = &grp->lg_ports[i]; + + rcm_log_message(RCM_TRACE1, "MAC: aggr (%d) port %s/%d\n", + grp->lg_key, port->lp_devname, port->lp_port); + + if (strcmp(port->lp_devname, state->as_dev_name) != 0) + continue; + + /* + * Found matching port. 
Add aggregation key to list
+		 * if it not already there, since multiple ports of
+		 * the same device could be added to the same
+		 * aggregation.
+		 */
+		for (j = 0; j < state->as_n_aggr; j++) {
+			if (state->as_aggr[j] == grp->lg_key)
+				break;
+		}
+		if (j < state->as_n_aggr)
+			/* aggregation group already in list */
+			continue;
+
+		state->as_n_aggr++;
+		state->as_aggr = realloc(state->as_aggr,
+		    (state->as_n_aggr + 1) * sizeof (uint32_t));
+		if (state->as_aggr == NULL)
+			return (0);
+		state->as_aggr[state->as_n_aggr-1] = grp->lg_key;
+		state->as_aggr[state->as_n_aggr] = 0;
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate and return a list of key values of aggregations that
+ * are currently configured on top of the MAC ports registered
+ * by a device.
+ *
+ * Returns the number of keys found, or -1 on failure.  When aggr is
+ * not NULL, *aggr is set to a zero-terminated array of keys which the
+ * caller must free().  When aggr is NULL only the count is returned.
+ */
+static int
+mac_list_aggr(char *dev_name, uint32_t **aggr)
+{
+	mac_aggr_walker_state_t ag_state;
+	int rv;
+
+	ag_state.as_dev_name = dev_name;
+	ag_state.as_n_aggr = 0;
+
+	/* start with an empty list holding only the terminating zero */
+	ag_state.as_aggr = calloc(1, sizeof (uint32_t));
+	if (ag_state.as_aggr == NULL) {
+		rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure"));
+		return (-1);
+	}
+
+	rv = laadm_walk_sys(mac_list_aggr_walker, &ag_state);
+	if (rv != 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("MAC: cannot list aggregations "
+		    "(%s)\n"), strerror(errno));
+		free(ag_state.as_aggr);
+		return (-1);
+	}
+
+	/* the walker leaves a NULL list behind when it fails to grow it */
+	if (ag_state.as_aggr == NULL) {
+		rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure"));
+		return (-1);
+	}
+
+	if (aggr != NULL)
+		*aggr = ag_state.as_aggr;
+	else
+		/* caller only wants the count, don't leak the list */
+		free(ag_state.as_aggr);
+
+	return (ag_state.as_n_aggr);
+}
+
+/*
+ * Free a NULL-terminated array of strings, and the strings themselves.
+ * A NULL list is accepted and ignored.
+ */
+static void
+mac_list_free(char **list)
+{
+	int i;
+
+	if (list == NULL)
+		return;
+
+	for (i = 0; list[i] != NULL; i++)
+		free(list[i]);
+
+	free(list);
+}
+
+/*
+ * Bring down each data link named in the NULL-terminated list.
+ * If one of them cannot be brought down, the links that were already
+ * brought down are brought back up and RCM_FAILURE is returned.
+ * A NULL list is treated as success.
+ */
+static int
+mac_dl_down_list(char **list)
+{
+	int i;
+	dladm_diag_t diag;
+
+	if (list == NULL)
+		return (RCM_SUCCESS);
+
+	for (i = 0; list[i] != NULL; i++) {
+		rcm_log_message(RCM_DEBUG, "MAC: dl_down() for \"%s\"\n",
+		    list[i]);
+		if (dladm_down(list[i], &diag) != 0) {
+			char diag_str[256];
+
+			if (diag != 0) {
+				(void) snprintf(diag_str, sizeof (diag_str),
+				    " (%s)", dladm_diag(diag));
+			} else {
+				diag_str[0] = '\0';
+			}
+
+			rcm_log_message(RCM_ERROR,
+			    gettext("MAC: failed to bring "
+			    "down link %s%s"), list[i], diag_str);
+			goto error;
+		}
+	}
+
+	return (RCM_SUCCESS);
+
+error:
+	/* bring data links back up; list[i] is the one that failed */
+	for (i--; i >= 0; i--)
+		(void) dladm_up(list[i], &diag);
+	return (RCM_FAILURE);
+}
+
+/*
+ * Bring up each data link named in the NULL-terminated list.
+ * If one of them cannot be brought up, the links that were already
+ * brought up are brought back down and RCM_FAILURE is returned.
+ * A NULL list is treated as success.
+ */
+static int
+mac_dl_up_list(char **list)
+{
+	int i;
+	dladm_diag_t diag;
+
+	if (list == NULL)
+		return (RCM_SUCCESS);
+
+	for (i = 0; list[i] != NULL; i++) {
+		rcm_log_message(RCM_DEBUG, "MAC: dl_up() for \"%s\"\n",
+		    list[i]);
+		if (dladm_up(list[i], &diag) != 0) {
+			char diag_str[256];
+
+			if (diag != 0) {
+				(void) snprintf(diag_str, sizeof (diag_str),
+				    " (%s)", dladm_diag(diag));
+			} else {
+				diag_str[0] = '\0';
+			}
+
+			rcm_log_message(RCM_ERROR,
+			    gettext("MAC: failed to bring "
+			    "up link %s%s\n"), list[i], diag_str);
+			goto error;
+		}
+	}
+
+	return (RCM_SUCCESS);
+
+error:
+	/* bring data links down; list[i] is the one that failed */
+	for (i--; i >= 0; i--)
+		(void) dladm_down(list[i], &diag);
+	return (RCM_FAILURE);
+}
+
+/*
+ * Since all we do is pass operations thru, we provide a general
+ * routine for passing through operations.
+ *
+ * The cache is consulted (and, for MAC_OP_REMOVE, updated) under
+ * cache_lock; the lock is dropped before any request is propagated.
+ * 'arg' is only used by MAC_OP_SUSPEND, where it carries the suspend
+ * interval.  Returns RCM_SUCCESS or RCM_FAILURE; an unrecognized
+ * resource is logged and reported as success.
+ */
+/*ARGSUSED*/
+static int
+mac_propagate(rcm_handle_t *hd, mac_op_t op, const char *rsrc, uint_t flag,
+    char **reason, rcm_info_t **dependent_reason, void *arg)
+{
+	mac_cache_t *node;
+	int rv = RCM_SUCCESS;
+	int ndep, naggr;
+	char **dl_paths = NULL;
+	char **dl_names = NULL;
+	char dev_name[MAXNAMELEN];
+	uint32_t *aggr = NULL;
+
+	/*
+	 * Lock the cache just long enough to extract information about this
+	 * resource.
+	 */
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(rsrc);
+	if (node == NULL) {
+		rcm_log_message(RCM_WARNING,
+		    gettext("MAC: unrecognized resource %s\n"), rsrc);
+		(void) mutex_unlock(&cache_lock);
+		return (RCM_SUCCESS);
+	}
+
+	(void) snprintf(dev_name, sizeof (dev_name), "%s%d", node->driver,
+	    node->instance);
+	rcm_log_message(RCM_DEBUG, "MAC: mac_propagate() %s for \"%s\" (%s)\n",
+	    mac_op_str[op], rsrc, dev_name);
+
+	/*
+	 * We need to propagate the notification to the MAC clients
+	 * that are configured on top of the MACs of the specified
+	 * device. These MAC clients can be virtual links,
+	 * or link aggregation groups.
+	 */
+
+	/*
+	 * Remove notifications are unconditional in the RCM state model,
+	 * so it's safe to remove the node from the cache at this point.
+	 * And we need to remove it so that we will recognize it as a new
+	 * resource following the reattachment of the resource.
+	 */
+	if (op == MAC_OP_REMOVE) {
+		cache_remove(node);
+		free_node(node);
+	}
+	(void) mutex_unlock(&cache_lock);
+
+	/*
+	 * Obtain the list of virtual datalinks configured or currently
+	 * active on top of the MAC ports registered by the device.
+	 */
+	if ((op == MAC_OP_SUSPEND) || (op == MAC_OP_OFFLINE) ||
+	    (op == MAC_OP_RESUME))
+		ndep = mac_list_datalinks(dev_name, &dl_paths, &dl_names);
+	else
+		ndep = mac_list_datalinks_db(dev_name, &dl_paths, &dl_names);
+
+	if (ndep == -1) {
+		rv = RCM_FAILURE;
+		goto done;
+	} else if ((ndep == 0) && (op != MAC_OP_OFFLINE)) {
+		/* offline must still check for aggregations below */
+		goto done;
+	}
+
+	switch (op) {
+	case MAC_OP_SUSPEND:
+		rv = rcm_request_suspend_list(hd, dl_paths, flag,
+		    (timespec_t *)arg, dependent_reason);
+		break;
+
+	case MAC_OP_OFFLINE:
+		/* refuse operation if aggregation defined on a MAC port */
+		naggr = mac_list_aggr(dev_name, &aggr);
+		if (naggr == -1) {
+			rv = RCM_FAILURE;
+			break;
+		} else if (naggr > 0) {
+			/*
+			 * Active link aggregation(s) defined on at least
+			 * one of the MAC ports registered by the device
+			 * being offlined.
+			 */
+			char *errstr;
+			char *tmp;
+			char errgrp[64];
+			int i;
+
+			errstr = strdup(gettext(
+			    "Resource is in use by aggregation"));
+			if (errstr == NULL) {
+				rcm_log_message(RCM_ERROR,
+				    gettext("MAC: malloc failure"));
+				rv = RCM_FAILURE;
+				goto done;
+			}
+
+			/* append the key of each aggregation to the reason */
+			for (i = 0; i < naggr; i++) {
+				(void) snprintf(errgrp, sizeof (errgrp), " %u",
+				    aggr[i]);
+				/*
+				 * Grow through a temporary so that the
+				 * string built so far can still be freed
+				 * if realloc() fails.
+				 */
+				tmp = realloc(errstr, strlen(errstr) +
+				    strlen(errgrp) + 1);
+				if (tmp == NULL) {
+					rcm_log_message(RCM_ERROR,
+					    gettext("MAC: malloc failure"));
+					free(errstr);
+					rv = RCM_FAILURE;
+					goto done;
+				}
+				errstr = tmp;
+				(void) strcat(errstr, errgrp);
+			}
+			*reason = errstr;
+			rcm_log_message(RCM_ERROR, "MAC: %s %s\n",
+			    dev_name, *reason);
+			errno = EBUSY;
+			rv = RCM_FAILURE;
+			break;
+		}
+
+		if (ndep == 0)
+			break;
+
+		/* propagate offline request */
+		rv = rcm_request_offline_list(hd, dl_paths, flag,
+		    dependent_reason);
+		if (rv != RCM_SUCCESS)
+			break;
+		/* a query must not change the state of the links */
+		if (flag & RCM_QUERY)
+			break;
+		rv = mac_dl_down_list(dl_names);
+		break;
+
+	case MAC_OP_REMOVE:
+		rv = rcm_notify_remove_list(hd, dl_paths, flag,
+		    dependent_reason);
+		break;
+
+	case MAC_OP_ONLINE:
+		rv = mac_dl_up_list(dl_names);
+		if (rv != RCM_SUCCESS)
+			break;
+		rv = rcm_notify_online_list(hd, dl_paths, flag,
+		    dependent_reason);
+		break;
+
+	case MAC_OP_RESUME:
+		rv = rcm_notify_resume_list(hd, dl_paths, flag,
+		    dependent_reason);
+		break;
+
+	default:
+		rcm_log_message(RCM_WARNING,
+		    gettext("MAC: bad RCM operation %d\n"), op);
+		/*
+		 * Release the datalink lists here rather than going
+		 * through 'done', which would index mac_op_str[] with
+		 * the bad operation.
+		 */
+		mac_list_free(dl_paths);
+		mac_list_free(dl_names);
+		errno = EINVAL;
+		return (RCM_FAILURE);
+	}
+
+done:
+	if (rv != RCM_SUCCESS) {
+		rcm_log_message(RCM_WARNING,
+		    gettext("MAC: %s operation failed\n"),
+		    mac_op_str[op]);
+	}
+
+	mac_list_free(dl_paths);
+	mac_list_free(dl_names);
+	free(aggr);
+
+	return (rv);
+}
+
+
+/*
+ * mac_offline()
+ *
+ *	Determine dependents of the resource being offlined, and offline
+ *	them all.
+ */
+static int
+mac_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
+    char **reason, rcm_info_t **dependent_reason)
+{
+	rcm_log_message(RCM_TRACE1, "MAC: offline(%s)\n", rsrc);
+
+	/* all of the work is done by the common pass-through routine */
+	return (mac_propagate(hd, MAC_OP_OFFLINE, rsrc, flags, reason,
+	    dependent_reason, NULL));
+}
+
+/*
+ * mac_online()
+ *
+ *	Bring the data links configured on top of the resource back up,
+ *	and notify its dependents that it is online again.
+ */
+static int
+mac_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **reason,
+    rcm_info_t **dependent_reason)
+{
+	rcm_log_message(RCM_DEBUG, "MAC: online(%s)\n", rsrc);
+
+	/* all of the work is done by the common pass-through routine */
+	return (mac_propagate(hd, MAC_OP_ONLINE, rsrc, flag, reason,
+	    dependent_reason, NULL));
+}
+
+/*
+ * mac_getinfo()
+ *
+ *	Gather usage information for this resource.
+ *
+ *	Locking: the cache is locked while this routine looks up the
+ *	resource and extracts copies of any piece of information it needs.
+ *	The cache is then unlocked, and this routine performs the rest of
+ *	its functions without touching any part of the cache.
+ */
+/*ARGSUSED*/
+static int
+mac_getinfo(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag,
+    char **info, char **errstr, nvlist_t *proplist, rcm_info_t **depend_info)
+{
+	int len;
+	char nic[LIFNAMSIZ];
+	char dev_name[MAXNAMELEN];
+	const char *info_fmt;
+	mac_cache_t *node;
+	char **dl_paths = NULL;
+
+	rcm_log_message(RCM_TRACE1, "MAC: getinfo(%s)\n", rsrc);
+
+	info_fmt = "MAC %s";
+
+	(void) mutex_lock(&cache_lock);
+	node = cache_lookup(rsrc);
+	if (!node) {
+		rcm_log_message(RCM_WARNING,
+		    gettext("MAC: unrecognized resource %s\n"), rsrc);
+		(void) mutex_unlock(&cache_lock);
+		errno = ENOENT;
+		return (RCM_FAILURE);
+	}
+
+	/*
+	 * Copy everything we need out of the cache entry and drop the
+	 * lock right away.  'node' must not be touched past this point;
+	 * none of the paths below has to remember to unlock.
+	 */
+	(void) snprintf(nic, sizeof (nic), "%s%d", node->driver,
+	    node->instance);
+	(void) snprintf(dev_name, sizeof (dev_name), "%s%d",
+	    node->driver, node->instance);
+	(void) mutex_unlock(&cache_lock);
+
+	len = strlen(info_fmt) + strlen(nic) + 1;
+	if ((*info = (char *)malloc(len)) == NULL) {
+		rcm_log_message(RCM_ERROR, gettext("MAC: malloc failure"));
+		return (RCM_FAILURE);
+	}
+
+	/* Fill in the string */
+	(void) snprintf(*info, len, info_fmt, nic);
+
+	if (flag & RCM_INCLUDE_DEPENDENT) {
+		int ndep;
+
+		rcm_log_message(RCM_DEBUG, "MAC: getting dependents\n");
+		/* get list of configured datalinks */
+		ndep = mac_list_datalinks(dev_name, &dl_paths, NULL);
+		/* -1 reports a failure, only a positive count is a list */
+		if (ndep > 0) {
+			(void) rcm_get_info_list(hd, dl_paths, flag,
+			    depend_info);
+		}
+		/* dl_paths is still NULL if no list was handed back */
+		mac_list_free(dl_paths);
+	}
+
+	return (RCM_SUCCESS);
+}
+
+/*
+ * mac_suspend()
+ *
+ *	Notify all dependents that the resource is being suspended.
+ *	Since no real operation is involved, QUERY or not doesn't matter.
+ *
+ *	Locking: the cache is only used to retrieve some information about
+ *	this resource, so it is only locked during that retrieval.
+ */
+static int
+mac_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval,
+    uint_t flag, char **reason, rcm_info_t **dependent_reason)
+{
+	rcm_log_message(RCM_TRACE1, "MAC: suspend(%s)\n", rsrc);
+
+	/* the suspend interval travels in mac_propagate()'s opaque argument */
+	return (mac_propagate(hd, MAC_OP_SUSPEND, rsrc, flag, reason,
+	    dependent_reason, (void *)interval));
+}
+
+/*
+ * mac_resume()
+ *
+ *	Resume all the dependents of a suspended network.
+ *
+ *	Locking: the cache is only used to retrieve some information about
+ *	this resource, so it is only locked during that retrieval.
+ */
+static int
+mac_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info,
+    rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "MAC: resume(%s)\n", rsrc);
+
+	return (mac_propagate(hd, MAC_OP_RESUME, rsrc, flag, info,
+	    dependent_info, NULL));
+}
+
+/*
+ * mac_remove()
+ *
+ *	We only propagate the removal to our dependents.  Note that
+ *	mac_propagate() drops the resource from our cache for this
+ *	operation, so that it is recognized as a new resource if it is
+ *	attached again.  We don't unregister interest at this point;
+ *	the network device name is still around.  This way we don't
+ *	have to change this logic when we gain the ability to learn
+ *	about DR attach operations.
+ */
+static int
+mac_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info,
+    rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "MAC: remove(%s)\n", rsrc);
+
+	return (mac_propagate(hd, MAC_OP_REMOVE, rsrc, flag, info,
+	    dependent_info, NULL));
+}
+
+/*
+ * Process post-attach notifications sent by devfs for devices
+ * that created DDI_NT_MAC minor nodes. Bring up the links
+ * that are configured on top of the corresponding MAC ports.
+ */ +/*ARGSUSED*/ +static int +mac_notify_event(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, + char **errstr, nvlist_t *nvl, rcm_info_t **result) +{ + rcm_log_message(RCM_DEBUG, "MAC: notify_event(%s)\n", rsrc); + + if (strcmp(rsrc, RCM_RESOURCE_MAC_NEW) != 0) { + rcm_log_message(RCM_WARNING, + gettext("MAC: unrecognized event for %s\n"), rsrc); + return (RCM_FAILURE); + } + + /* update cache to reflect attached nodes */ + update_cache(hd); + + /* Process the nvlist for the event */ + if (process_nvlist(nvl) != 0) { + rcm_log_message(RCM_WARNING, + gettext("MAC: Error processing resource attributes(%s)\n"), + rsrc); + rcm_log_message(RCM_WARNING, + gettext("MAC: One or more devices may not be " + "configured.\n")); + } + + return (RCM_SUCCESS); +} + +/* + * Cache management routines. Note that the cache is implemented as a + * trivial linked list, and is only required because RCM doesn't + * provide enough state about our own registrations back to us. This + * linked list implementation probably clobbers the CPU cache pretty + * well. + */ + +/* + * cache_lookup() + * + * Get a cache node for a resource. Call with cache lock held. + */ +static mac_cache_t * +cache_lookup(const char *resource) +{ + mac_cache_t *probe; + + probe = cache_head.next; + while (probe != &cache_tail) { + if (probe->resource && + (strcmp(resource, probe->resource) == 0)) { + return (probe); + } + probe = probe->next; + } + return (NULL); +} + +/* + * free_node() + * + * Free a node. Make sure it isn't in the list! + */ +static void +free_node(mac_cache_t *node) +{ + if (node != NULL) { + free(node->resource); + free(node->driver); + free(node); + } +} + +/* + * cache_insert() + * + * Call with the cache_lock held. 
+ */ +static void +cache_insert(mac_cache_t *node) +{ + /* insert at the head for best performance */ + node->next = cache_head.next; + node->prev = &cache_head; + + node->next->prev = node; + cache_head.next = node; +} + +/* + * cache_remove() + * + * Call with the cache_lock held. + */ +static void +cache_remove(mac_cache_t *node) +{ + node->next->prev = node->prev; + node->prev->next = node->next; + node->next = NULL; + node->prev = NULL; +} + +/* + * di_walk_minor() walker. Invoked for each DDI_NT_MAC device. + */ +/*ARGSUSED*/ +static int +devfs_entry(di_node_t node, di_minor_t minor, void *arg) +{ + char *devfspath; + char resource[MAXPATHLEN]; + char *name; + char *cp; + uint_t port_num; + int instance; + mac_cache_t *probe; + + cp = di_minor_nodetype(minor); + if ((cp == NULL) || (strcmp(cp, DDI_NT_MAC) != 0)) { + /* doesn't look like a MAC device */ + return (DI_WALK_CONTINUE); + } + + /* + * We need to register interest for devices that + * can be unconfigured, suspended, etc, and registered + * one or more MAC ports with the kernel. + * + * In our cache, we keep one entry per device that registered + * MAC ports. Each cache entry is also associated with a + * list of MAC ports that have been registered by the + * device associated with that cache entry. + */ + + name = di_driver_name(node); + if (name == NULL) { + /* what else can we do? */ + return (DI_WALK_CONTINUE); + } + rcm_log_message(RCM_DEBUG, "MAC: node driver name: \"%s\"\n", name); + + instance = di_instance(node); + rcm_log_message(RCM_DEBUG, "MAC: node instance: %d\n", instance); + + port_num = getminor(di_minor_devt(minor)); + rcm_log_message(RCM_DEBUG, "MAC: port number: %u\n", port_num); + + devfspath = di_devfs_path(node); + if (devfspath == NULL) { + /* no devfs path?!? 
*/ + rcm_log_message(RCM_DEBUG, "MAC: missing devfs path\n"); + return (DI_WALK_CONTINUE); + } + + if (strncmp("/pseudo", devfspath, strlen("/pseudo")) == 0) { + /* ignore pseudo devices, they are not NICs */ + rcm_log_message(RCM_DEBUG, "MAC: ignoring pseudo device %s\n", + devfspath); + di_devfs_path_free(devfspath); + return (DI_WALK_CONTINUE); + } + + (void) snprintf(resource, sizeof (resource), "/devices%s", devfspath); + di_devfs_path_free(devfspath); + + probe = cache_lookup(resource); + if (probe != NULL) { + rcm_log_message(RCM_DEBUG, "MAC: %s already registered\n", + resource); + probe->flags &= ~(CACHE_STALE); + } else { + rcm_log_message(RCM_DEBUG, "MAC: %s is new resource\n", + resource); + probe = calloc(1, sizeof (mac_cache_t)); + if (probe == NULL) { + rcm_log_message(RCM_ERROR, + gettext("MAC: malloc failure")); + return (DI_WALK_CONTINUE); + } + + probe->resource = strdup(resource); + probe->instance = instance; + probe->driver = strdup(name); + + if (probe->resource == NULL || probe->driver == NULL) { + free_node(probe); + return (DI_WALK_CONTINUE); + } + + probe->flags |= CACHE_NEW; + cache_insert(probe); + } + + return (DI_WALK_CONTINUE); +} + +static void +update_cache(rcm_handle_t *hd) +{ + mac_cache_t *probe; + di_node_t root; + int rv; + + (void) mutex_lock(&cache_lock); + + /* first we walk the entire cache, marking each entry stale */ + probe = cache_head.next; + while (probe != &cache_tail) { + probe->flags |= CACHE_STALE; + probe = probe->next; + } + + root = di_init("/", DINFOSUBTREE | DINFOMINOR); + if (root == DI_NODE_NIL) { + goto done; + } + + (void) di_walk_minor(root, DDI_NT_MAC, DI_CHECK_ALIAS, NULL, + devfs_entry); + + di_fini(root); + + probe = cache_head.next; + while (probe != &cache_tail) { + mac_cache_t *freeit; + + if (probe->flags & CACHE_STALE) { + (void) rcm_unregister_interest(hd, probe->resource, 0); + rcm_log_message(RCM_DEBUG, "MAC: unregistered %s\n", + probe->resource); + freeit = probe; + probe = probe->next; + 
cache_remove(freeit); + free_node(freeit); + continue; + } + + if (!(probe->flags & CACHE_NEW)) { + probe = probe->next; + continue; + } + + rcm_log_message(RCM_DEBUG, "MAC: registering %s\n", + probe->resource); + rv = rcm_register_interest(hd, probe->resource, 0, NULL); + if (rv != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + gettext("MAC: failed to register %s\n"), + probe->resource); + } else { + rcm_log_message(RCM_DEBUG, + "MAC: registered %s\n", probe->resource); + probe->flags &= ~(CACHE_NEW); + } + probe = probe->next; + } + +done: + (void) mutex_unlock(&cache_lock); +} + +static void +free_cache(void) +{ + mac_cache_t *probe; + + (void) mutex_lock(&cache_lock); + probe = cache_head.next; + while (probe != &cache_tail) { + cache_remove(probe); + free_node(probe); + probe = cache_head.next; + } + (void) mutex_unlock(&cache_lock); +} + +/* + * Walker state and function used to bring up the virtual datalinks + * that are configured on top of a MAC port for which we received + * a post-attach notification. 
+ */ + +typedef struct dl_evt_walker_state { + char ws_dev_name[MAXNAMELEN]; + int ws_port_num; +} dl_evt_walker_state_t; + +static void +dl_evt_walker(void *arg, const char *name, dladm_attr_t *dl_attr) +{ + dl_evt_walker_state_t *state = (dl_evt_walker_state_t *)arg; + int rc; + dladm_diag_t diag; + + rcm_log_message(RCM_DEBUG, "MAC: dl evt walker match %s/%d with " + "config %s/%d?\n", dl_attr->da_dev, dl_attr->da_port, + state->ws_dev_name, state->ws_port_num); + + if ((strcmp(state->ws_dev_name, dl_attr->da_dev) != 0) || + (state->ws_port_num != dl_attr->da_port)) { + /* no match */ + rcm_log_message(RCM_DEBUG, "MAC: no dl match, skip entry\n"); + return; + } + + /* we have a match, bring up the datalink */ + rc = dladm_up(name, &diag); + if (rc != 0) { + char diag_str[256]; + + if (diag != 0) { + (void) snprintf(diag_str, sizeof (diag_str), " (%s)", + dladm_diag(diag)); + } else { + diag_str[0] = '\0'; + } + + rcm_log_message(RCM_ERROR, + gettext("MAC: error (%s) configuring " + "virtual datalink %s%s\n"), strerror(rc), name, diag_str); + } +} + +/* + * Process a notification received for a MAC minor node. Bring up + * each link that is configured on top of the MAC port. 
+ */
+static void
+process_minor(char *devfs_path, char *name, int instance,
+    struct devfs_minor_data *mdata)
+{
+	dl_evt_walker_state_t state;
+
+	rcm_log_message(RCM_TRACE1, "MAC: process_minor\n");
+
+	/*
+	 * NOTE(review): a minor node without any node type is not
+	 * filtered out here -- confirm that this is intended.
+	 */
+	if ((mdata->minor_node_type != NULL) &&
+	    strcmp(mdata->minor_node_type, PROP_NV_DDI_MAC) != 0) {
+		/* Process MAC devices only */
+		return;
+	}
+
+	/*
+	 * The minor name is only set if the notification carried one.
+	 * Without it there is no port number to match against, and it
+	 * must not be handed to atoi() or to a "%s" conversion.
+	 */
+	if (mdata->minor_name == NULL) {
+		rcm_log_message(RCM_TRACE1,
+		    "MAC: ignoring %s, no minor name\n", devfs_path);
+		return;
+	}
+
+	rcm_log_message(RCM_TRACE1, "MAC: Examining %s (%s)\n",
+	    devfs_path, mdata->minor_name);
+
+	if (strncmp("/pseudo", devfs_path, strlen("/pseudo")) == 0) {
+		rcm_log_message(RCM_TRACE1, "MAC: ignoring pseudo %s (%s)\n",
+		    devfs_path, mdata->minor_name);
+		return;
+	}
+
+	rcm_log_message(RCM_TRACE1, "MAC: process MAC minor "
+	    "(dev=%s, name=%s, inst=%d, port=\"%s\")\n",
+	    devfs_path, name, instance, mdata->minor_name);
+
+	/* the walker matches on "<driver><instance>" and the port number */
+	(void) snprintf(state.ws_dev_name, sizeof (state.ws_dev_name), "%s%d",
+	    name, instance);
+	state.ws_port_num = atoi(mdata->minor_name);
+	(void) dladm_db_walk(dl_evt_walker, &state);
+}
+
+/*
+ * Process a post-attached notification nvlist sent by devfs.
+ *
+ * The driver name, instance and devfs path are picked up as they are
+ * encountered; every minor data entry is unpacked and handed to
+ * process_minor() together with the values seen so far.
+ *
+ * Returns 0 on success, -1 if the nvlist could not be processed.
+ */
+static int
+process_nvlist(nvlist_t *nvl)
+{
+	nvpair_t *nvp = NULL;
+	char *driver_name = NULL;
+	char *devfs_path = NULL;
+	int32_t instance = -1;
+	char *minor_byte_array;
+	uint_t nminor;
+	struct devfs_minor_data *mdata = NULL;
+	nvlist_t *mnvl = NULL;
+	nvpair_t *mnvp = NULL;
+
+	rcm_log_message(RCM_TRACE1, "MAC: process_nvlist\n");
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		/* Get driver name */
+		if (strcmp(nvpair_name(nvp), RCM_NV_DRIVER_NAME) == 0) {
+			if (nvpair_value_string(nvp, &driver_name) != 0) {
+				rcm_log_message(RCM_WARNING,
+				    gettext("MAC: cannot get driver name\n"));
+				return (-1);
+			}
+		}
+		/* Get instance */
+		if (strcmp(nvpair_name(nvp), RCM_NV_INSTANCE) == 0) {
+			if (nvpair_value_int32(nvp, &instance) != 0) {
+				rcm_log_message(RCM_WARNING, gettext(
+				    "MAC: cannot get device instance\n"));
+				return (-1);
+			}
+		}
+		/* Get devfs_path */
+		if (strcmp(nvpair_name(nvp), RCM_NV_DEVFS_PATH) == 0) {
+			if (nvpair_value_string(nvp, &devfs_path) != 0) {
+				rcm_log_message(RCM_WARNING,
+				    gettext("MAC: cannot get device path\n"));
+				return (-1);
+			}
+		}
+		/* Get minor data */
+		if (strcmp(nvpair_name(nvp), RCM_NV_MINOR_DATA) == 0) {
+			if (nvpair_value_byte_array(nvp,
+			    (uchar_t **)&minor_byte_array, &nminor) != 0) {
+				rcm_log_message(RCM_WARNING, gettext(
+				    "MAC: cannot get device minor data\n"));
+				return (-1);
+			}
+			if (nvlist_unpack(minor_byte_array,
+			    nminor, &mnvl, 0) != 0) {
+				rcm_log_message(RCM_WARNING, gettext(
+				    "MAC: cannot get minor node data\n"));
+				return (-1);
+			}
+			mdata = (struct devfs_minor_data *)calloc(1,
+			    sizeof (struct devfs_minor_data));
+			if (mdata == NULL) {
+				rcm_log_message(RCM_WARNING,
+				    gettext("MAC: calloc error(%s)\n"),
+				    strerror(errno));
+				goto bail;
+			}
+			/* Enumerate minor node data */
+			mnvp = NULL;
+			while ((mnvp = nvlist_next_nvpair(mnvl, mnvp)) !=
+			    NULL) {
+				/* Get minor type */
+				if (strcmp(nvpair_name(mnvp),
+				    RCM_NV_MINOR_TYPE) == 0) {
+					if (nvpair_value_int32(mnvp,
+					    &mdata->minor_type) != 0) {
+						rcm_log_message(RCM_WARNING,
+						    gettext("MAC: cannot get "
+						    "minor type \n"));
+						goto bail;
+					}
+				}
+				/* Get minor name */
+				if (strcmp(nvpair_name(mnvp),
+				    RCM_NV_MINOR_NAME) == 0) {
+					if (nvpair_value_string(mnvp,
+					    &mdata->minor_name) != 0) {
+						rcm_log_message(RCM_WARNING,
+						    gettext("MAC: cannot get "
+						    "minor name \n"));
+						goto bail;
+					}
+				}
+				/* Get minor node type */
+				if (strcmp(nvpair_name(mnvp),
+				    RCM_NV_MINOR_NODE_TYPE) == 0) {
+					if (nvpair_value_string(mnvp,
+					    &mdata->minor_node_type) != 0) {
+						rcm_log_message(RCM_WARNING,
+						    gettext("MAC: cannot get "
+						    "minor node type \n"));
+						goto bail;
+					}
+				}
+			}
+			/*
+			 * The minor node can only be processed if the
+			 * attributes identifying its device have been
+			 * seen already.
+			 */
+			if (driver_name == NULL || devfs_path == NULL) {
+				rcm_log_message(RCM_WARNING, gettext(
+				    "MAC: incomplete device attributes\n"));
+				goto bail;
+			}
+			/*
+			 * Process the minor node before the unpacked list
+			 * is freed; the strings in mdata were obtained
+			 * from that list.
+			 */
+			process_minor(devfs_path, driver_name, instance,
+			    mdata);
+			nvlist_free(mnvl);
+			mnvl = NULL;
+			free(mdata);
+			mdata = NULL;
+		}
+	}
+
+	rcm_log_message(RCM_TRACE1, "MAC: process_nvlist success\n");
+	return (0);
+
+bail:
+	if (mnvl != NULL)
+		nvlist_free(mnvl);
+	if (mdata != NULL)
+		free(mdata);
+	return (-1);
+}
diff --git a/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c b/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c
new file mode 100644
index 0000000000..5d1a23dad2
--- /dev/null
+++ b/usr/src/cmd/rcm_daemon/common/mpxio_rcm.c
@@ -0,0 +1,982 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * RCM module supporting multiplexed I/O controllers (MPxIO). + */ +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <assert.h> +#include <syslog.h> +#include <string.h> +#include <synch.h> +#include <libintl.h> +#include <locale.h> +#include <ctype.h> +#include <errno.h> +#include <libdevinfo.h> +#include <sys/types.h> +#include "rcm_module.h" + +#define MPXIO_PROP_NAME "mpxio-component" +#define MPXIO_PROP_CLIENT "client" + +#define CMD_GETINFO 0 +#define CMD_OFFLINE 1 +#define CMD_ONLINE 2 +#define CMD_REMOVE 3 + +#define CACHE_NEW 0 +#define CACHE_REFERENCED 1 +#define CACHE_STALE 2 + +#define MPXIO_MSG_CACHEFAIL gettext("Internal analysis failure.") +#define MPXIO_MSG_LASTPATH gettext("Last path to busy resources.") +#define MPXIO_MSG_USAGE gettext("SCSI Multipathing PHCI (%s)") +#define MPXIO_MSG_USAGEUNKNOWN gettext("SCSI Multipathing PHCI (<unknown>)") + +typedef struct { + char *path; + di_path_state_t state; +} phci_t; + +typedef struct phci_list { + phci_t phci; + int referenced; + struct phci_list *next; +} phci_list_t; + +typedef struct group { + int offline; + int nphcis; + int nclients; + phci_t *phcis; + char **clients; + struct group *next; +} group_t; + +static int mpxio_register(rcm_handle_t *); +static int mpxio_unregister(rcm_handle_t *); +static int mpxio_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, char **, + nvlist_t *, rcm_info_t **); +static int mpxio_suspend(rcm_handle_t *, char *, id_t, timespec_t *, uint_t, + char **, rcm_info_t **); +static int mpxio_resume(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t 
**); +static int mpxio_offline(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mpxio_online(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int mpxio_remove(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int get_nclients(di_node_t, void *); +static int build_groups(di_node_t, void *); +static void refresh_regs(rcm_handle_t *); +static int get_affected_clients(rcm_handle_t *, char *, int, int, char ***); +static int detect_client_change(rcm_handle_t *, int, int, group_t *, char *); +static int merge_clients(int *, char ***, group_t *); +static phci_list_t *lookup_phci(char *); +static int is_client(di_node_t); +static char *get_rsrcname(di_node_t); +static char *s_state(di_path_state_t); +static int compare_phci(const void *, const void *); +static void free_grouplist(); +static void free_group(group_t *); +static void free_clients(int, char **); +static void free_phcis(int, phci_t *); + +static struct rcm_mod_ops mpxio_ops = +{ + RCM_MOD_OPS_VERSION, + mpxio_register, + mpxio_unregister, + mpxio_getinfo, + mpxio_suspend, + mpxio_resume, + mpxio_offline, + mpxio_online, + mpxio_remove, + NULL, + NULL, + NULL +}; + +static group_t *group_list; +static phci_list_t *reg_list; +static mutex_t mpxio_lock; + +extern int errno; + +/* + * Return the mod-ops vector for initialization. + */ +struct rcm_mod_ops * +rcm_mod_init() +{ + rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_init()\n"); + + return (&mpxio_ops); +} + +/* + * Return name and version number for mod_info. + */ +const char * +rcm_mod_info() +{ + rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_info()\n"); + + return (gettext("RCM MPxIO module %I%")); +} + +/* + * Destroy the cache and mutex lock when being unloaded. 
+ */ +int +rcm_mod_fini() +{ + phci_list_t *reg; + phci_list_t *next; + + rcm_log_message(RCM_TRACE1, "MPXIO: rcm_mod_fini()\n"); + + /* Free the cache of MPxIO group information */ + free_grouplist(); + + /* Free the cache of registrants */ + reg = reg_list; + while (reg) { + next = reg->next; + free(reg->phci.path); + free(reg); + reg = next; + } + + /* Destroy the mutex for locking the caches */ + (void) mutex_destroy(&mpxio_lock); + + return (RCM_SUCCESS); +} + +/* + * During each register callback: totally rebuild the group list from a new + * libdevinfo snapshot, and then update the registrants. + */ +static int +mpxio_register(rcm_handle_t *hdl) +{ + int nclients = 0; + di_node_t devroot; + + rcm_log_message(RCM_TRACE1, "MPXIO: register()\n"); + + (void) mutex_lock(&mpxio_lock); + + /* Destroy the previous group list */ + free_grouplist(); + + /* Get a current libdevinfo snapshot */ + if ((devroot = di_init("/", DINFOCPYALL | DINFOPATH)) == DI_NODE_NIL) { + rcm_log_message(RCM_ERROR, + "MPXIO: libdevinfo initialization failed (%s).\n", + strerror(errno)); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + + /* + * First count the total number of clients. This'll be a useful + * upper bound when allocating client arrays within each group. + */ + (void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, get_nclients); + + rcm_log_message(RCM_TRACE2, gettext("MPXIO: found %d clients.\n"), + nclients); + + /* + * Then walk the libdevinfo snapshot, building up the new group list + * along the way. Pass in the total number of clients (from above) to + * assist in group construction. + */ + (void) di_walk_node(devroot, DI_WALK_CLDFIRST, &nclients, build_groups); + + /* Now with a new group list constructed, refresh the registrants */ + refresh_regs(hdl); + + /* Free the libdevinfo snapshot */ + di_fini(devroot); + + (void) mutex_unlock(&mpxio_lock); + + return (0); +} + +/* + * Unregister all PHCIs and mark the whole registrants list as stale. 
+ */ +static int +mpxio_unregister(rcm_handle_t *hdl) +{ + phci_list_t *reg; + + rcm_log_message(RCM_TRACE1, "MPXIO: unregister()\n"); + + (void) mutex_lock(&mpxio_lock); + + for (reg = reg_list; reg != NULL; reg = reg->next) { + (void) rcm_unregister_interest(hdl, reg->phci.path, 0); + reg->referenced = CACHE_STALE; + } + + (void) mutex_unlock(&mpxio_lock); + + return (RCM_SUCCESS); +} + +/* + * To return usage information, just lookup the PHCI in the cache and return + * a string identifying that it's a PHCI and describing its cached MPxIO state. + * Recurse with the cached list of disks if dependents are to be included. + */ +static int +mpxio_getinfo(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags, + char **infostr, char **errstr, nvlist_t *props, rcm_info_t **infop) +{ + size_t len; + int rv = RCM_SUCCESS; + char *buf = NULL; + char **clients = NULL; + phci_list_t *reg; + char c; + + rcm_log_message(RCM_TRACE1, "MPXIO: getinfo(%s)\n", rsrc); + + *infostr = NULL; + *errstr = NULL; + + (void) mutex_lock(&mpxio_lock); + + if ((reg = lookup_phci(rsrc)) == NULL) { + *errstr = strdup(MPXIO_MSG_CACHEFAIL); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + + len = snprintf(&c, 1, MPXIO_MSG_USAGE, s_state(reg->phci.state)); + buf = calloc(len + 1, sizeof (char)); + if ((buf == NULL) || (snprintf(buf, len + 1, MPXIO_MSG_USAGE, + s_state(reg->phci.state)) > len + 1)) { + *infostr = strdup(MPXIO_MSG_USAGEUNKNOWN); + *errstr = strdup(gettext("Cannot construct usage string.")); + (void) mutex_unlock(&mpxio_lock); + if (buf) + free(buf); + return (RCM_FAILURE); + } + *infostr = buf; + + if (flags & RCM_INCLUDE_DEPENDENT) { + rcm_log_message(RCM_TRACE2, "MPXIO: getting clients\n"); + if (get_affected_clients(hdl, rsrc, CMD_GETINFO, flags, + &clients) < 0) { + *errstr = strdup(gettext("Cannot lookup clients.")); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + if (clients) { + rv = rcm_get_info_list(hdl, clients, flags, infop); + 
free(clients); + } else { + rcm_log_message(RCM_TRACE2, "MPXIO: none found\n"); + } + } + + (void) mutex_unlock(&mpxio_lock); + return (rv); +} + +/* + * Nothing is implemented for suspend operations. + */ +static int +mpxio_suspend(rcm_handle_t *hdl, char *rsrc, id_t id, timespec_t *interval, + uint_t flags, char **errstr, rcm_info_t **infop) +{ + rcm_log_message(RCM_TRACE1, "MPXIO: suspend(%s)\n", rsrc); + + return (RCM_SUCCESS); +} + +/* + * Nothing is implemented for resume operations. + */ +static int +mpxio_resume(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags, + char **errstr, rcm_info_t **infop) +{ + rcm_log_message(RCM_TRACE1, "MPXIO: resume(%s)\n", rsrc); + + return (RCM_SUCCESS); +} + +/* + * MPxIO has no policy against offlining. If disks will be affected, then + * base the return value for this request on the results of offlining the + * list of disks. Otherwise succeed. + */ +static int +mpxio_offline(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags, + char **errstr, rcm_info_t **infop) +{ + char **clients = NULL; + int rv = RCM_SUCCESS; + + rcm_log_message(RCM_TRACE1, "MPXIO: offline(%s)\n", rsrc); + + (void) mutex_lock(&mpxio_lock); + + if (get_affected_clients(hdl, rsrc, CMD_OFFLINE, flags, &clients) < 0) { + *errstr = strdup(gettext("Cannot lookup clients.")); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + + if (clients) { + rv = rcm_request_offline_list(hdl, clients, flags, infop); + if (rv != RCM_SUCCESS) + *errstr = strdup(MPXIO_MSG_LASTPATH); + free(clients); + } + + (void) mutex_unlock(&mpxio_lock); + + return (rv); +} + +/* + * If disks are affected, then they are probably offline and we need to + * propagate this online notification to them. 
+ */ +static int +mpxio_online(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags, + char **errstr, rcm_info_t **infop) +{ + char **clients; + int rv = RCM_SUCCESS; + + rcm_log_message(RCM_TRACE1, "MPXIO: online(%s)\n", rsrc); + + (void) mutex_lock(&mpxio_lock); + + if (get_affected_clients(hdl, rsrc, CMD_ONLINE, flags, &clients) < 0) { + *errstr = strdup(gettext("Cannot lookup clients.")); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + + if (clients) { + rv = rcm_notify_online_list(hdl, clients, flags, infop); + free(clients); + } + + (void) mutex_unlock(&mpxio_lock); + + return (rv); +} + +/* + * If clients are affected, then they are probably offline and we need to + * propagate this removal notification to them. We can also remove the + * cache entry for this PHCI. If that leaves its group empty, then the + * group will be removed during the next register callback. + */ +static int +mpxio_remove(rcm_handle_t *hdl, char *rsrc, id_t id, uint_t flags, + char **errstr, rcm_info_t **infop) +{ + char **clients; + int rv = RCM_SUCCESS; + + rcm_log_message(RCM_TRACE1, "MPXIO: remove(%s)\n", rsrc); + + (void) mutex_lock(&mpxio_lock); + + if (get_affected_clients(hdl, rsrc, CMD_REMOVE, flags, &clients) < 0) { + *errstr = strdup(gettext("Cannot lookup clients.")); + (void) mutex_unlock(&mpxio_lock); + return (RCM_FAILURE); + } + + if (clients) { + rv = rcm_notify_remove_list(hdl, clients, flags, infop); + free(clients); + } + + (void) mutex_unlock(&mpxio_lock); + + return (rv); +} + + +/* + * Returns a string representation of a given libdevinfo path state. 
+ */
+static char *
+s_state(di_path_state_t state)
+{
+	switch (state) {
+	case DI_PATH_STATE_ONLINE:
+		return ("online");
+	case DI_PATH_STATE_OFFLINE:
+		return ("offline");
+	case DI_PATH_STATE_STANDBY:
+		return ("standby");
+	case DI_PATH_STATE_FAULT:
+		return ("faulted");
+	default:
+		return ("<unknown>");
+	}
+}
+
+/*
+ * Builds the NULL-terminated array of clients that are affected by
+ * performing 'cmd' on the PHCI named by 'rsrc'.
+ *
+ * On success 0 is returned and *clientsp is set to the array, or to
+ * NULL if no client is affected.  The array itself must be released
+ * with free(); the strings in it belong to the group list and must
+ * not be freed.  On failure -1 is returned and *clientsp is NULL.
+ */
+static int
+get_affected_clients(rcm_handle_t *hdl, char *rsrc, int cmd, int flags,
+    char ***clientsp)
+{
+	int nclients = 0;
+	phci_t phci;
+	group_t *group;
+	char **clients = NULL;
+
+	/* Never leave the caller's pointer unset, even when failing. */
+	*clientsp = NULL;
+
+	/* Build a dummy phci_t for use with bsearch(). */
+	phci.path = rsrc;
+
+	/* Analyze the effects upon each group. */
+	for (group = group_list; group != NULL; group = group->next) {
+
+		/* If the PHCI isn't in the group, then no effects.  Skip. */
+		if (bsearch(&phci, group->phcis, group->nphcis, sizeof (phci_t),
+		    compare_phci) == NULL)
+			continue;
+
+		/*
+		 * Merge in the clients.  All clients are merged in for getinfo
+		 * operations.  Otherwise it's contingent upon a state change
+		 * being transferred to the clients as a result of changing
+		 * the PHCI's state.
+		 */
+		if ((cmd == CMD_GETINFO) ||
+		    detect_client_change(hdl, cmd, flags, group, rsrc)) {
+			if (merge_clients(&nclients, &clients, group) < 0) {
+				/*
+				 * Only the array is ours.  Its entries
+				 * reference strings owned by the groups,
+				 * so release it the same way our callers
+				 * do rather than with free_clients().
+				 */
+				free(clients);
+				return (-1);
+			}
+		}
+	}
+
+	/* Return the array of affected disks */
+	*clientsp = clients;
+	return (0);
+}
+
+/*
+ * Iterates through the members of a PHCI list, returning the entry
+ * corresponding to the named PHCI resource.  Returns NULL when the lookup
+ * fails.
+ */
+static phci_list_t *
+lookup_phci(char *rsrc)
+{
+	phci_list_t *reg;
+
+	for (reg = reg_list; reg != NULL; reg = reg->next) {
+		if (strcmp(reg->phci.path, rsrc) == 0)
+			return (reg);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Tests whether or not an operation on a specific PHCI resource would affect
+ * the array of client devices attached to the PHCI's MPxIO group.
+ *
+ * Returns: 1 if clients would be affected, 0 if not.
+ */ +static int +detect_client_change(rcm_handle_t *hdl, int cmd, int flags, group_t *group, + char *rsrc) +{ + int i; + int state; + + /* + * Perform a full set analysis on the set of redundant PHCIs. When + * there are no unaffected and online PHCIs, then changing the state + * of the named PHCI results in a client state change. + */ + for (i = 0; i < group->nphcis; i++) { + + /* Filter the named resource out of the analysis */ + if (strcmp(group->phcis[i].path, rsrc) == 0) + continue; + + /* + * If we find a path that's in the ONLINE or STANDBY state + * that would be left over in the system after completing + * whatever DR or hotplugging operation is in progress, then + * return a 0. + */ + if ((group->phcis[i].state == DI_PATH_STATE_ONLINE) || + (group->phcis[i].state == DI_PATH_STATE_STANDBY)) { + if (rcm_get_rsrcstate(hdl, group->phcis[i].path, &state) + != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + "MPXIO: Failed to query resource state\n"); + continue; + } + rcm_log_message(RCM_TRACE2, "MPXIO: state of %s: %d\n", + group->phcis[i].path, state); + if (state == RCM_STATE_ONLINE) { + return (0); + } + } + } + + /* + * The analysis above didn't find a redundant path to take over. So + * report that the state of the client resources will change. + */ + return (1); +} + +/* + * Merges the client disks connected to a particular MPxIO group in with a + * previous array of disk clients. The result is to adjust the 'nclients' + * value with the new count of disks in the array, and to adjust the 'disks' + * value to be a larger array of disks including its original contents along + * with the current group's contents merged in. 
+ */ +static int +merge_clients(int *nclients, char ***clientsp, group_t *group) +{ + int i; + int old_nclients; + char **clients_new; + + if (group->nclients) { + old_nclients = *nclients; + *nclients += group->nclients; + clients_new = realloc(*clientsp, + ((*nclients) + 1) * sizeof (char *)); + if (clients_new == NULL) { + rcm_log_message(RCM_ERROR, + "MPXIO: cannot reallocate client array (%s).\n", + strerror(errno)); + return (-1); + } + for (i = old_nclients; i < (*nclients); i++) { + /* + * Don't allocate space for individual disks in the + * merged list. Just make references to the previously + * allocated strings in the group_t structs themselves. + */ + clients_new[i] = group->clients[i - old_nclients]; + } + clients_new[(*nclients)] = NULL; + *clientsp = clients_new; + } + + return (0); +} + +/* + * A libdevinfo di_walk_node() callback. It's passed an integer pointer as an + * argument, and it increments the integer each time it encounters an MPxIO + * client. By initializing the integer to zero and doing a libdevinfo walk with + * this function, the total count of MPxIO clients in the system can be found. + */ +static int +get_nclients(di_node_t dinode, void *arg) +{ + int *nclients = arg; + + if (is_client(dinode)) + (*nclients)++; + + return (DI_WALK_CONTINUE); +} + +/* + * Tests a libdevinfo node to determine if it's an MPxIO client. + * + * Returns: non-zero for true, 0 for false. + */ +static int +is_client(di_node_t dinode) +{ + return (di_path_next_phci(dinode, DI_PATH_NIL) != DI_PATH_NIL); +} + +/* + * After a new group_list has been constructed, this refreshes the RCM + * registrations and the reg_list contents. It uses a clock like algorithm + * with reference bits in the reg_list to know which registrants are new or + * old. + */ +static void +refresh_regs(rcm_handle_t *hdl) +{ + int i; + group_t *group; + phci_list_t *reg; + phci_list_t *prev_reg; + + /* + * First part of the clock-like algorithm: clear reference bits. 
+ */ + for (reg = reg_list; reg != NULL; reg = reg->next) + reg->referenced = CACHE_STALE; + + /* + * Second part of the clock-like algorithm: set the reference bits + * on every registrant that's still active. (Also add new list nodes + * for new registrants.) + */ + for (group = group_list; group != NULL; group = group->next) { + for (i = 0; i < group->nphcis; i++) { + + /* + * If already stale in the registrants list, just set + * its reference bit to REFERENCED and update its state. + */ + if ((reg = lookup_phci(group->phcis[i].path)) != NULL) { + if (reg->referenced == CACHE_STALE) + reg->referenced = CACHE_REFERENCED; + reg->phci.state = group->phcis[i].state; + continue; + } + + /* + * Otherwise, build a new list node and mark it NEW. + */ + reg = (phci_list_t *)calloc(1, sizeof (*reg)); + if (reg == NULL) { + rcm_log_message(RCM_ERROR, + "MPXIO: cannot allocate phci_list (%s).\n", + strerror(errno)); + continue; + } + reg->phci.path = strdup(group->phcis[i].path); + if (reg->phci.path == NULL) { + free(reg); + rcm_log_message(RCM_ERROR, + "MPXIO: cannot allocate phci path (%s).\n", + strerror(errno)); + continue; + } + reg->phci.state = group->phcis[i].state; + reg->referenced = CACHE_NEW; + + /* Link it at the head of reg_list */ + reg->next = reg_list; + reg_list = reg; + } + } + + /* + * Final part of the clock algorithm: unregister stale entries, and + * register new entries. Stale entries get removed from the list. + */ + reg = reg_list; + prev_reg = NULL; + while (reg) { + + /* Unregister and remove stale entries. */ + if (reg->referenced == CACHE_STALE) { + (void) rcm_unregister_interest(hdl, reg->phci.path, 0); + free(reg->phci.path); + if (prev_reg == NULL) { + reg_list = reg->next; + free(reg); + reg = reg_list; + } else { + prev_reg->next = reg->next; + free(reg); + reg = prev_reg->next; + } + continue; + } + + /* Register new entries. 
*/ + if (reg->referenced == CACHE_NEW) { + if (rcm_register_interest(hdl, reg->phci.path, 0, NULL) + != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + "MPXIO: failed to register %s (%s).\n", + reg->phci.path, strerror(errno)); + } + } + + prev_reg = reg; + reg = reg->next; + } +} + + +/* + * A libdevinfo di_walk_node() callback that builds up the MPxIO group list. + * + * Every node encountered that's a client node is added into a group's client + * list. Whenever a group doesn't already exist with a matching set of + * related PHCIs, then a new group is constructed and put at the head of the + * group list. + */ +static int +build_groups(di_node_t dinode, void *arg) +{ + int i = 0; + int nphcis = 0; + int *nclients = (int *)arg; + phci_t *phcis; + group_t *group; + di_node_t phcinode; + di_path_t dipath = DI_PATH_NIL; + + /* Safety check */ + if (nclients == NULL) + return (DI_WALK_TERMINATE); + + /* + * Build a sorted array of PHCIs pertaining to the client. + */ + while ((dipath = di_path_next_phci(dinode, dipath)) != DI_PATH_NIL) + nphcis++; + + /* Skip non-clients. */ + if (nphcis == 0) + return (DI_WALK_CONTINUE); + + if ((phcis = (phci_t *)calloc(nphcis, sizeof (phci_t))) == NULL) { + rcm_log_message(RCM_ERROR, + "MPXIO: failed to allocate client's PHCIs (%s).\n", + strerror(errno)); + return (DI_WALK_TERMINATE); + } + while ((dipath = di_path_next_phci(dinode, dipath)) != DI_PATH_NIL) { + phcinode = di_path_phci_node(dipath); + if (phcinode == DI_NODE_NIL) { + free_phcis(i, phcis); /* free preceeding PHCIs */ + rcm_log_message(RCM_ERROR, + "MPXIO: client appears to have no PHCIs.\n"); + return (DI_WALK_TERMINATE); + } + if ((phcis[i].path = get_rsrcname(phcinode)) == NULL) { + free_phcis(i, phcis); + return (DI_WALK_TERMINATE); + } + phcis[i].state = di_path_state(dipath); + i++; + } + qsort(phcis, nphcis, sizeof (phci_t), compare_phci); + + /* + * Compare that PHCI set to each existing group's set. 
We just add + * the client to the group and exit successfully once a match is made. + * Falling out of this loop means no match was found. + */ + for (group = group_list; group != NULL; group = group->next) { + + /* There is no match if the number of PHCIs is inequal */ + if (nphcis != group->nphcis) + continue; + + /* Compare the PHCIs linearly (which is okay; they're sorted) */ + for (i = 0; i < nphcis; i++) + if (strcmp(phcis[i].path, group->phcis[i].path) != 0) + break; + + /* + * If the loop above completed, we have a match. Add the client + * to the group's disk array in that case, and return + * successfully. + */ + if (i == nphcis) { + free_phcis(nphcis, phcis); + if ((group->clients[group->nclients] = + get_rsrcname(dinode)) == NULL) + return (DI_WALK_TERMINATE); + group->nclients++; + return (DI_WALK_CONTINUE); + } + } + + /* The loop above didn't find a match. So build a new group. */ + if ((group = (group_t *)calloc(1, sizeof (*group))) == NULL) { + rcm_log_message(RCM_ERROR, + "MPXIO: failed to allocate PHCI group (%s).\n", + strerror(errno)); + free_phcis(nphcis, phcis); + return (DI_WALK_TERMINATE); + } + if ((group->clients = (char **)calloc(*nclients, sizeof (char *))) == + NULL) { + free(group); + free_phcis(nphcis, phcis); + return (DI_WALK_TERMINATE); + } + group->nphcis = nphcis; + group->phcis = phcis; + if ((group->clients[0] = get_rsrcname(dinode)) == NULL) { + free_group(group); + return (DI_WALK_TERMINATE); + } + group->nclients = 1; + + /* Link the group into the group list and return successfully. */ + group->next = group_list; + group_list = group; + return (DI_WALK_CONTINUE); +} + +/* + * For bsearch() and qsort(). Returns the results of a strcmp() on the names + * of two phci_t's. 
+ */ +static int +compare_phci(const void *arg1, const void *arg2) +{ + phci_t *p1 = (phci_t *)arg1; + phci_t *p2 = (phci_t *)arg2; + + if ((p1 == NULL) || (p2 == NULL)) { + if (p1 != NULL) + return (-1); + else if (p2 != NULL) + return (1); + return (0); + } + + return (strcmp(p1->path, p2->path)); +} + +/* + * Free the whole list of group's in the global group_list. + */ +static void +free_grouplist() +{ + group_t *group = group_list; + group_t *next; + + while (group) { + next = group->next; + free_group(group); + group = next; + } + + group_list = NULL; +} + +/* + * Free the contents of a single group_t. + */ +static void +free_group(group_t *group) +{ + if (group) { + free_phcis(group->nphcis, group->phcis); + free_clients(group->nclients, group->clients); + free(group); + } +} + +/* + * Free an array of clients. + */ +static void +free_clients(int nclients, char **clients) +{ + int i; + + if (clients != NULL) { + if (nclients > 0) { + for (i = 0; i < nclients; i++) + if (clients[i]) + free(clients[i]); + } + free(clients); + } +} + +/* + * Free an array of phci_t's. + */ +static void +free_phcis(int nphcis, phci_t *phcis) +{ + int i; + + if ((phcis != NULL) && (nphcis > 0)) { + for (i = 0; i < nphcis; i++) + if (phcis[i].path) + free(phcis[i].path); + free(phcis); + } +} + +/* + * Converts a libdevinfo node into a /devices path. Caller must free results. 
+ */ +static char * +get_rsrcname(di_node_t dinode) +{ + int len; + char *rsrcname; + char *devfspath; + char name[MAXPATHLEN]; + + if ((devfspath = di_devfs_path(dinode)) == NULL) { + rcm_log_message(RCM_ERROR, "MPXIO: resource has null path.\n"); + return (NULL); + } + + len = snprintf(name, sizeof (name), "/devices%s", devfspath); + di_devfs_path_free(devfspath); + if (len >= sizeof (name)) { + rcm_log_message(RCM_ERROR, "MPXIO: resource path too long.\n"); + return (NULL); + } + + if ((rsrcname = strdup(name)) == NULL) + rcm_log_message(RCM_ERROR, + "MPXIO: failed to allocate resource name (%s).\n", + strerror(errno)); + + return (rsrcname); +} diff --git a/usr/src/cmd/rcm_daemon/common/network_rcm.c b/usr/src/cmd/rcm_daemon/common/network_rcm.c new file mode 100644 index 0000000000..6072ff510a --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/network_rcm.c @@ -0,0 +1,909 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * This RCM module adds support to the RCM framework for an abstract + * namespace for network devices (DLPI providers). + */ +#include <alloca.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include <synch.h> +#include <libintl.h> +#include <errno.h> +#include <libdevinfo.h> +#include <ctype.h> +#include <sys/types.h> +#include <libdlpi.h> +#include "rcm_module.h" + +/* + * Definitions + */ +#ifndef lint +#define _(x) gettext(x) +#else +#define _(x) x +#endif + +#define CACHE_STALE 1 /* flags */ +#define CACHE_NEW 2 /* flags */ + +/* operations */ +#define NET_OFFLINE 1 +#define NET_ONLINE 2 +#define NET_REMOVE 3 +#define NET_SUSPEND 4 +#define NET_RESUME 5 + +/* + * PSARC decided that DLPI providers are not allowed to end in a digit. + * If this ever changes we could add a delimiter with this macro. + */ +#define NET_DELIMITER "" + +#define DLD_NAME "dld" + +typedef struct net_cache +{ + char *resource; + char *exported; + char *driver; + int ppa; + int flags; + struct net_cache *next; + struct net_cache *prev; +} net_cache_t; + +static net_cache_t cache_head; +static net_cache_t cache_tail; +static mutex_t cache_lock; +static int events_registered = 0; + +/* module interface routines */ +static int net_register(rcm_handle_t *); +static int net_unregister(rcm_handle_t *); +static int net_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, + char **, nvlist_t *, rcm_info_t **); +static int net_suspend(rcm_handle_t *, char *, id_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int net_resume(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int net_offline(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int net_online(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int net_remove(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int 
net_notify_event(rcm_handle_t *, char *, id_t, uint_t, + char **, nvlist_t *, rcm_info_t **); + +/* module private routines */ +static void free_cache(void); +static void update_cache(rcm_handle_t *hd); +static int devfs_entry(di_node_t node, di_minor_t minor, void *arg); +static void cache_remove(net_cache_t *node); +static net_cache_t *cache_lookup(const char *resource); +static void free_node(net_cache_t *); +static void cache_insert(net_cache_t *); + +/* + * Module-Private data + */ +static struct rcm_mod_ops net_ops = { + RCM_MOD_OPS_VERSION, + net_register, + net_unregister, + net_getinfo, + net_suspend, + net_resume, + net_offline, + net_online, + net_remove, + NULL, /* request_capacity_change */ + NULL, /* notify_capacity_change */ + net_notify_event +}; + +/* + * Module Interface Routines + */ + +/* + * rcm_mod_init() + * + * Update registrations, and return the ops structure. + */ +struct rcm_mod_ops * +rcm_mod_init(void) +{ + cache_head.next = &cache_tail; + cache_head.prev = NULL; + cache_tail.prev = &cache_head; + cache_tail.next = NULL; + (void) mutex_init(&cache_lock, NULL, NULL); + + /* Return the ops vectors */ + return (&net_ops); +} + +/* + * rcm_mod_info() + * + * Return a string describing this module. + */ +const char * +rcm_mod_info(void) +{ + return ("Network namespace module %I%"); +} + +/* + * rcm_mod_fini() + * + * Destroy the cache. + */ +int +rcm_mod_fini(void) +{ + free_cache(); + (void) mutex_destroy(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * net_register() + * + * Make sure the cache is properly sync'ed, and its registrations + * are in order. + * + * Locking: the cache is locked by update_cache, and is held + * throughout update_cache's execution because it reads and + * possibly modifies cache links continuously. 
+ */ +static int +net_register(rcm_handle_t *hd) +{ + if (events_registered == 0) { + (void) rcm_register_event(hd, "SUNW_resource/new", 0, NULL); + events_registered++; + } + update_cache(hd); + return (RCM_SUCCESS); +} + +/* + * net_unregister() + * + * Manually walk through the cache, unregistering all the networks. + * + * Locking: the cache is locked throughout the execution of this routine + * because it reads and modifies cache links continuously. + */ +static int +net_unregister(rcm_handle_t *hd) +{ + net_cache_t *probe; + + assert(hd != NULL); + + /* Walk the cache, unregistering everything */ + (void) mutex_lock(&cache_lock); + probe = cache_head.next; + while (probe != &cache_tail) { + (void) rcm_unregister_interest(hd, probe->resource, 0); + cache_remove(probe); + free_node(probe); + probe = cache_head.next; + } + (void) mutex_unlock(&cache_lock); + if (events_registered > 0) { + (void) rcm_unregister_event(hd, "SUNW_resource/new", 0); + events_registered--; + } + return (RCM_SUCCESS); +} + +/* + * Since all we do is pass operations thru, we provide a general + * routine for passing through operations. + */ +/*ARGSUSED*/ +static int +net_passthru(rcm_handle_t *hd, int op, const char *rsrc, uint_t flag, + char **reason, rcm_info_t **dependent_reason, void *arg) +{ + net_cache_t *node; + char *exported; + int rv; + + /* + * Lock the cache just long enough to extract information about this + * resource. + */ + (void) mutex_lock(&cache_lock); + node = cache_lookup(rsrc); + if (!node) { + rcm_log_message(RCM_WARNING, + _("NET: unrecognized resource %s\n"), rsrc); + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + /* + * Since node->exported could be freed after we drop cache_lock, + * allocate a stack-local copy. We don't use strdup() because some of + * the operations (such as NET_REMOVE) are not allowed to fail. Note + * that node->exported is never more than MAXPATHLEN bytes. 
+ */ + exported = alloca(strlen(node->exported) + 1); + (void) strlcpy(exported, node->exported, strlen(node->exported) + 1); + + /* + * Remove notifications are unconditional in the RCM state model, + * so it's safe to remove the node from the cache at this point. + * And we need to remove it so that we will recognize it as a new + * resource following the reattachment of the resource. + */ + if (op == NET_REMOVE) { + cache_remove(node); + free_node(node); + } + (void) mutex_unlock(&cache_lock); + + switch (op) { + case NET_SUSPEND: + rv = rcm_request_suspend(hd, exported, flag, + (timespec_t *)arg, dependent_reason); + break; + case NET_OFFLINE: + rv = rcm_request_offline(hd, exported, flag, dependent_reason); + break; + case NET_ONLINE: + rv = rcm_notify_online(hd, exported, flag, dependent_reason); + break; + case NET_REMOVE: + rv = rcm_notify_remove(hd, exported, flag, dependent_reason); + break; + case NET_RESUME: + rv = rcm_notify_resume(hd, exported, flag, dependent_reason); + break; + default: + rcm_log_message(RCM_WARNING, + _("NET: bad RCM operation %1$d for %2$s\n"), op, exported); + errno = EINVAL; + return (RCM_FAILURE); + } + + if (rv != RCM_SUCCESS) { + char format[256]; + (void) snprintf(format, sizeof (format), + _("RCM operation on dependent %s did not succeed"), + exported); + rcm_log_message(RCM_WARNING, "NET: %s\n", format); + } + + return (rv); +} + + +/* + * net_offline() + * + * Determine dependents of the resource being offlined, and offline + * them all. 
+ */ +static int +net_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **reason, rcm_info_t **dependent_reason) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(reason != NULL); + assert(dependent_reason != NULL); + + rcm_log_message(RCM_TRACE1, "NET: offline(%s)\n", rsrc); + + return (net_passthru(hd, NET_OFFLINE, rsrc, flags, reason, + dependent_reason, NULL)); +} + +/* + * net_online() + * + * Remount the previously offlined filesystem, and online its dependents. + */ +static int +net_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **reason, + rcm_info_t **dependent_reason) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + + rcm_log_message(RCM_TRACE1, "NET: online(%s)\n", rsrc); + + return (net_passthru(hd, NET_ONLINE, rsrc, flag, reason, + dependent_reason, NULL)); +} + +/* + * net_getinfo() + * + * Gather usage information for this resource. + * + * Locking: the cache is locked while this routine looks up the + * resource and extracts copies of any piece of information it needs. + * The cache is then unlocked, and this routine performs the rest of + * its functions without touching any part of the cache. 
+ */ +/*ARGSUSED*/ +static int +net_getinfo(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, + char **info, char **errstr, nvlist_t *proplist, rcm_info_t **depend_info) +{ + int len; + char *exported; + char nic[64]; + const char *info_fmt; + net_cache_t *node; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(info != NULL); + assert(depend_info != NULL); + + rcm_log_message(RCM_TRACE1, "NET: getinfo(%s)\n", rsrc); + + info_fmt = _("Network interface %s"); + + (void) mutex_lock(&cache_lock); + node = cache_lookup(rsrc); + if (!node) { + rcm_log_message(RCM_WARNING, + _("NET: unrecognized resource %s\n"), rsrc); + (void) mutex_unlock(&cache_lock); + errno = ENOENT; + return (RCM_FAILURE); + } + exported = strdup(node->exported); + if (!exported) { + rcm_log_message(RCM_ERROR, _("NET: strdup failure")); + (void) mutex_unlock(&cache_lock); + return (RCM_FAILURE); + } + + (void) snprintf(nic, sizeof (nic), "%s%d", node->driver, node->ppa); + (void) mutex_unlock(&cache_lock); + + len = strlen(info_fmt) + strlen(nic) + 1; + if ((*info = (char *)malloc(len)) == NULL) { + rcm_log_message(RCM_ERROR, _("NET: malloc failure")); + free(exported); + return (RCM_FAILURE); + } + + /* Fill in the string */ + (void) snprintf(*info, len, info_fmt, nic); + + /* Get dependent info if requested */ + if ((flag & RCM_INCLUDE_DEPENDENT) || (flag & RCM_INCLUDE_SUBTREE)) { + (void) rcm_get_info(hd, exported, flag, depend_info); + } + + (void) nvlist_add_string(proplist, RCM_CLIENT_NAME, "SunOS"); + (void) nvlist_add_string_array(proplist, RCM_CLIENT_EXPORTS, + &exported, 1); + + free(exported); + return (RCM_SUCCESS); +} + +/* + * net_suspend() + * + * Notify all dependents that the resource is being suspended. + * Since no real operation is involved, QUERY or not doesn't matter. + * + * Locking: the cache is only used to retrieve some information about + * this resource, so it is only locked during that retrieval. 
+ */ +static int +net_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, + uint_t flag, char **reason, rcm_info_t **dependent_reason) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(interval != NULL); + assert(reason != NULL); + assert(dependent_reason != NULL); + + rcm_log_message(RCM_TRACE1, "NET: suspend(%s)\n", rsrc); + + return (net_passthru(hd, NET_SUSPEND, rsrc, flag, reason, + dependent_reason, (void *)interval)); +} + +/* + * net_resume() + * + * Resume all the dependents of a suspended network. + * + * Locking: the cache is only used to retrieve some information about + * this resource, so it is only locked during that retrieval. + */ +static int +net_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, + rcm_info_t **dependent_info) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(info != NULL); + assert(dependent_info != NULL); + + rcm_log_message(RCM_TRACE1, "NET: resume(%s)\n", rsrc); + + return (net_passthru(hd, NET_RESUME, rsrc, flag, info, dependent_info, + NULL)); +} + +/* + * net_remove() + * + * This is another NO-OP for us, we just passthru the information. We + * don't need to remove it from our cache. We don't unregister + * interest at this point either; the network device name is still + * around. This way we don't have to change this logic when we + * gain the ability to learn about DR attach operations. + */ +static int +net_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **info, + rcm_info_t **dependent_info) +{ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(info != NULL); + assert(dependent_info != NULL); + + rcm_log_message(RCM_TRACE1, "NET: remove(%s)\n", rsrc); + + return (net_passthru(hd, NET_REMOVE, rsrc, flag, info, dependent_info, + NULL)); +} + +/* + * net_notify_event() + * + * Receive new resource events. 
If the resource is a network + * device, then pass up a notify for it too. No need to cache + * it, though, since we'll do that in our register() routine the + * next time we're called. + */ +/*ARGSUSED*/ +static int +net_notify_event(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, + char **errstr, nvlist_t *nvl, rcm_info_t **result) +{ + char *devname = NULL, *nodetype, *driver, *kpath; + char ifname[MAXPATHLEN]; + di_node_t node; + di_minor_t minor; + nvlist_t *nvlist; + nvpair_t *nvp = NULL; + int rv; + + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(nvl != NULL); + assert(result != NULL); + + rcm_log_message(RCM_TRACE1, "NET: notify_event(%s)\n", rsrc); + + if (strcmp(rsrc, "SUNW_resource/new") != 0) { + /* how did we get this? we didn't ask for it! */ + rcm_log_message(RCM_WARNING, + _("NET: unrecognized event for %s\n"), rsrc); + return (RCM_FAILURE); + } + + /* is it a /devices resource? */ + /* + * note: we'd like to use nvlist_lookup_string, but a bug in + * libnvpair breaks lookups, so we have to walk it ourself. 
+ */ +#ifdef NVLIST_LOOKUP_NOTBROKEN + if (nvlist_lookup_string(nvl, RCM_RSRCNAME, &devname) != 0) { + /* resource not found */ + rcm_log_message(RCM_WARNING, + _("NET: event without resource name\n")); + return (RCM_FAILURE); + } +#else + while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { + if (strcmp(nvpair_name(nvp), RCM_RSRCNAME) == 0) { + if (nvpair_value_string(nvp, &devname) != 0) { + rcm_log_message(RCM_WARNING, + _("NET: cannot get event " + "resource value\n")); + return (RCM_FAILURE); + } + break; + } + } + if (devname == NULL) { + rcm_log_message(RCM_WARNING, + _("NET: event without resource name\n")); + return (RCM_FAILURE); + } +#endif + rcm_log_message(RCM_TRACE1, "NET: new rsrc(%s)\n", devname); + if (strncmp(devname, "/devices/", strlen("/devices/")) != 0) { + /* not a /devices resource, we ignore it */ + rcm_log_message(RCM_TRACE1, "NET: %s not for us\n", devname); + return (RCM_SUCCESS); + } + kpath = devname + strlen("/devices"); + if (strncmp(kpath, "/pseudo/", strlen("/pseudo/")) == 0) { + /* pseudo device , not for us */ + rcm_log_message(RCM_TRACE1, "NET: ignoring pseudo %s\n", + devname); + return (RCM_SUCCESS); + } + + /* just snapshot the specific tree we need */ + if ((node = di_init(kpath, DINFOMINOR)) == NULL) { + rcm_log_message(RCM_ERROR, + _("NET: cannot initialize device tree\n")); + return (RCM_FAILURE); + } + + /* network devices usually only have a single minor node */ + if ((minor = di_minor_next(node, DI_MINOR_NIL)) == DI_MINOR_NIL) { + rcm_log_message(RCM_WARNING, + _("NET: cannot find minor for %s\n"), + devname); + di_fini(node); + return (RCM_FAILURE); + } + + nodetype = di_minor_nodetype(minor); + if ((nodetype == NULL) || (strcmp(nodetype, DDI_NT_NET) != 0)) { + /* doesn't look like a network device */ + rcm_log_message(RCM_TRACE1, "NET: %s not a NIC\n", devname); + goto done; + } + if ((driver = di_driver_name(node)) == NULL) { + rcm_log_message(RCM_TRACE1, "NET: no driver (%s)\n", devname); + goto done; + } + 
(void) snprintf(ifname, sizeof (ifname), "SUNW_network/%s%s%d", driver, + NET_DELIMITER, di_instance(node)); + + rcm_log_message(RCM_TRACE1, "NET: notifying arrival of %s\n", ifname); + /* build up our nvlist -- these shouldn't ever fail */ + if ((rv = nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0)) != 0) { + rcm_log_message(RCM_TRACE1, + "NET: nvlist alloc failed %d, errno %d\n", rv, errno); + } + + if ((rv = nvlist_add_string(nvlist, RCM_RSRCNAME, ifname)) != 0) { + rcm_log_message(RCM_TRACE1, + "NET: nvlist_add_string failed %d, errno %d\n", rv, errno); + } + /* now we need to do our own notification */ + rv = rcm_notify_event(hd, "SUNW_resource/new", 0, nvlist, result); + if (rv != RCM_SUCCESS) { + rcm_log_message(RCM_TRACE1, + "NET: notify_event failed: %s\n", strerror(errno)); + } else { + rcm_log_message(RCM_TRACE1, "NET: notify_event succeeded\n"); + } + + /* and clean up our nvlist */ + nvlist_free(nvlist); + +done: + di_fini(node); + return (RCM_SUCCESS); +} + +/* + * Cache management routines. Note that the cache is implemented as a + * trivial linked list, and is only required because RCM doesn't + * provide enough state about our own registrations back to us. This + * linked list implementation probably clobbers the CPU cache pretty + * well. + */ + +/* + * cache_lookup() + * + * Get a cache node for a resource. Call with cache lock held. + */ +static net_cache_t * +cache_lookup(const char *resource) +{ + net_cache_t *probe; + probe = cache_head.next; + while (probe != &cache_tail) { + if (probe->resource && + (strcmp(resource, probe->resource) == 0)) { + return (probe); + } + probe = probe->next; + } + return (NULL); +} + +/* + * free_node() + * + * Free a node. Make sure it isn't in the list! + */ +static void +free_node(net_cache_t *node) +{ + if (node) { + free(node->resource); + free(node->exported); + free(node->driver); + free(node); + } +} + +/* + * cache_insert() + * + * Call with the cache_lock held. 
+ */ +static void +cache_insert(net_cache_t *node) +{ + /* insert at the head for best performance */ + node->next = cache_head.next; + node->prev = &cache_head; + + node->next->prev = node; + node->prev->next = node; +} + +/* + * cache_remove() + * + * Call with the cache_lock held. + */ +static void +cache_remove(net_cache_t *node) +{ + node->next->prev = node->prev; + node->prev->next = node->next; + node->next = NULL; + node->prev = NULL; +} + +/* + * devfs_entry() + * + * Call with the cache_lock held. + */ +/*ARGSUSED*/ +static int +devfs_entry(di_node_t node, di_minor_t minor, void *arg) +{ + char ifname [MAXPATHLEN]; /* should be big enough! */ + char *devfspath; + char resource[MAXPATHLEN]; + char dev_name[MAXPATHLEN]; + char *name; + char *cp; + int instance; + net_cache_t *probe; + + cp = di_minor_nodetype(minor); + if ((cp == NULL) || (strcmp(cp, DDI_NT_NET))) { + /* doesn't look like a network device */ + return (DI_WALK_CONTINUE); + } + + name = di_driver_name(node); + if (name == NULL) { + /* what else can we do? */ + return (DI_WALK_CONTINUE); + } + + instance = di_instance(node); + + devfspath = di_devfs_path(node); + if (!devfspath) { + /* no devfs path?!? 
*/ + rcm_log_message(RCM_DEBUG, "NET: missing devfs path\n"); + return (DI_WALK_CONTINUE); + } + + if (strncmp("/pseudo", devfspath, strlen("/pseudo")) == 0) { + char *minor_name; + + if (strcmp(DLD_NAME, name) != 0) { + /* ignore pseudo devices, probably not really NICs */ + rcm_log_message(RCM_DEBUG, "NET: ignoring pseudo " + "device %s\n", devfspath); + di_devfs_path_free(devfspath); + return (DI_WALK_CONTINUE); + } + + /* we have a virtual datalink created by dld */ + di_devfs_path_free(devfspath); + devfspath = di_devfs_minor_path(minor); + rcm_log_message(RCM_DEBUG, "NET: virtual datalink \"%s\"\n", + devfspath); + + minor_name = di_minor_name(minor); + if (dlpi_if_parse(minor_name, dev_name, &instance) < 0 || + instance < 0) { + rcm_log_message(RCM_DEBUG, "NET: ignoring \"%s\" " + "(style 1)\n", devfspath); + di_devfs_path_free(devfspath); + return (DI_WALK_CONTINUE); + } + name = dev_name; + } + + (void) snprintf(resource, sizeof (resource), "/devices%s", devfspath); + di_devfs_path_free(devfspath); + + (void) snprintf(ifname, sizeof (ifname), "SUNW_network/%s%s%d", + name, NET_DELIMITER, instance); + + probe = cache_lookup(resource); + if (probe != NULL) { + rcm_log_message(RCM_DEBUG, "NET: %s already registered\n", + resource); + probe->flags &= ~(CACHE_STALE); + } else { + rcm_log_message(RCM_DEBUG, "NET: %s is new resource\n", + resource); + probe = calloc(1, sizeof (net_cache_t)); + if (!probe) { + rcm_log_message(RCM_ERROR, _("NET: malloc failure")); + return (DI_WALK_CONTINUE); + } + + probe->resource = strdup(resource); + probe->ppa = instance; + probe->driver = strdup(name); + probe->exported = strdup(ifname); + + if ((!probe->resource) || (!probe->exported) || + (!probe->driver)) { + free_node(probe); + return (DI_WALK_CONTINUE); + } + + probe->flags |= CACHE_NEW; + cache_insert(probe); + } + + return (DI_WALK_CONTINUE); +} + +/* + * update_cache() + * + * The devinfo tree walking code is lifted from ifconfig.c. 
+ */
+static void
+update_cache(rcm_handle_t *hd)
+{
+	net_cache_t	*ent;
+	net_cache_t	*following;
+	di_node_t	devtree;
+	int		status;
+
+	(void) mutex_lock(&cache_lock);
+
+	/* Pass 1: presume every cached entry is gone until the walk sees it */
+	for (ent = cache_head.next; ent != &cache_tail; ent = ent->next)
+		ent->flags |= CACHE_STALE;
+
+	devtree = di_init("/", DINFOSUBTREE | DINFOMINOR);
+	if (devtree != DI_NODE_NIL) {
+		/* Pass 2: devfs_entry() un-stales known entries, adds new */
+		(void) di_walk_minor(devtree, DDI_NT_NET, DI_CHECK_ALIAS, NULL,
+		    devfs_entry);
+
+		di_fini(devtree);
+
+		/*
+		 * Pass 3: drop entries that are still stale and register
+		 * interest in the ones flagged as new.
+		 */
+		for (ent = cache_head.next; ent != &cache_tail;
+		    ent = following) {
+			/* saved first: ent may be freed below */
+			following = ent->next;
+
+			if (ent->flags & CACHE_STALE) {
+				(void) rcm_unregister_interest(hd,
+				    ent->resource, 0);
+				rcm_log_message(RCM_DEBUG,
+				    "NET: unregistered %s\n", ent->resource);
+				cache_remove(ent);
+				free_node(ent);
+				continue;
+			}
+
+			/* already registered on an earlier pass */
+			if ((ent->flags & CACHE_NEW) == 0)
+				continue;
+
+			rcm_log_message(RCM_DEBUG, "NET: registering %s\n",
+			    ent->resource);
+			status = rcm_register_interest(hd, ent->resource, 0,
+			    NULL);
+			if (status == RCM_SUCCESS) {
+				rcm_log_message(RCM_DEBUG,
+				    "NET: registered %s (as %s)\n",
+				    ent->resource, ent->exported);
+				ent->flags &= ~(CACHE_NEW);
+			} else {
+				/* CACHE_NEW stays set on failure */
+				rcm_log_message(RCM_ERROR,
+				    _("NET: failed to register %s\n"),
+				    ent->resource);
+			}
+		}
+	}
+
+	(void) mutex_unlock(&cache_lock);
+}
+
+/*
+ * free_cache()
+ *
+ * Unlink and free every entry between the list sentinels, holding the
+ * cache_lock for the duration.
+ */
+static void
+free_cache(void)
+{
+	net_cache_t	*first;
+
+	(void) mutex_lock(&cache_lock);
+	while ((first = cache_head.next) != &cache_tail) {
+		cache_remove(first);
+		free_node(first);
+	}
+	(void) mutex_unlock(&cache_lock);
+}
diff --git a/usr/src/cmd/rcm_daemon/common/pool_rcm.c b/usr/src/cmd/rcm_daemon/common/pool_rcm.c
new file mode 100644
index 0000000000..6a0e920cc0
--- /dev/null
+++ b/usr/src/cmd/rcm_daemon/common/pool_rcm.c
@@ -0,0 +1,495 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of
this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include <errno.h> +#include <libintl.h> +#include <string.h> +#include <rcm_module.h> +#include <sys/pset.h> + +#include <pool.h> + +/* + * RCM module ops. 
+ */
+static int	pool_register(rcm_handle_t *);
+static int	pool_unregister(rcm_handle_t *);
+static int	pool_get_info(rcm_handle_t *, char *, id_t, uint_t, char **,
+    char **, nvlist_t *, rcm_info_t **);
+static int	pool_request_suspend(rcm_handle_t *, char *, id_t,
+    timespec_t *, uint_t, char **, rcm_info_t **);
+static int	pool_notify_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
+    rcm_info_t **);
+static int	pool_notify_remove(rcm_handle_t *, char *, id_t, uint_t,
+    char **, rcm_info_t **);
+static int	pool_request_offline(rcm_handle_t *, char *, id_t, uint_t,
+    char **, rcm_info_t **);
+static int	pool_notify_online(rcm_handle_t *, char *, id_t, uint_t, char **,
+    rcm_info_t **);
+static int	pool_request_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
+    nvlist_t *, char **, rcm_info_t **);
+static int	pool_notify_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
+    nvlist_t *, char **, rcm_info_t **);
+
+/*
+ * Pool-specific callback functions.
+ */
+static int	pset_validate_remove(nvlist_t *, char **);
+
+/*
+ * Resources this module registers capacity interest in, each paired with
+ * the callback that validates a capacity change request for it.  The table
+ * is terminated by a NULL rsrc.
+ */
+static struct {
+	const char *rsrc;
+	int (*capacity_change_cb)(nvlist_t *, char **);
+} registrations[] = {
+	{ "SUNW_cpu", pset_validate_remove },
+	{ NULL, NULL }
+};
+
+static int registered = 0;
+
+static struct rcm_mod_ops pool_ops = {
+	RCM_MOD_OPS_VERSION,
+	pool_register,
+	pool_unregister,
+	pool_get_info,
+	pool_request_suspend,
+	pool_notify_resume,
+	pool_request_offline,
+	pool_notify_online,
+	pool_notify_remove,
+	pool_request_capacity_change,
+	pool_notify_capacity_change,
+	NULL
+};
+
+struct rcm_mod_ops *
+rcm_mod_init(void)
+{
+	rcm_log_message(RCM_TRACE1, "Pools RCM module created\n");
+	return (&pool_ops);
+}
+
+
+int
+rcm_mod_fini(void)
+{
+	rcm_log_message(RCM_TRACE1, "Pools RCM module unloaded\n");
+	return (RCM_SUCCESS);
+}
+
+const char *
+rcm_mod_info(void)
+{
+	return ("Pools RCM module %I%");
+}
+
+/*
+ * pool_check_pset()
+ *	Check whether removing the cpus in del_cpus (a list terminated by -1)
+ *	would leave processor set 'res' with fewer cpus than its 'pset.min'
+ *	property.
+ *
+ *	Returns 0 if the pset holds none of the cpus or stays at or above its
+ *	minimum, and -1 on any failure.  When the minimum would be violated,
+ *	*errorp is set to a malloc'd message if that allocation succeeds.
+ */
+static int
+pool_check_pset(pool_conf_t *conf, pool_resource_t *res,
+    processorid_t *del_cpus, char **errorp)
+{
+	int64_t tmp;
+	int i, j;
+	uint_t num_cpus;
+	uint64_t min_cpus;
+	uint_t num_found = 0;
+	processorid_t *cpulist;
+	psetid_t psetid;
+	pool_value_t *pval;
+	pool_elem_t *elem = pool_resource_to_elem(conf, res);
+
+	if ((pval = pool_value_alloc()) == NULL)
+		return (-1);
+	if (pool_get_property(conf, elem, "pset.min", pval) != POC_UINT) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: cannot find property 'pset.min' in pset\n"));
+		pool_value_free(pval);
+		return (-1);
+	}
+	(void) pool_value_get_uint64(pval, &min_cpus);
+	if (pool_get_property(conf, elem, "pset.sys_id", pval) != POC_INT) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: cannot get pset.sys_id\n"));
+		pool_value_free(pval);
+		return (-1);
+	}
+	(void) pool_value_get_int64(pval, &tmp);
+	pool_value_free(pval);
+	psetid = (psetid_t)tmp;
+	rcm_log_message(RCM_TRACE1, "POOL: checking pset: %d\n", psetid);
+
+	rcm_log_message(RCM_TRACE1, "POOL: min_cpus is %llu\n", min_cpus);
+	/* first call only fetches the cpu count, to size the list */
+	if (pset_info(psetid, NULL, &num_cpus, NULL) != 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
+		    strerror(errno));
+		return (-1);
+	}
+	if ((cpulist = malloc(num_cpus * sizeof (processorid_t))) == NULL) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
+		return (-1);
+	}
+	if (pset_info(psetid, NULL, &num_cpus, cpulist) != 0) {
+		free(cpulist);
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
+		    strerror(errno));
+		return (-1);
+	}
+	/* count how many of the cpus being removed belong to this pset */
+	for (i = 0; del_cpus[i] != -1; i++)
+		for (j = 0; j < num_cpus; j++)
+			if (cpulist[j] == del_cpus[i])
+				num_found++;
+	free(cpulist);
+	if (num_found > 0 && (num_cpus - num_found) < (uint_t)min_cpus) {
+		int len;
+		char *errval;
+		const char *errfmt =
+		    gettext("POOL: processor set (%1$d) would go "
+		    "below its minimum value of %2$u\n");
+
+		/*
+		 * We would go below the min value. Fail this request.
+		 *
+		 * Let snprintf() measure the formatted message so that it
+		 * is never truncated, whatever the length of the translated
+		 * format or the magnitude of the values.
+		 */
+		len = snprintf(NULL, 0, errfmt, psetid, (uint_t)min_cpus);
+		if (len >= 0 &&
+		    (errval = malloc((size_t)len + 1)) != NULL) {
+			(void) snprintf(errval, (size_t)len + 1, errfmt,
+			    psetid, (uint_t)min_cpus);
+			*errorp = errval;
+		}
+
+		rcm_log_message(RCM_ERROR, (char *)errfmt, psetid,
+		    (uint_t)min_cpus);
+
+		return (-1);
+	}
+	rcm_log_message(RCM_TRACE1, "POOL: pset %d is fine\n", psetid);
+	return (0);
+}
+
+/*
+ * pset_validate_remove()
+ *	Check to see if the requested cpu removal would be acceptable.
+ *	Returns RCM_FAILURE if not.
+ *
+ *	The nvlist must carry 'old_total', 'new_total', 'old_cpu_list' and
+ *	'new_cpu_list'.  The removed cpus are the members of the old list that
+ *	are absent from the new one; every pset in the dynamic pools
+ *	configuration is then asked whether it can lose them.
+ *
+ *	RCM_SUCCESS is also returned when the dynamic configuration cannot be
+ *	opened or when the request is not a removal (new_total >= old_total).
+ *	On failure *errorp is set to a strdup'd message if not already set.
+ */
+static int
+pset_validate_remove(nvlist_t *nvl, char **errorp)
+{
+	int error = RCM_SUCCESS;
+	int32_t old_total, new_total, removed_total;
+	processorid_t *removed_list = NULL; /* list terminated by (-1). */
+	processorid_t *old_cpu_list = NULL, *new_cpu_list = NULL;
+	int i, j;
+	pool_conf_t *conf;
+	pool_value_t *pvals[] = { NULL, NULL };
+	pool_resource_t **res = NULL;
+	uint_t nelem;
+	const char *generic_error = gettext("POOL: Error processing request\n");
+
+	if ((conf = pool_conf_alloc()) == NULL)
+		return (RCM_FAILURE);
+	if (pool_conf_open(conf, pool_dynamic_location(), PO_RDONLY) < 0) {
+		rcm_log_message(RCM_TRACE1,
+		    "POOL: failed to parse config file: '%s'\n",
+		    pool_dynamic_location());
+		pool_conf_free(conf);
+		return (RCM_SUCCESS);
+	}
+
+	if ((error = nvlist_lookup_int32(nvl, "old_total", &old_total)) != 0) {
+		(void) pool_conf_close(conf);
+		pool_conf_free(conf);
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: unable to find 'old_total' in nvlist: %s\n"),
+		    strerror(error));
+		*errorp = strdup(generic_error);
+		return (RCM_FAILURE);
+	}
+	if ((error = nvlist_lookup_int32(nvl, "new_total", &new_total)) != 0) {
+		(void) pool_conf_close(conf);
+		pool_conf_free(conf);
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: unable to find 'new_total' in nvlist: %s\n"),
+		    strerror(error));
+		*errorp = strdup(generic_error);
+		return (RCM_FAILURE);
+	}
+	if (new_total >= old_total) {
+		(void) pool_conf_close(conf);
+		pool_conf_free(conf);
+		/*
+		 * This doesn't look like a cpu removal.
+		 */
+		rcm_log_message(RCM_TRACE1,
+		    gettext("POOL: 'old_total' (%d) is less than 'new_total' "
+		    "(%d)\n"), old_total, new_total);
+		return (RCM_SUCCESS);
+	}
+	if ((error = nvlist_lookup_int32_array(nvl, "old_cpu_list",
+	    &old_cpu_list, &nelem)) != 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: 'old_cpu_list' not found in nvlist: %s\n"),
+		    strerror(error));
+		error = RCM_FAILURE;
+		goto out;
+	}
+	if ((int32_t)nelem != old_total) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: 'old_cpu_list' size mismatch: %1$d vs "
+		    "%2$d\n"), nelem, old_total);
+		error = RCM_FAILURE;
+		goto out;
+	}
+	if ((error = nvlist_lookup_int32_array(nvl, "new_cpu_list",
+	    &new_cpu_list, &nelem)) != 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: 'new_cpu_list' not found in nvlist: %s\n"),
+		    strerror(error));
+		error = RCM_FAILURE;
+		goto out;
+	}
+	if ((int32_t)nelem != new_total) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: 'new_cpu_list' size mismatch: %1$d vs "
+		    "%2$d\n"), nelem, new_total);
+		error = RCM_FAILURE;
+		goto out;
+	}
+
+	/*
+	 * Size the removed list only now that both totals have been checked
+	 * against the actual array lengths.  One extra slot holds the (-1)
+	 * terminator.
+	 */
+	if ((removed_list = malloc((old_total - new_total + 1) *
+	    sizeof (processorid_t))) == NULL) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
+
+		error = RCM_FAILURE;
+		goto out;
+	}
+
+	for (i = 0, removed_total = 0; i < old_total; i++) {
+		for (j = 0; j < new_total; j++)
+			if (old_cpu_list[i] == new_cpu_list[j])
+				break;
+		if (j == new_total) {	/* not found in new_cpu_list */
+			/*
+			 * If the new list is not a subset of the old one,
+			 * more cpus appear removed than the totals imply.
+			 * Fail before writing past the end of removed_list.
+			 */
+			if (removed_total >= (old_total - new_total)) {
+				rcm_log_message(RCM_ERROR, gettext(
+				    "POOL: error finding removed cpu list\n"));
+				error = RCM_FAILURE;
+				goto out;
+			}
+			removed_list[removed_total++] = old_cpu_list[i];
+		}
+	}
+	removed_list[removed_total] = -1;
+
+	if (removed_total != (old_total - new_total)) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: error finding removed cpu list\n"));
+		error = RCM_FAILURE;
+		goto out;
+	}
+	if ((pvals[0] = pool_value_alloc()) == NULL) {
+		rcm_log_message(RCM_ERROR, gettext("POOL: pool_value_alloc"
+		    " failed: %s\n"), strerror(errno));
+		error = RCM_FAILURE;
+		goto out;
+	}
+	/*
+	 * Look for resources with "'type' = 'pset'"
+	 */
+	pool_value_set_name(pvals[0], "type");
+	pool_value_set_string(pvals[0], "pset");
+	if ((res = pool_query_resources(conf, &nelem, pvals)) == NULL) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("POOL: No psets found in configuration\n"));
+		pool_value_free(pvals[0]);
+		error = RCM_FAILURE;
+		goto out;
+	}
+	pool_value_free(pvals[0]);
+	for (i = 0; res[i] != NULL; i++)
+		/*
+		 * Ask each pset if removing these cpus would cause it to go
+		 * below it's minimum value.
+		 */
+		if (pool_check_pset(conf, res[i], removed_list, errorp) < 0) {
+			error = RCM_FAILURE;
+			break;
+		}
+	free(res);
+out:
+	if (removed_list)
+		free(removed_list);
+	if (conf) {
+		(void) pool_conf_close(conf);
+		pool_conf_free(conf);
+	}
+
+	/*
+	 * Set the error string if not already set.
+	 */
+	if (error != RCM_SUCCESS && *errorp == NULL)
+		*errorp = strdup(generic_error);
+	return (error);
+}
+
+/*
+ * Returns RCM_SUCCESS in a number of error cases, since RCM_FAILURE would
+ * mean that the capacity change would be disallowed by this module,
+ * which is not what we mean.
+ */
+static int
+pool_request_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
+    uint_t flags, nvlist_t *nvlist, char **errorp, rcm_info_t **dependent_info)
+{
+	int idx = 0;
+
+	*errorp = NULL;
+	rcm_log_message(RCM_TRACE1,
+	    "POOL: requesting capacity change for: %s (flag: %d)\n",
+	    rsrcname, flags);
+
+	/* A forced request is never vetoed by this module. */
+	if ((flags & RCM_FORCE) != 0) {
+		rcm_log_message(RCM_TRACE1,
+		    "POOL: Allowing forced operation to pass through...\n");
+		return (RCM_SUCCESS);
+	}
+
+	/* Hand the request to the callback of the first matching entry. */
+	while (registrations[idx].rsrc != NULL) {
+		if (strcmp(rsrcname, registrations[idx].rsrc) == 0)
+			return (registrations[idx].capacity_change_cb(nvlist,
+			    errorp));
+		idx++;
+	}
+
+	return (RCM_SUCCESS);
+}
+
+/*
+ * Capacity change notification: traced, then acknowledged.
+ */
+static int
+pool_notify_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
+    uint_t flags, nvlist_t *nvlist, char **info, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1,
+	    "POOL: notifying capacity change for: %s (flags: %d)\n",
+	    rsrcname, flags);
+
+	return (RCM_SUCCESS);
+}
+
+/*
+ * Register capacity interest for every entry in registrations[].  Calls
+ * made while already registered do nothing.  Individual registration
+ * failures are logged; the return value is RCM_SUCCESS regardless.
+ */
+static int
+pool_register(rcm_handle_t *hdl)
+{
+	int idx;
+
+	rcm_log_message(RCM_TRACE1, "Registering Pools RCM module\n");
+	if (registered != 0)
+		return (RCM_SUCCESS);
+
+	registered++;
+	idx = 0;
+	while (registrations[idx].rsrc != NULL) {
+		int rv = rcm_register_capacity(hdl,
+		    (char *)registrations[idx].rsrc, 0, NULL);
+
+		if (rv != RCM_SUCCESS) {
+			rcm_log_message(RCM_ERROR,
+			    gettext("POOL: failed to register capacity "
+			    "change for '%s'\n"),
+			    registrations[idx].rsrc);
+		}
+		idx++;
+	}
+	return (RCM_SUCCESS);
+}
+
+/*
+ * Undo pool_register().  Does nothing beyond tracing when the module is
+ * not currently registered.
+ */
+static int
+pool_unregister(rcm_handle_t *hdl)
+{
+	int idx;
+
+	rcm_log_message(RCM_TRACE1, "Pools RCM un-registered\n");
+	if (registered == 0)
+		return (RCM_SUCCESS);
+
+	registered--;
+	idx = 0;
+	while (registrations[idx].rsrc != NULL) {
+		int rv = rcm_unregister_capacity(hdl,
+		    (char *)registrations[idx].rsrc, 0);
+
+		if (rv != RCM_SUCCESS) {
+			rcm_log_message(RCM_ERROR,
+			    gettext("POOL: unregister capacity failed "
+			    "for '%s'\n"), registrations[idx].rsrc);
+		}
+		idx++;
+	}
+	return (RCM_SUCCESS);
+}
+
+/*
+ * Report a fixed usage string through *infop.  If the string cannot be
+ * duplicated, *infop and *errorp are set to NULL and RCM_FAILURE is
+ * returned.
+ */
+static int
+pool_get_info(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
+    char **infop, char **errorp, nvlist_t *props, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: RCM get info: '%s'\n", rsrcname);
+
+	*infop = strdup(gettext("POOL: In use by pool(4) subsystem"));
+	if (*infop != NULL)
+		return (RCM_SUCCESS);
+
+	rcm_log_message(RCM_ERROR, gettext("POOL: get info(%s) malloc "
+	    "failure\n"), rsrcname);
+	*infop = NULL;
+	*errorp = NULL;
+	return (RCM_FAILURE);
+}
+
+
+/*
+ * The remaining entry points only trace the call and report success.
+ */
+static int
+pool_request_suspend(rcm_handle_t *hdl, char *rsrcname,
+    id_t id, timespec_t *time, uint_t flags, char **reason,
+    rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: requesting suspend for: %s\n",
+	    rsrcname);
+
+	return (RCM_SUCCESS);
+}
+
+static int
+pool_notify_resume(rcm_handle_t *hdl, char *rsrcname,
+    id_t pid, uint_t flags, char **reason, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: notifying resume of: %s\n",
+	    rsrcname);
+
+	return (RCM_SUCCESS);
+}
+
+static int
+pool_request_offline(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
+    char **reason, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: requesting offline for: %s\n",
+	    rsrcname);
+
+	return (RCM_SUCCESS);
+}
+
+static int
+pool_notify_online(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flags,
+    char **reason, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: notifying online for: %s\n",
+	    rsrcname);
+
+	return (RCM_SUCCESS);
+}
+static int
+pool_notify_remove(rcm_handle_t *hdl, char *rsrcname, id_t pid,
+    uint_t flag, char **reason, rcm_info_t **dependent_info)
+{
+	rcm_log_message(RCM_TRACE1, "POOL: notifying removal of: %s\n",
+	    rsrcname);
+
+	return (RCM_SUCCESS);
+}
diff --git a/usr/src/cmd/rcm_daemon/common/rcm_event.c b/usr/src/cmd/rcm_daemon/common/rcm_event.c
new file mode 100644
index 0000000000..4b36739fd7
--- /dev/null
+++ b/usr/src/cmd/rcm_daemon/common/rcm_event.c
@@ -0,0 +1,450 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1999-2001 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <door.h>
+#include <assert.h>
+#include <sys/acl.h>
+#include <sys/stat.h>
+#include <librcm_event.h>
+
+#include "rcm_impl.h"
+
+/*
+ * Event handling routine
+ */
+
+#define	RCM_NOTIFY	0
+#define	RCM_GETINFO	1
+#define	RCM_REQUEST	2
+#define	RCM_EFAULT	3
+#define	RCM_EPERM	4
+#define	RCM_EINVAL	5
+
+static void process_event(int, int, nvlist_t *, nvlist_t **);
+static void generate_reply_event(int, rcm_info_t *, nvlist_t **);
+static void rcm_print_nvlist(nvlist_t *);
+
+/*
+ * Top level function for event service
+ *
+ * Unpacks the door argument in *data / *datalen into an nvlist and reads the
+ * command from it.  For CMD_KNOCK, *data is set to NULL and *datalen to 0.
+ * For any other command the request is handed to process_event() and the
+ * unpacked reply nvlist is returned through *data, with *datalen set to 0.
+ * If the argument cannot be decoded, the function logs the problem and
+ * returns without touching *data or *datalen.
+ */
+void
+event_service(void **data, size_t *datalen)
+{
+	int cmd;
+	int lerrno;
+	int seq_num;
+	nvlist_t *nvl;
+	nvlist_t *ret = NULL;
+
+	rcm_log_message(RCM_TRACE1, "received door operation\n");
+
+	/* Decode the data from the door into an unpacked nvlist */
+	if (data == NULL || datalen == NULL) {
+		rcm_log_message(RCM_ERROR, "received null door argument\n");
+		return;
+	}
+	if ((lerrno = nvlist_unpack(*data, *datalen, &nvl, 0)) != 0) {
+		rcm_log_message(RCM_ERROR, "received bad door argument, %s\n",
+		    strerror(lerrno));
+		return;
+	}
+
+	/* Do nothing if the door is just being knocked on */
+	if ((errno = nvlist_lookup_int32(nvl, RCM_CMD, &cmd)) != 0) {
+		rcm_log_message(RCM_ERROR,
+		    "bad door argument (nvlist_lookup=%s)\n", strerror(errno));
+		nvlist_free(nvl);
+		return;
+	}
+	if (cmd == CMD_KNOCK) {
+		rcm_log_message(RCM_TRACE1, "door event was just a knock\n");
+		nvlist_free(nvl);
+		*data = NULL;
+		*datalen = 0;
+		return;
+	}
+
+	/*
+	 * Go increment thread count. Before daemon is fully initialized,
+	 * the event processing blocks inside this function.
+	 */
+	seq_num = rcmd_thr_incr(cmd);
+
+	process_event(cmd, seq_num, nvl, &ret);
+	nvlist_free(nvl);
+	assert(ret != NULL);
+
+	/*
+	 * Decrement thread count
+	 */
+	rcmd_thr_decr();
+
+	*data = ret;
+	*datalen = 0;
+}
+
+/*
+ * Actually processes events; returns a reply event
+ *
+ * Extracts the arguments present in nvl, sanity checks those required by
+ * cmd, dispatches to the matching implementation routine, and stores the
+ * reply nvlist in *ret.  Missing required arguments and unknown commands
+ * produce an EFAULT reply.
+ *
+ * Everything allocated here (the rsrcnames array and the unpacked change
+ * and event data) is released at the single exit below, so no path leaks.
+ * The resource name strings themselves belong to nvl and are not freed.
+ */
+static void
+process_event(int cmd, int seq_num, nvlist_t *nvl, nvlist_t **ret)
+{
+	uint_t i;
+	int error;
+	uint_t nvl_nrsrcs = 0;
+	pid_t pid;
+	uint32_t flag = (uint32_t)0;
+	uint64_t pid64 = (uint64_t)0;
+	uint_t buflen = 0;		/* uint_t: nvlist_lookup_*_array() */
+	uint_t interval_size = 0;	/* element counts are uint_t */
+	timespec_t *interval = NULL;
+	nvlist_t *change_data = NULL;
+	nvlist_t *event_data = NULL;
+	rcm_info_t *info = NULL;
+	char *modname = NULL;
+	char *buf = NULL;
+	char **rsrcnames = NULL;
+	char **nvl_rsrcs = NULL;
+
+	rcm_log_message(RCM_TRACE2, "servicing door command=%d\n", cmd);
+
+	rcm_print_nvlist(nvl);
+
+	/*
+	 * Extract data from the door argument nvlist.  Not all arguments
+	 * are needed; sanity checks are performed later.
+	 */
+	(void) nvlist_lookup_string_array(nvl, RCM_RSRCNAMES, &nvl_rsrcs,
+	    &nvl_nrsrcs);
+	(void) nvlist_lookup_string(nvl, RCM_CLIENT_MODNAME, &modname);
+	(void) nvlist_lookup_uint64(nvl, RCM_CLIENT_ID, (uint64_t *)&pid64);
+	pid = (pid_t)pid64;
+	(void) nvlist_lookup_uint32(nvl, RCM_REQUEST_FLAG, (uint32_t *)&flag);
+	(void) nvlist_lookup_byte_array(nvl, RCM_SUSPEND_INTERVAL,
+	    (uchar_t **)&interval, &interval_size);
+	(void) nvlist_lookup_byte_array(nvl, RCM_CHANGE_DATA, (uchar_t **)&buf,
+	    &buflen);
+	if (buf != NULL && buflen > 0) {
+		(void) nvlist_unpack(buf, buflen, &change_data, 0);
+		buf = NULL;
+		buflen = 0;
+	}
+	(void) nvlist_lookup_byte_array(nvl, RCM_EVENT_DATA, (uchar_t **)&buf,
+	    &buflen);
+	if (buf != NULL && buflen > 0)
+		(void) nvlist_unpack(buf, buflen, &event_data, 0);
+
+	/* NULL-terminated shallow copy of the resource name array */
+	rsrcnames = s_calloc(nvl_nrsrcs + 1, sizeof (char *));
+	for (i = 0; i < nvl_nrsrcs; i++) {
+		rsrcnames[i] = nvl_rsrcs[i];
+	}
+	rsrcnames[nvl_nrsrcs] = NULL;
+
+	/*
+	 * Switch off the command being performed to do the appropriate
+	 * sanity checks and dispatch the arguments to the appropriate
+	 * implementation routine.
+	 */
+	switch (cmd) {
+	case CMD_REGISTER:
+		if ((modname == NULL) || (rsrcnames == NULL) ||
+		    (rsrcnames[0] == NULL))
+			goto faildata;
+		error = add_resource_client(modname, rsrcnames[0], pid, flag,
+		    &info);
+		break;
+
+	case CMD_UNREGISTER:
+		if ((modname == NULL) || (rsrcnames == NULL) ||
+		    (rsrcnames[0] == NULL))
+			goto faildata;
+		error = remove_resource_client(modname, rsrcnames[0], pid,
+		    flag);
+		break;
+
+	case CMD_GETINFO:
+		if ((rsrcnames == NULL) &&
+		    ((flag & (RCM_DR_OPERATION | RCM_MOD_INFO)) == 0))
+			goto faildata;
+		if ((error = get_resource_info(rsrcnames, flag, seq_num, &info))
+		    == EINVAL) {
+			rcm_log_message(RCM_DEBUG,
+			    "invalid argument in get info request\n");
+			generate_reply_event(EINVAL, NULL, ret);
+			goto out;
+		}
+		break;
+
+	case CMD_SUSPEND:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL) ||
+		    (interval == NULL))
+			goto faildata;
+		error = process_resource_suspend(rsrcnames, pid, flag, seq_num,
+		    interval, &info);
+		break;
+
+	case CMD_RESUME:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL))
+			goto faildata;
+		error = notify_resource_resume(rsrcnames, pid, flag, seq_num,
+		    &info);
+		break;
+
+	case CMD_OFFLINE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL))
+			goto faildata;
+		error = process_resource_offline(rsrcnames, pid, flag, seq_num,
+		    &info);
+		break;
+
+	case CMD_ONLINE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL))
+			goto faildata;
+		error = notify_resource_online(rsrcnames, pid, flag, seq_num,
+		    &info);
+		break;
+
+	case CMD_REMOVE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL))
+			goto faildata;
+		error = notify_resource_remove(rsrcnames, pid, flag, seq_num,
+		    &info);
+		break;
+
+	case CMD_EVENT:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL) ||
+		    (event_data == NULL))
+			goto faildata;
+		error = notify_resource_event(rsrcnames[0], pid, flag, seq_num,
+		    event_data, &info);
+		break;
+
+	case CMD_REQUEST_CHANGE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL) ||
+		    (change_data == NULL))
+			goto faildata;
+		error = request_capacity_change(rsrcnames[0], pid, flag,
+		    seq_num, change_data, &info);
+		break;
+
+	case CMD_NOTIFY_CHANGE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL) ||
+		    (change_data == NULL))
+			goto faildata;
+		error = notify_capacity_change(rsrcnames[0], pid, flag, seq_num,
+		    change_data, &info);
+		break;
+
+	case CMD_GETSTATE:
+		if ((rsrcnames == NULL) || (rsrcnames[0] == NULL))
+			goto faildata;
+		error = get_resource_state(rsrcnames[0], pid, &info);
+		break;
+
+	default:
+		rcm_log_message(RCM_WARNING,
+		    gettext("unknown door command: %d\n"), cmd);
+		generate_reply_event(EFAULT, NULL, ret);
+		goto out;
+	}
+
+	rcm_log_message(RCM_TRACE2, "finish processing event 0x%x\n", cmd);
+	generate_reply_event(error, info, ret);
+	goto out;
+
+faildata:
+	rcm_log_message(RCM_WARNING,
+	    gettext("data error in door arguments for cmd 0x%x\n"), cmd);
+
+	generate_reply_event(EFAULT, NULL, ret);
+
+out:
+	free(rsrcnames);
+	if (change_data != NULL)
+		nvlist_free(change_data);
+	if (event_data != NULL)
+		nvlist_free(event_data);
+}
+
+
+/*
+ * Generate reply event from resource registration information
+ *
+ * Builds a new nvlist holding the result code plus one packed byte array
+ * per info tuple that carries an nvlist, and stores it in *ret.  The info
+ * list is consumed: each tuple and its nvlist are freed as they are
+ * processed.  Any nvlist failure ends the daemon through rcmd_exit().
+ */
+static void
+generate_reply_event(int error, rcm_info_t *info, nvlist_t **ret)
+{
+	nvlist_t *nvl = NULL;
+	rcm_info_t *tmp;
+	char *buf = NULL;
+	size_t buflen = 0;
+
+	rcm_log_message(RCM_TRACE4, "generating reply event\n");
+
+	/* Allocate an empty nvlist */
+	if ((errno = nvlist_alloc(&nvl, 0, 0)) > 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("nvlist_alloc failed: %s\n"), strerror(errno));
+		rcmd_exit(errno);
+	}
+
+	/* Encode the result of the operation in the nvlist */
+	if ((errno = nvlist_add_int32(nvl, RCM_RESULT, error)) != 0) {
+		rcm_log_message(RCM_ERROR,
+		    gettext("nvlist_add(RESULT) failed: %s\n"),
+		    strerror(errno));
+		rcmd_exit(errno);
+	}
+
+	/* Go through the RCM info tuples, appending them all to the nvlist */
+	tmp = info;
+	while (tmp) {
+		if (tmp->info) {
+			buf = NULL;
+			buflen = 0;
+			if ((errno = nvlist_pack(tmp->info, &buf, &buflen,
+			    NV_ENCODE_NATIVE, 0)) != 0) {
+				rcm_log_message(RCM_ERROR,
+				    gettext("nvlist_pack(INFO) failed: %s\n"),
+				    strerror(errno));
+				rcmd_exit(errno);
+			}
+			if ((errno = nvlist_add_byte_array(nvl,
+			    RCM_RESULT_INFO, (uchar_t *)buf, buflen)) != 0) {
+				rcm_log_message(RCM_ERROR,
+				    gettext("nvlist_add(INFO) failed: %s\n"),
+				    strerror(errno));
+				rcmd_exit(errno);
+			}
+			(void) free(buf);
+			nvlist_free(tmp->info);
+		}
+		info = tmp->next;
+		(void) free(tmp);
+		tmp = info;
+	}
+
+	/* Return the nvlist (unpacked) in the return argument */
+	rcm_print_nvlist(nvl);
+	*ret = nvl;
+}
+
+/*
+ * Dump the name, type and value of every pair in nvl at RCM_TRACE3.
+ * Types without a case below are reported as "<not dumped>".
+ */
+static void
+rcm_print_nvlist(nvlist_t *nvl)
+{
+	uchar_t data_byte;
+	int16_t data_int16;
+	uint16_t data_uint16;
+	int32_t data_int32;
+	uint32_t data_uint32;
+	int64_t data_int64;
+	uint64_t data_uint64;
+	char *data_string;
+	char **data_strings;
+	uint_t data_nstrings;
+	nvpair_t *nvp = NULL;
+	uint_t i;
+	char *name;
+	data_type_t type;
+
+	rcm_log_message(RCM_TRACE3, "event attributes:\n");
+
+	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+		type = nvpair_type(nvp);
+		name = nvpair_name(nvp);
+		rcm_log_message(RCM_TRACE3, "\t%s(%d)=", name, type);
+
+		switch (type) {
+		case DATA_TYPE_BOOLEAN:
+			rcm_log_message(RCM_TRACE3, "True (boolean)\n");
+			break;
+
+		case DATA_TYPE_BYTE:
+			(void) nvpair_value_byte(nvp, &data_byte);
+			rcm_log_message(RCM_TRACE3, "0x%x (byte)\n",
+			    data_byte);
+			break;
+
+		case DATA_TYPE_INT16:
+			(void) nvpair_value_int16(nvp, &data_int16);
+			rcm_log_message(RCM_TRACE3, "0x%x (int16)\n",
+			    data_int16);
+			break;
+
+		case DATA_TYPE_UINT16:
+			(void) nvpair_value_uint16(nvp, &data_uint16);
+			rcm_log_message(RCM_TRACE3, "0x%x (uint16)\n",
+			    data_uint16);
+			break;
+
+		case DATA_TYPE_INT32:
+			(void) nvpair_value_int32(nvp, &data_int32);
+			rcm_log_message(RCM_TRACE3, "0x%x (int32)\n",
+			    data_int32);
+			break;
+
+		case DATA_TYPE_UINT32:
+			(void) nvpair_value_uint32(nvp, &data_uint32);
+			rcm_log_message(RCM_TRACE3, "0x%x (uint32)\n",
+			    data_uint32);
+			break;
+
+		case DATA_TYPE_INT64:
+			/* %llx: a 64-bit value is wider than long on ILP32 */
+			(void) nvpair_value_int64(nvp, &data_int64);
+			rcm_log_message(RCM_TRACE3, "0x%llx (int64)\n",
+			    (unsigned long long)data_int64);
+			break;
+
+		case DATA_TYPE_UINT64:
+			(void) nvpair_value_uint64(nvp, &data_uint64);
+			rcm_log_message(RCM_TRACE3, "0x%llx (uint64)\n",
+			    (unsigned long long)data_uint64);
+			break;
+
+		case DATA_TYPE_STRING:
+			(void) nvpair_value_string(nvp, &data_string);
+			rcm_log_message(RCM_TRACE3, "\"%s\" (string)\n",
+			    data_string);
+			break;
+
+		case DATA_TYPE_STRING_ARRAY:
+			(void) nvpair_value_string_array(nvp, &data_strings,
+			    &data_nstrings);
+			for (i = 0; i < data_nstrings; i++) {
+				rcm_log_message(RCM_TRACE3,
+				    "\t\"%s\" (string)\n", data_strings[i]);
+				if (i < (data_nstrings - 1))
+					rcm_log_message(RCM_TRACE3, "\t\t\t");
+			}
+			break;
+
+		default:
+			rcm_log_message(RCM_TRACE3, "<not dumped>\n");
+			break;
+		}
+	}
+}
diff --git a/usr/src/cmd/rcm_daemon/common/rcm_impl.c b/usr/src/cmd/rcm_daemon/common/rcm_impl.c
new file mode 100644
index 0000000000..e6f6e65868
--- /dev/null
+++ b/usr/src/cmd/rcm_daemon/common/rcm_impl.c
@@ -0,0 +1,815 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <librcm_impl.h>
+#include "rcm_impl.h"
+
+static int query(char **, int, const char *, int, pid_t, uint_t, timespec_t *,
+    int, rcm_info_t **, int *);
+static void cancel_query(int, const char *, pid_t, uint_t, int);
+
+/*
+ * The following ops are invoked when modules initiate librcm calls which
+ * require daemon processing. Cascaded RCM operations must come through
+ * this path.
+ */
+librcm_ops_t rcm_ops = {
+	add_resource_client,
+	remove_resource_client,
+	get_resource_info,
+	process_resource_suspend,
+	notify_resource_resume,
+	process_resource_offline,
+	notify_resource_online,
+	notify_resource_remove,
+	request_capacity_change,
+	notify_capacity_change,
+	notify_resource_event,
+	get_resource_state
+};
+
+/*
+ * Process a request or a notification on a subtree
+ */
+/*ARGSUSED2*/
+static int
+common_resource_op(int cmd, char *rsrcname, pid_t pid, uint_t flag, int seq_num,
+    timespec_t *interval, nvlist_t *nvl, rcm_info_t **info)
+{
+	int		rv;
+	rsrc_node_t	*subtree;
+	tree_walk_arg_t	walk;
+
+	/*
+	 * Locate the root of the subtree for this resource.  A failed
+	 * lookup is returned to the caller as is.  A successful lookup that
+	 * yields no node means no RCM consumer registered for the resource,
+	 * and is reported as success without any callbacks being made.
+	 */
+	rv = rsrc_node_find(rsrcname, 0, &subtree);
+	if (rv != RCM_SUCCESS || subtree == NULL)
+		return (rv);
+
+	walk.flag = flag;
+	walk.info = info;
+	walk.seq_num = seq_num;
+	walk.interval = interval;
+	walk.nvl = nvl;
+	walk.cmd = cmd;
+
+	/*
+	 * Capacity changes and events go only to the clients attached to
+	 * the node itself; every other command is applied to the subtree.
+	 */
+	switch (cmd) {
+	case CMD_NOTIFY_CHANGE:
+	case CMD_REQUEST_CHANGE:
+	case CMD_EVENT:
+		rv = rsrc_client_action_list(subtree->users, cmd, &walk);
+		break;
+	default:
+		rv = rsrc_tree_action(subtree, cmd, &walk);
+		break;
+	}
+
+	return (rv);
+}
+
+/*
+ * When a resource is removed, notify all clients who registered for this
+ * particular resource.
+ *
+ * All names in the NULL-terminated list are attempted.  The return value is
+ * the error from the last resource that failed, or RCM_SUCCESS.
+ */
+int
+notify_resource_remove(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
+    rcm_info_t **info)
+{
+	char	**namep;
+	int	rv;
+	int	result = RCM_SUCCESS;
+
+	for (namep = rsrcnames; *namep != NULL; namep++) {
+
+		rcm_log_message(RCM_TRACE2,
+		    "notify_resource_remove(%s, %ld, 0x%x, %d)\n", *namep,
+		    pid, flag, seq_num);
+
+		/*
+		 * Move the DR request for this resource to the removing
+		 * state; if that fails, skip the resource.
+		 */
+		rv = dr_req_update(*namep, pid, flag, RCM_STATE_REMOVING,
+		    seq_num, info);
+		if (rv != RCM_SUCCESS) {
+			result = rv;
+			continue;
+		}
+
+		rv = common_resource_op(CMD_REMOVE, *namep, pid, flag,
+		    seq_num, NULL, NULL, info);
+
+		/* the DR request entry is dropped whatever the outcome */
+		dr_req_remove(*namep, flag);
+
+		if (rv != RCM_SUCCESS)
+			result = rv;
+	}
+
+	return (result);
+}
+
+/*
+ * Notify users that a resource has been resumed
+ *
+ * NOTE(review): once RCM_SUSPENDED has been or'ed into flag for one
+ * resource it remains set for the resources that follow in the list;
+ * confirm whether that carry-over is intended.
+ */
+int
+notify_resource_resume(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
+    rcm_info_t **info)
+{
+	char			**namep;
+	int			rv;
+	int			suspended;
+	rcm_info_t		*st_info;
+	rcm_info_tuple_t	*st_tuple;
+	int			result = RCM_SUCCESS;
+
+	for (namep = rsrcnames; *namep != NULL; namep++) {
+
+		st_info = NULL;
+		st_tuple = NULL;
+
+		/*
+		 * Check resource state (was resource actually suspended?)
+		 * A failed state query, or one yielding no tuple, is
+		 * treated the same as a suspended resource.
+		 */
+		if (get_resource_state(*namep, pid, &st_info) != 0)
+			suspended = 1;
+		else if ((st_tuple = rcm_info_next(st_info, NULL)) == NULL)
+			suspended = 1;
+		else
+			suspended =
+			    (rcm_info_state(st_tuple) == RCM_STATE_SUSPEND);
+		if (suspended)
+			flag |= RCM_SUSPENDED;
+		if (st_info != NULL)
+			rcm_free_info(st_info);
+
+		rcm_log_message(RCM_TRACE2,
+		    "notify_resource_resume(%s, %ld, 0x%x, %d)\n",
+		    *namep, pid, flag, seq_num);
+
+		/* move the DR request to the resuming state */
+		rv = dr_req_update(*namep, pid, flag, RCM_STATE_RESUMING,
+		    seq_num, info);
+		if (rv != RCM_SUCCESS) {
+			result = rv;
+			continue;
+		}
+
+		rv = common_resource_op(CMD_RESUME, *namep, pid, flag,
+		    seq_num, NULL, NULL, info);
+
+		dr_req_remove(*namep, flag);
+
+		if (rv != RCM_SUCCESS)
+			result = rv;
+	}
+
+	return (result);
+}
+
+/*
+ * Notify users that an offlined device is again available
+ */
+int
+notify_resource_online(char **rsrcnames, pid_t pid, uint_t flag, int seq_num,
+    rcm_info_t **info)
+{
+	char	**namep;
+	int	rv;
+	int	result = RCM_SUCCESS;
+
+	for (namep = rsrcnames; *namep != NULL; namep++) {
+
+		rcm_log_message(RCM_TRACE2,
+		    "notify_resource_online(%s, %ld, 0x%x, %d)\n",
+		    *namep, pid, flag, seq_num);
+
+		/* move the DR request to the onlining state */
+		rv = dr_req_update(*namep, pid, flag, RCM_STATE_ONLINING,
+		    seq_num, info);
+		if (rv != RCM_SUCCESS) {
+			result = rv;
+			continue;
+		}
+
+		rv = common_resource_op(CMD_ONLINE, *namep, pid, flag,
+		    seq_num, NULL, NULL, info);
+
+		dr_req_remove(*namep, flag);
+
+		if (rv != RCM_SUCCESS)
+			result = rv;
+	}
+
+	return (result);
+}
+
+/*
+ * For offline and suspend, need to get the logic correct here.  There are
+ * several cases:
+ *
+ *	1.  It is a door call and RCM_QUERY is not set:
+ *		run a QUERY; if that succeeds, run the operation.
+ *
+ *	2.  It is a door call and RCM_QUERY is set:
+ *		run the QUERY only.
+ *
+ *	3.
It is not a door call: + * run the call, but look at the flag to see if the + * lock should be kept. + */ + +/* + * Request permission to suspend a resource + */ +int +process_resource_suspend(char **rsrcnames, pid_t pid, uint_t flag, int seq_num, + timespec_t *interval, rcm_info_t **info) +{ + int i; + int error = RCM_SUCCESS; + int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0); + + /* + * Query the operation first. The return value of the query indicates + * if the operation should proceed and be implemented. + */ + if (query(rsrcnames, CMD_SUSPEND, "suspend", RCM_STATE_SUSPEND_QUERYING, + pid, flag, interval, seq_num, info, &error) == 0) { + return (error); + } + + /* + * Implement the operation. + */ + for (i = 0; rsrcnames[i] != NULL; i++) { + + /* Update the lock from a query state to the suspending state */ + if ((error = dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_SUSPENDING, seq_num, info)) != RCM_SUCCESS) { + + rcm_log_message(RCM_DEBUG, + "suspend %s denied with error %d\n", rsrcnames[i], + error); + + /* + * When called from a module, don't return EAGAIN. + * This is to avoid recursion if module always retries. 
+ */ + if (!is_doorcall && error == EAGAIN) { + return (RCM_CONFLICT); + } + + return (error); + } + + /* Actually suspend the resource */ + error = common_resource_op(CMD_SUSPEND, rsrcnames[i], pid, + flag, seq_num, interval, NULL, info); + if (error != RCM_SUCCESS) { + (void) dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_SUSPEND_FAIL, seq_num, info); + rcm_log_message(RCM_DEBUG, + "suspend tree failed for %s\n", rsrcnames[i]); + return (error); + } + + rcm_log_message(RCM_TRACE3, "suspend tree succeeded for %s\n", + rsrcnames[i]); + + /* Update the lock for the successful suspend */ + (void) dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_SUSPEND, seq_num, info); + } + + return (RCM_SUCCESS); +} + +/* + * Process a device removal request, reply is needed + */ +int +process_resource_offline(char **rsrcnames, pid_t pid, uint_t flag, int seq_num, + rcm_info_t **info) +{ + int i; + int error = RCM_SUCCESS; + int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0); + + /* + * Query the operation first. The return value of the query indicates + * if the operation should proceed and be implemented. + */ + if (query(rsrcnames, CMD_OFFLINE, "offline", RCM_STATE_OFFLINE_QUERYING, + pid, flag, NULL, seq_num, info, &error) == 0) { + return (error); + } + + /* + * Implement the operation. + */ + for (i = 0; rsrcnames[i] != NULL; i++) { + + error = dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_OFFLINING, seq_num, info); + if (error != RCM_SUCCESS) { + rcm_log_message(RCM_DEBUG, + "offline %s denied with error %d\n", rsrcnames[i], + error); + + /* + * When called from a module, don't return EAGAIN. + * This is to avoid recursion if module always retries. 
+ */ + if (!is_doorcall && error == EAGAIN) { + return (RCM_CONFLICT); + } + + return (error); + } + + /* Actually offline the resource */ + error = common_resource_op(CMD_OFFLINE, rsrcnames[i], pid, + flag, seq_num, NULL, NULL, info); + if (error != RCM_SUCCESS) { + (void) dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_OFFLINE_FAIL, seq_num, info); + rcm_log_message(RCM_DEBUG, + "offline tree failed for %s\n", rsrcnames[i]); + return (error); + } + + rcm_log_message(RCM_TRACE3, "offline tree succeeded for %s\n", + rsrcnames[i]); + + /* Update the lock for the successful offline */ + (void) dr_req_update(rsrcnames[i], pid, flag, + RCM_STATE_OFFLINE, seq_num, info); + } + + return (RCM_SUCCESS); +} + +/* + * Add a resource client who wishes to interpose on DR, events, or capacity. + * Reply needed. + */ +int +add_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag, + rcm_info_t **infop) +{ + int error = RCM_SUCCESS; + client_t *user = NULL; + rsrc_node_t *node = NULL; + rcm_info_t *info = NULL; + + rcm_log_message(RCM_TRACE2, + "add_resource_client(%s, %s, %ld, 0x%x)\n", + modname, rsrcname, pid, flag); + + if (strcmp(rsrcname, "/") == 0) { + /* + * No need to register for / because it will never go away. + */ + rcm_log_message(RCM_INFO, gettext( + "registering for / by %s has been turned into a no-op\n"), + modname); + return (RCM_SUCCESS); + } + + /* + * Hold the rcm_req_lock so no dr request may come in while the + * registration is in progress. + */ + (void) mutex_lock(&rcm_req_lock); + + /* + * Test if the requested registration is a noop, and return EALREADY + * if it is. 
+ */ + error = rsrc_node_find(rsrcname, RSRC_NODE_CREATE, &node); + if ((error != RCM_SUCCESS) || (node == NULL)) { + (void) mutex_unlock(&rcm_req_lock); + return (RCM_FAILURE); + } + + user = rsrc_client_find(modname, pid, &node->users); + if ((user != NULL) && + ((user->flag & (flag & RCM_REGISTER_MASK)) != 0)) { + (void) mutex_unlock(&rcm_req_lock); + if ((flag & RCM_REGISTER_DR) && + (user->state == RCM_STATE_REMOVE)) { + user->state = RCM_STATE_ONLINE; + return (RCM_SUCCESS); + } + return (EALREADY); + } + + /* If adding a new DR registration, reject if the resource is locked */ + if (flag & RCM_REGISTER_DR) { + + if (rsrc_check_lock_conflicts(rsrcname, flag, LOCK_FOR_USE, + &info) != RCM_SUCCESS) { + /* + * The resource is being DR'ed, so return failure + */ + (void) mutex_unlock(&rcm_req_lock); + + /* + * If caller doesn't care about info, free it + */ + if (infop) + *infop = info; + else + rcm_free_info(info); + + return (RCM_CONFLICT); + } + } + + /* The registration is new and allowable, so add it */ + error = rsrc_node_add_user(node, rsrcname, modname, pid, flag); + (void) mutex_unlock(&rcm_req_lock); + + return (error); +} + +/* + * Remove a resource client, who no longer wishes to interpose on either + * DR, events, or capacity. + */ +int +remove_resource_client(char *modname, char *rsrcname, pid_t pid, uint_t flag) +{ + int error; + rsrc_node_t *node; + + rcm_log_message(RCM_TRACE2, + "remove_resource_client(%s, %s, %ld, 0x%x)\n", + modname, rsrcname, pid, flag); + + /* + * Allow resource client to leave anytime, assume client knows what + * it is trying to do. 
+ */ + error = rsrc_node_find(rsrcname, 0, &node); + if ((error != RCM_SUCCESS) || (node == NULL)) { + rcm_log_message(RCM_WARNING, + gettext("resource %s not found\n"), rsrcname); + return (ENOENT); + } + + return (rsrc_node_remove_user(node, modname, pid, flag)); +} + +/* + * Reply is needed + */ +int +get_resource_info(char **rsrcnames, uint_t flag, int seq_num, rcm_info_t **info) +{ + int rv = RCM_SUCCESS; + + if (flag & RCM_DR_OPERATION) { + *info = rsrc_dr_info(); + } else if (flag & RCM_MOD_INFO) { + *info = rsrc_mod_info(); + } else { + rv = rsrc_usage_info(rsrcnames, flag, seq_num, info); + } + + return (rv); +} + +int +notify_resource_event(char *rsrcname, id_t pid, uint_t flag, int seq_num, + nvlist_t *event_data, rcm_info_t **info) +{ + int error; + + assert(flag == 0); + + rcm_log_message(RCM_TRACE2, "notify_resource_event(%s, %ld, 0x%x)\n", + rsrcname, pid, flag); + + error = common_resource_op(CMD_EVENT, rsrcname, pid, flag, seq_num, + NULL, event_data, info); + + return (error); +} + +int +request_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num, + nvlist_t *nvl, rcm_info_t **info) +{ + int error; + int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0); + + rcm_log_message(RCM_TRACE2, + "request_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid, + flag, seq_num); + + if (is_doorcall || (flag & RCM_QUERY)) { + + error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid, + flag | RCM_QUERY, seq_num, NULL, nvl, info); + + if (error != RCM_SUCCESS) { + rcm_log_message(RCM_DEBUG, + "request state change query denied\n"); + return (error); + } + } + + if (flag & RCM_QUERY) + return (RCM_SUCCESS); + + error = common_resource_op(CMD_REQUEST_CHANGE, rsrcname, pid, flag, + seq_num, NULL, nvl, info); + + if (error != RCM_SUCCESS) { + rcm_log_message(RCM_DEBUG, "request state change failed\n"); + return (RCM_FAILURE); + } + + rcm_log_message(RCM_TRACE3, "request state change succeeded\n"); + + return (error); +} + +int 
+notify_capacity_change(char *rsrcname, id_t pid, uint_t flag, int seq_num, + nvlist_t *nvl, rcm_info_t **info) +{ + int error; + + rcm_log_message(RCM_TRACE2, + "notify_capacity_change(%s, %ld, 0x%x, %d)\n", rsrcname, pid, + flag, seq_num); + + error = common_resource_op(CMD_NOTIFY_CHANGE, rsrcname, pid, flag, + seq_num, NULL, nvl, info); + + if (error != RCM_SUCCESS) { + rcm_log_message(RCM_DEBUG, "notify state change failed\n"); + return (RCM_FAILURE); + } + + rcm_log_message(RCM_TRACE3, "notify state change succeeded\n"); + + return (error); +} + +int +get_resource_state(char *rsrcname, pid_t pid, rcm_info_t **info) +{ + int error; + int state; + char *s; + char *resolved; + rcm_info_t *dr_info = NULL; + rcm_info_tuple_t *dr_info_tuple = NULL; + rsrc_node_t *node; + client_t *client; + char *state_info = gettext("State of resource"); + + rcm_log_message(RCM_TRACE2, "get_resource_state(%s, %ld)\n", + rsrcname, pid); + + /* + * Check for locks, first. + */ + dr_info = rsrc_dr_info(); + if (dr_info) { + state = RCM_STATE_UNKNOWN; + if ((resolved = resolve_name(rsrcname)) == NULL) + return (RCM_FAILURE); + while (dr_info_tuple = rcm_info_next(dr_info, dr_info_tuple)) { + s = (char *)rcm_info_rsrc(dr_info_tuple); + if (s && (strcmp(resolved, s) == 0)) { + state = rcm_info_state(dr_info_tuple); + break; + } + } + free(resolved); + rcm_free_info(dr_info); + if (state != RCM_STATE_UNKNOWN) { + rcm_log_message(RCM_TRACE2, + "get_resource_state(%s)=%d\n", rsrcname, state); + add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL, + (char *)state_info, NULL, NULL, info); + return (RCM_SUCCESS); + } + } + + /* + * No locks, so look for client states in the resource tree. + * + * NOTE: It's possible the node doesn't exist, which means no RCM + * consumer registered for the resource. In this case we silently + * succeed. 
+ */ + error = rsrc_node_find(rsrcname, 0, &node); + state = RCM_STATE_ONLINE; + + if ((error == RCM_SUCCESS) && (node != NULL)) { + for (client = node->users; client; client = client->next) { + if (client->state == RCM_STATE_OFFLINE_FAIL || + client->state == RCM_STATE_OFFLINE_QUERY_FAIL || + client->state == RCM_STATE_SUSPEND_FAIL || + client->state == RCM_STATE_SUSPEND_QUERY_FAIL) { + state = client->state; + break; + } + + if (client->state != RCM_STATE_ONLINE && + client->state != RCM_STATE_REMOVE) + state = client->state; + } + } + + if (error == RCM_SUCCESS) { + rcm_log_message(RCM_TRACE2, "get_resource_state(%s)=%d\n", + rsrcname, state); + add_busy_rsrc_to_list(rsrcname, pid, state, 0, NULL, + (char *)state_info, NULL, NULL, info); + } + + return (error); +} + +/* + * Perform a query of an offline or suspend. + * + * The return value of this function indicates whether the operation should + * be implemented (0 == No, 1 == Yes). Note that locks and client state + * changes will only persist if the caller is going to implement the operation. + */ +static int +query(char **rsrcnames, int cmd, const char *opname, int querystate, pid_t pid, + uint_t flag, timespec_t *interval, int seq_num, rcm_info_t **info, + int *errorp) +{ + int i; + int error; + int final_error; + int is_doorcall = ((seq_num & SEQ_NUM_MASK) == 0); + + /* Only query for door calls, or when the RCM_QUERY flag is set */ + if ((is_doorcall == 0) && ((flag & RCM_QUERY) == 0)) { + return (1); + } + + /* Lock all the resources. Fail the query in the case of a conflict. 
*/ + for (i = 0; rsrcnames[i] != NULL; i++) { + + rcm_log_message(RCM_TRACE2, + "process_resource_%s(%s, %ld, 0x%x, %d)\n", + opname, rsrcnames[i], pid, flag, seq_num); + + error = dr_req_add(rsrcnames[i], pid, flag, querystate, seq_num, + NULL, info); + + /* The query goes no further if a resource cannot be locked */ + if (error != RCM_SUCCESS) { + + rcm_log_message(RCM_DEBUG, + "%s query %s defined with error %d\n", + opname, rsrcnames[i], error); + + /* + * Replace EAGAIN with RCM_CONFLICT in the case of + * module callbacks; to avoid modules from trying + * again infinitely. + */ + if ((is_doorcall == 0) && (error == EAGAIN)) { + error = RCM_CONFLICT; + } + + goto finished; + } + } + + /* + * All the resources were locked above, so use common_resource_op() + * to pass the query on to the clients. Accumulate the overall error + * value in 'final_error', before transferring it to 'error' at the end. + */ + for (final_error = RCM_SUCCESS, i = 0; rsrcnames[i] != NULL; i++) { + + /* Log the query (for tracing purposes). */ + rcm_log_message(RCM_TRACE2, "querying resource %s\n", + rsrcnames[i]); + + /* Query the resource's clients through common_resource_op(). */ + error = common_resource_op(cmd, rsrcnames[i], pid, + flag | RCM_QUERY, seq_num, interval, NULL, info); + + /* + * If a query fails, don't stop iterating through the loop. + * Just ensure that 'final_error' is set (if not already), + * log the error, and continue looping. + * + * In the case of a user who manually intervenes and retries + * the operation, this will maximize the extent of the query + * so that they experience fewer such iterations overall. 
+ */ + if (error != RCM_SUCCESS) { + + /* Log each query that failed along the way */ + rcm_log_message(RCM_DEBUG, "%s %s query denied\n", + opname, rsrcnames[i]); + + if (final_error != RCM_FAILURE) { + final_error = error; + } + } + } + error = final_error; + + /* + * Tell the calling function not to proceed any further with the + * implementation phase of the operation if the query failed, or + * if the user's intent was to only query the operation. + */ +finished: + if ((error != RCM_SUCCESS) || ((flag & RCM_QUERY) != 0)) { + + /* + * Since the operation won't be implemented, cancel the + * query (unlock resources and reverse client state changes). + * + * The cancellation routine cleans up everything for the entire + * operation, and thus it should only be called from the very + * root of the operation (e.g. when 'is_doorcall' is TRUE). + */ + if (is_doorcall != 0) { + cancel_query(cmd, opname, pid, flag, seq_num); + } + + *errorp = error; + return (0); + } + + /* Otherwise, tell the caller to proceed with the implementation. */ + *errorp = RCM_SUCCESS; + return (1); +} + +/* + * Implementation of a query cancellation. + * + * The full scope of the query is already noted, so the scope of the operation + * does not need to be expanded in the same recursive manner that was used for + * the query itself. (Clients don't have to be called to cross namespaces.) + * Instead, the locks added to the DR request list during the query are scanned. + */ +static void +cancel_query(int cmd, const char *opname, pid_t pid, uint_t flag, int seq_num) +{ + char rsrc[MAXPATHLEN]; + + /* + * Find every lock in the DR request list that is a part of this + * sequence. Call common_resource_op() with the QUERY_CANCEL flag to + * cancel each sub-operation, and then remove each lock from the list. + * + * The 'rsrc' buffer is required to retrieve the 'device' fields of + * matching DR request list entries in a way that's multi-thread safe. 
+ */ + while (dr_req_lookup(seq_num, rsrc) == RCM_SUCCESS) { + + rcm_log_message(RCM_TRACE2, "%s query %s cancelled\n", + opname, rsrc); + + (void) common_resource_op(cmd, rsrc, pid, + flag | RCM_QUERY | RCM_QUERY_CANCEL, seq_num, NULL, NULL, + NULL); + + (void) dr_req_remove(rsrc, flag); + } +} diff --git a/usr/src/cmd/rcm_daemon/common/rcm_impl.h b/usr/src/cmd/rcm_daemon/common/rcm_impl.h new file mode 100644 index 0000000000..3b70289ca3 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_impl.h @@ -0,0 +1,320 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _RCM_IMPL_H +#define _RCM_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <locale.h> +#include <poll.h> +#include <signal.h> +#include <strings.h> +#include <syslog.h> +#include <thread.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <librcm.h> +#include <librcm_impl.h> + +#include "rcm_module.h" + + +/* + * Daemon states for thread control + */ +#define RCMD_INIT 1 +#define RCMD_NORMAL 2 +#define RCMD_CLEANUP 3 +#define RCMD_FINI 4 + +/* + * flags for node operation + */ +#define RSRC_NODE_CREATE 1 +#define RSRC_NODE_REMOVE 2 /* not used */ + +/* + * Resource types + */ +#define RSRC_TYPE_NORMAL 0 +#define RSRC_TYPE_DEVICE 1 +#define RSRC_TYPE_FILESYS 2 +#define RSRC_TYPE_ABSTRACT 3 + +/* + * lock conflict checking flags + */ +#define LOCK_FOR_DR 0 +#define LOCK_FOR_USE 1 + +/* + * Sequence number encoding constants + */ +#define SEQ_NUM_SHIFT 8 /* lowest 8 bits indicate cascade operation */ +#define SEQ_NUM_MASK ((1 << SEQ_NUM_SHIFT) - 1) + +/* + * RCM queuing structure + */ +typedef struct rcm_queue { + struct rcm_queue *next; + struct rcm_queue *prev; +} rcm_queue_t; + +#define RCM_STRUCT_BASE_ADDR(struct_type, x, y) \ + ((struct_type *) ((void *)(((char *)(x)) - \ + (int)(&((struct_type *)0)->y)))) + +/* + * Struct for client loadable module + */ +typedef struct module { + struct module *next; + void *dlhandle; + struct rcm_mod_ops *(*init)(); + const char *(*info)(); + int (*fini)(); + struct rcm_mod_ops *modops; /* ops vector */ + char *name; /* module name */ + rcm_handle_t *rcmhandle; + int ref_count; + rcm_queue_t client_q; /* list of module's clients */ + struct script_info *rsi; /* scripting data */ +} module_t; + +/* + * 
Struct for describing a resource client + */ +typedef struct client { + rcm_queue_t queue; /* per module queue */ + struct client *next; /* next client on rsrc node list */ + module_t *module; /* per-client module */ + char *alias; /* rsrc_name known to client */ + pid_t pid; /* pid of regis process */ + int state; /* rsrc state known to client */ + uint_t flag; /* flag specified for registration */ + uint_t prv_flags; /* currently used by rcm scripting */ +} client_t; + +/* + * defines for client_t:prv_flags (used by rcm scripting) + */ +#define RCM_NEED_TO_UNREGISTER 1 + +/* + * Struct for a list of outstanding rcm requests + */ +typedef struct { + int n_req; + int n_req_max; /* max entries in this block */ + struct { + int seq_num; /* sequence number of request */ + int state; /* current state */ + id_t id; /* id of initiator */ + uint_t flag; /* request flags */ + int type; /* resource(device) type */ + char device[MAXPATHLEN]; /* name of device or resource */ + } req[1]; + /* more entries may follow */ +} rcm_req_t; + +/* + * struct for describing resource tree node + */ +typedef struct rsrc_node { + struct rsrc_node *parent; + struct rsrc_node *sibling; + struct rsrc_node *child; + char *name; /* phys path for devices */ + client_t *users; /* linked list of users */ + int type; /* resource type */ +} rsrc_node_t; + +/* + * struct for tree action args + */ +typedef struct { + int cmd; /* command */ + int seq_num; /* unique sequence number */ + int retcode; /* return code */ + uint_t flag; /* flag assoc. 
w command */ + timespec_t *interval; /* for suspend command */ + nvlist_t *nvl; /* for state changes */ + rcm_info_t **info; /* info to be filled in */ +} tree_walk_arg_t; + +/* + * for synchrizing various threads + */ +typedef struct { + int thr_count; + short wanted; + short state; + time_t last_update; + cond_t cv; + mutex_t lock; +} barrier_t; + +/* + * locks + */ +extern mutex_t rcm_req_lock; + +/* + * global variables + */ +extern librcm_ops_t rcm_ops; /* ops for module callback */ +extern int need_cleanup; + +/* + * comparison macros + * EQUAL, AFTER, DESCENDENT + */ +#define EQUAL(x, y) (strcmp(x, y) == 0) +#define AFTER(x, y) (strcmp(x, y) > 0) +#define DESCENDENT(x, y) \ + ((strlen(x) > strlen(y)) && \ + (strncmp(x, y, strlen(y)) == 0) && \ + ((x[strlen(y)] == '/') || \ + (x[strlen(y)] == ':') || \ + (x[strlen(y) - 1] == '/'))) + +/* + * function prototypes + */ + +/* top level request handling routines */ + +void event_service(void **, size_t *); +int process_resource_suspend(char **, pid_t, uint_t, int, timespec_t *, + rcm_info_t **); +int notify_resource_resume(char **, pid_t, uint_t, int, rcm_info_t **); +int process_resource_offline(char **, pid_t, uint_t, int, rcm_info_t **); +int notify_resource_online(char **, pid_t, uint_t, int, rcm_info_t **); +int notify_resource_remove(char **, pid_t, uint_t, int, rcm_info_t **); +int add_resource_client(char *, char *, pid_t, uint_t, rcm_info_t **); +int remove_resource_client(char *, char *, pid_t, uint_t); +int get_resource_info(char **, uint_t, int, rcm_info_t **); +int notify_resource_event(char *, pid_t, uint_t, int, nvlist_t *, + rcm_info_t **); +int request_capacity_change(char *, pid_t, uint_t, int, nvlist_t *, + rcm_info_t **); +int notify_capacity_change(char *, pid_t, uint_t, int, nvlist_t *, + rcm_info_t **); +int get_resource_state(char *, pid_t, rcm_info_t **); +rcm_info_t *rsrc_mod_info(); + +/* dr request list routines */ + +rcm_info_t *rsrc_dr_info(); +void clean_dr_list(); +int 
dr_req_add(char *, pid_t, uint_t, int, int, timespec_t *, rcm_info_t **); +int dr_req_update(char *, pid_t, uint_t, int, int, rcm_info_t **); +int dr_req_lookup(int, char *); +void dr_req_remove(char *, uint_t); +int info_req_add(char *, uint_t, int); +void info_req_remove(int); +int rsrc_check_lock_conflicts(char *, uint_t, int, rcm_info_t **); + +/* node related routines */ + +int rsrc_get_type(const char *); +int rsrc_node_find(char *, int, rsrc_node_t **); +int rsrc_node_add_user(rsrc_node_t *, char *, char *, pid_t, uint_t); +int rsrc_node_remove_user(rsrc_node_t *, char *, pid_t, uint_t); +client_t *rsrc_client_find(char *, pid_t, client_t **); +int rsrc_client_action_list(client_t *, int cmd, void *); + +/* tree related routines */ + +int rsrc_usage_info(char **, uint_t, int, rcm_info_t **); +int rsrc_tree_action(rsrc_node_t *, int, tree_walk_arg_t *); + +/* database helpers and misc */ + +void rcmd_set_state(int); +int rcmd_thr_incr(int); +void rcmd_thr_decr(void); +void rcmd_thr_signal(void); +void rcmd_lock_init(void); +void rcmd_db_init(void); +void rcmd_db_sync(void); +void rcmd_db_clean(void); +void rcmd_start_timer(int); +void rcmd_exit(int); +void rcm_log_message(int, char *, ...); +void rcm_log_msg(int, char *, ...); +void add_busy_rsrc_to_list(char *, pid_t, int, int, char *, const char *, + const char *, nvlist_t *, rcm_info_t **); +char *resolve_name(char *); +int proc_exist(pid_t); +void *s_malloc(size_t); +void *s_calloc(int, size_t); +void *s_realloc(void *, size_t); +char *s_strdup(const char *); + +/* + * RCM queuing function prototypes + */ +void rcm_init_queue(rcm_queue_t *); +void rcm_enqueue_head(rcm_queue_t *, rcm_queue_t *); +void rcm_enqueue_tail(rcm_queue_t *, rcm_queue_t *); +void rcm_enqueue(rcm_queue_t *, rcm_queue_t *); +rcm_queue_t *rcm_dequeue_head(rcm_queue_t *); +rcm_queue_t *rcm_dequeue_tail(rcm_queue_t *); +void rcm_dequeue(rcm_queue_t *); + +/* + * Function protoypes related to rcm scripting + */ +int 
script_main_init(void); +int script_main_fini(void); +struct rcm_mod_ops *script_init(module_t *); +char *script_info(module_t *); +int script_fini(module_t *); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RCM_IMPL_H */ diff --git a/usr/src/cmd/rcm_daemon/common/rcm_lock.c b/usr/src/cmd/rcm_daemon/common/rcm_lock.c new file mode 100644 index 0000000000..0f5b3012c1 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_lock.c @@ -0,0 +1,1640 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "rcm_impl.h" +#include "rcm_module.h" + +/* + * Global locks + */ +mutex_t rcm_req_lock; /* protects global dr & info request list */ + +/* + * Daemon state file + */ +static int state_fd; +#define RCM_STATE_FILE "/var/run/rcm_daemon_state" +#define N_REQ_CHUNK 10 /* grow 10 entries at a time */ + +/* + * Daemon timeout value + */ +#define RCM_DAEMON_TIMEOUT 300 /* 5 minutes idle time */ + +/* + * Struct for a list of outstanding rcm requests + */ +typedef struct { + int seq_num; /* sequence number of request */ + int state; /* current state */ + pid_t pid; /* pid of initiator */ + uint_t flag; /* request flags */ + int type; /* resource(device) type */ + timespec_t interval; /* suspend interval */ + char device[MAXPATHLEN]; /* name of device or resource */ +} req_t; + +typedef struct { + int n_req; + int n_req_max; /* number of req_t's to follow */ + int n_seq_max; /* last sequence number */ + int idle_timeout; /* persist idle timeout value */ + req_t req[1]; + /* more req_t follows */ +} req_list_t; + +static req_list_t *dr_req_list; +static req_list_t *info_req_list; + +static const char *locked_info = "DR operation in progress"; +static const char *locked_err = "Resource is busy"; + +static int rcmd_get_state(); +static void add_to_polling_list(pid_t); +static void remove_from_polling_list(pid_t); + +void start_polling_thread(); +static void stop_polling_thread(); + +/* + * Initialize request lists required for locking + */ +void +rcmd_lock_init(void) +{ + int size; + struct stat fbuf; + + /* + * Start info list with one slot, then grow on demand. 
+ */ + info_req_list = s_calloc(1, sizeof (req_list_t)); + info_req_list->n_req_max = 1; + + /* + * Open daemon state file and map in contents + */ + state_fd = open(RCM_STATE_FILE, O_CREAT|O_RDWR, 0600); + if (state_fd == -1) { + rcm_log_message(RCM_ERROR, gettext("cannot open %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + } + + if (fstat(state_fd, &fbuf) != 0) { + rcm_log_message(RCM_ERROR, gettext("cannot stat %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + } + + size = fbuf.st_size; + if (size == 0) { + size = sizeof (req_list_t); + if (ftruncate(state_fd, size) != 0) { + rcm_log_message(RCM_ERROR, + gettext("cannot truncate %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + } + } + + /*LINTED*/ + dr_req_list = (req_list_t *)mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_SHARED, state_fd, 0); + if (dr_req_list == MAP_FAILED) { + rcm_log_message(RCM_ERROR, gettext("cannot mmap %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + } + + /* + * Initial size is one entry + */ + if (dr_req_list->n_req_max == 0) { + dr_req_list->n_req_max = 1; + (void) fsync(state_fd); + return; + } + + rcm_log_message(RCM_DEBUG, "n_req = %d, n_req_max = %d\n", + dr_req_list->n_req, dr_req_list->n_req_max); + + /* + * Recover the daemon state + */ + clean_dr_list(); +} + +/* + * Get a unique sequence number--to be called with rcm_req_lock held. + */ +static int +get_seq_number() +{ + int number; + + if (dr_req_list == NULL) + return (0); + + dr_req_list->n_seq_max++; + number = (dr_req_list->n_seq_max << SEQ_NUM_SHIFT); + (void) fsync(state_fd); + + return (number); +} + +/* + * Find entry in list with the same resource name and sequence number. + * If seq_num == -1, no seq_num matching is required. + */ +static req_t * +find_req_entry(char *device, uint_t flag, int seq_num, req_list_t *list) +{ + int i; + + /* + * Look for entry with the same resource and seq_num. + * Also match RCM_FILESYS field in flag. 
+ */ + for (i = 0; i < list->n_req_max; i++) { + if (list->req[i].state == RCM_STATE_REMOVE) + /* stale entry */ + continue; + /* + * We need to distiguish a file system root from the directory + * it is mounted on. + * + * Applications are not aware of any difference between the + * two, but the system keeps track of it internally by + * checking for mount points while traversing file path. + * In a similar spirit, RCM is keeping this difference as + * an implementation detail. + */ + if ((strcmp(device, list->req[i].device) != 0) || + (list->req[i].flag & RCM_FILESYS) != (flag & RCM_FILESYS)) + /* different resource */ + continue; + + if ((seq_num != -1) && ((seq_num >> SEQ_NUM_SHIFT) != + (list->req[i].seq_num >> SEQ_NUM_SHIFT))) + /* different base seqnum */ + continue; + + return (&list->req[i]); + } + + return (NULL); +} + +/* + * Get the next empty req_t entry. If no entry exists, grow the list. + */ +static req_t * +get_req_entry(req_list_t **listp) +{ + int i; + int n_req = (*listp)->n_req; + int n_req_max = (*listp)->n_req_max; + + /* + * If the list is full, grow the list and return the first + * entry in the new portion. 
+ */ + if (n_req == n_req_max) { + int newsize; + + n_req_max += N_REQ_CHUNK; + newsize = sizeof (req_list_t) + (n_req_max - 1) * + sizeof (req_t); + + if (listp == &info_req_list) { + *listp = s_realloc(*listp, newsize); + } else if (ftruncate(state_fd, newsize) != 0) { + rcm_log_message(RCM_ERROR, + gettext("cannot truncate %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + /*LINTED*/ + } else if ((*listp = (req_list_t *)mmap(NULL, newsize, + PROT_READ|PROT_WRITE, MAP_SHARED, state_fd, 0)) == + MAP_FAILED) { + rcm_log_message(RCM_ERROR, + gettext("cannot mmap %s: %s\n"), + RCM_STATE_FILE, strerror(errno)); + rcmd_exit(errno); + } + + /* Initialize the new entries */ + for (i = (*listp)->n_req_max; i < n_req_max; i++) { + (*listp)->req[i].state = RCM_STATE_REMOVE; + (void) strcpy((*listp)->req[i].device, ""); + } + + (*listp)->n_req_max = n_req_max; + (*listp)->n_req++; + return (&(*listp)->req[n_req]); + } + + /* + * List contains empty slots, find it. + */ + for (i = 0; i < n_req_max; i++) { + if (((*listp)->req[i].device[0] == '\0') || + ((*listp)->req[i].state == RCM_STATE_REMOVE)) { + break; + } + } + + assert(i < n_req_max); /* empty slot must exist */ + + (*listp)->n_req++; + return (&(*listp)->req[i]); +} + +/* + * When one resource depends on multiple resources, it's possible that + * rcm_get_info can be called multiple times on the resource, resulting + * in duplicate information. By assigning a unique sequence number to + * each rcm_get_info operation, this duplication can be eliminated. + * + * Insert a dr entry in info_req_list + */ +int +info_req_add(char *rsrcname, uint_t flag, int seq_num) +{ + int error = 0; + char *device; + req_t *req; + + rcm_log_message(RCM_TRACE2, "info_req_add(%s, %d)\n", + rsrcname, seq_num); + + device = resolve_name(rsrcname); + (void) mutex_lock(&rcm_req_lock); + + /* + * Look for entry with the same resource and seq_num. 
+ * If it exists, we return an error so that such + * information is not gathered more than once. + */ + if (find_req_entry(device, flag, seq_num, info_req_list) != NULL) { + rcm_log_message(RCM_DEBUG, "getinfo cycle: %s %d \n", + device, seq_num); + error = -1; + goto out; + } + + /* + * Get empty entry and fill in seq_num and device. + */ + req = get_req_entry(&info_req_list); + req->seq_num = seq_num; + req->state = RCM_STATE_ONLINE; /* mark that the entry is in use */ + req->flag = flag; + (void) strcpy(req->device, device); + +out: + (void) mutex_unlock(&rcm_req_lock); + free(device); + + return (error); +} + +/* + * Remove all entries associated with seq_num from info_req_list + */ +void +info_req_remove(int seq_num) +{ + int i; + + rcm_log_message(RCM_TRACE3, "info_req_remove(%d)\n", seq_num); + + seq_num >>= SEQ_NUM_SHIFT; + (void) mutex_lock(&rcm_req_lock); + + /* remove all entries with seq_num */ + for (i = 0; i < info_req_list->n_req_max; i++) { + if (info_req_list->req[i].state == RCM_STATE_REMOVE) + continue; + + if ((info_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != seq_num) + continue; + + info_req_list->req[i].state = RCM_STATE_REMOVE; + info_req_list->n_req--; + } + + /* + * We don't shrink the info_req_list size for now. + */ + (void) mutex_unlock(&rcm_req_lock); +} + +/* + * Checking lock conflicts. 
There is a conflict if: + * - attempt to DR a node when either its ancester or descendent + * is in the process of DR + * - attempt to register for a node when its ancester is locked for DR + */ +static int +check_lock(char *device, uint_t flag, int cflag, rcm_info_t **info) +{ + int i, ret = RCM_SUCCESS; + + if (info) + *info = NULL; + + /* + * During daemon initialization, don't check locks + */ + if (dr_req_list == NULL) + return (ret); + + for (i = 0; i < dr_req_list->n_req; i++) { + req_t *req = &dr_req_list->req[i]; + char *dr_dev = req->device; + + /* + * Skip empty entries + */ + if ((req->state == RCM_STATE_REMOVE) || (dr_dev[0] == '\0')) + continue; + + /* + * Make sure that none of the ancestors of dr_dev is + * being operated upon. + */ + if (EQUAL(device, dr_dev) || DESCENDENT(device, dr_dev)) { + /* + * An exception to this is the filesystem. + * We should allowed a filesystem rooted at a + * child directory to be unmounted. + */ + if ((flag & RCM_FILESYS) && (!EQUAL(device, dr_dev) || + ((dr_req_list->req[i].flag & RCM_FILESYS) == 0))) + continue; + + assert(info != 0); + + add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid, + dr_req_list->req[i].state, + dr_req_list->req[i].seq_num, NULL, locked_info, + locked_err, NULL, info); + ret = RCM_CONFLICT; + break; + } + + if ((cflag == LOCK_FOR_DR) && DESCENDENT(dr_dev, device)) { + /* + * Check descendents only for DR request. + * + * Could have multiple descendents doing DR, + * we want to find them all. 
+ */ + assert(info != 0); + + add_busy_rsrc_to_list(dr_dev, dr_req_list->req[i].pid, + dr_req_list->req[i].state, + dr_req_list->req[i].seq_num, NULL, locked_info, + locked_err, NULL, info); + ret = RCM_CONFLICT; + /* don't break here, need to find all conflicts */ + } + } + + return (ret); +} + +/* + * Check for lock conflicts for DR operation or client registration + */ +int +rsrc_check_lock_conflicts(char *rsrcname, uint_t flag, int cflag, + rcm_info_t **info) +{ + int result; + char *device; + + device = resolve_name(rsrcname); + result = check_lock(device, flag, cflag, info); + free(device); + + return (result); +} + +static int +transition_state(int state) +{ + /* + * If the resource state is in transition, ask caller to + * try again. + */ + switch (state) { + case RCM_STATE_OFFLINING: + case RCM_STATE_SUSPENDING: + case RCM_STATE_RESUMING: + case RCM_STATE_ONLINING: + case RCM_STATE_REMOVING: + + return (1); + + default: + /*FALLTHROUGH*/ + break; + } + return (0); +} + +/* + * Update a dr entry in dr_req_list + */ +/*ARGSUSED*/ +static int +dr_req_update_entry(char *device, pid_t pid, uint_t flag, int state, + int seq_num, timespec_t *interval, rcm_info_t **infop) +{ + req_t *req; + + /* + * Find request entry. If not found, return RCM_FAILURE + */ + req = find_req_entry(device, flag, -1, dr_req_list); + + if (req == NULL) { + switch (state) { + case RCM_STATE_OFFLINE_QUERYING: + case RCM_STATE_SUSPEND_QUERYING: + case RCM_STATE_OFFLINING: + case RCM_STATE_SUSPENDING: + /* could be re-do operation, no error message */ + break; + + default: + rcm_log_message(RCM_DEBUG, + "update non-existing resource %s\n", device); + } + return (RCM_FAILURE); + } + + /* + * During initialization, update is unconditional (forced) + * in order to bring the daemon up in a sane state. + */ + if (rcmd_get_state() == RCMD_INIT) + goto update; + + /* + * Don't allow update with mismatched initiator pid. This could happen + * as part of normal operation. 
+ */ + if (pid != req->pid) { + rcm_log_message(RCM_INFO, + gettext("mismatched dr initiator pid: %ld %ld\n"), + req->pid, pid); + goto failure; + } + + rcm_log_message(RCM_TRACE4, + "dr_req_update_entry: state=%d, device=%s\n", + req->state, req->device); + + /* + * Check that the state transition is valid + */ + switch (state) { + case RCM_STATE_OFFLINE_QUERYING: + case RCM_STATE_OFFLINING: + /* + * This is the case of re-offlining, which applies only + * if a previous attempt failed. + */ + if ((req->state != RCM_STATE_OFFLINE_FAIL) && + (req->state != RCM_STATE_OFFLINE_QUERYING) && + (req->state != RCM_STATE_OFFLINE_QUERY) && + (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) && + (req->state != RCM_STATE_OFFLINE)) { + rcm_log_message(RCM_WARNING, + gettext("%s: invalid offlining from state %d\n"), + device, req->state); + goto failure; + } + break; + + case RCM_STATE_SUSPEND_QUERYING: + case RCM_STATE_SUSPENDING: + /* + * This is the case of re-suspending, which applies only + * if a previous attempt failed. 
+ */ + if ((req->state != RCM_STATE_SUSPEND_FAIL) && + (req->state != RCM_STATE_SUSPEND_QUERYING) && + (req->state != RCM_STATE_SUSPEND_QUERY) && + (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) && + (req->state != RCM_STATE_SUSPEND)) { + rcm_log_message(RCM_WARNING, + gettext("%s: invalid suspending from state %d\n"), + device, req->state); + goto failure; + } + break; + + case RCM_STATE_RESUMING: + if ((req->state != RCM_STATE_SUSPEND) && + (req->state != RCM_STATE_SUSPEND_QUERYING) && + (req->state != RCM_STATE_SUSPEND_QUERY) && + (req->state != RCM_STATE_SUSPEND_QUERY_FAIL) && + (req->state != RCM_STATE_SUSPEND_FAIL)) { + rcm_log_message(RCM_DEBUG, + "%s: invalid resuming from state %d\n", + device, req->state); + goto failure; + } + break; + + case RCM_STATE_ONLINING: + if ((req->state != RCM_STATE_OFFLINE) && + (req->state != RCM_STATE_OFFLINE_QUERYING) && + (req->state != RCM_STATE_OFFLINE_QUERY) && + (req->state != RCM_STATE_OFFLINE_QUERY_FAIL) && + (req->state != RCM_STATE_OFFLINE_FAIL)) { + rcm_log_message(RCM_INFO, + gettext("%s: invalid onlining from state %d\n"), + device, req->state); + goto failure; + } + break; + + case RCM_STATE_REMOVING: + if ((req->state != RCM_STATE_OFFLINE) && + (req->state != RCM_STATE_OFFLINE_FAIL)) { + rcm_log_message(RCM_INFO, + gettext("%s: invalid removing from state %d\n"), + device, req->state); + goto failure; + } + break; + + case RCM_STATE_SUSPEND_FAIL: + assert(req->state == RCM_STATE_SUSPENDING); + break; + + case RCM_STATE_OFFLINE_FAIL: + assert(req->state == RCM_STATE_OFFLINING); + break; + + case RCM_STATE_SUSPEND: + assert(req->state == RCM_STATE_SUSPENDING); + break; + + case RCM_STATE_OFFLINE: + assert(req->state == RCM_STATE_OFFLINING); + break; + + case RCM_STATE_ONLINE: + assert((req->state == RCM_STATE_RESUMING) || + (req->state == RCM_STATE_ONLINING)); + break; + + default: /* shouldn't be here */ + rcm_log_message(RCM_ERROR, + gettext("invalid update to dr state: %d\n"), state); + return (RCM_FAILURE); + 
} + +update: + /* + * update the state, interval, and sequence number; sync state file + */ + req->state = state; + req->seq_num = seq_num; + + if (interval) + req->interval = *interval; + else + bzero(&req->interval, sizeof (timespec_t)); + + (void) fsync(state_fd); + return (RCM_SUCCESS); + +failure: + if (infop != NULL) { + add_busy_rsrc_to_list(req->device, req->pid, req->state, + req->seq_num, NULL, locked_info, locked_err, NULL, infop); + } + + /* + * A request may be left in a transition state because the operator + * typed ctrl-C. In this case, the daemon thread continues to run + * and will eventually put the state in a non-transitional state. + * + * To be safe, we return EAGAIN to allow librcm to loop and retry. + * If we are called from a module, loop & retry could result in a + * deadlock. The called will check for this case and turn EAGAIN + * into RCM_CONFLICT. + */ + if (transition_state(req->state)) { + return (EAGAIN); + } + + return (RCM_CONFLICT); +} + +/* + * Insert a dr entry in dr_req_list + */ +int +dr_req_add(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num, + timespec_t *interval, rcm_info_t **info) +{ + int error; + char *device; + req_t *req; + + rcm_log_message(RCM_TRACE3, "dr_req_add(%s, %ld, 0x%x, %d, %d, %p)\n", + rsrcname, pid, flag, state, seq_num, (void *)info); + + device = resolve_name(rsrcname); + if (device == NULL) + return (EINVAL); + + (void) mutex_lock(&rcm_req_lock); + + /* + * In the re-offline/suspend case, attempt to update dr request. + * + * If this succeeds, return success; + * If this fails because of a conflict, return error; + * If this this fails because no entry exists, add a new entry. 
+ */ + error = dr_req_update_entry(device, pid, flag, state, seq_num, interval, + info); + + switch (error) { + case RCM_FAILURE: + /* proceed to add a new entry */ + break; + + case RCM_CONFLICT: + case RCM_SUCCESS: + case EAGAIN: + default: + goto out; + } + + /* + * Check for lock conflicts + */ + error = check_lock(device, flag, LOCK_FOR_DR, info); + if (error != RCM_SUCCESS) { + error = RCM_CONFLICT; + goto out; + } + + /* + * Get empty request entry, fill in values and sync state file + */ + req = get_req_entry(&dr_req_list); + + req->seq_num = seq_num; + req->pid = pid; + req->flag = flag; + req->state = state; + req->type = rsrc_get_type(device); + (void) strcpy(req->device, device); + + /* cache interval for failure recovery */ + if (interval) + req->interval = *interval; + else + bzero(&req->interval, sizeof (timespec_t)); + + (void) fsync(state_fd); + + /* + * Add initiator pid to polling list + */ + add_to_polling_list(req->pid); + +out: + (void) mutex_unlock(&rcm_req_lock); + free(device); + + return (error); +} + +/* + * Update a dr entry in dr_req_list + */ +/*ARGSUSED*/ +int +dr_req_update(char *rsrcname, pid_t pid, uint_t flag, int state, int seq_num, + rcm_info_t **info) +{ + int error; + char *device = resolve_name(rsrcname); + + rcm_log_message(RCM_TRACE3, "dr_req_update(%s, %ld, 0x%x, %d, %d)\n", + rsrcname, pid, flag, state, seq_num); + + (void) mutex_lock(&rcm_req_lock); + error = dr_req_update_entry(device, pid, flag, state, seq_num, NULL, + info); + (void) mutex_unlock(&rcm_req_lock); + free(device); + + return (error); +} + +/* + * This function scans the DR request list for the next, non-removed + * entry that is part of the specified sequence. The 'device' name + * of the entry is copied into the provided 'rsrc' buffer. + * + * The 'rsrc' buffer is required because the DR request list is only + * locked during the duration of this lookup. Giving a direct pointer + * to something in the list would be unsafe. 
+ */ +int +dr_req_lookup(int seq_num, char *rsrc) +{ + int i; + int len; + int base = (seq_num >> SEQ_NUM_SHIFT); + int retval = RCM_FAILURE; + + if (rsrc == NULL) { + return (RCM_FAILURE); + } + + (void) mutex_lock(&rcm_req_lock); + + for (i = 0; i < dr_req_list->n_req_max; i++) { + + /* Skip removed or non-matching entries */ + if ((dr_req_list->req[i].state == RCM_STATE_REMOVE) || + ((dr_req_list->req[i].seq_num >> SEQ_NUM_SHIFT) != base)) { + continue; + } + + /* Copy the next-matching 'device' name into 'rsrc' */ + len = strlcpy(rsrc, dr_req_list->req[i].device, MAXPATHLEN); + if (len < MAXPATHLEN) { + retval = RCM_SUCCESS; + } + break; + } + + (void) mutex_unlock(&rcm_req_lock); + + return (retval); +} + +/* + * Remove a dr entry in dr_req_list + */ +void +dr_req_remove(char *rsrcname, uint_t flag) +{ + req_t *req; + char *device = resolve_name(rsrcname); + + rcm_log_message(RCM_TRACE3, "dr_req_remove(%s)\n", rsrcname); + + (void) mutex_lock(&rcm_req_lock); + + /* find entry */ + req = find_req_entry(device, flag, -1, dr_req_list); + free(device); + + if (req == NULL) { + (void) mutex_unlock(&rcm_req_lock); + rcm_log_message(RCM_WARNING, + gettext("dr_req entry %s not found\n"), rsrcname); + return; + } + + req->state = RCM_STATE_REMOVE; + dr_req_list->n_req--; + (void) fsync(state_fd); + + /* + * remove pid from polling list + */ + remove_from_polling_list(req->pid); + + /* + * We don't shrink the dr_req_list size for now. + * Shouldn't cause big memory leaks. 
+ */ + (void) mutex_unlock(&rcm_req_lock); +} + +/* + * Return the list of ongoing dr operation requests + */ +rcm_info_t * +rsrc_dr_info() +{ + int i; + rcm_info_t *info; + rcm_info_t *result = NULL; + char *rsrc; + int len; + + rcm_log_message(RCM_TRACE2, "rsrc_dr_info()\n"); + + (void) mutex_lock(&rcm_req_lock); + for (i = 0; i < dr_req_list->n_req_max; i++) { + if (dr_req_list->req[i].state == RCM_STATE_REMOVE) + continue; + + if (dr_req_list->req[i].device[0] == '\0') + continue; + + if (dr_req_list->req[i].flag & RCM_FILESYS) { + len = strlen(dr_req_list->req[i].device) + 5; + rsrc = s_malloc(len); + (void) snprintf(rsrc, len, "%s(fs)", + dr_req_list->req[i].device); + } else { + rsrc = s_strdup(dr_req_list->req[i].device); + } + + info = s_calloc(1, sizeof (*info)); + if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_alloc=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + + if (errno = nvlist_add_string(info->info, RCM_RSRCNAME, rsrc)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_add=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + (void) free(rsrc); + + if (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID, + dr_req_list->req[i].pid)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_add=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + + if (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM, + dr_req_list->req[i].seq_num)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_add=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + + if (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE, + dr_req_list->req[i].state)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_add=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + + if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO, + (char *)locked_info)) { + rcm_log_message(RCM_ERROR, + gettext("failed (nvlist_add=%s).\n"), + strerror(errno)); + rcmd_exit(errno); + } + + info->next = 
result; + result = info; + } + (void) mutex_unlock(&rcm_req_lock); + + return (result); +} + +/* + * Eliminate entries whose dr initiator is no longer running + * and recover daemon state during daemon restart. + * + * This routine is called from either during daemon initialization + * after all modules have registered resources or from the cleanup + * thread. In either case, it is the only thread running in the + * daemon. + */ +void +clean_dr_list() +{ + int i; + struct clean_list { + struct clean_list *next; + char *rsrcname; + pid_t pid; + int seq_num; + int state; + timespec_t interval; + } *tmp, *list = NULL; + char *rsrcnames[2]; + + rcm_log_message(RCM_TRACE3, + "clean_dr_list(): look for stale dr initiators\n"); + + rsrcnames[1] = NULL; + + /* + * Make a list of entries to recover. This is necessary because + * the recovery operation will modify dr_req_list. + */ + (void) mutex_lock(&rcm_req_lock); + for (i = 0; i < dr_req_list->n_req_max; i++) { + /* skip empty entries */ + if (dr_req_list->req[i].state == RCM_STATE_REMOVE) + continue; + + if (dr_req_list->req[i].device[0] == '\0') + continue; + + /* skip cascade operations */ + if (dr_req_list->req[i].seq_num & SEQ_NUM_MASK) + continue; + + /* + * In the cleanup case, ignore entries with initiators alive + */ + if ((rcmd_get_state() == RCMD_CLEANUP) && + proc_exist(dr_req_list->req[i].pid)) + continue; + + rcm_log_message(RCM_TRACE1, + "found stale entry: %s\n", dr_req_list->req[i].device); + + tmp = s_malloc(sizeof (*tmp)); + tmp->rsrcname = s_strdup(dr_req_list->req[i].device); + tmp->state = dr_req_list->req[i].state; + tmp->pid = dr_req_list->req[i].pid; + tmp->seq_num = dr_req_list->req[i].seq_num; + tmp->interval = dr_req_list->req[i].interval; + tmp->next = list; + list = tmp; + } + (void) mutex_unlock(&rcm_req_lock); + + if (list == NULL) + return; + + /* + * If everything worked normally, we shouldn't be here. + * Since we are here, something went wrong, so say something. 
+ */ + if (rcmd_get_state() == RCMD_INIT) { + rcm_log_message(RCM_NOTICE, gettext("rcm_daemon died " + "unexpectedly, recovering previous daemon state\n")); + } else { + rcm_log_message(RCM_INFO, gettext("one or more dr initiator " + "died, attempting automatic recovery\n")); + } + + while (list) { + tmp = list; + list = tmp->next; + + switch (tmp->state) { + case RCM_STATE_OFFLINE_QUERY: + case RCM_STATE_OFFLINE_QUERY_FAIL: + rsrcnames[0] = tmp->rsrcname; + if (proc_exist(tmp->pid)) { + /* redo */ + (void) process_resource_offline(rsrcnames, + tmp->pid, RCM_QUERY, tmp->seq_num, NULL); + } else { + /* undo */ + (void) notify_resource_online(rsrcnames, + tmp->pid, 0, tmp->seq_num, NULL); + } + break; + + case RCM_STATE_OFFLINE: + case RCM_STATE_OFFLINE_FAIL: + rsrcnames[0] = tmp->rsrcname; + if (proc_exist(tmp->pid)) { + /* redo */ + (void) process_resource_offline(rsrcnames, + tmp->pid, 0, tmp->seq_num, NULL); + } else { + /* undo */ + (void) notify_resource_online(rsrcnames, + tmp->pid, 0, tmp->seq_num, NULL); + } + break; + + case RCM_STATE_SUSPEND_QUERY: + case RCM_STATE_SUSPEND_QUERY_FAIL: + rsrcnames[0] = tmp->rsrcname; + if (proc_exist(tmp->pid)) { + /* redo */ + (void) process_resource_suspend(rsrcnames, + tmp->pid, RCM_QUERY, tmp->seq_num, + &tmp->interval, NULL); + } else { + /* undo */ + (void) notify_resource_resume(rsrcnames, + tmp->pid, 0, tmp->seq_num, NULL); + } + break; + + case RCM_STATE_SUSPEND: + case RCM_STATE_SUSPEND_FAIL: + rsrcnames[0] = tmp->rsrcname; + if (proc_exist(tmp->pid)) { + /* redo */ + (void) process_resource_suspend(rsrcnames, + tmp->pid, 0, tmp->seq_num, &tmp->interval, + NULL); + } else { + /* undo */ + (void) notify_resource_resume(rsrcnames, + tmp->pid, 0, tmp->seq_num, NULL); + } + break; + + case RCM_STATE_OFFLINING: + case RCM_STATE_ONLINING: + rsrcnames[0] = tmp->rsrcname; + (void) notify_resource_online(rsrcnames, tmp->pid, 0, + tmp->seq_num, NULL); + break; + + case RCM_STATE_SUSPENDING: + case RCM_STATE_RESUMING: + 
rsrcnames[0] = tmp->rsrcname; + (void) notify_resource_resume(rsrcnames, tmp->pid, 0, + tmp->seq_num, NULL); + break; + + case RCM_STATE_REMOVING: + rsrcnames[0] = tmp->rsrcname; + (void) notify_resource_remove(rsrcnames, tmp->pid, 0, + tmp->seq_num, NULL); + break; + + default: + rcm_log_message(RCM_WARNING, + gettext("%s in unknown state %d\n"), + tmp->rsrcname, tmp->state); + break; + } + free(tmp->rsrcname); + free(tmp); + } +} + +/* + * Selected thread blocking based on event type + */ +barrier_t barrier; + +/* + * Change barrier state: + * RCMD_INIT - daemon is intializing, only register allowed + * RCMD_NORMAL - normal daemon processing + * RCMD_CLEANUP - cleanup thread is waiting or running + */ +int +rcmd_get_state() +{ + return (barrier.state); +} + +void +rcmd_set_state(int state) +{ + /* + * The state transition is as follows: + * INIT --> NORMAL <---> CLEANUP + * The implementation favors the cleanup thread + */ + + (void) mutex_lock(&barrier.lock); + barrier.state = state; + + switch (state) { + case RCMD_CLEANUP: + /* + * Wait for existing threads to exit + */ + barrier.wanted++; + while (barrier.thr_count != 0) + (void) cond_wait(&barrier.cv, &barrier.lock); + barrier.wanted--; + barrier.thr_count = -1; + break; + + case RCMD_INIT: + case RCMD_NORMAL: + default: + if (barrier.thr_count == -1) + barrier.thr_count = 0; + if (barrier.wanted) + (void) cond_broadcast(&barrier.cv); + break; + } + + (void) mutex_unlock(&barrier.lock); +} + +/* + * Increment daemon thread count + */ +int +rcmd_thr_incr(int cmd) +{ + int seq_num; + + (void) mutex_lock(&barrier.lock); + /* + * Set wanted flag + */ + barrier.wanted++; + + /* + * Wait till it is safe for daemon to perform the operation + * + * NOTE: if a module registers by passing a request to the + * client proccess, we may need to allow register + * to come through during daemon initialization. 
+ */ + while (barrier.state != RCMD_NORMAL) + (void) cond_wait(&barrier.cv, &barrier.lock); + + if ((cmd == CMD_EVENT) || + (cmd == CMD_REGISTER) || + (cmd == CMD_UNREGISTER)) { + /* + * Event passthru and register ops don't need sequence number + */ + seq_num = -1; + } else { + /* + * Non register operation gets a sequence number + */ + seq_num = get_seq_number(); + } + barrier.wanted--; + barrier.thr_count++; + (void) mutex_unlock(&barrier.lock); + + if ((cmd == CMD_OFFLINE) || + (cmd == CMD_SUSPEND) || + (cmd == CMD_GETINFO)) { + /* + * For these operations, need to ask modules to + * register any new resources that came online. + * + * This is because mount/umount are not instrumented + * to register with rcm before using system resources. + * Certain registration ops may fail during sync, which + * indicates race conditions. This cannot be avoided + * without changing mount/umount. + */ + rcmd_db_sync(); + } + + return (seq_num); +} + +/* + * Decrement thread count + */ +void +rcmd_thr_decr() +{ + /* + * Decrement thread count and wake up reload/cleanup thread. 
+ */ + (void) mutex_lock(&barrier.lock); + barrier.last_update = time(NULL); + if (--barrier.thr_count == 0) + (void) cond_broadcast(&barrier.cv); + (void) mutex_unlock(&barrier.lock); +} + +/* + * Wakeup all waiting threads as a result of SIGHUP + */ +static int sighup_received = 0; + +void +rcmd_thr_signal() +{ + (void) mutex_lock(&barrier.lock); + sighup_received = 1; + (void) cond_broadcast(&barrier.cv); + (void) mutex_unlock(&barrier.lock); +} + +void +rcmd_start_timer(int timeout) +{ + timestruc_t abstime; + + if (timeout == 0) + timeout = RCM_DAEMON_TIMEOUT; /* default to 5 minutes */ + else + dr_req_list->idle_timeout = timeout; /* persist timeout */ + + if (timeout > 0) { + abstime.tv_sec = time(NULL) + timeout; + } + + (void) mutex_lock(&barrier.lock); + for (;;) { + int idletime; + int is_active; + + if (timeout > 0) + (void) cond_timedwait(&barrier.cv, &barrier.lock, + &abstime); + else + (void) cond_wait(&barrier.cv, &barrier.lock); + + /* + * If sighup received, change timeout to 0 so the daemon is + * shut down at the first possible moment + */ + if (sighup_received) + timeout = 0; + + /* + * If timeout is negative, never shutdown the daemon + */ + if (timeout < 0) + continue; + + /* + * Check for ongoing/pending activity + */ + is_active = (barrier.thr_count || barrier.wanted || + (dr_req_list->n_req != 0)); + if (is_active) { + abstime.tv_sec = time(NULL) + timeout; + continue; + } + + /* + * If idletime is less than timeout, continue to wait + */ + idletime = time(NULL) - barrier.last_update; + if (idletime < timeout) { + abstime.tv_sec = barrier.last_update + timeout; + continue; + } + break; + } + + (void) script_main_fini(); + + rcm_log_message(RCM_INFO, gettext("rcm_daemon is shut down.\n")); + rcmd_exit(0); + /*NOTREACHED*/ +} + +/* + * Code related to polling client pid's + * Not declared as static so that we can find this structure easily + * in the core file. 
+ */ +struct { + int n_pids; + int n_max_pids; + thread_t poll_tid; /* poll thread id */ + int signaled; + pid_t *pids; + int *refcnt; + struct pollfd *fds; + cond_t cv; /* the associated lock is rcm_req_lock */ +} polllist; + +static int +find_pid_index(pid_t pid) +{ + int i; + + for (i = 0; i < polllist.n_pids; i++) { + if (polllist.pids[i] == pid) { + return (i); + } + } + return (-1); +} + +/* + * Resize buffer for new pids + */ +static int +get_pid_index() +{ + const int n_chunk = 10; + + int n_max; + int index = polllist.n_pids; + + if (polllist.n_pids < polllist.n_max_pids) { + polllist.n_pids++; + return (index); + } + + if (polllist.n_max_pids == 0) { + n_max = n_chunk; + polllist.pids = s_calloc(n_max, sizeof (pid_t)); + polllist.refcnt = s_calloc(n_max, sizeof (int)); + polllist.fds = s_calloc(n_max, sizeof (struct pollfd)); + } else { + n_max = polllist.n_max_pids + n_chunk; + polllist.pids = s_realloc(polllist.pids, + n_max * sizeof (pid_t)); + polllist.refcnt = s_realloc(polllist.refcnt, + n_max * sizeof (int)); + polllist.fds = s_realloc(polllist.fds, + n_max * sizeof (struct pollfd)); + } + polllist.n_max_pids = n_max; + polllist.n_pids++; + return (index); +} + +/* + * rcm_req_lock must be held + */ +static void +add_to_polling_list(pid_t pid) +{ + int fd, index; + char procfile[MAXPATHLEN]; + + if (pid == (pid_t)0) + return; + + rcm_log_message(RCM_TRACE1, "add_to_polling_list(%ld)\n", pid); + + /* + * Need to stop the poll thread before manipulating the polllist + * since poll thread may possibly be using polllist.fds[] and + * polllist.n_pids. As an optimization, first check if the pid + * is already in the polllist. If it is, there is no need to + * stop the poll thread. 
Just increment the pid reference count + * and return; + */ + index = find_pid_index(pid); + if (index != -1) { + polllist.refcnt[index]++; + return; + } + + stop_polling_thread(); + + /* + * In an attempt to stop the poll thread we may have released + * and reacquired rcm_req_lock. So find the index again. + */ + index = find_pid_index(pid); + if (index != -1) { + polllist.refcnt[index]++; + goto done; + } + + /* + * Open a /proc file + */ + (void) sprintf(procfile, "/proc/%ld/as", pid); + if ((fd = open(procfile, O_RDONLY)) == -1) { + rcm_log_message(RCM_NOTICE, gettext("open(%s): %s\n"), + procfile, strerror(errno)); + goto done; + } + + /* + * add pid to polllist + */ + index = get_pid_index(); + polllist.pids[index] = pid; + polllist.refcnt[index] = 1; + polllist.fds[index].fd = fd; + polllist.fds[index].events = 0; + polllist.fds[index].revents = 0; + + rcm_log_message(RCM_DEBUG, "add pid %ld at index %ld\n", pid, index); + +done: + start_polling_thread(); +} + +/* + * rcm_req_lock must be held + */ +static void +remove_from_polling_list(pid_t pid) +{ + int i, index; + + if (pid == (pid_t)0) + return; + + rcm_log_message(RCM_TRACE1, "remove_from_polling_list(%ld)\n", pid); + + /* + * Need to stop the poll thread before manipulating the polllist + * since poll thread may possibly be using polllist.fds[] and + * polllist.n_pids. As an optimization, first check the pid + * reference count. If the pid reference count is greater than 1 + * there is no need to stop the polling thread. + */ + + index = find_pid_index(pid); + if (index == -1) { + rcm_log_message(RCM_NOTICE, + gettext("error removing pid %ld from polling list\n"), pid); + return; + } + + /* + * decrement the pid refcnt + */ + if (polllist.refcnt[index] > 1) { + polllist.refcnt[index]--; + return; + } + + stop_polling_thread(); + + /* + * In an attempt to stop the poll thread we may have released + * and reacquired rcm_req_lock. So find the index again. 
+ */ + index = find_pid_index(pid); + if (index == -1) { + rcm_log_message(RCM_NOTICE, + gettext("error removing pid %ld from polling list\n"), pid); + goto done; + } + + if (--polllist.refcnt[index] > 0) + goto done; + + /* + * refcnt down to zero, delete pid from polling list + */ + (void) close(polllist.fds[index].fd); + polllist.n_pids--; + + for (i = index; i < polllist.n_pids; i++) { + polllist.pids[i] = polllist.pids[i + 1]; + polllist.refcnt[i] = polllist.refcnt[i + 1]; + bcopy(&polllist.fds[i + 1], &polllist.fds[i], + sizeof (struct pollfd)); + } + + rcm_log_message(RCM_DEBUG, "remove pid %ld at index %d\n", pid, index); + +done: + start_polling_thread(); +} + +void +init_poll_thread() +{ + polllist.poll_tid = (thread_t)-1; +} + +void +cleanup_poll_thread() +{ + (void) mutex_lock(&rcm_req_lock); + if (polllist.poll_tid == thr_self()) { + rcm_log_message(RCM_TRACE2, + "cleanup_poll_thread: n_pids = %d\n", polllist.n_pids); + polllist.poll_tid = (thread_t)-1; + (void) cond_broadcast(&polllist.cv); + } + (void) mutex_unlock(&rcm_req_lock); +} + +/*ARGSUSED*/ +static void * +pollfunc(void *arg) +{ + sigset_t mask; + + rcm_log_message(RCM_TRACE2, "poll thread started. 
n_pids = %d\n", + polllist.n_pids); + + /* + * Unblock SIGUSR1 to allow polling thread to be killed + */ + (void) sigemptyset(&mask); + (void) sigaddset(&mask, SIGUSR1); + (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL); + + (void) poll(polllist.fds, polllist.n_pids, (time_t)-1); + + /* + * block SIGUSR1 to avoid being killed while holding a lock + */ + (void) sigemptyset(&mask); + (void) sigaddset(&mask, SIGUSR1); + (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL); + + rcm_log_message(RCM_TRACE2, "returned from poll()\n"); + + cleanup_poll_thread(); + + (void) mutex_lock(&barrier.lock); + need_cleanup = 1; + (void) cond_broadcast(&barrier.cv); + (void) mutex_unlock(&barrier.lock); + + return (NULL); +} + +/* + * rcm_req_lock must be held + */ +void +start_polling_thread() +{ + int err; + + if (rcmd_get_state() != RCMD_NORMAL) + return; + + if (polllist.poll_tid != (thread_t)-1 || polllist.n_pids == 0) + return; + + if ((err = thr_create(NULL, 0, pollfunc, NULL, THR_DETACHED, + &polllist.poll_tid)) == 0) + polllist.signaled = 0; + else + rcm_log_message(RCM_ERROR, + gettext("failed to create polling thread: %s\n"), + strerror(err)); +} + +/* + * rcm_req_lock must be held + */ +static void +stop_polling_thread() +{ + int err; + + while (polllist.poll_tid != (thread_t)-1) { + if (polllist.signaled == 0) { + if ((err = thr_kill(polllist.poll_tid, SIGUSR1)) == 0) + polllist.signaled = 1; + else + /* + * thr_kill shouldn't have failed since the + * poll thread id and the signal are valid. + * So log an error. Since when thr_kill + * fails no signal is sent (as per man page), + * the cond_wait below will wait until the + * the poll thread exits by some other means. + * The poll thread, for example, exits on its + * own when any DR initiator process that it + * is currently polling exits. 
+ */ + rcm_log_message(RCM_ERROR, + gettext( + "fail to kill polling thread %d: %s\n"), + polllist.poll_tid, strerror(err)); + } + (void) cond_wait(&polllist.cv, &rcm_req_lock); + } +} diff --git a/usr/src/cmd/rcm_daemon/common/rcm_main.c b/usr/src/cmd/rcm_daemon/common/rcm_main.c new file mode 100644 index 0000000000..528c7f27d2 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_main.c @@ -0,0 +1,439 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Reconfiguration Coordination Daemon + * + * Accept RCM messages in the form of RCM events and process them + * - to build and update the system resource map + * - to allow clients to register/unregister for resource + * - to allow dr initiators to offline a resource before removal + * - to call into clients to perform suspend/offline actions + * + * The goal is to enable fully automated Dynamic Reconfiguration and better + * DR information tracking. 
+ */ + +#include <librcm_event.h> + +#include "rcm_impl.h" + +/* will run in daemon mode if debug level < DEBUG_LEVEL_FORK */ +#define DEBUG_LEVEL_FORK RCM_DEBUG + +#define DAEMON_LOCK_FILE "/var/run/rcm_daemon_lock" + +static int hold_daemon_lock; +static int daemon_lock_fd; +static const char *daemon_lock_file = DAEMON_LOCK_FILE; + +int debug_level = 0; +static int idle_timeout; +static int logflag = 0; +static char *prog; + +static void usage(void); +static void catch_sighup(void); +static void catch_sigusr1(void); +static pid_t enter_daemon_lock(void); +static void exit_daemon_lock(void); + +extern void init_poll_thread(); +extern void cleanup_poll_thread(); + +/* + * Print command line syntax for starting rcm_daemon + */ +static void +usage() { + (void) fprintf(stderr, + gettext("usage: %s [-d debug_level] [-t idle_timeout]\n"), prog); + rcmd_exit(EINVAL); +} + +/* + * common exit function which ensures releasing locks + */ +void +rcmd_exit(int status) +{ + if (status == 0) { + rcm_log_message(RCM_INFO, + gettext("rcm_daemon normal exit\n")); + } else { + rcm_log_message(RCM_ERROR, + gettext("rcm_daemon exit: errno = %d\n"), status); + } + + if (hold_daemon_lock) { + exit_daemon_lock(); + } + + exit(status); +} + +/* + * When SIGHUP is received, reload modules at the next safe moment (when + * there is no DR activity. + */ +void +catch_sighup(void) +{ + rcm_log_message(RCM_INFO, + gettext("SIGHUP received, will exit when daemon is idle\n")); + rcmd_thr_signal(); +} + +/* + * When SIGUSR1 is received, exit the thread + */ +void +catch_sigusr1(void) +{ + rcm_log_message(RCM_DEBUG, "SIGUSR1 received in thread %d\n", + thr_self()); + cleanup_poll_thread(); + thr_exit(NULL); +} + +/* + * Use an advisory lock to ensure that only one daemon process is + * active at any point in time. 
+ */ +static pid_t +enter_daemon_lock(void) +{ + struct flock lock; + + rcm_log_message(RCM_TRACE1, + "enter_daemon_lock: lock file = %s\n", daemon_lock_file); + + daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644); + if (daemon_lock_fd < 0) { + rcm_log_message(RCM_ERROR, gettext("open(%s) - %s\n"), + daemon_lock_file, strerror(errno)); + rcmd_exit(errno); + } + + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + + if (fcntl(daemon_lock_fd, F_SETLK, &lock) == 0) { + hold_daemon_lock = 1; + return (getpid()); + } + + /* failed to get lock, attempt to find lock owner */ + if ((errno == EAGAIN || errno == EDEADLK) && + (fcntl(daemon_lock_fd, F_GETLK, &lock) == 0)) { + return (lock.l_pid); + } + + /* die a horrible death */ + rcm_log_message(RCM_ERROR, gettext("lock(%s) - %s"), daemon_lock_file, + strerror(errno)); + exit(errno); + /*NOTREACHED*/ +} + +/* + * Drop the advisory daemon lock, close lock file + */ +static void +exit_daemon_lock(void) +{ + struct flock lock; + + lock.l_type = F_UNLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + + if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) { + rcm_log_message(RCM_ERROR, gettext("unlock(%s) - %s"), + daemon_lock_file, strerror(errno)); + } + + (void) close(daemon_lock_fd); +} + +/*PRINTFLIKE2*/ +static void +rcm_log_msg_impl(int level, char *message, va_list ap) +{ + int log_level; + + if (!logflag) { + /* + * RCM_ERROR goes to stderr, others go to stdout + */ + FILE *out = (level <= RCM_ERROR) ? 
stderr : stdout; + (void) vfprintf(out, message, ap); + return; + } + + /* + * translate RCM_* to LOG_* + */ + switch (level) { + case RCM_ERROR: + log_level = LOG_ERR; + break; + + case RCM_WARNING: + log_level = LOG_WARNING; + break; + + case RCM_NOTICE: + log_level = LOG_NOTICE; + break; + + case RCM_INFO: + log_level = LOG_INFO; + break; + + case RCM_DEBUG: + log_level = LOG_DEBUG; + break; + + default: + /* + * Don't log RCM_TRACEn messages + */ + return; + } + + (void) vsyslog(log_level, message, ap); +} + +/* + * print error messages to the terminal or to syslog + */ +void +rcm_log_message(int level, char *message, ...) +{ + va_list ap; + + if (level > debug_level) { + return; + } + + va_start(ap, message); + rcm_log_msg_impl(level, message, ap); + va_end(ap); +} + +/* + * Print error messages to the terminal or to syslog. + * Same as rcm_log_message except that it does not check for + * level > debug_level + * allowing callers to override the global debug_level. + */ +void +rcm_log_msg(int level, char *message, ...) 
+{ + va_list ap; + + va_start(ap, message); + rcm_log_msg_impl(level, message, ap); + va_end(ap); +} + +/* + * grab daemon_lock and direct messages to syslog + */ +static void +detachfromtty() +{ + (void) chdir("/"); + (void) setsid(); + (void) close(0); + (void) close(1); + (void) close(2); + (void) open("/dev/null", O_RDWR, 0); + (void) dup2(0, 1); + (void) dup2(0, 2); + openlog(prog, LOG_PID, LOG_DAEMON); + logflag = 1; +} + +void +main(int argc, char **argv) +{ + int c; + pid_t pid; + extern char *optarg; + sigset_t mask; + struct sigaction act; + + (void) setlocale(LC_ALL, ""); +#ifndef TEXT_DOMAIN +#define TEXT_DOMAIN "SYS_TEST" +#endif + (void) textdomain(TEXT_DOMAIN); + + if ((prog = strrchr(argv[0], '/')) == NULL) { + prog = argv[0]; + } else { + prog++; + } + + /* + * process arguments + */ + if (argc > 3) { + usage(); + } + while ((c = getopt(argc, argv, "d:t:")) != EOF) { + switch (c) { + case 'd': + debug_level = atoi(optarg); + break; + case 't': + idle_timeout = atoi(optarg); + break; + case '?': + default: + usage(); + /*NOTREACHED*/ + } + } + + /* + * Check permission + */ + if (getuid() != 0) { + (void) fprintf(stderr, gettext("Must be root to run %s\n"), + prog); + exit(EPERM); + } + + /* + * When rcm_daemon is started by a call to librcm, it inherits file + * descriptors from the DR initiator making a call. The file + * descriptors may correspond to devices that can be removed by DR. + * Since keeping them remain opened is problematic, close everything + * but stdin/stdout/stderr. + */ + closefrom(3); + + /* + * block SIGUSR1, use it for killing specific threads + */ + (void) sigemptyset(&mask); + (void) sigaddset(&mask, SIGUSR1); + (void) thr_sigsetmask(SIG_BLOCK, &mask, NULL); + + /* + * Setup signal handlers for SIGHUP and SIGUSR1 + * SIGHUP - causes a "delayed" daemon exit, effectively the same + * as a daemon restart. + * SIGUSR1 - causes a thr_exit(). Unblocked in selected threads. 
+ */ + act.sa_flags = 0; + act.sa_handler = catch_sighup; + (void) sigaction(SIGHUP, &act, NULL); + act.sa_handler = catch_sigusr1; + (void) sigaction(SIGUSR1, &act, NULL); + + /* + * ignore SIGPIPE so that the rcm daemon does not exit when it + * attempts to read or write from a pipe whose corresponding + * rcm script process exited. + */ + act.sa_handler = SIG_IGN; + (void) sigaction(SIGPIPE, &act, NULL); + + /* + * run in daemon mode + */ + if (debug_level < DEBUG_LEVEL_FORK) { + if (fork()) { + exit(0); + } + detachfromtty(); + } + + /* only one daemon can run at a time */ + if ((pid = enter_daemon_lock()) != getpid()) { + rcm_log_message(RCM_DEBUG, "%s pid %d already running\n", + prog, pid); + exit(EDEADLK); + } + + rcm_log_message(RCM_TRACE1, "%s started, debug level = %d\n", + prog, debug_level); + + /* + * Set daemon state to block RCM requests before rcm_daemon is + * fully initialized. See rcmd_thr_incr(). + */ + rcmd_set_state(RCMD_INIT); + + /* + * create rcm_daemon door and set permission to 0400 + */ + if (create_event_service(RCM_SERVICE_DOOR, event_service) == -1) { + rcm_log_message(RCM_ERROR, + gettext("cannot create door service: %s\n"), + strerror(errno)); + rcmd_exit(errno); + } + (void) chmod(RCM_SERVICE_DOOR, S_IRUSR); + + init_poll_thread(); /* initialize poll thread related data */ + + /* + * Initialize database by asking modules to register. + */ + rcmd_db_init(); + + /* + * Initialize locking, including lock recovery in the event of + * unexpected daemon failure. + */ + rcmd_lock_init(); + + /* + * Start accepting normal requests + */ + rcmd_set_state(RCMD_NORMAL); + + /* + * Start cleanup thread + */ + rcmd_db_clean(); + + /* + * Loop and shutdown daemon after a period of inactivity. 
+ */ + rcmd_start_timer(idle_timeout); + /* NOTREACHED */ +} diff --git a/usr/src/cmd/rcm_daemon/common/rcm_module.h b/usr/src/cmd/rcm_daemon/common/rcm_module.h new file mode 100644 index 0000000000..86347e5a9d --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_module.h @@ -0,0 +1,143 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1999-2000 by Sun Microsystems, Inc. + * All rights reserved. + */ + +#ifndef _RCM_MODULE_H +#define _RCM_MODULE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <librcm.h> + +/* + * Each RCM module is required to define + * + * struct rcm_mod_ops *rcm_mod_init(); + * const char *rcm_mod_info(); + * int rcm_mod_fini(); + * + * The rcm_mod_init() is always invoked when the module is loaded. It should + * return an rcm_mod_ops vector. + * + * Once the module is loaded, the regis() entry point is + * called to allow the module to inform the framework all the + * events and devices it cares about. + * + * If at any point of time, the module has no outstanding registration + * against any device, the module will be unloaded. 
The rcm_mod_fini() + * entry point, if defined, is always invoked before module unloading. + */ + + +/* + * ops vector: + * The ops version must have a valid version number and all function fields + * must be non-NULL. Non-conforming RCM modules are rejected. + * + * Valid ops versions are defined below. + */ + +#define RCM_MOD_OPS_V1 1 +#define RCM_MOD_OPS_V2 2 +#define RCM_MOD_OPS_VERSION RCM_MOD_OPS_V2 + +struct rcm_mod_ops { + int version; + int (*rcmop_register)(rcm_handle_t *); + int (*rcmop_unregister)(rcm_handle_t *); + int (*rcmop_get_info)(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); + int (*rcmop_request_suspend)(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); + int (*rcmop_notify_resume)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int (*rcmop_request_offline)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int (*rcmop_notify_online)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int (*rcmop_notify_remove)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + /* + * Fields for version 2 and beyond + */ + int (*rcmop_request_capacity_change)(rcm_handle_t *, char *, id_t, + uint_t, nvlist_t *, char **, rcm_info_t **); + int (*rcmop_notify_capacity_change)(rcm_handle_t *, char *, id_t, + uint_t, nvlist_t *, char **, rcm_info_t **); + int (*rcmop_notify_event)(rcm_handle_t *, char *, id_t, uint_t, + char **, nvlist_t *, rcm_info_t **); +}; + +/* + * Version 1 struct for compatibility + */ +struct rcm_mod_ops_v1 { + int version; + int (*rcmop_register)(rcm_handle_t *); + int (*rcmop_unregister)(rcm_handle_t *); + int (*rcmop_get_info)(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); + int (*rcmop_request_suspend)(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); + int (*rcmop_notify_resume)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int 
(*rcmop_request_offline)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int (*rcmop_notify_online)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + int (*rcmop_notify_remove)(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +}; + +/* + * RCM modules should use rcm_log_message() instead of syslog(). + * This allows the daemon to control the amount of message to be + * printed and to redirect output to screen for debugging purposes. + */ + +/* message levels for rcm_log_message */ + +#define RCM_ERROR 0 /* error message */ +#define RCM_WARNING 1 +#define RCM_NOTICE 2 +#define RCM_INFO 3 + /* 4 is not used for now */ +#define RCM_DEBUG 5 /* debug message */ +#define RCM_TRACE1 6 /* tracing message */ +#define RCM_TRACE2 7 +#define RCM_TRACE3 8 +#define RCM_TRACE4 9 + +extern void rcm_log_message(int, char *, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* _RCM_MODULE_H */ diff --git a/usr/src/cmd/rcm_daemon/common/rcm_script.c b/usr/src/cmd/rcm_daemon/common/rcm_script.c new file mode 100644 index 0000000000..094bc6288e --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_script.c @@ -0,0 +1,2629 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * rcm scripting module: + * + * This module implements rcm scripting interfaces. + * It translates rcm module based interfaces to rcm script based + * interfaces. + * + * Entry points: + * + * int script_main_init() + * Initialize the rcm scripting framework. + * Called during the rcm daemon initialization + * + * int script_main_fini() + * Called at the time of the rcm daemon exit. + * + * struct rcm_mod_ops *script_init(module_t *module) + * Initialize the given script. + * module->name contains the name of the script. + * Called at the time of loading scripts. + * Semantics are similar to module init. + * + * char *script_info(module_t *module) + * Called when the rcm daemon wishes to get the script information. + * module->name contains the name of the script. + * Semantics are similar to module info. + * + * int script_fini(module_t *module) + * Called before removing the script. + * module->name contains the name of the script. + * Semantics are similar to module fini. + * + * In addition to the above entry points rcm_mod_ops structure contains + * the other entry points. A pointer to this structure is returned when + * script_init() is called. + */ + +#include "rcm_impl.h" +#include "rcm_script_impl.h" +#include <sys/resource.h> +#include <procfs.h> +#include <sys/proc.h> +#include <ctype.h> + +/* + * All rcm scripting commands are enumerated here. + * NOTE: command positions in script_cmd_id_t and script_cmd_name must match. 
+ */ +typedef enum { + C_SCRIPTINFO, + C_RESOURCEINFO, + C_REGISTER, + C_QUERYREMOVE, + C_PREREMOVE, + C_POSTREMOVE, + C_UNDOREMOVE, + C_QUERYCAPACITY, + C_PRECAPACITY, + C_POSTCAPACITY, + C_QUERYSUSPEND, + C_PRESUSPEND, + C_POSTRESUME, + C_CANCELSUSPEND +} script_cmd_id_t; + +/* NOTE: command positions in script_cmd_id_t and script_cmd_name must match */ +static char *script_cmd_name[] = { + "scriptinfo", + "resourceinfo", + "register", + "queryremove", + "preremove", + "postremove", + "undoremove", + "querycapacity", + "precapacity", + "postcapacity", + "querysuspend", + "presuspend", + "postresume", + "cancelsuspend", + NULL +}; + +/* + * All rcm scripting data items are enumerated here. + * NOTE: data item positions in script_data_item_id_t and + * script_data_item_name must match. + */ +typedef enum { + D_SCRIPT_VERSION, + D_SCRIPT_FUNC_INFO, + D_CMD_TIMEOUT, + D_RESOURCE_NAME, + D_RESOURCE_USAGE_INFO, + D_FAILURE_REASON, + D_LOG_ERR, + D_LOG_WARN, + D_LOG_INFO, + D_LOG_DEBUG +} script_data_item_id_t; + +/* + * NOTE: data item positions in script_data_item_id_t and + * script_data_item_name must match. + */ +static const char *script_data_item_name[] = { + "rcm_script_version", + "rcm_script_func_info", + "rcm_cmd_timeout", + "rcm_resource_name", + "rcm_resource_usage_info", + "rcm_failure_reason", + "rcm_log_err", + "rcm_log_warn", + "rcm_log_info", + "rcm_log_debug", + NULL +}; + +/* + * Maximum number of rcm scripts that can run in parallel. + * RCM daemon has no limit on the number of scripts supported. But + * at most it runs script_max_parallelism number of scripts in parallel. + * For each running script rcm daemon consumes two file descriptors + * in order to communicate with the script via pipes. 
+ * So maximum number of file descriptor entries consumed by rcm daemon + * on behalf of rcm scripts is "script_max_parallelism * 2" + */ +static const int script_max_parallelism = 64; + +/* + * semaphore to limit the number of rcm script processes running in + * parallel to script_max_parallelism. + */ +static sema_t script_process_sema; + +/* mutex to protect the any global data */ +static mutex_t script_lock; + +/* contains head to a queue of script_info structures */ +static rcm_queue_t script_info_q; + +/* + * This mmapped state file is used to store the process id and + * rcm script name of all currently running rcm scripts. + */ +static const char *script_ps_state_file = "/var/run/rcm_script_state"; +static state_file_descr_t script_ps_statefd; + +static char *script_env_noforce = "RCM_ENV_FORCE=FALSE"; +static char *script_env_force = "RCM_ENV_FORCE=TRUE"; +static char *script_env_interval = "RCM_ENV_INTERVAL=%ld"; + +#define RSCR_TRACE RCM_TRACE1 + +/* rcm script base environment */ +static char *script_env[MAX_ENV_PARAMS]; + +struct rlimit file_limit; + +/* function prototypes */ +static void build_env(void); +static void copy_env(char *[], char *[]); +static void open_state_file(const char *, state_file_descr_t *, size_t, int, + uint32_t); +static void truncate_state_file(state_file_descr_t *); +static void close_state_file(const char *, state_file_descr_t *); +static void grow_state_file(state_file_descr_t *); +static void *get_state_element(state_file_descr_t *, int, int *); +static void *allocate_state_element(state_file_descr_t *, int *); +static void free_state_element(void *); +static void script_ps_state_file_kill_pids(void); +static void script_ps_state_file_add_entry(pid_t, char *); +static void script_ps_state_file_remove_entry(pid_t); +static int dname_to_id(char *); +static void script_process_sema_wait(void); +static int run_script(script_info_t *, char *[], char *[], char **); +static int get_line(int fd, char *, char *, int, size_t *, 
time_t, int *); +static void script_exited(script_info_t *); +static int kill_pid(pid_t); +static void kill_script(script_info_t *); +static char *flags_to_name(int, char *, int); +static void fill_argv(script_info_t *, char *[], char *); +static void *read_stderr(script_info_t *); +static int process_dataitem(script_info_t *, int, char *, char **); +static int do_cmd(script_info_t *, char *[], char *[], char **); +static int do_script_info(script_info_t *); +static int do_dr(script_info_t *, char *[], char *[], char **); +static int script_get_info(rcm_handle_t *, char *, pid_t, uint_t, char **, + char **, nvlist_t *, rcm_info_t **); +static void add_for_unregister(script_info_t *); +static void remove_from_unregister(script_info_t *, char *); +static void complete_unregister(script_info_t *); +static int script_register_interest(rcm_handle_t *); +static void add_drreq(script_info_t *, char *); +static void remove_drreq(script_info_t *, char *); +static void remove_drreq_all(script_info_t *); +static int script_request_offline(rcm_handle_t *, char *, pid_t, uint_t, + char **, rcm_info_t **); +static int script_notify_online(rcm_handle_t *, char *, pid_t, uint_t, + char **, rcm_info_t **); +static int script_notify_remove(rcm_handle_t *, char *, pid_t, uint_t, + char **, rcm_info_t **); +static int script_request_suspend(rcm_handle_t *, char *, pid_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int script_notify_resume(rcm_handle_t *, char *, pid_t, uint_t, + char **, rcm_info_t **); +static capacity_descr_t *get_capacity_descr(char *); +static int build_env_for_capacity(script_info_t *, char *, uint_t, nvlist_t *, + char *[], int *, char **); +static int script_request_capacity_change(rcm_handle_t *, char *, pid_t, + uint_t, nvlist_t *, char **, rcm_info_t **); +static int script_notify_capacity_change(rcm_handle_t *, char *, pid_t, + uint_t, nvlist_t *, char **, rcm_info_t **); +static void log_msg(script_info_t *, int, char *); +static char 
*dup_err(int, char *, ...); +static void rcmscript_snprintf(char **, int *, char **, char *, ...); +static char *rcmscript_strdup(char *); +static void *rcmscript_malloc(size_t); +static void *rcmscript_calloc(size_t, size_t); + + +static struct rcm_mod_ops script_ops = +{ + RCM_MOD_OPS_VERSION, + script_register_interest, /* register */ + script_register_interest, /* unregister */ + script_get_info, + script_request_suspend, + script_notify_resume, + script_request_offline, + script_notify_online, + script_notify_remove, + script_request_capacity_change, + script_notify_capacity_change, + NULL +}; + +/* + * Messages fall into two categories: + * framework messages (MF_..) + * errors directly attributable to scripts (MS_..) + */ +#define MF_MEMORY_ALLOCATION_ERR \ + gettext("rcm: failed to allocate memory: %1$s\n") +#define MF_STATE_FILE_ERR \ + gettext("rcm: state file error: %1$s: %2$s\n") +#define MF_FUNC_CALL_ERR \ + gettext("rcm: %1$s: %2$s\n") +#define MF_NV_ERR \ + gettext("rcm: required name-value parameters missing (%1$s)\n") +#define MF_UNKNOWN_RSRC_ERR \ + gettext("rcm: unknown resource name %1$s (%2$s)\n") +#define MS_REGISTER_RSRC_ERR \ + gettext("rcm script %1$s: failed to register %2$s\n") +#define MS_REGISTER_ERR \ + gettext("rcm script %1$s: register: %2$s\n") +#define MS_SCRIPTINFO_ERR \ + gettext("rcm script %1$s: scriptinfo: %2$s\n") +#define MS_PROTOCOL_ERR \ + gettext("rcm script %1$s: scripting protocol error\n") +#define MS_TIMEOUT_ERR \ + gettext("rcm script %1$s: timeout error\n") +#define MS_UNSUPPORTED_VER \ + gettext("rcm script %1$s: unsupported version %2$d\n") +#define MS_SCRIPT_ERR \ + gettext("rcm script %1$s: error: %2$s\n") +#define MS_UNKNOWN_ERR \ + gettext("rcm script %1$s: unknown error\n") +#define MS_LOG_MSG \ + gettext("rcm script %1$s: %2$s\n") + + +/* + * Initialize rcm scripting framework. + * Called during initialization of rcm daemon. 
+ */ +int +script_main_init(void) +{ +#define PS_STATE_FILE_CHUNK_SIZE 32 + + /* set base script environment */ + build_env(); + + rcm_init_queue(&script_info_q); + + /* + * Initialize the semaphore to limit the number of rcm script + * process running in parallel to script_max_parallelism. + */ + (void) sema_init(&script_process_sema, script_max_parallelism, + USYNC_THREAD, NULL); + + (void) mutex_init(&script_lock, USYNC_THREAD, NULL); + + /* save original file limit */ + (void) getrlimit(RLIMIT_NOFILE, &file_limit); + + open_state_file(script_ps_state_file, &script_ps_statefd, + sizeof (ps_state_element_t), + PS_STATE_FILE_CHUNK_SIZE, + PS_STATE_FILE_VER); + + /* + * If any pids exist in the ps state file since the last incarnation of + * the rcm daemon, kill the pids. + * On a normal daemon exit no pids should exist in the ps state file. + * But on an abnormal daemon exit pids may exist in the ps state file. + */ + if (script_ps_statefd.state_file) { + script_ps_state_file_kill_pids(); + truncate_state_file(&script_ps_statefd); + } + + return (0); +} + +/* + * Do any cleanup. + * Called at the time of normal rcm daemon exit. + */ +int +script_main_fini(void) +{ + script_ps_state_file_kill_pids(); + close_state_file(script_ps_state_file, &script_ps_statefd); + return (0); +} + +/* + * Initialize the given rcm script. + * module->name contains the name of the rcm script. 
+ */ +struct rcm_mod_ops * +script_init(module_t *module) +{ + script_info_t *rsi; + size_t len; + char *script_path; + + rcm_log_message(RSCR_TRACE, "script_init: script name = %s\n", + module->name); + + module->rsi = NULL; + + if ((script_path = rcm_get_script_dir(module->name)) == NULL) + return (NULL); + + len = strlen(script_path) + strlen(module->name) + 2; + + /* calloc also zeros the contents */ + rsi = (script_info_t *)rcmscript_calloc(1, sizeof (script_info_t)); + rsi->script_full_name = (char *)rcmscript_calloc(1, len); + + rsi->module = module; + rcm_init_queue(&rsi->drreq_q); + + (void) mutex_init(&rsi->channel_lock, USYNC_THREAD, NULL); + + (void) snprintf(rsi->script_full_name, len, "%s%s", script_path, + module->name); + rsi->script_name = strrchr(rsi->script_full_name, '/') + 1; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->cmd_timeout = -1; /* don't time scriptinfo command */ + if (do_script_info(rsi) == RCM_SUCCESS) { + /* + * if the script hasn't specified a timeout value set it to + * default + */ + if (rsi->cmd_timeout == -1) + rsi->cmd_timeout = SCRIPT_CMD_TIMEOUT; + (void) mutex_unlock(&rsi->channel_lock); + + /* put rsi on script_info_q */ + (void) mutex_lock(&script_lock); + rcm_enqueue_tail(&script_info_q, &rsi->queue); + (void) mutex_unlock(&script_lock); + + module->rsi = rsi; + return (&script_ops); + } + + (void) mutex_unlock(&rsi->channel_lock); + + free(rsi->script_full_name); + free(rsi); + return (NULL); +} + +/* + * Returns a string describing the script's functionality. + * module->name contains the name of the rcm script for which information + * is requested. + */ +char * +script_info(module_t *module) +{ + script_info_t *rsi = module->rsi; + + rcm_log_message(RSCR_TRACE, "script_info: script name = %s\n", + rsi->script_name); + return (rsi->func_info_buf); +} + +/* + * Called before unloading the script. + * module->name contains the name of the rcm script which is being unloaded. + * Do any cleanup. 
+ */ +int +script_fini(module_t *module) +{ + script_info_t *rsi = module->rsi; + + rcm_log_message(RSCR_TRACE, "script_fini: script name = %s\n", + rsi->script_name); + + /* remove rsi from script_info_q */ + (void) mutex_lock(&script_lock); + rcm_dequeue(&rsi->queue); + (void) mutex_unlock(&script_lock); + + remove_drreq_all(rsi); + + if (rsi->func_info_buf) + free(rsi->func_info_buf); + + free(rsi->script_full_name); + free(rsi); + + module->rsi = NULL; + + return (RCM_SUCCESS); +} + +/* build base environment for scripts */ +static void +build_env(void) +{ + const char *env_list[] = { "LANG", "LC_COLLATE", "LC_CTYPE", + "LC_MESSAGES", "LC_MONETARY", "LC_NUMERIC", "LC_TIME", + "LC_ALL", "TZ", NULL }; + char *x; + int len; + int i, j = 0; + int d; + extern int debug_level; + + script_env[j++] = rcmscript_strdup("PATH=/usr/sbin:/usr/bin"); + + for (i = 0; env_list[i] != NULL; i++) { + x = getenv(env_list[i]); + if (x) { + len = strlen(env_list[i]) + strlen(x) + 2; + script_env[j] = (char *)rcmscript_malloc(len); + + (void) snprintf(script_env[j++], len, "%s=%s", + env_list[i], x); + } + } + + len = strlen("RCM_ENV_DEBUG_LEVEL") + 3; + script_env[j] = (char *)rcmscript_malloc(len); + + if (debug_level < 0) + d = 0; + else if (debug_level > 9) + d = 9; + else + d = debug_level; + + (void) snprintf(script_env[j++], len, "RCM_ENV_DEBUG_LEVEL=%d", d); + + script_env[j] = NULL; +} + +static void +copy_env(char *src[], char *dst[]) +{ + int i; + + for (i = 0; src[i] != NULL; i++) + dst[i] = src[i]; + + dst[i] = NULL; +} + +/* + * Open (or create if the file does not exist) the given state file + * and mmap it. 
+ */ +static void +open_state_file(const char *filename, + state_file_descr_t *statefd, + size_t element_size, + int chunk_size, + uint32_t version) +{ + struct stat stats; + int error_num; + + if ((statefd->fd = open(filename, O_CREAT|O_RDWR, 0600)) == + -1) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_STATE_FILE_ERR, + "open", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + + if (fstat(statefd->fd, &stats) != 0) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_STATE_FILE_ERR, + "fstat", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + + if (stats.st_size != 0) { + /* LINTED */ + statefd->state_file = (state_file_t *)mmap(NULL, + stats.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, + statefd->fd, 0); + + if (statefd->state_file == MAP_FAILED) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_STATE_FILE_ERR, + "mmap", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + + if (statefd->state_file->version != version) { + (void) munmap((void *)statefd->state_file, + stats.st_size); + statefd->state_file = NULL; + (void) ftruncate(statefd->fd, 0); + } + } else { + statefd->state_file = NULL; + } + + statefd->version = version; + statefd->element_size = sizeof (state_element_t) + + RSCR_ROUNDUP(element_size, 8); + statefd->chunk_size = chunk_size; + statefd->index = 0; +} + +static void +truncate_state_file(state_file_descr_t *statefd) +{ + size_t size; + + if (statefd->state_file) { + size = sizeof (state_file_t) + statefd->element_size * + statefd->state_file->max_elements; + + (void) munmap((void *)statefd->state_file, size); + statefd->state_file = NULL; + } + (void) ftruncate(statefd->fd, 0); +} + +static void +close_state_file(const char *filename, state_file_descr_t *statefd) +{ + truncate_state_file(statefd); + (void) close(statefd->fd); + (void) unlink(filename); +} + +/* + * Grow the state file by the chunk size specified in statefd + * and mmap it. 
+ */ +static void +grow_state_file(state_file_descr_t *statefd) +{ + size_t size; + int max_elements; + int error_num; + + max_elements = statefd->chunk_size; + if (statefd->state_file) + max_elements += statefd->state_file->max_elements; + + size = sizeof (state_file_t) + + statefd->element_size * max_elements; + + if (ftruncate(statefd->fd, size) != 0) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_STATE_FILE_ERR, + "ftruncate", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + + /* LINTED */ + statefd->state_file = (state_file_t *)mmap(NULL, size, + PROT_READ|PROT_WRITE, MAP_SHARED, statefd->fd, 0); + + if (statefd->state_file == MAP_FAILED) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_STATE_FILE_ERR, + "mmap", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + + statefd->index = statefd->state_file->max_elements; + statefd->state_file->max_elements = max_elements; + statefd->state_file->version = statefd->version; +} + +/* + * Given index into state element array, get the pointer to the actual + * state element. + * If flag is non-null set *flag to + * TRUE if the state element is currently is use. + * FALSE if the state element is free. + */ +static void * +get_state_element(state_file_descr_t *statefd, int index, int *flag) +{ + char *ptr; + + if (statefd->state_file && + (index < statefd->state_file->max_elements)) { + + ptr = (char *)(statefd->state_file); + ptr += sizeof (state_file_t) + + index * statefd->element_size; + + if (flag) { + *flag = (((state_element_t *)((void *)ptr))->flags & + STATE_ELEMENT_IN_USE) ? 1 : 0; + } + + ptr += sizeof (state_element_t); + } else + ptr = NULL; + + return ((void *)ptr); +} + +/* + * Allocate a state element entry in the state file and return a pointer + * to the allocated entry. + * If index is non-null set *index to index into the state element array + * of the allocated entry. 
+ */ +static void * +allocate_state_element(state_file_descr_t *statefd, int *index) +{ + void *x; + int i; + int flag; + + if (statefd->state_file) { + /* find an empty slot */ + for (i = 0; i < statefd->state_file->max_elements; i++) { + x = get_state_element(statefd, statefd->index, + &flag); + assert(x != NULL); + + if (flag == 0) + /* entry is free */ + break; + + statefd->index++; + if (statefd->index >= statefd->state_file->max_elements) + statefd->index = 0; + } + } + + if (statefd->state_file == NULL || + i == statefd->state_file->max_elements) { + + /* All entries are in use. Grow the list */ + grow_state_file(statefd); + x = get_state_element(statefd, statefd->index, &flag); + assert(flag == 0); + } + + if (index != NULL) + *index = statefd->index; + + statefd->index++; + if (statefd->index >= statefd->state_file->max_elements) + statefd->index = 0; + + ((state_element_t *)x - 1)->flags |= STATE_ELEMENT_IN_USE; + return (x); +} + +static void +free_state_element(void *x) +{ + ((state_element_t *)x - 1)->flags &= ~STATE_ELEMENT_IN_USE; +} + +/* + * Kill the pids contained in ps state file. 
+ */ +static void +script_ps_state_file_kill_pids(void) +{ + ps_state_element_t *x; + char procfile[80]; + psinfo_t psi; + int fd, i, flag; + + /* LINTED */ + for (i = 0; 1; i++) { + if ((x = (ps_state_element_t *)get_state_element( + &script_ps_statefd, i, &flag)) == NULL) + break; + + if (flag == 1) { /* the entry is in use */ + (void) snprintf(procfile, 80, "/proc/%ld/psinfo", + (long)x->pid); + if ((fd = open(procfile, O_RDONLY)) != -1 && + read(fd, &psi, sizeof (psi)) == sizeof (psi) && + strcmp(psi.pr_fname, + x->script_name) == 0) { + + (void) close(fd); + + /* + * just a safety check to not to blow up + * system processes if the file is ever corrupt + */ + if (x->pid > 1) { + rcm_log_message(RCM_DEBUG, + "script_ps_state_file_kill_pids: " + "killing script_name = %s pid = %ld\n", + x->script_name, x->pid); + + /* kill the process group */ + (void) kill(-(x->pid), SIGKILL); + } + } else { + if (fd != -1) + (void) close(fd); + } + free_state_element((void *)x); + } + } +} + +/* + * Add a state element entry to ps state file. + */ +static void +script_ps_state_file_add_entry(pid_t pid, char *script_name) +{ + ps_state_element_t *x; + + (void) mutex_lock(&script_lock); + + x = (ps_state_element_t *)allocate_state_element( + &script_ps_statefd, NULL); + + x->pid = pid; + (void) strlcpy(x->script_name, script_name, MAXNAMELEN); + + (void) fsync(script_ps_statefd.fd); + + (void) mutex_unlock(&script_lock); +} + +/* + * Remove the state element entry corresponding to pid from the + * ps state file. 
+ */ +static void +script_ps_state_file_remove_entry(pid_t pid) +{ + ps_state_element_t *x; + int flag, i; + + (void) mutex_lock(&script_lock); + + /* LINTED */ + for (i = 0; 1; i++) { + if ((x = (ps_state_element_t *)get_state_element( + &script_ps_statefd, i, &flag)) == NULL) + break; + + /* if the state element entry is in use and pid matches */ + if (flag == 1 && x->pid == pid) { + free_state_element((void *)x); + break; + } + } + + (void) mutex_unlock(&script_lock); +} + +/* + * Get data item id given data item name + */ +static int +dname_to_id(char *dname) +{ + int i; + + for (i = 0; script_data_item_name[i] != NULL; i++) { + if (strcmp(dname, script_data_item_name[i]) == 0) + return (i); + } + + return (-1); +} + +/* + * Called before running any script. + * This routine waits until the number of script processes running in + * parallel drops down below to script_max_parallelism. + */ +static void +script_process_sema_wait(void) +{ + int error_num; + + /* LINTED */ + while (1) { + if (sema_wait(&script_process_sema) == 0) + return; + + if (errno != EINTR && errno != EAGAIN) { + error_num = errno; + rcm_log_message(RCM_ERROR, MF_FUNC_CALL_ERR, + "sema_wait", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + } + + /*NOTREACHED*/ +} + +/* + * Fork and execute the script. + */ +static int +run_script(script_info_t *rsi, char *argv[], char *envp[], char **errmsg) +{ + int i, p1 = -1, p2 = -1; + struct rlimit rlp; + struct stat stats; + + rcm_log_message(RSCR_TRACE, "run_script: script name = %s\n", + rsi->script_full_name); + + for (i = 0; argv[i] != NULL; i++) + rcm_log_message(RSCR_TRACE, "run_script: argv[%d] = %s\n", + i, argv[i]); + + *errmsg = NULL; + + /* check that the script exists */ + if (stat(rsi->script_full_name, &stats) != 0) + goto error; + + /* + * If the syscall pipe fails because of reaching the max open file + * count per process then dynamically increase the limit on the max + * open file count. 
+ * + * At present the rcm_daemon consumes file descriptor + * entries for the following files. + * RCM_STATE_FILE - /var/run/rcm_daemon_state + * DAEMON_LOCK_FILE - /var/run/rcm_daemon_lock + * RCM_SERVICE_DOOR - /var/run/rcm_daemon_door + * proc files in the format "/proc/pid/as" for each pid + * communicating with the rcm_daemon via doors + * dlopen for each rcm module + * When in daemon mode stdin, stdout and stderr are closed; + * /dev/null opened and duped to stdout, and stderr + * openlog + * Some files which are opened briefly and closed such as + * directory files. + * Two file descriptors for each script in running state. + * Note that the constant script_max_parallelism sets an + * upper cap on how many rcm scripts can run in + * parallel. + */ + if ((p1 = pipe(rsi->pipe1)) == -1 || (p2 = pipe(rsi->pipe2)) == -1) { + if ((errno == EMFILE) && + (getrlimit(RLIMIT_NOFILE, &rlp) == 0)) { + + rlp.rlim_cur += 16; + if (rlp.rlim_max < rlp.rlim_cur) + rlp.rlim_max = rlp.rlim_cur; + (void) setrlimit(RLIMIT_NOFILE, &rlp); + + if (p1 == -1) { + if ((p1 = pipe(rsi->pipe1)) == -1) + goto error; + } + if ((p2 = pipe(rsi->pipe2)) == -1) + goto error; + } else + goto error; + } + +forkagain: + if ((rsi->pid = fork1()) == (pid_t)-1) { + if (errno == EINTR || errno == EAGAIN) + goto forkagain; + + goto error; + } + + if (rsi->pid == 0) { + /* child process */ + + (void) setsid(); + + /* close stdin, stdout and stderr */ + (void) close(0); + (void) close(1); + (void) close(2); + + /* set stdin to /dev/null */ + (void) open("/dev/null", O_RDWR, 0); + + /* redirect stdout and stderr to pipe */ + (void) dup2(rsi->pipe1[CHILD_END_OF_PIPE], 1); + (void) dup2(rsi->pipe2[CHILD_END_OF_PIPE], 2); + + /* close all other file descriptors */ + closefrom(3); + + /* restore original file limit */ + (void) setrlimit(RLIMIT_NOFILE, &file_limit); + + /* set current working dir */ + if (stats.st_uid == 0) { + /* root */ + if (chdir("/var/run") == -1) + _exit(127); + } else { + if 
(chdir("/tmp") == -1) + _exit(127); + } + + /* + * setuid sets real, effective and saved user ids to the + * given id. + * setgid sets real, effective and saved group ids to the + * given id. + */ + (void) setgid(stats.st_gid); + (void) setuid(stats.st_uid); + + (void) execve(rsi->script_full_name, argv, envp); + _exit(127); + /*NOTREACHED*/ + } + + (void) close(rsi->pipe1[CHILD_END_OF_PIPE]); + (void) close(rsi->pipe2[CHILD_END_OF_PIPE]); + + script_ps_state_file_add_entry(rsi->pid, rsi->script_name); + + return (0); + +error: + *errmsg = dup_err(RCM_ERROR, MS_SCRIPT_ERR, + rsi->script_name, strerror(errno)); + + if (p1 != -1) { + (void) close(rsi->pipe1[PARENT_END_OF_PIPE]); + (void) close(rsi->pipe1[CHILD_END_OF_PIPE]); + } + + if (p2 != -1) { + (void) close(rsi->pipe2[PARENT_END_OF_PIPE]); + (void) close(rsi->pipe2[CHILD_END_OF_PIPE]); + } + + return (-1); +} + +/* + * Reads one line of input (including the newline character) from the + * given file descriptor "fd" to buf. + * maxbuflen specifies the size of memory allocated for buf. + * Timeoutval is the max timeout value in seconds for the script to supply + * input. A timeoutval of 0 implies no timeout. + * + * Upon return *buflen contains the number of bytes read. 
+ * + * Return values: + * 0 success + * -1 an error occured + * -2 timeout occurred + * -3 script exited + */ +static int +get_line(int fd, + char *fdname, + char *buf, + int maxbuflen, + size_t *buflen, + time_t timeoutval, + int *error_num) +{ + char c = '\0'; + struct pollfd fds[1]; + int x; + size_t len = 0; + char *ptr; + int timeit; + time_t deadline; + int rval = 0; + + if (timeoutval) { + timeit = TRUE; + deadline = time(NULL) + timeoutval; + fds[0].fd = fd; + fds[0].events = POLLIN; + } else + timeit = FALSE; + + ptr = buf; + + while (c != '\n' && len < (maxbuflen -1)) { + if (timeit) { +pollagain: + fds[0].revents = 0; + timeoutval = deadline - time(NULL); + if (timeoutval <= 0) { + rval = -2; + break; + } + x = poll(fds, 1, timeoutval*1000); + if (x <= 0) { + if (x == 0) + /* poll timedout */ + rval = -2; + else { + if (errno == EINTR || errno == EAGAIN) + goto pollagain; + *error_num = errno; + rval = -1; + } + break; + } + } +readagain: + if ((x = read(fd, &c, 1)) != 1) { + if (x == 0) + /* + * Script exited. Or more specifically the + * script has closed its end of the pipe. 
+ */ + rval = -3; + else { + if (errno == EINTR || errno == EAGAIN) + goto readagain; + *error_num = errno; + rval = -1; + } + break; + } + + *ptr++ = c; + len++; + } + + *ptr = '\0'; + *buflen = len; + + rcm_log_message(RSCR_TRACE, + "get_line(%s): rval = %d buflen = %d line = %s\n", + fdname, rval, *buflen, buf); + return (rval); +} + +static void +script_exited(script_info_t *rsi) +{ + if (rsi->flags & STDERR_THREAD_CREATED) { + rcm_log_message(RSCR_TRACE, + "script_exited: doing thr_join (%s)\n", rsi->script_name); + (void) thr_join(rsi->tid, NULL, NULL); + rsi->flags &= ~STDERR_THREAD_CREATED; + } + + (void) close(rsi->pipe1[PARENT_END_OF_PIPE]); + (void) close(rsi->pipe2[PARENT_END_OF_PIPE]); + rsi->pipe1[PARENT_END_OF_PIPE] = -1; + rsi->pipe2[PARENT_END_OF_PIPE] = -1; + + script_ps_state_file_remove_entry(rsi->pid); + rsi->pid = 0; + (void) sema_post(&script_process_sema); +} + +/* + * Kill the specified process group + */ +static int +kill_pid(pid_t pid) +{ + time_t deadline, timeleft; + int child_status; + + /* kill the entire process group */ + (void) kill(-(pid), SIGKILL); + + /* give some time for the script to be killed */ + deadline = time(NULL) + SCRIPT_KILL_TIMEOUT; + do { + if (waitpid(pid, &child_status, WNOHANG) == pid) + return (0); + + /* wait for 100 ms */ + (void) poll(NULL, 0, 100); + + timeleft = deadline - time(NULL); + } while (timeleft > 0); + + /* script process was not killed successfully */ + return (-1); +} + +/* + * Kill the specified script. + */ +static void +kill_script(script_info_t *rsi) +{ + if (rsi->pid > 1) { + (void) kill_pid(rsi->pid); + script_exited(rsi); + remove_drreq_all(rsi); + } +} + +/* + * Convert rcm flags parameter to a string. + * Used for debug prints. + */ +static char * +flags_to_name(int flags, char *buf, int maxbuflen) +{ + (void) snprintf(buf, maxbuflen, "%s%s", + (flags & RCM_QUERY) ? "RCM_QUERY " : "", + (flags & RCM_FORCE) ? 
"RCM_FORCE" : ""); + + return (buf); +} + +static void +fill_argv(script_info_t *rsi, char *argv[], char *resource_name) +{ + argv[0] = rsi->script_full_name; + argv[1] = script_cmd_name[rsi->cmd]; + if (resource_name) { + argv[2] = resource_name; + argv[3] = NULL; + } else + argv[2] = NULL; +} + +/* + * stderr thread: + * Reads stderr and logs to syslog. + * Runs as a separate thread. + */ +static void * +read_stderr(script_info_t *rsi) +{ + char buf[MAX_LINE_LEN]; + size_t buflen; + int error_num; + + while ((get_line(rsi->pipe2[PARENT_END_OF_PIPE], "stderr", + buf, MAX_LINE_LEN, &buflen, 0, &error_num)) == 0) { + log_msg(rsi, RCM_ERROR, buf); + } + + if (buflen) + log_msg(rsi, RCM_ERROR, buf); + + return (NULL); +} + +/* process return data items passed by scripts to the framework */ +static int +process_dataitem(script_info_t *rsi, int token, char *value, char **errmsg) +{ + char *ptr; + int status; + + *errmsg = NULL; + + if (*value == '\0') + goto error; + + switch (token) { + case D_SCRIPT_VERSION: + if (rsi->cmd != C_SCRIPTINFO) + goto error; + + /* check that value contains only digits */ + for (ptr = value; *ptr != '\0'; ptr++) + if (isdigit((int)(*ptr)) == 0) + break; + + if (*ptr == '\0') + rsi->ver = atoi(value); + else + goto error; + + break; + + case D_SCRIPT_FUNC_INFO: + if (rsi->cmd != C_SCRIPTINFO) + goto error; + + rcmscript_snprintf(&rsi->func_info_buf, + &rsi->func_info_buf_len, + &rsi->func_info_buf_curptr, + "%s", value); + break; + + case D_CMD_TIMEOUT: + if (rsi->cmd != C_SCRIPTINFO) + goto error; + + /* check that value contains only digits */ + for (ptr = value; *ptr != '\0'; ptr++) + if (isdigit((int)(*ptr)) == 0) + break; + + if (*ptr == '\0') + rsi->cmd_timeout = atoi(value); + else + goto error; + break; + + case D_RESOURCE_NAME: + if (rsi->cmd != C_REGISTER) + goto error; + + if (get_capacity_descr(value) != NULL) + status = rcm_register_capacity(rsi->hdl, value, + 0, NULL); + else + status = rcm_register_interest(rsi->hdl, value, 
0, + NULL); + + if (status == RCM_FAILURE && errno == EALREADY) + status = RCM_SUCCESS; + + if (status != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, MS_REGISTER_RSRC_ERR, + rsi->script_name, value); + } + + remove_from_unregister(rsi, value); + break; + + case D_RESOURCE_USAGE_INFO: + if (rsi->cmd != C_RESOURCEINFO) + goto error; + + rcmscript_snprintf(&rsi->resource_usage_info_buf, + &rsi->resource_usage_info_buf_len, + &rsi->resource_usage_info_buf_curptr, + "%s", value); + break; + + case D_FAILURE_REASON: + rcmscript_snprintf(&rsi->failure_reason_buf, + &rsi->failure_reason_buf_len, + &rsi->failure_reason_buf_curptr, + "%s", value); + break; + + default: + goto error; + } + + return (0); + +error: + *errmsg = dup_err(RCM_ERROR, MS_PROTOCOL_ERR, rsi->script_name); + return (-1); +} + +/* Send the given command to the script and process return data */ +static int +do_cmd(script_info_t *rsi, char *argv[], char *envp[], char **errmsg) +{ + char buf[MAX_LINE_LEN]; + size_t buflen; + int loglevel = -1, continuelog = 0; + char *ptr, *dname, *value; + time_t maxsecs; + time_t deadline; + int sigaborted = 0; + int rval, child_status, token; + int error_num; + int cmd_timeout = rsi->cmd_timeout; + + *errmsg = NULL; + + script_process_sema_wait(); + + if (run_script(rsi, argv, envp, errmsg) == -1) { + (void) sema_post(&script_process_sema); + goto error2; + } + + (void) time(&rsi->lastrun); + deadline = rsi->lastrun + cmd_timeout; + + if (thr_create(NULL, 0, (void *(*)(void *))read_stderr, rsi, + 0, &rsi->tid) != 0) { + *errmsg = dup_err(RCM_ERROR, MF_FUNC_CALL_ERR, + "thr_create", strerror(errno)); + goto error1; + } + rsi->flags |= STDERR_THREAD_CREATED; + + /* LINTED */ + while (1) { + if (cmd_timeout > 0) { + maxsecs = deadline - time(NULL); + if (maxsecs <= 0) + goto timedout; + } else + maxsecs = 0; + + rval = get_line(rsi->pipe1[PARENT_END_OF_PIPE], + "stdout", buf, MAX_LINE_LEN, &buflen, + maxsecs, &error_num); + + if (buflen) { + if (continuelog) + log_msg(rsi, 
loglevel, buf); + else { + if ((ptr = strchr(buf, '=')) == NULL) + goto error; + + *ptr = '\0'; + dname = buf; + value = ptr + 1; + if ((token = dname_to_id(dname)) == -1) + goto error; + + switch (token) { + case D_LOG_ERR: + loglevel = RCM_ERROR; + break; + + case D_LOG_WARN: + loglevel = RCM_WARNING; + break; + + case D_LOG_INFO: + loglevel = RCM_INFO; + break; + + case D_LOG_DEBUG: + loglevel = RCM_DEBUG; + break; + + default: + loglevel = -1; + break; + } + + if (loglevel != -1) { + log_msg(rsi, loglevel, value); + if (buf[buflen - 1] == '\n') + continuelog = 0; + else + continuelog = 1; + } else { + if (buf[buflen - 1] != '\n') + goto error; + + buf[buflen - 1] = '\0'; + if (process_dataitem(rsi, token, + value, errmsg) != 0) + goto error1; + } + } + } + + if (rval == -3) { + /* script exited */ +waitagain: + if (waitpid(rsi->pid, &child_status, 0) + != rsi->pid) { + if (errno == EINTR || errno == EAGAIN) + goto waitagain; + *errmsg = dup_err(RCM_ERROR, MS_SCRIPT_ERR, + rsi->script_name, strerror(errno)); + goto error1; + } + + if (WIFEXITED(child_status)) { + script_exited(rsi); + rsi->exit_status = WEXITSTATUS(child_status); + } else { + if (sigaborted) + *errmsg = dup_err(RCM_ERROR, + MS_TIMEOUT_ERR, rsi->script_name); + else + *errmsg = dup_err(RCM_ERROR, + MS_UNKNOWN_ERR, rsi->script_name); + + /* kill any remaining processes in the pgrp */ + (void) kill(-(rsi->pid), SIGKILL); + script_exited(rsi); + goto error2; + } + + break; + } + + if (rval == -1) { + *errmsg = dup_err(RCM_ERROR, MS_SCRIPT_ERR, + rsi->script_name, strerror(errno)); + goto error1; + } + + if (rval == -2) { +timedout: + /* timeout occurred */ + if (sigaborted == 0) { + (void) kill(rsi->pid, SIGABRT); + sigaborted = 1; + /* extend deadline */ + deadline += SCRIPT_ABORT_TIMEOUT; + } else { + *errmsg = dup_err(RCM_ERROR, + MS_TIMEOUT_ERR, rsi->script_name); + goto error1; + } + } + } + + return (0); + +error: + *errmsg = dup_err(RCM_ERROR, MS_PROTOCOL_ERR, rsi->script_name); + +error1: + 
kill_script(rsi); + +error2: + return (-1); +} + +static int +do_script_info(script_info_t *rsi) +{ + char *argv[MAX_ARGS]; + int status = RCM_FAILURE; + int err = 0; + char *errmsg = NULL; + + rcm_log_message(RSCR_TRACE, "do_script_info: script name = %s\n", + rsi->script_name); + + rsi->cmd = C_SCRIPTINFO; + rsi->func_info_buf = NULL; + rsi->failure_reason_buf = NULL; + fill_argv(rsi, argv, NULL); + + if (do_cmd(rsi, argv, script_env, &errmsg) == 0) { + switch (rsi->exit_status) { + case E_SUCCESS: + if (rsi->func_info_buf != NULL && + rsi->failure_reason_buf == NULL) { + + if (rsi->ver >= SCRIPT_API_MIN_VER && + rsi->ver <= SCRIPT_API_MAX_VER) + status = RCM_SUCCESS; + else + rcm_log_message(RCM_ERROR, + MS_UNSUPPORTED_VER, rsi->script_name, + rsi->ver); + } else + err = 1; + break; + + case E_FAILURE: + if (rsi->failure_reason_buf != NULL) { + rcm_log_message(RCM_ERROR, MS_SCRIPTINFO_ERR, + rsi->script_name, + rsi->failure_reason_buf); + } else + err = 1; + break; + + default: + err = 1; + break; + } + if (err) + rcm_log_message(RCM_ERROR, MS_PROTOCOL_ERR, + rsi->script_name); + } else if (errmsg) + (void) free(errmsg); + + if (status != RCM_SUCCESS && rsi->func_info_buf != NULL) + free(rsi->func_info_buf); + + if (rsi->failure_reason_buf) + free(rsi->failure_reason_buf); + + return (status); +} + +static int +do_dr(script_info_t *rsi, char *argv[], char *envp[], char **info) +{ + int status = RCM_FAILURE; + int err = 0; + + rsi->failure_reason_buf = NULL; + + if (do_cmd(rsi, argv, envp, info) == 0) { + switch (rsi->exit_status) { + case E_SUCCESS: + case E_UNSUPPORTED_CMD: + if (rsi->failure_reason_buf == NULL) + status = RCM_SUCCESS; + else + err = 1; + break; + + case E_FAILURE: + case E_REFUSE: + if (rsi->failure_reason_buf != NULL) { + *info = rsi->failure_reason_buf; + rsi->failure_reason_buf = NULL; + } else + err = 1; + break; + + default: + err = 1; + break; + } + + if (err) + *info = dup_err(RCM_ERROR, MS_PROTOCOL_ERR, + rsi->script_name); + } + + if 
(rsi->failure_reason_buf) + free(rsi->failure_reason_buf); + + return (status); +} + +/* + * get_info entry point + */ +/* ARGSUSED */ +static int +script_get_info(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + char **info, + char **error, + nvlist_t *props, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + int status = RCM_FAILURE; + int err = 0; + + rcm_log_message(RSCR_TRACE, "script_get_info: resource = %s\n", + resource_name); + + *info = NULL; + *error = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = C_RESOURCEINFO; + rsi->resource_usage_info_buf = NULL; + rsi->failure_reason_buf = NULL; + fill_argv(rsi, argv, resource_name); + + if (do_cmd(rsi, argv, script_env, error) == 0) { + switch (rsi->exit_status) { + case E_SUCCESS: + if (rsi->resource_usage_info_buf != NULL && + rsi->failure_reason_buf == NULL) { + + *info = rsi->resource_usage_info_buf; + rsi->resource_usage_info_buf = NULL; + status = RCM_SUCCESS; + } else + err = 1; + break; + + case E_FAILURE: + if (rsi->failure_reason_buf != NULL) { + *error = rsi->failure_reason_buf; + rsi->failure_reason_buf = NULL; + } else + err = 1; + break; + + default: + err = 1; + break; + } + if (err) + *error = dup_err(RCM_ERROR, MS_PROTOCOL_ERR, + rsi->script_name); + } + + if (rsi->resource_usage_info_buf) + free(rsi->resource_usage_info_buf); + + if (rsi->failure_reason_buf) + free(rsi->failure_reason_buf); + + (void) mutex_unlock(&rsi->channel_lock); + + return (status); +} + +static void +add_for_unregister(script_info_t *rsi) +{ + module_t *module = rsi->module; + client_t *client; + rcm_queue_t *head; + rcm_queue_t *q; + + (void) mutex_lock(&rcm_req_lock); + + head = &module->client_q; + + for (q = head->next; q != head; q = q->next) { + client = RCM_STRUCT_BASE_ADDR(client_t, q, queue); + client->prv_flags |= RCM_NEED_TO_UNREGISTER; + } + + (void) mutex_unlock(&rcm_req_lock); +} + +static void 
+remove_from_unregister(script_info_t *rsi, char *resource_name) +{ + module_t *module = rsi->module; + client_t *client; + rcm_queue_t *head; + rcm_queue_t *q; + + (void) mutex_lock(&rcm_req_lock); + + head = &module->client_q; + + for (q = head->next; q != head; q = q->next) { + client = RCM_STRUCT_BASE_ADDR(client_t, q, queue); + if (strcmp(client->alias, resource_name) == 0) { + client->prv_flags &= ~RCM_NEED_TO_UNREGISTER; + break; + } + } + + (void) mutex_unlock(&rcm_req_lock); +} + +static void +complete_unregister(script_info_t *rsi) +{ + module_t *module = rsi->module; + client_t *client; + rcm_queue_t *head; + rcm_queue_t *q; + + (void) mutex_lock(&rcm_req_lock); + + head = &module->client_q; + + for (q = head->next; q != head; q = q->next) { + client = RCM_STRUCT_BASE_ADDR(client_t, q, queue); + if (client->prv_flags & RCM_NEED_TO_UNREGISTER) { + client->prv_flags &= ~RCM_NEED_TO_UNREGISTER; + client->state = RCM_STATE_REMOVE; + } + } + + (void) mutex_unlock(&rcm_req_lock); +} + +/* + * register_interest entry point + */ +static int +script_register_interest(rcm_handle_t *hdl) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + int status = RCM_FAILURE; + int err = 0; + char *errmsg = NULL; + + rcm_log_message(RSCR_TRACE, + "script_register_interest: script name = %s\n", + rsi->script_name); + + (void) mutex_lock(&rsi->channel_lock); + + if (rsi->drreq_q.next != &rsi->drreq_q) { + /* if DR is already in progress no need to register again */ + (void) mutex_unlock(&rsi->channel_lock); + return (RCM_SUCCESS); + } + + rsi->hdl = hdl; + rsi->cmd = C_REGISTER; + rsi->failure_reason_buf = NULL; + fill_argv(rsi, argv, NULL); + + add_for_unregister(rsi); + + if (do_cmd(rsi, argv, script_env, &errmsg) == 0) { + switch (rsi->exit_status) { + case E_SUCCESS: + status = RCM_SUCCESS; + break; + + case E_FAILURE: + if (rsi->failure_reason_buf != NULL) { + rcm_log_message(RCM_ERROR, MS_REGISTER_ERR, + rsi->script_name, + rsi->failure_reason_buf); + } 
else + err = 1; + break; + + default: + err = 1; + break; + } + if (err) + rcm_log_message(RCM_ERROR, MS_PROTOCOL_ERR, + rsi->script_name); + } else if (errmsg) + (void) free(errmsg); + + complete_unregister(rsi); + + if (rsi->failure_reason_buf) + free(rsi->failure_reason_buf); + + (void) mutex_unlock(&rsi->channel_lock); + + return (status); +} + +/* + * Add the specified resource name to the drreq_q. + */ +static void +add_drreq(script_info_t *rsi, char *resource_name) +{ + rcm_queue_t *head = &rsi->drreq_q; + rcm_queue_t *q; + drreq_t *drreq; + + /* check if the dr req is already in the list */ + for (q = head->next; q != head; q = q->next) { + drreq = RCM_STRUCT_BASE_ADDR(drreq_t, q, queue); + if (strcmp(drreq->resource_name, resource_name) == 0) + /* dr req is already present in the queue */ + return; + } + + drreq = (drreq_t *)rcmscript_calloc(1, sizeof (drreq_t)); + drreq->resource_name = rcmscript_strdup(resource_name); + + rcm_enqueue_tail(&rsi->drreq_q, &drreq->queue); +} + +/* + * Remove the dr req for the specified resource name from the drreq_q. + */ +static void +remove_drreq(script_info_t *rsi, char *resource_name) +{ + rcm_queue_t *head = &rsi->drreq_q; + rcm_queue_t *q; + drreq_t *drreq; + + /* search for dr req and remove from the list */ + for (q = head->next; q != head; q = q->next) { + drreq = RCM_STRUCT_BASE_ADDR(drreq_t, q, queue); + if (strcmp(drreq->resource_name, resource_name) == 0) + break; + } + + if (q != head) { + /* found drreq on the queue */ + rcm_dequeue(&drreq->queue); + free(drreq->resource_name); + free(drreq); + } +} + +/* + * Remove all dr req's. 
+ */ +static void +remove_drreq_all(script_info_t *rsi) +{ + drreq_t *drreq; + + while (rsi->drreq_q.next != &rsi->drreq_q) { + drreq = RCM_STRUCT_BASE_ADDR(drreq_t, + rsi->drreq_q.next, queue); + remove_drreq(rsi, drreq->resource_name); + } +} + +/* + * request_offline entry point + */ +/* ARGSUSED */ +static int +script_request_offline(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + char *envp[MAX_ENV_PARAMS]; + char flags_name[MAX_FLAGS_NAME_LEN]; + int status; + int i; + + rcm_log_message(RSCR_TRACE, + "script_request_offline: resource = %s flags = %s\n", + resource_name, + flags_to_name(flag, flags_name, MAX_FLAGS_NAME_LEN)); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = (flag & RCM_QUERY) ? C_QUERYREMOVE : C_PREREMOVE; + + if (rsi->cmd == C_PREREMOVE) + add_drreq(rsi, resource_name); + + fill_argv(rsi, argv, resource_name); + copy_env(script_env, envp); + for (i = 0; envp[i] != NULL; i++) + ; + envp[i++] = (flag & RCM_FORCE) ? 
script_env_force : script_env_noforce; + envp[i] = NULL; + + status = do_dr(rsi, argv, envp, info); + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +/* + * notify_online entry point + */ +/* ARGSUSED */ +static int +script_notify_online(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + int status; + + rcm_log_message(RSCR_TRACE, "script_notify_online: resource = %s\n", + resource_name); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = C_UNDOREMOVE; + fill_argv(rsi, argv, resource_name); + + status = do_dr(rsi, argv, script_env, info); + + remove_drreq(rsi, resource_name); + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +/* + * notify_remove entry point + */ +/* ARGSUSED */ +static int +script_notify_remove(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + int status; + + rcm_log_message(RSCR_TRACE, "script_notify_remove: resource = %s\n", + resource_name); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = C_POSTREMOVE; + fill_argv(rsi, argv, resource_name); + + status = do_dr(rsi, argv, script_env, info); + + remove_drreq(rsi, resource_name); + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +/* + * request_suspend entry point + */ +/* ARGSUSED */ +static int +script_request_suspend(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + timespec_t *interval, + uint_t flag, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *buf = NULL; + char *curptr = NULL; + char *argv[MAX_ARGS]; + char *envp[MAX_ENV_PARAMS]; + char flags_name[MAX_FLAGS_NAME_LEN]; + int buflen = 0; + long seconds; + int status; + int 
i; + + rcm_log_message(RSCR_TRACE, + "script_request_suspend: resource = %s flags = %s\n", resource_name, + flags_to_name(flag, flags_name, MAX_FLAGS_NAME_LEN)); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = (flag & RCM_QUERY) ? C_QUERYSUSPEND : C_PRESUSPEND; + + if (rsi->cmd == C_PRESUSPEND) + add_drreq(rsi, resource_name); + + fill_argv(rsi, argv, resource_name); + + copy_env(script_env, envp); + for (i = 0; envp[i] != NULL; i++); + + envp[i++] = (flag & RCM_FORCE) ? script_env_force : script_env_noforce; + + if (interval) { + /* + * Merge the seconds and nanoseconds, rounding up if there + * are any remainder nanoseconds. + */ + seconds = interval->tv_sec + (interval->tv_nsec / 1000000000L); + if (interval->tv_nsec % 1000000000L) + seconds += (interval->tv_sec > 0) ? 1L : -1L; + rcmscript_snprintf(&buf, &buflen, &curptr, script_env_interval, + seconds); + envp[i++] = buf; + } + + envp[i] = NULL; + + status = do_dr(rsi, argv, envp, info); + + (void) mutex_unlock(&rsi->channel_lock); + if (buf) + free(buf); + return (status); +} + +/* + * notify_resume entry point + */ +/* ARGSUSED */ +static int +script_notify_resume(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + int status; + + rcm_log_message(RSCR_TRACE, "script_notify_resume: resource = %s\n", + resource_name); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = (flag & RCM_SUSPENDED) ? 
C_POSTRESUME : C_CANCELSUSPEND; + fill_argv(rsi, argv, resource_name); + + status = do_dr(rsi, argv, script_env, info); + + remove_drreq(rsi, resource_name); + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +static capacity_descr_t capacity_type[] = { + { "SUNW_memory", MATCH_EXACT, + "new_pages", "RCM_ENV_CAPACITY", + "page_size", "RCM_ENV_UNIT_SIZE", + "", ""}, + { "SUNW_cpu", MATCH_EXACT, + "new_total", "RCM_ENV_CAPACITY", + "new_cpu_list", "RCM_ENV_CPU_IDS", + "", ""}, + { "SUNW_cpu/set", MATCH_PREFIX, + "new_total", "RCM_ENV_CAPACITY", + "new_cpu_list", "RCM_ENV_CPU_IDS", + "", ""}, + { "", MATCH_INVALID, "", "" } +}; + +static capacity_descr_t * +get_capacity_descr(char *resource_name) +{ + int i; + + for (i = 0; *capacity_type[i].resource_name != '\0'; i++) { + if ((capacity_type[i].match_type == MATCH_EXACT && + strcmp(capacity_type[i].resource_name, + resource_name) == 0) || + (capacity_type[i].match_type == MATCH_PREFIX && + strncmp(capacity_type[i].resource_name, + resource_name, + strlen(capacity_type[i].resource_name)) == 0)) + + return (&capacity_type[i]); + } + + return (NULL); +} + +static int +build_env_for_capacity(script_info_t *rsi, + char *resource_name, + uint_t flag, + nvlist_t *capacity_info, + char *envp[], + int *dynamic_env_index, + char **errmsg) +{ + int p, i; + capacity_descr_t *capa = NULL; + nvpair_t *nvpair; + char *buf; + char *curptr; + int buflen; + int error; + uint_t n; + + copy_env(script_env, envp); + for (p = 0; envp[p] != NULL; p++) + ; + + if (rsi->cmd == C_QUERYCAPACITY || rsi->cmd == C_PRECAPACITY) + envp[p++] = (flag & RCM_FORCE) ? 
script_env_force : + script_env_noforce; + + envp[p] = NULL; + *dynamic_env_index = p; + + if ((capa = get_capacity_descr(resource_name)) == NULL) { + *errmsg = dup_err(RCM_ERROR, MF_UNKNOWN_RSRC_ERR, + resource_name, rsi->script_name); + return (-1); + } + + for (i = 0; *capa->param[i].nvname != '\0'; i++) { + nvpair = NULL; + while ((nvpair = nvlist_next_nvpair(capacity_info, nvpair)) + != NULL) { + if (strcmp(nvpair_name(nvpair), + capa->param[i].nvname) == 0) + break; + } + + if (nvpair == NULL) { + *errmsg = dup_err(RCM_ERROR, MF_NV_ERR, + rsi->script_name); + return (-1); + } + + error = 0; + buf = NULL; + + rcmscript_snprintf(&buf, &buflen, &curptr, "%s=", + capa->param[i].envname); + + switch (nvpair_type(nvpair)) { + case DATA_TYPE_INT16: + { + int16_t x; + + if (nvpair_value_int16(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%hd", (short)x); + } else + error = 1; + break; + } + + case DATA_TYPE_UINT16: + { + uint16_t x; + + if (nvpair_value_uint16(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%hu", (unsigned short)x); + } else + error = 1; + break; + } + + case DATA_TYPE_INT32: + { + int32_t x; + + if (nvpair_value_int32(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%d", (int)x); + } else + error = 1; + break; + } + + case DATA_TYPE_UINT32: + { + uint32_t x; + + if (nvpair_value_uint32(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%u", (uint_t)x); + } else + error = 1; + break; + } + + case DATA_TYPE_INT64: + { + int64_t x; + + if (nvpair_value_int64(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%lld", (long long)x); + } else + error = 1; + break; + } + + case DATA_TYPE_UINT64: + { + uint64_t x; + + if (nvpair_value_uint64(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%llu", (unsigned long long)x); + } else + error = 1; + break; + } + + case DATA_TYPE_INT16_ARRAY: + { + int16_t *x; + + if (nvpair_value_int16_array(nvpair, 
&x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%hd%s", + (short)(*x), + (n == 0) ? "" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_UINT16_ARRAY: + { + uint16_t *x; + + if (nvpair_value_uint16_array(nvpair, &x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%hu%s", + (unsigned short)(*x), + (n == 0) ? "" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_INT32_ARRAY: + { + int32_t *x; + + if (nvpair_value_int32_array(nvpair, &x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%d%s", + (int)(*x), + (n == 0) ? "" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_UINT32_ARRAY: + { + uint32_t *x; + + if (nvpair_value_uint32_array(nvpair, &x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%u%s", + (uint_t)(*x), + (n == 0) ? "" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_INT64_ARRAY: + { + int64_t *x; + + if (nvpair_value_int64_array(nvpair, &x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%lld%s", + (long long)(*x), + (n == 0) ? "" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_UINT64_ARRAY: + { + uint64_t *x; + + if (nvpair_value_uint64_array(nvpair, &x, &n) == 0) { + while (n--) { + rcmscript_snprintf(&buf, &buflen, + &curptr, "%llu%s", + (unsigned long long)(*x), + (n == 0) ? 
"" : " "); + x++; + } + } else + error = 1; + break; + } + + case DATA_TYPE_STRING: + { + char *x; + + if (nvpair_value_string(nvpair, &x) == 0) { + rcmscript_snprintf(&buf, &buflen, &curptr, + "%s", x); + } else + error = 1; + break; + } + + + default: + error = 1; + break; + } + + envp[p++] = buf; + + if (error) { + envp[p] = NULL; + for (p = *dynamic_env_index; envp[p] != NULL; p++) + free(envp[p]); + *errmsg = dup_err(RCM_ERROR, MF_NV_ERR, + rsi->script_name); + return (-1); + } + } + + envp[p] = NULL; + + return (0); +} + +/* + * request_capacity_change entry point + */ +/* ARGSUSED */ +static int +script_request_capacity_change(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + nvlist_t *capacity_info, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + char *envp[MAX_ENV_PARAMS]; + char flags_name[MAX_FLAGS_NAME_LEN]; + int status; + int dynamic_env_index; + + rcm_log_message(RSCR_TRACE, + "script_request_capacity_change: resource = %s flags = %s\n", + resource_name, + flags_to_name(flag, flags_name, MAX_FLAGS_NAME_LEN)); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = (flag & RCM_QUERY) ? 
C_QUERYCAPACITY : C_PRECAPACITY; + fill_argv(rsi, argv, resource_name); + + if (build_env_for_capacity(rsi, resource_name, flag, + capacity_info, envp, &dynamic_env_index, info) == 0) { + + status = do_dr(rsi, argv, envp, info); + + while (envp[dynamic_env_index] != NULL) { + free(envp[dynamic_env_index]); + dynamic_env_index++; + } + } else + status = RCM_FAILURE; + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +/* + * notify_capacity_change entry point + */ +/* ARGSUSED */ +static int +script_notify_capacity_change(rcm_handle_t *hdl, + char *resource_name, + pid_t pid, + uint_t flag, + nvlist_t *capacity_info, + char **info, + rcm_info_t **dependent_info) +{ + script_info_t *rsi = hdl->module->rsi; + char *argv[MAX_ARGS]; + char *envp[MAX_ENV_PARAMS]; + int status; + int dynamic_env_index; + + rcm_log_message(RSCR_TRACE, + "script_notify_capacity_change: resource = %s\n", resource_name); + + *info = NULL; + + (void) mutex_lock(&rsi->channel_lock); + + rsi->hdl = hdl; + rsi->cmd = C_POSTCAPACITY; + fill_argv(rsi, argv, resource_name); + + if (build_env_for_capacity(rsi, resource_name, flag, + capacity_info, envp, &dynamic_env_index, info) == 0) { + + status = do_dr(rsi, argv, envp, info); + + while (envp[dynamic_env_index] != NULL) { + free(envp[dynamic_env_index]); + dynamic_env_index++; + } + } else + status = RCM_FAILURE; + + (void) mutex_unlock(&rsi->channel_lock); + return (status); +} + +/* Log the message to syslog */ +static void +log_msg(script_info_t *rsi, int level, char *msg) +{ + rcm_log_msg(level, MS_LOG_MSG, rsi->script_name, msg); +} + +/* + * Format a message printf-style into a malloc'ed string and return it; the + * caller owns the result. Returns NULL if formatting yields an empty string + * or fails, or if memory cannot be allocated. Unless level is -1 the + * message is also logged at that level. + */ +/*PRINTFLIKE2*/ +static char * +dup_err(int level, char *format, ...) 
+{ + va_list ap; + char buf1[1]; + char *buf2; + int n; + + /* first pass: measure the formatted length without storing it */ + va_start(ap, format); + n = vsnprintf(buf1, 1, format, ap); + va_end(ap); + + if (n > 0) { + n++; /* room for the terminating '\0' */ + if ((buf2 = (char *)malloc(n)) != NULL) { + va_start(ap, format); + n = vsnprintf(buf2, n, format, ap); + va_end(ap); + if (n > 0) { + /* + * Log through an explicit "%s": buf2 is + * already formatted and may itself contain + * '%' characters. + */ + if (level != -1) + rcm_log_message(level, "%s", buf2); + return (buf2); + } + free(buf2); + } + } + + return (NULL); +} + +/*PRINTFLIKE4*/ +static void +rcmscript_snprintf(char **buf, int *buflen, char **curptr, char *format, ...) +{ +/* must be power of 2 otherwise RSCR_ROUNDUP would break */ +#define SPRINTF_CHUNK_LEN 512 +#define SPRINTF_MIN_CHUNK_LEN 64 + + va_list ap; + int offset, bytesneeded, bytesleft, error_num; + + if (*buf == NULL) { + *buflen = 0; + *curptr = NULL; + } + + offset = *curptr - *buf; + bytesneeded = SPRINTF_MIN_CHUNK_LEN; + bytesleft = *buflen - offset; + + /* LINTED */ + while (1) { + if (bytesneeded > bytesleft) { + *buflen += RSCR_ROUNDUP(bytesneeded - bytesleft, + SPRINTF_CHUNK_LEN); + if ((*buf = (char *)realloc(*buf, *buflen)) == NULL) { + error_num = errno; + rcm_log_message(RCM_ERROR, + MF_MEMORY_ALLOCATION_ERR, + strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + } + *curptr = *buf + offset; + bytesleft = *buflen - offset; + } + + va_start(ap, format); + bytesneeded = vsnprintf(*curptr, bytesleft, format, ap); + va_end(ap); + + if (bytesneeded < 0) { + /* vsnprintf encountered an error */ + error_num = errno; + rcm_log_message(RCM_ERROR, MF_FUNC_CALL_ERR, + "vsnprintf", strerror(error_num)); + rcmd_exit(error_num); + /*NOTREACHED*/ + + } else if (bytesneeded < bytesleft) { + /* vsnprintf succeeded */ + *curptr += bytesneeded; + return; + + } else { + bytesneeded++; /* to account for storage for '\0' */ + } + } +} + +static char * +rcmscript_strdup(char *str) +{ + char *dupstr; + + if ((dupstr = strdup(str)) == NULL) { + rcm_log_message(RCM_ERROR, MF_MEMORY_ALLOCATION_ERR, + strerror(errno)); + rcmd_exit(errno); + /*NOTREACHED*/ + } + + return 
(dupstr); +} + +static void * +rcmscript_malloc(size_t len) +{ + void *ptr; + + if ((ptr = malloc(len)) == NULL) { + rcm_log_message(RCM_ERROR, MF_MEMORY_ALLOCATION_ERR, + strerror(errno)); + rcmd_exit(errno); + /*NOTREACHED*/ + } + + return (ptr); +} + +static void * +rcmscript_calloc(size_t nelem, size_t elsize) +{ + void *ptr; + + if ((ptr = calloc(nelem, elsize)) == NULL) { + rcm_log_message(RCM_ERROR, MF_MEMORY_ALLOCATION_ERR, + strerror(errno)); + rcmd_exit(errno); + /*NOTREACHED*/ + } + + return (ptr); +} diff --git a/usr/src/cmd/rcm_daemon/common/rcm_script_impl.h b/usr/src/cmd/rcm_daemon/common/rcm_script_impl.h new file mode 100644 index 0000000000..abd5cebd65 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_script_impl.h @@ -0,0 +1,225 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2000-2001 by Sun Microsystems, Inc. + * All rights reserved. + */ + +#ifndef _RCM_SCRIPT_IMPL_H +#define _RCM_SCRIPT_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define TRUE 1 +#define FALSE 0 + +/* Minimum and maximum rcm scripting API version supported. 
*/ +#define SCRIPT_API_MIN_VER 1 +#define SCRIPT_API_MAX_VER 1 + +/* + * Default maximum time (in seconds) allocated for an rcm command + * before SIGABRT is sent. + */ +#define SCRIPT_CMD_TIMEOUT 60 + +/* + * Maximum time (in seconds) allocated after sending SIGABRT before + * the script is killed. + */ +#define SCRIPT_ABORT_TIMEOUT 10 + +/* + * Maximum time (in seconds) for which the rcm daemon checks whether + * a script is killed or not after the rcm daemon kills the script. + */ +#define SCRIPT_KILL_TIMEOUT 3 + +/* Maximum number of command line parameters passed to a script */ +#define MAX_ARGS 16 + +/* Maximum number of environment parameters passed to a script */ +#define MAX_ENV_PARAMS 64 + +#define MAX_LINE_LEN (4*1024) +#define MAX_FLAGS_NAME_LEN 64 + +/* exit codes */ +typedef enum { + E_SUCCESS, + E_FAILURE, + E_UNSUPPORTED_CMD, + E_REFUSE +} script_exit_codes_t; + +/* This structure is used to maintain a list of current dr'ed resources */ +typedef struct { + rcm_queue_t queue; + char *resource_name; +} drreq_t; + +/* + * Main data structure for rcm scripting. There will be one instance of + * this structure for every rcm script. A pointer to this structure is + * kept in module structure. + */ +typedef struct script_info { + /* + * Used to maintain a queue of script_info structures + * Global variable script_info_q is the head of the queue. 
+ */ + rcm_queue_t queue; + + rcm_queue_t drreq_q; /* queue head for current dr'ed resources */ + + module_t *module; + rcm_handle_t *hdl; + + char *script_full_name; /* name of the script including path */ + char *script_name; /* name of the script without path component */ + + /* + * file descriptors used to communicate with the script + * pipe1 is used to capture script's stdout + * pipe2 is used to capture script's stderr + */ + int pipe1[2]; + int pipe2[2]; + + pid_t pid; /* process id of the script process */ + thread_t tid; /* thread id of the stderr reader thread */ + + /* + * Lock to protect the fileds in this structure and also to protect + * the communication channel to the script. + */ + mutex_t channel_lock; + + int ver; /* scripting api version of the script */ + int cmd; /* current rcm scripting command */ + int cmd_timeout; /* timeout value in seconds */ + int exit_status; /* exit status of the script */ + + /* time stamp of the script when it was last run */ + time_t lastrun; + + char *func_info_buf; + char *func_info_buf_curptr; + int func_info_buf_len; + + char *resource_usage_info_buf; + char *resource_usage_info_buf_curptr; + int resource_usage_info_buf_len; + + char *failure_reason_buf; + char *failure_reason_buf_curptr; + int failure_reason_buf_len; + uint_t flags; +} script_info_t; + +/* + * script_info_t:flags + */ +#define STDERR_THREAD_CREATED 1 + +#define PARENT_END_OF_PIPE 0 +#define CHILD_END_OF_PIPE 1 + +#define PS_STATE_FILE_VER 1 + +typedef struct state_element { + uint32_t flags; + uint32_t reserved; /* for 64 bit alignment */ + /* followed by actual state element */ +} state_element_t; + +/* + * state_element_t:flags + * The following flag when set indicates that the state element is + * currently in use. When not set indicates that the state element is free. 
+ */ +#define STATE_ELEMENT_IN_USE 0x1 + +/* + * This structure defines the layout of state file used by rcm scripting + */ +typedef struct state_file { + uint32_t version; + uint32_t max_elements; /* number of state elements */ + /* followed by an array of state elements of type state_element_t */ +} state_file_t; + +typedef struct state_file_descr { + uint32_t version; + int fd; /* file descriptor to the state file */ + size_t element_size; /* size of one state element */ + + /* + * number of state elements to allocate at a time when the state file + * grows. + */ + int chunk_size; + + /* + * index into the state element array where the next search will + * begin for an empty slot. + */ + int index; + + /* pointer to mmapped state file */ + state_file_t *state_file; +} state_file_descr_t; + +/* round up to n byte boundary. n must be power of 2 for this macro to work */ +#define RSCR_ROUNDUP(x, n) (((x) + ((n) - 1)) & (~((n) - 1))) + +typedef struct ps_state_element { + pid_t pid; + char script_name[MAXNAMELEN]; +} ps_state_element_t; + +/* maximum number of additional env variables for capacity specific stuff */ +#define MAX_CAPACITY_PARAMS 10 + +typedef struct capacity_descr { + char *resource_name; + int match_type; + struct { + char *nvname; + char *envname; + } param[MAX_CAPACITY_PARAMS]; +} capacity_descr_t; + +/* capacity_descr_t:match_type */ +#define MATCH_INVALID 0 +#define MATCH_EXACT 1 +#define MATCH_PREFIX 2 + +#ifdef __cplusplus +} +#endif + +#endif /* _RCM_SCRIPT_IMPL_H */ diff --git a/usr/src/cmd/rcm_daemon/common/rcm_subr.c b/usr/src/cmd/rcm_daemon/common/rcm_subr.c new file mode 100644 index 0000000000..58281c1297 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/rcm_subr.c @@ -0,0 +1,1936 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "rcm_impl.h" +#include "rcm_module.h" + +/* + * Short-circuits unloading of modules with no registrations, so that + * they are present during the next db_sync cycle. + */ +#define MOD_REFCNT_INIT 2 + +int need_cleanup; /* flag indicating if clean up is needed */ + +static mutex_t mod_lock; /* protects module list */ +static module_t *module_head; /* linked list of modules */ +static rsrc_node_t *rsrc_root; /* root of all resources */ + +/* + * Misc help routines + */ +static void rcmd_db_print(); +static void rcm_handle_free(rcm_handle_t *); +static rcm_handle_t *rcm_handle_alloc(module_t *); +static void rsrc_clients_free(client_t *); +static struct rcm_mod_ops *modops_from_v1(void *); +static int call_getinfo(struct rcm_mod_ops *, rcm_handle_t *, char *, id_t, + uint_t, char **, char **, nvlist_t *, rcm_info_t **); +static int node_action(rsrc_node_t *, void *); + +extern void start_polling_thread(); + +/* + * translate /dev name to a /devices path + * + * N.B. This routine can be enhanced to understand network names + * and friendly names in the future. 
+ */ +char * +resolve_name(char *alias) +{ + char *tmp; + const char *dev = "/dev/"; + + if (strlen(alias) == 0) + return (NULL); + + if (strncmp(alias, dev, strlen(dev)) == 0) { + /* + * Treat /dev/... as a symbolic link + */ + tmp = s_malloc(PATH_MAX); + if (realpath(alias, tmp) != NULL) { + return (tmp); + } else { + free(tmp); + } + /* Fail to resolve /dev/ name, use the name as is */ + } + + return (s_strdup(alias)); +} + +/* + * Figure out resource type based on "resolved" name + * + * N.B. This routine does not figure out file system mount points. + * This is determined at runtime when filesys module register + * with RCM_FILESYS flag. + */ +int +rsrc_get_type(const char *resolved_name) +{ + if (resolved_name[0] != '/') + return (RSRC_TYPE_ABSTRACT); + + if (strncmp("/devices/", resolved_name, 9) == 0) + return (RSRC_TYPE_DEVICE); + + return (RSRC_TYPE_NORMAL); +} + +/* + * Module operations: + * module_load, module_unload, module_info, module_attach, module_detach, + * cli_module_hold, cli_module_rele + */ + +#ifdef ENABLE_MODULE_DETACH +/* + * call unregister() entry point to allow module to unregister for + * resources without getting confused. 
+ */ +static void +module_detach(module_t *module) +{ + struct rcm_mod_ops *ops = module->modops; + + rcm_log_message(RCM_TRACE2, "module_detach(name=%s)\n", module->name); + + ops->rcmop_unregister(module->rcmhandle); +} +#endif /* ENABLE_MODULE_DETACH */ + +/* + * call register() entry point to allow module to register for resources + */ +static void +module_attach(module_t *module) +{ + struct rcm_mod_ops *ops = module->modops; + + rcm_log_message(RCM_TRACE2, "module_attach(name=%s)\n", module->name); + + if (ops->rcmop_register(module->rcmhandle) != RCM_SUCCESS) { + rcm_log_message(RCM_WARNING, + gettext("module %s register() failed\n"), module->name); + } +} + +struct rcm_mod_ops * +module_init(module_t *module) +{ + if (module->dlhandle) + /* rcm module */ + return (module->init()); + else + /* rcm script */ + return (script_init(module)); +} + +/* + * call rmc_mod_info() entry of module + */ +static const char * +module_info(module_t *module) +{ + if (module->dlhandle) + /* rcm module */ + return (module->info()); + else + /* rcm script */ + return (script_info(module)); +} + +int +module_fini(module_t *module) +{ + if (module->dlhandle) + /* rcm module */ + return (module->fini()); + else + /* rcm script */ + return (script_fini(module)); +} + +/* + * call rmc_mod_fini() entry of module, dlclose module, and free memory + */ +static void +module_unload(module_t *module) +{ + int version = module->modops->version; + + rcm_log_message(RCM_DEBUG, "module_unload(name=%s)\n", module->name); + + (void) module_fini(module); + + rcm_handle_free(module->rcmhandle); + free(module->name); + + switch (version) { + case RCM_MOD_OPS_V1: + /* + * Free memory associated with converted ops vector + */ + free(module->modops); + break; + + case RCM_MOD_OPS_VERSION: + default: + break; + } + + if (module->dlhandle) + rcm_module_close(module->dlhandle); + + free(module); +} + +/* + * Locate the module, execute rcm_mod_init() and check ops vector version + */ +static module_t * 
+module_load(char *modname) +{ + module_t *module; + + rcm_log_message(RCM_DEBUG, "module_load(name=%s)\n", modname); + + /* + * dlopen the module + */ + module = s_calloc(1, sizeof (*module)); + module->name = s_strdup(modname); + module->modops = NULL; + rcm_init_queue(&module->client_q); + + if (rcm_is_script(modname) == 0) { + /* rcm module */ + module->dlhandle = rcm_module_open(modname); + + if (module->dlhandle == NULL) { + rcm_log_message(RCM_NOTICE, + gettext("cannot open module %s\n"), modname); + goto fail; + } + + /* + * dlsym rcm_mod_init/fini/info() entry points + */ + module->init = (struct rcm_mod_ops *(*)())dlsym( + module->dlhandle, "rcm_mod_init"); + module->fini = (int (*)())dlsym( + module->dlhandle, "rcm_mod_fini"); + module->info = (const char *(*)())dlsym(module->dlhandle, + "rcm_mod_info"); + if (module->init == NULL || module->fini == NULL || + module->info == NULL) { + rcm_log_message(RCM_ERROR, + gettext("missing entries in module %s\n"), modname); + goto fail; + } + + } else { + /* rcm script */ + module->dlhandle = NULL; + module->init = (struct rcm_mod_ops *(*)()) NULL; + module->fini = (int (*)()) NULL; + module->info = (const char *(*)()) NULL; + } + + if ((module->modops = module_init(module)) == NULL) { + if (module->dlhandle) + rcm_log_message(RCM_ERROR, + gettext("cannot init module %s\n"), modname); + goto fail; + } + + /* + * Check ops vector version + */ + switch (module->modops->version) { + case RCM_MOD_OPS_V1: + module->modops = modops_from_v1((void *)module->modops); + break; + + case RCM_MOD_OPS_VERSION: + break; + + default: + rcm_log_message(RCM_ERROR, + gettext("module %s rejected: version %d not supported\n"), + modname, module->modops->version); + (void) module_fini(module); + goto fail; + } + + /* + * Make sure all fields are set + */ + if ((module->modops->rcmop_register == NULL) || + (module->modops->rcmop_unregister == NULL) || + (module->modops->rcmop_get_info == NULL) || + 
(module->modops->rcmop_request_suspend == NULL) || + (module->modops->rcmop_notify_resume == NULL) || + (module->modops->rcmop_request_offline == NULL) || + (module->modops->rcmop_notify_online == NULL) || + (module->modops->rcmop_notify_remove == NULL)) { + rcm_log_message(RCM_ERROR, + gettext("module %s rejected: has NULL ops fields\n"), + modname); + (void) module_fini(module); + goto fail; + } + + module->rcmhandle = rcm_handle_alloc(module); + return (module); + +fail: + if (module->modops && module->modops->version == RCM_MOD_OPS_V1) + free(module->modops); + + if (module->dlhandle) + rcm_module_close(module->dlhandle); + + free(module->name); + free(module); + return (NULL); +} + +/* + * add one to module hold count. load the module if not loaded + */ +static module_t * +cli_module_hold(char *modname) +{ + module_t *module; + + rcm_log_message(RCM_TRACE3, "cli_module_hold(%s)\n", modname); + + (void) mutex_lock(&mod_lock); + module = module_head; + while (module) { + if (strcmp(module->name, modname) == 0) { + break; + } + module = module->next; + } + + if (module) { + module->ref_count++; + (void) mutex_unlock(&mod_lock); + return (module); + } + + /* + * Module not found, attempt to load it + */ + if ((module = module_load(modname)) == NULL) { + (void) mutex_unlock(&mod_lock); + return (NULL); + } + + /* + * Hold module and link module into module list + */ + module->ref_count = MOD_REFCNT_INIT; + module->next = module_head; + module_head = module; + + (void) mutex_unlock(&mod_lock); + + return (module); +} + +/* + * decrement module hold count. 
Unload it if no reference + */ +static void +cli_module_rele(module_t *module) +{ + module_t *curr = module_head, *prev = NULL; + + rcm_log_message(RCM_TRACE3, "cli_module_rele(name=%s)\n", module->name); + + (void) mutex_lock(&mod_lock); + if (--(module->ref_count) != 0) { + (void) mutex_unlock(&mod_lock); + return; + } + + rcm_log_message(RCM_TRACE2, "unloading module %s\n", module->name); + + /* + * Unlink the module from list + */ + while (curr && (curr != module)) { + prev = curr; + curr = curr->next; + } + if (curr == NULL) { + rcm_log_message(RCM_ERROR, + gettext("Unexpected error: module %s not found.\n"), + module->name); + } else if (prev == NULL) { + module_head = curr->next; + } else { + prev->next = curr->next; + } + (void) mutex_unlock(&mod_lock); + + module_unload(module); +} + +/* + * Gather usage info be passed back to requester. Discard info if user does + * not care (list == NULL). + */ +void +add_busy_rsrc_to_list(char *alias, pid_t pid, int state, int seq_num, + char *modname, const char *infostr, const char *errstr, + nvlist_t *client_props, rcm_info_t **list) +{ + rcm_info_t *info; + rcm_info_t *tmp; + char *buf = NULL; + size_t buflen = 0; + + if (list == NULL) { + return; + } + + info = s_calloc(1, sizeof (*info)); + if (errno = nvlist_alloc(&(info->info), NV_UNIQUE_NAME, 0)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_alloc=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + + /*LINTED*/ + if ((errno = nvlist_add_string(info->info, RCM_RSRCNAME, alias)) || + (errno = nvlist_add_int32(info->info, RCM_SEQ_NUM, seq_num)) || + (errno = nvlist_add_int64(info->info, RCM_CLIENT_ID, pid)) || + (errno = nvlist_add_int32(info->info, RCM_RSRCSTATE, state))) { + rcm_log_message(RCM_ERROR, "failed (nvlist_add=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + + /* + * Daemon calls to add_busy_rsrc_to_list may pass in + * error/info. Add these through librcm interfaces. 
+ */ + if (errstr) { + rcm_log_message(RCM_TRACE3, "adding error string: %s\n", + errstr); + if (errno = nvlist_add_string(info->info, RCM_CLIENT_ERROR, + (char *)errstr)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_add=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + } + + if (infostr) { + if (errno = nvlist_add_string(info->info, RCM_CLIENT_INFO, + (char *)infostr)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_add=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + } + + if (modname) { + if (errno = nvlist_add_string(info->info, RCM_CLIENT_MODNAME, + modname)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_add=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + } + + if (client_props) { + if (errno = nvlist_pack(client_props, &buf, &buflen, + NV_ENCODE_NATIVE, 0)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_pack=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + if (errno = nvlist_add_byte_array(info->info, + RCM_CLIENT_PROPERTIES, (uchar_t *)buf, buflen)) { + rcm_log_message(RCM_ERROR, "failed (nvlist_add=%s).\n", + strerror(errno)); + rcmd_exit(errno); + } + (void) free(buf); + } + + + /* link info at end of list */ + if (*list) { + tmp = *list; + while (tmp->next) + tmp = tmp->next; + tmp->next = info; + } else { + *list = info; + } +} + +/* + * Resource client realted operations: + * rsrc_client_alloc, rsrc_client_find, rsrc_client_add, + * rsrc_client_remove, rsrc_client_action, rsrc_client_action_list + */ + +/* Allocate rsrc_client_t structure. Load module if necessary. 
*/ +/*ARGSUSED*/ +static client_t * +rsrc_client_alloc(char *alias, char *modname, pid_t pid, uint_t flag) +{ + client_t *client; + module_t *mod; + + assert((alias != NULL) && (modname != NULL)); + + rcm_log_message(RCM_TRACE4, "rsrc_client_alloc(%s, %s, %ld)\n", + alias, modname, pid); + + if ((mod = cli_module_hold(modname)) == NULL) { + return (NULL); + } + + client = s_calloc(1, sizeof (client_t)); + client->module = mod; + client->pid = pid; + client->alias = s_strdup(alias); + client->prv_flags = 0; + client->state = RCM_STATE_ONLINE; + client->flag = flag; + + /* This queue is protected by rcm_req_lock */ + rcm_enqueue_tail(&mod->client_q, &client->queue); + + return (client); +} + +/* Find client in list matching modname and pid */ +client_t * +rsrc_client_find(char *modname, pid_t pid, client_t **list) +{ + client_t *client = *list; + + rcm_log_message(RCM_TRACE4, "rsrc_client_find(%s, %ld, %p)\n", + modname, pid, (void *)list); + + while (client) { + if ((client->pid == pid) && + strcmp(modname, client->module->name) == 0) { + break; + } + client = client->next; + } + return (client); +} + +/* Add a client to client list */ +static void +rsrc_client_add(client_t *client, client_t **list) +{ + rcm_log_message(RCM_TRACE4, "rsrc_client_add: %s, %s, %ld\n", + client->alias, client->module->name, client->pid); + + client->next = *list; + *list = client; +} + +/* Remove client from list and destroy it */ +static void +rsrc_client_remove(client_t *client, client_t **list) +{ + client_t *tmp, *prev = NULL; + + rcm_log_message(RCM_TRACE4, "rsrc_client_remove: %s, %s, %ld\n", + client->alias, client->module->name, client->pid); + + tmp = *list; + while (tmp) { + if (client != tmp) { + prev = tmp; + tmp = tmp->next; + continue; + } + if (prev) { + prev->next = tmp->next; + } else { + *list = tmp->next; + } + tmp->next = NULL; + rsrc_clients_free(tmp); + return; + } +} + +/* Free a list of clients. 
Called from cleanup thread only */ +static void +rsrc_clients_free(client_t *list) +{ + client_t *client = list; + + while (client) { + + /* + * Note that the rcm daemon is single threaded while + * executing this routine. So there is no need to acquire + * rcm_req_lock here while dequeuing. + */ + rcm_dequeue(&client->queue); + + if (client->module) { + cli_module_rele(client->module); + } + list = client->next; + if (client->alias) { + free(client->alias); + } + free(client); + client = list; + } +} + +/* + * Invoke a callback into a single client + * This is the core of rcm_mod_ops interface + */ +static int +rsrc_client_action(client_t *client, int cmd, void *arg) +{ + int rval = RCM_SUCCESS; + char *dummy_error = NULL; + char *error = NULL; + char *info = NULL; + rcm_handle_t *hdl; + nvlist_t *client_props = NULL; + rcm_info_t *depend_info = NULL; + struct rcm_mod_ops *ops = client->module->modops; + tree_walk_arg_t *targ = (tree_walk_arg_t *)arg; + + rcm_log_message(RCM_TRACE4, + "rsrc_client_action: %s, %s, cmd=%d, flag=0x%x\n", client->alias, + client->module->name, cmd, targ->flag); + + /* + * Create a per-operation handle, increment seq_num by 1 so we will + * know if a module uses this handle to callback into rcm_daemon. + */ + hdl = rcm_handle_alloc(client->module); + hdl->seq_num = targ->seq_num + 1; + + /* + * Filter out operations for which the client didn't register. + */ + switch (cmd) { + case CMD_SUSPEND: + case CMD_RESUME: + case CMD_OFFLINE: + case CMD_ONLINE: + case CMD_REMOVE: + if ((client->flag & RCM_REGISTER_DR) == 0) { + rcm_handle_free(hdl); + return (RCM_SUCCESS); + } + break; + case CMD_REQUEST_CHANGE: + case CMD_NOTIFY_CHANGE: + if ((client->flag & RCM_REGISTER_CAPACITY) == 0) { + rcm_handle_free(hdl); + return (RCM_SUCCESS); + } + break; + case CMD_EVENT: + if ((client->flag & RCM_REGISTER_EVENT) == 0) { + rcm_handle_free(hdl); + return (RCM_SUCCESS); + } + break; + } + + /* + * Create nvlist_t for any client-specific properties. 
+ */ + if (errno = nvlist_alloc(&client_props, NV_UNIQUE_NAME, 0)) { + rcm_log_message(RCM_ERROR, + "client action failed (nvlist_alloc=%s)\n", + strerror(errno)); + rcmd_exit(errno); + } + + /* + * Process the operation via a callback to the client module. + */ + switch (cmd) { + case CMD_GETINFO: + rval = call_getinfo(ops, hdl, client->alias, client->pid, + targ->flag, &info, &error, client_props, &depend_info); + break; + + case CMD_SUSPEND: + if (((targ->flag & RCM_QUERY_CANCEL) == 0) && + (client->state == RCM_STATE_SUSPEND)) { + break; + } + + if ((targ->flag & RCM_QUERY) == 0) { + rcm_log_message(RCM_DEBUG, "suspending %s\n", + client->alias); + } else if ((targ->flag & RCM_QUERY_CANCEL) == 0) { + rcm_log_message(RCM_DEBUG, "suspend query %s\n", + client->alias); + } else { + rcm_log_message(RCM_DEBUG, + "suspend query %s cancelled\n", client->alias); + } + + /* + * Update the client's state before the operation. + * If this is a cancelled query, then updating the state is + * the only thing that needs to be done, so break afterwards. + */ + if ((targ->flag & RCM_QUERY) == 0) { + client->state = RCM_STATE_SUSPENDING; + } else if ((targ->flag & RCM_QUERY_CANCEL) == 0) { + client->state = RCM_STATE_SUSPEND_QUERYING; + } else { + client->state = RCM_STATE_ONLINE; + break; + } + + rval = ops->rcmop_request_suspend(hdl, client->alias, + client->pid, targ->interval, targ->flag, &error, + &depend_info); + + /* Update the client's state after the operation. 
*/ + if ((targ->flag & RCM_QUERY) == 0) { + if (rval == RCM_SUCCESS) { + client->state = RCM_STATE_SUSPEND; + } else { + client->state = RCM_STATE_SUSPEND_FAIL; + } + } else { + if (rval == RCM_SUCCESS) { + client->state = RCM_STATE_SUSPEND_QUERY; + } else { + client->state = RCM_STATE_SUSPEND_QUERY_FAIL; + } + } + break; + + case CMD_RESUME: + if (client->state == RCM_STATE_ONLINE) { + break; + } + client->state = RCM_STATE_RESUMING; + rval = ops->rcmop_notify_resume(hdl, client->alias, client->pid, + targ->flag, &error, &depend_info); + + /* online state is unconditional */ + client->state = RCM_STATE_ONLINE; + break; + + case CMD_OFFLINE: + if (((targ->flag & RCM_QUERY_CANCEL) == 0) && + (client->state == RCM_STATE_OFFLINE)) { + break; + } + + if ((targ->flag & RCM_QUERY) == 0) { + rcm_log_message(RCM_DEBUG, "offlining %s\n", + client->alias); + } else if ((targ->flag & RCM_QUERY_CANCEL) == 0) { + rcm_log_message(RCM_DEBUG, "offline query %s\n", + client->alias); + } else { + rcm_log_message(RCM_DEBUG, + "offline query %s cancelled\n", client->alias); + } + + /* + * Update the client's state before the operation. + * If this is a cancelled query, then updating the state is + * the only thing that needs to be done, so break afterwards. + */ + if ((targ->flag & RCM_QUERY) == 0) { + client->state = RCM_STATE_OFFLINING; + } else if ((targ->flag & RCM_QUERY_CANCEL) == 0) { + client->state = RCM_STATE_OFFLINE_QUERYING; + } else { + client->state = RCM_STATE_ONLINE; + break; + } + + rval = ops->rcmop_request_offline(hdl, client->alias, + client->pid, targ->flag, &error, &depend_info); + + /* Update the client's state after the operation. 
*/ + if ((targ->flag & RCM_QUERY) == 0) { + if (rval == RCM_SUCCESS) { + client->state = RCM_STATE_OFFLINE; + } else { + client->state = RCM_STATE_OFFLINE_FAIL; + } + } else { + if (rval == RCM_SUCCESS) { + client->state = RCM_STATE_OFFLINE_QUERY; + } else { + client->state = RCM_STATE_OFFLINE_QUERY_FAIL; + } + } + break; + + case CMD_ONLINE: + if (client->state == RCM_STATE_ONLINE) { + break; + } + + rcm_log_message(RCM_DEBUG, "onlining %s\n", client->alias); + + client->state = RCM_STATE_ONLINING; + rval = ops->rcmop_notify_online(hdl, client->alias, client->pid, + targ->flag, &error, &depend_info); + client->state = RCM_STATE_ONLINE; + break; + + case CMD_REMOVE: + rcm_log_message(RCM_DEBUG, "removing %s\n", client->alias); + client->state = RCM_STATE_REMOVING; + rval = ops->rcmop_notify_remove(hdl, client->alias, client->pid, + targ->flag, &error, &depend_info); + client->state = RCM_STATE_REMOVE; + break; + + case CMD_REQUEST_CHANGE: + rcm_log_message(RCM_DEBUG, "requesting state change of %s\n", + client->alias); + if (ops->rcmop_request_capacity_change) + rval = ops->rcmop_request_capacity_change(hdl, + client->alias, client->pid, targ->flag, targ->nvl, + &error, &depend_info); + break; + + case CMD_NOTIFY_CHANGE: + rcm_log_message(RCM_DEBUG, "requesting state change of %s\n", + client->alias); + if (ops->rcmop_notify_capacity_change) + rval = ops->rcmop_notify_capacity_change(hdl, + client->alias, client->pid, targ->flag, targ->nvl, + &error, &depend_info); + break; + + case CMD_EVENT: + rcm_log_message(RCM_DEBUG, "delivering event to %s\n", + client->alias); + if (ops->rcmop_notify_event) + rval = ops->rcmop_notify_event(hdl, client->alias, + client->pid, targ->flag, &error, targ->nvl, + &depend_info); + break; + + default: + rcm_log_message(RCM_ERROR, gettext("unknown command %d\n"), + cmd); + rval = RCM_FAILURE; + break; + } + + /* reset error code to the most significant error */ + if (rval != RCM_SUCCESS) + targ->retcode = rval; + + /* + * XXX - The 
code below may produce duplicate rcm_info_t's on error? + */ + if ((cmd != CMD_GETINFO) && + ((rval != RCM_SUCCESS) || + (error != NULL) || + (targ->flag & RCM_SCOPE))) { + (void) call_getinfo(ops, hdl, client->alias, client->pid, + targ->flag & (~(RCM_INCLUDE_DEPENDENT|RCM_INCLUDE_SUBTREE)), + &info, &dummy_error, client_props, &depend_info); + if (dummy_error) + (void) free(dummy_error); + } else if (cmd != CMD_GETINFO) { + nvlist_free(client_props); + client_props = NULL; + } + + if (client_props) { + add_busy_rsrc_to_list(client->alias, client->pid, client->state, + targ->seq_num, client->module->name, info, error, + client_props, targ->info); + nvlist_free(client_props); + } + + if (info) + (void) free(info); + if (error) + (void) free(error); + + if (depend_info) { + if (targ->info) { + (void) rcm_append_info(targ->info, depend_info); + } else { + rcm_free_info(depend_info); + } + } + + rcm_handle_free(hdl); + return (rval); +} + +/* + * invoke a callback into a list of clients, return 0 if all success + */ +int +rsrc_client_action_list(client_t *list, int cmd, void *arg) +{ + int error, rval = RCM_SUCCESS; + + while (list) { + client_t *client = list; + list = client->next; + + if (client->state == RCM_STATE_REMOVE) + continue; + + error = rsrc_client_action(client, cmd, arg); + if (error != RCM_SUCCESS) { + rval = error; + } + } + + return (rval); +} + +/* + * Node realted operations: + * + * rn_alloc, rn_free, rn_find_child, + * rn_get_child, rn_get_sibling, + * rsrc_node_find, rsrc_node_add_user, rsrc_node_remove_user, + */ + +/* Allocate node based on a logical or physical name */ +static rsrc_node_t * +rn_alloc(char *name, int type) +{ + rsrc_node_t *node; + + rcm_log_message(RCM_TRACE4, "rn_alloc(%s, %d)\n", name, type); + + node = s_calloc(1, sizeof (*node)); + node->name = s_strdup(name); + node->type = type; + + return (node); +} + +/* + * Free node along with its siblings and children + */ +static void +rn_free(rsrc_node_t *node) +{ + if (node == 
NULL) { + return; + } + + if (node->child) { + rn_free(node->child); + } + + if (node->sibling) { + rn_free(node->sibling); + } + + rsrc_clients_free(node->users); + free(node->name); + free(node); +} + +/* + * Find next sibling + */ +static rsrc_node_t * +rn_get_sibling(rsrc_node_t *node) +{ + return (node->sibling); +} + +/* + * Find first child + */ +static rsrc_node_t * +rn_get_child(rsrc_node_t *node) +{ + return (node->child); +} + +/* + * Find child named childname. Create it if flag is RSRC_NODE_CRTEATE + */ +static rsrc_node_t * +rn_find_child(rsrc_node_t *parent, char *childname, int flag, int type) +{ + rsrc_node_t *child = parent->child; + rsrc_node_t *new, *prev = NULL; + + rcm_log_message(RCM_TRACE4, + "rn_find_child(parent=%s, child=%s, 0x%x, %d)\n", + parent->name, childname, flag, type); + + /* + * Children are ordered based on strcmp. + */ + while (child && (strcmp(child->name, childname) < 0)) { + prev = child; + child = child->sibling; + } + + if (child && (strcmp(child->name, childname) == 0)) { + return (child); + } + + if (flag != RSRC_NODE_CREATE) + return (NULL); + + new = rn_alloc(childname, type); + new->parent = parent; + new->sibling = child; + + /* + * Set this linkage last so we don't break ongoing operations. + * + * N.B. Assume setting a pointer is an atomic operation. 
+ */ + if (prev == NULL) { + parent->child = new; + } else { + prev->sibling = new; + } + + return (new); +} + +/* + * Pathname related help functions + */ +static void +pn_preprocess(char *pathname, int type) +{ + char *tmp; + + if (type != RSRC_TYPE_DEVICE) + return; + + /* + * For devices, convert ':' to '/' (treat minor nodes and children) + */ + tmp = strchr(pathname, ':'); + if (tmp == NULL) + return; + + *tmp = '/'; +} + +static char * +pn_getnextcomp(char *pathname, char **lasts) +{ + char *slash; + + if (pathname == NULL) + return (NULL); + + /* skip slashes' */ + while (*pathname == '/') + ++pathname; + + if (*pathname == '\0') + return (NULL); + + slash = strchr(pathname, '/'); + if (slash != NULL) { + *slash = '\0'; + *lasts = slash + 1; + } else { + *lasts = NULL; + } + + return (pathname); +} + +/* + * Find a node in tree based on device, which is the physical pathname + * of the form /sbus@.../esp@.../sd@... + */ +int +rsrc_node_find(char *rsrcname, int flag, rsrc_node_t **nodep) +{ + char *pathname, *nodename, *lasts; + rsrc_node_t *node; + int type; + + rcm_log_message(RCM_TRACE4, "rn_node_find(%s, 0x%x)\n", rsrcname, flag); + + /* + * For RSRC_TYPE_ABSTRACT, look under /ABSTRACT. For other types, + * look under /SYSTEM. + */ + pathname = resolve_name(rsrcname); + if (pathname == NULL) + return (EINVAL); + + type = rsrc_get_type(pathname); + switch (type) { + case RSRC_TYPE_DEVICE: + case RSRC_TYPE_NORMAL: + node = rn_find_child(rsrc_root, "SYSTEM", RSRC_NODE_CREATE, + RSRC_TYPE_NORMAL); + break; + + case RSRC_TYPE_ABSTRACT: + node = rn_find_child(rsrc_root, "ABSTRACT", RSRC_NODE_CREATE, + RSRC_TYPE_NORMAL); + break; + + default: + /* just to make sure */ + free(pathname); + return (EINVAL); + } + + /* + * Find position of device within tree. Upon exiting the loop, device + * should be placed between prev and curr. 
+ */ + pn_preprocess(pathname, type); + lasts = pathname; + while ((nodename = pn_getnextcomp(lasts, &lasts)) != NULL) { + rsrc_node_t *parent = node; + node = rn_find_child(parent, nodename, flag, type); + if (node == NULL) { + assert((flag & RSRC_NODE_CREATE) == 0); + free(pathname); + *nodep = NULL; + return (RCM_SUCCESS); + } + } + free(pathname); + *nodep = node; + return (RCM_SUCCESS); +} + +/* + * add a usage client to a node + */ +/*ARGSUSED*/ +int +rsrc_node_add_user(rsrc_node_t *node, char *alias, char *modname, pid_t pid, + uint_t flag) +{ + client_t *user; + + rcm_log_message(RCM_TRACE3, + "rsrc_node_add_user(%s, %s, %s, %ld, 0x%x)\n", + node->name, alias, modname, pid, flag); + + user = rsrc_client_find(modname, pid, &node->users); + + /* + * If a client_t already exists, add the registration and return + * success if it's a valid registration request. + * + * Return EALREADY if the resource is already registered. + * This means either the client_t already has the requested + * registration flagged, or that a DR registration was attempted + * on a resource already in use in the DR operations state model. + */ + if (user != NULL) { + + if (user->flag & (flag & RCM_REGISTER_MASK)) { + return (EALREADY); + } + + if ((flag & RCM_REGISTER_DR) && + (user->state != RCM_STATE_REMOVE)) { + return (EALREADY); + } + + user->flag |= (flag & RCM_REGISTER_MASK); + if ((flag & RCM_REGISTER_DR) || + (user->state == RCM_STATE_REMOVE)) { + user->state = RCM_STATE_ONLINE; + } + + return (RCM_SUCCESS); + } + + /* + * Otherwise create a new client_t and create a new registration. 
+ */ + if ((user = rsrc_client_alloc(alias, modname, pid, flag)) != NULL) { + rsrc_client_add(user, &node->users); + } + if (flag & RCM_FILESYS) + node->type = RSRC_TYPE_FILESYS; + + return (RCM_SUCCESS); +} + +/* + * remove a usage client of a node + * + * Clears the registration bits selected by flag from the client that + * matches (modname, pid). When no registration bits remain the client is + * marked RCM_STATE_REMOVE. Returns ENOENT if no such client is found on + * the node or it is already marked removed, RCM_SUCCESS otherwise. + */ +int +rsrc_node_remove_user(rsrc_node_t *node, char *modname, pid_t pid, uint_t flag) +{ + client_t *user; + + /* pid_t may be int or long; cast so the argument matches the format */ + rcm_log_message(RCM_TRACE3, + "rsrc_node_remove_user(%s, %s, %ld, 0x%x)\n", node->name, modname, + (long)pid, flag); + + user = rsrc_client_find(modname, pid, &node->users); + if ((user == NULL) || (user->state == RCM_STATE_REMOVE)) { + rcm_log_message(RCM_NOTICE, gettext( + "client not registered: module=%s, pid=%d, dev=%s\n"), + modname, (int)pid, node->name); + return (ENOENT); + } + + /* Strip off the registration being removed (DR, event, capacity) */ + user->flag = user->flag & (~(flag & RCM_REGISTER_MASK)); + + /* + * Mark the client as removed if all registrations have been removed + */ + if ((user->flag & RCM_REGISTER_MASK) == 0) + user->state = RCM_STATE_REMOVE; + + return (RCM_SUCCESS); +} + +/* + * Tree walking function - rsrc_walk + */ + +#define MAX_TREE_DEPTH 32 + +#define RN_WALK_CONTINUE 0 +#define RN_WALK_PRUNESIB 1 +#define RN_WALK_PRUNECHILD 2 +#define RN_WALK_TERMINATE 3 + +#define EMPTY_STACK(sp) ((sp)->depth == 0) +#define TOP_NODE(sp) ((sp)->node[(sp)->depth - 1]) +#define PRUNE_SIB(sp) ((sp)->prunesib[(sp)->depth - 1]) +#define PRUNE_CHILD(sp) ((sp)->prunechild[(sp)->depth - 1]) +#define POP_STACK(sp) ((sp)->depth)-- +#define PUSH_STACK(sp, rn) \ + (sp)->node[(sp)->depth] = (rn); \ + (sp)->prunesib[(sp)->depth] = 0; \ + (sp)->prunechild[(sp)->depth] = 0; \ + ((sp)->depth)++ + +struct rn_stack { + rsrc_node_t *node[MAX_TREE_DEPTH]; + char prunesib[MAX_TREE_DEPTH]; + char prunechild[MAX_TREE_DEPTH]; + int depth; +}; + +/* walking one node and update node stack */ +/*ARGSUSED*/ +static void +walk_one_node(struct rn_stack *sp, void *arg, + int (*node_callback)(rsrc_node_t *, void *)) +{ + int
prunesib; + rsrc_node_t *child, *sibling; + rsrc_node_t *node = TOP_NODE(sp); + + rcm_log_message(RCM_TRACE4, "walk_one_node(%s)\n", node->name); + + switch (node_callback(node, arg)) { + case RN_WALK_TERMINATE: + POP_STACK(sp); + while (!EMPTY_STACK(sp)) { + node = TOP_NODE(sp); + POP_STACK(sp); + } + return; + + case RN_WALK_PRUNESIB: + PRUNE_SIB(sp) = 1; + break; + + case RN_WALK_PRUNECHILD: + PRUNE_CHILD(sp) = 1; + break; + + case RN_WALK_CONTINUE: + default: + break; + } + + /* + * Push child on the stack + */ + if (!PRUNE_CHILD(sp) && (child = rn_get_child(node)) != NULL) { + PUSH_STACK(sp, child); + return; + } + + /* + * Pop the stack till a node's sibling can be pushed + */ + prunesib = PRUNE_SIB(sp); + POP_STACK(sp); + while (!EMPTY_STACK(sp) && + (prunesib || (sibling = rn_get_sibling(node)) == NULL)) { + node = TOP_NODE(sp); + prunesib = PRUNE_SIB(sp); + POP_STACK(sp); + } + + if (EMPTY_STACK(sp)) { + return; + } + + /* + * push sibling onto the stack + */ + PUSH_STACK(sp, sibling); +} + +/* + * walk tree rooted at root in child-first order + */ +static void +rsrc_walk(rsrc_node_t *root, void *arg, + int (*node_callback)(rsrc_node_t *, void *)) +{ + struct rn_stack stack; + + rcm_log_message(RCM_TRACE3, "rsrc_walk(%s)\n", root->name); + + /* + * Push root on stack and walk in child-first order + */ + stack.depth = 0; + PUSH_STACK(&stack, root); + PRUNE_SIB(&stack) = 1; + + while (!EMPTY_STACK(&stack)) { + walk_one_node(&stack, arg, node_callback); + } +} + +/* + * Callback for a command action on a node + */ +static int +node_action(rsrc_node_t *node, void *arg) +{ + tree_walk_arg_t *targ = (tree_walk_arg_t *)arg; + uint_t flag = targ->flag; + + rcm_log_message(RCM_TRACE4, "node_action(%s)\n", node->name); + + /* + * If flag indicates operation on a filesystem, we don't callback on + * the filesystem root to avoid infinite recursion on filesystem module. + * + * N.B. Such request should only come from filesystem RCM module. 
+ */ + if (flag & RCM_FILESYS) { + assert(node->type == RSRC_TYPE_FILESYS); + targ->flag &= ~RCM_FILESYS; + return (RN_WALK_CONTINUE); + } + + /* + * Execute state change callback + */ + (void) rsrc_client_action_list(node->users, targ->cmd, arg); + + /* + * Upon hitting a filesys root, prune children. + * The filesys module should have taken care of + * children by now. + */ + if (node->type == RSRC_TYPE_FILESYS) + return (RN_WALK_PRUNECHILD); + + return (RN_WALK_CONTINUE); +} + +/* + * Execute a command on a subtree under root. + */ +int +rsrc_tree_action(rsrc_node_t *root, int cmd, tree_walk_arg_t *arg) +{ + rcm_log_message(RCM_TRACE2, "tree_action(%s, %d)\n", root->name, cmd); + + arg->cmd = cmd; + arg->retcode = RCM_SUCCESS; + rsrc_walk(root, (void *)arg, node_action); + + return (arg->retcode); +} + +/* + * Get info on current registrations + * + * rsrcnames is a NULL-terminated array of resource names. For each name + * the matching node (or its subtree, with RCM_INCLUDE_SUBTREE) is queried + * with CMD_GETINFO. The value returned is the status of the most recent + * rsrc_node_find() call, or RCM_SUCCESS if no lookup was performed. + */ +int +rsrc_usage_info(char **rsrcnames, uint_t flag, int seq_num, rcm_info_t **info) +{ + rsrc_node_t *node; + rcm_info_t *result = NULL; + tree_walk_arg_t arg; + int initial_req = 0; + /* + * rv must have a defined value even when the list is empty or every + * entry is skipped as a redundant request before rsrc_node_find(). + */ + int rv = RCM_SUCCESS; + int i; + + arg.flag = flag; + arg.info = &result; + arg.seq_num = seq_num; + + for (i = 0; rsrcnames[i] != NULL; i++) { + + rcm_log_message(RCM_TRACE2, "rsrc_usage_info(%s, 0x%x, %d)\n", + rsrcnames[i], flag, seq_num); + + if (flag & RCM_INCLUDE_DEPENDENT) { + initial_req = ((seq_num & SEQ_NUM_MASK) == 0); + + /* + * if redundant request, skip the operation + */ + if (info_req_add(rsrcnames[i], flag, seq_num) != 0) { + continue; + } + } + + rv = rsrc_node_find(rsrcnames[i], 0, &node); + if ((rv != RCM_SUCCESS) || (node == NULL)) { + if ((flag & RCM_INCLUDE_DEPENDENT) && initial_req) + info_req_remove(seq_num); + continue; + } + + /* + * Based on RCM_INCLUDE_SUBTREE flag, query either the subtree + * or just the node.
+ */ + if (flag & RCM_INCLUDE_SUBTREE) { + (void) rsrc_tree_action(node, CMD_GETINFO, &arg); + } else { + arg.cmd = CMD_GETINFO; + (void) node_action(node, (void *)&arg); + } + + if ((flag & RCM_INCLUDE_DEPENDENT) && initial_req) + info_req_remove(seq_num); + } + +out: + (void) rcm_append_info(info, result); + return (rv); +} + +/* + * Get the list of currently loaded module + */ +rcm_info_t * +rsrc_mod_info() +{ + module_t *mod; + rcm_info_t *info = NULL; + + (void) mutex_lock(&mod_lock); + mod = module_head; + while (mod) { + char *modinfo = s_strdup(module_info(mod)); + add_busy_rsrc_to_list("dummy", 0, 0, 0, mod->name, + modinfo, NULL, NULL, &info); + mod = mod->next; + } + (void) mutex_unlock(&mod_lock); + + return (info); +} + +/* + * Initialize resource map - load all modules + */ +void +rcmd_db_init() +{ + char *tmp; + DIR *mod_dir; + struct dirent *retp, *entp; + int i; + char *dir_name; + int rcm_script; + +#ifdef lint +extern int readdir_r(DIR *, struct dirent *, struct dirent **); +#endif + + rcm_log_message(RCM_DEBUG, "rcmd_db_init(): initialize database\n"); + + if (script_main_init() == -1) + rcmd_exit(errno); + + rsrc_root = rn_alloc("/", RSRC_TYPE_NORMAL); + + entp = s_malloc(PATH_MAX + 1 + sizeof (struct dirent)); + + for (i = 0; (dir_name = rcm_dir(i, &rcm_script)) != NULL; i++) { + + if ((mod_dir = opendir(dir_name)) == NULL) { + continue; /* try next directory */ + } + + rcm_log_message(RCM_TRACE2, "search directory %s\n", dir_name); + + while (readdir_r(mod_dir, entp, &retp) == 0) { + module_t *module; + + if (retp == NULL) { + break; + } + + if (strcmp(entp->d_name, ".") == 0 || + strcmp(entp->d_name, "..") == 0) + continue; + + if (rcm_script == 0) { + /* rcm module */ + if (((tmp = strstr(entp->d_name, + RCM_MODULE_SUFFIX)) == NULL) || + (tmp[strlen(RCM_MODULE_SUFFIX)] != '\0')) { + continue; + } + } + + module = cli_module_hold(entp->d_name); + if (module == NULL) { + if (rcm_script == 0) + rcm_log_message(RCM_ERROR, + gettext("%s: failed 
to load\n"), + entp->d_name); + continue; + } + + if (module->ref_count == MOD_REFCNT_INIT) { + /* + * ask module to register for resource 1st time + */ + module_attach(module); + } + cli_module_rele(module); + } + (void) closedir(mod_dir); + } + + free(entp); + rcmd_db_print(); +} + +/* + * sync resource map - ask all modules to register again + */ +void +rcmd_db_sync() +{ + static time_t sync_time = (time_t)-1; + const time_t interval = 5; /* resync at most every 5 sec */ + + module_t *mod; + time_t curr = time(NULL); + + if ((sync_time != (time_t)-1) && (curr - sync_time < interval)) + return; + + sync_time = curr; + (void) mutex_lock(&mod_lock); + mod = module_head; + while (mod) { + /* + * Hold module by incrementing ref count and release + * mod_lock to avoid deadlock, since rcmop_register() + * may callback into the daemon and request mod_lock. + */ + mod->ref_count++; + (void) mutex_unlock(&mod_lock); + + mod->modops->rcmop_register(mod->rcmhandle); + + (void) mutex_lock(&mod_lock); + mod->ref_count--; + mod = mod->next; + } + (void) mutex_unlock(&mod_lock); +} + +/* + * Determine if a process is alive + * + * Returns nonzero if /proc/<pid> can be stat'ed, zero otherwise. + * A pid of 0 is always reported as alive. + */ +int +proc_exist(pid_t pid) +{ + char path[64]; + const char *procfs = "/proc"; + struct stat sb; + + if (pid == (pid_t)0) { + return (1); + } + + /* pid_t may be int or long; cast so the argument matches %ld */ + (void) snprintf(path, sizeof (path), "%s/%ld", procfs, (long)pid); + return (stat(path, &sb) == 0); +} + +/* + * Cleanup client list + * + * N.B. This routine runs in a single-threaded environment only. It is only + * called by the cleanup thread, which never runs in parallel with other + * threads. + */ +static void +clean_client_list(client_t **listp) +{ + client_t *client = *listp; + + /* + * Cleanup notification clients for which pid no longer exists + */ + while (client) { + if ((client->state != RCM_STATE_REMOVE) && + proc_exist(client->pid)) { + listp = &client->next; + client = *listp; + continue; + } + + /* + * Destroy this client_t. rsrc_client_remove updates + * listp to point to the next client.
+ */ + rsrc_client_remove(client, listp); + client = *listp; + } +} + +/*ARGSUSED*/ +static int +clean_node(rsrc_node_t *node, void *arg) +{ + rcm_log_message(RCM_TRACE4, "clean_node(%s)\n", node->name); + + clean_client_list(&node->users); + + return (RN_WALK_CONTINUE); +} + +static void +clean_rsrc_tree() +{ + rcm_log_message(RCM_TRACE4, + "clean_rsrc_tree(): delete stale dr clients\n"); + + rsrc_walk(rsrc_root, NULL, clean_node); +} + +static void +db_clean() +{ + extern barrier_t barrier; + extern void clean_dr_list(); + + for (;;) { + (void) mutex_lock(&rcm_req_lock); + start_polling_thread(); + (void) mutex_unlock(&rcm_req_lock); + + (void) mutex_lock(&barrier.lock); + while (need_cleanup == 0) + (void) cond_wait(&barrier.cv, &barrier.lock); + (void) mutex_unlock(&barrier.lock); + + /* + * Make sure all other threads are either blocked or exited. + */ + rcmd_set_state(RCMD_CLEANUP); + + need_cleanup = 0; + + /* + * clean dr_req_list + */ + clean_dr_list(); + + /* + * clean resource tree + */ + clean_rsrc_tree(); + + rcmd_set_state(RCMD_NORMAL); + } +} + +/* + * Launch the detached thread that runs db_clean(). A failure to create + * the thread is logged as a warning and otherwise ignored. + */ +void +rcmd_db_clean() +{ + int err; + + rcm_log_message(RCM_DEBUG, + "rcm_db_clean(): launch thread to clean database\n"); + + /* + * thr_create() reports failure through its return value and does + * not set errno, so the message must be built from that value. + */ + if ((err = thr_create(NULL, NULL, (void *(*)(void *))db_clean, + NULL, THR_DETACHED, NULL)) != 0) { + rcm_log_message(RCM_WARNING, + gettext("failed to create cleanup thread %s\n"), + strerror(err)); + } +} + +/* + * Tree-walk callback that logs a node's name and its list of users. + */ +/*ARGSUSED*/ +static int +print_node(rsrc_node_t *node, void *arg) +{ + client_t *user; + + /* one conversion per argument; the node carries no state to print */ + rcm_log_message(RCM_DEBUG, "rscname: %s\n", node->name); + rcm_log_message(RCM_DEBUG, " users:\n"); + + if ((user = node->users) == NULL) { + rcm_log_message(RCM_DEBUG, " none\n"); + return (RN_WALK_CONTINUE); + } + + while (user) { + rcm_log_message(RCM_DEBUG, " %s, %d, %s\n", + user->module->name, user->pid, user->alias); + user = user->next; + } + return (RN_WALK_CONTINUE); +} + +static void +rcmd_db_print() +{ + module_t *mod; + + rcm_log_message(RCM_DEBUG, "modules:\n"); + (void)
mutex_lock(&mod_lock); + mod = module_head; + while (mod) { + rcm_log_message(RCM_DEBUG, " %s\n", mod->name); + mod = mod->next; + } + (void) mutex_unlock(&mod_lock); + + rcm_log_message(RCM_DEBUG, "\nresource tree:\n"); + + rsrc_walk(rsrc_root, NULL, print_node); + + rcm_log_message(RCM_DEBUG, "\n"); +} + +/* + * Allocate handle from calling into each RCM module + */ +static rcm_handle_t * +rcm_handle_alloc(module_t *module) +{ + rcm_handle_t *hdl; + + hdl = s_malloc(sizeof (rcm_handle_t)); + + hdl->modname = module->name; + hdl->pid = 0; + hdl->lrcm_ops = &rcm_ops; /* for callback into daemon directly */ + hdl->module = module; + + return (hdl); +} + +/* + * Free rcm_handle_t + */ +static void +rcm_handle_free(rcm_handle_t *handle) +{ + free(handle); +} + +/* + * help function that exit on memory outage + */ +void * +s_malloc(size_t size) +{ + void *buf = malloc(size); + + if (buf == NULL) { + rcmd_exit(ENOMEM); + } + return (buf); +} + +void * +s_calloc(int n, size_t size) +{ + void *buf = calloc(n, size); + + if (buf == NULL) { + rcmd_exit(ENOMEM); + } + return (buf); +} + +void * +s_realloc(void *ptr, size_t size) +{ + void *new = realloc(ptr, size); + + if (new == NULL) { + rcmd_exit(ENOMEM); + } + return (new); +} + +char * +s_strdup(const char *str) +{ + char *buf = strdup(str); + + if (buf == NULL) { + rcmd_exit(ENOMEM); + } + return (buf); +} + +/* + * Convert a version 1 ops vector to current ops vector + * Fields missing in version 1 are set to NULL. 
+ */ +static struct rcm_mod_ops * +modops_from_v1(void *ops_v1) +{ + struct rcm_mod_ops *ops; + + ops = s_calloc(1, sizeof (struct rcm_mod_ops)); + bcopy(ops_v1, ops, sizeof (struct rcm_mod_ops_v1)); + return (ops); +} + +/* call a module's getinfo routine; detects v1 ops and adjusts the call */ +static int +call_getinfo(struct rcm_mod_ops *ops, rcm_handle_t *hdl, char *alias, id_t pid, + uint_t flag, char **info, char **error, nvlist_t *client_props, + rcm_info_t **infop) +{ + int rval; + struct rcm_mod_ops_v1 *v1_ops; + + if (ops->version == RCM_MOD_OPS_V1) { + v1_ops = (struct rcm_mod_ops_v1 *)ops; + rval = v1_ops->rcmop_get_info(hdl, alias, pid, flag, info, + infop); + if (rval != RCM_SUCCESS && *info != NULL) + *error = strdup(*info); + return (rval); + } else { + return (ops->rcmop_get_info(hdl, alias, pid, flag, info, error, + client_props, infop)); + } +} + +void +rcm_init_queue(rcm_queue_t *head) +{ + head->next = head->prev = head; +} + +void +rcm_enqueue_head(rcm_queue_t *head, rcm_queue_t *element) +{ + rcm_enqueue(head, element); +} + +void +rcm_enqueue_tail(rcm_queue_t *head, rcm_queue_t *element) +{ + rcm_enqueue(head->prev, element); +} + +void +rcm_enqueue(rcm_queue_t *list_element, rcm_queue_t *element) +{ + element->next = list_element->next; + element->prev = list_element; + element->next->prev = element; + list_element->next = element; +} + +rcm_queue_t * +rcm_dequeue_head(rcm_queue_t *head) +{ + rcm_queue_t *element = head->next; + rcm_dequeue(element); + return (element); +} + +rcm_queue_t * +rcm_dequeue_tail(rcm_queue_t *head) +{ + rcm_queue_t *element = head->prev; + rcm_dequeue(element); + return (element); +} + +void +rcm_dequeue(rcm_queue_t *element) +{ + element->prev->next = element->next; + element->next->prev = element->prev; + element->next = element->prev = NULL; +} diff --git a/usr/src/cmd/rcm_daemon/common/svm_rcm.c b/usr/src/cmd/rcm_daemon/common/svm_rcm.c new file mode 100644 index 0000000000..c8c80a8f78 --- /dev/null +++ 
b/usr/src/cmd/rcm_daemon/common/svm_rcm.c @@ -0,0 +1,2945 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <errno.h> +#include <meta.h> +#include <sys/lvm/mdio.h> +#include <sys/lvm/md_sp.h> +#include <sdssc.h> + +#include "rcm_module.h" + +/* + * This module is the RCM Module for SVM. The policy adopted by this module + * is to block offline requests for any SVM resource that is in use. A + * resource is considered to be in use if it contains a metadb or if it is + * a non-errored component of a metadevice that is open. + * + * The module uses the library libmeta to access the current state of the + * metadevices. On entry, and when svm_register() is called, the module + * builds a cache of all of the SVM resources and their dependencies. Each + * metadevice has an entry of type deventry_t which is accessed by a hash + * function. When the cache is built each SVM resource is registered with + * the RCM framework. 
The check_device code path uses meta_invalidate_name to + * ensure that the caching in libmeta will not conflict with the cache + * we build within this code. + * + * When an RCM operation occurs that affects a registered SVM resource, the RCM + * framework will call the appropriate routine in this module. The cache + * entry will be found and if the resource has dependants, a callback will + * be made into the RCM framework to pass the request on to the dependants, + * which may themselves by SVM resources. + * + * Locking: + * The cache is protected by a mutex + */ + +/* + * Private constants + */ + +/* + * Generic Messages + */ +#define MSG_UNRECOGNIZED gettext("SVM: \"%s\" is not a SVM resource") +#define MSG_NODEPS gettext("SVM: can't find dependents") +#define MSG_OPENERR gettext("SVM: can't open \"%s\"") +#define MSG_CACHEFAIL gettext("SVM: can't malloc cache") + +#define ERR_UNRECOGNIZED gettext("unrecognized SVM resource") +#define ERR_NODEPS gettext("can't find SVM resource dependents") + +/* + * Macros to produce a quoted string containing the value of a preprocessor + * macro. For example, if SIZE is defined to be 256, VAL2STR(SIZE) is "256". + * This is used to construct format strings for scanf-family functions below. 
+ */ +#define QUOTE(x) #x +#define VAL2STR(x) QUOTE(x) + +typedef enum { + SVM_SLICE = 0, + SVM_STRIPE, + SVM_CONCAT, + SVM_MIRROR, + SVM_RAID, + SVM_TRANS, + SVM_SOFTPART, + SVM_HS +} svm_type_t; + +/* Hash table parameters */ +#define HASH_DEFAULT 251 + +/* Hot spare pool users */ +typedef struct hspuser { + struct hspuser *next; /* next user */ + char *hspusername; /* name */ + dev_t hspuserkey; /* key */ +} hspuser_t; + +/* Hot spare pool entry */ +typedef struct hspentry { + struct hspentry *link; /* link through all hsp entries */ + struct hspentry *next; /* next hsp entry for a slice */ + char *hspname; /* name */ + hspuser_t *hspuser; /* first hsp user */ +} hspentry_t; + +/* Hash table entry */ +typedef struct deventry { + struct deventry *next; /* next entry with same hash */ + svm_type_t devtype; /* device type */ + dev_t devkey; /* key */ + char *devname; /* name */ + struct deventry *dependent; /* 1st dependent */ + struct deventry *next_dep; /* next dependent */ + struct deventry *antecedent; /* antecedent */ + hspentry_t *hsp_list; /* list of hot spare pools */ + int flags; /* flags */ +} deventry_t; + +/* flag values */ +#define REMOVED 0x1 +#define IN_HSP 0x2 +#define TRANS_LOG 0x4 +#define CONT_SOFTPART 0x8 +#define CONT_METADB 0x10 + +/* + * Device redundancy flags. If the device can be removed from the + * metadevice configuration then it is considered a redundant device, + * otherwise not. 
+ */ +#define NOTINDEVICE -1 +#define NOTREDUNDANT 0 +#define REDUNDANT 1 + +/* Cache */ +typedef struct cache { + deventry_t **hashline; /* hash table */ + int32_t size; /* sizer of hash table */ + uint32_t registered; /* cache regsitered */ +} cache_t; + +/* + * Forward declarations of private functions + */ + +static int svm_register(rcm_handle_t *hd); +static int svm_unregister(rcm_handle_t *hd); +static deventry_t *cache_dependent(cache_t *cache, char *devname, int devflags, + deventry_t *dependents); +static deventry_t *cache_device(cache_t *cache, char *devname, + svm_type_t devtype, md_dev64_t devkey, int devflags); +static hspentry_t *find_hsp(char *hspname); +static hspuser_t *add_hsp_user(char *hspname, deventry_t *deventry); +static hspentry_t *add_hsp(char *hspname, deventry_t *deventry); +static void free_names(mdnamelist_t *nlp); +static int cache_all_devices(cache_t *cache); +static int cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp); +static int cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans); +static int cache_mirror(cache_t *cache, mdnamelist_t *nlp, + md_mirror_t *mirror); +static int cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid); +static int cache_stripe(cache_t *cache, mdnamelist_t *nlp, + md_stripe_t *stripe); +static int cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part); +static int cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp); +static cache_t *create_cache(); +static deventry_t *create_deventry(char *devname, svm_type_t devtype, + md_dev64_t devkey, int devflags); +static void cache_remove(cache_t *cache, deventry_t *deventry); +static deventry_t *cache_lookup(cache_t *cache, char *devname); +static void cache_sync(rcm_handle_t *hd, cache_t **cachep); +static char *cache_walk(cache_t *cache, uint32_t *i, deventry_t **hashline); +static void free_cache(cache_t **cache); +static void free_deventry(deventry_t **deventry); +static uint32_t hash(uint32_t h, char *s); 
+static void register_device(rcm_handle_t *hd, char *devname); +static int add_dep(int *ndeps, char ***depsp, deventry_t *deventry); +static int get_dependents(deventry_t *deventry, char *** dependentsp); +char *add_to_usage(char ** usagep, char *string); +char *add_to_usage_fmt(char **usagep, char *fmt, char *string); +static int is_open(dev_t devkey); +static int svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **infop); +static int svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **infop); +static int svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **usagep, char **errorp, nvlist_t *props, rcm_info_t **infop); +static int svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id, + timespec_t *interval, uint_t flags, char **errorp, + rcm_info_t **infop); +static int svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **infop); +static int svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **infop); +static int check_device(deventry_t *deventry); +static int check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep); + +/* + * Module-Private data + */ +static struct rcm_mod_ops svm_ops = +{ + RCM_MOD_OPS_VERSION, + svm_register, + svm_unregister, + svm_get_info, + svm_suspend, + svm_resume, + svm_offline, + svm_online, + svm_remove, + NULL, + NULL, + NULL +}; + +static cache_t *svm_cache = NULL; +static mutex_t svm_cache_lock; +static hspentry_t *hsp_head = NULL; + +/* + * Module Interface Routines + */ + +/* + * rcm_mod_init() + * + * Create a cache, and return the ops structure. 
+ * Input: None + * Return: rcm_mod_ops structure + */ +struct rcm_mod_ops * +rcm_mod_init() +{ + /* initialize the lock mutex */ + if (mutex_init(&svm_cache_lock, USYNC_THREAD, NULL)) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't init mutex")); + return (NULL); + } + + /* need to initialize the cluster library to avoid seg faults */ + if (sdssc_bind_library() == SDSSC_ERROR) { + rcm_log_message(RCM_ERROR, + gettext("SVM: Interface error with libsds_sc.so," + " aborting.")); + return (NULL); + } + + /* Create a cache */ + if ((svm_cache = create_cache()) == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: module can't function, aborting.")); + return (NULL); + } + + /* Return the ops vectors */ + return (&svm_ops); +} + +/* + * rcm_mod_info() + * + * Return a string describing this module. + * Input: None + * Return: String + * Locking: None + */ +const char * +rcm_mod_info() +{ + return (gettext("Solaris Volume Manager module %I%")); +} + +/* + * rcm_mod_fini() + * + * Destroy the cache and mutex + * Input: None + * Return: RCM_SUCCESS + * Locking: None + */ +int +rcm_mod_fini() +{ + (void) mutex_lock(&svm_cache_lock); + if (svm_cache) { + free_cache(&svm_cache); + } + (void) mutex_unlock(&svm_cache_lock); + (void) mutex_destroy(&svm_cache_lock); + return (RCM_SUCCESS); +} + +/* + * svm_register() + * + * Make sure the cache is properly sync'ed, and its registrations are in + * order. + * + * Input: + * rcm_handle_t *hd + * Return: + * RCM_SUCCESS + * Locking: the cache is locked throughout the execution of this routine + * because it reads and possibly modifies cache links continuously. + */ +static int +svm_register(rcm_handle_t *hd) +{ + uint32_t i = 0; + deventry_t *l = NULL; + char *devicename; + + + rcm_log_message(RCM_TRACE1, "SVM: register\n"); + /* Guard against bad arguments */ + assert(hd != NULL); + + /* Lock the cache */ + (void) mutex_lock(&svm_cache_lock); + + /* If the cache has already been registered, then just sync it. 
*/ + if (svm_cache && svm_cache->registered) { + cache_sync(hd, &svm_cache); + (void) mutex_unlock(&svm_cache_lock); + return (RCM_SUCCESS); + } + + /* If not, register the whole cache and mark it as registered. */ + while ((devicename = cache_walk(svm_cache, &i, &l)) != NULL) { + register_device(hd, devicename); + } + svm_cache->registered = 1; + + /* Unlock the cache */ + (void) mutex_unlock(&svm_cache_lock); + + return (RCM_SUCCESS); +} + +/* + * svm_unregister() + * + * Manually walk through the cache, unregistering all the special files and + * mount points. + * + * Input: + * rcm_handle_t *hd + * Return: + * RCM_SUCCESS + * Locking: the cache is locked throughout the execution of this routine + * because it reads and modifies cache links continuously. + */ +static int +svm_unregister(rcm_handle_t *hd) +{ + deventry_t *l = NULL; + uint32_t i = 0; + char *devicename; + + rcm_log_message(RCM_TRACE1, "SVM: unregister\n"); + /* Guard against bad arguments */ + assert(hd != NULL); + + /* Walk the cache, unregistering everything */ + (void) mutex_lock(&svm_cache_lock); + if (svm_cache != NULL) { + while ((devicename = cache_walk(svm_cache, &i, &l)) != NULL) { + (void) rcm_unregister_interest(hd, devicename, 0); + } + svm_cache->registered = 0; + } + (void) mutex_unlock(&svm_cache_lock); + return (RCM_SUCCESS); +} + +/* + * svm_offline() + * + * Determine dependents of the resource being offlined, and offline + * them all. + * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * rcm_info_t **infop ptr to info string + * Output: + * char **errorp pass back error message + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked for most of this routine, except while + * processing dependents. 
+ */ +/*ARGSUSED*/ +static int +svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **errorp, rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + int ret; + char **dependents; + deventry_t *deventry; + hspentry_t *hspentry; + hspuser_t *hspuser; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: offline(%s), flags(%d)\n", + rsrc, flags); + + /* Lock the cache */ + (void) mutex_lock(&svm_cache_lock); + + /* Lookup the resource in the cache. */ + if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { + rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED); + *errorp = strdup(ERR_UNRECOGNIZED); + (void) mutex_unlock(&svm_cache_lock); + rv = RCM_FAILURE; + rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n", + rsrc, rv); + return (rv); + } + /* If it is a TRANS device, do not allow the offline */ + if (deventry->devtype == SVM_TRANS) { + rv = RCM_FAILURE; + (void) mutex_unlock(&svm_cache_lock); + goto exit; + } + + if (deventry->flags&IN_HSP) { + /* + * If this is in a hot spare pool, check to see + * if any of the hot spare pool users are open + */ + hspentry = deventry->hsp_list; + while (hspentry) { + hspuser = hspentry->hspuser; + while (hspuser) { + /* Check if open */ + if (is_open(hspuser->hspuserkey)) { + rv = RCM_FAILURE; + (void) mutex_unlock(&svm_cache_lock); + goto exit; + } + hspuser = hspuser->next; + } + hspentry = hspentry->next; + } + } + + /* Fail if the device contains a metadb replica */ + if (deventry->flags&CONT_METADB) { + /* + * The user should delete the replica before continuing, + * so force the error. 
+ */ + rcm_log_message(RCM_TRACE1, "SVM: %s has a replica\n", + deventry->devname); + rv = RCM_FAILURE; + (void) mutex_unlock(&svm_cache_lock); + goto exit; + } + + /* Get dependents */ + if (get_dependents(deventry, &dependents) != 0) { + rcm_log_message(RCM_ERROR, MSG_NODEPS); + rv = RCM_FAILURE; + (void) mutex_unlock(&svm_cache_lock); + goto exit; + } + + if (dependents) { + /* Check if the device is broken (needs maintanence). */ + if (check_device(deventry) == REDUNDANT) { + /* + * The device is broken, the offline request should + * succeed, so ignore any of the dependents. + */ + rcm_log_message(RCM_TRACE1, + "SVM: ignoring dependents\n"); + (void) mutex_unlock(&svm_cache_lock); + free(dependents); + goto exit; + } + (void) mutex_unlock(&svm_cache_lock); + ret = rcm_request_offline_list(hd, dependents, flags, infop); + if (ret != RCM_SUCCESS) { + rv = ret; + } + free(dependents); + } else { + /* If no dependents, check if the metadevice is open */ + if ((deventry->devkey) && (is_open(deventry->devkey))) { + rv = RCM_FAILURE; + (void) mutex_unlock(&svm_cache_lock); + goto exit; + } + (void) mutex_unlock(&svm_cache_lock); + } +exit: + rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n", rsrc, rv); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to offline")); + return (rv); +} + +/* + * svm_online() + * + * Just pass the online notification on to the dependents of this resource + * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * rcm_info_t **infop ptr to info string + * Output: + * char **errorp pass back error message + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked for most of this routine, except while + * processing dependents. 
+ */ +/*ARGSUSED*/ +static int +svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, + rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + char **dependents; + deventry_t *deventry; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: online(%s)\n", rsrc); + + /* Lookup this resource in the cache (cache gets locked) */ + (void) mutex_lock(&svm_cache_lock); + deventry = cache_lookup(svm_cache, rsrc); + if (deventry == NULL) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); + *errorp = strdup(ERR_UNRECOGNIZED); + return (RCM_FAILURE); + } + + /* Get dependents */ + if (get_dependents(deventry, &dependents) != 0) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_NODEPS); + *errorp = strdup(ERR_NODEPS); + return (RCM_FAILURE); + } + (void) mutex_unlock(&svm_cache_lock); + + if (dependents) { + rv = rcm_notify_online_list(hd, dependents, flags, infop); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to online")); + free(dependents); + } + + return (rv); +} + +/* + * svm_get_info() + * + * Gather usage information for this resource. 
+ * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * nvlist_t *props Not used + * rcm_info_t **infop ptr to info string + * Output: + * char **infop pass back info string + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked throughout the whole function + */ +/*ARGSUSED*/ +static int +svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **usagep, + char **errorp, nvlist_t *props, rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + deventry_t *deventry; + deventry_t *dependent; + hspentry_t *hspentry; + char **dependents; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(usagep != NULL); + assert(errorp != NULL); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: get_info(%s)\n", rsrc); + + /* Lookup this resource in the cache (cache gets locked) */ + (void) mutex_lock(&svm_cache_lock); + deventry = cache_lookup(svm_cache, rsrc); + if (deventry == NULL) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); + *errorp = strdup(ERR_UNRECOGNIZED); + return (RCM_FAILURE); + } + + *usagep = NULL; /* Initialise usage string */ + if (deventry->flags&CONT_METADB) { + *usagep = add_to_usage(usagep, gettext("contains metadb(s)")); + } + if (deventry->flags&CONT_SOFTPART) { + *usagep = add_to_usage(usagep, + gettext("contains soft partition(s)")); + } + if (deventry->devtype == SVM_SOFTPART) { + *usagep = add_to_usage_fmt(usagep, + gettext("soft partition based on \"%s\""), + deventry->antecedent->devname); + } + + if (deventry->flags&IN_HSP) { + int hspflag = 0; + hspentry = deventry->hsp_list; + while (hspentry) { + if (hspflag == 0) { + *usagep = add_to_usage(usagep, + gettext("member of hot spare pool")); + hspflag = 1; + } + *usagep = add_to_usage_fmt(usagep, "\"%s\"", + hspentry->hspname); + hspentry = hspentry->next; + } + } else { + dependent = 
deventry->dependent; + while (dependent) { + /* Resource has dependents */ + switch (dependent->devtype) { + case SVM_STRIPE: + *usagep = add_to_usage_fmt(usagep, + gettext("component of stripe \"%s\""), + dependent->devname); + break; + case SVM_CONCAT: + *usagep = add_to_usage_fmt(usagep, + gettext("component of concat \"%s\""), + dependent->devname); + break; + case SVM_MIRROR: + *usagep = add_to_usage_fmt(usagep, + gettext("submirror of \"%s\""), + dependent->devname); + break; + case SVM_RAID: + *usagep = add_to_usage_fmt(usagep, + gettext("component of RAID \"%s\""), + dependent->devname); + break; + case SVM_TRANS: + if (deventry->flags&TRANS_LOG) { + *usagep = add_to_usage_fmt(usagep, + gettext("trans log for \"%s\""), + dependent->devname); + } else { + *usagep = add_to_usage_fmt(usagep, + gettext("trans master for \"%s\""), + dependent->devname); + } + break; + case SVM_SOFTPART: + /* Contains soft parts, already processed */ + break; + default: + rcm_log_message(RCM_ERROR, + gettext("Unknown type %d\n"), + dependent->devtype); + } + dependent = dependent->next_dep; + } + } + + /* Get dependents and recurse if necessary */ + if (get_dependents(deventry, &dependents) != 0) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_NODEPS); + *errorp = strdup(ERR_NODEPS); + return (RCM_FAILURE); + } + (void) mutex_unlock(&svm_cache_lock); + + if ((flags & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) { + rv = rcm_get_info_list(hd, dependents, flags, infop); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to get info")); + } + free(dependents); + + if (*usagep != NULL) + rcm_log_message(RCM_TRACE1, "SVM: usage = %s\n", *usagep); + return (rv); +} + +/* + * svm_suspend() + * + * Notify all dependents that the resource is being suspended. + * Since no real operation is involved, QUERY or not doesn't matter. 
+ * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * rcm_info_t **infop ptr to info string + * Output: + * char **errorp pass back error message + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked for most of this routine, except while + * processing dependents. + */ +static int +svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, + uint_t flags, char **errorp, rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + deventry_t *deventry; + char **dependents; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(interval != NULL); + assert(errorp != NULL); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: suspend(%s)\n", rsrc); + + /* Lock the cache and extract information about this resource. */ + (void) mutex_lock(&svm_cache_lock); + if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); + *errorp = strdup(ERR_UNRECOGNIZED); + return (RCM_SUCCESS); + } + + /* Get dependents */ + if (get_dependents(deventry, &dependents) != 0) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_NODEPS); + *errorp = strdup(ERR_NODEPS); + return (RCM_FAILURE); + } + (void) mutex_unlock(&svm_cache_lock); + + if (dependents) { + rv = rcm_request_suspend_list(hd, dependents, flags, + interval, infop); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to suspend")); + free(dependents); + } + + return (rv); +} + +/* + * svm_resume() + * + * Notify all dependents that the resource is being resumed. 
+ * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * rcm_info_t **infop ptr to info string + * Output: + * char **errorp pass back error message + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked for most of this routine, except while + * processing dependents. + * + */ +static int +svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, + rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + deventry_t *deventry; + char **dependents; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + assert(errorp != NULL); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: resume(%s)\n", rsrc); + + /* + * Lock the cache just long enough to extract information about this + * resource. + */ + (void) mutex_lock(&svm_cache_lock); + if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc); + *errorp = strdup(ERR_UNRECOGNIZED); + return (RCM_SUCCESS); + } + + /* Get dependents */ + + if (get_dependents(deventry, &dependents) != 0) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_NODEPS); + *errorp = strdup(ERR_NODEPS); + return (RCM_FAILURE); + } + + (void) mutex_unlock(&svm_cache_lock); + if (dependents) { + rv = rcm_notify_resume_list(hd, dependents, flags, infop); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to resume")); + free(dependents); + } + + return (rv); +} + + +/* + * svm_remove() + * + * Remove the resource from the cache and notify all dependents that + * the resource has been removed. 
+ * + * Input: + * rcm_handle_t *hd handle + * char* *rsrc resource name + * id_t id 0 + * char **errorp ptr to error message + * rcm_info_t **infop ptr to info string + * Output: + * char **errorp pass back error message + * Return: + * int RCM_SUCCESS or RCM_FAILURE + * Locking: the cache is locked for most of this routine, except while + * processing dependents. + */ +static int +svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, + rcm_info_t **infop) +{ + int rv = RCM_SUCCESS; + char **dependents; + deventry_t *deventry; + + /* Guard against bad arguments */ + assert(hd != NULL); + assert(rsrc != NULL); + assert(id == (id_t)0); + + /* Trace */ + rcm_log_message(RCM_TRACE1, "SVM: svm_remove(%s)\n", rsrc); + + /* Lock the cache while removing resource */ + (void) mutex_lock(&svm_cache_lock); + if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) { + (void) mutex_unlock(&svm_cache_lock); + return (RCM_SUCCESS); + } + + /* Get dependents */ + if (get_dependents(deventry, &dependents) != 0) { + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_ERROR, MSG_NODEPS); + deventry->flags |= REMOVED; + *errorp = strdup(ERR_NODEPS); + return (RCM_FAILURE); + } + + if (dependents) { + (void) mutex_unlock(&svm_cache_lock); + rv = rcm_notify_remove_list(hd, dependents, flags, infop); + (void) mutex_lock(&svm_cache_lock); + if (rv != RCM_SUCCESS) + *errorp = strdup(gettext("unable to remove")); + free(dependents); + } + + /* Mark entry as removed */ + deventry->flags |= REMOVED; + + (void) mutex_unlock(&svm_cache_lock); + rcm_log_message(RCM_TRACE1, "SVM: exit svm_remove(%s)\n", rsrc); + /* Clean up and return success */ + return (RCM_SUCCESS); +} + +/* + * Definitions of private functions + * + */ + +/* + * find_hsp() + * + * Find the hot spare entry from the linked list of all hotspare pools + * + * Input: + * char *hspname name of hot spare pool + * Return: + * hspentry_t hot spare entry + */ +static hspentry_t * +find_hsp(char 
*hspname) +{ + hspentry_t *hspentry = hsp_head; + + while (hspentry) { + if (strcmp(hspname, hspentry->hspname) == 0) + return (hspentry); + hspentry = hspentry->link; + } + return (NULL); +} + +/* + * add_hsp_user() + * + * Add a hot spare pool user to the list for the hsp specfied by + * hspname. The memory allocated here will be freed by free_cache() + * + * Input: + * char *hspname hot spare pool name + * deventry_t *deventry specified hsp user + * Return: + * hspuser_t entry in hsp user list + */ +static hspuser_t * +add_hsp_user(char *hspname, deventry_t *deventry) +{ + hspuser_t *newhspuser; + char *newhspusername; + hspuser_t *previous; + hspentry_t *hspentry; + + hspentry = find_hsp(hspname); + if (hspentry == NULL) + return (NULL); + rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp_user %s, %x, %x\n", + hspname, hspentry, hspentry->hspuser); + + newhspuser = (hspuser_t *)malloc(sizeof (*newhspuser)); + if (newhspuser == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't malloc hspuser")); + return (NULL); + } + (void) memset((char *)newhspuser, 0, sizeof (*newhspuser)); + + newhspusername = strdup(deventry->devname); + if (newhspusername == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't malloc hspusername")); + free(newhspuser); + return (NULL); + } + newhspuser->hspusername = newhspusername; + newhspuser->hspuserkey = deventry->devkey; + + if ((previous = hspentry->hspuser) == NULL) { + hspentry->hspuser = newhspuser; + } else { + hspuser_t *temp = previous->next; + previous->next = newhspuser; + newhspuser->next = temp; + } + rcm_log_message(RCM_TRACE1, "SVM: Added hsp_user %s (dev %x) to %s\n", + newhspusername, newhspuser->hspuserkey, hspname); + return (newhspuser); +} + +/* + * add_hsp() + * + * Add a hot spare pool entry to the list for the slice, deventry. 
+ * Also add to the linked list of all hsp pools + * The memory alllocated here will be freed by free_cache() + * + * Input: + * char *hspname name of hsp pool entry + * deventry_t *deventry device entry for the slice + * Return: + * hspentry_t end of hsp list + * Locking: None + */ +static hspentry_t * +add_hsp(char *hspname, deventry_t *deventry) +{ + hspentry_t *newhspentry; + hspentry_t *previous; + char *newhspname; + + rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp %s\n", + hspname); + newhspentry = (hspentry_t *)malloc(sizeof (*newhspentry)); + if (newhspentry == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't malloc hspentry")); + return (NULL); + } + (void) memset((char *)newhspentry, 0, sizeof (*newhspentry)); + + newhspname = strdup(hspname); + if (newhspname == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't malloc hspname")); + free(newhspentry); + return (NULL); + } + newhspentry->hspname = newhspname; + + /* Add to linked list of all hotspare pools */ + newhspentry->link = hsp_head; + hsp_head = newhspentry; + + /* Add to list of hotspare pools containing this slice */ + if ((previous = deventry->hsp_list) == NULL) { + deventry->hsp_list = newhspentry; + } else { + hspentry_t *temp = previous->next; + previous->next = newhspentry; + newhspentry->next = temp; + } + rcm_log_message(RCM_TRACE1, "SVM: Exit add_hsp %s\n", + hspname); + return (newhspentry); +} + +/* + * cache_dependent() + * + * Add a dependent for a deventry to the cache and return the cache entry + * If the name is not in the cache, we assume that it a SLICE. If it + * turns out to be any other type of metadevice, when it is processed + * in cache_all_devices_in_set(), cache_device() will be called to + * set the type to the actual value. 
+ * + * Input: + * cache_t *cache cache + * char *devname metadevice name + * int devflags metadevice flags + * deventry_t *dependent dependent of this metadevice + * Return: + * deventry_t metadevice entry added to cache + * Locking: None + */ +static deventry_t * +cache_dependent(cache_t *cache, char *devname, int devflags, + deventry_t *dependent) +{ + + deventry_t *newdeventry = NULL; + deventry_t *hashprev = NULL; + deventry_t *deventry = NULL; + deventry_t *previous = NULL; + uint32_t hash_index; + int comp; + + rcm_log_message(RCM_TRACE1, "SVM: Enter cache_dep %s, %x, %s\n", + devname, devflags, dependent->devname); + + hash_index = hash(cache->size, devname); + if (hash_index >= cache->size) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't hash device.")); + return (NULL); + } + + deventry = cache->hashline[hash_index]; + + /* if the hash table slot is empty, then this is easy */ + if (deventry == NULL) { + deventry = create_deventry(devname, SVM_SLICE, 0, devflags); + cache->hashline[hash_index] = deventry; + } else { + /* if the hash table slot isn't empty, find the immediate successor */ + hashprev = NULL; + while ((comp = strcmp(deventry->devname, devname)) < 0 && + deventry->next != NULL) { + hashprev = deventry; + deventry = deventry->next; + } + + if (comp == 0) { + /* if already in cache, just update the flags */ + deventry->flags |= devflags; + } else { + /* insert the entry if it's not already there */ + if ((newdeventry = create_deventry(devname, + SVM_SLICE, 0, devflags)) == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't create hash line.")); + return (NULL); + } + if (comp > 0) { + newdeventry->next = deventry; + if (hashprev) + hashprev->next = newdeventry; + else + cache->hashline[hash_index] = + newdeventry; + } else if (comp < 0) { + newdeventry->next = deventry->next; + deventry->next = newdeventry; + } + deventry = newdeventry; + } + } + /* complete deventry by linking the dependent to it */ + dependent->antecedent = 
deventry; + if ((previous = deventry->dependent) != NULL) { + deventry_t *temp = previous->next_dep; + previous->next_dep = dependent; + dependent->next_dep = temp; + } else deventry->dependent = dependent; + return (deventry); + +} + +/* + * cache_device() + * + * Add an entry to the cache for devname + * + * Input: + * cache_t *cache cache + * char *devname metadevice named + * svm_type_t devtype metadevice type + * md_dev64_t devkey dev_t of device + * int devflags device flags + * Return: + * deventry_t metadevice added to cache + * Locking: None + */ +static deventry_t * +cache_device(cache_t *cache, char *devname, svm_type_t devtype, + md_dev64_t devkey, int devflags) +{ + deventry_t *newdeventry = NULL; + deventry_t *previous = NULL; + deventry_t *deventry = NULL; + uint32_t hash_index; + int comp; + + rcm_log_message(RCM_TRACE1, "SVM: Enter cache_device %s, %x, %lx, %x\n", + devname, devtype, devkey, devflags); + + hash_index = hash(cache->size, devname); + if (hash_index >= cache->size) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't hash device.")); + return (NULL); + } + + deventry = cache->hashline[hash_index]; + + /* if the hash table slot is empty, then this is easy */ + if (deventry == NULL) { + deventry = create_deventry(devname, devtype, devkey, + devflags); + cache->hashline[hash_index] = deventry; + } else { + /* if the hash table slot isn't empty, find the immediate successor */ + previous = NULL; + while ((comp = strcmp(deventry->devname, devname)) < 0 && + deventry->next != NULL) { + previous = deventry; + deventry = deventry->next; + } + + if (comp == 0) { + /* + * If entry already exists, just set the type, key + * and flags + */ + deventry->devtype = devtype; + deventry->devkey = meta_cmpldev(devkey); + deventry->flags |= devflags; + } else { + /* insert the entry if it's not already there */ + if ((newdeventry = create_deventry(devname, devtype, + devkey, devflags)) == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't create 
hash line.")); + } + if (comp > 0) { + newdeventry->next = deventry; + if (previous) + previous->next = newdeventry; + else + cache->hashline[hash_index] = + newdeventry; + } else if (comp < 0) { + newdeventry->next = deventry->next; + deventry->next = newdeventry; + } + deventry = newdeventry; + } + } + return (deventry); +} +/* + * free_names() + * + * Free all name list entries + * + * Input: + * mdnamelist_t *np namelist pointer + * Return: None + */ + +static void +free_names(mdnamelist_t *nlp) +{ + mdnamelist_t *p; + + for (p = nlp; p != NULL; p = p->next) { + meta_invalidate_name(p->namep); + p->namep = NULL; + } + metafreenamelist(nlp); +} + +/* + * cache_hsp() + * + * Add an entry to the cache for each slice in the hot spare + * pool. Call add_hsp() to add the hot spare pool to the list + * of all hot spare pools. + * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to hsp name + * md_hsp_t *hsp + * Return: + * 0 if successful or error code + */ +static int +cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp) +{ + int i; + deventry_t *deventry; + md_hs_t *hs; + + for (i = 0; i < hsp->hotspares.hotspares_len; i++) { + hs = &hsp->hotspares.hotspares_val[i]; + if ((deventry = cache_device(cache, hs->hsnamep->bname, + SVM_SLICE, hs->hsnamep->dev, + IN_HSP)) == NULL) { + return (ENOMEM); + } + if (add_hsp(nlp->hspnamep->hspname, deventry) == NULL) { + return (ENOMEM); + } + } + return (0); +} + +/* + * cache_trans() + * + * Add an entry to the cache for trans metadevice, the master + * and the log. Call cache_dependent() to link that master and + * the log to the trans metadevice. 
+ * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to trans name + * md_trans_t *trans + * Return: + * 0 if successful or error code + * + */ +static int +cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans) +{ + deventry_t *antecedent; + + if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_TRANS, + nlp->namep->dev, 0)) == NULL) { + return (ENOMEM); + } + + if (cache_device(cache, trans->masternamep->bname, SVM_SLICE, + trans->masternamep->dev, 0) == NULL) { + return (ENOMEM); + } + + if (cache_dependent(cache, trans->masternamep->bname, 0, + antecedent) == NULL) { + return (ENOMEM); + } + + if (trans->lognamep != NULL) { + if (cache_device(cache, trans->lognamep->bname, SVM_SLICE, + trans->lognamep->dev, TRANS_LOG) == NULL) { + return (ENOMEM); + } + + if (cache_dependent(cache, trans->lognamep->bname, 0, + antecedent) == NULL) { + return (ENOMEM); + } + } + return (0); +} + +/* + * cache_mirror() + * + * Add an entry to the cache for the mirror. For each + * submirror, call cache_dependent() to add an entry to the + * cache and to link it to mirror entry. + * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to mirror name + * md_mirror_t *mirror + * Return: + * 0 if successful or error code + * + */ +static int +cache_mirror(cache_t *cache, mdnamelist_t *nlp, md_mirror_t *mirror) +{ + int i; + deventry_t *antecedent; + + if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_MIRROR, + nlp->namep->dev, 0)) == NULL) { + return (ENOMEM); + } + for (i = 0; i < NMIRROR; i++) { + md_submirror_t *submirror; + + submirror = &mirror->submirrors[i]; + if (submirror->state == SMS_UNUSED) + continue; + + if (!submirror->submirnamep) + continue; + + if (cache_dependent(cache, submirror->submirnamep->bname, + 0, antecedent) == NULL) { + return (ENOMEM); + } + } + return (0); +} + +/* + * cache_raid() + * + * Add an entry to the cache for the RAID metadevice. 
For + * each component of the RAID call cache_dependent() to add + * add it to the cache and to link it to the RAID metadevice. + * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to raid name + * md_raid_t *raid mirror + * Return: + * 0 if successful or error code + */ +static int +cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid) +{ + int i; + deventry_t *antecedent; + + if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_RAID, + nlp->namep->dev, 0)) == NULL) { + return (ENOMEM); + } + if (raid->hspnamep) { + if (add_hsp_user(raid->hspnamep->hspname, + antecedent) == NULL) { + return (ENOMEM); + } + } + for (i = 0; i < raid->cols.cols_len; i++) { + if (cache_dependent(cache, + raid->cols.cols_val[i].colnamep->bname, 0, + antecedent) == NULL) { + return (ENOMEM); + } + } + return (0); +} + +/* + * cache_stripe() + * + * Add a CONCAT or a STRIPE entry entry to the cache for the + * metadevice and call cache_dependent() to add each + * component to the cache. 
+ * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to stripe name + * md_stripe_t *stripe + * Return: + * 0 if successful or error code + * + */ +static int +cache_stripe(cache_t *cache, mdnamelist_t *nlp, md_stripe_t *stripe) +{ + int i; + deventry_t *antecedent; + + if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_CONCAT, + nlp->namep->dev, 0)) == NULL) { + return (ENOMEM); + } + + if (stripe->hspnamep) { + if (add_hsp_user(stripe->hspnamep->hspname, + antecedent) == NULL) { + return (ENOMEM); + } + } + for (i = 0; i < stripe->rows.rows_len; i++) { + md_row_t *rowp; + int j; + + rowp = &stripe->rows.rows_val[i]; + if (stripe->rows.rows_len == 1 && rowp->comps.comps_len > 1) { + if ((void*) cache_device(cache, nlp->namep->bname, + SVM_STRIPE, nlp->namep->dev, 0) == NULL) + return (ENOMEM); + } + for (j = 0; j < rowp->comps.comps_len; j++) { + md_comp_t *component; + + component = &rowp->comps.comps_val[j]; + if (cache_dependent(cache, + component->compnamep->bname, 0, + antecedent) == NULL) { + return (ENOMEM); + } + } + } + return (0); +} + +/* + * cache_sp() + * + * Add an entry to the cache for the softpart and also call + * cache_dependent() to set the CONT_SOFTPART flag in the + * cache entry for the metadevice that contains the softpart. 
+ * + * Input: + * cache_t *cache cache + * mdnamelist_t *nlp pointer to soft part name + * md_sp_t *soft_part + * Return: + * 0 if successful or error code + * + */ +static int +cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part) +{ + deventry_t *antecedent; + + if ((antecedent = cache_device(cache, nlp->namep->bname, + SVM_SOFTPART, nlp->namep->dev, 0)) == NULL) { + return (ENOMEM); + } + if (cache_dependent(cache, soft_part->compnamep->bname, + CONT_SOFTPART, antecedent) == NULL) { + return (ENOMEM); + } + return (0); +} + +/* + * cache_all_devices_in_set() + * + * Add all of the metadevices and mddb replicas in the set to the + * cache + * + * Input: + * cache_t *cache cache + * mdsetname_t *sp setname + * Return: + * 0 if successful or error code + */ + +static int +cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp) +{ + md_error_t error = mdnullerror; + md_replicalist_t *replica_list = NULL; + md_replicalist_t *mdbp; + mdnamelist_t *nlp; + mdnamelist_t *trans_list = NULL; + mdnamelist_t *mirror_list = NULL; + mdnamelist_t *raid_list = NULL; + mdnamelist_t *stripe_list = NULL; + mdnamelist_t *sp_list = NULL; + mdhspnamelist_t *hsp_list = NULL; + + rcm_log_message(RCM_TRACE1, "SVM: cache_all_devices_in_set\n"); + + /* Add each mddb replica to the cache */ + if (metareplicalist(sp, MD_BASICNAME_OK, &replica_list, &error) < 0) { + /* there are no metadb's; that is ok, no need to check the rest */ + mdclrerror(&error); + return (0); + } + + for (mdbp = replica_list; mdbp != NULL; mdbp = mdbp->rl_next) { + if (cache_device(cache, mdbp->rl_repp->r_namep->bname, + SVM_SLICE, mdbp->rl_repp->r_namep->dev, + CONT_METADB) == NULL) { + metafreereplicalist(replica_list); + return (ENOMEM); + } + } + metafreereplicalist(replica_list); + + /* Process Hot Spare pools */ + if (meta_get_hsp_names(sp, &hsp_list, 0, &error) >= 0) { + mdhspnamelist_t *nlp; + + for (nlp = hsp_list; nlp != NULL; nlp = nlp->next) { + md_hsp_t *hsp; + + hsp = meta_get_hsp(sp, 
nlp->hspnamep, &error); + if (hsp != NULL) { + if (cache_hsp(cache, nlp, hsp) != 0) { + metafreehspnamelist(hsp_list); + return (ENOMEM); + } + } + meta_invalidate_hsp(nlp->hspnamep); + } + metafreehspnamelist(hsp_list); + } + + /* Process Trans devices */ + if (meta_get_trans_names(sp, &trans_list, 0, &error) >= 0) { + for (nlp = trans_list; nlp != NULL; nlp = nlp->next) { + mdname_t *mdn; + md_trans_t *trans; + + mdn = metaname(&sp, nlp->namep->cname, &error); + if (mdn == NULL) { + continue; + } + + trans = meta_get_trans(sp, mdn, &error); + + if (trans != NULL && trans->masternamep != NULL) { + if (cache_trans(cache, nlp, trans) != NULL) { + free_names(trans_list); + return (ENOMEM); + } + } + } + free_names(trans_list); + } + + /* Process Mirrors */ + if (meta_get_mirror_names(sp, &mirror_list, 0, &error) >= 0) { + for (nlp = mirror_list; nlp != NULL; nlp = nlp->next) { + mdname_t *mdn; + md_mirror_t *mirror; + + mdn = metaname(&sp, nlp->namep->cname, &error); + if (mdn == NULL) { + continue; + } + + mirror = meta_get_mirror(sp, mdn, &error); + + if (mirror != NULL) { + if (cache_mirror(cache, nlp, mirror) != 0) { + free_names(mirror_list); + return (ENOMEM); + } + } + } + free_names(mirror_list); + } + + /* Process Raid devices */ + if (meta_get_raid_names(sp, &raid_list, 0, &error) >= 0) { + for (nlp = raid_list; nlp != NULL; nlp = nlp->next) { + mdname_t *mdn; + md_raid_t *raid; + + mdn = metaname(&sp, nlp->namep->cname, &error); + if (mdn == NULL) { + continue; + } + + raid = meta_get_raid(sp, mdn, &error); + + if (raid != NULL) { + if (cache_raid(cache, nlp, raid) != 0) { + free_names(raid_list); + return (ENOMEM); + } + } + } + free_names(raid_list); + } + + /* Process Slices */ + if (meta_get_stripe_names(sp, &stripe_list, 0, &error) >= 0) { + for (nlp = stripe_list; nlp != NULL; nlp = nlp->next) { + mdname_t *mdn; + md_stripe_t *stripe; + + mdn = metaname(&sp, nlp->namep->cname, &error); + if (mdn == NULL) { + continue; + } + + stripe = 
meta_get_stripe(sp, mdn, &error); + + if (stripe != NULL) { + if (cache_stripe(cache, nlp, stripe) != 0) { + free_names(stripe_list); + return (ENOMEM); + } + } + } + free_names(stripe_list); + } + + /* Process Soft partitions */ + if (meta_get_sp_names(sp, &sp_list, 0, &error) >= 0) { + for (nlp = sp_list; nlp != NULL; nlp = nlp->next) { + mdname_t *mdn; + md_sp_t *soft_part; + + mdn = metaname(&sp, nlp->namep->cname, &error); + if (mdn == NULL) { + continue; + } + + soft_part = meta_get_sp(sp, mdn, &error); + + if (soft_part != NULL) { + if (cache_sp(cache, nlp, soft_part) != 0) { + free_names(sp_list); + return (ENOMEM); + } + } + } + free_names(sp_list); + } + mdclrerror(&error); + return (0); +} + +/* + * create_all_devices() + * + * Cache all devices in all sets + * + * Input: + * cache_t cache + * Return: + * 0 if successful, error code if not + * Locking: None + */ +static int +cache_all_devices(cache_t *cache) +{ + int max_sets; + md_error_t error = mdnullerror; + int i; + + if ((max_sets = get_max_sets(&error)) == 0) { + return (0); + } + if (!mdisok(&error)) { + mdclrerror(&error); + return (0); + } + + rcm_log_message(RCM_TRACE1, + "SVM: cache_all_devices,max sets = %d\n", max_sets); + /* for each possible set number, see if we really have a diskset */ + for (i = 0; i < max_sets; i++) { + mdsetname_t *sp; + + if ((sp = metasetnosetname(i, &error)) == NULL) { + rcm_log_message(RCM_TRACE1, + "SVM: cache_all_devices no set: setno %d\n", i); + if (!mdisok(&error) && + ((error.info.errclass == MDEC_RPC) || + (mdiserror(&error, MDE_SMF_NO_SERVICE)))) { + /* + * metad rpc program not available + * - no metasets. metad rpc not available + * is indicated either by an RPC error or + * the fact that the service is not + * enabled. 
+ */ + break; + } + + continue; + } + + if (cache_all_devices_in_set(cache, sp)) { + metaflushsetname(sp); + return (ENOMEM); + } + metaflushsetname(sp); + } + mdclrerror(&error); + rcm_log_message(RCM_TRACE1, "SVM: exit cache_all_devices\n"); + return (0); +} + +/* + * create_cache() + * + * Create an empty cache + * If the function fails free_cache() will be called to free any + * allocated memory. + * + * Input: None + * Return: + * cache_t cache created + * Locking: None + */ +static cache_t * +create_cache() +{ + cache_t *cache; + uint32_t size; + int ret; + + size = HASH_DEFAULT; + /* try allocating storage for a new, empty cache */ + if ((cache = (cache_t *)malloc(sizeof (cache_t))) == NULL) { + rcm_log_message(RCM_ERROR, MSG_CACHEFAIL); + return (NULL); + } + + (void) memset((char *)cache, 0, sizeof (*cache)); + cache->hashline = (deventry_t **)calloc(size, sizeof (deventry_t *)); + if (cache->hashline == NULL) { + rcm_log_message(RCM_ERROR, MSG_CACHEFAIL); + free(cache); + return (NULL); + } + cache->size = size; + + /* Initialise linked list of hsp entries */ + hsp_head = NULL; + + /* add entries to cache */ + ret = cache_all_devices(cache); + if (ret != 0) { + free_cache(&cache); + return (NULL); + } + + /* Mark the cache as new */ + cache->registered = 0; + + /* Finished - return the new cache */ + return (cache); +} + +/* + * create_deventry() + * + * Create a new deventry entry for device with name devname + * The memory alllocated here will be freed by free_cache() + * + * Input: + * char *devname device name + * svm_type_t devtype metadevice type + * md_dev64_t devkey device key + * int devflags device flags + * Return: + * deventry_t New deventry + * Locking: None + */ +static deventry_t * +create_deventry(char *devname, svm_type_t devtype, md_dev64_t devkey, + int devflags) +{ + deventry_t *newdeventry; + char *newdevname; + + newdeventry = (deventry_t *)malloc(sizeof (*newdeventry)); + if (newdeventry == NULL) { + rcm_log_message(RCM_ERROR, + 
gettext("SVM: can't malloc deventrys")); + return (NULL); + } + (void) memset((char *)newdeventry, 0, sizeof (*newdeventry)); + + newdevname = strdup(devname); + if (newdevname == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't malloc devname")); + free(newdeventry); + return (NULL); + } + newdeventry->devname = newdevname; + newdeventry->devtype = devtype; + newdeventry->devkey = meta_cmpldev(devkey); + newdeventry->flags = devflags; + rcm_log_message(RCM_TRACE1, + "SVM created deventry for %s\n", newdeventry->devname); + return (newdeventry); +} + +/* + * cache_remove() + * + * Given a cache and a deventry, the deventry is + * removed from the cache's tables and memory for the deventry is + * free'ed. + * + * Input: + * cache_t *cache cache + * deventry_t *deventry deventry to be removed + * Return: None + * Locking: The cache must be locked by the caller prior to calling + * this routine. + */ +static void +cache_remove(cache_t *cache, deventry_t *deventry) +{ + deventry_t *olddeventry; + deventry_t *previous; + hspentry_t *hspentry; + hspentry_t *oldhspentry; + hspuser_t *hspuser; + hspuser_t *oldhspuser; + uint32_t hash_index; + + /* sanity check */ + if (cache == NULL || deventry == NULL || deventry->devname == NULL) + return; + + + /* If this is in the hash table, remove it from there */ + hash_index = hash(cache->size, deventry->devname); + if (hash_index >= cache->size) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't hash device.")); + return; + } + olddeventry = cache->hashline[hash_index]; + previous = NULL; + while (olddeventry) { + if (olddeventry->devname && + strcmp(olddeventry->devname, deventry->devname) == 0) { + break; + } + previous = olddeventry; + olddeventry = olddeventry->next; + } + if (olddeventry) { + if (previous) + previous->next = olddeventry->next; + else + cache->hashline[hash_index] = olddeventry->next; + + if (olddeventry->flags&IN_HSP) { + /* + * If this is in a hot spare pool, remove the list + * of hot spare 
pools that it is in along with + * all of the volumes that are users of the pool + */ + hspentry = olddeventry->hsp_list; + while (hspentry) { + oldhspentry = hspentry; + hspuser = hspentry->hspuser; + while (hspuser) { + oldhspuser = hspuser; + free(hspuser->hspusername); + hspuser = hspuser->next; + free(oldhspuser); + } + free(hspentry->hspname); + hspentry = hspentry->next; + free(oldhspentry); + } + } + free(olddeventry->devname); + free(olddeventry); + } + +} + +/* + * cache_lookup() + * + * Return the deventry corresponding to devname from the cache + * Input: + * cache_t cache cache + * char *devname name to lookup in cache + * Return: + * deventry_t deventry of name, NULL if not found + * Locking: cache lock held on entry and on exit + */ +static deventry_t * +cache_lookup(cache_t *cache, char *devname) +{ + int comp; + uint32_t hash_index; + deventry_t *deventry; + + hash_index = hash(cache->size, devname); + if (hash_index >= cache->size) { + rcm_log_message(RCM_ERROR, + gettext("SVM: can't hash resource.")); + return (NULL); + } + + deventry = cache->hashline[hash_index]; + while (deventry) { + comp = strcmp(deventry->devname, devname); + if (comp == 0) + return (deventry); + if (comp > 0) + return (NULL); + deventry = deventry->next; + } + return (NULL); +} + +/* + * cache_sync() + * + * Resync cache with the svm database + * + * Input: + * rcm_handle_t *hd rcm handle + * cache_t **cachep pointer to cache + * Return: + * cache_t **cachep pointer to new cache + * Return: None + * Locking: The cache must be locked prior to entry + */ +static void +cache_sync(rcm_handle_t *hd, cache_t **cachep) +{ + char *devicename; + deventry_t *deventry; + cache_t *new_cache; + cache_t *old_cache = *cachep; + deventry_t *hashline = NULL; + uint32_t i = 0; + + /* Get a new cache */ + if ((new_cache = create_cache()) == NULL) { + rcm_log_message(RCM_WARNING, + gettext("SVM: WARNING: couldn't re-cache.")); + return; + } + + /* For every entry in the new cache... 
*/ + while ((devicename = cache_walk(new_cache, &i, &hashline)) != NULL) { + /* Look for this entry in the old cache */ + deventry = cache_lookup(old_cache, devicename); + /* + * If no entry in old cache, register the resource. If there + * is an entry, but it is marked as removed, register it + * again and remove it from the old cache + */ + if (deventry == NULL) { + register_device(hd, hashline->devname); + } else { + if (deventry->flags&REMOVED) + register_device(hd, hashline->devname); + cache_remove(old_cache, deventry); + } + } + + /* + * For every device left in the old cache, just unregister if + * it has not already been removed + */ + i = 0; + hashline = NULL; + while ((devicename = cache_walk(old_cache, &i, &hashline)) != NULL) { + if (!(hashline->flags&REMOVED)) { + (void) rcm_unregister_interest(hd, devicename, 0); + } + } + + /* Swap pointers */ + *cachep = new_cache; + + /* Destroy old cache */ + free_cache(&old_cache); + + /* Mark the new cache as registered */ + new_cache-> registered = 1; +} + +/* + * cache_walk() + * + * Perform one step of a walk through the cache. The i and hashline + * parameters are updated to store progress of the walk for future steps. + * They must all be initialized for the beginning of the walk + * (i = 0, line = NULL). Initialize variables to these values for these + * parameters, and then pass in the address of each of the variables + * along with the cache. A NULL return value will be given to indicate + * when there are no more cached items to be returned. + * + * Input: + * cache_t *cache cache + * uint32_t *i hash table index of prev entry + * deventry_t **line ptr to previous device entry + * Output: + * uint32_t *i updated hash table index + * deventry_t **line ptr to device entry + * Return: + * char* device name (NULL for end of cache) + * Locking: The cache must be locked prior to calling this routine. 
+ */ +static char * +cache_walk(cache_t *cache, uint32_t *i, deventry_t **line) +{ + uint32_t j; + + /* sanity check */ + if (cache == NULL || i == NULL || line == NULL || + *i >= cache->size) + return (NULL); + + /* if initial values were given, look for the first entry */ + if (*i == 0 && *line == NULL) { + for (j = 0; j < cache->size; j++) { + if (cache->hashline[j]) { + *i = j; + *line = cache->hashline[j]; + return ((*line)->devname); + } + } + } else { + /* otherwise, look for the next entry for this hash value */ + if (*line && (*line)->next) { + *line = (*line)->next; + return ((*line)->devname); + } else { + /* next look further down in the hash table */ + for (j = (*i) + 1; j < cache->size; j++) { + if (cache->hashline[j]) { + *i = j; + *line = cache->hashline[j]; + return ((*line)->devname); + } + } + } + } + + /* + * We would have returned somewhere above if there were any more + * entries. So set the sentinel values and return a NULL. + */ + *i = cache->size; + *line = NULL; + return (NULL); +} + +/* + * free_cache() + * + * Given a pointer to a cache structure, this routine will free all + * of the memory allocated within the cache. + * + * Input: + * cache_t **cache ptr to cache + * Return: None + * Locking: cache lock held on entry + */ +static void +free_cache(cache_t **cache) +{ + uint32_t index; + cache_t *realcache; + + /* sanity check */ + if (cache == NULL || *cache == NULL) + return; + + /* de-reference the cache pointer */ + realcache = *cache; + + /* free the hash table */ + for (index = 0; index < realcache->size; index++) { + free_deventry(&realcache->hashline[index]); + } + free(realcache->hashline); + realcache->hashline = NULL; + + free(realcache); + *cache = NULL; +} + +/* + * free_deventry() + * + * This routine frees all of the memory allocated within a node of a + * deventry. 
+ * + * Input: + * deventry_t **deventry ptr to deventry + * Return: None + * Locking: cache lock held on entry + */ +static void +free_deventry(deventry_t **deventry) +{ + deventry_t *olddeventry; + hspentry_t *hspentry; + hspentry_t *oldhspentry; + hspuser_t *hspuser; + hspuser_t *oldhspuser; + + if (deventry != NULL) { + while (*deventry != NULL) { + olddeventry = (*deventry)->next; + if ((*deventry)->flags&IN_HSP) { + /* + * If this is in a hot spare pool, remove the + * memory allocated to hot spare pools and + * the users of the pool + */ + hspentry = (*deventry)->hsp_list; + while (hspentry) { + oldhspentry = hspentry; + hspuser = hspentry->hspuser; + while (hspuser) { + oldhspuser = hspuser; + free(hspuser->hspusername); + hspuser = hspuser->next; + free(oldhspuser); + } + free(hspentry->hspname); + hspentry = hspentry->next; + free(oldhspentry); + } + } + free((*deventry)->devname); + free (*deventry); + *deventry = olddeventry; + } + } +} + +/* + * hash() + * + * A rotating hashing function that converts a string 's' to an index + * in a hash table of size 'h'. 
+ * + * Input: + * uint32_t h hash table size + * char *s string to be hashed + * Return: + * uint32_t hash value + * Locking: None + */ +static uint32_t +hash(uint32_t h, char *s) +{ + + int len; + int hash, i; + + len = strlen(s); + + for (hash = len, i = 0; i < len; ++i) { + hash = (hash<<4)^(hash>>28)^s[i]; + } + return (hash % h); +} + +/* + * register_device() + * + * Register a device + * + * Input: + * rcm_handle_t *hd rcm handle + * char *devname device name + * Return: None + * Locking: None + */ +static void +register_device(rcm_handle_t *hd, char *devname) +{ + /* Sanity check */ + if (devname == NULL) + return; + + rcm_log_message(RCM_TRACE1, "SVM: Registering %s(%d)\n", devname, + devname); + + if (rcm_register_interest(hd, devname, 0, NULL) != RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, + gettext("SVM: failed to register \"%s\"\n"), devname); + } +} + +/* + * add_dep() + * + * Add an entry to an array of dependent names for a device. Used to + * build an array to call the rcm framework with when passing on a + * DR request. 
+ * + * Input: + * int *ndeps ptr to current number of deps + * char ***depsp ptr to current dependent array + * deventry_t *deventry deventry of device to be added + * Output: + * int *ndeps ptr to updated no of deps + * char ***depsp ptr to new dependant array + * Return: + * int 0, of ok, -1 if failed to allocate memory + * Locking: None + */ +static int +add_dep(int *ndeps, char ***depsp, deventry_t *deventry) +{ + char **deps_new; + + *ndeps += 1; + deps_new = realloc(*depsp, ((*ndeps) + 1) * sizeof (char *)); + if (deps_new == NULL) { + rcm_log_message(RCM_ERROR, + gettext("SVM: cannot allocate dependent array (%s).\n"), + strerror(errno)); + return (-1); + } + deps_new[(*ndeps-1)] = deventry->devname; + deps_new[(*ndeps)] = NULL; + *depsp = deps_new; + return (0); +} + + +/* + * get_dependent() + * + * Create a list of all dependents of a device + * Do not add dependent if it is marked as removed + * + * Input: + * deventry_t *deventry device entry + * Output: + * char ***dependentsp pty to dependent list + * Return: + * int 0, if ok, -1 if failed + * Locking: None + */ +static int +get_dependents(deventry_t *deventry, char *** dependentsp) +{ + int ndeps = 0; + deventry_t *dependent; + char **deps = NULL; + + + dependent = deventry->dependent; + if (dependent == NULL) { + *dependentsp = NULL; + return (0); + } + while (dependent != NULL) { + /* + * do not add dependent if we have + * already received a remove notifification + */ + if (!(dependent->flags&REMOVED)) + if (add_dep(&ndeps, &deps, dependent) < 0) + return (-1); + dependent = dependent->next_dep; + } + if (ndeps == 0) { + *dependentsp = NULL; + } else { + *dependentsp = deps; + } + return (0); +} + +/* + * add_to_usage() + * Add string to the usage string pointed at by usagep. 
Allocate memory + * for the new usage string and free the memory used by the original + * usage string + * + * Input: + * char **usagep ptr to usage string + * char *string string to be added to usage + * Return: + * char ptr to new usage string + * Locking: None + */ +char * +add_to_usage(char ** usagep, char *string) +{ + int len; + char *new_usage = NULL; + + if (*usagep == NULL) { + len = 0; + } else { + len = strlen(*usagep) + 2; /* allow space for comma */ + } + len += strlen(string) + 1; + if (new_usage = calloc(1, len)) { + if (*usagep) { + (void) strcpy(new_usage, *usagep); + free(*usagep); + (void) strcat(new_usage, ", "); + } + (void) strcat(new_usage, string); + } + return (new_usage); +} + +/* + * add_to_usage_fmt() + * + * Add a formatted string , of the form "blah %s" to the usage string + * pointed at by usagep. Allocate memory for the new usage string and free + * the memory used by the original usage string. + * + * Input: + * char **usagep ptr to current usage string + * char *fmt format string + * char *string string to be added + * Return: + * char* new usage string + * Locking: None + */ +/*PRINTFLIKE2*/ +char * +add_to_usage_fmt(char **usagep, char *fmt, char *string) +{ + int len; + char *usage; + char *new_usage = NULL; + + len = strlen(fmt) + + strlen(string) + 1; + if (usage = calloc(1, len)) { + (void) sprintf(usage, fmt, string); + new_usage = add_to_usage(usagep, usage); + free(usage); + } + return (new_usage); +} + +/* + * is_open() + * + * Make ioctl call to find if a device is open + * + * Input: + * dev_t devkey dev_t for device + * Return: + * int 0 if not open, !=0 if open + * Locking: None + */ +static int +is_open(dev_t devkey) +{ + int fd; + md_isopen_t isopen_ioc; + + /* Open admin device */ + if ((fd = open(ADMSPECIAL, O_RDONLY, 0)) < 0) { + rcm_log_message(RCM_ERROR, MSG_OPENERR, ADMSPECIAL); + return (0); + } + + (void) memset(&isopen_ioc, 0, sizeof (isopen_ioc)); + isopen_ioc.dev = devkey; + if (ioctl(fd, MD_IOCISOPEN, 
&isopen_ioc) < 0) { + (void) close(fd); + return (0); + } + (void) close(fd); + return (isopen_ioc.isopen); +} + +/* + * check_softpart() + * + * Check the status of the passed in device within the softpartition. + * + * Input: + * mdsetname_t * the name of the set + * mdname_t * the softpartition device that is being examined + * char * the device which needs to be checked + * md_error_t * error pointer (not used) + * Return: + * int REDUNDANT - device is redundant and can be + * removed + * NOTREDUNDANT - device cannot be removed + * NOTINDEVICE - device is not part of this + * component + */ +static int +check_softpart(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) +{ + md_sp_t *softp = NULL; + + rcm_log_message(RCM_TRACE1, "SVM: softpart checking %s %s\n", + np->bname, uname); + + softp = meta_get_sp(sp, np, ep); + + /* softp cannot be NULL, if it is then the RCM cache is corrupt */ + assert(softp != NULL); + + /* + * if the softpartition is not a parent then nothing can be done, user + * must close the device and then fix the under lying devices. + */ + if (!(MD_HAS_PARENT(softp->common.parent))) { + rcm_log_message(RCM_TRACE1, + "SVM: softpart is a top level device\n"); + return (NOTREDUNDANT); + } + + if (strcmp(softp->compnamep->bname, uname) != 0) { + /* + * This can occur if this function has been called by the + * check_raid5 code as it is cycling through each column + * in turn. + */ + rcm_log_message(RCM_TRACE1, + "SVM: %s is not in softpart (%s)\n", + uname, softp->compnamep->bname); + return (NOTINDEVICE); + } + + /* + * Check the status of the soft partition this only moves from + * an okay state if the underlying devices fails while the soft + * partition is open. 
+ */ + if (softp->status != MD_SP_OK) { + rcm_log_message(RCM_TRACE1, + "SVM: softpart is broken (state: 0x%x)\n", + softp->status); + return (REDUNDANT); + } + + return (NOTREDUNDANT); +} + +/* + * check_raid5() + * + * Check the status of the passed in device within the raid5 in question. + * + * Input: + * mdsetname_t * the name of the set + * mdname_t * the raid5 device that is being examined + * char * the device which needs to be checked + * md_error_t * error pointer (not used) + * Return: + * int REDUNDANT - device is redundant and can be + * removed + * NOTREDUNDANT - device cannot be removed + */ +static int +check_raid5(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) +{ + md_raid_t *raidp = NULL; + md_raidcol_t *colp = NULL; + int i; + int rval = 0; + + rcm_log_message(RCM_TRACE1, "SVM: raid5 checking %s %s\n", + np->bname, uname); + + raidp = meta_get_raid(sp, np, ep); + + /* raidp cannot be NULL, if it is then the RCM cache is corrupt */ + assert(raidp != NULL); + + /* + * Now check each column in the device. We cannot rely upon the state + * of the device because if a hotspare is in use all the states are + * set to Okay, both at the metadevice layer and the column layer. + */ + for (i = 0; (i < raidp->cols.cols_len); i++) { + colp = &raidp->cols.cols_val[i]; + np = colp->colnamep; + + rcm_log_message(RCM_TRACE1, + "SVM: raid5 checking %s state %s 0x%x\n", + np->bname, raid_col_state_to_name(colp, NULL, 0), + colp->state); + + /* + * It is possible for the column to be a softpartition, + * so need to check the softpartiton if this is the + * case. It is *not* valid for the column to be a + * stripe/concat/mirror, and so no check to see what + * type of metadevice is being used. 
+ */ + if (metaismeta(np)) { + /* this is a metadevice ie a softpartiton */ + rval = check_softpart(sp, np, uname, ep); + if (rval == REDUNDANT) { + rcm_log_message(RCM_TRACE1, + "SVM: raid5 %s is broken\n", uname); + meta_invalidate_name(np); + return (REDUNDANT); + } else if (rval == NOTREDUNDANT && + colp->hsnamep != NULL) { + rcm_log_message(RCM_TRACE1, + "SVM: raid5 device is broken, hotspared\n"); + meta_invalidate_name(np); + return (REDUNDANT); + } + meta_invalidate_name(np); + continue; + } + meta_invalidate_name(np); + + if (strcmp(uname, np->bname) != 0) + continue; + + /* + * Found the device. Check if it is broken or hotspared. + */ + if (colp->state & RUS_ERRED) { + rcm_log_message(RCM_TRACE1, + "SVM: raid5 column device is broken\n"); + return (REDUNDANT); + } + + if (colp->hsnamep != NULL) { + rcm_log_message(RCM_TRACE1, + "SVM: raid5 column device is broken, hotspared\n"); + return (REDUNDANT); + } + } + return (NOTREDUNDANT); +} + +/* + * check_stripe() + * + * Check the status of the passed in device within the stripe in question. 
+ * + * Input: + * mdsetname_t * the name of the set + * mdname_t * the stripe that is being examined + * char * the device which needs to be checked + * md_error_t * error pointer (not used) + * Return: + * int REDUNDANT - device is redundant and can be + * removed + * NOTREDUNDANT - device cannot be removed + * NOTINDEVICE - device is not part of this + * component + */ +static int +check_stripe(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep) +{ + md_stripe_t *stripep = NULL; + md_row_t *mrp = NULL; + md_comp_t *mcp; + mdname_t *pnp; + char *miscname; + int row; + int col; + + rcm_log_message(RCM_TRACE1, "SVM: concat/stripe checking %s %s\n", + np->bname, uname); + stripep = meta_get_stripe(sp, np, ep); + + /* stripep cannot be NULL, if it is then the RCM cache is corrupt */ + assert(stripep != NULL); + + /* + * If the stripe is not a parent then nothing can be done, user + * must close the device and then fix the devices. + */ + if (!(MD_HAS_PARENT(stripep->common.parent))) { + rcm_log_message(RCM_TRACE1, + "SVM: stripe is a top level device\n"); + return (NOTREDUNDANT); + } + + pnp = metamnumname(&sp, stripep->common.parent, 0, ep); + + if (pnp == NULL) { + /* + * Only NULL when the replicas are in an inconsistant state + * ie the device says it is the parent of X but X does not + * exist. + */ + rcm_log_message(RCM_TRACE1, "SVM: parent is not configured\n"); + return (NOTREDUNDANT); + } + + /* + * Get the type of the parent and make sure that it is a mirror, + * if it is then need to find out the number of submirrors, and + * if it is not a mirror then this is not a REDUNDANT device. + */ + if ((miscname = metagetmiscname(pnp, ep)) == NULL) { + /* + * Again something is wrong with the configuration. 
+ */ + rcm_log_message(RCM_TRACE1, + "SVM: unable to find the type of %s\n", pnp->cname); + meta_invalidate_name(pnp); + return (NOTREDUNDANT); + } + + if (!(strcmp(miscname, MD_MIRROR) == 0 && + check_mirror(sp, pnp, ep) == REDUNDANT)) { + rcm_log_message(RCM_TRACE1, + "SVM: %s is a %s and not redundant\n", + pnp->cname, miscname); + meta_invalidate_name(pnp); + return (NOTREDUNDANT); + } + + meta_invalidate_name(pnp); + + for (row = 0; row < stripep->rows.rows_len; row++) { + mrp = &stripep->rows.rows_val[row]; + + /* now the components in the row */ + for (col = 0; col < mrp->comps.comps_len; col++) { + mcp = &mrp->comps.comps_val[col]; + + rcm_log_message(RCM_TRACE1, + "SVM: stripe comp %s check\n", + mcp->compnamep->bname); + + if (strcmp(mcp->compnamep->bname, uname) != 0) + continue; + + rcm_log_message(RCM_TRACE1, + "SVM: component state: %s\n", + comp_state_to_name(mcp, NULL, 0)); + + if (mcp->hsnamep != NULL) { + /* device is broken and hotspared */ + rcm_log_message(RCM_TRACE1, + "SVM: stripe %s broken, hotspare active\n", + uname); + return (REDUNDANT); + } + + /* + * LAST_ERRED is a special case. If the state of a + * component is CS_LAST_ERRED then this is the last + * copy of the data and we need to keep using it, even + * though we had errors. Thus, we must block the DR + * request. If you follow the documented procedure for + * fixing each component (fix devs in maintenance + * before last erred) then the mirror will + * automatically transition Last Erred components to + * the Erred state after which they can be DRed out. + */ + if (mcp->state == CS_ERRED) { + /* device is broken */ + rcm_log_message(RCM_TRACE1, + "SVM: stripe %s is broken\n", uname); + return (REDUNDANT); + } + + /* + * Short circuit - if here the component has been + * found in the column so no further processing is + * required here. + */ + return (NOTREDUNDANT); + } + } + + /* + * Only get to this point if the device (uname) has not been + * found in the stripe. 
This means that there is something + * wrong with the device dependency list. + */ + rcm_log_message(RCM_TRACE1, + "SVM: component %s is not part of %s\n", + uname, np->bname); + + return (NOTINDEVICE); +} + +/* + * check_mirror() + * + * Make sure that the mirror > 1 submirror. + * + * Input: + * mdsetname_t * the name of the set + * mdname_t * the stripe that is being examined + * Return: + * int REDUNDANT - mirror > 1 submirrors + * NOTREDUNDANT - mirror has 1 submirror + */ +static int +check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep) +{ + uint_t nsm = 0; /* number of submirrors */ + uint_t smi = 0; /* index into submirror array */ + md_mirror_t *mirrorp = NULL; + + rcm_log_message(RCM_TRACE1, "SVM: mirror checking %s\n", np->bname); + mirrorp = meta_get_mirror(sp, np, ep); + + /* mirrorp cannot be NULL, if it is then the RCM cache is corrupt */ + assert(mirrorp != NULL); + + /* + * Need to check how many submirrors that the mirror has. + */ + for (smi = 0, nsm = 0; (smi < NMIRROR); ++smi) { + md_submirror_t *mdsp = &mirrorp->submirrors[smi]; + mdname_t *submirnamep = mdsp->submirnamep; + + /* Is this submirror being used ? No, then continue */ + if (submirnamep == NULL) + continue; + nsm++; + } + + /* + * If there is only one submirror then there is no redundancy + * in the configuration and the user needs to take some other + * action before using cfgadm on the device ie close the metadevice. + */ + if (nsm == 1) { + rcm_log_message(RCM_TRACE1, + "SVM: only one submirror unable to allow action\n"); + return (NOTREDUNDANT); + } + + return (REDUNDANT); +} + +/* + * check_device() + * + * Check the current status of the underlying device. 
+ * + * Input: + * deventry_t * the device that is being checked + * Return: + * int REDUNDANT - device is redundant and can be + * removed + * NOTREDUNDANT - device cannot be removed + * Locking: + * None + * + * The check_device code path (the functions called by check_device) use + * libmeta calls directly to determine if the specified device is + * redundant or not. The can lead to conflicts between data cached in + * libmeta and data that is being cached by this rcm module. Since the + * rcm cache is our primary source of information here, we need to make + * sure that we are not getting stale data from the libmeta caches. + * We use meta_invalidate_name throughout this code path to clear the + * cached data in libmeta in order to ensure that we are not using stale data. + */ +static int +check_device(deventry_t *deventry) +{ + mdsetname_t *sp; + md_error_t error = mdnullerror; + char sname[BUFSIZ+1]; + uint32_t d; + mdname_t *np; + deventry_t *dependent; + int rval = NOTREDUNDANT; + int ret; + + dependent = deventry->dependent; + + rcm_log_message(RCM_TRACE1, "SVM: check_device(%s)\n", + deventry->devname); + /* + * should not be null because the caller has already figured out + * there are dependent devices. + */ + assert(dependent != NULL); + + do { + + rcm_log_message(RCM_TRACE1, "SVM: check dependent: %s\n", + dependent->devname); + + if (dependent->flags & REMOVED) { + dependent = dependent->next_dep; + continue; + } + + /* + * The device *should* be a metadevice and so need to see if + * it contains a setname. 
+ */ + ret = sscanf(dependent->devname, + "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/dsk/d%u", + sname, &d); + + if (ret != 2) + (void) strcpy(sname, MD_LOCAL_NAME); + + if ((sp = metasetname(sname, &error)) == NULL) { + rcm_log_message(RCM_TRACE1, + "SVM: unable to get setname for \"%s\", error %s\n", + sname, mde_sperror(&error, "")); + break; + } + + rcm_log_message(RCM_TRACE1, "SVM: processing: %s\n", + dependent->devname); + + np = metaname(&sp, dependent->devname, &error); + + switch (dependent->devtype) { + case SVM_TRANS: + /* + * No code to check trans devices because ufs logging + * should be being used. + */ + rcm_log_message(RCM_TRACE1, + "SVM: Use UFS logging instead of trans devices\n"); + break; + case SVM_SLICE: + case SVM_STRIPE: + case SVM_CONCAT: + rval = check_stripe(sp, np, deventry->devname, &error); + break; + case SVM_MIRROR: + /* + * No check here as this is performed by the one + * above when the submirror is checked. + */ + rcm_log_message(RCM_TRACE1, + "SVM: Mirror check is done by the stripe check\n"); + break; + case SVM_RAID: + /* + * Raid5 devices can be built on soft partitions or + * slices and so the check here is for the raid5 + * device built on top of slices. Note, a raid5 cannot + * be built on a stripe/concat. + */ + rval = check_raid5(sp, np, deventry->devname, &error); + break; + case SVM_SOFTPART: + /* + * Raid5 devices can be built on top of soft partitions + * and so they have to be checked. 
+ */ + rval = check_softpart(sp, np, deventry->devname, + &error); + break; + default: + rcm_log_message(RCM_TRACE1, + "SVM: unknown devtype: %d\n", dependent->devtype); + break; + } + + meta_invalidate_name(np); + + if (rval == REDUNDANT) + break; + } while ((dependent = dependent->next_dep) != NULL); + + rcm_log_message(RCM_TRACE1, "SVM: check_device return %d\n", rval); + return (rval); +} diff --git a/usr/src/cmd/rcm_daemon/common/swap_rcm.c b/usr/src/cmd/rcm_daemon/common/swap_rcm.c new file mode 100644 index 0000000000..f22c161a21 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/swap_rcm.c @@ -0,0 +1,825 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * RCM module providing support for swap areas + * during reconfiguration operations. 
+ */ +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <thread.h> +#include <synch.h> +#include <strings.h> +#include <assert.h> +#include <errno.h> +#include <libintl.h> +#include <sys/types.h> +#include <sys/swap.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <sys/dumpadm.h> +#include <sys/wait.h> +#include "rcm_module.h" + +/* cache flags */ +#define SWAP_CACHE_NEW 0x01 +#define SWAP_CACHE_STALE 0x02 +#define SWAP_CACHE_OFFLINED 0x04 + +#define SWAP_CMD "/usr/sbin/swap" +#define SWAP_DELETE SWAP_CMD" -d %s %ld" +#define SWAP_ADD SWAP_CMD" -a %s %ld %ld" + +/* LP64 hard code */ +#define MAXOFFSET_STRLEN 20 + +typedef struct swap_file { + char path[MAXPATHLEN]; + int cache_flags; + struct swap_area *areas; + struct swap_file *next; + struct swap_file *prev; +} swap_file_t; + +/* swap file may have multiple swap areas */ +typedef struct swap_area { + off_t start; + off_t len; + int cache_flags; + struct swap_area *next; + struct swap_area *prev; +} swap_area_t; + +static swap_file_t *cache; +static mutex_t cache_lock; + +static int swap_register(rcm_handle_t *); +static int swap_unregister(rcm_handle_t *); +static int swap_getinfo(rcm_handle_t *, char *, id_t, uint_t, + char **, char **, nvlist_t *, rcm_info_t **); +static int swap_suspend(rcm_handle_t *, char *, id_t, timespec_t *, + uint_t, char **, rcm_info_t **); +static int swap_resume(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int swap_offline(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int swap_online(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); +static int swap_remove(rcm_handle_t *, char *, id_t, uint_t, + char **, rcm_info_t **); + +static int alloc_usage(char **); +static void cache_insert(swap_file_t *); +static swap_file_t *cache_lookup(char *); +static void cache_remove(swap_file_t *); +static void free_cache(void); +static int get_dumpdev(char []); +static void log_cmd_status(int); 
+static int swap_add(swap_file_t *, char **); +static void swap_area_add(swap_file_t *, swap_area_t *); +static swap_area_t *swap_area_alloc(swapent_t *); +static swap_area_t *swap_area_lookup(swap_file_t *, swapent_t *); +static void swap_area_remove(swap_file_t *, swap_area_t *); +static int swap_delete(swap_file_t *, char **); +static swap_file_t *swap_file_alloc(char *); +static void swap_file_free(swap_file_t *); +static swaptbl_t *sys_swaptbl(void); +static int update_cache(rcm_handle_t *); + +static struct rcm_mod_ops swap_ops = +{ + RCM_MOD_OPS_VERSION, + swap_register, + swap_unregister, + swap_getinfo, + swap_suspend, + swap_resume, + swap_offline, + swap_online, + swap_remove, + NULL, + NULL, + NULL +}; + +struct rcm_mod_ops * +rcm_mod_init() +{ + return (&swap_ops); +} + +const char * +rcm_mod_info() +{ + return ("RCM Swap module %I%"); +} + +int +rcm_mod_fini() +{ + free_cache(); + (void) mutex_destroy(&cache_lock); + + return (RCM_SUCCESS); +} + +static int +swap_register(rcm_handle_t *hdl) +{ + return (update_cache(hdl)); +} + +static int +swap_unregister(rcm_handle_t *hdl) +{ + swap_file_t *sf; + + (void) mutex_lock(&cache_lock); + while ((sf = cache) != NULL) { + cache = cache->next; + (void) rcm_unregister_interest(hdl, sf->path, 0); + swap_file_free(sf); + } + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +swap_getinfo(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **infostr, char **errstr, nvlist_t *props, rcm_info_t **dependent) +{ + assert(rsrcname != NULL && infostr != NULL); + + (void) mutex_lock(&cache_lock); + if (cache_lookup(rsrcname) == NULL) { + rcm_log_message(RCM_ERROR, "unknown resource: %s\n", + rsrcname); + (void) mutex_unlock(&cache_lock); + return (RCM_FAILURE); + } + (void) mutex_unlock(&cache_lock); + (void) alloc_usage(infostr); + + return (RCM_SUCCESS); +} + +/* + * Remove swap space to maintain availability of anonymous pages + * during device suspension. 
Swap will be reconfigured upon resume. + * Fail if operation will unconfigure dump device. + */ +/*ARGSUSED*/ +static int +swap_suspend(rcm_handle_t *hdl, char *rsrcname, id_t id, timespec_t *interval, + uint_t flags, char **errstr, rcm_info_t **dependent) +{ + swap_file_t *sf; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + if (flags & RCM_QUERY) + return (RCM_SUCCESS); + + (void) mutex_lock(&cache_lock); + if ((sf = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = swap_delete(sf, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/*ARGSUSED*/ +static int +swap_resume(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + swap_file_t *sf; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + (void) mutex_lock(&cache_lock); + if ((sf = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = swap_add(sf, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/* + * By default, reject offline request. If forced, attempt to + * delete swap. Fail if operation will unconfigure dump device. 
+ */ +/*ARGSUSED*/ +static int +swap_offline(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + swap_file_t *sf; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + if ((flags & RCM_FORCE) && (flags & RCM_QUERY)) + return (RCM_SUCCESS); + + (void) mutex_lock(&cache_lock); + if ((sf = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + if (flags & RCM_FORCE) { + rv = swap_delete(sf, errstr); + (void) mutex_unlock(&cache_lock); + return (rv); + } + /* default reject */ + (void) mutex_unlock(&cache_lock); + (void) alloc_usage(errstr); + + return (RCM_FAILURE); +} + +/*ARGSUSED*/ +static int +swap_online(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + swap_file_t *sf; + int rv; + + assert(rsrcname != NULL && errstr != NULL); + + (void) mutex_lock(&cache_lock); + if ((sf = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + + rv = swap_add(sf, errstr); + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/*ARGSUSED*/ +static int +swap_remove(rcm_handle_t *hdl, char *rsrcname, id_t id, uint_t flags, + char **errstr, rcm_info_t **dependent) +{ + swap_file_t *sf; + + assert(rsrcname != NULL); + + (void) mutex_lock(&cache_lock); + if ((sf = cache_lookup(rsrcname)) == NULL) { + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + /* RCM framework handles unregistration */ + cache_remove(sf); + swap_file_free(sf); + (void) mutex_unlock(&cache_lock); + + return (RCM_SUCCESS); +} + +/* + * Delete all swap areas for swap file. + * Invoke swap(1M) instead of swapctl(2) to + * handle relocation of dump device. + * If dump device is configured, fail if + * unable to relocate dump. + * + * Call with cache_lock held. 
+ */ +static int +swap_delete(swap_file_t *sf, char **errstr) +{ + swap_area_t *sa; + char cmd[sizeof (SWAP_DELETE) + MAXPATHLEN + + MAXOFFSET_STRLEN]; + char dumpdev[MAXPATHLEN]; + int have_dump = 1; + int stat; + int rv = RCM_SUCCESS; + + if (get_dumpdev(dumpdev) == 0 && dumpdev[0] == '\0') + have_dump = 0; + + for (sa = sf->areas; sa != NULL; sa = sa->next) { + /* swap(1M) is not idempotent */ + if (sa->cache_flags & SWAP_CACHE_OFFLINED) { + continue; + } + + (void) snprintf(cmd, sizeof (cmd), SWAP_DELETE, sf->path, + sa->start); + rcm_log_message(RCM_TRACE1, "%s\n", cmd); + if ((stat = rcm_exec_cmd(cmd)) != 0) { + log_cmd_status(stat); + *errstr = strdup(gettext("unable to delete swap")); + rv = RCM_FAILURE; + goto out; + } + sa->cache_flags |= SWAP_CACHE_OFFLINED; + + /* + * Fail on removal of dump device. + */ + if (have_dump == 0) + continue; + + if (get_dumpdev(dumpdev) != 0) { + rcm_log_message(RCM_WARNING, "unable to " + "check for removal of dump device\n"); + } else if (dumpdev[0] == '\0') { + rcm_log_message(RCM_DEBUG, "removed dump: " + "attempting recovery\n"); + + /* + * Restore dump + */ + (void) snprintf(cmd, sizeof (cmd), SWAP_ADD, + sf->path, sa->start, sa->len); + rcm_log_message(RCM_TRACE1, "%s\n", cmd); + if ((stat = rcm_exec_cmd(cmd)) != 0) { + log_cmd_status(stat); + rcm_log_message(RCM_ERROR, + "failed to restore dump\n"); + } else { + sa->cache_flags &= ~SWAP_CACHE_OFFLINED; + rcm_log_message(RCM_DEBUG, "dump restored\n"); + } + *errstr = strdup(gettext("unable to relocate dump")); + rv = RCM_FAILURE; + goto out; + } + } + sf->cache_flags |= SWAP_CACHE_OFFLINED; +out: + return (rv); +} + +/* + * Invoke swap(1M) to add each registered swap area. + * + * Call with cache_lock held. 
+ */ +static int +swap_add(swap_file_t *sf, char **errstr) +{ + swap_area_t *sa; + char cmd[sizeof (SWAP_ADD) + MAXPATHLEN + + (2 * MAXOFFSET_STRLEN)]; + int stat; + int rv = RCM_SUCCESS; + + for (sa = sf->areas; sa != NULL; sa = sa->next) { + /* swap(1M) is not idempotent */ + if (!(sa->cache_flags & SWAP_CACHE_OFFLINED)) { + continue; + } + + (void) snprintf(cmd, sizeof (cmd), + SWAP_ADD, sf->path, sa->start, sa->len); + rcm_log_message(RCM_TRACE1, "%s\n", cmd); + if ((stat = rcm_exec_cmd(cmd)) != 0) { + log_cmd_status(stat); + *errstr = strdup(gettext("unable to add swap")); + rv = RCM_FAILURE; + break; + } else { + sa->cache_flags &= ~SWAP_CACHE_OFFLINED; + sf->cache_flags &= ~SWAP_CACHE_OFFLINED; + } + } + + return (rv); +} + +static int +update_cache(rcm_handle_t *hdl) +{ + swaptbl_t *swt; + swap_file_t *sf, *stale_sf; + swap_area_t *sa, *stale_sa; + int i; + int rv = RCM_SUCCESS; + + if ((swt = sys_swaptbl()) == NULL) { + rcm_log_message(RCM_ERROR, "failed to read " + "current swap configuration\n"); + return (RCM_FAILURE); + } + + (void) mutex_lock(&cache_lock); + + /* + * cache pass 1 - mark everyone stale + */ + for (sf = cache; sf != NULL; sf = sf->next) { + sf->cache_flags |= SWAP_CACHE_STALE; + for (sa = sf->areas; sa != NULL; sa = sa->next) { + sa->cache_flags |= SWAP_CACHE_STALE; + } + } + + /* + * add new entries + */ + for (i = 0; i < swt->swt_n; i++) { + if (swt->swt_ent[i].ste_flags & (ST_INDEL|ST_DOINGDEL)) { + continue; + } + + /* + * assure swap_file_t + */ + if ((sf = cache_lookup(swt->swt_ent[i].ste_path)) == NULL) { + if ((sf = swap_file_alloc(swt->swt_ent[i].ste_path)) == + NULL) { + free(swt); + return (RCM_FAILURE); + } + sf->cache_flags |= SWAP_CACHE_NEW; + cache_insert(sf); + } else { + sf->cache_flags &= ~SWAP_CACHE_STALE; + } + + /* + * assure swap_area_t + */ + if ((sa = swap_area_lookup(sf, &swt->swt_ent[i])) == NULL) { + if ((sa = swap_area_alloc(&swt->swt_ent[i])) == NULL) { + free(swt); + return (RCM_FAILURE); + } + 
swap_area_add(sf, sa); + } else { + sa->cache_flags &= ~SWAP_CACHE_STALE; + } + } + + free(swt); + + /* + * cache pass 2 + * + * swap_file_t - skip offlined, register new, unregister/remove stale + * swap_area_t - skip offlined, remove stale + */ + sf = cache; + while (sf != NULL) { + sa = sf->areas; + while (sa != NULL) { + if (sa->cache_flags & SWAP_CACHE_OFFLINED) { + sa->cache_flags &= ~SWAP_CACHE_STALE; + sa = sa->next; + continue; + } + if (sa->cache_flags & SWAP_CACHE_STALE) { + stale_sa = sa; + sa = sa->next; + swap_area_remove(sf, stale_sa); + free(stale_sa); + continue; + } + sa = sa->next; + } + + if (sf->cache_flags & SWAP_CACHE_OFFLINED) { + sf->cache_flags &= ~SWAP_CACHE_STALE; + sf = sf->next; + continue; + } + + if (sf->cache_flags & SWAP_CACHE_STALE) { + if (rcm_unregister_interest(hdl, sf->path, 0) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, "failed to register " + "%s\n", sf->path); + } + stale_sf = sf; + sf = sf->next; + cache_remove(stale_sf); + swap_file_free(stale_sf); + continue; + } + + if (!(sf->cache_flags & SWAP_CACHE_NEW)) { + sf = sf->next; + continue; + } + + if (rcm_register_interest(hdl, sf->path, 0, NULL) != + RCM_SUCCESS) { + rcm_log_message(RCM_ERROR, "failed to register %s\n", + sf->path); + rv = RCM_FAILURE; + } else { + rcm_log_message(RCM_DEBUG, "registered %s\n", + sf->path); + sf->cache_flags &= ~SWAP_CACHE_NEW; + } + sf = sf->next; + } + (void) mutex_unlock(&cache_lock); + + return (rv); +} + +/* + * Returns system swap table. 
+ */ +static swaptbl_t * +sys_swaptbl() +{ + swaptbl_t *swt; + char *cp; + int i, n; + size_t tbl_size; + + if ((n = swapctl(SC_GETNSWP, NULL)) == -1) + return (NULL); + + tbl_size = sizeof (int) + n * sizeof (swapent_t) + n * MAXPATHLEN; + if ((swt = (swaptbl_t *)malloc(tbl_size)) == NULL) + return (NULL); + + swt->swt_n = n; + cp = (char *)swt + (sizeof (int) + n * sizeof (swapent_t)); + for (i = 0; i < n; i++) { + swt->swt_ent[i].ste_path = cp; + cp += MAXPATHLEN; + } + + if ((n = swapctl(SC_LIST, swt)) == -1) { + free(swt); + return (NULL); + } + + if (n != swt->swt_n) { + /* mismatch, try again */ + free(swt); + return (sys_swaptbl()); + } + + return (swt); +} + +static int +get_dumpdev(char dumpdev[]) +{ + int fd; + int rv = 0; + char *err; + + if ((fd = open("/dev/dump", O_RDONLY)) == -1) { + rcm_log_message(RCM_ERROR, "failed to open /dev/dump\n"); + return (-1); + } + + if (ioctl(fd, DIOCGETDEV, dumpdev) == -1) { + if (errno == ENODEV) { + dumpdev[0] = '\0'; + } else { + rcm_log_message(RCM_ERROR, "ioctl: %s\n", + ((err = strerror(errno)) == NULL) ? "" : err); + rv = -1; + } + } + (void) close(fd); + + return (rv); +} + +static void +free_cache(void) +{ + swap_file_t *sf; + + (void) mutex_lock(&cache_lock); + while ((sf = cache) != NULL) { + cache = cache->next; + swap_file_free(sf); + } + (void) mutex_unlock(&cache_lock); + +} + +/* + * Call with cache_lock held. + */ +static void +swap_file_free(swap_file_t *sf) +{ + swap_area_t *sa; + + assert(sf != NULL); + + while ((sa = sf->areas) != NULL) { + sf->areas = sf->areas->next; + free(sa); + } + free(sf); +} + +/* + * Call with cache_lock held. + */ +static void +cache_insert(swap_file_t *ent) +{ + ent->next = cache; + if (ent->next) + ent->next->prev = ent; + ent->prev = NULL; + cache = ent; +} + +/* + * Call with cache_lock held. 
+ */ +static swap_file_t * +cache_lookup(char *rsrc) +{ + swap_file_t *sf; + + for (sf = cache; sf != NULL; sf = sf->next) { + if (strcmp(rsrc, sf->path) == 0) { + return (sf); + } + } + return (NULL); +} + +/* + * Call with cache_lock held. + */ +static void +cache_remove(swap_file_t *ent) +{ + assert(ent != NULL); + + if (ent->next != NULL) { + ent->next->prev = ent->prev; + } + if (ent->prev != NULL) { + ent->prev->next = ent->next; + } else { + cache = ent->next; + } + ent->next = NULL; + ent->prev = NULL; +} + +/* + * Call with cache_lock held. + */ +static void +swap_area_add(swap_file_t *sf, swap_area_t *sa) +{ + sa->next = sf->areas; + if (sa->next) + sa->next->prev = sa; + sa->prev = NULL; + sf->areas = sa; +} + +/* + * Call with cache_lock held. + */ +static void +swap_area_remove(swap_file_t *sf, swap_area_t *ent) +{ + assert(sf != NULL && ent != NULL); + + if (ent->next != NULL) { + ent->next->prev = ent->prev; + } + if (ent->prev != NULL) { + ent->prev->next = ent->next; + } else { + sf->areas = ent->next; + } + ent->next = NULL; + ent->prev = NULL; +} + +static swap_file_t * +swap_file_alloc(char *rsrc) +{ + swap_file_t *sf; + + if ((sf = calloc(1, sizeof (*sf))) == NULL) { + rcm_log_message(RCM_ERROR, "calloc failure\n"); + return (NULL); + } + (void) strlcpy(sf->path, rsrc, sizeof (sf->path)); + + return (sf); +} + +static swap_area_t * +swap_area_alloc(swapent_t *swt_ent) +{ + swap_area_t *sa; + + if ((sa = calloc(1, sizeof (*sa))) == NULL) { + rcm_log_message(RCM_ERROR, "calloc failure\n"); + return (NULL); + } + sa->start = swt_ent->ste_start; + sa->len = swt_ent->ste_length; + + return (sa); +} + +/* + * Call with cache_lock held. 
+ */ +static swap_area_t * +swap_area_lookup(swap_file_t *sf, swapent_t *swt_ent) +{ + swap_area_t *sa; + + assert(sf != NULL && swt_ent != NULL); + assert(strcmp(sf->path, swt_ent->ste_path) == 0); + + for (sa = sf->areas; sa != NULL; sa = sa->next) { + if (sa->start == swt_ent->ste_start && + sa->len == swt_ent->ste_length) { + return (sa); + } + } + return (NULL); +} + +/* + * All-purpose usage string. + */ +static int +alloc_usage(char **cpp) +{ + if ((*cpp = strdup(gettext("swap area"))) == NULL) { + rcm_log_message(RCM_ERROR, "strdup failure\n"); + return (-1); + } + return (0); +} + +static void +log_cmd_status(int stat) +{ + char *err; + + if (stat == -1) { + rcm_log_message(RCM_ERROR, "wait: %s\n", + ((err = strerror(errno)) == NULL) ? "" : err); + } else if (WIFEXITED(stat)) { + rcm_log_message(RCM_ERROR, "exit status: %d\n", + WEXITSTATUS(stat)); + } else { + rcm_log_message(RCM_ERROR, "wait status: %d\n", stat); + } +} diff --git a/usr/src/cmd/rcm_daemon/common/ttymux_rcm.c b/usr/src/cmd/rcm_daemon/common/ttymux_rcm.c new file mode 100644 index 0000000000..f8c2b17a41 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/ttymux_rcm.c @@ -0,0 +1,1433 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include <limits.h> +#include <synch.h> +#include <libintl.h> +#include <errno.h> +#include <libdevinfo.h> +#include <sys/uio.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <stropts.h> +#include <sys/stream.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/mkdev.h> + +#include <sys/param.h> +#include <sys/openpromio.h> +#include <sys/ttymuxuser.h> + +#include "ttymux_rcm_impl.h" +#include "rcm_module.h" + +#define TTYMUX_OFFLINE_ERR gettext("Resource is in use by") +#define TTYMUX_UNKNOWN_ERR gettext("Unknown Operation attempted") +#define TTYMUX_ONLINE_ERR gettext("Failed to connect under multiplexer") +#define TTYMUX_INVALID_ERR gettext("Invalid Operation on this resource") +#define TTYMUX_OFFLINE_FAIL gettext("Failed to disconnect from multiplexer") +#define TTYMUX_MEMORY_ERR gettext("TTYMUX: strdup failure\n") + + +static int msglvl = 6; /* print messages less than this level */ +#define TEST(cond, stmt) { if (cond) stmt; } +#define _msg(lvl, args) TEST(msglvl > (lvl), trace args) + +static int oflags = O_EXCL|O_RDWR|O_NONBLOCK|O_NOCTTY; +static dev_t cn_dev = NODEV; +static rsrc_t *cn_rsrc = NULL; +static rsrc_t cache_head; +static rsrc_t cache_tail; +static mutex_t cache_lock; +static char muxctl[PATH_MAX] = {0}; +static char muxcon[PATH_MAX] = {0}; +static int muxfd; +static boolean_t register_rsrcs; + +/* module interface routines */ +static int tty_register(rcm_handle_t *); +static int tty_unregister(rcm_handle_t *); +static 
int tty_getinfo(rcm_handle_t *, char *, id_t, uint_t, char **, + char **, nvlist_t *, rcm_info_t **); +static int tty_suspend(rcm_handle_t *, char *, id_t, + timespec_t *, uint_t, char **, rcm_info_t **); +static int tty_resume(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int tty_offline(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int tty_online(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); +static int tty_remove(rcm_handle_t *, char *, id_t, uint_t, char **, + rcm_info_t **); + +static int get_devpath(char *, char **, dev_t *); + +/* + * Module-Private data + */ +static struct rcm_mod_ops tty_ops = { + RCM_MOD_OPS_VERSION, + tty_register, + tty_unregister, + tty_getinfo, + tty_suspend, + tty_resume, + tty_offline, + tty_online, + tty_remove, + NULL, + NULL +}; + +/*PRINTFLIKE1*/ +static void +trace(char *fmt, ...) +{ + va_list args; + char buf[256]; + int sz; + + va_start(args, fmt); + sz = vsnprintf(buf, sizeof (buf), fmt, args); + va_end(args); + + if (sz < 0) + rcm_log_message(RCM_TRACE1, + _("TTYMUX: vsnprintf parse error\n")); + else if (sz > sizeof (buf)) { + char *b = malloc(sz + 1); + + if (b != NULL) { + va_start(args, fmt); + sz = vsnprintf(b, sz + 1, fmt, args); + va_end(args); + if (sz > 0) + rcm_log_message(RCM_TRACE1, _("%s"), b); + free(b); + } + } else { + rcm_log_message(RCM_TRACE1, _("%s"), buf); + } +} + +/* + * CACHE MANAGEMENT + * Resources managed by this module are stored in a list of rsrc_t + * structures. + */ + +/* + * cache_lookup() + * + * Get a cache node for a resource. Call with cache lock held. + */ +static rsrc_t * +cache_lookup(const char *resource) +{ + rsrc_t *rsrc; + rsrc = cache_head.next; + while (rsrc != &cache_tail) { + if (rsrc->id && strcmp(resource, rsrc->id) == 0) { + return (rsrc); + } + rsrc = rsrc->next; + } + return (NULL); +} + +/* + * Get a cache node for a minor node. Call with cache lock held. 
+ */ +static rsrc_t * +cache_lookup_bydevt(dev_t devt) +{ + rsrc_t *rsrc; + rsrc = cache_head.next; + while (rsrc != &cache_tail) { + if (rsrc->dev == devt) + return (rsrc); + rsrc = rsrc->next; + } + return (NULL); +} + +/* + * free_node() + * + * Free a node. Make sure it isn't in the list! + */ +static void +free_node(rsrc_t *node) +{ + if (node) { + if (node->id) { + free(node->id); + } + free(node); + } +} + +/* + * cache_insert() + * + * Call with the cache_lock held. + */ +static void +cache_insert(rsrc_t *node) +{ + /* insert at the head for best performance */ + node->next = cache_head.next; + node->prev = &cache_head; + + node->next->prev = node; + node->prev->next = node; +} + +/* + * cache_create() + * + * Call with the cache_lock held. + */ +static rsrc_t * +cache_create(const char *resource, dev_t dev) +{ + rsrc_t *rsrc = malloc(sizeof (rsrc_t)); + + if (rsrc != NULL) { + if ((rsrc->id = strdup(resource)) != NULL) { + rsrc->dev = dev; + rsrc->flags = 0; + rsrc->dependencies = NULL; + cache_insert(rsrc); + } else { + free(rsrc); + rsrc = NULL; + } + } else { + _msg(0, ("TTYMUX: malloc failure for resource %s.\n", + resource)); + } + return (rsrc); +} + +/* + * cache_get() + * + * Call with the cache_lock held. + */ +static rsrc_t * +cache_get(const char *resource) +{ + rsrc_t *rsrc = cache_lookup(resource); + if (rsrc == NULL) { + dev_t dev; + + (void) get_devpath((char *)resource, NULL, &dev); + rsrc = cache_create(resource, dev); + } + return (rsrc); +} + +/* + * cache_remove() + * + * Call with the cache_lock held. + */ +static void +cache_remove(rsrc_t *node) +{ + node->next->prev = node->prev; + node->prev->next = node->next; + node->next = NULL; + node->prev = NULL; +} + +/* + * Open a file identified by fname with the given open flags. + * If the request is to open a file with exclusive access and the open + * fails then backoff exponentially and then retry the open. 
+ * Do not wait for longer than about a second (since this may be an rcm + * framework thread). + */ +static int +open_file(char *fname, int flags) +{ + int fd, cnt; + struct timespec ts; + + if ((flags & O_EXCL) == 0) + return (open(fname, flags)); + + ts.tv_sec = 0; + ts.tv_nsec = 16000000; /* 16 milliseconds */ + + for (cnt = 0; cnt < 5 && (fd = open(fname, flags)) == -1; cnt++) { + (void) nanosleep(&ts, NULL); + ts.tv_nsec *= 2; + } + return (fd); +} + +/* + * No-op for creating an association between a pair of resources. + */ +/*ARGSUSED*/ +static int +nullconnect(link_t *link) +{ + return (0); +} + +/* + * No-op for destroying an association between a pair of resources. + */ +/*ARGSUSED*/ +static int +nulldisconnect(link_t *link) +{ + return (0); +} + +/* + * Record an actual or desired association between two resources + * identified by their rsrc_t structures. + */ +static link_t * +add_dependency(rsrc_t *user, rsrc_t *used) +{ + link_t *linkhead; + link_t *link; + + if (user == NULL || used == NULL) + return (NULL); + + if (user->id && used->id && strcmp(user->id, used->id) == 0) { + _msg(2, ("TTYMUX: attempt to connect devices created by " + "the same driver\n")); + return (NULL); + } + + /* + * Search for all resources that this resource user is depending + * upon. 
+ */ + linkhead = user->dependencies; + for (link = linkhead; link != NULL; link = link->next) { + /* + * Does the using resource already depends on the used + * resource + */ + if (link->used == used) + return (link); + } + + link = malloc(sizeof (link_t)); + + if (link == NULL) { + rcm_log_message(RCM_ERROR, _("TTYMUX: Out of memory\n")); + return (NULL); + } + + _msg(6, ("TTYMUX: New link user %s used %s\n", user->id, used->id)); + + link->user = user; + link->used = used; + link->linkid = 0; + link->state = UNKNOWN; + link->flags = 0; + + link->connect = nullconnect; + link->disconnect = nulldisconnect; + link->next = linkhead; + + user->dependencies = link; + + return (link); +} + +/* + * Send an I_STR stream ioctl to a device + */ +static int +istrioctl(int fd, int cmd, void *data, int datalen, int *bytes) { + struct strioctl ios; + int rval; + + ios.ic_timout = 0; /* use the default */ + ios.ic_cmd = cmd; + ios.ic_dp = (char *)data; + ios.ic_len = datalen; + + rval = ioctl(fd, I_STR, (char *)&ios); + if (bytes) + *bytes = ios.ic_len; + return (rval); +} + +/* + * Streams link the driver identified by fd underneath a mux + * identified by ctrl_fd. + */ +static int +plink(int ctrl_fd, int fd) +{ + int linkid; + + /* + * pop any modules off the lower stream. + */ + while (ioctl(fd, I_POP, 0) == 0) + ; + + if ((linkid = ioctl(ctrl_fd, I_PLINK, fd)) < 0) + rcm_log_message(RCM_ERROR, + _("TTYMUX: I_PLINK error %d.\n"), errno); + return (linkid); +} + +/* + * Streams unlink the STREAM identified by linkid from a mux + * identified by ctrl_fd. + */ +static int +punlink(int ctrl_fd, int linkid) +{ + if (ioctl(ctrl_fd, I_PUNLINK, linkid) < 0) + return (errno); + else + return (0); +} + +/* + * Connect a pair of resources by establishing the dependency association. + * Only works for devices that support the TTYMUX ioctls. 
+ */ +static int +mux_connect(link_t *link) +{ + int lfd; + int rv; + ttymux_assoc_t as; + uint8_t ioflags; + + _msg(6, ("TTYMUX: mux_connect (%ld:%ld<->%ld:%ld %s <-> %s\n", + major(link->user->dev), minor(link->user->dev), + major(link->used->dev), minor(link->used->dev), + link->user->id, link->used->id)); + + _msg(12, ("TTYMUX: struct size = %d (plen %d)\n", + sizeof (as), PATH_MAX)); + + if (link->user->dev == NODEV || link->used->dev == NODEV) { + /* + * One of the resources in the association is not + * present (wait for the online notification before + * attempting to establish the dependency association. + */ + return (EAGAIN); + } + if (major(link->user->dev) == major(link->used->dev)) { + _msg(2, ("TTYMUX: attempt to link devices created by " + "the same driver\n")); + return (EINVAL); + } + /* + * Explicitly check for attempts to plumb the system console - + * required becuase not all serial devices support the + * O_EXCL open flag. + */ + if (link->used->dev == cn_dev) { + rcm_log_message(RCM_WARNING, _("TTYMUX: Request to link the " + " system console under another device not allowed!\n")); + + return (EPERM); + } + + /* + * Ensure that the input/output mode of the dependent is reasonable + */ + if ((ioflags = link->flags & FORIO) == 0) + ioflags = FORIO; + + /* + * Open each resource participating in the association. 
+ */ + lfd = open(link->used->id, O_EXCL|O_RDWR|O_NONBLOCK|O_NOCTTY); + if (lfd == -1) { + if (errno == EBUSY) { + rcm_log_message(RCM_WARNING, _("TTYMUX: device %s is " + " busy - " " cannot connect to %s\n"), + link->used->id, link->user->id); + } else { + rcm_log_message(RCM_WARNING, + _("TTYMUX: open error %d for device %s\n"), + errno, link->used->id); + } + return (errno); + } + /* + * Note: Issuing the I_PLINK and TTYMUX_ASSOC request on the 'using' + * resource is more generic: + * muxfd = open(link->user->id, oflags); + * However using the ctl (MUXCTLLINK) node means that any current opens + * on the 'using' resource are uneffected. + */ + + /* + * Figure out if the 'used' resource is already associated with + * some resource - if so tell the caller to try again later. + * More generally if any user or kernel thread has the resource + * open then the association should not be made. + * The ttymux driver makes this check (but it should be done here). + */ + as.ttymux_linkid = 0; + as.ttymux_ldev = link->used->dev; + + if (istrioctl(muxfd, TTYMUX_GETLINK, + (void *)&as, sizeof (as), NULL) == 0) { + + _msg(7, ("TTYMUX: %ld:%ld (%d) (udev %ld:%ld) already linked\n", + major(as.ttymux_ldev), minor(as.ttymux_ldev), + as.ttymux_linkid, major(as.ttymux_udev), + minor(as.ttymux_udev))); + link->linkid = as.ttymux_linkid; + if (as.ttymux_udev != NODEV) { + (void) close(lfd); + return (EAGAIN); + } + } + + /* + * Now link and associate the used resource under the using resource. 
+ */ + as.ttymux_udev = link->user->dev; + as.ttymux_ldev = link->used->dev; + as.ttymux_tag = 0ul; + as.ttymux_ioflag = ioflags; + + _msg(6, ("TTYMUX: connecting %ld:%ld to %ld:%ld\n", + major(as.ttymux_ldev), minor(as.ttymux_ldev), + major(as.ttymux_udev), minor(as.ttymux_udev))); + + if (as.ttymux_udev == cn_dev) { + struct termios tc; + + if (ioctl(lfd, TCGETS, &tc) != -1) { + tc.c_cflag |= CREAD; + if (ioctl(lfd, TCSETSW, &tc) == -1) { + rcm_log_message(RCM_WARNING, + _("TTYMUX: error %d whilst enabling the " + "receiver on device %d:%d\n"), + errno, major(as.ttymux_ldev), + minor(as.ttymux_ldev)); + } + } + } + + if (as.ttymux_linkid <= 0 && (as.ttymux_linkid = + plink(muxfd, lfd)) <= 0) { + rcm_log_message(RCM_WARNING, + _("TTYMUX: Link error %d for device %s\n"), + errno, link->used->id); + rv = errno; + goto out; + } + link->linkid = as.ttymux_linkid; + + _msg(6, ("TTYMUX: associating\n")); + if (istrioctl(muxfd, TTYMUX_ASSOC, (void *)&as, sizeof (as), 0) != 0) { + rv = errno; + goto out; + } + _msg(6, ("TTYMUX: Succesfully connected %ld:%ld to %ld:%ld\n", + major(as.ttymux_ldev), minor(as.ttymux_ldev), + major(as.ttymux_udev), minor(as.ttymux_udev))); + link->state = CONNECTED; + (void) close(lfd); + return (0); +out: + rcm_log_message(RCM_WARNING, + _("TTYMUX: Error [%d] connecting %d:%d to %d:%d\n"), + rv, major(as.ttymux_ldev), minor(as.ttymux_ldev), + major(as.ttymux_udev), minor(as.ttymux_udev)); + + (void) close(lfd); + if (as.ttymux_linkid > 0) { + /* + * There was an error so unwind the I_PLINK step + */ + if (punlink(muxfd, as.ttymux_linkid) != 0) + rcm_log_message(RCM_WARNING, + _("TTYMUX: Unlink error %d (%s).\n"), + errno, link->used->id); + } + return (rv); +} + +/* + * Disconnect a pair of resources by destroying the dependency association. + * Only works for devices that support the TTYMUX ioctls. 
+ */ +static int +mux_disconnect(link_t *link) +{ + int rv; + ttymux_assoc_t as; + + _msg(6, ("TTYMUX: mux_disconnect %s<->%s (%ld:%ld<->%ld:%ld)\n", + link->user->id, link->used->id, + major(link->user->dev), minor(link->user->dev), + major(link->used->dev), minor(link->used->dev))); + + as.ttymux_ldev = link->used->dev; + + if (istrioctl(muxfd, TTYMUX_GETLINK, + (void *)&as, sizeof (as), NULL) != 0) { + + _msg(1, ("TTYMUX: %ld:%ld not linked [err %d]\n", + major(link->used->dev), minor(link->used->dev), errno)); + return (0); + + /* + * Do not disassociate console resources - simply + * unlink them so that they remain persistent. + */ + } else if (as.ttymux_udev != cn_dev && + istrioctl(muxfd, TTYMUX_DISASSOC, (void *)&as, + sizeof (as), 0) == -1) { + + rv = errno; + rcm_log_message(RCM_WARNING, + _("TTYMUX: Dissassociate error %d for %s\n"), + rv, link->used->id); + + } else if (punlink(muxfd, as.ttymux_linkid) != 0) { + rv = errno; + rcm_log_message(RCM_WARNING, + _("TTYMUX: Error %d unlinking %d:%d\n"), + errno, major(link->used->dev), minor(link->used->dev)); + } else { + _msg(6, ("TTYMUX: %s<->%s disconnected.\n", + link->user->id, link->used->id)); + + link->state = DISCONNECTED; + link->linkid = 0; + rv = 0; + } + return (rv); +} + +/* PESISTENCY */ + +/* + * Given a special device file system path return the /devices path + * and/or the device number (dev_t) of the device. 
+ */ +static int +get_devpath(char *dev, char **cname, dev_t *devt) +{ + struct stat sb; + + if (cname != NULL) + *cname = NULL; + + if (devt != NULL) + *devt = NODEV; + + if (lstat(dev, &sb) < 0) { + return (errno); + } else if ((sb.st_mode & S_IFMT) == S_IFLNK) { + int lsz; + char linkbuf[PATH_MAX+1]; + + if (stat(dev, &sb) < 0) + return (errno); + + lsz = readlink(dev, linkbuf, PATH_MAX); + + if (lsz <= 0) + return (ENODEV); + linkbuf[lsz] = '\0'; + dev = strstr(linkbuf, "/devices"); + if (dev == NULL) + return (ENODEV); + } + + if (cname != NULL) + *cname = strdup(dev); + + if (devt != NULL) + *devt = sb.st_rdev; + + return (0); +} + +/* + * See routine locate_node + */ +static int +locate_dev(di_node_t node, di_minor_t minor, void *arg) +{ + char *devfspath; + char resource[PATH_MAX]; + rsrc_t *rsrc; + + if (di_minor_devt(minor) != (dev_t)arg) + return (DI_WALK_CONTINUE); + + if ((devfspath = di_devfs_path(node)) == NULL) + return (DI_WALK_TERMINATE); + + if (snprintf(resource, sizeof (resource), "/devices%s:%s", + devfspath, di_minor_name(minor)) > sizeof (resource)) { + di_devfs_path_free(devfspath); + return (DI_WALK_TERMINATE); + } + + di_devfs_path_free(devfspath); + + rsrc = cache_lookup(resource); + if (rsrc == NULL && + (rsrc = cache_create(resource, di_minor_devt(minor))) == NULL) + return (DI_WALK_TERMINATE); + + rsrc->dev = di_minor_devt(minor); + rsrc->flags |= PRESENT; + rsrc->flags &= ~UNKNOWN; + return (DI_WALK_TERMINATE); +} + +/* + * Find a devinfo node that matches the device argument (dev). + * This is an expensive search of the whole device tree! + */ +static rsrc_t * +locate_node(dev_t dev, di_node_t *root) +{ + rsrc_t *rsrc; + + assert(root != NULL); + + if ((rsrc = cache_lookup_bydevt(dev)) != NULL) + return (rsrc); + + (void) di_walk_minor(*root, NULL, 0, (void*)dev, locate_dev); + + return (cache_lookup_bydevt(dev)); +} + +/* + * Search for any existing dependency relationships managed by this + * RCM module. 
+ */ +static int +probe_dependencies() +{ + ttymux_assocs_t links; + ttymux_assoc_t *asp; + int cnt, n; + rsrc_t *ruser; + rsrc_t *used; + link_t *link; + di_node_t root; + + cnt = istrioctl(muxfd, TTYMUX_LIST, (void *)0, 0, 0); + + _msg(8, ("TTYMUX: Probed %d links [%d]\n", cnt, errno)); + + if (cnt <= 0) + return (0); + + if ((links.ttymux_assocs = calloc(cnt, sizeof (ttymux_assoc_t))) == 0) + return (EAGAIN); + + links.ttymux_nlinks = cnt; + + n = istrioctl(muxfd, TTYMUX_LIST, (void *)&links, sizeof (links), 0); + + if (n == -1) { + _msg(2, ("TTYMUX: Probe error %s\n", strerror(errno))); + free(links.ttymux_assocs); + return (0); + } + + asp = (ttymux_assoc_t *)links.ttymux_assocs; + + if ((root = di_init("/", DINFOSUBTREE|DINFOMINOR)) == DI_NODE_NIL) + return (errno); + + (void) mutex_lock(&cache_lock); + for (; cnt--; asp++) { + _msg(7, ("TTYMUX: Probed: %ld %ld %ld:%ld <-> %ld:%ld\n", + asp->ttymux_udev, asp->ttymux_ldev, + major(asp->ttymux_udev), minor(asp->ttymux_udev), + major(asp->ttymux_ldev), minor(asp->ttymux_ldev))); + /* + * The TTYMUX_LIST ioctl can return links relating + * to potential devices. Such devices are identified + * in the path field. 
+ */ + if (asp->ttymux_ldev == NODEV) { + char buf[PATH_MAX]; + + if (asp->ttymux_path == NULL || + *asp->ttymux_path != '/') + continue; + + if (snprintf(buf, sizeof (buf), "/devices%s", + asp->ttymux_path) > sizeof (buf)) + continue; + + used = cache_get(buf); + } else { + used = locate_node(asp->ttymux_ldev, &root); + } + if ((ruser = locate_node(asp->ttymux_udev, &root)) == NULL) { + _msg(7, ("TTYMUX: Probe: %ld:%ld not present\n", + major(asp->ttymux_udev), minor(asp->ttymux_udev))); + continue; + } + if (used == NULL) { + _msg(7, ("TTYMUX: Probe: %ld:%ld not present\n", + major(asp->ttymux_ldev), minor(asp->ttymux_ldev))); + continue; + } + _msg(6, ("TTYMUX: Probe: Restore %s <-> %s (id %d)\n", + ruser->id, used->id, asp->ttymux_linkid)); + + link = add_dependency(ruser, used); + + if (link != NULL) { + link->flags = (uint_t)asp->ttymux_ioflag; + link->linkid = asp->ttymux_linkid; + link->state = CONNECTED; + link->connect = mux_connect; + link->disconnect = mux_disconnect; + } + } + di_fini(root); + (void) mutex_unlock(&cache_lock); + free(links.ttymux_assocs); + return (0); +} + +/* + * A resource has become available. Re-establish any associations involving + * the resource. + */ +static int +rsrc_available(rsrc_t *rsrc) +{ + link_t *link; + rsrc_t *rs; + + if (rsrc->dev == NODEV) { + /* + * Now that the resource is present obtain its device number. + * For this to work the node must be present in the /devices + * tree (see devfsadm(1M) or drvconfig(1M)). + * We do not use libdevinfo because the node must be present + * under /devices for the connect step below to work + * (the node needs to be opened). + */ + (void) get_devpath(rsrc->id, NULL, &rsrc->dev); + if (rsrc->dev == NODEV) { + _msg(4, + ("Device node %s does not exist\n", rsrc->id)); + /* + * What does RCM do with failed online notifications. 
+ */ + return (RCM_FAILURE); + } + } + for (rs = cache_head.next; rs != &cache_tail; rs = rs->next) { + for (link = rs->dependencies; + link != NULL; + link = link->next) { + if (link->user == rsrc || link->used == rsrc) { + _msg(6, ("TTYMUX: re-connect\n")); + (void) link->connect(link); + } + } + } + return (RCM_SUCCESS); +} + +/* + * A resource is going away. Tear down any associations involving + * the resource. + */ +static int +rsrc_unavailable(rsrc_t *rsrc) +{ + link_t *link; + rsrc_t *rs; + + for (rs = cache_head.next; rs != &cache_tail; rs = rs->next) { + for (link = rs->dependencies; + link != NULL; + link = link->next) { + if (link->user == rsrc || link->used == rsrc) { + _msg(6, ("TTYMUX: unavailable %s %s\n", + link->user->id, link->used->id)); + (void) link->disconnect(link); + } + } + } + + return (RCM_SUCCESS); +} + +/* + * Find any resources that are using a given resource (identified by + * the rsrc argument). The search begins after the resource identified + * by the next argument. If next is NULL start at the first resource + * in this RCM modules resource list. If the redundancy argument is + * greater than zero then a resource which uses rsrc will only be + * returned if it is associated with >= redundancy dependents. + * + * Thus, provided that the caller keeps the list locked he can iterate + * through all the resources in the cache that depend upon rsrc. + */ +static rsrc_t * +get_next_user(rsrc_t *next, rsrc_t *rsrc, int redundancy) +{ + rsrc_t *src; + link_t *link; + int cnt = 0; + boolean_t inuse; + + src = (next != NULL) ? 
next->next : cache_head.next; + + while (src != &cache_tail) { + inuse = B_FALSE; + + for (link = src->dependencies, cnt = 0; + link != NULL; + link = link->next) { + + if (link->state == CONNECTED) + cnt++; + + if (link->used == rsrc) + inuse = B_TRUE; + } + if (inuse == B_TRUE && + (redundancy <= 0 || cnt == redundancy)) { + return (src); + } + + src = src->next; + } + + _msg(8, ("TTYMUX: count_users(%s) res %d.\n", rsrc->id, cnt)); + return (NULL); +} + +/* + * Common handler for RCM notifications. + */ +/*ARGSUSED*/ +static int +rsrc_change_common(rcm_handle_t *hd, int op, const char *rsrcid, uint_t flag, + char **reason, rcm_info_t **dependent_reason, void *arg) +{ + rsrc_t *rsrc, *user; + int rv, len; + char *tmp = NULL; + + (void) mutex_lock(&cache_lock); + rsrc = cache_lookup(rsrcid); + if (rsrc == NULL) { + /* shouldn't happen because rsrc has been registered */ + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + if ((muxfd = open_file(muxctl, oflags)) == -1) { + rcm_log_message(RCM_ERROR, _("TTYMUX: %s unavailable: %s\n"), + muxctl, strerror(errno)); + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); + } + switch (op) { + + case TTYMUX_SUSPEND: + rv = RCM_FAILURE; + _msg(4, ("TTYMUX: SUSPEND %s operation refused.\n", + rsrc->id)); + if ((*reason = strdup(TTYMUX_INVALID_ERR)) == NULL) { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + break; + + case TTYMUX_REMOVE: + rsrc->flags |= UNKNOWN; + rsrc->flags &= ~(PRESENT | REGISTERED); + rv = RCM_SUCCESS; + break; + + case TTYMUX_OFFLINE: + user = get_next_user(NULL, rsrc, 1); + if (flag & RCM_QUERY) { + rv = ((flag & RCM_FORCE) || (user == NULL)) ? 
+ RCM_SUCCESS : RCM_FAILURE; + if (rv == RCM_FAILURE) { + tmp = TTYMUX_OFFLINE_ERR; + assert(tmp != NULL); + len = strlen(tmp) + strlen(user->id) + 2; + if ((*reason = (char *)malloc(len)) != NULL) { + (void) snprintf(*reason, len, + "%s %s", tmp, user->id); + } else { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + } + + } else if (flag & RCM_FORCE) { + rv = rsrc_unavailable(rsrc); + + if (rv == RCM_FAILURE) { + if ((*reason = strdup(TTYMUX_OFFLINE_FAIL)) == + NULL) { + rcm_log_message(RCM_ERROR, + TTYMUX_MEMORY_ERR); + } + } + + } else if (user != NULL) { + rv = RCM_FAILURE; + tmp = TTYMUX_OFFLINE_ERR; + assert(tmp != NULL); + len = strlen(tmp) + strlen(user->id) + 2; + if ((*reason = (char *)malloc(len)) != NULL) { + (void) snprintf(*reason, len, + "%s %s", tmp, user->id); + } else { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + + } else { + rv = rsrc_unavailable(rsrc); + if (rv == RCM_FAILURE) { + if ((*reason = strdup(TTYMUX_OFFLINE_FAIL)) == + NULL) { + rcm_log_message(RCM_ERROR, + TTYMUX_MEMORY_ERR); + } + } + } + + if (rv == RCM_FAILURE) { + _msg(4, ("TTYMUX: OFFLINE %s operation refused.\n", + rsrc->id)); + + } else { + _msg(4, ("TTYMUX: OFFLINE %s res %d.\n", rsrc->id, rv)); + } + break; + + case TTYMUX_RESUME: + rv = RCM_FAILURE; + _msg(4, ("TTYMUX: RESUME %s operation refused.\n", + rsrc->id)); + if ((*reason = strdup(TTYMUX_INVALID_ERR)) == NULL) { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + break; + + case TTYMUX_ONLINE: + _msg(4, ("TTYMUX: ONLINE %s res %d.\n", rsrc->id, rv)); + rv = rsrc_available(rsrc); + if (rv == RCM_FAILURE) { + if ((*reason = strdup(TTYMUX_ONLINE_ERR)) == NULL) { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + } + break; + default: + rv = RCM_FAILURE; + if ((*reason = strdup(TTYMUX_UNKNOWN_ERR)) == NULL) { + rcm_log_message(RCM_ERROR, TTYMUX_MEMORY_ERR); + } + } + + (void) close(muxfd); + (void) mutex_unlock(&cache_lock); + return (rv); +} + +static boolean_t +find_mux_nodes(char *drv) +{ + 
di_node_t root, node; + di_minor_t dim; + char *devfspath; + char muxctlname[] = "ctl"; + char muxconname[] = "con"; + int nminors = 0; + + (void) strcpy(muxctl, MUXCTLLINK); + (void) strcpy(muxcon, MUXCONLINK); + cn_rsrc = NULL; + + if ((root = di_init("/", DINFOCPYALL)) == DI_NODE_NIL) { + rcm_log_message(RCM_WARNING, _("di_init error\n")); + return (B_FALSE); + } + + node = di_drv_first_node(drv, root); + if (node == DI_NODE_NIL) { + _msg(4, ("no node for %s\n", drv)); + di_fini(root); + return (B_FALSE); + } + /* + * If the device is not a prom node do not continue. + */ + if (di_nodeid(node) != DI_PROM_NODEID) { + di_fini(root); + return (B_FALSE); + } + if ((devfspath = di_devfs_path(node)) == NULL) { + di_fini(root); + return (B_FALSE); + } + + /* + * Loop through all the minor nodes the driver (drv) looking + * for the ctl node (this is the device on which + * to issue ioctls). + */ + dim = DI_MINOR_NIL; + while ((dim = di_minor_next(node, dim)) != DI_MINOR_NIL) { + + _msg(7, ("MUXNODES: minor %s\n", di_minor_name(dim))); + + if (strcmp(di_minor_name(dim), muxctlname) == 0) { + if (snprintf(muxctl, sizeof (muxctl), + "/devices%s:%s", devfspath, + di_minor_name(dim)) > sizeof (muxctl)) { + _msg(1, ("muxctl:snprintf error\n")); + } + if (++nminors == 2) + break; + } else if (strcmp(di_minor_name(dim), muxconname) == 0) { + if (snprintf(muxcon, sizeof (muxcon), + "/devices%s:%s", devfspath, + di_minor_name(dim)) > sizeof (muxcon)) { + _msg(1, ("muxcon:snprintf error\n")); + } + if (++nminors == 2) + break; + } + } + + di_devfs_path_free(devfspath); + di_fini(root); + + if ((muxfd = open_file(muxctl, oflags)) != -1) { + + if (istrioctl(muxfd, TTYMUX_CONSDEV, (void *)&cn_dev, + sizeof (cn_dev), 0) != 0) { + cn_dev = NODEV; + } else { + _msg(8, ("MUXNODES: found sys console: %ld:%ld\n", + major(cn_dev), minor(cn_dev))); + + cn_rsrc = cache_create(muxcon, cn_dev); + if (cn_rsrc != NULL) { + cn_rsrc->flags |= PRESENT; + cn_rsrc->flags &= ~UNKNOWN; + } + } + (void) 
close(muxfd); + + if (cn_dev != NODEV) + return (B_TRUE); + } else { + _msg(1, ("TTYMUX: %s unavailable: %s\n", + muxctl, strerror(errno))); + } + + return (B_FALSE); +} + +/* + * Update registrations, and return the ops structure. + */ +struct rcm_mod_ops * +rcm_mod_init() +{ + _msg(4, ("TTYMUX: mod_init:\n")); + cache_head.next = &cache_tail; + cache_head.prev = NULL; + cache_tail.prev = &cache_head; + cache_tail.next = NULL; + (void) mutex_init(&cache_lock, NULL, NULL); + + /* + * Find the multiplexer ctl and con nodes + */ + register_rsrcs = find_mux_nodes(TTYMUX_DRVNAME); + + return (&tty_ops); +} + +/* + * Save state and release resources. + */ +int +rcm_mod_fini() +{ + rsrc_t *rsrc; + link_t *link, *nlink; + + _msg(7, ("TTYMUX: freeing cache.\n")); + (void) mutex_lock(&cache_lock); + rsrc = cache_head.next; + while (rsrc != &cache_tail) { + cache_remove(rsrc); + + for (link = rsrc->dependencies; link != NULL; ) { + nlink = link->next; + free(link); + link = nlink; + } + + free_node(rsrc); + rsrc = cache_head.next; + } + (void) mutex_unlock(&cache_lock); + + (void) mutex_destroy(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * Return a string describing this module. + */ +const char * +rcm_mod_info() +{ + return ("Serial mux device module 1.1"); +} + +/* + * RCM Notification Handlers + */ + +static int +tty_register(rcm_handle_t *hd) +{ + rsrc_t *rsrc; + link_t *link; + int rv; + + if (register_rsrcs == B_FALSE) + return (RCM_SUCCESS); + + if ((muxfd = open_file(muxctl, oflags)) == -1) { + rcm_log_message(RCM_ERROR, _("TTYMUX: %s unavailable: %s\n"), + muxctl, strerror(errno)); + return (RCM_SUCCESS); + } + /* + * Search for any new dependencies since the last notification or + * since module was initialisated. + */ + (void) probe_dependencies(); + + /* + * Search the whole cache looking for any unregistered used resources + * and register them. 
Note that the 'using resource' (a ttymux device + * node) is not subject to DR operations so there is no need to + * register them with the RCM framework. + */ + (void) mutex_lock(&cache_lock); + for (rsrc = cache_head.next; rsrc != &cache_tail; rsrc = rsrc->next) { + _msg(6, ("TTYMUX: REGISTER rsrc %s flags %d\n", + rsrc->id, rsrc->flags)); + + if (rsrc->dependencies != NULL && + (rsrc->flags & REGISTERED) == 0) { + _msg(6, ("TTYMUX: Registering rsrc %s\n", rsrc->id)); + rv = rcm_register_interest(hd, rsrc->id, 0, NULL); + if (rv == RCM_SUCCESS) + rsrc->flags |= REGISTERED; + } + + for (link = rsrc->dependencies; link != NULL; + link = link->next) { + if ((link->used->flags & REGISTERED) != 0) + continue; + + _msg(6, ("TTYMUX: Registering rsrc %s\n", + link->used->id)); + rv = rcm_register_interest(hd, link->used->id, + 0, NULL); + if (rv != RCM_SUCCESS) + rcm_log_message(RCM_WARNING, + _("TTYMUX: err %d registering %s\n"), + rv, link->used->id); + else + link->used->flags |= REGISTERED; + } + } + + (void) mutex_unlock(&cache_lock); + (void) close(muxfd); + return (RCM_SUCCESS); +} + +/* + * Unregister all registrations. + */ +static int +tty_unregister(rcm_handle_t *hd) +{ + rsrc_t *rsrc; + + (void) mutex_lock(&cache_lock); + /* + * Search every resource in the cache and if it has been registered + * then unregister it from the RCM framework. + */ + for (rsrc = cache_head.next; rsrc != &cache_tail; rsrc = rsrc->next) { + if ((rsrc->flags & REGISTERED) == 0) + continue; + + if (rcm_unregister_interest(hd, rsrc->id, 0) != RCM_SUCCESS) + rcm_log_message(RCM_WARNING, + _("TTYMUX: Failed to unregister %s\n"), rsrc->id); + else + rsrc->flags &= ~REGISTERED; + } + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); +} + +/* + * Report resource usage information. 
+ */ +/*ARGSUSED*/ +static int +tty_getinfo(rcm_handle_t *hd, char *rsrcid, id_t id, uint_t flag, char **info, + char **errstr, nvlist_t *proplist, rcm_info_t **depend_info) +{ + rsrc_t *rsrc, *user; + char *ru; + size_t sz; + + (void) mutex_lock(&cache_lock); + rsrc = cache_lookup(rsrcid); + + if (rsrc == NULL) { + (void) mutex_unlock(&cache_lock); + *errstr = strdup(gettext("Unmanaged resource")); + return (RCM_FAILURE); + } + + ru = strdup(gettext("Resource Users")); + user = NULL; + while ((user = get_next_user(user, rsrc, -1)) != NULL) { + *info = ru; + sz = strlen(*info) + strlen(user->id) + 2; + ru = malloc(sz); + if (ru == NULL) { + free(*info); + *info = NULL; + break; + } + if (snprintf(ru, sz, ": %s%s", *info, user->id) > sz) { + _msg(4, ("tty_getinfo: snprintf error.\n")); + } + + free(*info); + } + *info = ru; + + if (*info == NULL) { + (void) mutex_unlock(&cache_lock); + *errstr = strdup(gettext("Short of memory resources")); + return (RCM_FAILURE); + } + + (void) mutex_unlock(&cache_lock); + return (RCM_SUCCESS); +} + +/*ARGSUSED*/ +static int +tty_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **reason, rcm_info_t **dependent_reason) +{ + return (rsrc_change_common(hd, TTYMUX_OFFLINE, rsrc, flags, + reason, dependent_reason, NULL)); +} + +/*ARGSUSED*/ +static int +tty_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **reason, rcm_info_t **dependent_reason) +{ + return (rsrc_change_common(hd, TTYMUX_REMOVE, rsrc, flags, + reason, dependent_reason, NULL)); +} + +/*ARGSUSED*/ +static int +tty_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval, + uint_t flag, char **reason, rcm_info_t **dependent_reason) +{ + return (rsrc_change_common(hd, TTYMUX_SUSPEND, rsrc, flag, + reason, dependent_reason, (void *)interval)); +} + +/*ARGSUSED*/ +static int +tty_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **reason, rcm_info_t **dependent_reason) +{ + return (rsrc_change_common(hd, 
TTYMUX_ONLINE, rsrc, flags, + reason, dependent_reason, NULL)); +} + +/*ARGSUSED*/ +static int +tty_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, + char **reason, rcm_info_t **dependent_reason) +{ + return (rsrc_change_common(hd, TTYMUX_RESUME, rsrc, flags, + reason, dependent_reason, NULL)); +} diff --git a/usr/src/cmd/rcm_daemon/common/ttymux_rcm_impl.h b/usr/src/cmd/rcm_daemon/common/ttymux_rcm_impl.h new file mode 100644 index 0000000000..e1ae3acfb8 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/common/ttymux_rcm_impl.h @@ -0,0 +1,100 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2001 by Sun Microsystems, Inc. + * All rights reserved. 
+ */ + +#ifndef _TTYMUX_RCM_IMPL_H +#define _TTYMUX_RCM_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +/* _(x) wraps a message in gettext(); under lint it expands to x unchanged */ +#ifndef lint +#define _(x) gettext(x) +#else +#define _(x) x +#endif + +#define UNKNOWN 1 /* rsrc_t flags bit */ +#define PRESENT 2 /* rsrc_t flags bit */ +#define REGISTERED 4 /* rsrc_t flags bit: registered with RCM */ +#define CONNECTED 8 /* link_t state value */ +#define DISCONNECTED 0x10 /* link_t state value */ + +/* RCM operations (the op argument of rsrc_change_common) */ +#define TTYMUX_OFFLINE 1 +#define TTYMUX_ONLINE 2 +#define TTYMUX_REMOVE 3 +#define TTYMUX_SUSPEND 4 +#define TTYMUX_RESUME 5 + +/* + * Representation of a resource. + * All resources are placed in a cache structured as a doubly linked list + * (ie the next and prev fields). + * The dependencies list identifies which resources this resource is + * depending upon. + */ +typedef struct rsrc { + char *id; /* resource name; opened as a device path by mux_connect */ + dev_t dev; /* device number, NODEV while not known */ + int flags; /* UNKNOWN, PRESENT and REGISTERED bits */ + struct rsrc *next; /* next resource in the cache */ + struct rsrc *prev; /* previous resource in the cache */ + struct link *dependencies; /* list of links in which this resource is used by/uses others */ +} rsrc_t; + +/* + * Representation of a pair of resources participating in a + * dependency relationship + * The dependency is cast in terms of a resource that is using + * another resource in order to provide a service. + * This structure is used to represent a ttymux minor node that + * has another serial device multiplexed under it. In this + * case user resource would correspond to the ttymux minor node and + * the used resource would correspond to the multiplexed serial device. + * The linkid field refers to the STREAM's link identifier. 
+ */ +typedef struct link { + rsrc_t *user; /* the using resource */ + rsrc_t *used; /* the used resource */ + int linkid; /* STREAM's link identifier */ + uint_t state; /* CONNECTED or DISCONNECTED */ + int flags; /* I/O mode flags (taken from ttymux_ioflag) */ + int (*connect)(struct link *); /* establishes the association */ + int (*disconnect)(struct link *); /* tears the association down */ + struct link *next; /* next link in the owning resource's dependencies list */ +} link_t; + +/* default /devices paths of the multiplexer control and console nodes */ +#define MUXCTLLINK "/devices/multiplexer@0,0:ctl" +#define MUXCONLINK "/devices/multiplexer@0,0:con" + +#ifdef __cplusplus +} +#endif + +#endif /* _TTYMUX_RCM_IMPL_H */ diff --git a/usr/src/cmd/rcm_daemon/i386/Makefile b/usr/src/cmd/rcm_daemon/i386/Makefile new file mode 100644 index 0000000000..2b2ae9eef0 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/i386/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1999 by Sun Microsystems, Inc. +# All rights reserved. 
+# +# cmd/rcm_daemon/i386/Makefile +# + +include ../Makefile.com + +install: all diff --git a/usr/src/cmd/rcm_daemon/sparc/Makefile b/usr/src/cmd/rcm_daemon/sparc/Makefile new file mode 100644 index 0000000000..4e46ae6ba2 --- /dev/null +++ b/usr/src/cmd/rcm_daemon/sparc/Makefile @@ -0,0 +1,33 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright (c) 1999 by Sun Microsystems, Inc. +# All rights reserved. +# +# cmd/rcm_daemon/sparc/Makefile +# + +include ../Makefile.com + +install: all |
