-rw-r--r--  usr/src/Makefile.lint | 2
-rw-r--r--  usr/src/cmd/Makefile | 1
-rw-r--r--  usr/src/cmd/acpihpd/Makefile | 62
-rw-r--r--  usr/src/cmd/acpihpd/Makefile.com | 46
-rw-r--r--  usr/src/cmd/acpihpd/Makefile.targ | 62
-rw-r--r--  usr/src/cmd/acpihpd/acpihpd.c | 325
-rw-r--r--  usr/src/cmd/acpihpd/acpihpd.xml | 93
-rw-r--r--  usr/src/cmd/acpihpd/i386/Makefile | 37
-rw-r--r--  usr/src/cmd/acpihpd/notify.c | 353
-rw-r--r--  usr/src/cmd/acpihpd/svc-acpihpd | 39
-rw-r--r--  usr/src/lib/cfgadm_plugins/Makefile | 4
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/Makefile.com | 5
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile | 40
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap.c | 21
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c | 13
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c | 106
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c | 29
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c | 52
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/cfga.c | 13
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/i386/Makefile | 39
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile | 9
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile | 9
-rw-r--r--  usr/src/pkg/license_files/cr_Intel.hotplug | 1
-rw-r--r--  usr/src/pkg/manifests/system-header.mf | 1
-rw-r--r--  usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf | 83
-rw-r--r--  usr/src/uts/common/os/cpu_event.c | 65
-rw-r--r--  usr/src/uts/common/os/cpu_pm.c | 10
-rw-r--r--  usr/src/uts/common/os/mem_config.c | 27
-rw-r--r--  usr/src/uts/common/os/mem_config_stubs.c | 6
-rw-r--r--  usr/src/uts/common/os/sunddi.c | 939
-rw-r--r--  usr/src/uts/common/sys/esunddi.h | 43
-rw-r--r--  usr/src/uts/common/sys/pci_impl.h | 3
-rw-r--r--  usr/src/uts/common/sys/processor.h | 3
-rw-r--r--  usr/src/uts/common/syscall/p_online.c | 24
-rw-r--r--  usr/src/uts/common/vm/page.h | 4
-rw-r--r--  usr/src/uts/common/vm/seg_kpm.h | 12
-rw-r--r--  usr/src/uts/i86pc/Makefile.files | 11
-rw-r--r--  usr/src/uts/i86pc/Makefile.i86pc.shared | 5
-rw-r--r--  usr/src/uts/i86pc/Makefile.rules | 38
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h | 4
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c | 48
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c | 53
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c | 28
-rw-r--r--  usr/src/uts/i86pc/dr/Makefile | 104
-rw-r--r--  usr/src/uts/i86pc/drmach_acpi/Makefile | 101
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c | 53
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c | 224
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c | 40
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c | 2743
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c | 205
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c | 44
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c | 172
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c | 106
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c | 39
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c | 163
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c | 74
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c | 712
-rw-r--r--  usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c | 2927
-rw-r--r--  usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h | 199
-rw-r--r--  usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c | 18
-rw-r--r--  usr/src/uts/i86pc/io/cbe.c | 10
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr.c | 3329
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr.conf | 27
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_cpu.c | 885
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_io.c | 389
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_mem_acpi.c | 639
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_quiesce.c | 1050
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_util.c | 457
-rw-r--r--  usr/src/uts/i86pc/io/dr/sbdgenerr.pl | 90
-rw-r--r--  usr/src/uts/i86pc/io/mp_platform_common.c | 180
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic.c | 495
-rw-r--r--  usr/src/uts/i86pc/io/psm/uppc.c | 7
-rw-r--r--  usr/src/uts/i86pc/ml/mpcore.s | 156
-rw-r--r--  usr/src/uts/i86pc/ml/offsets.in | 2
-rw-r--r--  usr/src/uts/i86pc/os/acpi_fw.h | 26
-rw-r--r--  usr/src/uts/i86pc/os/cmi.c | 64
-rw-r--r--  usr/src/uts/i86pc/os/cmi_hw.c | 66
-rw-r--r--  usr/src/uts/i86pc/os/cms.c | 14
-rw-r--r--  usr/src/uts/i86pc/os/cpr_impl.c | 19
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c | 22
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpu_acpi.c | 3
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpu_idle.c | 52
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpupm_intel.c | 4
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpupm_mach.c | 32
-rw-r--r--  usr/src/uts/i86pc/os/fakebop.c | 162
-rw-r--r--  usr/src/uts/i86pc/os/lgrpplat.c | 1034
-rw-r--r--  usr/src/uts/i86pc/os/machdep.c | 44
-rw-r--r--  usr/src/uts/i86pc/os/mem_config_arch.c | 56
-rw-r--r--  usr/src/uts/i86pc/os/memnode.c | 4
-rw-r--r--  usr/src/uts/i86pc/os/microcode.c | 10
-rw-r--r--  usr/src/uts/i86pc/os/mlsetup.c | 66
-rw-r--r--  usr/src/uts/i86pc/os/mp_implfuncs.c | 2
-rw-r--r--  usr/src/uts/i86pc/os/mp_machdep.c | 98
-rw-r--r--  usr/src/uts/i86pc/os/mp_pc.c | 513
-rw-r--r--  usr/src/uts/i86pc/os/mp_startup.c | 578
-rw-r--r--  usr/src/uts/i86pc/os/startup.c | 93
-rw-r--r--  usr/src/uts/i86pc/os/x_call.c | 112
-rw-r--r--  usr/src/uts/i86pc/sys/Makefile | 1
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev.h | 141
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev_dr.h | 252
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev_impl.h | 64
-rw-r--r--  usr/src/uts/i86pc/sys/acpinex.h | 6
-rw-r--r--  usr/src/uts/i86pc/sys/apic.h | 7
-rw-r--r--  usr/src/uts/i86pc/sys/dr.h | 500
-rw-r--r--  usr/src/uts/i86pc/sys/dr_util.h | 99
-rw-r--r--  usr/src/uts/i86pc/sys/drmach.h | 177
-rw-r--r--  usr/src/uts/i86pc/sys/fastboot_msg.h | 1
-rw-r--r--  usr/src/uts/i86pc/sys/machcpuvar.h | 5
-rw-r--r--  usr/src/uts/i86pc/sys/machsystm.h | 75
-rw-r--r--  usr/src/uts/i86pc/sys/memnode.h | 5
-rw-r--r--  usr/src/uts/i86pc/sys/psm_types.h | 54
-rw-r--r--  usr/src/uts/i86pc/sys/rm_platter.h | 14
-rw-r--r--  usr/src/uts/i86pc/sys/sbd_ioctl.h | 655
-rw-r--r--  usr/src/uts/i86pc/sys/x_call.h | 2
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c | 105
-rw-r--r--  usr/src/uts/i86pc/vm/vm_dep.h | 35
-rw-r--r--  usr/src/uts/i86pc/vm/vm_machdep.c | 288
-rw-r--r--  usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c | 6
-rw-r--r--  usr/src/uts/i86xpv/io/psm/xpv_psm.c | 5
-rw-r--r--  usr/src/uts/i86xpv/io/psm/xpv_uppc.c | 5
-rw-r--r--  usr/src/uts/intel/ia32/ml/modstubs.s | 11
-rw-r--r--  usr/src/uts/intel/ia32/os/sundep.c | 18
-rw-r--r--  usr/src/uts/intel/io/acpica/acpica.c | 4
-rw-r--r--  usr/src/uts/intel/io/acpica/osl.c | 86
-rw-r--r--  usr/src/uts/intel/sys/acpica.h | 6
-rw-r--r--  usr/src/uts/intel/sys/bootconf.h | 13
-rw-r--r--  usr/src/uts/intel/sys/cpu_module.h | 5
-rw-r--r--  usr/src/uts/intel/sys/memlist_plat.h | 55
-rw-r--r--  usr/src/uts/sun4/os/ddi_impl.c | 938
-rw-r--r--  usr/src/uts/sun4u/sys/machsystm.h | 42
-rw-r--r--  usr/src/uts/sun4v/sys/machsystm.h | 40
131 files changed, 22855 insertions(+), 2347 deletions(-)
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 90b76afe47..2182a61ff9 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -477,11 +477,13 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \
$(CLOSED)/lib/libc_i18n
i386_SUBDIRS= \
+ cmd/acpihpd \
cmd/biosdev \
cmd/rtc \
cmd/ucodeadm \
lib/brand/lx \
lib/cfgadm_plugins/sata \
+ lib/cfgadm_plugins/sbd \
lib/libfdisk
sparc_SUBDIRS= \
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 2d0ce5e6d5..b493fa6ef1 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -476,6 +476,7 @@ $(CLOSED)/cmd/tail \
$(CLOSED)/cmd/tr_xpg4
i386_SUBDIRS= \
+ acpihpd \
addbadsec \
biosdev \
diskscan \
diff --git a/usr/src/cmd/acpihpd/Makefile b/usr/src/cmd/acpihpd/Makefile
new file mode 100644
index 0000000000..33a26fb2be
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile
+#
+
+PLATFORM = i86pc
+MANIFEST = acpihpd.xml
+SVCMETHOD = svc-acpihpd
+
+include ../Makefile.cmd
+
+$(64ONLY)SUBDIRS= $(MACH)
+
+ROOTMANIFESTDIR = $(ROOTSVCPLATFORM)/${PLATFORM}
+
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all: $(SUBDIRS)
+
+install: $(SUBDIRS) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+
+clean clobber lint: $(SUBDIRS)
+
+check: $(CHKMANIFEST)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/Makefile.com b/usr/src/cmd/acpihpd/Makefile.com
new file mode 100644
index 0000000000..287a48d896
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.com
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.com
+#
+
+LIBS = acpihpd
+PLATFORM = i86pc
+
+include ../../Makefile.cmd
+include ../../../Makefile.psm
+
+OBJS = acpihpd.o notify.o
+SRCS = $(OBJS:%.o=../%.c)
+LINTFILES = $(SRCS:%.c=%.ln)
+CLOBBERFILES = $(LIBS)
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(USR_PSM_INCL_DIR)
+LDLIBS += -lsysevent -lnvpair -lcfgadm
+
+FILEMODE = 0755
diff --git a/usr/src/cmd/acpihpd/Makefile.targ b/usr/src/cmd/acpihpd/Makefile.targ
new file mode 100644
index 0000000000..89da583847
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.targ
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.targ
+#
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+clean:
+ $(RM) $(OBJS)
+
+lint:
+ $(LINT.c) $(SRCS) $(LDLIBS)
+
+$(LIBS): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+$(USR_PLAT_DIR):
+ -$(INS.dir)
+
+$(USR_PSM_DIR): $(USR_PLAT_DIR)
+ -$(INS.dir)
+
+$(USR_PSM_LIB_DIR): $(USR_PSM_DIR)
+ -$(INS.dir)
+
+$(USR_PSM_LIB_DIR)/%: % $(USR_PSM_LIB_DIR)
+ $(INS.file)
+
+include ../../Makefile.targ
+
diff --git a/usr/src/cmd/acpihpd/acpihpd.c b/usr/src/cmd/acpihpd/acpihpd.c
new file mode 100644
index 0000000000..6beee85f54
--- /dev/null
+++ b/usr/src/cmd/acpihpd/acpihpd.c
@@ -0,0 +1,325 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dr.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <syslog.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <config_admin.h>
+#include <libscf.h>
+#include <libsysevent.h>
+#include <stdarg.h>
+
+/* Signal handler type */
+typedef void (sig_handler_t)(int);
+
+#define ACPIHPD_PID_FILE "/var/run/acpihpd.pid" /* lock file path */
+
+/* Program Name */
+char *g_prog_name;
+int g_debuglevel = 0;
+
+static int s_pid_fd;
+static sysevent_handle_t *s_acpihpd_hdl;
+
+static int daemon_init(void);
+static void daemon_quit(int);
+static int set_sig_handler(int, sig_handler_t *);
+static int acpihpd_init(void);
+static void acpihpd_fini(void);
+static void acpihpd_event(sysevent_t *);
+extern void notify_hotplug(sysevent_t *ev);
+void debug_print(int, const char *, ...);
+
+int
+main(int argc, char *argv[])
+{
+ int c;
+
+ /* Get Program Name */
+ if ((g_prog_name = strrchr(argv[0], '/')) == NULL) {
+ g_prog_name = argv[0];
+ } else {
+ g_prog_name++;
+ }
+
+ while ((c = getopt(argc, argv, ":d:")) != -1) {
+ switch (c) {
+ case 'd':
+ g_debuglevel = atoi(optarg);
+ if ((g_debuglevel < 0) || (g_debuglevel > 2)) {
+ g_debuglevel = 0;
+ }
+ break;
+
+ case ':':
+ syslog(LOG_ERR,
+ "missed argument for option %c.", optopt);
+ break;
+
+ case '?':
+ syslog(LOG_ERR, "unrecognized option %c.", optopt);
+ break;
+ }
+ }
+
+ s_acpihpd_hdl = NULL;
+
+ /* Check the daemon running lock and initialize the signal */
+ if (daemon_init() != 0) {
+ debug_print(0, "%s could not startup!", g_prog_name);
+ exit(SMF_EXIT_ERR_FATAL);
+ }
+
+ /* Subscribe to the hotplug event */
+ if (acpihpd_init() != 0) {
+ debug_print(0, "%s could not startup!", g_prog_name);
+ daemon_quit(SMF_EXIT_ERR_FATAL);
+ }
+
+ debug_print(2, "daemon is running.");
+ /*CONSTCOND*/
+ while (1) {
+ (void) pause();
+ }
+
+ return (SMF_EXIT_OK);
+}
+
+static int
+daemon_init(void)
+{
+ int i, ret;
+ pid_t pid;
+ char pid_str[32];
+
+ if (geteuid() != 0) {
+ debug_print(0, "must be root to execute %s", g_prog_name);
+ return (1);
+ }
+
+ if ((pid = fork()) < 0) {
+ return (1);
+ }
+
+ if (pid > 0) {
+ /* Parent to exit. */
+ exit(SMF_EXIT_OK);
+ }
+
+ (void) setsid();
+ (void) chdir("/");
+ (void) umask(0);
+ (void) closefrom(0);
+ (void) open("/dev/null", O_RDONLY);
+ (void) open("/dev/null", O_WRONLY);
+ (void) dup(1);
+ (void) openlog(g_prog_name, LOG_PID, LOG_DAEMON);
+
+ /*
+ * Create the lock file for singleton
+ */
+ if ((s_pid_fd = open(ACPIHPD_PID_FILE, O_RDWR | O_CREAT, 0644)) < 0) {
+ debug_print(0, "could not create pid file: %s",
+ strerror(errno));
+ return (1);
+ }
+
+ if (lockf(s_pid_fd, F_TLOCK, 0L) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ debug_print(0, "another acpihpd is already running");
+ } else {
+ debug_print(0, "could not lock pid file");
+ }
+
+ return (1);
+ }
+
+ (void) ftruncate(s_pid_fd, 0);
+ i = sprintf(pid_str, "%ld", (long)getpid());
+ while ((ret = write(s_pid_fd, pid_str, i)) != i) {
+ if (errno == EINTR) {
+ continue;
+ }
+ if (ret < 0) {
+ debug_print(0, "pid file write failed: %s",
+ strerror(errno));
+ return (1);
+ }
+ }
+
+ if (set_sig_handler(SIGTERM, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGTERM)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGQUIT, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGQUIT)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGINT, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGINT)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGCHLD, SIG_IGN) != 0) {
+ debug_print(2, "could not set signal handler(SIGCHLD)");
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+daemon_quit(int signo)
+{
+ int status = 0;
+ id_t pgid;
+
+ debug_print(1, "daemon quit [signal#:%d].", signo);
+
+ acpihpd_fini();
+ (void) set_sig_handler(SIGTERM, SIG_IGN);
+ pgid = getpgrp();
+ (void) kill(-pgid, SIGTERM);
+ (void) close(s_pid_fd);
+ (void) unlink(ACPIHPD_PID_FILE);
+
+ if (signo < 0) {
+ status = signo;
+ }
+ _exit(status);
+}
+
+static int
+set_sig_handler(int sig, sig_handler_t *handler)
+{
+ struct sigaction act;
+
+ act.sa_handler = handler;
+ act.sa_flags = 0;
+ if (sig == SIGCHLD && handler == SIG_IGN) {
+ act.sa_flags |= SA_NOCLDWAIT;
+ }
+
+ (void) sigemptyset(&act.sa_mask);
+ if (sigaction(sig, &act, NULL) < 0) {
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+acpihpd_init(void)
+{
+ const char *subclass = ESC_DR_REQ;
+
+ debug_print(2, "acpihpd_init");
+
+ if ((s_acpihpd_hdl = sysevent_bind_handle(acpihpd_event)) == NULL) {
+ debug_print(2, "could not bind to sysevent.");
+ return (-1);
+ }
+
+ if (sysevent_subscribe_event(s_acpihpd_hdl, EC_DR, &subclass, 1) != 0) {
+ debug_print(2, "could not subscribe an event.");
+ sysevent_unbind_handle(s_acpihpd_hdl);
+ s_acpihpd_hdl = NULL;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+acpihpd_fini(void)
+{
+ debug_print(2, "acpihpd_fini");
+
+ if (s_acpihpd_hdl != NULL) {
+ sysevent_unsubscribe_event(s_acpihpd_hdl, EC_DR);
+ sysevent_unbind_handle(s_acpihpd_hdl);
+ }
+}
+
+static void
+acpihpd_event(sysevent_t *ev)
+{
+ debug_print(2, "*** got an event ***");
+
+ /* Inform cfgadm of the hot-plug event. */
+ notify_hotplug(ev);
+}
+
+void
+debug_print(int level, const char *fmt, ...)
+{
+ va_list ap;
+ int pri, pr_out = 0;
+
+ if (level <= g_debuglevel) {
+ switch (level) {
+ case 0:
+ pri = LOG_ERR;
+ pr_out = 1;
+ break;
+
+ case 1:
+ pri = LOG_NOTICE;
+ pr_out = 1;
+ break;
+
+ case 2:
+ pri = LOG_DEBUG;
+ pr_out = 1;
+ break;
+ }
+
+ if (pr_out) {
+ va_start(ap, fmt);
+ vsyslog(pri, fmt, ap);
+ va_end(ap);
+ }
+ }
+}
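
For reference, acpihpd accepts a single -d option (debug level 0-2) and logs
everything through syslog's daemon facility, so the daemon can be exercised
by hand with a sketch like the following (it assumes the default syslog
configuration routes daemon-facility messages to /var/adm/messages):

    # /usr/platform/i86pc/lib/acpihpd -d 2
    # tail -f /var/adm/messages
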
diff --git a/usr/src/cmd/acpihpd/acpihpd.xml b/usr/src/cmd/acpihpd/acpihpd.xml
new file mode 100644
index 0000000000..e8e799de08
--- /dev/null
+++ b/usr/src/cmd/acpihpd/acpihpd.xml
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+
+ Copyright (c) 2010, Intel Corporation.
+ All rights reserved.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ NOTE: This service manifest is not editable; its contents will
+ be overwritten by package or patch operations, including
+ operating system upgrade. Make customizations in a different
+ file.
+-->
+
+<service_bundle type='manifest' name='SUNWacpihpd:acpihpd'>
+
+<service
+ name='platform/i86pc/acpihpd'
+ type='service'
+ version='1'>
+
+ <create_default_instance enabled='true' />
+
+ <single_instance/>
+
+ <!--
+ acpihpd should be started after syseventd is running.
+ -->
+ <dependency
+ name='startup_req'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/system/sysevent' />
+ </dependency>
+
+ <exec_method
+ type='method'
+ name='start'
+ exec='/lib/svc/method/svc-acpihpd'
+ timeout_seconds='60' >
+ <method_context>
+ <method_credential user='root' group='root'
+ privileges='basic,!proc_info,sys_config,sys_mount,sys_devices' />
+ </method_context>
+ </exec_method>
+
+ <exec_method
+ type='method'
+ name='stop'
+ exec=':kill'
+ timeout_seconds='60' >
+ <method_context>
+ <method_credential user='root' group='root' />
+ </method_context>
+ </exec_method>
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang='C'>
+ Intel ACPI hot-plug daemon
+ </loctext>
+ </common_name>
+ <documentation>
+ <manpage title='acpihpd' section='1M'
+ manpath='/usr/share/man' />
+ </documentation>
+ </template>
+</service>
+
+</service_bundle>
+
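
A sketch of validating and loading this manifest by hand (normally the
package install or the manifest-import service does this; the install path
under /var/svc/manifest is an assumption based on ROOTMANIFESTDIR in the
acpihpd Makefile above):

    # svccfg validate acpihpd.xml
    # svccfg import /var/svc/manifest/platform/i86pc/acpihpd.xml
    # svcs -l svc:/platform/i86pc/acpihpd:default
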
diff --git a/usr/src/cmd/acpihpd/i386/Makefile b/usr/src/cmd/acpihpd/i386/Makefile
new file mode 100644
index 0000000000..4356b38b82
--- /dev/null
+++ b/usr/src/cmd/acpihpd/i386/Makefile
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/i386/Makefile
+#
+
+include ../Makefile.com
+
+.KEEP_STATE:
+
+install: all $(USR_PSM_LIBS)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/notify.c b/usr/src/cmd/acpihpd/notify.c
new file mode 100644
index 0000000000..138b1a233d
--- /dev/null
+++ b/usr/src/cmd/acpihpd/notify.c
@@ -0,0 +1,353 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <config_admin.h>
+#include <strings.h>
+#include <syslog.h>
+#include <libsysevent.h>
+#include <libdevinfo.h>
+#include <libnvpair.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sbd_ioctl.h>
+#include <sys/acpidev.h>
+
+#define PMCONFIG_PATH "/usr/sbin/pmconfig"
+
+#define CFGADM_CMD_ASSIGN "assign"
+#define CFGADM_CMD_POWERON "poweron"
+#define CFGADM_CMD_PASSTHRU "passthru"
+
+#define STATUS_INPROGRESS 0
+#define STATUS_SUCCESS 1
+#define STATUS_FAILURE 2
+#define STATUS_NOOP 3
+
+static char *s_status_array[] = {
+ ACPIDEV_CMD_OST_INPROGRESS,
+ ACPIDEV_CMD_OST_SUCCESS,
+ ACPIDEV_CMD_OST_FAILURE,
+ ACPIDEV_CMD_OST_NOOP
+};
+
+extern void debug_print(int, const char *, ...);
+
+/*ARGSUSED*/
+static int
+confirm_no(void *appdata_ptr, const char *message)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+message_output(void *appdata_ptr, const char *message)
+{
+ debug_print(2, "cfgadm message: %s", message);
+ return (CFGA_OK);
+}
+
+static char *
+plat_opt_str_alloc(int cmd, char *acpi_event_type, int status)
+{
+ char *opt;
+ size_t len;
+
+ if (cmd == SBD_CMD_PASSTHRU) {
+ len = strlen(s_status_array[status]) +
+ strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) +
+ strlen(acpi_event_type) + 10;
+ if ((opt = malloc(len)) != NULL) {
+ (void) snprintf(opt, len, "%s %s=%s",
+ s_status_array[status],
+ ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type);
+ debug_print(2, "plat_opt_str_alloc = '%s'", opt);
+ }
+ } else {
+ len = strlen("platform=''") +
+ strlen(s_status_array[status]) +
+ strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) +
+ strlen(acpi_event_type) + 10;
+ if ((opt = malloc(len)) != NULL) {
+ (void) snprintf(opt, len, "platform='%s %s=%s'",
+ s_status_array[status],
+ ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type);
+ debug_print(2, "plat_opt_str_alloc = '%s'", opt);
+ }
+ }
+
+ return (opt);
+}
+
+static int
+cfgadm_cmd_wrapper(int cmd, int apid_num, char **apids,
+ char *acpi_event_type, int status,
+ struct cfga_confirm *confirm, struct cfga_msg *message)
+{
+ cfga_err_t ret;
+ char *plat_opts;
+ char *estrp = NULL;
+
+ assert(apid_num == 1);
+ assert(apids != NULL);
+
+ plat_opts = plat_opt_str_alloc(cmd, acpi_event_type, status);
+ if (plat_opts == NULL) {
+ debug_print(0,
+ "failed to generate platform option string for cfgadm");
+ return (-1);
+ }
+
+ switch (cmd) {
+ case SBD_CMD_CONNECT:
+ ret = config_change_state(CFGA_CMD_CONNECT, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('connect', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_CONFIGURE:
+ ret = config_change_state(CFGA_CMD_CONFIGURE, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('configure', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_ASSIGN:
+ ret = config_private_func(CFGADM_CMD_ASSIGN, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('assign', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_POWERON:
+ ret = config_private_func(CFGADM_CMD_POWERON, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('poweron', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_PASSTHRU:
+ ret = config_private_func(CFGADM_CMD_PASSTHRU, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('passthru', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ default:
+ debug_print(2, "unknown command (%d) to cfgadm_cmd_wrapper()");
+ ret = CFGA_ERROR;
+ break;
+ }
+
+ if (plat_opts != NULL)
+ free(plat_opts);
+
+ return (ret == CFGA_OK ? 0 : -1);
+}
+
+static int
+event_process(char *ap_id, char *req, char *acpi_event_type)
+{
+ char *apids[1];
+ struct cfga_msg message;
+ struct cfga_confirm confirm;
+
+ if (strcmp(req, DR_REQ_INCOMING_RES) != 0) {
+ debug_print(2,
+ "Event is not supported (ap_id = %s, req = %s)",
+ ap_id, req);
+ return (-1);
+ }
+
+ apids[0] = ap_id;
+ (void) memset(&confirm, 0, sizeof (confirm));
+ confirm.confirm = confirm_no;
+ (void) memset(&message, 0, sizeof (message));
+ message.message_routine = message_output;
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_ASSIGN, 1, apids,
+ acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) {
+ goto L_ERR;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been assigned successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_POWERON, 1, apids,
+ acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been powered on successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_CONNECT, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been connected successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_CONFIGURE, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been configured successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_SUCCESS, &confirm, &message);
+
+ (void) system(PMCONFIG_PATH);
+ syslog(LOG_NOTICE,
+ "board '%s' has been added into system successfully", ap_id);
+
+ return (0);
+
+L_ERR_SIG:
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_FAILURE, &confirm, &message);
+L_ERR:
+ syslog(LOG_ERR, "failed to add board '%s' into system", ap_id);
+
+ return (-1);
+}
+
+void
+notify_hotplug(sysevent_t *ev)
+{
+ char *vendor = NULL;
+ nvlist_t *attr_list = NULL;
+ char *class, *subclass;
+ char *ap_id, *req, *acpi_event_type;
+
+ vendor = sysevent_get_vendor_name(ev);
+ debug_print(2, "message_vendor = '%s'", vendor ? vendor : "unknown");
+ if (vendor == NULL || strcmp(vendor, SUNW_VENDOR) != 0) {
+ debug_print(2,
+ "vendor id of message is not '%s'", SUNW_VENDOR);
+ goto L_EXIT;
+ }
+
+ class = sysevent_get_class_name(ev);
+ debug_print(2, "message_class = '%s'", class ? class : "unknown");
+ if (class == NULL || strcmp(class, EC_DR) != 0) {
+ debug_print(2, "class of message is not '%s'", EC_DR);
+ goto L_EXIT;
+ }
+
+ subclass = sysevent_get_subclass_name(ev);
+ debug_print(2,
+ "message_subclass = '%s'", subclass ? subclass : "unknown");
+ if (subclass == NULL || strcmp(subclass, ESC_DR_REQ) != 0) {
+ debug_print(2,
+ "subclass of message is not '%s'", ESC_DR_REQ);
+ goto L_EXIT;
+ }
+
+ if (sysevent_get_attr_list(ev, &attr_list) != 0) {
+ debug_print(2,
+ "can't retrieve attribute list from DR message");
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, DR_AP_ID, &ap_id) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ DR_AP_ID);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", DR_AP_ID, ap_id ? ap_id : "<null>");
+ if ((ap_id == NULL) || (strlen(ap_id) == 0)) {
+ debug_print(2, "'%s' property in message is NULL", DR_AP_ID);
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, DR_REQ_TYPE, &req) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ DR_REQ_TYPE);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", DR_REQ_TYPE, req ? req : "<null>");
+ if ((req == NULL) || (strlen(req) == 0)) {
+ debug_print(2, "'%s' property in message is NULL", DR_REQ_TYPE);
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ &acpi_event_type) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type ? acpi_event_type : "<null>");
+ if ((acpi_event_type == NULL) || (strlen(acpi_event_type) == 0)) {
+ debug_print(2, "'%s' property in message is NULL",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ goto L_EXIT;
+ }
+
+ (void) event_process(ap_id, req, acpi_event_type);
+
+L_EXIT:
+ if (vendor != NULL) {
+ free(vendor);
+ }
+
+ if (attr_list != NULL) {
+ nvlist_free(attr_list);
+ }
+
+ /* No need to free class & subclass. */
+}
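
The event_process() sequence above mirrors what an administrator could do by
hand with cfgadm(1M): assign and poweron are sbd-plugin private functions
(invoked with -x), while connect and configure are standard state changes
(-c). A sketch, with SB0 standing in for a hypothetical attachment-point id
as reported by cfgadm -a:

    # cfgadm -x assign SB0
    # cfgadm -x poweron SB0
    # cfgadm -c connect SB0
    # cfgadm -c configure SB0
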
diff --git a/usr/src/cmd/acpihpd/svc-acpihpd b/usr/src/cmd/acpihpd/svc-acpihpd
new file mode 100644
index 0000000000..f45f05ed06
--- /dev/null
+++ b/usr/src/cmd/acpihpd/svc-acpihpd
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+. /lib/svc/share/smf_include.sh
+
+PLATFORM="i86pc"
+ACPIHPD=/usr/platform/${PLATFORM}/lib/acpihpd
+
+/usr/sbin/prtconf -D /devices/fw/acpidr@0 > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ svcadm disable -t $SMF_FMRI
+ sleep 5&
+ exit $SMF_EXIT_OK
+fi
+
+$ACPIHPD && exit $SMF_EXIT_OK || exit $SMF_EXIT_ERR_FATAL
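
The start method keeps the daemon running only on platforms that expose the
ACPI dynamic-reconfiguration firmware node; the same probe can be run by
hand to check whether a system supports ACPI DR:

    # /usr/sbin/prtconf -D /devices/fw/acpidr@0 > /dev/null 2>&1 && \
          echo "ACPI DR supported" || echo "no ACPI DR support"
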
diff --git a/usr/src/lib/cfgadm_plugins/Makefile b/usr/src/lib/cfgadm_plugins/Makefile
index bb37297b11..9fc029e2ec 100644
--- a/usr/src/lib/cfgadm_plugins/Makefile
+++ b/usr/src/lib/cfgadm_plugins/Makefile
@@ -27,8 +27,8 @@
include $(SRC)/Makefile.master
-COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp
-sparc_SUBDIRS= sbd ac sysctrl
+COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp sbd
+sparc_SUBDIRS= ac sysctrl
i386_SUBDIRS= sata
diff --git a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
index fb3046ba42..e28db75c53 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
+++ b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
@@ -23,7 +23,6 @@
# Use is subject to license terms.
#
-PLATFORM= sun4u
LIBRARY= sbd.a
VERS= .1
@@ -33,7 +32,7 @@ OBJECTS= ap.o ap_msg.o ap_rcm.o ap_sbd.o ap_seq.o cfga.o ap_err.o
include ../../../Makefile.lib
USR_PLAT_DIR = $(ROOT)/usr/platform
-USR_PSM_DIR = $(USR_PLAT_DIR)/sun4u
+USR_PSM_DIR = $(USR_PLAT_DIR)/$(PLATFORM)
USR_PSM_LIB_DIR = $(USR_PSM_DIR)/lib
USR_PSM_LIB_CFG_DIR = $(USR_PSM_LIB_DIR)/cfgadm
USR_PSM_LIB_CFG_DIR_64 = $(USR_PSM_LIB_CFG_DIR)/$(MACH64)
@@ -54,7 +53,7 @@ CPPFLAGS += -I$(ROOT)/usr/platform/$(PLATFORM)/include -DSBD_DEBUG
#
GENERRDIR= $(SRC)/lib/cfgadm_plugins/sbd
GENERR= $(GENERRDIR)/sbdgenerr
-ERRSRC= $(ROOT)/usr/platform/sun4u/include/sys/sbd_ioctl.h
+ERRSRC= $(ROOT)/usr/platform/$(PLATFORM)/include/sys/sbd_ioctl.h
.KEEP_STATE:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
new file mode 100644
index 0000000000..9fd69cde68
--- /dev/null
+++ b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+PLATFORM = i86pc
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+CFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LDLIBS += -lscf
+
+.KEEP_STATE:
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
index c7c3e96f69..502d192014 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
@@ -40,6 +37,7 @@
#include <config_admin.h>
#include "ap.h"
+/*ARGSUSED0*/
int
ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
{
@@ -78,13 +76,13 @@ ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
cp--; /* Get the '/' */
if (strcmp(cp, apid) == 0) {
- snprintf(symid, bufsize, "%s", dp->d_name);
+ (void) snprintf(symid, bufsize, "%s", dp->d_name);
rc = 0;
break;
}
}
- closedir(dirp);
+ (void) closedir(dirp);
return (rc);
}
@@ -293,7 +291,7 @@ done:
DBG("path=<%s> ", a->path ? a->path : "");
DBG("drv=<%s> inst=%d minor=<%s> ",
- a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
+ a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
DBG("target=<%s>\n", a->target ? a->target : "");
DBG("cid=<%s> ", a->cid ? a->cid : "");
DBG("cname=<%s> ", a->cname ? a->cname : "");
@@ -612,7 +610,7 @@ ap_opt_parse(apd_t *a, ap_cmd_t *acp, const char *options)
* during sequencing.
*/
for (p = strtok(value, ":"); p != NULL;
- p = strtok(NULL, ":")) {
+ p = strtok(NULL, ":")) {
if ((i = ap_cmd(p)) == CMD_NONE) {
ap_err(a, ERR_CMD_INVAL, p);
return (-1);
@@ -711,8 +709,7 @@ ap_cmd_parse(apd_t *a, const char *f, const char *options, int *cmd)
target = mask(tgt);
DBG("cmd=%s(%d) tmask=0x%x cmask=0x%x omask=0x%x\n",
- ap_cmd_name(c), c, target,
- acp->cmask, acp->omask[tgt]);
+ ap_cmd_name(c), c, target, acp->cmask, acp->omask[tgt]);
if ((acp->cmask & target) == 0)
ap_err(a, ERR_CMD_NOTSUPP, c);
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
index d2049bdc1d..1005dab325 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -63,8 +61,7 @@ debugging(void)
if ((debug_fp = fopen(ep, "a")) == NULL)
return (0);
}
- (void) fprintf(debug_fp,
- "\nDebug started, pid=%d\n", (int)getpid());
+ (void) fprintf(debug_fp, "\nDebug started, pid=%d\n", (int)getpid());
return (1);
}
@@ -110,7 +107,7 @@ ap_err_fmts[] = {
NULL
};
-#define ap_err_fmt(i) ap_err_fmts[min((i), ERR_NONE)]
+#define ap_err_fmt(i) ap_err_fmts[min((uint_t)(i), ERR_NONE)]
static char *
ap_msg_fmts[] = {
@@ -125,7 +122,7 @@ ap_msg_fmts[] = {
NULL
};
-#define ap_msg_fmt(i) ap_msg_fmts[min((i), MSG_NONE)]
+#define ap_msg_fmt(i) ap_msg_fmts[min((uint_t)(i), MSG_NONE)]
#define STR_BD "board"
#define STR_SEP ": "
@@ -366,7 +363,7 @@ ap_err(apd_t *a, ...)
switch (err) {
case ERR_CMD_FAIL:
(void) snprintf(p, len, fmt, cmd, target,
- syserr, rsep, sysrsrc);
+ syserr, rsep, sysrsrc);
break;
case ERR_CMD_ABORT:
case ERR_CMD_NACK:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
index f917bb5ae4..770784f16f 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -145,6 +143,8 @@ ap_rcm_ops[] = {
#ifdef __sparcv9
#define RCMLIB "/lib/sparcv9/librcm.so";
+#elif defined(__amd64)
+#define RCMLIB "/lib/amd64/librcm.so";
#else
#define RCMLIB "/lib/librcm.so";
#endif
@@ -323,22 +323,21 @@ ap_rcm_init(apd_t *a)
switch (i) {
case ALLOC_HANDLE:
rcm->alloc_handle = (int(*)
- (char *, uint_t, void *, rcm_handle_t **))sym;
+ (char *, uint_t, void *, rcm_handle_t **))sym;
break;
case FREE_HANDLE:
rcm->free_handle = (void (*)(rcm_handle_t *))sym;
break;
case GET_INFO:
rcm->get_info = (int (*)
- (rcm_handle_t *, char *, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t, rcm_info_t **))sym;
break;
case FREE_INFO:
rcm->free_info = (void (*)(rcm_info_t *))sym;
break;
case INFO_TUPLE_NEXT:
rcm->info_next = (rcm_info_tuple_t *(*)
- (rcm_info_t *, rcm_info_tuple_t *))sym;
+ (rcm_info_t *, rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_STATE:
rcm->info_state = (int (*)(rcm_info_tuple_t *))sym;
@@ -348,50 +347,50 @@ ap_rcm_init(apd_t *a)
break;
case INFO_TUPLE_ERROR:
rcm->info_error = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_INFO:
rcm->info_info = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_RSRC:
rcm->info_rsrc = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case REQUEST_OFFLINE:
rcm->request_offline_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case NOTIFY_ONLINE:
rcm->notify_online_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case REQUEST_SUSPEND:
rcm->request_suspend = (int (*)
- (rcm_handle_t *, char *, uint_t,
- timespec_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ timespec_t *, rcm_info_t **))sym;
break;
case NOTIFY_RESUME:
rcm->notify_resume = (int (*)
- (rcm_handle_t *, char *, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ rcm_info_t **))sym;
break;
case NOTIFY_REMOVE:
rcm->notify_remove_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case REQUEST_CAP_CHANGE:
rcm->request_capacity_change = (int (*)
- (rcm_handle_t *, char *, uint_t,
- nvlist_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ nvlist_t *, rcm_info_t **))sym;
break;
case NOTIFY_CAP_CHANGE:
rcm->notify_capacity_change = (int (*)
- (rcm_handle_t *, char *, uint_t,
- nvlist_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ nvlist_t *, rcm_info_t **))sym;
break;
default:
break;
@@ -400,7 +399,7 @@ ap_rcm_init(apd_t *a)
if (rcm->alloc_handle == NULL ||
(*rcm->alloc_handle)(NULL, RCM_NOPID, NULL, &rcm->hd)
- != RCM_SUCCESS) {
+ != RCM_SUCCESS) {
ap_err(a, ERR_RCM_HANDLE);
return (CFGA_LIB_ERROR);
}
@@ -441,7 +440,7 @@ ap_rcm_init(apd_t *a)
rcm->ncpus = ncpu;
if ((rcm->cpuids = (cpuid_t *)calloc(ncpu, sizeof (cpuid_t)))
- == NULL) {
+ == NULL) {
ap_err(a, ERR_NOMEM);
return (CFGA_LIB_ERROR);
}
@@ -781,20 +780,21 @@ ap_rcm_cap_cpu(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
nvlist_add_int32(nvl, "old_total", oldncpuids) != 0 ||
nvlist_add_int32(nvl, "new_total", newncpuids) != 0 ||
nvlist_add_int32_array(nvl, "old_cpu_list", oldcpuids,
- oldncpuids) != 0 ||
+ oldncpuids) != 0 ||
nvlist_add_int32_array(nvl, "new_cpu_list", newcpuids,
- newncpuids) != 0)
+ newncpuids) != 0)
goto done;
- snprintf(buf, sizeof (buf), fmt, change);
+ (void) snprintf(buf, sizeof (buf), fmt, change);
ap_msg(a, MSG_ISSUE, cmd, buf);
- if (cmd == CMD_RCM_CAP_DEL)
+ if (cmd == CMD_RCM_CAP_DEL) {
rv = (*rcm->request_capacity_change)(hd, "SUNW_cpu",
- flags, nvl, rinfo);
- else
+ flags, nvl, rinfo);
+ } else {
rv = (*rcm->notify_capacity_change)(hd, "SUNW_cpu",
- flags & ~RCM_FORCE, nvl, rinfo);
+ flags & ~RCM_FORCE, nvl, rinfo);
+ }
done:
if (nvl)
@@ -865,12 +865,13 @@ ap_rcm_cap_mem(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
(void) snprintf(buf, sizeof (buf), "(%ld pages)", change);
ap_msg(a, MSG_ISSUE, cmd, buf);
- if (cmd == CMD_RCM_CAP_DEL)
+ if (cmd == CMD_RCM_CAP_DEL) {
rv = (*rcm->request_capacity_change)(hd, "SUNW_memory",
- flags, nvl, rinfo);
- else
+ flags, nvl, rinfo);
+ } else {
rv = (*rcm->notify_capacity_change)(hd, "SUNW_memory",
- flags & ~RCM_FORCE, nvl, rinfo);
+ flags & ~RCM_FORCE, nvl, rinfo);
+ }
nvlist_free(nvl);
@@ -923,11 +924,13 @@ ap_rcm_request_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
}
if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS))
+ CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS))
+ CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
return (CFGA_OK);
}
@@ -945,8 +948,8 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
DBG("ap_rcm_add_cap(%p)\n", (void *)a);
/* Get the new capacity info to figure out what has changed */
- if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
- != CFGA_OK)
+ if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+ CFGA_OK)
return (rc);
if (capinfo == NULL) {
@@ -981,7 +984,7 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
type = ap_cm_type(a, cm);
DBG("cm=%d valid=%d type=%d, prevos=%d os=%d\n",
- cm, prevvalidity, type, prevos, os);
+ cm, prevvalidity, type, prevos, os);
/*
* We are interested only in those components
@@ -1008,11 +1011,13 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
free(capinfo);
if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS))
+ CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS))
+ CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
return (CFGA_OK);
}
@@ -1045,8 +1050,8 @@ ap_rcm_notify_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
DBG("ap_rcm_notify_cap(%p)\n", (void *)a);
/* Get the new capacity info to figure out what has changed */
- if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
- != CFGA_OK)
+ if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+ CFGA_OK)
return (rc);
if (capinfo == NULL) {
@@ -1183,8 +1188,7 @@ ap_rcm_ctl(apd_t *a, int cmd)
if (rcm->capinfo == NULL)
noop++;
else
- rc = ap_rcm_request_cap(a, rcm, hd, &rv,
- flags, &rinfo);
+ rc = ap_rcm_request_cap(a, rcm, hd, &rv, flags, &rinfo);
break;
case CMD_RCM_CAP_ADD:
rc = ap_rcm_add_cap(a, rcm, hd, &rv, flags, &rinfo);
@@ -1266,13 +1270,13 @@ ap_rcm_ctl(apd_t *a, int cmd)
ap_msg(a, MSG_ISSUE, cmd, rlist[nrsrc]);
if (cmd == CMD_RCM_OFFLINE)
rv = (*rcm->request_offline_list)(hd, rlist, flags,
- &rinfo);
+ &rinfo);
else if (cmd == CMD_RCM_ONLINE)
rv = (*rcm->notify_online_list)(hd, rlist,
- flags & ~RCM_FORCE, &rinfo);
+ flags & ~RCM_FORCE, &rinfo);
else
rv = (*rcm->notify_remove_list)(hd, rlist,
- flags & ~RCM_FORCE, &rinfo);
+ flags & ~RCM_FORCE, &rinfo);
break;
}
case CMD_RCM_SUSPEND: {
@@ -1440,7 +1444,7 @@ ap_rcm_info(apd_t *a, char **msg)
if ((infostr = (*rcm->info_info)(tuple)) != NULL) {
(void) strcat(*msg, "\n");
(void) sprintf(&((*msg)[strlen(*msg)]), format,
- (*rcm->info_rsrc)(tuple), infostr);
+ (*rcm->info_rsrc)(tuple), infostr);
}
}
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
index 70c3fe7dee..7c3b34b1f0 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -175,11 +172,11 @@ ap_stat(apd_t *a, int all)
*/
ctl->ic_type = SBD_COMP_UNKNOWN;
ctl->ic_unit = a->cnum;
- strcpy(ctl->ic_name, a->cname);
+ (void) strcpy(ctl->ic_name, a->cname);
}
DBG("ioctl(%d SBD_CMD_STATUS, sc=0x%p sz=%d flags=%d",
- fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
+ fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
if (select)
DBG(" cname=<%s> cnum=%d", a->cname, a->cnum);
DBG(")\n");
@@ -282,7 +279,7 @@ apd_init(apd_t *a, int all)
for (dst = st->s_stat, i = 0; i < st->s_nstat; i++, dst++) {
DBG("ds_name,ds_unit,ds_type=<%s,%d,%d> ",
- dst->ds_name, dst->ds_unit, dst->ds_type);
+ dst->ds_name, dst->ds_unit, dst->ds_type);
if (dst->ds_unit != a->cnum)
continue;
@@ -331,7 +328,7 @@ apd_free(apd_t *a)
ap_rcm_fini(a);
if (a->fd != -1)
- close(a->fd);
+ (void) close(a->fd);
s_free(a->options);
s_free(a->path);
@@ -405,7 +402,7 @@ ap_init(apd_t *a, cfga_list_data_t *ap)
st = (sbd_stat_t *)a->stat;
DBG("ap_init bd=%d rs=%d os=%d type=<%s>\n",
- a->bnum, st->s_rstate, st->s_ostate, st->s_type);
+ a->bnum, st->s_rstate, st->s_ostate, st->s_type);
parsable_strncpy(ap->ap_type, st->s_type, sizeof (ap->ap_type));
ap->ap_r_state = (cfga_stat_t)st->s_rstate;
@@ -555,7 +552,7 @@ ap_ioctl(apd_t *a, int cmd)
else {
ctl->ic_type = SBD_COMP_UNKNOWN;
ctl->ic_unit = a->cnum;
- strcpy(ctl->ic_name, a->cname);
+ (void) strcpy(ctl->ic_name, a->cname);
}
if (!(ioc = ap_ioc(cmd))) {
@@ -638,7 +635,7 @@ mod_estr(int code)
if (i == sbd_etab_len) {
char buf[32];
- snprintf(buf, sizeof (buf), "error %d", code);
+ (void) snprintf(buf, sizeof (buf), "error %d", code);
s = strdup(buf);
}
@@ -760,7 +757,7 @@ ap_help(struct cfga_msg *msgp, const char *options, cfga_flags_t flags)
continue;
if ((q = (char *)calloc(len + 1, 1)) == NULL)
continue;
- strcpy(q, *p);
+ (void) strcpy(q, *p);
(*msgp->message_routine)(msgp->appdata_ptr, q);
free(q);
}
@@ -823,7 +820,7 @@ ap_cm_devpath(apd_t *a, int seq)
* Assume an io component so that we can get
* the path if the component is indeed of type io.
*/
- if (seq == -1)
+ if (seq == CM_DFLT)
dst = (sbd_io_stat_t *)a->cmstat;
else {
sbd_stat_t *st;
@@ -1002,7 +999,7 @@ ap_cm_init(apd_t *a, cfga_list_data_t *ap, int seq)
a->cmstat = (void *)dst;
DBG("ap_cm_init bd=%d rs=%d os=%d type=<%s> seq=%d\n",
- a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
+ a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
(void) strncpy(ap->ap_type, type, sizeof (ap->ap_type));
ap->ap_r_state = (cfga_stat_t)st->s_rstate;
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
index 929dc0251c..0536b838f4 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
@@ -40,6 +37,12 @@
#include <config_admin.h>
#include "ap.h"
+#ifdef __x86
+#include <libscf_priv.h>
+
+static int fastreboot_disabled;
+#endif /* __x86 */
+
static cfga_err_t
ap_suspend_check(apd_t *a, int cmd, int first, int last, int *suspend)
{
@@ -202,7 +205,7 @@ ap_seq_get(apd_t *a, int cmd, int *first, int *last)
*last = l;
DBG("ap_seq(%d, %d, %d, %p, %p) = (%d, %d)\n",
- rs, os, cmd, (void *)first, (void *)last, f, l);
+ rs, os, cmd, (void *)first, (void *)last, f, l);
return (AP_SEQ_OK);
}
@@ -268,7 +271,7 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
* interpose on the suspend operation.
*/
rc = ap_suspend_check(a, cmd,
- first + 1, last, &suspend);
+ first + 1, last, &suspend);
break;
case CMD_RCM_SUSPEND:
if (suspend && ((rc = ap_rcm_ctl(a, c)) == CFGA_OK)) {
@@ -297,6 +300,41 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
case CMD_RCM_CAP_NOTIFY:
(void) ap_rcm_ctl(a, c);
break;
+
+#ifdef __x86
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't yet tolerate such an inconsistency between stale ACPI
+ * tables and the real hardware configuration.
+ */
+ case CMD_ASSIGN:
+ case CMD_POWERON:
+ case CMD_POWEROFF:
+ case CMD_UNASSIGN:
+ if (!fastreboot_disabled &&
+ scf_fastreboot_default_set_transient(B_FALSE) ==
+ SCF_SUCCESS) {
+ fastreboot_disabled = 1;
+ }
+ /* FALLTHROUGH */
+#endif /* __x86 */
+
default:
rc = ap_ioctl(a, c);
break;
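
Because scf_fastreboot_default_set_transient() changes only the running
configuration, fast reboot stays disabled until the next full reboot. A
sketch of inspecting and restoring the default by hand, assuming the
standard svc:/system/boot-config:default service and its
config/fastreboot_default property:

    # svcprop -p config/fastreboot_default svc:/system/boot-config:default
    # svccfg -s svc:/system/boot-config:default \
          setprop config/fastreboot_default = true
    # svcadm refresh svc:/system/boot-config:default
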
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c b/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
index a891746634..c03350e19b 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -243,14 +240,14 @@ cfga_list_ext(
ap_cm_id(a, i, dyn, sizeof (dyn));
(void) snprintf(ap->ap_log_id, szl, "%s::%s",
- a->target, dyn);
+ a->target, dyn);
(void) snprintf(ap->ap_phys_id, szp, "%s::%s",
- a->path, dyn);
+ a->path, dyn);
ap_cm_init(a, ap, i);
DBG("ap_phys_id=%s ap_log_id=%s\n",
- ap->ap_phys_id, ap->ap_log_id);
+ ap->ap_phys_id, ap->ap_log_id);
}
} else
diff --git a/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile
new file mode 100644
index 0000000000..7a382d291f
--- /dev/null
+++ b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+PLATFORM = i86pc
+
+include ../Makefile.com
+
+CFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LDLIBS += -lscf
+
+.KEEP_STATE:
+
+install: all $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
index dcaf7b8438..39bd47a725 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
+++ b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,10 +22,10 @@
# Copyright (c) 2000 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# lib/sbd/sparc/Makefile
+PLATFORM = sun4u
+
include ../Makefile.com
.KEEP_STATE:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
index 4ac3f6310c..1d9c67c4e1 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
+++ b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,10 +22,10 @@
# Copyright (c) 2000 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# lib/sbd/sparcv9/Makefile
+PLATFORM = sun4u
+
include ../Makefile.com
include ../../../Makefile.lib.64
diff --git a/usr/src/pkg/license_files/cr_Intel.hotplug b/usr/src/pkg/license_files/cr_Intel.hotplug
new file mode 100644
index 0000000000..bf0648aabc
--- /dev/null
+++ b/usr/src/pkg/license_files/cr_Intel.hotplug
@@ -0,0 +1 @@
+ * Copyright(c) 2009 - 2010 Intel Corporation. All rights reserved.
diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf
index 113817866b..b95dd46026 100644
--- a/usr/src/pkg/manifests/system-header.mf
+++ b/usr/src/pkg/manifests/system-header.mf
@@ -1660,6 +1660,7 @@ $(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_defs.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_modctl.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_types.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/rm_platter.h
+$(i386_ONLY)file path=usr/platform/i86pc/include/sys/sbd_ioctl.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/smp_impldefs.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/vm_machparam.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/x_call.h
diff --git a/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf
new file mode 100644
index 0000000000..28ddc833bb
--- /dev/null
+++ b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include hollow_zone_pkg>
+set name=pkg.fmri \
+ value=pkg:/system/kernel/dynamic-reconfiguration/i86pc@$(PKGVERS)
+set name=pkg.description \
+ value="Dynamic Reconfiguration Modules for i86pc required to support \
+ hotplug on i86pc based systems. Includes the ACPI Hotplug Daemon, SMF \
+ manifests/methods and drivers for hotplug operations."
+set name=pkg.summary value="Dynamic Reconfiguration Modules for i86pc"
+set name=info.classification \
+ value="org.opensolaris.category.2008:System/Administration and Configuration" \
+ value="org.opensolaris.category.2008:System/Enterprise Management"
+set name=variant.arch value=i386
+set name=variant.opensolaris.zone value=global value=nonglobal
+dir path=platform group=sys
+dir path=platform/i86pc group=sys
+dir path=platform/i86pc/kernel group=sys
+dir path=platform/i86pc/kernel/drv group=sys
+dir path=platform/i86pc/kernel/drv/$(ARCH64) group=sys
+dir path=platform/i86pc/kernel/misc group=sys
+dir path=platform/i86pc/kernel/misc/$(ARCH64) group=sys
+file path=platform/i86pc/kernel/misc/$(ARCH64)/drmach_acpi group=sys \
+ mode=0755
+dir path=usr group=sys
+dir path=usr/platform group=sys
+dir path=usr/platform/i86pc group=sys
+dir path=usr/platform/i86pc/lib group=sys
+dir path=var group=sys
+dir path=var/svc group=sys
+dir path=var/svc/manifest group=sys
+dir path=var/svc/manifest/platform group=sys
+dir path=var/svc/manifest/platform/i86pc group=sys
+file path=platform/i86pc/kernel/drv/$(ARCH64)/dr group=sys
+driver name=dr alias=acpidr_sbd
+file path=platform/i86pc/kernel/drv/dr.conf group=sys mode=0644
+file path=usr/platform/i86pc/lib/acpihpd group=sys mode=0755
+file path=var/svc/manifest/platform/i86pc/acpihpd.xml group=sys mode=0444
+dir path=lib group=sys
+dir path=lib/svc group=sys
+dir path=lib/svc/method group=sys
+file path=lib/svc/method/svc-acpihpd group=sys mode=0555
+dir path=usr/platform/i86pc/lib/cfgadm
+dir path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)
+file path=usr/platform/i86pc/lib/cfgadm/sbd.so.1
+link path=usr/platform/i86pc/lib/cfgadm/sbd.so \
+ target=./sbd.so.1
+file path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so.1
+link path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so \
+ target=./sbd.so.1
+license cr_Intel.hotplug license=cr_Intel
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
+depend fmri=pkg:/system/kernel/power type=require
diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c
index 11162ccfc9..b3ed323f92 100644
--- a/usr/src/uts/common/os/cpu_event.c
+++ b/usr/src/uts/common/os/cpu_event.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -152,8 +152,13 @@ typedef struct cpu_idle_cb_item {
/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
typedef union cpu_idle_cb_state {
struct {
+ /* Number of idle callbacks already invoked. */
int index;
+ /* Invoke registered callbacks if true. */
+ boolean_t enabled;
+ /* Property values are valid if true. */
boolean_t ready;
+ /* Pointers to per-CPU properties. */
cpu_idle_prop_value_t *idle_state;
cpu_idle_prop_value_t *enter_ts;
cpu_idle_prop_value_t *exit_ts;
@@ -182,6 +187,17 @@ static int cpu_idle_cb_max = 0;
static cpu_idle_cb_state_t *cpu_idle_cb_state;
+#ifdef __x86
+/*
+ * cpuset used to intercept CPUs before powering them off.
+ * The control CPU sets the bit corresponding to the target CPU and waits
+ * until the bit is cleared.
+ * The target CPU disables interrupts before clearing the corresponding
+ * bit and then loops forever.
+ */
+static cpuset_t cpu_idle_intercept_set;
+#endif
+
static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
cpu_idle_prop_value_t *valp);
@@ -326,17 +342,26 @@ cpu_event_init(void)
#endif
}
+/*
+ * This function is called to initialize per-CPU state when starting CPUs.
+ */
void
cpu_event_init_cpu(cpu_t *cp)
{
ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.index = 0;
cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+ cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_TRUE;
}
+/*
+ * This function is called to clean up per-CPU state when stopping CPUs.
+ */
void
cpu_event_fini_cpu(cpu_t *cp)
{
ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_FALSE;
cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
}
@@ -618,6 +643,21 @@ cpu_idle_enter(int state, int flag,
ASSERT(CPU->cpu_seqid < max_ncpus);
sp = &cpu_idle_cb_state[CPU->cpu_seqid];
ASSERT(sp->v.index == 0);
+ if (sp->v.enabled == B_FALSE) {
+#if defined(__x86)
+ /* Intercept the CPU at a safe point before powering it off. */
+ if (CPU_IN_SET(cpu_idle_intercept_set, CPU->cpu_id)) {
+ iflags = intr_clear();
+ CPUSET_ATOMIC_DEL(cpu_idle_intercept_set, CPU->cpu_id);
+ /*CONSTCOND*/
+ while (1) {
+ SMT_PAUSE();
+ }
+ }
+#endif
+
+ return (0);
+ }
/*
* On x86, cpu_idle_enter can be called from idle thread with either
@@ -1091,3 +1131,26 @@ cpu_idle_get_cpu_state(cpu_t *cp)
cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
CPU_IDLE_GET_CTX(cp)));
}
+
+#if defined(__x86)
+/*
+ * Intercept CPU at a safe point in idle() before powering it off.
+ */
+void
+cpu_idle_intercept_cpu(cpu_t *cp)
+{
+ ASSERT(cp->cpu_seqid < max_ncpus);
+ ASSERT(cpu_idle_cb_state[cp->cpu_seqid].v.enabled == B_FALSE);
+
+ /* Set flag to intercept CPU. */
+ CPUSET_ATOMIC_ADD(cpu_idle_intercept_set, cp->cpu_id);
+ /* Wake up CPU from possible sleep state. */
+ poke_cpu(cp->cpu_id);
+ while (CPU_IN_SET(cpu_idle_intercept_set, cp->cpu_id)) {
+ DELAY(1);
+ }
+ /*
+ * The target CPU is now spinning in a pause loop with interrupts disabled.
+ */
+}
+#endif
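
The two halves above form a handshake: cpu_event_fini_cpu() flips v.enabled
off, and cpu_idle_intercept_cpu() then parks the target in cpu_idle_enter().
A hedged sketch of the control-side sequence a DR driver might run; the
function name is an assumption, only the two calls come from this patch.

    /* Hypothetical caller: park a CPU before powering it off. */
    static void
    dr_park_cpu(cpu_t *cp)
    {
    	ASSERT(MUTEX_HELD(&cpu_lock));

    	/* Stop invoking idle callbacks on this CPU (v.enabled = B_FALSE). */
    	cpu_event_fini_cpu(cp);

    	/* Spin the target in cpu_idle_enter() with interrupts disabled. */
    	cpu_idle_intercept_cpu(cp);

    	/* The target is now quiesced and safe to power off. */
    }
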
diff --git a/usr/src/uts/common/os/cpu_pm.c b/usr/src/uts/common/os/cpu_pm.c
index 324e4168d7..b885e421b8 100644
--- a/usr/src/uts/common/os/cpu_pm.c
+++ b/usr/src/uts/common/os/cpu_pm.c
@@ -677,9 +677,13 @@ cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
cpupm_state_t *new_state = NULL;
did = cpupm_domain_id(cp, type);
- mutex_enter(&cpu_lock);
- dom = cpupm_domain_find(did, type);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ dom = cpupm_domain_find(did, type);
+ } else {
+ mutex_enter(&cpu_lock);
+ dom = cpupm_domain_find(did, type);
+ mutex_exit(&cpu_lock);
+ }
/*
* Can use a lock to avoid changing the power state of the cpu when
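
The rewrite above lets callers that already own cpu_lock (as the DR paths do)
reach cpupm_domain_find() without deadlocking on a recursive mutex_enter().
The same lock-aware pattern in isolation, as a hedged sketch with an assumed
helper name:

    /* Sketch: find a domain whether or not cpu_lock is already held. */
    static cpupm_domain_t *
    cpupm_domain_find_lockaware(id_t did, cpupm_dtype_t type)
    {
    	cpupm_domain_t *dom;

    	if (MUTEX_HELD(&cpu_lock))
    		return (cpupm_domain_find(did, type));

    	mutex_enter(&cpu_lock);
    	dom = cpupm_domain_find(did, type);
    	mutex_exit(&cpu_lock);
    	return (dom);
    }
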
diff --git a/usr/src/uts/common/os/mem_config.c b/usr/src/uts/common/os/mem_config.c
index be67866592..20a89b0611 100644
--- a/usr/src/uts/common/os/mem_config.c
+++ b/usr/src/uts/common/os/mem_config.c
@@ -59,9 +59,6 @@
extern struct memlist *phys_avail;
-extern void mem_node_add(pfn_t, pfn_t);
-extern void mem_node_del(pfn_t, pfn_t);
-
extern uint_t page_ctrs_adjust(int);
void page_ctrs_cleanup(void);
static void kphysm_setup_post_add(pgcnt_t);
@@ -107,6 +104,13 @@ static void memseg_cpu_vm_flush(void);
int meta_alloc_enable;
+#ifdef DEBUG
+static int memseg_debug;
+#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
+#else
+#define MEMSEG_DEBUG(...)
+#endif
+
/*
* Add a chunk of memory to the system.
* base: starting PAGESIZE page of new memory.
@@ -117,13 +121,6 @@ int meta_alloc_enable;
* dynamically most likely means more hash misses, since the tables will
* be smaller than they otherwise would be.
*/
-#ifdef DEBUG
-static int memseg_debug;
-#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
-#else
-#define MEMSEG_DEBUG(...)
-#endif
-
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
@@ -341,6 +338,9 @@ mapalloc:
* this may change with COD or in larger SSM systems with
* nested latency groups, so we must not assume that the
* node does not yet exist.
+ *
+ * Note that there may be multiple memory nodes associated with
+ * a single lgrp node on x86 systems.
*/
pnum = pt_base + tpgs - 1;
mem_node_add_range(pt_base, pnum);
@@ -569,8 +569,8 @@ mapalloc:
(uint64_t)(tpgs) << PAGESHIFT);
delspan_unreserve(pt_base, tpgs);
- return (KPHYSM_OK); /* Successfully added system memory */
+ return (KPHYSM_OK); /* Successfully added system memory */
}
/*
@@ -2566,7 +2566,7 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
int phase;
- ASSERT(IS_P2ALIGNED((uint64_t)va, PAGESIZE));
+ ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));
/*
* We may start remapping at a non-zero page offset
@@ -2574,7 +2574,8 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs)
* of the outgoing pp's could be shared by other
* memsegs (see memseg_remap_meta).
*/
- phase = btop((uint64_t)va) % pp_dummy_npages;
+ phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
+ /*CONSTCOND*/
ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);
while (metapgs != 0) {
diff --git a/usr/src/uts/common/os/mem_config_stubs.c b/usr/src/uts/common/os/mem_config_stubs.c
index d3226ff633..1a265173f4 100644
--- a/usr/src/uts/common/os/mem_config_stubs.c
+++ b/usr/src/uts/common/os/mem_config_stubs.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
@@ -44,6 +42,7 @@ pgcnt_t kcage_desfree;
pgcnt_t kcage_needfree;
pgcnt_t kcage_lotsfree = 1;
+#if !defined(__x86) || defined(__xpv)
/*ARGSUSED*/
int
kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
@@ -56,6 +55,7 @@ void
kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
{
}
+#endif
/*ARGSUSED*/
int
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index 91e2b72dc3..33cc970aea 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -85,6 +85,10 @@
#include <sys/zone.h>
#include <sys/clock_impl.h>
#include <sys/ddi.h>
+#include <sys/modhash.h>
+#include <sys/sunldi_impl.h>
+#include <sys/fs/dv_node.h>
+#include <sys/fs/snode.h>
extern pri_t minclsyspri;
@@ -9369,3 +9373,938 @@ ddi_cb_unregister(ddi_cb_handle_t hdl)
return (DDI_SUCCESS);
}
+
+/*
+ * Platform independent DR routines
+ */
+
+static int
+ndi2errno(int n)
+{
+ int err = 0;
+
+ switch (n) {
+ case NDI_NOMEM:
+ err = ENOMEM;
+ break;
+ case NDI_BUSY:
+ err = EBUSY;
+ break;
+ case NDI_FAULT:
+ err = EFAULT;
+ break;
+ case NDI_FAILURE:
+ err = EIO;
+ break;
+ case NDI_SUCCESS:
+ break;
+ case NDI_BADHANDLE:
+ default:
+ err = EINVAL;
+ break;
+ }
+ return (err);
+}
+
+/*
+ * Prom tree node list
+ */
+struct ptnode {
+ pnode_t nodeid;
+ struct ptnode *next;
+};
+
+/*
+ * Prom tree walk arg
+ */
+struct pta {
+ dev_info_t *pdip;
+ devi_branch_t *bp;
+ uint_t flags;
+ dev_info_t *fdip;
+ struct ptnode *head;
+};
+
+static void
+visit_node(pnode_t nodeid, struct pta *ap)
+{
+ struct ptnode **nextp;
+ int (*select)(pnode_t, void *, uint_t);
+
+ ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
+
+ select = ap->bp->create.prom_branch_select;
+
+ ASSERT(select);
+
+ if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
+
+ for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
+ ;
+
+ *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
+
+ (*nextp)->nodeid = nodeid;
+ }
+
+ if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
+ return;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+}
+
+/*
+ * NOTE: The caller of this function must check for device contracts
+ * or LDI callbacks against this dip before setting the dip offline.
+ */
+static int
+set_infant_dip_offline(dev_info_t *dip, void *arg)
+{
+ char *path = (char *)arg;
+
+ ASSERT(dip);
+ ASSERT(arg);
+
+ if (i_ddi_node_state(dip) >= DS_ATTACHED) {
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "Attempt to set offline flag on attached "
+ "node: %s", path);
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!DEVI_IS_DEVICE_OFFLINE(dip))
+ DEVI_SET_DEVICE_OFFLINE(dip);
+ mutex_exit(&(DEVI(dip)->devi_lock));
+
+ return (DDI_SUCCESS);
+}
+
+typedef struct result {
+ char *path;
+ int result;
+} result_t;
+
+static int
+dip_set_offline(dev_info_t *dip, void *arg)
+{
+ int end;
+ result_t *resp = (result_t *)arg;
+
+ ASSERT(dip);
+ ASSERT(resp);
+
+ /*
+ * We stop the walk if e_ddi_offline_notify() returns
+ * failure, because this implies that one or more consumers
+ * (either LDI or contract based) have blocked the offline,
+ * so there is no point in continuing the walk.
+ */
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ resp->result = DDI_FAILURE;
+ return (DDI_WALK_TERMINATE);
+ }
+
+ /*
+ * If set_infant_dip_offline() returns failure, it implies
+ * that we failed to set a particular dip offline. This
+ * does not imply that the offline as a whole should fail.
+ * We want to do the best we can, so we continue the walk.
+ */
+ if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * The call to e_ddi_offline_notify() exists for the
+ * unlikely error case that a branch we are trying to
+ * create already exists and has device contracts or LDI
+ * event callbacks against it.
+ *
+ * We allow create to succeed for such branches only if
+ * no constraints block the offline.
+ */
+static int
+branch_set_offline(dev_info_t *dip, char *path)
+{
+ int circ;
+ int end;
+ result_t res;
+
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ return (DDI_FAILURE);
+ }
+
+ if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
+ if (end == DDI_FAILURE)
+ return (DDI_FAILURE);
+
+ res.result = DDI_SUCCESS;
+ res.path = path;
+
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
+ ndi_devi_exit(dip, circ);
+
+ return (res.result);
+}
+
+/*ARGSUSED*/
+static int
+create_prom_branch(void *arg, int has_changed)
+{
+ int circ;
+ int exists, rv;
+ pnode_t nodeid;
+ struct ptnode *tnp;
+ dev_info_t *dip;
+ struct pta *ap = arg;
+ devi_branch_t *bp;
+ char *path;
+
+ ASSERT(ap);
+ ASSERT(ap->fdip == NULL);
+ ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
+
+ bp = ap->bp;
+
+ nodeid = ddi_get_nodeid(ap->pdip);
+ if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
+ cmn_err(CE_WARN, "create_prom_branch: invalid "
+ "nodeid: 0x%x", nodeid);
+ return (EINVAL);
+ }
+
+ ap->head = NULL;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+
+ if (ap->head == NULL)
+ return (ENODEV);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ rv = 0;
+ while ((tnp = ap->head) != NULL) {
+ ap->head = tnp->next;
+
+ ndi_devi_enter(ap->pdip, &circ);
+
+ /*
+ * Check if the branch already exists.
+ */
+ exists = 0;
+ dip = e_ddi_nodeid_to_dip(tnp->nodeid);
+ if (dip != NULL) {
+ exists = 1;
+
+ /* Parent is held busy, so release hold */
+ ndi_rele_devi(dip);
+#ifdef DEBUG
+ cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
+ " for nodeid 0x%x", (void *)dip, tnp->nodeid);
+#endif
+ } else {
+ dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
+ }
+
+ kmem_free(tnp, sizeof (struct ptnode));
+
+ /*
+ * Hold the branch if it is not already held
+ */
+ if (dip && !exists) {
+ e_ddi_branch_hold(dip);
+ }
+
+ ASSERT(dip == NULL || e_ddi_branch_held(dip));
+
+ /*
+ * Set all dips in the newly created branch offline so that
+ * only a "configure" operation can attach
+ * the branch
+ */
+ if (dip == NULL || branch_set_offline(dip, path)
+ == DDI_FAILURE) {
+ ndi_devi_exit(ap->pdip, circ);
+ rv = EIO;
+ continue;
+ }
+
+ ASSERT(ddi_get_parent(dip) == ap->pdip);
+
+ ndi_devi_exit(ap->pdip, circ);
+
+ if (ap->flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * Invoke devi_branch_callback() (if it exists) only for
+ * newly created branches
+ */
+ if (bp->devi_branch_callback && !exists)
+ bp->devi_branch_callback(dip, bp->arg, 0);
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ return (rv);
+}
+
+static int
+sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
+{
+ int rv, circ, len;
+ int i, flags, ret;
+ dev_info_t *dip;
+ char *nbuf;
+ char *path;
+ static const char *noname = "<none>";
+
+ ASSERT(pdip);
+ ASSERT(DEVI_BUSY_OWNED(pdip));
+
+ flags = 0;
+
+ /*
+ * Creating the root of a branch?
+ */
+ if (rdipp) {
+ *rdipp = NULL;
+ flags = DEVI_BRANCH_ROOT;
+ }
+
+ ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
+ rv = bp->create.sid_branch_create(dip, bp->arg, flags);
+
+ nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
+
+ if (rv == DDI_WALK_ERROR) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
+ " properties on devinfo node %p", (void *)dip);
+ goto fail;
+ }
+
+ len = OBP_MAXDRVNAME;
+ if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
+ != DDI_PROP_SUCCESS) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
+ " no name property", (void *)dip);
+ goto fail;
+ }
+
+ ASSERT(i_ddi_node_state(dip) == DS_PROTO);
+ if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
+ " for devinfo node %p", nbuf, (void *)dip);
+ goto fail;
+ }
+
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+
+ /*
+ * Ignore bind failures just like boot does
+ */
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ switch (rv) {
+ case DDI_WALK_CONTINUE:
+ case DDI_WALK_PRUNESIB:
+ ndi_devi_enter(dip, &circ);
+
+ i = DDI_WALK_CONTINUE;
+ for (; i == DDI_WALK_CONTINUE; ) {
+ i = sid_node_create(dip, bp, NULL);
+ }
+
+ ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
+ if (i == DDI_WALK_ERROR)
+ rv = i;
+ /*
+ * If PRUNESIB, stop creating siblings
+ * of dip's child. Subsequent walk behavior
+ * is determined by the rv returned by dip.
+ */
+
+ ndi_devi_exit(dip, circ);
+ break;
+ case DDI_WALK_TERMINATE:
+ /*
+ * Don't create children and ask our parent
+ * to not create siblings either.
+ */
+ rv = DDI_WALK_PRUNESIB;
+ break;
+ case DDI_WALK_PRUNECHILD:
+ /*
+ * Don't create children, but ask parent to continue
+ * with siblings.
+ */
+ rv = DDI_WALK_CONTINUE;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (rdipp)
+ *rdipp = dip;
+
+ /*
+ * Set device offline - only the "configure" op should cause an attach.
+ * Note that it is safe to set the dip offline without checking
+ * for either device contract or layered driver (LDI) based constraints
+ * since there cannot be any contracts or LDI opens of this device.
+ * This is because this node is a newly created dip with the parent busy
+ * held, so no other thread can come in and attach this dip. A dip that
+ * has never been attached cannot have contracts since by definition
+ * a device contract (an agreement between a process and a device minor
+ * node) can only be created against a device that has minor nodes,
+ * i.e., is attached. Similarly, an LDI open will only succeed if the
+ * dip is attached. We assert below that the dip is not attached.
+ */
+ ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ ret = set_infant_dip_offline(dip, path);
+ ASSERT(ret == DDI_SUCCESS);
+ kmem_free(path, MAXPATHLEN);
+
+ return (rv);
+fail:
+ (void) ndi_devi_free(dip);
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+ return (DDI_WALK_ERROR);
+}
+
+static int
+create_sid_branch(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int rv = 0, state = DDI_WALK_CONTINUE;
+ dev_info_t *rdip;
+
+ while (state == DDI_WALK_CONTINUE) {
+ int circ;
+
+ ndi_devi_enter(pdip, &circ);
+
+ state = sid_node_create(pdip, bp, &rdip);
+ if (rdip == NULL) {
+ ndi_devi_exit(pdip, circ);
+ ASSERT(state == DDI_WALK_ERROR);
+ break;
+ }
+
+ e_ddi_branch_hold(rdip);
+
+ ndi_devi_exit(pdip, circ);
+
+ if (flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(rdip, dipp, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * devi_branch_callback() is optional
+ */
+ if (bp->devi_branch_callback)
+ bp->devi_branch_callback(rdip, bp->arg, 0);
+ }
+
+ ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
+
+ return (state == DDI_WALK_ERROR ? EIO : rv);
+}
+
+int
+e_ddi_branch_create(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int prom_devi, sid_devi, error;
+
+ if (pdip == NULL || bp == NULL || bp->type == 0)
+ return (EINVAL);
+
+ prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
+ sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
+
+ if (prom_devi && bp->create.prom_branch_select == NULL)
+ return (EINVAL);
+ else if (sid_devi && bp->create.sid_branch_create == NULL)
+ return (EINVAL);
+ else if (!prom_devi && !sid_devi)
+ return (EINVAL);
+
+ if (flags & DEVI_BRANCH_EVENT)
+ return (EINVAL);
+
+ if (prom_devi) {
+ struct pta pta = {0};
+
+ pta.pdip = pdip;
+ pta.bp = bp;
+ pta.flags = flags;
+
+ error = prom_tree_access(create_prom_branch, &pta, NULL);
+
+ if (dipp)
+ *dipp = pta.fdip;
+ else if (pta.fdip)
+ ndi_rele_devi(pta.fdip);
+ } else {
+ error = create_sid_branch(pdip, bp, dipp, flags);
+ }
+
+ return (error);
+}
+
+int
+e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
+{
+ int rv;
+ char *devnm;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ndi_hold_devi(pdip);
+
+ if (!e_ddi_branch_held(rdip)) {
+ ndi_rele_devi(pdip);
+ cmn_err(CE_WARN, "e_ddi_branch_configure: "
+ "dip(%p) not held", (void *)rdip);
+ return (EINVAL);
+ }
+
+ if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
+ /*
+ * First attempt to bind a driver. If we fail, return
+ * success (On some platforms, dips for some device
+ * types (CPUs) may not have a driver)
+ */
+ if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
+ ndi_rele_devi(pdip);
+ return (0);
+ }
+
+ if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
+ rv = NDI_FAILURE;
+ goto out;
+ }
+ }
+
+ ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ (void) ddi_deviname(rdip, devnm);
+
+ if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
+ NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
+ /* release hold from ndi_devi_config_one() */
+ ndi_rele_devi(rdip);
+ }
+
+ kmem_free(devnm, MAXNAMELEN + 1);
+out:
+ if (rv != NDI_SUCCESS && dipp && rdip) {
+ ndi_hold_devi(rdip);
+ *dipp = rdip;
+ }
+ ndi_rele_devi(pdip);
+ return (ndi2errno(rv));
+}
+
+void
+e_ddi_branch_hold(dev_info_t *rdip)
+{
+ if (e_ddi_branch_held(rdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
+ return;
+ }
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
+ DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref++;
+ }
+ ASSERT(DEVI(rdip)->devi_ref > 0);
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_held(dev_info_t *rdip)
+{
+ int rv = 0;
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
+ DEVI(rdip)->devi_ref > 0) {
+ rv = 1;
+ }
+ mutex_exit(&DEVI(rdip)->devi_lock);
+
+ return (rv);
+}
+
+void
+e_ddi_branch_rele(dev_info_t *rdip)
+{
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref--;
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_unconfigure(
+ dev_info_t *rdip,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int circ, rv;
+ int destroy;
+ char *devnm;
+ uint_t nflags;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL)
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_clean()
+ */
+ if (DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
+ " devinfo node(%p) is busy held", (void *)pdip);
+ return (EINVAL);
+ }
+
+ destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_deviname(rdip, devnm);
+ ndi_devi_exit(pdip, circ);
+
+ /*
+ * ddi_deviname() returns a component name with / prepended.
+ */
+ (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+
+ ndi_devi_enter(pdip, &circ);
+
+ /*
+ * Recreate the device name, as the node may have changed state
+ * (init/uninit) while the parent busy lock was dropped for devfs_clean().
+ */
+ (void) ddi_deviname(rdip, devnm);
+
+ if (!e_ddi_branch_held(rdip)) {
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
+ destroy ? "destroy" : "unconfigure", (void *)rdip);
+ return (EINVAL);
+ }
+
+ /*
+ * Release hold on the branch. This is ok since we are holding the
+ * parent busy. If rdip is not removed, we must do a hold on the
+ * branch before returning.
+ */
+ e_ddi_branch_rele(rdip);
+
+ nflags = NDI_DEVI_OFFLINE;
+ if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
+ nflags |= NDI_DEVI_REMOVE;
+ destroy = 1;
+ } else {
+ nflags |= NDI_UNCONFIG; /* uninit but don't remove */
+ }
+
+ if (flags & DEVI_BRANCH_EVENT)
+ nflags |= NDI_POST_EVENT;
+
+ if (i_ddi_devi_attached(pdip) &&
+ (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
+ rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
+ } else {
+ rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
+ if (rv == NDI_SUCCESS) {
+ ASSERT(!destroy || ddi_get_child(rdip) == NULL);
+ rv = ndi_devi_offline(rdip, nflags);
+ }
+ }
+
+ if (!destroy || rv != NDI_SUCCESS) {
+ /* The dip still exists, so do a hold */
+ e_ddi_branch_hold(rdip);
+ }
+out:
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ return (ndi2errno(rv));
+}
+
+int
+e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
+{
+ return (e_ddi_branch_unconfigure(rdip, dipp,
+ flag|DEVI_BRANCH_DESTROY));
+}
+
+/*
+ * Number of chains for hash table
+ */
+#define NUMCHAINS 17
+
+/*
+ * Devinfo busy arg
+ */
+struct devi_busy {
+ int dv_total;
+ int s_total;
+ mod_hash_t *dv_hash;
+ mod_hash_t *s_hash;
+ int (*callback)(dev_info_t *, void *, uint_t);
+ void *arg;
+};
+
+static int
+visit_dip(dev_info_t *dip, void *arg)
+{
+ uintptr_t sbusy, dvbusy, ref;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(bsp->callback);
+
+ /*
+ * A dip cannot be busy if its reference count is 0
+ */
+ if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
+ return (bsp->callback(dip, bsp->arg, 0));
+ }
+
+ if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
+ dvbusy = 0;
+
+ /*
+ * To catch device opens currently maintained on specfs common snodes.
+ */
+ if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = 0;
+
+#ifdef DEBUG
+ if (ref < sbusy || ref < dvbusy) {
+ cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
+ "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
+ }
+#endif
+
+ dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
+
+ return (bsp->callback(dip, bsp->arg, dvbusy));
+}
+
+static int
+visit_snode(struct snode *sp, void *arg)
+{
+ uintptr_t sbusy;
+ dev_info_t *dip;
+ int count;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(sp);
+
+ /*
+ * The stable lock is held. This prevents
+ * the snode and its associated dip from
+ * going away.
+ */
+ dip = NULL;
+ count = spec_devi_open_count(sp, &dip);
+
+ if (count <= 0)
+ return (DDI_WALK_CONTINUE);
+
+ ASSERT(dip);
+
+ if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = count;
+ else
+ sbusy += count;
+
+ if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
+ cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
+ "sbusy = %lu", "e_ddi_branch_referenced",
+ (void *)dip, sbusy);
+ }
+
+ bsp->s_total += count;
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+visit_dvnode(struct dv_node *dv, void *arg)
+{
+ uintptr_t dvbusy;
+ uint_t count;
+ struct vnode *vp;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(dv && dv->dv_devi);
+
+ vp = DVTOV(dv);
+
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+
+ if (!count)
+ return;
+
+ if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t *)&dvbusy))
+ dvbusy = count;
+ else
+ dvbusy += count;
+
+ if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t)dvbusy)) {
+ cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
+ "dvbusy=%lu", "e_ddi_branch_referenced",
+ (void *)dv->dv_devi, dvbusy);
+ }
+
+ bsp->dv_total += count;
+}
+
+/*
+ * Returns reference count on success or -1 on failure.
+ */
+int
+e_ddi_branch_referenced(
+ dev_info_t *rdip,
+ int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
+ void *arg)
+{
+ int circ;
+ char *path;
+ dev_info_t *pdip;
+ struct devi_busy bsa = {0};
+
+ ASSERT(rdip);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ ndi_hold_devi(rdip);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_walk()
+ */
+ if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
+ "devinfo branch(%p) not held or parent busy held",
+ (void *)rdip);
+ ndi_rele_devi(rdip);
+ kmem_free(path, MAXPATHLEN);
+ return (-1);
+ }
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_pathname(rdip, path);
+ ndi_devi_exit(pdip, circ);
+
+ bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct dev_info));
+
+ bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct snode));
+
+ if (devfs_walk(path, visit_dvnode, &bsa)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: "
+ "devfs walk failed for: %s", path);
+ kmem_free(path, MAXPATHLEN);
+ bsa.s_total = bsa.dv_total = -1;
+ goto out;
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ /*
+ * Walk the snode table to detect device opens, which are currently
+ * maintained on specfs common snodes.
+ */
+ spec_snode_walk(visit_snode, &bsa);
+
+ if (callback == NULL)
+ goto out;
+
+ bsa.callback = callback;
+ bsa.arg = arg;
+
+ if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
+ ndi_devi_enter(rdip, &circ);
+ ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
+ ndi_devi_exit(rdip, circ);
+ }
+
+out:
+ ndi_rele_devi(rdip);
+ mod_hash_destroy_ptrhash(bsa.s_hash);
+ mod_hash_destroy_ptrhash(bsa.dv_hash);
+ return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
+}
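
e_ddi_branch_referenced() reports per-dip usage counts through the callback,
whose return value steers the walk. A hedged sketch of a consumer; both names
are hypothetical:

    /* Hypothetical callback: veto an unconfigure if any node is open. */
    static int
    branch_busy_cb(dev_info_t *dip, void *arg, uint_t ref)
    {
    	int *busyp = arg;

    	if (ref != 0) {
    		*busyp = 1;
    		return (DDI_WALK_TERMINATE);
    	}
    	return (DDI_WALK_CONTINUE);
    }
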
diff --git a/usr/src/uts/common/sys/esunddi.h b/usr/src/uts/common/sys/esunddi.h
index 3cde0b2b14..e2f860d520 100644
--- a/usr/src/uts/common/sys/esunddi.h
+++ b/usr/src/uts/common/sys/esunddi.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -26,9 +25,6 @@
#ifndef _SYS_ESUNDDI_H
#define _SYS_ESUNDDI_H
-
-#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 */
-
#include <sys/sunddi.h>
#include <sys/proc.h>
#include <sys/autoconf.h>
@@ -214,6 +210,41 @@ dev_info_t *
e_ddi_nodeid_to_dip(pnode_t nodeid);
/*
+ * Defines for DR interfaces
+ */
+#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
+#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
+#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
+#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
+#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
+#define DEVI_BRANCH_SID 0x20 /* SID node branches */
+#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
+
+typedef struct devi_branch {
+ void *arg;
+ void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
+ int type;
+ union {
+ int (*prom_branch_select)(pnode_t, void *, uint_t);
+ int (*sid_branch_create)(dev_info_t *, void *, uint_t);
+ } create;
+} devi_branch_t;
+
+extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
+ dev_info_t **dipp, uint_t flags);
+extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern void e_ddi_branch_hold(dev_info_t *rdip);
+extern void e_ddi_branch_rele(dev_info_t *rdip);
+extern int e_ddi_branch_held(dev_info_t *rdip);
+extern int e_ddi_branch_referenced(dev_info_t *rdip,
+ int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
+
+/*
* Obsolete interfaces, no longer used, to be removed.
* Retained only for driver compatibility.
*/
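
A hedged sketch of a platform DR driver consuming the interfaces declared
above; both function names and the callback body are hypothetical, and error
handling is trimmed:

    /* Hypothetical SID-branch creation for a newly added board. */
    static int
    board_sid_create(dev_info_t *dip, void *arg, uint_t flags)
    {
    	/* Set "name", "reg" and related properties on dip here. */
    	return (DDI_WALK_PRUNECHILD);	/* leaf: create no children */
    }

    static int
    board_attach_branch(dev_info_t *pdip)
    {
    	devi_branch_t br;
    	dev_info_t *fdip = NULL;
    	int err;

    	bzero(&br, sizeof (br));
    	br.type = DEVI_BRANCH_SID;
    	br.create.sid_branch_create = board_sid_create;

    	err = e_ddi_branch_create(pdip, &br, &fdip, DEVI_BRANCH_CONFIGURE);
    	if (err != 0 && fdip != NULL)
    		ndi_rele_devi(fdip);	/* a failed configure returns a held dip */
    	return (err);
    }
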
diff --git a/usr/src/uts/common/sys/pci_impl.h b/usr/src/uts/common/sys/pci_impl.h
index d485800b96..429d6860bd 100644
--- a/usr/src/uts/common/sys/pci_impl.h
+++ b/usr/src/uts/common/sys/pci_impl.h
@@ -116,6 +116,9 @@ extern struct pci_bus_resource *pci_bus_res;
/*
* For now, x86-only to avoid conflicts with <sys/memlist_impl.h>
*/
+#define memlist_find memlist_find_pci
+#define memlist_insert memlist_insert_pci
+
extern struct memlist *memlist_alloc(void);
extern void memlist_free(struct memlist *);
extern void memlist_free_all(struct memlist **);
diff --git a/usr/src/uts/common/sys/processor.h b/usr/src/uts/common/sys/processor.h
index 3a76c8c9b4..c0fe6e21b8 100644
--- a/usr/src/uts/common/sys/processor.h
+++ b/usr/src/uts/common/sys/processor.h
@@ -32,8 +32,6 @@
#ifndef _SYS_PROCESSOR_H
#define _SYS_PROCESSOR_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/procset.h>
@@ -140,6 +138,7 @@ extern lgrpid_t gethomelgroup();
* Internal interface prototypes
*/
extern int p_online_internal(processorid_t, int, int *);
+extern int p_online_internal_locked(processorid_t, int, int *);
#endif /* !_KERNEL */
diff --git a/usr/src/uts/common/syscall/p_online.c b/usr/src/uts/common/syscall/p_online.c
index 004627569c..88a0340bf7 100644
--- a/usr/src/uts/common/syscall/p_online.c
+++ b/usr/src/uts/common/syscall/p_online.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/var.h>
@@ -49,7 +46,7 @@
* P_FAULTED
*/
int
-p_online_internal(processorid_t cpun, int new_status, int *old_status)
+p_online_internal_locked(processorid_t cpun, int new_status, int *old_status)
{
cpu_t *cp;
int status;
@@ -59,7 +56,7 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status)
/*
* Try to get a pointer to the requested CPU structure.
*/
- mutex_enter(&cpu_lock); /* protects CPU states */
+ ASSERT(MUTEX_HELD(&cpu_lock));
if ((cp = cpu_get(cpun)) == NULL) {
error = EINVAL;
goto out;
@@ -219,10 +216,21 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status)
break;
}
out:
- mutex_exit(&cpu_lock);
return (error);
}
+int
+p_online_internal(processorid_t cpun, int new_status, int *old_status)
+{
+ int rc;
+
+ mutex_enter(&cpu_lock); /* protects CPU states */
+ rc = p_online_internal_locked(cpun, new_status, old_status);
+ mutex_exit(&cpu_lock);
+
+ return (rc);
+}
+
/*
* p_online(2) - get/change processor operational status.
*
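
The split above exists so DR code that already holds cpu_lock can change
processor status without dropping and retaking the lock. A hedged sketch of
such a caller; the function is hypothetical, and P_OFFLINE is the standard
status value from <sys/processor.h>:

    /* Hypothetical DR path: offline a CPU under an existing cpu_lock hold. */
    static int
    dr_cpu_offline_locked(processorid_t cpun)
    {
    	int old_status;

    	ASSERT(MUTEX_HELD(&cpu_lock));
    	return (p_online_internal_locked(cpun, P_OFFLINE, &old_status));
    }
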
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 8739e8a366..298b8d4415 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -1085,8 +1085,8 @@ struct memseg {
page_t *pages, *epages; /* [from, to] in page array */
pfn_t pages_base, pages_end; /* [from, to] in page numbers */
struct memseg *next; /* next segment in list */
-#if defined(__sparc)
struct memseg *lnext; /* next segment in deleted list */
+#if defined(__sparc)
uint64_t pagespa, epagespa; /* [from, to] page array physical */
uint64_t nextpa; /* physical next pointer */
pfn_t kpm_pbase; /* start of kpm range */
@@ -1096,8 +1096,8 @@ struct memseg {
kpm_spage_t *kpm_spgs; /* ptr to kpm_spage array */
} mseg_un;
uint64_t kpm_pagespa; /* physical ptr to kpm (s)pages array */
- uint_t msegflags; /* memseg flags */
#endif /* __sparc */
+ uint_t msegflags; /* memseg flags */
};
/* memseg union aliases */
diff --git a/usr/src/uts/common/vm/seg_kpm.h b/usr/src/uts/common/vm/seg_kpm.h
index 0b766bbaf4..b61980df01 100644
--- a/usr/src/uts/common/vm/seg_kpm.h
+++ b/usr/src/uts/common/vm/seg_kpm.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -27,8 +26,6 @@
#ifndef _VM_SEG_KPM_H
#define _VM_SEG_KPM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -85,8 +82,13 @@ extern int segmap_kpm;
#define IS_KPM_ADDR(addr) \
((addr) >= segkpm->s_base && (addr) < (segkpm->s_base + segkpm->s_size))
+#ifdef __x86
+/* x86 systems use neither kpm_page_t nor kpm_spage_t when supporting kpm. */
+#define KPMPAGE_T_SZ (0)
+#else /* __x86 */
#define KPMPAGE_T_SZ \
((kpm_smallpages == 0) ? sizeof (kpm_page_t) : sizeof (kpm_spage_t))
+#endif /* __x86 */
#else /* SEGKPM_SUPPORT */
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 8a1cf4b2b7..eeaaa379ab 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -80,7 +80,10 @@ CORE_OBJS += \
mach_kdi.o \
mach_sysconfig.o \
machdep.o \
+ mem_config.o \
mem_config_stubs.o \
+ mem_config_arch.o \
+ memlist_new.o \
memnode.o \
microcode.o \
microfind.o \
@@ -187,7 +190,7 @@ PCPLUSMP_OBJS += apic.o apic_regops.o psm_common.o apic_introp.o \
mp_platform_common.o hpet_acpi.o
ACPI_DRV_OBJS += acpi_drv.o acpi_video.o
-ACPINEX_OBJS += acpinex_drv.o
+ACPINEX_OBJS += acpinex_drv.o acpinex_event.o
CPUDRV_OBJS += \
cpudrv.o \
@@ -200,10 +203,16 @@ ACPIDEV_OBJS += acpidev_drv.o \
acpidev_scope.o acpidev_device.o \
acpidev_container.o \
acpidev_cpu.o \
+ acpidev_dr.o \
acpidev_memory.o \
+ acpidev_pci.o \
acpidev_resource.o \
acpidev_util.o
+DRMACH_ACPI_OBJS += drmach_acpi.o dr_util.o drmach_err.o
+
+DR_OBJS += dr.o dr_cpu.o dr_err.o dr_io.o dr_mem_acpi.o dr_quiesce.o dr_util.o
+
ROOTNEX_OBJS += rootnex.o immu.o immu_dmar.o immu_dvma.o \
immu_intrmap.o immu_qinv.o immu_regs.o
diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared
index 89b47413f3..a8fbb83fdf 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared
@@ -61,6 +61,9 @@ LINTS_DIR = $(OBJS_DIR)
LINT_LIB_DIR = $(UTSBASE)/$(PLATFORM)/lint-libs/$(OBJS_DIR)
GEN_LINT_LIB_DIR = $(UTSBASE)/intel/lint-libs/$(OBJS_DIR)
+LINT32_DIRS = $(LINT32_BUILDS:%=$(UTSBASE)/$(PLATFORM)/lint-libs/%)
+LINT32_FILES = $(LINT32_DIRS:%=%/llib-l$(MODULE).ln)
+
DTRACESTUBS_O = $(OBJS_DIR)/dtracestubs.o
DTRACESTUBS = $(OBJS_DIR)/libdtracestubs.so
@@ -256,6 +259,7 @@ DRV_KMODS += tzmon
DRV_KMODS += acpi_drv
DRV_KMODS += acpinex
DRV_KMODS += amd_iommu
+DRV_KMODS += dr
DRV_KMODS += ioat
DRV_KMODS += fipe
@@ -311,6 +315,7 @@ SYS_KMODS +=
#
MISC_KMODS += gfx_private pcie
MISC_KMODS += acpidev
+MISC_KMODS += drmach_acpi
#
# 'Dacf' modules (/kernel/dacf)
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index ab82a3537f..592250db83 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -79,10 +79,42 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/acpinex/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+SBD_IOCTL = $(UTSBASE)/i86pc/sys/sbd_ioctl.h
+DRMACH_IO = $(UTSBASE)/i86pc/io/acpi/drmach_acpi
+DRMACH_GENERR = $(DRMACH_IO)/sbdgenerr
+DR_IO = $(UTSBASE)/i86pc/io/dr
+DR_GENERR = $(DR_IO)/sbdgenerr
+
+$(DRMACH_GENERR): $(DR_IO)/sbdgenerr.pl
+ $(RM) $@
+ $(CAT) $(DR_IO)/sbdgenerr.pl > $@
+ $(CHMOD) +x $@
+
+$(DRMACH_IO)/drmach_err.c: $(DRMACH_GENERR) $(SBD_IOCTL)
+ $(RM) $@
+ $(DRMACH_GENERR) EX86 < $(SBD_IOCTL) > $(DRMACH_IO)/drmach_err.c
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/amd_iommu/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(DR_GENERR): $(DR_IO)/sbdgenerr.pl
+ $(RM) $@
+ $(CAT) $(DR_IO)/sbdgenerr.pl > $@
+ $(CHMOD) +x $@
+
+$(DR_IO)/dr_err.c: $(DR_GENERR) $(SBD_IOCTL)
+ $(RM) $@
+ $(DR_GENERR) ESBD < $(SBD_IOCTL) > $(DR_IO)/dr_err.c
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/dr/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -307,9 +339,15 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/acpidev/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/acpinex/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/amd_iommu/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/dr/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
index f5d852b01a..c520e06be7 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
@@ -189,6 +189,7 @@ struct gcpu_chipshared {
kmutex_t gcpus_cfglock; /* serial MCA config from chip cores */
kmutex_t gcpus_poll_lock; /* serialize pollers on the same chip */
uint32_t gcpus_actv_banks; /* MCA bank numbers active on chip */
+ volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */
};
struct gcpu_data {
@@ -205,11 +206,13 @@ struct regs;
* CMI implementation
*/
extern int gcpu_init(cmi_hdl_t, void **);
+extern void gcpu_fini(cmi_hdl_t);
extern void gcpu_post_startup(cmi_hdl_t);
extern void gcpu_post_mpstartup(cmi_hdl_t);
extern void gcpu_faulted_enter(cmi_hdl_t);
extern void gcpu_faulted_exit(cmi_hdl_t);
extern void gcpu_mca_init(cmi_hdl_t);
+extern void gcpu_mca_fini(cmi_hdl_t hdl);
extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
#ifndef __xpv
extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
@@ -228,6 +231,7 @@ extern int cmi_enable_cmci;
* Local functions
*/
extern void gcpu_mca_poll_init(cmi_hdl_t);
+extern void gcpu_mca_poll_fini(cmi_hdl_t);
extern void gcpu_mca_poll_start(cmi_hdl_t);
extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *);
extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t);
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
index db4a695b7d..761d942f27 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
@@ -23,6 +23,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* Generic x86 CPU Module
@@ -78,22 +82,52 @@ gcpu_init(cmi_hdl_t hdl, void **datap)
*/
if ((sp = gcpu_shared[chipid]) == NULL) {
sp = kmem_zalloc(sizeof (struct gcpu_chipshared), KM_SLEEP);
+ mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER, NULL);
osp = atomic_cas_ptr(&gcpu_shared[chipid], NULL, sp);
- if (osp == NULL) {
- mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER,
- NULL);
- mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER,
- NULL);
- } else {
+ if (osp != NULL) {
+ mutex_destroy(&sp->gcpus_cfglock);
+ mutex_destroy(&sp->gcpus_poll_lock);
kmem_free(sp, sizeof (struct gcpu_chipshared));
sp = osp;
}
}
+
+ atomic_inc_32(&sp->gcpus_actv_cnt);
gcpu->gcpu_shared = sp;
return (0);
}
+/*
+ * Undo the configuration performed by gcpu_init().
+ */
+void
+gcpu_fini(cmi_hdl_t hdl)
+{
+ uint_t chipid = cmi_hdl_chipid(hdl);
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ struct gcpu_chipshared *sp;
+
+ if (gcpu_disable || chipid >= GCPU_MAX_CHIPID)
+ return;
+
+ gcpu_mca_fini(hdl);
+
+ /*
+ * Keep shared data in cache for reuse.
+ */
+ sp = gcpu_shared[chipid];
+ ASSERT(sp != NULL);
+ atomic_dec_32(&sp->gcpus_actv_cnt);
+
+ if (gcpu != NULL)
+ kmem_free(gcpu, sizeof (gcpu_data_t));
+
+ /* Release reference count held in gcpu_init(). */
+ cmi_hdl_rele(hdl);
+}
+
void
gcpu_post_startup(cmi_hdl_t hdl)
{
@@ -150,7 +184,7 @@ const cmi_ops_t _cmi_ops = {
GCPU_OP(gcpu_cmci_trap, NULL), /* cmi_cmci_trap */
gcpu_msrinject, /* cmi_msrinject */
GCPU_OP(gcpu_hdl_poke, NULL), /* cmi_hdl_poke */
- NULL, /* cmi_fini */
+ gcpu_fini, /* cmi_fini */
GCPU_OP(NULL, gcpu_xpv_panic_callback), /* cmi_panic_callback */
};
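
The reordering above is the point of the hunk: the per-chip mutexes are now
initialized before atomic_cas_ptr() publishes the structure, so a racing
reader can never observe uninitialized locks, and the CAS loser tears down
its private copy. The pattern in isolation, as a hedged sketch with an
assumed function name:

    /* Sketch: lock-free once-only publication of per-chip shared state. */
    static struct gcpu_chipshared *
    chipshared_get(uint_t chipid)
    {
    	struct gcpu_chipshared *sp, *osp;

    	if ((sp = gcpu_shared[chipid]) != NULL)
    		return (sp);

    	/* Construct the object fully before making it visible. */
    	sp = kmem_zalloc(sizeof (*sp), KM_SLEEP);
    	mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER, NULL);
    	mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER, NULL);

    	osp = atomic_cas_ptr(&gcpu_shared[chipid], NULL, sp);
    	if (osp != NULL) {
    		/* Lost the race: discard our copy, adopt the winner's. */
    		mutex_destroy(&sp->gcpus_cfglock);
    		mutex_destroy(&sp->gcpus_poll_lock);
    		kmem_free(sp, sizeof (*sp));
    		sp = osp;
    	}
    	return (sp);
    }
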
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
index 911a7c6af7..730411ebf1 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/mca_x86.h>
#include <sys/cpu_module_impl.h>
@@ -1064,7 +1068,7 @@ gcpu_mca_init(cmi_hdl_t hdl)
/*
* CPU startup code only calls cmi_mca_init if x86_feature indicates
* both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
- * processors, which have their own * more primitive way of doing
+ * processors, which have their own more primitive way of doing
* machine checks, will not have cmi_mca_init called since their
* CPUID information will not indicate both MCA and MCE features.
*/
@@ -2002,3 +2006,50 @@ gcpu_msrinject(cmi_hdl_t hdl, cmi_mca_regs_t *regs, uint_t nregs,
return (errs == 0 ? CMI_SUCCESS : CMIERR_UNKNOWN);
}
+
+/* Undo the configuration performed by gcpu_mca_init(). */
+void
+gcpu_mca_fini(cmi_hdl_t hdl)
+{
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ gcpu_mca_t *mca = &gcpu->gcpu_mca;
+ int i;
+
+ /*
+ * CPU startup code only calls cmi_mca_init if x86_feature indicates
+ * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
+ * processors, which have their own more primitive way of doing
+ * machine checks, will not have cmi_mca_init called since their
+ * CPUID information will not indicate both MCA and MCE features.
+ */
+ if ((x86_feature & X86_MCA) == 0)
+ return;
+#ifndef __xpv
+ /*
+ * disable machine check in CR4
+ */
+ cmi_ntv_hwdisable_mce(hdl);
+#endif
+ mutex_enter(&gcpu->gcpu_shared->gcpus_cfglock);
+ gcpu_mca_poll_fini(hdl);
+ mutex_exit(&gcpu->gcpu_shared->gcpus_cfglock);
+
+ /*
+ * free resources allocated during init
+ */
+ if (mca->gcpu_bank_cmci != NULL) {
+ kmem_free(mca->gcpu_bank_cmci, sizeof (gcpu_mca_cmci_t) *
+ mca->gcpu_mca_nbanks);
+ }
+
+ for (i = 0; i < GCPU_MCA_LOGOUT_NUM; i++) {
+ if (mca->gcpu_mca_logout[i] != NULL) {
+ kmem_free(mca->gcpu_mca_logout[i], mca->gcpu_mca_lgsz);
+ }
+ }
+
+ if (mca->gcpu_mca_bioscfg.bios_bankcfg != NULL) {
+ kmem_free(mca->gcpu_mca_bioscfg.bios_bankcfg,
+ sizeof (struct gcpu_bios_bankcfg) * mca->gcpu_mca_nbanks);
+ }
+}
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
index f68f96259c..361d147b56 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
@@ -23,6 +23,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
 * Native MCA polling. We establish an omnipresent cyclic to fire on all
@@ -31,6 +35,7 @@
*/
#include <sys/types.h>
+#include <sys/atomic.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>
@@ -39,7 +44,9 @@
hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL; /* tuneable */
static cyclic_id_t gcpu_mca_poll_cycid;
-static int gcpu_mca_poll_inits;
+static volatile uint_t gcpu_mca_poll_inits;
+extern int gcpu_poll_trace_always;
+extern uint_t gcpu_poll_trace_nent;
/*
 * Return nonzero if the given handle should poll the MCH. We stick with
@@ -280,7 +287,24 @@ gcpu_mca_poll_init(cmi_hdl_t hdl)
gcpu_poll_trace_init(ptc);
- gcpu_mca_poll_inits++;
+ atomic_inc_uint(&gcpu_mca_poll_inits);
+}
+
+/* Undo the configuration performed by gcpu_mca_poll_init(). */
+void
+gcpu_mca_poll_fini(cmi_hdl_t hdl)
+{
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;
+
+ ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
+
+ if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
+ kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
+ gcpu_poll_trace_nent);
+ }
+
+ atomic_dec_uint(&gcpu_mca_poll_inits);
}
void
diff --git a/usr/src/uts/i86pc/dr/Makefile b/usr/src/uts/i86pc/dr/Makefile
new file mode 100644
index 0000000000..03aa5f615e
--- /dev/null
+++ b/usr/src/uts/i86pc/dr/Makefile
@@ -0,0 +1,104 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# This makefile drives the production of the dr driver module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+PLATFORM = i86pc
+
+#
+# Define the module and object file sets.
+#
+MODULE = dr
+OBJECTS = $(DR_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DR_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/i86pc/io/dr
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Overrides
+#
+DEF_BUILDS = $(DEF_BUILDS64)
+ALL_BUILDS = $(ALL_BUILDS64)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/drmach_acpi
+
+CLEANFILES += $(DR_GENERR)
+CLEANFILES += $(DR_IO)/dr_err.c
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS) lint32
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS) $(CONF_INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
+
diff --git a/usr/src/uts/i86pc/drmach_acpi/Makefile b/usr/src/uts/i86pc/drmach_acpi/Makefile
new file mode 100644
index 0000000000..24f9197b32
--- /dev/null
+++ b/usr/src/uts/i86pc/drmach_acpi/Makefile
@@ -0,0 +1,101 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# This makefile drives the production of the drmach_acpi loadable module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = drmach_acpi
+OBJECTS = $(DRMACH_ACPI_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DRMACH_ACPI_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides
+#
+DEF_BUILDS = $(DEF_BUILDS64)
+ALL_BUILDS = $(ALL_BUILDS64)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/acpica -Nmisc/acpidev
+
+CLEANFILES += $(DRMACH_GENERR)
+CLEANFILES += $(DRMACH_IO)/drmach_err.c
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS) lint32
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
index 33de316493..5c58266634 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -54,11 +54,13 @@
#include <sys/types.h>
#include <sys/atomic.h>
+#include <sys/note.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
static ACPI_STATUS acpidev_container_probe(acpidev_walk_info_t *infop);
@@ -124,7 +126,7 @@ static acpidev_filter_rule_t acpidev_container_filters[] = {
static ACPI_STATUS
acpidev_container_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -137,19 +139,35 @@ acpidev_container_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ acpidev_dr_check(infop);
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in "
"acpidev_container_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -162,11 +180,12 @@ acpidev_container_probe(acpidev_walk_info_t *infop)
return (rc);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_container_search_dev(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
void **retval)
{
+ _NOTE(ARGUNUSED(hdl, retval));
+
int *fp = (int *)ctx;
*fp = lvl;
@@ -221,11 +240,11 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
}
if (cpu_lvl == 1) {
/* CPU as child, most likely a physical CPU. */
- (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU,
devnamelen);
} else if (cpu_lvl == 2 && module_lvl == 1) {
/* CPU as grandchild, most likely a system board. */
- (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD,
devnamelen);
} else if (ACPI_FAILURE(AcpiGetName(infop->awi_hdl,
ACPI_SINGLE_NAME, &buf))) {
@@ -233,7 +252,7 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
* Failed to get ACPI object name; use ACPI object name
* as the default name.
*/
- (void) strncpy(devname, ACPIDEV_NODE_NAME_CONTAINER,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_CONTAINER,
devnamelen);
}
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
index 0576591147..fa93871fb4 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -43,6 +43,7 @@
#include <sys/bootconf.h>
#include <sys/cpuvar.h>
#include <sys/machsystm.h>
+#include <sys/note.h>
#include <sys/psm_types.h>
#include <sys/x86_archext.h>
#include <sys/sunddi.h>
@@ -70,11 +71,14 @@ static ACPI_STATUS acpidev_cpu_probe(acpidev_walk_info_t *infop);
static acpidev_filter_result_t acpidev_cpu_filter(acpidev_walk_info_t *infop,
char *devname, int maxlen);
static ACPI_STATUS acpidev_cpu_init(acpidev_walk_info_t *infop);
+static void acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ acpidev_class_t *clsp);
static acpidev_filter_result_t acpidev_cpu_filter_func(
acpidev_walk_info_t *infop, ACPI_HANDLE hdl, acpidev_filter_rule_t *afrp,
char *devname, int len);
-static int acpidev_cpu_query_dip(cpu_t *, dev_info_t **);
+static int acpidev_cpu_create_dip(cpu_t *, dev_info_t **);
+static int acpidev_cpu_get_dip(cpu_t *, dev_info_t **);
/*
* Default class driver for ACPI processor/CPU objects.
@@ -91,7 +95,7 @@ acpidev_class_t acpidev_class_cpu = {
acpidev_cpu_probe, /* adc_probe */
acpidev_cpu_filter, /* adc_filter */
acpidev_cpu_init, /* adc_init */
- NULL, /* adc_fini */
+ acpidev_cpu_fini, /* adc_fini */
};
/*
@@ -133,12 +137,14 @@ static char *acpidev_cpu_uid_formats[] = {
"SCK%x-CPU%x",
};
-static ACPI_HANDLE acpidev_cpu_map_hdl = NULL;
-static uint32_t acpidev_cpu_map_count = 0;
-static struct acpidev_cpu_map_item *acpidev_cpu_map = NULL;
+static ACPI_HANDLE acpidev_cpu_map_hdl;
+static uint32_t acpidev_cpu_map_count;
+static struct acpidev_cpu_map_item *acpidev_cpu_map;
extern int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **);
-static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **) = NULL;
+static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **);
+extern int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **);
+static int (*psm_cpu_get_devinfo_old)(cpu_t *, dev_info_t **);
/* Count how many enabled CPUs are in the MADT table. */
static ACPI_STATUS
@@ -200,7 +206,7 @@ acpidev_cpu_parse_MADT(ACPI_SUBTABLE_HEADER *ap, void *context)
/* See comment at beginning about 255 limitation. */
if (mpx2a->LocalApicId < 255) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: encountered CPU with X2APIC Id < 255.");
+ "!acpidev: encountered CPU with X2APIC Id < 255.");
} else if (mpx2a->LapicFlags & ACPI_MADT_ENABLED) {
ASSERT(*cntp < acpidev_cpu_map_count);
acpidev_cpu_map[*cntp].proc_id = mpx2a->Uid;
@@ -270,7 +276,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context)
}
return (AE_CTRL_TERMINATE);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: encountered CPU "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: encountered CPU "
"with X2APIC Id < 255 in _MAT.");
}
break;
@@ -292,7 +298,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context)
* x2APIC and x2APIC NMI Structure.
*/
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown APIC entry type %u in _MAT.", ap->Type);
+ "!acpidev: unknown APIC entry type %u in _MAT.", ap->Type);
break;
}
@@ -313,7 +319,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
if (infop->awi_info->Type != ACPI_TYPE_PROCESSOR &&
infop->awi_info->Type != ACPI_TYPE_DEVICE) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: object %s is not PROCESSOR or DEVICE.",
+ "!acpidev: object %s is not PROCESSOR or DEVICE.",
infop->awi_name);
return (AE_BAD_PARAMETER);
}
@@ -345,7 +351,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
return (AE_OK);
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to evaluate ACPI object %s.",
+ "!acpidev: failed to evaluate ACPI object %s.",
infop->awi_name);
}
}
@@ -365,6 +371,59 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
}
static ACPI_STATUS
+acpidev_cpu_get_proximity_id(ACPI_HANDLE hdl, uint32_t apicid, uint32_t *pxmidp)
+{
+ int len, off;
+ ACPI_SUBTABLE_HEADER *sp;
+ ACPI_SRAT_CPU_AFFINITY *xp;
+ ACPI_SRAT_X2APIC_CPU_AFFINITY *x2p;
+
+ ASSERT(hdl != NULL);
+ ASSERT(pxmidp != NULL);
+ *pxmidp = UINT32_MAX;
+
+ if (ACPI_SUCCESS(acpidev_eval_pxm(hdl, pxmidp))) {
+ return (AE_OK);
+ }
+ if (acpidev_srat_tbl_ptr == NULL) {
+ return (AE_NOT_FOUND);
+ }
+
+ /* Search the static ACPI SRAT table for proximity domain id. */
+ sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1);
+ len = acpidev_srat_tbl_ptr->Header.Length;
+ off = sizeof (*acpidev_srat_tbl_ptr);
+ while (off < len) {
+ switch (sp->Type) {
+ case ACPI_SRAT_TYPE_CPU_AFFINITY:
+ xp = (ACPI_SRAT_CPU_AFFINITY *)sp;
+ if ((xp->Flags & ACPI_SRAT_CPU_ENABLED) &&
+ xp->ApicId == apicid) {
+ *pxmidp = xp->ProximityDomainLo;
+ *pxmidp |= xp->ProximityDomainHi[0] << 8;
+ *pxmidp |= xp->ProximityDomainHi[1] << 16;
+ *pxmidp |= xp->ProximityDomainHi[2] << 24;
+ return (AE_OK);
+ }
+ break;
+
+ case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
+ x2p = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)sp;
+ if ((x2p->Flags & ACPI_SRAT_CPU_ENABLED) &&
+ x2p->ApicId == apicid) {
+ *pxmidp = x2p->ProximityDomain;
+ return (AE_OK);
+ }
+ break;
+ }
+ off += sp->Length;
+ sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length);
+ }
+
+ return (AE_NOT_FOUND);
+}
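Editorial note: for ACPI_SRAT_TYPE_CPU_AFFINITY entries, the 32-bit proximity domain is split across the one-byte ProximityDomainLo field and the three-byte ProximityDomainHi array (an ACPI 1.0 layout kept for compatibility), so the code above reassembles it byte by byte. A standalone worked example of that reassembly, with made-up byte values:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint8_t lo = 0x04;			/* ProximityDomainLo */
		uint8_t hi[3] = { 0x03, 0x02, 0x01 };	/* ProximityDomainHi */
		uint32_t pxm;

		pxm = lo;
		pxm |= (uint32_t)hi[0] << 8;
		pxm |= (uint32_t)hi[1] << 16;
		pxm |= (uint32_t)hi[2] << 24;
		/* Prints "proximity domain: 0x01020304". */
		printf("proximity domain: 0x%08x\n", pxm);
		return (0);
	}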
+
+static ACPI_STATUS
acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
{
uint32_t count = 0;
@@ -373,6 +432,7 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
ASSERT(infop != NULL);
if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE &&
acpidev_cpu_map_hdl == NULL) {
+		/* Parse CPU-related information in the ACPI MADT table. */
(void) acpidev_walk_apic(NULL, NULL, NULL,
acpidev_cpu_count_MADT, &acpidev_cpu_map_count);
acpidev_cpu_map = kmem_zalloc(sizeof (acpidev_cpu_map[0])
@@ -381,6 +441,12 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
acpidev_cpu_parse_MADT, &count);
ASSERT(count == acpidev_cpu_map_count);
acpidev_cpu_map_hdl = infop->awi_hdl;
+
+ /* Cache pointer to the ACPI SRAT table. */
+ if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SRAT, 1,
+ (ACPI_TABLE_HEADER **)&acpidev_srat_tbl_ptr))) {
+ acpidev_srat_tbl_ptr = NULL;
+ }
}
return (AE_OK);
@@ -404,7 +470,9 @@ acpidev_cpu_post_probe(acpidev_walk_info_t *infop)
/* replace psm_cpu_create_devinfo with local implementation. */
psm_cpu_create_devinfo_old = psm_cpu_create_devinfo;
- psm_cpu_create_devinfo = acpidev_cpu_query_dip;
+ psm_cpu_create_devinfo = acpidev_cpu_create_dip;
+ psm_cpu_get_devinfo_old = psm_cpu_get_devinfo;
+ psm_cpu_get_devinfo = acpidev_cpu_get_dip;
}
return (AE_OK);
@@ -427,27 +495,40 @@ acpidev_cpu_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- /*
- * Mark device as offline. It will be changed to online state
- * when the corresponding CPU starts up.
- */
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN |
- ACPIDEV_PROCESS_FLAG_CREATE |
- ACPIDEV_PROCESS_FLAG_OFFLINE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN |
- ACPIDEV_PROCESS_FLAG_CREATE |
- ACPIDEV_PROCESS_FLAG_OFFLINE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ /*
+ * Mark device as offline. It will be changed to online state
+ * when the corresponding CPU starts up.
+ */
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_OFFLINE;
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_OFFLINE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in "
"acpidev_cpu_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -474,12 +555,12 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
if (acpidev_cpu_get_procid(infop, &procid) != 0) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to query processor id for %s.",
+ "!acpidev: failed to query processor id for %s.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
} else if (acpidev_cpu_get_apicid(procid, &apicid) != 0) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to query apic id for %s.",
+ "!acpidev: failed to query apic id for %s.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
}
@@ -499,7 +580,7 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
return (ACPIDEV_FILTER_SKIP);
} else if (!mat.enabled) {
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: CPU %s has been disabled.",
+ "!acpidev: CPU %s has been disabled.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
}
@@ -536,6 +617,7 @@ static ACPI_STATUS
acpidev_cpu_init(acpidev_walk_info_t *infop)
{
int count;
+ uint32_t pxmid;
dev_info_t *dip;
ACPI_HANDLE hdl;
char unitaddr[64];
@@ -550,7 +632,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
dip = infop->awi_dip;
hdl = infop->awi_hdl;
- /* Create "apic_id" and "processor_id" properties. */
+ /* Create "apic_id", "processor_id" and "proximity_id" properties. */
if (ndi_prop_update_int(DDI_DEV_T_NONE, dip,
ACPIDEV_PROP_NAME_PROCESSOR_ID, infop->awi_scratchpad[0]) !=
NDI_SUCCESS) {
@@ -567,6 +649,15 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
infop->awi_name);
return (AE_ERROR);
}
+ if (ACPI_SUCCESS(acpidev_cpu_get_proximity_id(infop->awi_hdl,
+ infop->awi_scratchpad[1], &pxmid))) {
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ ACPIDEV_PROP_NAME_PROXIMITY_ID, pxmid) != NDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpidev: failed to set proximity id "
+ "property for %s.", infop->awi_name);
+ return (AE_ERROR);
+ }
+ }
/* Set "compatible" property for CPU dip */
count = sizeof (compatible) / sizeof (compatible[0]);
@@ -616,7 +707,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
}
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown operation type %u in acpidev_cpu_init.",
+ "!acpidev: unknown operation type %u in acpidev_cpu_init.",
infop->awi_op_type);
return (AE_BAD_PARAMETER);
}
@@ -624,18 +715,38 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
return (AE_OK);
}
+static void
+acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ acpidev_class_t *clsp)
+{
+ _NOTE(ARGUNUSED(clsp, dhdl));
+
+ int rc;
+ uint32_t procid;
+
+ rc = acpica_get_procid_by_object(hdl, &procid);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_SUCCESS(rc)) {
+ rc = acpica_remove_processor_from_map(procid);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "!acpidev: failed to remove "
+ "processor from ACPICA.");
+ }
+ }
+}
+
+/*
+ * Look up the dip for a CPU if ACPI CPU autoconfig is enabled.
+ */
static int
-acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp)
+acpidev_cpu_lookup_dip(cpu_t *cp, dev_info_t **dipp)
{
uint32_t apicid;
ACPI_HANDLE hdl;
dev_info_t *dip = NULL;
*dipp = NULL;
- /*
- * Try to get the dip associated with the CPU if ACPI_DEVCFG_CPU is
- * enabled.
- */
if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
apicid = cpuid_get_apicid(cp);
if (acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl) == 0 ||
@@ -644,18 +755,41 @@ acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp)
ASSERT(hdl != NULL);
if (ACPI_SUCCESS(acpica_get_devinfo(hdl, &dip))) {
ASSERT(dip != NULL);
- ndi_hold_devi(dip);
*dipp = dip;
return (PSM_SUCCESS);
}
}
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to lookup dip for cpu %d(%p).",
+ cp->cpu_id, (void *)cp);
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get dip for cpu %d(%p).",
- cp->cpu_id, (void *)cp);
+ return (PSM_FAILURE);
+}
+
+static int
+acpidev_cpu_create_dip(cpu_t *cp, dev_info_t **dipp)
+{
+ if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) {
+ ndi_hold_devi(*dipp);
+ return (PSM_SUCCESS);
+ }
if (psm_cpu_create_devinfo_old != NULL) {
return (psm_cpu_create_devinfo_old(cp, dipp));
} else {
return (PSM_FAILURE);
}
}
+
+static int
+acpidev_cpu_get_dip(cpu_t *cp, dev_info_t **dipp)
+{
+ if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) {
+ return (PSM_SUCCESS);
+ }
+ if (psm_cpu_get_devinfo_old != NULL) {
+ return (psm_cpu_get_devinfo_old(cp, dipp));
+ } else {
+ return (PSM_FAILURE);
+ }
+}
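Editorial note: both acpidev_cpu_create_dip() and acpidev_cpu_get_dip() use the same interposition pattern: the previous PSM hook is saved at registration time, the ACPI lookup is tried first, and the saved hook serves as the fallback. A self-contained sketch of the pattern (the names here are illustrative, not kernel APIs):

	#include <stdio.h>

	typedef int (*hook_t)(int);

	static hook_t psm_hook;		/* active handler */
	static hook_t psm_hook_old;	/* saved previous handler */

	static int
	fallback(int arg)
	{
		printf("fallback(%d)\n", arg);
		return (0);
	}

	static int
	interposer(int arg)
	{
		if (arg > 0) {		/* pretend the ACPI lookup hit */
			printf("handled %d locally\n", arg);
			return (0);
		}
		/* Miss: delegate to the saved hook, if any. */
		return (psm_hook_old != NULL ? psm_hook_old(arg) : -1);
	}

	int
	main(void)
	{
		psm_hook = fallback;
		psm_hook_old = psm_hook;	/* save before replacing */
		psm_hook = interposer;
		(void) psm_hook(1);		/* handled locally */
		(void) psm_hook(-1);		/* falls back */
		return (0);
	}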
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
index fbf6e944b4..cf65cf4084 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -103,7 +103,7 @@ static acpidev_filter_rule_t acpidev_device_filters[] = {
static ACPI_STATUS
acpidev_device_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -114,20 +114,31 @@ acpidev_device_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ break;
+
+ default:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown operation type %u in "
+ "!acpidev: unknown operation type %u in "
"acpi_device_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -153,7 +164,7 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
ACPIDEV_ARRAY_PARAM(acpidev_device_filters),
devname, maxlen);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_device_filter().", infop->awi_op_type);
res = ACPIDEV_FILTER_FAILED;
}
@@ -161,7 +172,6 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_device_init(acpidev_walk_info_t *infop)
{
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c
new file mode 100644
index 0000000000..e346c65cfb
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c
@@ -0,0 +1,2743 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/memlist.h>
+#include <sys/memlist_impl.h>
+#include <sys/note.h>
+#include <sys/obpdefs.h>
+#include <sys/synch.h>
+#include <sys/sysmacros.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/x86_archext.h>
+#include <sys/machsystm.h>
+#include <sys/memnode.h> /* for lgrp_plat_node_cnt */
+#include <sys/psm_types.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpidev_impl.h>
+
+struct acpidev_dr_set_prop_arg {
+ uint32_t level;
+ uint32_t bdnum;
+ uint32_t cpu_id;
+ uint32_t mem_id;
+ uint32_t io_id;
+ uint32_t mod_id;
+};
+
+struct acpidev_dr_device_remove_arg {
+ uint32_t level;
+};
+
+extern int acpidev_options;
+
+/* User configurable option to enable/disable ACPI based DR operations. */
+int acpidev_dr_enable = 1;
+int acpidev_dr_hierarchy_name = 1;
+uint32_t acpidev_dr_max_segs_per_mem_device = ACPIDEV_DR_SEGS_PER_MEM_DEV;
+uint32_t acpidev_dr_max_memlists_per_seg = ACPIDEV_DR_MEMLISTS_PER_SEG;
+
+ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr;
+ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr;
+
+/* ACPI based DR operations are unsupported if zero. */
+static int acpidev_dr_supported = -1;
+
+/* Failed to initialize support of DR operations if non-zero. */
+static int acpidev_dr_failed;
+
+static volatile uint32_t acpidev_dr_boards;
+static volatile uint32_t acpidev_dr_board_index;
+static uint32_t acpidev_dr_max_cmp_per_board;
+static uint32_t acpidev_dr_max_memory_per_board;
+static uint32_t acpidev_dr_max_io_per_board;
+static uint32_t acpidev_dr_memory_device_cnt;
+
+static ACPI_HANDLE *acpidev_dr_board_handles[ACPIDEV_DR_MAX_BOARDS];
+
+/* Lock to protect/block DR operations at runtime. */
+static kmutex_t acpidev_dr_lock;
+
+static acpidev_dr_capacity_t acpidev_dr_capacities[] = {
+ { /* Nehalem-EX */
+ X86_VENDOR_Intel, 0x6, 0x2e, 0x2e, 0, UINT_MAX,
+ B_TRUE, /* Hotplug capable */
+ 1ULL << 30, /* Align on 1GB boundary */
+ },
+	{ /* the last item marks the end of the table */
+ UINT_MAX, UINT_MAX, UINT_MAX, 0, UINT_MAX, 0,
+ B_FALSE,
+ 0,
+ },
+};
+
+static ACPI_STATUS acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg,
+ void **retval);
+
+static acpidev_dr_capacity_t *
+acpidev_dr_get_capacity(void)
+{
+ acpidev_dr_capacity_t *cp, *cp1;
+ uint_t vendor, family, model, step;
+ static acpidev_dr_capacity_t *acpidev_dr_capacity_curr = NULL;
+
+ if (acpidev_dr_capacity_curr != NULL) {
+ return (acpidev_dr_capacity_curr);
+ }
+
+ kpreempt_disable();
+ vendor = cpuid_getvendor(CPU);
+ family = cpuid_getfamily(CPU);
+ model = cpuid_getmodel(CPU);
+ step = cpuid_getstep(CPU);
+ kpreempt_enable();
+
+ for (cp = acpidev_dr_capacities; ; cp++) {
+ ASSERT(cp < acpidev_dr_capacities +
+ sizeof (acpidev_dr_capacities) / sizeof (*cp));
+
+		/* Check whether we have reached the last item of the table. */
+ if (cp->cpu_vendor == UINT_MAX && cp->cpu_family == UINT_MAX &&
+ cp->cpu_model_min == UINT_MAX && cp->cpu_model_max == 0 &&
+ cp->cpu_step_min == UINT_MAX && cp->cpu_step_max == 0) {
+ break;
+ }
+ if (cp->cpu_vendor == vendor && cp->cpu_family == family &&
+ model >= cp->cpu_model_min && model <= cp->cpu_model_max &&
+ step >= cp->cpu_step_min && step <= cp->cpu_step_max) {
+ break;
+ }
+ }
+
+ /* Assume all CPUs in system are homogeneous. */
+ cp1 = atomic_cas_ptr(&acpidev_dr_capacity_curr, NULL, cp);
+ ASSERT(cp1 == NULL || cp1 == cp);
+ if (cp1 != NULL && cp1 != cp) {
+ return (NULL);
+ }
+
+ return (cp);
+}
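Editorial note: the atomic_cas_ptr() above implements one-shot caching of the table scan: the first caller publishes its result and later callers reuse it without rescanning. A user-level sketch of the idiom using C11 atomics (unlike the kernel code, which fails the request on a mismatched race, this sketch simply adopts the winner's value):

	#include <stdatomic.h>
	#include <stdio.h>

	static int table[] = { 1, 2, 3 };
	static _Atomic(int *) cached;

	static int *
	lookup(void)
	{
		int *expected = NULL;
		int *p = atomic_load(&cached);

		if (p != NULL)
			return (p);	/* fast path: already cached */
		p = &table[1];		/* pretend this scan is expensive */
		if (!atomic_compare_exchange_strong(&cached, &expected, p))
			p = expected;	/* another thread won the race */
		return (p);
	}

	int
	main(void)
	{
		printf("%d\n", *lookup());	/* prints 2 */
		return (0);
	}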
+
+int
+acpidev_dr_capable(void)
+{
+ uint64_t flags1, flags2;
+ acpidev_dr_capacity_t *cp;
+
+ /*
+ * Disable support of DR operations if:
+ * 1) acpidev fails to initialize DR interfaces.
+ * 2) ACPI based DR has been disabled by user.
+ * 3) No DR capable devices have been detected.
+ * 4) The system doesn't support DR operations.
+ * 5) Some acpidev features have been disabled by user.
+ */
+ if (acpidev_dr_failed != 0 || acpidev_dr_enable == 0 ||
+ acpidev_dr_supported == 0) {
+ return (0);
+ }
+
+ flags1 = ACPI_FEATURE_DEVCFG | ACPI_FEATURE_OSI_MODULE;
+ flags2 = ACPI_DEVCFG_CPU | ACPI_DEVCFG_MEMORY |
+ ACPI_DEVCFG_CONTAINER | ACPI_DEVCFG_PCI;
+ if (acpica_get_core_feature(flags1) != flags1 ||
+ acpica_get_devcfg_feature(flags2) != flags2) {
+ cmn_err(CE_CONT,
+ "?acpidev: disable support of ACPI based DR because "
+ "some acpidev features have been disabled by user.\n");
+ acpidev_dr_supported = 0;
+ return (0);
+ }
+
+ cp = acpidev_dr_get_capacity();
+ if (cp == NULL || cp->hotplug_supported == B_FALSE) {
+ return (0);
+ }
+
+ return (1);
+}
+
+uint32_t
+acpidev_dr_max_boards(void)
+{
+ return (acpidev_dr_boards);
+}
+
+uint32_t
+acpidev_dr_max_io_units_per_board(void)
+{
+ return (acpidev_dr_max_io_per_board);
+}
+
+uint32_t
+acpidev_dr_max_mem_units_per_board(void)
+{
+ return (acpidev_dr_max_memory_per_board);
+}
+
+uint32_t
+acpidev_dr_max_cmp_units_per_board(void)
+{
+ return (acpidev_dr_max_cmp_per_board);
+}
+
+uint32_t
+acpidev_dr_max_cpu_units_per_cmp(void)
+{
+ static int max_cnt;
+
+ if (max_cnt == 0) {
+ kpreempt_disable();
+ max_cnt = cpuid_get_ncpu_per_chip(CPU);
+ kpreempt_enable();
+ }
+
+ return (max_cnt);
+}
+
+uint32_t
+acpidev_dr_max_segments_per_mem_device(void)
+{
+ if (acpidev_dr_max_segs_per_mem_device < 1) {
+ return (ACPIDEV_DR_SEGS_PER_MEM_DEV);
+ } else {
+ return (acpidev_dr_max_segs_per_mem_device);
+ }
+}
+
+uint32_t
+acpidev_dr_max_memlists_per_segment(void)
+{
+ if (acpidev_dr_max_memlists_per_seg < ACPIDEV_DR_MEMLISTS_PER_SEG) {
+ return (ACPIDEV_DR_MEMLISTS_PER_SEG);
+ } else {
+ return (acpidev_dr_max_memlists_per_seg);
+ }
+}
+
+void
+acpidev_dr_init(void)
+{
+ mutex_init(&acpidev_dr_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+static void
+acpidev_dr_check_board_type(acpidev_data_handle_t dhdl,
+ struct acpidev_dr_set_prop_arg *ap, char *objname)
+{
+ if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_MEMORY) {
+ /* Memory board should have only one memory device. */
+ ASSERT(ap->cpu_id == 0);
+ ASSERT(ap->mem_id == 1);
+ ASSERT(ap->io_id == 0);
+ ASSERT(ap->mod_id == 0);
+ dhdl->aod_bdtype = ACPIDEV_MEMORY_BOARD;
+ } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI ||
+ dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) {
+ /* IO board should have only one IO device. */
+ ASSERT(ap->cpu_id == 0);
+ ASSERT(ap->mem_id == 0);
+ ASSERT(ap->io_id == 1);
+ ASSERT(ap->mod_id == 0);
+ dhdl->aod_bdtype = ACPIDEV_IO_BOARD;
+ } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_CONTAINER) {
+ if (ap->mod_id == 1 && ap->mem_id == 0) {
+ dhdl->aod_bdtype = ACPIDEV_CPU_BOARD;
+ } else {
+ dhdl->aod_bdtype = ACPIDEV_SYSTEM_BOARD;
+ }
+ } else {
+ cmn_err(CE_WARN,
+ "!acpidev: unknown type of hotplug capable board %s.",
+ objname);
+ ASSERT(0);
+ }
+}
+
+/*
+ * Check for hotplug capable boards and create the environment to support
+ * ACPI based DR operations. There is no need to acquire locks here; this
+ * is called from single-threaded context during boot.
+ */
+void
+acpidev_dr_check(acpidev_walk_info_t *infop)
+{
+ uint_t cmp;
+ boolean_t found = B_FALSE;
+ ACPI_HANDLE phdl;
+ acpidev_data_handle_t dhdl, pdhdl;
+ struct acpidev_dr_set_prop_arg arg;
+
+ if (infop == NULL ||
+ infop->awi_op_type != ACPIDEV_OP_BOOT_PROBE) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: invalid parameter to acpidev_dr_check().");
+ return;
+ }
+
+ if (acpidev_dr_capable() == 0) {
+ return;
+ }
+
+ dhdl = infop->awi_data;
+ ASSERT(dhdl != NULL);
+
+ /* This device has already been handled before. */
+ if (ACPIDEV_DR_IS_PROCESSED(dhdl)) {
+ return;
+ }
+
+ /*
+	 * The device is hotplug capable if the ACPI _EJ0 method is
+	 * available.
+ */
+ if (!ACPIDEV_DR_IS_BOARD(dhdl) &&
+ acpidev_dr_device_hotplug_capable(infop->awi_hdl)) {
+ ACPIDEV_DR_SET_BOARD(dhdl);
+ }
+
+ /* All things are done if the device isn't hotplug capable. */
+ if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ return;
+ }
+
+ /* Check whether hardware topology is supported or not. */
+ if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, NULL,
+ NULL))) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: hardware topology under %s "
+ "is unsupported for DR operations.", infop->awi_name);
+ return;
+ }
+
+ /* Generate board/index/port number for the hotplug capable board. */
+ dhdl->aod_bdnum = atomic_inc_32_nv(&acpidev_dr_boards) - 1;
+ dhdl->aod_portid = 0;
+ phdl = infop->awi_hdl;
+ while (ACPI_SUCCESS(AcpiGetParent(phdl, &phdl)) &&
+ phdl != ACPI_ROOT_OBJECT) {
+ pdhdl = acpidev_data_get_handle(phdl);
+ if (pdhdl != NULL && ACPIDEV_DR_IS_BOARD(pdhdl)) {
+ dhdl->aod_bdidx = atomic_inc_32_nv(&pdhdl->aod_chidx);
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE) {
+ dhdl->aod_bdidx = atomic_inc_32_nv(&acpidev_dr_board_index);
+ }
+ dhdl->aod_bdidx -= 1;
+
+ /* Found too many hotplug capable boards. */
+ if (dhdl->aod_bdnum >= ACPIDEV_DR_MAX_BOARDS) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ cmn_err(CE_WARN, "!acpidev: too many hotplug capable boards, "
+ "max %d, found %d.",
+ ACPIDEV_DR_MAX_BOARDS, dhdl->aod_bdnum + 1);
+ return;
+ }
+
+ /* Scan all descendant devices to prepare info for DR operations. */
+ bzero(&arg, sizeof (arg));
+ arg.bdnum = dhdl->aod_bdnum;
+ arg.level = infop->awi_level;
+ if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, &arg,
+ NULL))) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to set DR properties "
+ "for descendants of %s.", infop->awi_name);
+ return;
+ }
+
+ /* Get type of the hotplug capable board. */
+ acpidev_dr_check_board_type(dhdl, &arg, infop->awi_name);
+
+ /*
+	 * Save the ACPI handle of the hotplug capable board to speed up
+	 * board handle lookup if caching is enabled.
+ */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) {
+ acpidev_dr_board_handles[dhdl->aod_bdnum] = infop->awi_hdl;
+ }
+
+ /* Update system maximum DR capabilities. */
+ cmp = (arg.cpu_id + acpidev_dr_max_cpu_units_per_cmp() - 1);
+ cmp /= acpidev_dr_max_cpu_units_per_cmp();
+ if (cmp > acpidev_dr_max_cmp_per_board) {
+ acpidev_dr_max_cmp_per_board = cmp;
+ }
+ if (arg.mem_id > acpidev_dr_max_memory_per_board) {
+ acpidev_dr_max_memory_per_board = arg.mem_id;
+ }
+ if (arg.io_id > acpidev_dr_max_io_per_board) {
+ acpidev_dr_max_io_per_board = arg.io_id;
+ }
+}
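Editorial note: the CMP count above is a ceiling division: the number of CPU devices found under the board divided by the CPU units per CMP, rounded up. A worked example with hypothetical counts:

	#include <stdio.h>

	int
	main(void)
	{
		unsigned cpus = 5, per_cmp = 4;		/* hypothetical */
		unsigned cmps = (cpus + per_cmp - 1) / per_cmp;

		/* ceil(5 / 4) == 2, so prints "2 CMP modules". */
		printf("%u CMP modules\n", cmps);
		return (0);
	}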
+
+static void
+acpidev_dr_initialize_memory_hotplug(void)
+{
+ caddr_t buf;
+ uint32_t cnt;
+ acpidev_dr_capacity_t *cp;
+
+ /*
+ * We have already checked that the platform supports DR operations.
+ */
+ cp = acpidev_dr_get_capacity();
+ ASSERT(cp != NULL && cp->hotplug_supported);
+ ASSERT(ISP2(cp->memory_alignment));
+ ASSERT(cp->memory_alignment > MMU_PAGESIZE);
+ mem_node_physalign = cp->memory_alignment;
+
+ /* Pre-populate memlist cache. */
+ cnt = acpidev_dr_memory_device_cnt;
+ cnt *= acpidev_dr_max_segments_per_mem_device();
+ cnt *= acpidev_dr_max_memlists_per_segment();
+ if (cnt > ACPIDEV_DR_MAX_MEMLIST_ENTRIES) {
+ cmn_err(CE_WARN, "!acpidev: attempted to reserve too many "
+ "memlist entries (%u), max %u. Falling back to %u and "
+ "some memory hot add operations may fail.",
+ cnt, ACPIDEV_DR_MAX_MEMLIST_ENTRIES,
+ ACPIDEV_DR_MAX_MEMLIST_ENTRIES);
+ cnt = ACPIDEV_DR_MAX_MEMLIST_ENTRIES;
+ }
+ cnt *= sizeof (struct memlist);
+ buf = kmem_zalloc(cnt, KM_SLEEP);
+ memlist_free_block(buf, cnt);
+}
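Editorial note: the memlist reservation is sized as the product of three quantities (memory devices, segments per device, memlists per segment) and clamped to a fixed maximum. A sketch of the sizing and clamp with hypothetical values (the real limits come from acpidev_dr.h):

	#include <stdio.h>

	#define	MAX_MEMLIST_ENTRIES	1024U	/* hypothetical clamp */

	int
	main(void)
	{
		unsigned devs = 8, segs = 16, lists = 16;  /* hypothetical */
		unsigned cnt = devs * segs * lists;	   /* 2048 */

		if (cnt > MAX_MEMLIST_ENTRIES)
			cnt = MAX_MEMLIST_ENTRIES;	/* clamped to 1024 */
		printf("reserving %u memlist entries\n", cnt);
		return (0);
	}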
+
+/*
+ * Create pseudo DR control device node if the system is hotplug capable.
+ * No need to acquire lock, it's called from single-threaded context
+ * during boot. pdip has been held by the caller.
+ */
+static ACPI_STATUS
+acpidev_dr_create_node(dev_info_t *pdip)
+{
+ dev_info_t *dip;
+ char unit[32];
+ char *path;
+ char *comps[] = {
+ "acpidr_sbd",
+ };
+
+ /*
+ * Disable support of DR operations if no hotplug capable board has
+ * been detected.
+ */
+ if (acpidev_dr_boards == 0) {
+ acpidev_dr_supported = 0;
+ } else {
+ acpidev_dr_supported = 1;
+ }
+
+ /*
+ * Don't create control device node if the system isn't hotplug capable.
+ */
+ if (acpidev_dr_capable() == 0) {
+ return (AE_SUPPORT);
+ }
+
+ /* Cache pointer to the ACPI SLIT table. */
+ if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SLIT, 1,
+ (ACPI_TABLE_HEADER **)&acpidev_slit_tbl_ptr))) {
+ acpidev_slit_tbl_ptr = NULL;
+ }
+ if (acpidev_srat_tbl_ptr == NULL || acpidev_slit_tbl_ptr == NULL) {
+ if (lgrp_plat_node_cnt != 1) {
+ /*
+ * Disable support of CPU/memory DR operations if lgrp
+ * is enabled but failed to cache SRAT/SLIT table
+ * pointers.
+ */
+ cmn_err(CE_WARN,
+ "!acpidev: failed to get ACPI SRAT/SLIT table.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ }
+ }
+
+ ndi_devi_alloc_sleep(pdip, ACPIDEV_NODE_NAME_ACPIDR,
+ (pnode_t)DEVI_PSEUDO_NODEID, &dip);
+
+ /* Set "unit-address" device property. */
+ (void) snprintf(unit, sizeof (unit), "%u", 0);
+ if (ndi_prop_update_string(DDI_DEV_T_NONE, dip,
+ ACPIDEV_PROP_NAME_UNIT_ADDR, unit) != NDI_SUCCESS) {
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ cmn_err(CE_CONT,
+ "?acpidev: failed to set unit-address property for %s.\n",
+ ddi_pathname(dip, path));
+ kmem_free(path, MAXPATHLEN);
+ (void) ddi_remove_child(dip, 0);
+ acpidev_dr_failed = 1;
+ return (AE_ERROR);
+ }
+
+ /* Set "compatible" device property. */
+ if (ndi_prop_update_string_array(DDI_DEV_T_NONE, dip, OBP_COMPATIBLE,
+ comps, sizeof (comps) / sizeof (comps[0])) != NDI_SUCCESS) {
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ cmn_err(CE_CONT, "?acpidev: failed to set compatible "
+ "property for %s.\n", ddi_pathname(dip, path));
+ kmem_free(path, MAXPATHLEN);
+ (void) ddi_remove_child(dip, 0);
+ acpidev_dr_failed = 1;
+ return (AE_ERROR);
+ }
+
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_initialize(dev_info_t *pdip)
+{
+ ACPI_STATUS rc;
+
+ rc = acpidev_dr_create_node(pdip);
+ if (ACPI_FAILURE(rc)) {
+ return (rc);
+ }
+
+ /* Initialize support of memory DR operations. */
+ if (plat_dr_support_memory()) {
+ acpidev_dr_initialize_memory_hotplug();
+ }
+
+	/* Mark the DR subsystem as ready for use. */
+ plat_dr_enable();
+
+ return (AE_OK);
+}
+
+static ACPI_STATUS
+acpidev_dr_find_board(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ /* No data handle available, not ready for DR operations. */
+ return (AE_CTRL_DEPTH);
+ } else if (ACPIDEV_DR_IS_BOARD(dhdl) && ACPIDEV_DR_IS_WORKING(dhdl) &&
+ dhdl->aod_bdnum == (intptr_t)ctx) {
+ ASSERT(retval != NULL);
+ *(ACPI_HANDLE *)retval = hdl;
+ return (AE_CTRL_TERMINATE);
+ }
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_handle(uint_t board, ACPI_HANDLE *hdlp)
+{
+ ACPI_STATUS rc = AE_OK;
+ ACPI_HANDLE hdl;
+
+ ASSERT(hdlp != NULL);
+ if (hdlp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_handle().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ if (board >= acpidev_dr_boards) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board number %d is out of range, max %d.",
+ board, acpidev_dr_boards);
+ return (AE_NOT_FOUND);
+ }
+
+ /* Use cached handles if caching is enabled. */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) {
+ if (acpidev_dr_board_handles[board] != NULL) {
+ hdl = acpidev_dr_board_handles[board];
+ if (ACPI_FAILURE(acpidev_dr_find_board(hdl, 1,
+ (void *)(intptr_t)board, (void **)hdlp)) &&
+ *hdlp != NULL) {
+ return (AE_OK);
+ }
+ }
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board %d doesn't exist.", board);
+ *hdlp = NULL;
+ return (AE_NOT_FOUND);
+ }
+
+ /* All hotplug capable boards should exist under \_SB_. */
+ if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT,
+ ACPIDEV_OBJECT_NAME_SB, &hdl))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get handle of %s.",
+ ACPIDEV_OBJECT_NAME_SB);
+ return (AE_ERROR);
+ }
+
+ *hdlp = NULL;
+ if (ACPI_FAILURE(AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ ACPIDEV_MAX_ENUM_LEVELS - 1, acpidev_dr_find_board, NULL,
+ (void *)(intptr_t)board, (void **)hdlp))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to find ACPI handle "
+ "for board %d.", board);
+ rc = AE_NOT_FOUND;
+ } else if (*hdlp == NULL) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board %d doesn't exist.", board);
+ rc = AE_NOT_FOUND;
+ }
+
+ return (rc);
+}
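Editorial note: the ACPI_FAILURE(...) && *hdlp != NULL test above looks inverted but is intentional: acpidev_dr_find_board() stops the walk by returning AE_CTRL_TERMINATE, which ACPI_FAILURE() classifies as non-success, so "failure plus a non-NULL result" actually means the board was found. A minimal mimic of that convention:

	#include <stdio.h>

	#define	AE_OK			0
	#define	AE_CTRL_TERMINATE	1	/* not AE_OK, so a "failure" */
	#define	ACPI_FAILURE(s)		((s) != AE_OK)

	static int
	find_cb(int node, int target, int *result)
	{
		if (node == target) {
			*result = node;
			return (AE_CTRL_TERMINATE);	/* found: stop walk */
		}
		return (AE_OK);				/* keep walking */
	}

	int
	main(void)
	{
		int result = -1;
		int status = find_cb(3, 3, &result);

		if (ACPI_FAILURE(status) && result != -1)
			printf("found board %d\n", result);
		return (0);
	}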
+
+acpidev_board_type_t
+acpidev_dr_get_board_type(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+ acpidev_board_type_t type = ACPIDEV_INVALID_BOARD;
+
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_type().");
+ return (type);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ } else {
+ type = dhdl->aod_bdtype;
+ }
+
+ return (type);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_number(ACPI_HANDLE hdl, uint32_t *bnump)
+{
+ acpidev_data_handle_t dhdl;
+
+ if (hdl == NULL || bnump == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_number().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ return (AE_ERROR);
+ }
+ *bnump = dhdl->aod_bdnum;
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_name(ACPI_HANDLE hdl, char *buf, size_t len)
+{
+ char *fmt;
+ int count = 0;
+ size_t rlen = 0;
+ ACPI_HANDLE thdl;
+ acpidev_data_handle_t dhdl;
+ acpidev_data_handle_t dhdls[ACPIDEV_MAX_ENUM_LEVELS];
+
+ if (hdl == NULL || buf == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_name().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Find ancestors of the device which are hotplug capable. */
+ for (thdl = hdl; thdl != NULL; ) {
+ dhdl = acpidev_data_get_handle(thdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with %p.", thdl);
+ return (AE_ERROR);
+ }
+
+ if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ /* The board itself should be hotplug capable. */
+ if (count == 0) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is "
+ "not hotplug capable.", thdl);
+ return (AE_ERROR);
+ }
+ } else {
+ if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is "
+ "in the FAILED state.", thdl);
+ }
+
+ if (count >= ACPIDEV_MAX_ENUM_LEVELS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: recursive level for hotplug "
+ "capable board is too deep.");
+ return (AE_ERROR);
+ }
+
+ dhdls[count] = dhdl;
+ count++;
+ }
+
+ if (acpidev_dr_hierarchy_name == 0) {
+ thdl = NULL;
+ } else if (ACPI_FAILURE(AcpiGetParent(thdl, &thdl))) {
+ thdl = NULL;
+ }
+ }
+
+	/* Generate the hierarchical name for the board. */
+ ASSERT(count > 0);
+ for (count--; count >= 0 && rlen < len; count--) {
+ dhdl = dhdls[count];
+ switch (dhdl->aod_bdtype) {
+ case ACPIDEV_CPU_BOARD:
+ fmt = ACPIDEV_DR_CPU_BD_FMT;
+ break;
+ case ACPIDEV_MEMORY_BOARD:
+ fmt = ACPIDEV_DR_MEMORY_BD_FMT;
+ break;
+ case ACPIDEV_IO_BOARD:
+ fmt = ACPIDEV_DR_IO_BD_FMT;
+ break;
+ case ACPIDEV_SYSTEM_BOARD:
+ fmt = ACPIDEV_DR_SYSTEM_BD_FMT;
+ break;
+ case ACPIDEV_INVALID_BOARD:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid board type.");
+ return (AE_ERROR);
+ default:
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: unknown board type %u.",
+ dhdl->aod_bdtype);
+ return (AE_ERROR);
+ }
+
+ /* Add "." before component name except first item. */
+ if (rlen != 0) {
+ rlen += snprintf(buf + rlen, len - rlen, ".");
+ }
+ if (rlen < len) {
+ rlen += snprintf(buf + rlen, len - rlen, fmt,
+ dhdl->aod_bdidx);
+ }
+ }
+
+ /* Check whether the buffer is sufficient. */
+ if (rlen >= len) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer length to "
+ "acpidev_dr_get_board_name() is too small.");
+ return (AE_NO_MEMORY);
+ }
+
+ return (AE_OK);
+}
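Editorial note: hierarchical board names are assembled outermost-first: ancestors are collected innermost-first into dhdls[], then consumed in reverse with "." separators. A standalone sketch of that accumulation with made-up component names (the real components come from the ACPIDEV_DR_*_BD_FMT format strings):

	#include <stdio.h>

	int
	main(void)
	{
		/* Collected innermost-first, like dhdls[]. */
		const char *comps[] = { "IOB2", "CPU1", "SB0" };
		char buf[64];
		size_t rlen = 0;
		int i;

		for (i = 2; i >= 0 && rlen < sizeof (buf); i--) {
			if (rlen != 0)
				rlen += snprintf(buf + rlen,
				    sizeof (buf) - rlen, ".");
			if (rlen < sizeof (buf))
				rlen += snprintf(buf + rlen,
				    sizeof (buf) - rlen, "%s", comps[i]);
		}
		printf("%s\n", buf);	/* prints "SB0.CPU1.IOB2" */
		return (0);
	}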
+
+ACPI_STATUS
+acpidev_dr_get_attachment_point(ACPI_HANDLE hdl, char *buf, size_t len)
+{
+ size_t rlen;
+
+ if (hdl == NULL || buf == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_attachment_point().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ rlen = snprintf(buf, len, "/devices/%s/%s@%u:",
+ ACPIDEV_NODE_NAME_ROOT, ACPIDEV_NODE_NAME_ACPIDR, 0);
+ if (rlen >= len) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer to "
+ "acpidev_dr_get_attachment_point() is too small.");
+ return (AE_NO_MEMORY);
+ }
+
+ return (acpidev_dr_get_board_name(hdl, buf + rlen, len - rlen));
+}
+
+/*
+ * Presence of an ACPI _EJ0 method implies the device is hotplug capable.
+ */
+int
+acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl)
+{
+ ACPI_HANDLE ej0;
+
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EJ0, &ej0))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+int
+acpidev_dr_device_has_edl(ACPI_HANDLE hdl)
+{
+ ACPI_HANDLE edl;
+
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EDL, &edl))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+int
+acpidev_dr_device_is_present(ACPI_HANDLE hdl)
+{
+ int status;
+
+ ASSERT(hdl != NULL);
+
+ status = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_present(status)) {
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+acpidev_dr_device_is_powered(ACPI_HANDLE hdl)
+{
+ int status;
+
+ ASSERT(hdl != NULL);
+
+ /*
+	 * Check the device status returned by the ACPI _STA method.
+	 * The device is powered if the status is both PRESENT and
+	 * ENABLED.
+ */
+ status = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_enabled(status)) {
+ return (1);
+ }
+
+ return (0);
+}
+
+ACPI_STATUS
+acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap)
+{
+ acpidev_dr_capacity_t *cp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ap != NULL);
+ if (ap == NULL || hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_mem_alignment().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ cp = acpidev_dr_get_capacity();
+ if (cp == NULL || cp->hotplug_supported == B_FALSE) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get memory alignment.");
+ return (AE_SUPPORT);
+ }
+ *ap = cp->memory_alignment;
+
+ return (AE_OK);
+}
+
+/*
+ * Query the "reg" or "assigned-addresses" property of the device and
+ * return it as an array of acpidev_regspec_t entries. On success the
+ * caller must release the array with acpidev_dr_device_free_regspec().
+ */
+ACPI_STATUS
+acpidev_dr_device_get_regspec(ACPI_HANDLE hdl, boolean_t assigned,
+ acpidev_regspec_t **regpp, uint_t *cntp)
+{
+ int *valp;
+ uint_t count;
+ char *propname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(regpp != NULL && cntp != NULL);
+ if (hdl == NULL || regpp == NULL || cntp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_device_get_regspec().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Set default return value. */
+ *regpp = NULL;
+ *cntp = 0;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ return (AE_ERROR);
+ } else if ((dip = acpidev_data_get_devinfo(dhdl)) == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get dip associated with %p.", hdl);
+ return (AE_NOT_FOUND);
+ }
+
+ propname = assigned ? "assigned-addresses" : "reg";
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ propname, &valp, &count) != DDI_PROP_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to lookup device property %s.", propname);
+ return (AE_NOT_FOUND);
+ }
+
+ if (count % (sizeof (**regpp) / sizeof (int)) != 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: device property %s is invalid.", propname);
+ ddi_prop_free(valp);
+ return (AE_ERROR);
+ }
+
+ *regpp = (acpidev_regspec_t *)valp;
+ *cntp = count / (sizeof (**regpp) / sizeof (int));
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_device_free_regspec(acpidev_regspec_t *regp, uint_t count)
+{
+ _NOTE(ARGUNUSED(count));
+
+ if (regp != NULL) {
+ ddi_prop_free(regp);
+ }
+}
+
+/*
+ * Return values
+ * . negative values on error
+ * . size of data copied to buffer if it's big enough
+ * . size of buffer needed if buffer is too small
+ */
+int
+acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name, caddr_t buf, size_t len)
+{
+ int rlen = -1;
+ acpidev_data_handle_t dhdl;
+
+ if (hdl == NULL) {
+ return (-1);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (-1);
+ } else if (!ACPIDEV_DR_IS_WORKING(dhdl)) {
+ return (-1);
+ }
+
+ if (strcmp(name, ACPIDEV_DR_PROP_PORTID) == 0) {
+ if (len >= sizeof (uint32_t)) {
+ *(uint32_t *)(void *)buf = dhdl->aod_portid;
+ }
+ rlen = sizeof (uint32_t);
+ } else if (strcmp(name, ACPIDEV_DR_PROP_BOARDNUM) == 0) {
+ if (len >= sizeof (uint32_t)) {
+ *(uint32_t *)(void *)buf = dhdl->aod_bdnum;
+ }
+ rlen = sizeof (uint32_t);
+ } else if (strcmp(name, ACPIDEV_DR_PROP_DEVNAME) == 0) {
+ switch (dhdl->aod_class_id) {
+ case ACPIDEV_CLASS_ID_CPU:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_CPU)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_CPU, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_CPU);
+ break;
+
+ case ACPIDEV_CLASS_ID_MEMORY:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_MEMORY)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_MEMORY, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_MEMORY);
+ break;
+
+ case ACPIDEV_CLASS_ID_PCI:
+ case ACPIDEV_CLASS_ID_PCIEX:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_PCI)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_PCI, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_PCI);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (rlen);
+}
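Editorial note: this contract supports the usual two-call protocol: probe with a zero-length buffer to learn the required size, allocate, then fetch. A self-contained user-level mimic of the contract:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Returns the size needed; copies only if the buffer is big enough. */
	static int
	getprop(const char *name, char *buf, size_t len)
	{
		const char *val = "cpu";	/* pretend property value */
		size_t need = strlen(val) + 1;

		if (strcmp(name, "devname") != 0)
			return (-1);		/* unknown property */
		if (len >= need)
			(void) memcpy(buf, val, need);
		return ((int)need);
	}

	int
	main(void)
	{
		int need = getprop("devname", NULL, 0);	/* probe for size */
		char *buf = malloc(need);

		(void) getprop("devname", buf, need);	/* fetch for real */
		printf("devname=%s (need=%d)\n", buf, need);
		free(buf);
		return (0);
	}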
+
+/*
+ * Figure out the device class of the device.
+ * Only device classes which may be involved in DR operations are recognized.
+ */
+acpidev_class_id_t
+acpidev_dr_device_get_class(ACPI_HANDLE hdl)
+{
+ ACPI_OBJECT_TYPE type;
+ ACPI_DEVICE_INFO *infop;
+ acpidev_class_id_t id = ACPIDEV_CLASS_ID_INVALID;
+
+ static char *acpidev_id_cpu[] = {
+ ACPIDEV_HID_CPU,
+ };
+ static char *acpidev_id_mem[] = {
+ ACPIDEV_HID_MEMORY,
+ };
+ static char *acpidev_id_mod[] = {
+ ACPIDEV_HID_MODULE,
+ };
+ static char *acpidev_id_pci[] = {
+ ACPIDEV_HID_PCI_HOSTBRIDGE,
+ };
+ static char *acpidev_id_pciex[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+ };
+
+ /* Figure out device type by checking ACPI object type. */
+ if (ACPI_FAILURE(AcpiGetType(hdl, &type))) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ } else if (type == ACPI_TYPE_PROCESSOR) {
+ return (ACPIDEV_CLASS_ID_CPU);
+ } else if (type != ACPI_TYPE_DEVICE) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ }
+
+ if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &infop))) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ }
+
+ /* Figure out device type by checking _HID and _CID. */
+ if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_cpu))) {
+ id = ACPIDEV_CLASS_ID_CPU;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_mem))) {
+ id = ACPIDEV_CLASS_ID_MEMORY;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_mod))) {
+ id = ACPIDEV_CLASS_ID_CONTAINER;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_pciex))) {
+ id = ACPIDEV_CLASS_ID_PCIEX;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_pci))) {
+ id = ACPIDEV_CLASS_ID_PCI;
+ }
+
+ AcpiOsFree(infop);
+
+ return (id);
+}
+
+ACPI_STATUS
+acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl, uint32_t *idxp)
+{
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(idxp != NULL);
+ ASSERT(hdl != NULL);
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle for %p.", hdl);
+ return (AE_ERROR);
+ } else if (dhdl->aod_class_id != ACPIDEV_CLASS_ID_MEMORY) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not a memory device.", hdl);
+ return (AE_ERROR);
+ } else {
+ *idxp = dhdl->aod_memidx;
+ }
+
+ return (AE_OK);
+}
+
+int
+acpidev_dr_device_is_board(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_is_board().");
+ return (0);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (0);
+ } else if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ return (0);
+ }
+
+ return (1);
+}
+
+ACPI_STATUS
+acpidev_dr_device_walk_edl(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ int i;
+ char *objname;
+ ACPI_OBJECT *obj;
+ ACPI_BUFFER buf;
+ char *method = ACPIDEV_METHOD_NAME_EDL;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_edl().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf,
+ ACPI_TYPE_PACKAGE);
+ if (rc == AE_NOT_FOUND) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to evaluate method %s under %s.",
+ method, objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate the package structure. */
+ obj = buf.Pointer;
+ for (i = 0; i < obj->Package.Count; i++) {
+ if (obj->Package.Elements[i].Type !=
+ ACPI_TYPE_LOCAL_REFERENCE) {
+ cmn_err(CE_WARN, "!acpidev: element %d in package "
+			    "returned by %s of %s is not a local reference.",
+ i, method, objname);
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ } else if (obj->Package.Elements[i].Reference.ActualType !=
+ ACPI_TYPE_DEVICE) {
+ cmn_err(CE_WARN, "!acpidev: element %d in package "
+			    "returned by %s of %s doesn't refer to a device.",
+ i, method, objname);
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+ }
+
+ for (i = 0; i < obj->Package.Count; i++) {
+ if (obj->Package.Elements[i].Reference.Handle == NULL) {
+ cmn_err(CE_WARN, "!acpidev: handle of element %d in "
+ "package returned by %s of %s is NULL.",
+ i, method, objname);
+ continue;
+ }
+ rc = (*cb)(obj->Package.Elements[i].Reference.Handle,
+ UINT32_MAX, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ }
+ if (ACPI_FAILURE(rc)) {
+ break;
+ }
+ }
+
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+ ACPI_OBJECT *obj;
+ ACPI_BUFFER buf;
+ ACPI_HANDLE chdl;
+ char *method = ACPIDEV_METHOD_NAME_EJD;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_ejd().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf,
+ ACPI_TYPE_STRING);
+ if (rc == AE_NOT_FOUND) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to evaluate method %s under %s.",
+ method, objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ obj = buf.Pointer;
+ ASSERT(obj->String.Pointer);
+ if (ACPI_FAILURE(AcpiGetHandle(NULL, obj->String.Pointer, &chdl))) {
+ cmn_err(CE_WARN, "!acpidev: failed to get handle for %s.",
+ obj->String.Pointer);
+ rc = AE_ERROR;
+ } else {
+ rc = (*cb)(chdl, UINT32_MAX, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ }
+ }
+
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/*
+ * Walk all child devices and special devices in the eject device list.
+ */
+static ACPI_STATUS
+acpidev_dr_device_walk_child(ACPI_HANDLE hdl, boolean_t init, uint_t max_lvl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_child().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /*
+ * Walk the eject device list first when destroying.
+	 * According to the ACPI spec, devices in the _EDL list must be
+	 * handled first when ejecting the device.
+ */
+ if (init == B_FALSE) {
+ rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk eject device list in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /* Walk all child ACPI DEVICE objects. */
+ if (ACPI_SUCCESS(rc)) {
+ rc = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ max_lvl, cb, NULL, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk DEVICE objects in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /* Walk all child ACPI PROCESSOR objects. */
+ if (ACPI_SUCCESS(rc)) {
+ rc = AcpiWalkNamespace(ACPI_TYPE_PROCESSOR, hdl,
+ max_lvl, cb, NULL, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk PROCESSOR objects in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /*
+ * Walk the eject device list last when initializing.
+ */
+ if (init == B_TRUE && ACPI_SUCCESS(rc)) {
+ rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk eject device list in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_walk_device(ACPI_HANDLE hdl, uint_t max_lvl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_walk_device().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Walk the top object itself first. */
+ rc = (*cb)(hdl, 0, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ } else if (ACPI_FAILURE(rc)) {
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to handle top node %s "
+ "in acpidev_dr_walk_device().", objname);
+ acpidev_free_object_name(objname);
+ } else {
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, max_lvl,
+ cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to handle descendant nodes of %s "
+ "in acpidev_dr_walk_device().", objname);
+ acpidev_free_object_name(objname);
+ }
+ }
+
+ return (rc);
+}
+
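+/*
+ * A minimal usage sketch (my_walk_cb and my_arg are hypothetical names,
+ * for illustration only): to visit a board object, its DEVICE/PROCESSOR
+ * descendants and finally the objects in its _EDL list:
+ *
+ *	(void) acpidev_dr_device_walk_device(bhdl, ACPIDEV_MAX_ENUM_LEVELS,
+ *	    my_walk_cb, my_arg, NULL);
+ *
+ * where my_walk_cb is an ACPI_WALK_CALLBACK that may return AE_CTRL_DEPTH
+ * to prune a subtree or AE_CTRL_TERMINATE to stop the walk.
+ */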
+static ACPI_STATUS
+acpidev_dr_no_support(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval)
+{
+ _NOTE(ARGUNUSED(arg, retval));
+
+ char *objname;
+
+ ASSERT(hdl != NULL);
+
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: device %s at level 0x%x is unsupported.",
+ objname, lvl);
+ acpidev_free_object_name(objname);
+
+ return (AE_SUPPORT);
+}
+
+static ACPI_STATUS
+acpidev_dr_set_prop(ACPI_HANDLE hdl, char *objname,
+ struct acpidev_dr_set_prop_arg *ap, uint32_t lvl,
+ acpidev_class_id_t clsid, uint_t *devid)
+{
+ acpidev_data_handle_t dhdl;
+
+ /* Create data handle first if it doesn't exist yet. */
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ uint32_t rlvl;
+ ACPI_HANDLE phdl;
+
+ /*
+ * Compute level by walking ACPI namespace if it's a device
+ * from the eject device list.
+ */
+ if (lvl == UINT32_MAX) {
+ /*
+ * AcpiGetParent() fails when it tries to get
+ * the parent of the ACPI namespace root node.
+ */
+ for (rlvl = 0, phdl = hdl;
+ ACPI_SUCCESS(AcpiGetParent(phdl, &phdl));
+ rlvl++) {
+ if (phdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+ if (rlvl == 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get level of %s.",
+ objname);
+ return (AE_BAD_PARAMETER);
+ }
+ } else {
+ rlvl = ap->level;
+ }
+ if (rlvl >= ACPIDEV_MAX_ENUM_LEVELS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: recursive level of %s is too deep.",
+ objname);
+ return (AE_SUPPORT);
+ }
+
+ dhdl = acpidev_data_create_handle(hdl);
+ if (dhdl != NULL) {
+ dhdl->aod_hdl = hdl;
+ dhdl->aod_level = rlvl;
+ }
+ }
+
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create data handle "
+ "for device %s.", objname);
+ return (AE_NO_MEMORY);
+ }
+
+ if (ACPIDEV_DR_IS_READY(dhdl)) {
+ /*
+		 * A device may be enumerated at most twice: once as a
+		 * child device and once from the eject device list.
+ */
+ if (dhdl->aod_bdnum == ap->bdnum) {
+ return (AE_OK);
+ } else {
+ /*
+ * A device has been enumerated more than once from
+ * different paths. It's dangerous to support such
+ * a topology. Disable support of DR operations.
+ */
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: device %s has been "
+ "enumerated more than once for DR.", objname);
+ acpidev_dr_failed = 1;
+ return (AE_SUPPORT);
+ }
+ }
+
+ /* Set properties for DR operations. */
+ dhdl->aod_class_id = clsid;
+ dhdl->aod_bdnum = ap->bdnum;
+ dhdl->aod_portid = atomic_inc_32_nv(devid) - 1;
+ if (clsid == ACPIDEV_CLASS_ID_MEMORY) {
+ dhdl->aod_memidx = acpidev_dr_memory_device_cnt;
+ ASSERT(dhdl->aod_memidx < ACPI_MEMNODE_DEVID_BOOT);
+ }
+ ACPIDEV_DR_SET_READY(dhdl);
+
+ return (AE_OK);
+}
+
+/*
+ * Verify whether the hardware topology is supported by the DR driver.
+ * The ACPI specification is very flexible, so for safety reasons only
+ * a few well-defined topologies are supported.
+ * Possible values of parameter lvl:
+ * 0: the device is the board itself.
+ * UINT32_MAX: the device is from the _EDL list of the board.
+ * other: the device is a descendant of the board.
+ * Return values:
+ * AE_OK: the topology is supported
+ * AE_SUPPORT: the topology is unsupported
+ * AE_ERROR: other errors
+ */
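+/*
+ * For illustration, a topology accepted by this check might look like the
+ * following hypothetical example (object names are made up):
+ *	\_SB.SCK0		container object, hotplug capable (the board)
+ *	\_SB.SCK0.CPU0		logical CPU without children
+ *	\_SB.SCK0.MEM0		memory device without children
+ *	_EDL of \_SB.SCK0	may refer to a PCI/PCIex host bridge
+ */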
+static ACPI_STATUS
+acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+ acpidev_class_id_t cid;
+ struct acpidev_dr_set_prop_arg *ap = arg;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == 0 || lvl == 1 || lvl == UINT32_MAX);
+ objname = acpidev_get_object_name(hdl);
+
+ /*
+ * Validate descendants of the hotplug capable board.
+ * lvl is zero if it's the hotplug capable board itself, otherwise
+ * non-zero for descendants.
+ */
+ if (lvl != 0) {
+ /*
+ * Skip subtree if the device is hotplug capable.
+ * It will be treated as another hotplug capable board.
+ */
+ if (acpidev_dr_device_hotplug_capable(hdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_CTRL_DEPTH);
+ }
+
+ /*
+ * Don't support the _EDL list of a non-hotplug-capable device.
+ */
+ if (acpidev_dr_device_has_edl(hdl)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: non-hotplug-capable "
+ "object %s has _EDL method.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ }
+
+ cid = acpidev_dr_device_get_class(hdl);
+ switch (cid) {
+ case ACPIDEV_CLASS_ID_CPU:
+ /* Don't support logical CPUs in the _EDL list. */
+ if (lvl == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: logical CPU %s in "
+ "_EDL is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ /* Don't support logical CPUs with children. */
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_no_support, arg, NULL);
+ ap->level--;
+ if (rc == AE_SUPPORT) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: logical CPU %s has "
+ "child or dependent devices.", objname);
+ break;
+ } else if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to scan "
+ "children of logical CPU %s.", objname);
+ rc = AE_ERROR;
+ break;
+ } else if (ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_CPU, &ap->cpu_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_MEMORY:
+ /* Don't support memory devices with children. */
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_no_support, arg, NULL);
+ ap->level--;
+ if (rc == AE_SUPPORT) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: memory device %s has child or "
+ "dependent devices.", objname);
+ } else if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to scan children of "
+ "memory device %s.", objname);
+ rc = AE_ERROR;
+ } else if (ap != NULL) {
+ acpidev_dr_memory_device_cnt++;
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_MEMORY, &ap->mem_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_PCI:
+ case ACPIDEV_CLASS_ID_PCIEX:
+ /* Don't scan child/descendant devices of PCI/PCIex devices. */
+ if (ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ cid, &ap->io_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_CONTAINER:
+ /* Don't support module devices in the _EDL list. */
+ if (lvl == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: module device %s in "
+ "_EDL is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+		/* Don't support nested (recursive) module devices. */
+ if (lvl > 0) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: recursion level of "
+ "module device %s is too deep.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_scan_topo, arg, NULL);
+ ap->level--;
+ if (ACPI_SUCCESS(rc) && ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_CONTAINER, &ap->mod_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_INVALID:
+ /*FALLTHROUGH*/
+ default:
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: device %s is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/* Create walk information structures. */
+static ACPI_STATUS
+acpidev_dr_create_walk_info(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ char *objname, acpidev_walk_info_t **infopp, acpidev_walk_info_t **cinfopp)
+{
+ ACPI_HANDLE phdl = NULL;
+ dev_info_t *pdip = NULL;
+ acpidev_data_handle_t pdhdl, tdhdl;
+ acpidev_walk_info_t *infop = NULL, *cinfop = NULL;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(dhdl->aod_class_list != NULL);
+ ASSERT(objname != NULL);
+ ASSERT(infopp != NULL);
+ ASSERT(cinfopp != NULL);
+
+ if (ACPI_FAILURE(AcpiGetParent(hdl, &phdl))) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to get parent object of %s.", objname);
+ return (AE_ERROR);
+ }
+
+ pdhdl = acpidev_data_get_handle(phdl);
+ if (pdhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with parent of %s.", objname);
+ return (AE_ERROR);
+ }
+ if (pdhdl->aod_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: recursion level (%d) of %s is too deep.",
+ pdhdl->aod_level, objname);
+ return (AE_ERROR);
+ }
+ ASSERT(pdhdl->aod_class_list != NULL);
+ if (pdhdl->aod_class_list == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: class list for parent of %s is NULL.", objname);
+ return (AE_ERROR);
+ }
+
+ /* Allocate a walk info structure for its parent. */
+ infop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+ pdhdl->aod_level, phdl, dhdl->aod_class_list, NULL);
+ if (infop == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+ "structure for parent of %s.", objname);
+ return (AE_ERROR);
+ }
+
+ /* Get the parent dip if it's not ready yet. */
+ while (infop->awi_dip == NULL) {
+ if (ACPI_FAILURE(AcpiGetParent(phdl, &phdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get parent of object %p.",
+ phdl);
+ break;
+ }
+ tdhdl = acpidev_data_get_handle(phdl);
+ if (tdhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with object %p.", phdl);
+ break;
+ }
+ pdip = acpidev_data_get_devinfo(tdhdl);
+ if (pdip != NULL) {
+ infop->awi_dip = pdip;
+ break;
+ }
+		/* Give up if it reaches the ACPI namespace root node. */
+ if (phdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+ if (infop->awi_dip == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get parent dip of %s.", objname);
+ acpidev_free_walk_info(infop);
+ return (AE_ERROR);
+ }
+
+ /* Allocate a walk info for the child. */
+ cinfop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+ infop->awi_level + 1, hdl, NULL, infop);
+ if (cinfop == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+ "structure for %s.", objname);
+ acpidev_free_walk_info(infop);
+ return (AE_ERROR);
+ }
+
+ *infopp = infop;
+ *cinfopp = cinfop;
+
+ return (AE_OK);
+}
+
+static ACPI_STATUS
+acpidev_dr_probe_object(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int circ;
+ char *objname;
+ dev_info_t *pdip;
+ ACPI_STATUS res;
+ ACPI_OBJECT_TYPE type;
+ acpidev_class_list_t *it;
+ acpidev_walk_info_t *infop, *cinfop;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ if (hdl == NULL || dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: hdl or dhdl is NULL in "
+ "acpidev_dr_probe_object().");
+ return (AE_BAD_PARAMETER);
+ }
+ objname = acpidev_get_object_name(hdl);
+
+ /* Check whether the device is of interest. */
+ if (ACPI_FAILURE(AcpiGetType(hdl, &type)) ||
+ type > ACPI_TYPE_NS_NODE_MAX ||
+ BT_TEST(acpidev_object_type_mask, type) == 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: ACPI object %s is unsupported.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ if (dhdl->aod_class_list == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: class list is NULL in data associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ pdip = NULL;
+ infop = NULL;
+ cinfop = NULL;
+ rc = acpidev_dr_create_walk_info(hdl, dhdl, objname, &infop, &cinfop);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to create walk info structures for %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (rc);
+ }
+ ASSERT(infop != NULL);
+ ASSERT(infop->awi_dip != NULL);
+ ASSERT(infop->awi_class_list != NULL);
+ ASSERT(cinfop != NULL);
+ ASSERT(cinfop->awi_data == dhdl);
+
+ /* Lock the parent dip before touching children. */
+ pdip = infop->awi_dip;
+ ndi_devi_enter(pdip, &circ);
+ rw_enter(&acpidev_class_lock, RW_READER);
+
+ /* Call pre-probe callback functions to prepare for probing. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_pre_probe == NULL) {
+ continue;
+ }
+ infop->awi_class_curr = it->acl_class;
+ if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to pre-probe "
+ "device of type %s under %s.",
+ it->acl_class->adc_class_name, infop->awi_name);
+ }
+ }
+
+ /* Call registered probe callback functions to probe devices. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_probe == NULL) {
+ continue;
+ }
+ cinfop->awi_class_curr = it->acl_class;
+ res = it->acl_class->adc_probe(cinfop);
+ if (ACPI_FAILURE(res)) {
+ rc = res;
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to process object %s under %s.",
+ objname, infop->awi_name);
+ }
+ }
+
+ /* Call post-probe callback functions to clean up. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_post_probe == NULL) {
+ continue;
+ }
+ infop->awi_class_curr = it->acl_class;
+ if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to post-probe "
+ "device of type %s under %s.",
+ it->acl_class->adc_class_name, infop->awi_name);
+ }
+ }
+
+ rw_exit(&acpidev_class_lock);
+ ndi_devi_exit(pdip, circ);
+
+ acpidev_free_walk_info(cinfop);
+ acpidev_free_walk_info(infop);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/*
+ * Some PCI/PCIex buses embedded in physical processors may be presented in
+ * the eject device list instead of being presented as child devices.
+ * This function identifies such devices and creates device nodes for them.
+ */
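+/*
+ * This callback is invoked through acpidev_dr_device_walk_edl(), which
+ * always passes UINT32_MAX as the level to mark objects coming from the
+ * eject device list.
+ */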
+static ACPI_STATUS
+acpidev_dr_probe_dependent(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ ACPI_STATUS rc = AE_OK;
+ int status;
+ char *objname;
+ ACPI_HANDLE phdl, thdl;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ phdl = ctx;
+ objname = acpidev_get_object_name(hdl);
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /*
+	 * The device should be treated as another board if it is hotplug
+	 * capable.
+ */
+ if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (!ACPIDEV_DR_IS_WORKING(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: %s is unusable for DR operations.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /*
+ * Skip hdl if it's a descendant of phdl because it should have
+ * already been handled when handling phdl itself.
+ */
+ for (thdl = hdl; ACPI_SUCCESS(AcpiGetParent(thdl, &thdl)); ) {
+		/* Return when it reaches phdl. */
+ if (thdl == phdl) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+		/* Break out when it reaches the ACPI namespace root node. */
+ if (thdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+
+ /*
+	 * Enumerating PCI/PCIex host bridge devices is not supported yet.
+	 * It will be enabled when PCI/PCIex host bridge hotplug is ready.
+ */
+ if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI ||
+ dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: PCI/PCIEX host bridge %s is "
+ "unsupported, skip it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ /* Check whether the device exists and has been enabled. */
+ status = acpidev_query_device_status(hdl);
+ if (!acpidev_check_device_enabled(status)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s is disabled/absent "
+ "when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ /* Probe the device and its children. */
+ rc = acpidev_dr_probe_object(hdl, dhdl);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to probe object %s in eject device list.",
+ objname);
+ return (rc);
+ }
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_device_insert(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int status, circ;
+ char *objname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(acpidev_root_node() != NULL);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_insert_insert() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the object is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+ "state, unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device exists and has been enabled. */
+ status = acpidev_query_device_status(hdl);
+ if (!acpidev_check_device_enabled(status)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is disabled/absent "
+ "when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_NOT_EXIST);
+ }
+
+	/* Check that no device node has been created for the object yet. */
+ dip = acpidev_data_get_devinfo(dhdl);
+ if (dip != NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: device node for object %s "
+ "already exists when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ALREADY_EXISTS);
+ }
+
+ /*
+ * Solaris has a limitation that all device nodes for PCI/PCIex host
+ * bridges must exist directly under /devices.
+	 * Special care is needed here when hot-adding PCI/PCIex host
+	 * bridges to avoid a deadlock caused by ndi_devi_enter().
+	 * The lock on ddi_root_node() is held first, which breaks the
+	 * deadlock loop.
+ */
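+	/*
+	 * Lock order sketch: ddi_root_node() is entered here first, then
+	 * acpidev_dr_probe_object() enters the parent dip of each probed
+	 * object through ndi_devi_enter().
+	 */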
+ ndi_devi_enter(ddi_root_node(), &circ);
+
+ rc = acpidev_dr_probe_object(hdl, dhdl);
+ if (ACPI_SUCCESS(rc)) {
+ rc = acpidev_dr_device_walk_edl(hdl,
+ &acpidev_dr_probe_dependent, hdl, NULL);
+ }
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device "
+ "nodes for children of %s.", objname);
+ cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+ "due to failure when creating device nodes for it.",
+ objname);
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ }
+
+ ndi_devi_exit(ddi_root_node(), circ);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+static ACPI_STATUS
+acpidev_dr_device_remove_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ ACPI_STATUS rc = AE_OK;
+ int status;
+ char *objname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+ struct acpidev_dr_device_remove_arg *argp;
+
+ ASSERT(hdl != NULL && ctx != NULL);
+ if (hdl == NULL || ctx == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter to "
+ "acpidev_dr_device_remove_cb() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ argp = (struct acpidev_dr_device_remove_arg *)ctx;
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the object is hotplug capable. */
+ /* It's the hotplug capable board itself if level is zero. */
+ if (argp->level == 0) {
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ } else {
+ /* It's a device under the hotplug capable board. */
+ /*
+		 * Skip it if the device itself is hotplug capable.
+ * It will be treated as another hotplug capable board.
+ */
+ if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ if (!ACPIDEV_DR_IS_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ }
+
+ /* Skip the device if it hasn't been enabled at all. */
+ status = acpidev_data_get_status(dhdl);
+ if (!acpidev_check_device_enabled(status)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ dip = acpidev_data_get_devinfo(dhdl);
+ if (dip == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get dev_info associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /* For safety, only handle supported device types when unconfiguring. */
+ switch (dhdl->aod_class_id) {
+ case ACPIDEV_CLASS_ID_CONTAINER:
+ /*FALLTHROUGH*/
+ case ACPIDEV_CLASS_ID_CPU:
+ /*FALLTHROUGH*/
+ case ACPIDEV_CLASS_ID_MEMORY:
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s (type %d) doesn't "
+ "support unconfiguration.", objname, dhdl->aod_class_id);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /* Destroy descendants first. */
+ argp->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_FALSE, 1,
+ acpidev_dr_device_remove_cb, ctx, retval);
+ argp->level--;
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to destroy descendants of %s.", objname);
+ acpidev_free_object_name(objname);
+ return (rc);
+ }
+
+ /* Untag dip and ACPI object before destroying the dip. */
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) &&
+ ACPI_FAILURE(acpica_untag_devinfo(dip, hdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to untag object %s.", objname);
+ /* Mark the node as unusable. */
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Destroy the node itself. */
+ if (e_ddi_branch_destroy(dip, NULL, 0) != 0) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) &&
+ ACPI_FAILURE(acpica_tag_devinfo(dip, hdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to retag object %s.", objname);
+ }
+
+ /* Mark the node as unusable. */
+ ACPIDEV_DR_SET_FAILED(dhdl);
+
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN,
+ "acpidev: failed to remove node %s (%s).", path, objname);
+ kmem_free(path, MAXPATHLEN);
+ acpidev_free_object_name(objname);
+
+ return (AE_ERROR);
+ }
+
+ /* Update status and information associated with the device. */
+ dhdl->aod_dip = NULL;
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_CREATED;
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_TAGGED;
+ if (dhdl->aod_class != NULL) {
+ if (dhdl->aod_class->adc_fini != NULL) {
+ (*(dhdl->aod_class->adc_fini))(hdl, dhdl,
+ dhdl->aod_class);
+ }
+ atomic_dec_32(&(dhdl->aod_class->adc_refcnt));
+ dhdl->aod_class = NULL;
+ }
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_STATUS_VALID;
+ dhdl->aod_status = 0;
+
+ acpidev_free_object_name(objname);
+
+ return (AE_OK);
+}
+
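+/*
+ * Tear-down order sketch: for each node, acpidev_dr_device_remove_cb()
+ * first destroys the objects in its eject device list and its other
+ * descendants (acpidev_dr_device_walk_child() with init == B_FALSE walks
+ * the _EDL list first), then untags and destroys the node itself.
+ */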
+ACPI_STATUS
+acpidev_dr_device_remove(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int circ;
+ char *objname;
+ acpidev_data_handle_t dhdl;
+ struct acpidev_dr_device_remove_arg arg;
+
+ ASSERT(acpidev_root_node() != NULL);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_device_remove() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+ "state, unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+	/*
+	 * Recursively destroy descendants under the top node.
+	 * There's no need to undo what has been done if an error happens;
+	 * that will be handled by the DR driver.
+	 * Lock ddi_root_node() to avoid deadlock.
+	 */
+ ndi_devi_enter(ddi_root_node(), &circ);
+
+ arg.level = 0;
+ rc = acpidev_dr_device_remove_cb(hdl, 0, &arg, NULL);
+ ASSERT(arg.level == 0);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to destroy device "
+ "nodes for children of %s.", objname);
+ cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+ "due to failure when destroying device nodes for it.",
+ objname);
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ }
+
+ ndi_devi_exit(ddi_root_node(), circ);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
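+/*
+ * Note that no ACPI method is evaluated here; this merely validates that
+ * the board is in a state usable for DR before it is powered on.
+ */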
+ACPI_STATUS
+acpidev_dr_device_poweron(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_device_poweroff(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc;
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ rc = acpidev_eval_ej0(hdl);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _EJ0 for object %p.", hdl);
+ }
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_check_status(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_lock_all(void)
+{
+ mutex_enter(&acpidev_dr_lock);
+}
+
+void
+acpidev_dr_unlock_all(void)
+{
+ mutex_exit(&acpidev_dr_lock);
+}
+
+ACPI_STATUS
+acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl, processorid_t *idp)
+{
+ int rv;
+ processorid_t cpuid;
+ uint32_t procid, apicid;
+ mach_cpu_add_arg_t arg;
+ acpidev_data_handle_t dhdl;
+ dev_info_t *dip = NULL;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_allocate_cpuid() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Validate that the device is ready for hotplug. */
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get devinfo for object %p.", hdl);
+ return (AE_ERROR);
+ }
+ ASSERT(dip != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with object %p",
+ hdl);
+ return (AE_SUPPORT);
+ }
+ if (!ACPIDEV_DR_IS_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p is not hotplug ready.", (void *)dip);
+ return (AE_SUPPORT);
+ }
+ if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: dip %p is in the FAILED state.", (void *)dip);
+ return (AE_SUPPORT);
+ }
+
+	/* Query CPU related information. */
+ apicid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX);
+ procid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_PROCESSOR_ID, UINT32_MAX);
+ if (procid == UINT32_MAX || apicid == UINT32_MAX || apicid == 255) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: dip %p is malformed, "
+ "procid(0x%x) or apicid(0x%x) is invalid.",
+ (void *)dip, procid, apicid);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the CPU device is in offline state. */
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!DEVI_IS_DEVICE_OFFLINE(dip)) {
+ mutex_exit(&DEVI(dip)->devi_lock);
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p isn't in offline state.", (void *)dip);
+ return (AE_ERROR);
+ }
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ /* Check whether the CPU already exists. */
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(hdl, &cpuid))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p already has CPU id(%d) assigned.",
+ (void *)dip, cpuid);
+ return (AE_ALREADY_EXISTS);
+ }
+
+ /* Allocate cpuid for the CPU */
+ arg.arg.apic.apic_id = apicid;
+ arg.arg.apic.proc_id = procid;
+ if (apicid >= 255) {
+ arg.type = MACH_CPU_ARG_LOCAL_X2APIC;
+ } else {
+ arg.type = MACH_CPU_ARG_LOCAL_APIC;
+ }
+ rv = mach_cpu_add(&arg, &cpuid);
+ if (rv != PSM_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to allocate cpu id for dip %p.",
+ (void *)dip);
+ return (AE_NOT_EXIST);
+ }
+
+ ASSERT(cpuid >= 0 && cpuid < NCPU && cpuid < max_ncpus);
+ if (idp != NULL) {
+ *idp = cpuid;
+ }
+
+ return (AE_OK);
+}
+
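+/*
+ * A minimal usage sketch (hypothetical caller, for illustration only):
+ * cpu_lock must be held while allocating or freeing CPU ids:
+ *
+ *	processorid_t cpuid;
+ *
+ *	mutex_enter(&cpu_lock);
+ *	if (ACPI_SUCCESS(acpidev_dr_allocate_cpuid(hdl, &cpuid))) {
+ *		... configure the CPU, or call acpidev_dr_free_cpuid(hdl)
+ *		... to release the id on failure
+ *	}
+ *	mutex_exit(&cpu_lock);
+ */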
+ACPI_STATUS
+acpidev_dr_free_cpuid(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rv = AE_OK;
+ processorid_t cpuid;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_free_cpuid() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ if (ACPI_FAILURE(acpica_get_cpu_id_by_object(hdl, &cpuid))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get cpuid for object %p.", hdl);
+ rv = AE_NOT_EXIST;
+	} else if (cpuid < 0 || cpuid >= max_ncpus) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: cpuid(%d) of object %p is invalid.",
+ cpuid, hdl);
+ rv = AE_ERROR;
+ } else if (mach_cpu_remove(cpuid) != PSM_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to free cpuid(%d) for object %p.",
+ cpuid, hdl);
+ rv = AE_ERROR;
+ }
+
+ return (rv);
+}
+
+static ACPI_STATUS
+acpidev_dr_get_latency(ACPI_HANDLE hdl, void **hdlpp,
+ uint32_t pxmid, uint32_t *slicntp, uchar_t **slipp)
+{
+ ACPI_STATUS rc;
+ ACPI_BUFFER buf;
+ uint32_t i, pxmcnt;
+ uchar_t *valp, *sp, *ep;
+
+ /* Evaluate the ACPI _SLI method under the object. */
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, ACPIDEV_METHOD_NAME_SLI, NULL, &buf,
+ ACPI_TYPE_BUFFER);
+ if (ACPI_SUCCESS(rc)) {
+ valp = (uchar_t *)buf.Pointer;
+ if (acpidev_slit_tbl_ptr->LocalityCount > pxmid) {
+ pxmcnt = acpidev_slit_tbl_ptr->LocalityCount;
+ } else {
+ pxmcnt = pxmid + 1;
+ }
+
+ /*
+ * Validate data returned by the ACPI _SLI method.
+		 * Please refer to 6.2.14 "_SLI (System Locality Information)"
+		 * in ACPI 4.0 for the data format returned by the _SLI method.
+ */
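+		/*
+		 * The buffer holds 2 * pxmcnt byte-sized entries: entries
+		 * [0, pxmcnt) are latencies from this domain to every
+		 * domain and entries [pxmcnt, 2 * pxmcnt) are latencies
+		 * from every domain back to this one, so both "self"
+		 * entries must equal ACPI_SLIT_SELF_LATENCY.
+		 */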
+ if (buf.Length != pxmcnt * 2 * sizeof (uchar_t)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: buffer length returned by _SLI method "
+ "under %p is invalid.", hdl);
+ AcpiOsFree(buf.Pointer);
+ } else if (valp[pxmid] != ACPI_SLIT_SELF_LATENCY ||
+ valp[pxmid + pxmcnt] != ACPI_SLIT_SELF_LATENCY) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: local latency returned by _SLI method "
+ "under %p is not %u.", hdl, ACPI_SLIT_SELF_LATENCY);
+ AcpiOsFree(buf.Pointer);
+ } else {
+ *slicntp = pxmcnt;
+ *slipp = (uchar_t *)buf.Pointer;
+ *hdlpp = buf.Pointer;
+ return (AE_OK);
+ }
+ } else if (rc != AE_NOT_FOUND) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to evaluate "
+ "_SLI method under object %p.", hdl);
+ }
+
+	/* Otherwise fall back to the static ACPI SLIT table. */
+ ASSERT(acpidev_slit_tbl_ptr != NULL);
+ pxmcnt = acpidev_slit_tbl_ptr->LocalityCount;
+ if (pxmid >= pxmcnt) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: proximity domain id "
+ "(%u) is too big, max %u.", pxmid, pxmcnt - 1);
+ *slicntp = 0;
+ *slipp = NULL;
+ return (AE_ERROR);
+ } else {
+ sp = AcpiOsAllocate(pxmcnt * 2 * sizeof (uchar_t));
+ ep = acpidev_slit_tbl_ptr->Entry;
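+		/*
+		 * The SLIT entries form a pxmcnt x pxmcnt byte matrix in
+		 * row-major order, where Entry[i * pxmcnt + j] is the
+		 * distance from domain i to domain j.  Build the same
+		 * two-row layout that _SLI would return for pxmid.
+		 */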
+ for (i = 0; i < pxmcnt; i++) {
+ sp[i] = ep[pxmcnt * pxmid + i];
+ sp[i + pxmcnt] = ep[pxmcnt * i + pxmid];
+ }
+ *slicntp = pxmcnt;
+ *slipp = sp;
+ *hdlpp = sp;
+ return (AE_OK);
+ }
+}
+
+/*
+ * Query NUMA information for the CPU device.
+ * It returns the APIC id, proximity id and latency information of the CPU.
+ */
+int
+acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp, uint32_t *apicidp,
+ uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp)
+{
+ dev_info_t *dip = NULL;
+ ACPI_HANDLE hdl = NULL;
+
+ ASSERT(cp != NULL);
+ ASSERT(hdlpp != NULL);
+ ASSERT(apicidp != NULL);
+ ASSERT(pxmidp != NULL);
+ if (cp == NULL || hdlpp == NULL || apicidp == NULL || pxmidp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_get_cpu_numa_info().");
+ return (-1);
+ }
+
+ *hdlpp = NULL;
+ *apicidp = UINT32_MAX;
+ *pxmidp = UINT32_MAX;
+ if (lgrp_plat_node_cnt == 1) {
+ return (-1);
+ }
+ ASSERT(acpidev_slit_tbl_ptr != NULL);
+
+ /* Query APIC id and Proximity id from device properties. */
+ if (ACPI_FAILURE(acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get ACPI object "
+ "for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get device node "
+ "for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+ *apicidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX);
+ *pxmidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ ACPIDEV_PROP_NAME_PROXIMITY_ID, UINT32_MAX);
+ if (*apicidp == UINT32_MAX || *pxmidp == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get local APIC id "
+ "or proximity id for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+
+ ASSERT((slicntp && slipp) || (!slicntp && !slipp));
+ if (slicntp != NULL && slipp != NULL) {
+ if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp,
+ slicntp, slipp))) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+void
+acpidev_dr_free_cpu_numa_info(void *hdlp)
+{
+ if (hdlp != NULL) {
+ AcpiOsFree(hdlp);
+ }
+}
+
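+/*
+ * Look up the proximity domain of a memory range by scanning the memory
+ * affinity entries in the static ACPI SRAT table.  The range must fall
+ * into an entry flagged as both ENABLED and HOT_PLUGGABLE.
+ */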
+static ACPI_STATUS
+acpidev_dr_mem_search_srat(struct memlist *ml, uint32_t *pxmidp)
+{
+ int len, off;
+ uint64_t start, end;
+ boolean_t found = B_FALSE;
+ ACPI_SUBTABLE_HEADER *sp;
+ ACPI_SRAT_MEM_AFFINITY *mp;
+
+ ASSERT(ml != NULL);
+ ASSERT(pxmidp != NULL);
+ ASSERT(acpidev_srat_tbl_ptr != NULL);
+
+ /* Search the static ACPI SRAT table for proximity domain. */
+ sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1);
+ len = acpidev_srat_tbl_ptr->Header.Length;
+ off = sizeof (*acpidev_srat_tbl_ptr);
+ while (off < len) {
+ if (sp->Type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
+ mp = (ACPI_SRAT_MEM_AFFINITY *)sp;
+ if ((mp->Flags & ACPI_SRAT_MEM_ENABLED) &&
+ (mp->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
+ ml->ml_address >= mp->BaseAddress &&
+ ml->ml_address <= mp->BaseAddress + mp->Length) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ off += sp->Length;
+ sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length);
+ }
+ if (!found)
+ return (AE_NOT_FOUND);
+
+ /*
+ * Verify that all memory regions in the list belong to the same domain.
+ */
+ start = mp->BaseAddress;
+ end = mp->BaseAddress + mp->Length;
+ while (ml) {
+ if (ml->ml_address < start ||
+ ml->ml_address + ml->ml_size > end) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: memory for hot-adding doesn't belong "
+ "to the same proximity domain.");
+ return (AE_ERROR);
+ }
+ ml = ml->ml_next;
+ }
+
+ return (AE_OK);
+}
+
+/*
+ * Query lgrp information for a memory device.
+ * It returns the proximity domain id and latency information of the device.
+ */
+ACPI_STATUS
+acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl, struct memlist *ml,
+ void **hdlpp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp)
+{
+ ASSERT(ml != NULL);
+ ASSERT(hdlpp != NULL);
+ ASSERT(pxmidp != NULL);
+ if (ml == NULL || hdlpp == NULL || pxmidp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_get_mem_numa_info().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ *pxmidp = UINT32_MAX;
+ if (lgrp_plat_node_cnt == 1) {
+ return (AE_SUPPORT);
+ }
+
+ if (ACPI_FAILURE(acpidev_eval_pxm(hdl, pxmidp))) {
+ /*
+		 * Try to get the proximity domain id from the SRAT table if
+		 * evaluating the ACPI _PXM method for the memory device fails.
+ */
+ if (ACPI_FAILURE(acpidev_dr_mem_search_srat(ml, pxmidp))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get proximity domain id for "
+ "memory device %p.", hdl);
+ return (AE_ERROR);
+ }
+ }
+
+ ASSERT((slicntp && slipp) || (!slicntp && !slipp));
+ if (slicntp != NULL && slipp != NULL) {
+ if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp,
+ slicntp, slipp))) {
+ return (AE_ERROR);
+ }
+ }
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_free_mem_numa_info(void *hdlp)
+{
+ if (hdlp != NULL) {
+ AcpiOsFree(hdlp);
+ }
+}
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
index eee5d84715..2c0dab936b 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -60,19 +60,23 @@
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/mutex.h>
+#include <sys/note.h>
#include <sys/obpdefs.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
/* Patchable through /etc/system */
int acpidev_options = 0;
int acpidev_debug = 0;
+krwlock_t acpidev_class_lock;
acpidev_class_list_t *acpidev_class_list_root = NULL;
+ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
/* ACPI device autoconfig global status */
typedef enum acpidev_status {
@@ -86,9 +90,7 @@ typedef enum acpidev_status {
static acpidev_status_t acpidev_status = ACPIDEV_STATUS_UNKNOWN;
static kmutex_t acpidev_drv_lock;
-static krwlock_t acpidev_class_lock;
static dev_info_t *acpidev_root_dip = NULL;
-static ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
/* Boot time ACPI device enumerator. */
static void acpidev_boot_probe(int type);
@@ -117,6 +119,7 @@ _init(void)
sizeof (acpidev_object_type_mask));
mutex_init(&acpidev_drv_lock, NULL, MUTEX_DRIVER, NULL);
rw_init(&acpidev_class_lock, NULL, RW_DEFAULT, NULL);
+ acpidev_dr_init();
impl_bus_add_probe(acpidev_boot_probe);
} else {
cmn_err(CE_WARN, "!acpidev: failed to install driver.");
@@ -157,6 +160,13 @@ acpidev_class_list_fini(void)
{
acpidev_unload_plat_modules();
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_scope,
+ &acpidev_class_pci);
+ (void) acpidev_unregister_class(&acpidev_class_list_device,
+ &acpidev_class_pci);
+ }
+
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
(void) acpidev_unregister_class(&acpidev_class_list_device,
&acpidev_class_memory);
@@ -252,7 +262,7 @@ acpidev_class_list_init(uint64_t *fp)
*fp |= ACPI_DEVCFG_CPU;
}
- /* Check support for ACPI memory device. */
+ /* Check support of ACPI memory devices. */
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
/*
* Register the ACPI memory class driver onto the
@@ -267,10 +277,36 @@ acpidev_class_list_init(uint64_t *fp)
*fp |= ACPI_DEVCFG_MEMORY;
}
+ /* Check support of PCI/PCIex Host Bridge devices. */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ /*
+ * Register pci/pciex class drivers onto
+		 * the acpidev_class_list_device class list because the ACPI
+		 * module class driver uses that list.
+ */
+ if (ACPI_FAILURE(acpidev_register_class(
+ &acpidev_class_list_device, &acpidev_class_pci,
+ B_FALSE))) {
+ goto error_device_pci;
+ }
+
+ /*
+ * Register pci/pciex class drivers onto the
+ * acpidev_class_list_scope class list.
+ */
+ if (ACPI_FAILURE(acpidev_register_class(
+ &acpidev_class_list_scope, &acpidev_class_pci,
+ B_FALSE))) {
+ goto error_scope_pci;
+ }
+
+ *fp |= ACPI_DEVCFG_PCI;
+ }
+
/* Check blacklist and load platform specific modules. */
rc = acpidev_load_plat_modules();
if (ACPI_FAILURE(rc)) {
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to check blacklist "
-		    "or load pratform modules.");
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to check blacklist "
+		    "or load platform modules.");
goto error_plat;
}
@@ -278,6 +314,16 @@ acpidev_class_list_init(uint64_t *fp)
return (AE_OK);
error_plat:
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_scope,
+ &acpidev_class_pci);
+ }
+error_scope_pci:
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_device,
+ &acpidev_class_pci);
+ }
+error_device_pci:
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
(void) acpidev_unregister_class(&acpidev_class_list_device,
&acpidev_class_memory);
@@ -313,7 +359,7 @@ error_root_device:
&acpidev_class_scope);
error_out:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to register built-in class drivers.");
+ "!acpidev: failed to register built-in class drivers.");
*fp = 0;
return (AE_ERROR);
@@ -353,7 +399,7 @@ acpidev_create_root_node(void)
(pnode_t)DEVI_SID_NODEID, &dip);
if (rv != NDI_SUCCESS) {
ndi_devi_exit(ddi_root_node(), circ);
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create device node "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device node "
"for ACPI root with errcode %d.", rv);
return (AE_ERROR);
}
@@ -362,7 +408,7 @@ acpidev_create_root_node(void)
if (ACPI_FAILURE(acpica_tag_devinfo(dip, ACPI_ROOT_OBJECT))) {
(void) ddi_remove_child(dip, 0);
ndi_devi_exit(ddi_root_node(), circ);
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to tag object %s.",
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to tag object %s.",
ACPIDEV_OBJECT_NAME_SB);
return (AE_ERROR);
}
@@ -376,7 +422,7 @@ acpidev_create_root_node(void)
}
if (rv != DDI_SUCCESS) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to set device property for /devices/%s.",
+ "!acpidev: failed to set device property for /devices/%s.",
ACPIDEV_NODE_NAME_ROOT);
goto error_out;
}
@@ -384,7 +430,7 @@ acpidev_create_root_node(void)
/* Manually create an object handle for the root node */
objhdl = acpidev_data_create_handle(ACPI_ROOT_OBJECT);
if (objhdl == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object "
"handle for the root node.");
goto error_out;
}
@@ -425,7 +471,7 @@ acpidev_initialize(void)
return;
} else if (acpidev_status != ACPIDEV_STATUS_UNKNOWN) {
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: initialization called more than once.");
+ "!acpidev: initialization called more than once.");
return;
}
@@ -476,6 +522,18 @@ acpidev_initialize(void)
acpidev_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(),
DDI_PROP_DONTPASS, "acpidev-options", acpidev_options);
+ /* Check whether ACPI based DR has been disabled by user. */
+ rc = ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_DONTPASS, "acpidev-dr", &str);
+ if (rc == DDI_SUCCESS) {
+ if (strcasecmp(str, "off") == 0 || strcasecmp(str, "no") == 0) {
+ cmn_err(CE_CONT, "?acpidev: ACPI based DR has been "
+ "disabled by user.\n");
+ acpidev_dr_enable = 0;
+ }
+ ddi_prop_free(str);
+ }
+
/* Register all device class drivers. */
if (ACPI_FAILURE(acpidev_class_list_init(&features))) {
cmn_err(CE_WARN,
@@ -518,7 +576,7 @@ acpidev_boot_probe_device(acpidev_op_type_t op_type)
infop = acpidev_alloc_walk_info(op_type, 0, ACPI_ROOT_OBJECT,
&acpidev_class_list_root, NULL);
if (infop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate walk info "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
"object in acpi_boot_probe_device().");
return (AE_ERROR);
}
@@ -558,9 +616,18 @@ acpidev_boot_probe(int type)
if (type == 0 && acpidev_status == ACPIDEV_STATUS_INITIALIZED) {
rc = acpidev_boot_probe_device(ACPIDEV_OP_BOOT_PROBE);
if (ACPI_SUCCESS(rc)) {
+ /*
+			 * Support of DR operations will be disabled if the
+			 * DR subsystem fails to initialize.
+ */
+ rc = acpidev_dr_initialize(acpidev_root_dip);
+ if (ACPI_FAILURE(rc) && rc != AE_SUPPORT) {
+ cmn_err(CE_CONT, "?acpidev: failed to "
+ "initialize DR subsystem.");
+ }
acpidev_status = ACPIDEV_STATUS_FIRST_PASS;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to probe ACPI "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to probe ACPI "
"devices during boot.");
acpidev_status = ACPIDEV_STATUS_FAILED;
}
@@ -569,7 +636,7 @@ acpidev_boot_probe(int type)
if (ACPI_SUCCESS(rc)) {
acpidev_status = ACPIDEV_STATUS_READY;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to reprobe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to reprobe "
"ACPI devices during boot.");
acpidev_status = ACPIDEV_STATUS_FAILED;
}
@@ -598,12 +665,12 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
ASSERT(infop != NULL);
if (infop == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop is NULL in acpidev_probe_child().");
+ "!acpidev: infop is NULL in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
ASSERT(infop->awi_level < ACPIDEV_MAX_ENUM_LEVELS - 1);
if (infop->awi_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: recursive level is too deep "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: recursive level is too deep "
"in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
@@ -615,14 +682,14 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
if (infop->awi_class_list == NULL || infop->awi_hdl == NULL ||
infop->awi_info == NULL || infop->awi_name == NULL ||
infop->awi_data == NULL) {
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop has NULL fields in acpidev_probe_child().");
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL fields in "
+ "acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
pdip = acpidev_walk_info_get_pdip(infop);
if (pdip == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: pdip is NULL in acpidev_probe_child().");
+ "!acpidev: pdip is NULL in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
@@ -636,7 +703,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
}
infop->awi_class_curr = it->acl_class;
if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to pre-probe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to pre-probe "
"device of type %s under %s.",
it->acl_class->adc_class_name, infop->awi_name);
}
@@ -653,11 +720,17 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
continue;
}
+ /* It's another hotplug-capable board, skip it. */
+ if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE &&
+ acpidev_dr_device_is_board(child)) {
+ continue;
+ }
+
/* Allocate the walk info structure. */
cinfop = acpidev_alloc_walk_info(infop->awi_op_type,
infop->awi_level + 1, child, NULL, infop);
if (cinfop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate "
"walk info child object of %s.",
infop->awi_name);
/* Mark error and continue to handle next child. */
@@ -673,12 +746,6 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
datap = cinfop->awi_data;
if (cinfop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
datap->aod_class_list = infop->awi_class_list;
- } else if (datap->aod_class_list != infop->awi_class_list) {
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: class list for %s has been changed",
- infop->awi_name);
- acpidev_free_walk_info(cinfop);
- continue;
}
/* Call registered process callbacks. */
@@ -691,7 +758,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
res = it->acl_class->adc_probe(cinfop);
if (ACPI_FAILURE(res)) {
rc = res;
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"process object of type %s under %s.",
it->acl_class->adc_class_name,
infop->awi_name);
@@ -709,7 +776,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
}
infop->awi_class_curr = it->acl_class;
if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to post-probe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to post-probe "
"device of type %s under %s.",
it->acl_class->adc_class_name, infop->awi_name);
}
@@ -737,7 +804,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
ASSERT(infop != NULL);
if (infop == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop is NULL in acpidev_process_object().");
+ "!acpidev: infop is NULL in acpidev_process_object().");
return (AE_BAD_PARAMETER);
}
ASSERT(infop->awi_hdl != NULL);
@@ -751,13 +818,13 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
clsp = infop->awi_class_curr;
if (hdl == NULL || datap == NULL || adip == NULL || clsp == NULL ||
clsp->adc_filter == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: infop has NULL pointer in "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL pointer in "
"acpidev_process_object().");
return (AE_BAD_PARAMETER);
}
pdip = acpidev_walk_info_get_pdip(infop);
if (pdip == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get pdip for %s "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get pdip for %s "
"in acpidev_process_object().", infop->awi_name);
return (AE_BAD_PARAMETER);
}
@@ -782,7 +849,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
(infop->awi_flags & ACPIDEV_WI_DEVICE_CREATED)) {
ASSERT(infop->awi_dip != NULL);
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: device has already been created for object %s.",
+ "!acpidev: device has already been created for object %s.",
infop->awi_name);
return (AE_ALREADY_EXISTS);
}
@@ -793,9 +860,9 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
* 6.3.1 and 6.5.1.
* present functioning enabled Action
* 0 0 x Do nothing
- * 1 x 0 Create node in OFFLINE and scan child
+ * 1 x 0 Do nothing
* 1 x 1 Create node and scan child
- * x 1 0 Create node in OFFLINE and scan child
+ * x 1 0 Do nothing
* x 1 1 Create node and scan child
*/
if ((datap->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0 ||
@@ -807,31 +874,34 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
}
datap->aod_iflag |= ACPIDEV_ODF_STATUS_VALID;
}
- if (!acpidev_check_device_present(datap->aod_status)) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: object %s doesn't exist.",
+ if (!acpidev_check_device_enabled(datap->aod_status)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s doesn't exist.",
infop->awi_name);
- return (AE_NOT_EXIST);
+ /*
+ * Need to scan for hotplug-capable boards even if object
+ * doesn't exist or has been disabled during the first pass.
+ * So just disable creating device node and keep on scanning.
+ */
+ if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
+ flags &= ~ACPIDEV_PROCESS_FLAG_CREATE;
+ } else {
+ return (AE_NOT_EXIST);
+ }
}
ASSERT(infop->awi_data != NULL);
ASSERT(infop->awi_parent != NULL);
ASSERT(infop->awi_parent->awi_data != NULL);
- /* Put device into offline state if parent is in offline state. */
- if (infop->awi_parent->awi_data->aod_iflag &
- ACPIDEV_ODF_DEVINFO_OFFLINE) {
- flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
- /* Put device into offline state if it's disabled. */
- } else if (!acpidev_check_device_enabled(datap->aod_status)) {
- flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
- }
- /*
- * Mark current node status as OFFLINE even if a device node will not
- * be created for it. This is needed to handle the case when the current
- * node is SKIPPED (no device node will be created for it), so that all
- * descedants of current nodes could be correctly marked as OFFLINE.
- */
- if (flags & ACPIDEV_PROCESS_FLAG_OFFLINE) {
- infop->awi_data->aod_iflag |= ACPIDEV_ODF_DEVINFO_OFFLINE;
+ if (flags & ACPIDEV_PROCESS_FLAG_CREATE) {
+ mutex_enter(&(DEVI(pdip)->devi_lock));
+ /*
+ * Put the device into offline state if its parent is in
+ * offline state.
+ */
+ if (DEVI_IS_DEVICE_OFFLINE(pdip)) {
+ flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
+ }
+ mutex_exit(&(DEVI(pdip)->devi_lock));
}
/* Evaluate filtering rules and generate device name. */
@@ -862,7 +932,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
OBP_DEVICETYPE, clsp->adc_dev_type);
if (ret != NDI_SUCCESS) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to set device property for %s.",
+ "!acpidev: failed to set device property for %s.",
infop->awi_name);
(void) ddi_remove_child(dip, 0);
infop->awi_dip = NULL;
@@ -886,7 +956,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
if (clsp->adc_init != NULL &&
ACPI_FAILURE(clsp->adc_init(infop))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to initialize device %s.",
+ "!acpidev: failed to initialize device %s.",
infop->awi_name);
if ((flags & ACPIDEV_PROCESS_FLAG_NOTAG) == 0) {
(void) acpica_untag_devinfo(dip, hdl);
@@ -923,6 +993,11 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
} else {
(void) ndi_devi_bind_driver(dip, 0);
}
+
+ /* Hold reference on branch when hot-adding devices. */
+ if (flags & ACPIDEV_PROCESS_FLAG_HOLDBRANCH) {
+ e_ddi_branch_hold(dip);
+ }
}
/* Free resources */
@@ -942,7 +1017,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
rc = acpidev_probe_child(infop);
if (ACPI_FAILURE(rc)) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to probe subtree of %s.",
+ "!acpidev: failed to probe subtree of %s.",
infop->awi_name);
rc = AE_ERROR;
}
@@ -960,7 +1035,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
case ACPIDEV_FILTER_FAILED:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to probe device for %s.",
+ "!acpidev: failed to probe device for %s.",
infop->awi_name);
rc = AE_ERROR;
break;
@@ -975,11 +1050,12 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
return (rc);
}
-/*ARGSUSED*/
acpidev_filter_result_t
acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
acpidev_filter_rule_t *afrp, char *devname, int len)
{
+ _NOTE(ARGUNUSED(hdl));
+
ASSERT(afrp != NULL);
ASSERT(devname == NULL || len >= ACPIDEV_MAX_NAMELEN);
if (infop->awi_level < afrp->adf_minlvl ||
@@ -992,8 +1068,7 @@ acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
return (ACPIDEV_FILTER_CONTINUE);
}
if (afrp->adf_replace != NULL && devname != NULL) {
- (void) strncpy(devname, afrp->adf_replace, len - 1);
- devname[len - 1] = 0;
+ (void) strlcpy(devname, afrp->adf_replace, len);
}
return (afrp->adf_retcode);
@@ -1042,7 +1117,7 @@ acpidev_register_class(acpidev_class_list_t **listpp, acpidev_class_t *clsp,
ASSERT(listpp != NULL);
if (listpp == NULL || clsp == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameter in acpidev_register_class().");
+ "!acpidev: invalid parameter in acpidev_register_class().");
return (AE_BAD_PARAMETER);
} else if (clsp->adc_version != ACPIDEV_CLASS_REV) {
cmn_err(CE_WARN,
@@ -1092,7 +1167,7 @@ acpidev_unregister_class(acpidev_class_list_t **listpp,
ASSERT(clsp != NULL);
ASSERT(listpp != NULL);
if (listpp == NULL || clsp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter "
"in acpidev_unregister_class().");
return (AE_BAD_PARAMETER);
}
@@ -1106,12 +1181,12 @@ acpidev_unregister_class(acpidev_class_list_t **listpp,
}
}
if (temp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) doesn't exist "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) doesn't exist "
"in acpidev_unregister_class().",
(void *)clsp, clsp->adc_class_name);
rc = AE_NOT_FOUND;
} else if (temp->acl_class->adc_refcnt != 0) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) is still in use "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) is still in use "
"in acpidev_unregister_class()..",
(void *)clsp, clsp->adc_class_name);
rc = AE_ERROR;
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
index e4dcbcdd67..4185817dd9 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -31,6 +31,7 @@
#include <sys/acpica.h>
#include <sys/acpidev.h>
#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
static ACPI_STATUS acpidev_memory_probe(acpidev_walk_info_t *infop);
@@ -97,7 +98,7 @@ static acpidev_filter_rule_t acpidev_memory_filters[] = {
static ACPI_STATUS
acpidev_memory_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -109,19 +110,35 @@ acpidev_memory_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ acpidev_dr_check(infop);
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_memory_probe.", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -153,7 +170,6 @@ acpidev_memory_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_memory_init(acpidev_walk_info_t *infop)
{
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c
new file mode 100644
index 0000000000..5f0c6c6c13
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c
@@ -0,0 +1,172 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpidev_impl.h>
+
+static ACPI_STATUS acpidev_pci_probe(acpidev_walk_info_t *infop);
+
+/*
+ * Default class driver for PCI/PCIEX Host Bridge devices.
+ */
+acpidev_class_t acpidev_class_pci = {
+ 0, /* adc_refcnt */
+ ACPIDEV_CLASS_REV1, /* adc_version */
+ ACPIDEV_CLASS_ID_PCI, /* adc_class_id */
+ "PCI/PCIex Host Bridge", /* adc_class_name */
+ ACPIDEV_TYPE_PCI, /* adc_dev_type */
+ NULL, /* adc_private */
+ NULL, /* adc_pre_probe */
+ NULL, /* adc_post_probe */
+ acpidev_pci_probe, /* adc_probe */
+ NULL, /* adc_filter */
+ NULL, /* adc_init */
+ NULL, /* adc_fini */
+};
+
+static char *acpidev_pci_device_ids[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+ ACPIDEV_HID_PCI_HOSTBRIDGE,
+};
+
+static char *acpidev_pciex_device_ids[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+};
+
+static void
+acpidev_pci_update_status(acpidev_walk_info_t *infop)
+{
+ int status;
+ dev_info_t *dip = NULL;
+ acpidev_data_handle_t dhdl;
+
+ dhdl = infop->awi_data;
+ ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_CREATED) == 0);
+ ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) == 0);
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0) {
+ status = acpidev_query_device_status(infop->awi_hdl);
+ dhdl->aod_status = status;
+ dhdl->aod_iflag |= ACPIDEV_ODF_STATUS_VALID;
+ } else {
+ status = dhdl->aod_status;
+ }
+ dhdl->aod_level = infop->awi_level;
+ dhdl->aod_hdl = infop->awi_hdl;
+ dhdl->aod_class = NULL;
+ dhdl->aod_class_list = NULL;
+ if (acpidev_match_device_id(infop->awi_info,
+ ACPIDEV_ARRAY_PARAM(acpidev_pciex_device_ids))) {
+ dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCIEX;
+ } else {
+ dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCI;
+ }
+
+ if (ACPI_FAILURE(acpica_get_devinfo(infop->awi_hdl, &dip))) {
+ dip = NULL;
+ } else {
+ ASSERT(dip != NULL);
+ }
+ if (acpidev_check_device_enabled(status)) {
+ /*
+ * Mark the device as DISABLED if no device node has been created.
+ * The BIOS may hide some special PCI/PCIEX buses from the OS.
+ */
+ if (dip == NULL) {
+ dhdl->aod_dip = NULL;
+ dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+ } else {
+ dhdl->aod_dip = dip;
+ dhdl->aod_iflag |= ACPIDEV_ODF_DEVINFO_CREATED;
+ }
+ } else {
+ ASSERT(dip == NULL);
+ dhdl->aod_dip = NULL;
+ dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+ }
+}
+
+static ACPI_STATUS
+acpidev_pci_probe(acpidev_walk_info_t *infop)
+{
+ ACPI_STATUS rc = AE_OK;
+
+ ASSERT(infop != NULL);
+ ASSERT(infop->awi_hdl != NULL);
+ ASSERT(infop->awi_info != NULL);
+ if (infop->awi_info->Type != ACPI_TYPE_DEVICE ||
+ acpidev_match_device_id(infop->awi_info,
+ ACPIDEV_ARRAY_PARAM(acpidev_pci_device_ids)) == B_FALSE) {
+ return (AE_OK);
+ }
+
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_PCI) == 0) {
+ return (AE_OK);
+ }
+
+ if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
+ /*
+ * Check hotplug capability on the first pass.
+ */
+ acpidev_dr_check(infop);
+ } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
+ /*
+ * Check whether the PCI device enumerator has created device
+ * nodes for PCI/PCIEX host bridges.
+ */
+ acpidev_pci_update_status(infop);
+ } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
+ /*
+ * PCI/PCIEX host bridge hotplug is not supported yet;
+ * support will come in a later phase.
+ */
+ cmn_err(CE_WARN,
+ "!acpidev: no support of PCI/PCIEX host bridge hotplug.");
+ /*
+ * Don't block the hot-adding process, just skip it.
+ */
+ rc = AE_OK;
+ } else {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
+ "in acpidev_pci_probe().", infop->awi_op_type);
+ rc = AE_BAD_PARAMETER;
+ }
+ if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
+ cmn_err(CE_CONT, "?acpidev: failed to process PCI/PCIEX host "
+ "bridge object %s.\n", infop->awi_name);
+ } else {
+ rc = AE_OK;
+ }
+
+ return (rc);
+}
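
The class driver above is pure data plus one probe callback; wiring it into the framework only requires registering the vtable on a class list. A hedged sketch, assuming the acpidev_class_list_scope list head and the boolean tail-ordering argument implied by the truncated acpidev_register_class() prototype in an earlier hunk:

    /* Hypothetical registration of the PCI/PCIEX class driver. */
    if (ACPI_FAILURE(acpidev_register_class(&acpidev_class_list_scope,
        &acpidev_class_pci, B_FALSE))) {
            cmn_err(CE_WARN,
                "!acpidev: failed to register PCI/PCIEX class driver.");
    }
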
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
index 17dd9c774b..e9664c7614 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -331,7 +331,7 @@ acpidev_resource_insert_dma(acpidev_resource_handle_t rhdl, int dma)
ASSERT(rhdl != NULL);
if (rhdl->acpidev_dma_count >= ACPIDEV_RES_DMA_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: too many DMA resources, max %u.",
+ "!acpidev: too many DMA resources, max %u.",
ACPIDEV_RES_DMA_MAX);
return (AE_LIMIT);
}
@@ -380,7 +380,7 @@ acpidev_resource_insert_irq(acpidev_resource_handle_t rhdl, int irq)
ASSERT(rhdl != NULL);
if (rhdl->acpidev_irq_count >= ACPIDEV_RES_IRQ_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: too many IRQ resources, max %u.",
+ "!acpidev: too many IRQ resources, max %u.",
ACPIDEV_RES_IRQ_MAX);
return (AE_LIMIT);
}
@@ -456,7 +456,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
high |= ACPIDEV_REG_MEM_COHERENT_PF;
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown memory caching type %u.",
+ "!acpidev: unknown memory caching type %u.",
addrp->Info.Mem.Caching);
rc = AE_ERROR;
break;
@@ -477,7 +477,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert regspec into resource handle.");
}
/* Generate 'ranges' for producer. */
@@ -502,7 +502,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
range.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -524,7 +524,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
high |= ACPIDEV_REG_IO_RANGE_FULL;
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown IO range type %u.",
+ "!acpidev: unknown IO range type %u.",
addrp->Info.Io.RangeType);
rc = AE_ERROR;
break;
@@ -545,7 +545,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert regspec into resource handle.");
}
/* Generate 'ranges' for producer. */
@@ -570,7 +570,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
range.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -585,7 +585,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
end = addrp->Minimum + addrp->AddressLength;
if (end < addrp->Minimum || end > UINT_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: bus range "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: bus range "
"in ADDRESS64 is invalid.");
rc = AE_ERROR;
break;
@@ -595,7 +595,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
ASSERT(bus.bus_start <= bus.bus_end);
rc = acpidev_resource_insert_bus(rhdl, &bus);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert bus range into resource handle.");
}
}
@@ -603,7 +603,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
default:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown resource type %u in ADDRESS64.",
+ "!acpidev: unknown resource type %u in ADDRESS64.",
addrp->ResourceType);
rc = AE_BAD_PARAMETER;
}
@@ -632,7 +632,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
case ACPI_RESOURCE_TYPE_VENDOR:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unsupported producer resource type %u, ignored.",
+ "!acpidev: unsupported producer resource type %u, ignored.",
rscp->Type);
break;
@@ -651,13 +651,13 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
range.size_hi = 0;
range.size_low = rscp->Data.Io.AddressLength;
if ((uint64_t)range.child_low + range.size_low > UINT16_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO record, "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO record, "
"IO max is out of range.");
rc = AE_ERROR;
} else if (range.size_low != 0) {
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -671,16 +671,16 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.Address.ProducerConsumer != ACPI_PRODUCER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"a CONSUMER resource, ignored.");
} else if (ACPI_FAILURE(AcpiResourceToAddress64(rscp,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to convert resource to ADDR64.");
+ "!acpidev: failed to convert resource to ADDR64.");
} else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle ADDRESS resource.");
+ "!acpidev: failed to handle ADDRESS resource.");
}
break;
}
@@ -690,7 +690,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_PRODUCER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"a CONSUMER resource, ignored.");
break;
}
@@ -705,14 +705,14 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle EXTADDRESS resource.");
+ "!acpidev: failed to handle EXTADDRESS resource.");
}
break;
}
case ACPI_RESOURCE_TYPE_START_DEPENDENT:
case ACPI_RESOURCE_TYPE_END_DEPENDENT:
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"START_DEPENDENT or END_DEPENDENT tag, ignored.");
break;
@@ -723,7 +723,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
default:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown ACPI resource type %u, ignored.",
+ "!acpidev: unknown ACPI resource type %u, ignored.",
rscp->Type);
break;
}
@@ -746,7 +746,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
case ACPI_RESOURCE_TYPE_VENDOR:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unsupported consumer resource type %u, ignored.",
+ "!acpidev: unsupported consumer resource type %u, ignored.",
rscp->Type);
break;
@@ -755,7 +755,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
int i;
if (rscp->Data.ExtendedIrq.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
break;
}
@@ -764,7 +764,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.ExtendedIrq.Interrupts[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"Extended IRQ into resource handle.");
rc = AE_ERROR;
break;
@@ -781,7 +781,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.Irq.Interrupts[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"IRQ into resource handle.");
rc = AE_ERROR;
break;
@@ -798,7 +798,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.Dma.Channels[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"dma into resource handle.");
rc = AE_ERROR;
break;
@@ -827,13 +827,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
reg.phys_mid = 0;
reg.size_hi = 0;
if ((uint64_t)reg.phys_low + reg.size_low > UINT16_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO/FIXEDIO "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO/FIXEDIO "
"record, IO max is out of range.");
rc = AE_ERROR;
} else if (reg.size_low != 0) {
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert reg into resource handle.");
}
}
@@ -866,13 +866,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
reg.size_hi = 0;
if ((uint64_t)reg.phys_low + reg.size_low > UINT32_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid MEMORY32/FIXEDMEMORY32 record, "
+ "!acpidev: invalid MEMORY32/FIXEDMEMORY32 record, "
"memory max is out of range.");
rc = AE_ERROR;
} else if (reg.size_low != 0) {
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert reg into resource handle.");
}
}
@@ -886,16 +886,16 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.Address.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
} else if (ACPI_FAILURE(AcpiResourceToAddress64(rscp,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to convert resource to ADDR64.");
+ "!acpidev: failed to convert resource to ADDR64.");
} else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle ADDRESS resource.");
+ "!acpidev: failed to handle ADDRESS resource.");
}
break;
}
@@ -905,7 +905,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
break;
}
@@ -920,14 +920,14 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle EXTADDRESS resource.");
+ "!acpidev: failed to handle EXTADDRESS resource.");
}
break;
}
case ACPI_RESOURCE_TYPE_START_DEPENDENT:
case ACPI_RESOURCE_TYPE_END_DEPENDENT:
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"START_DEPENDENT or END_DEPENDENT tag, ignored.");
break;
@@ -938,7 +938,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
default:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown ACPI resource type %u, ignored.",
+ "!acpidev: unknown ACPI resource type %u, ignored.",
rscp->Type);
break;
}
@@ -959,14 +959,14 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
ASSERT(rhdlp != NULL);
if (hdl == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: hdl is NULL in acpidev_resource_walk().");
+ "!acpidev: hdl is NULL in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
} else if (method == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: method is NULL in acpidev_resource_walk().");
+ "!acpidev: method is NULL in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
} else if (rhdlp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: resource handle ptr is NULL "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: resource handle ptr is NULL "
"in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
}
@@ -975,7 +975,7 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
if (ACPI_FAILURE(AcpiGetHandle(hdl, method, &mhdl))) {
char *objname = acpidev_get_object_name(hdl);
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: method %s doesn't exist under %s",
+ "!acpidev: method %s doesn't exist under %s",
method, objname);
acpidev_free_object_name(objname);
return (AE_NOT_FOUND);
@@ -997,11 +997,9 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
}
if (ACPI_FAILURE(rc)) {
char *objname = acpidev_get_object_name(hdl);
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to walk resource from method %s under %s.",
- method, objname);
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to walk resource from "
+ "method %s under %s.", method, objname);
acpidev_free_object_name(objname);
-
}
return (rc);
@@ -1016,7 +1014,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ASSERT(infop != NULL);
if (infop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter "
"in acpidev_resource_process().");
return (AE_BAD_PARAMETER);
}
@@ -1027,7 +1025,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
consumer, &rhdl);
if (ACPI_FAILURE(rc)) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to walk ACPI resources of %s(%s).",
+ "!acpidev: failed to walk ACPI resources of %s(%s).",
path, infop->awi_name);
return (rc);
}
@@ -1041,7 +1039,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"reg", (int *)rhdl->acpidev_regp,
rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'reg' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1051,7 +1049,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"assigned-addresses", (int *)rhdl->acpidev_regp,
rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'assigned-addresses' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1062,7 +1060,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip,
"interrupts", (int *)rhdl->acpidev_irqp,
rhdl->acpidev_irq_count) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'interrupts' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1073,7 +1071,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip,
"dma-channels", (int *)rhdl->acpidev_dmap,
rhdl->acpidev_dma_count) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'dma-channels' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1088,7 +1086,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"ranges", (int *)rhdl->acpidev_rangep,
rhdl->acpidev_range_count * sizeof (acpidev_ranges_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'ranges' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1100,7 +1098,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"bus-range", (int *)rhdl->acpidev_busp,
rhdl->acpidev_bus_count * sizeof (acpidev_bus_range_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'bus-range' property for %s.", path);
rc = AE_ERROR;
goto out;
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
index 144cec2dbc..e19a6e92e4 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -102,7 +102,7 @@ static acpidev_filter_rule_t acpidev_scope_filters[] = {
static ACPI_STATUS
acpidev_scope_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -112,19 +112,29 @@ acpidev_scope_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_scope_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -150,7 +160,7 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
ACPIDEV_ARRAY_PARAM(acpidev_scope_filters),
devname, maxlen);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_scope_filter().", infop->awi_op_type);
res = ACPIDEV_FILTER_FAILED;
}
@@ -158,7 +168,6 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_scope_init(acpidev_walk_info_t *infop)
{
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
index 869a935f0d..1b1a0cf3ad 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
@@ -24,12 +24,13 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
#include <sys/types.h>
#include <sys/cmn_err.h>
+#include <sys/note.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
@@ -55,7 +56,7 @@ acpidev_query_device_status(ACPI_HANDLE hdl)
ASSERT(hdl != NULL);
if (hdl == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: hdl is NULL in acpidev_query_device_status().");
+ "!acpidev: hdl is NULL in acpidev_query_device_status().");
return (0);
}
@@ -114,7 +115,7 @@ acpidev_match_device_id(ACPI_DEVICE_INFO *infop, char **ids, int count)
if (count == 0) {
return (B_TRUE);
} else if (infop == NULL || ids == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters in "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters in "
"acpidev_match_device_id().");
return (B_FALSE);
}
@@ -206,7 +207,7 @@ acpidev_get_device_by_id(ACPI_HANDLE hdl, char **ids, int count,
ASSERT(userfunc != NULL);
if (hdl == NULL || userfunc == NULL || (ids == NULL && count != 0)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters "
"in acpidev_get_device_by_id().");
return (AE_BAD_PARAMETER);
}
@@ -237,7 +238,7 @@ acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl, char *method,
ASSERT(func != NULL);
if (func == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameters for acpidev_walk_apic().");
+ "!acpidev: invalid parameters for acpidev_walk_apic().");
return (AE_BAD_PARAMETER);
}
@@ -361,7 +362,7 @@ acpidev_alloc_walk_info(acpidev_op_type_t op_type, int lvl, ACPI_HANDLE hdl,
datap->aod_level = lvl;
datap->aod_hdl = hdl;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object "
"handle for %s in acpidev_alloc_walk_info().",
infop->awi_name);
AcpiOsFree(infop->awi_info);
@@ -416,12 +417,17 @@ acpidev_walk_info_get_pdip(acpidev_walk_info_t *infop)
* Called to release resources when the corresponding object is going
* to be destroyed.
*/
-/*ARGSUSED*/
static void
-acpidev_get_object_handler(ACPI_HANDLE hdl, void *data)
+acpidev_free_object_handler(ACPI_HANDLE hdl, void *data)
{
+ _NOTE(ARGUNUSED(hdl));
+
acpidev_data_handle_t objhdl = data;
+ if (objhdl->aod_class != NULL) {
+ atomic_dec_32(&objhdl->aod_class->adc_refcnt);
+ objhdl->aod_class = NULL;
+ }
kmem_free(objhdl, sizeof (acpidev_data_handle_t));
}
@@ -431,7 +437,7 @@ acpidev_data_get_handle(ACPI_HANDLE hdl)
void *ptr;
acpidev_data_handle_t objhdl = NULL;
- if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr))) {
+ if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr))) {
objhdl = (acpidev_data_handle_t)ptr;
}
@@ -444,7 +450,12 @@ acpidev_data_create_handle(ACPI_HANDLE hdl)
acpidev_data_handle_t objhdl;
objhdl = kmem_zalloc(sizeof (*objhdl), KM_SLEEP);
- if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_get_object_handler,
+ objhdl->aod_bdtype = ACPIDEV_INVALID_BOARD;
+ objhdl->aod_bdnum = UINT32_MAX;
+ objhdl->aod_portid = UINT32_MAX;
+ objhdl->aod_class_id = ACPIDEV_CLASS_ID_INVALID;
+
+ if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_free_object_handler,
(void *)objhdl))) {
cmn_err(CE_WARN,
"!acpidev: failed to attach handle data to object.");
@@ -459,9 +470,15 @@ void
acpidev_data_destroy_handle(ACPI_HANDLE hdl)
{
void *ptr;
+ acpidev_data_handle_t objhdl = NULL;
- if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr)) &&
- ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_get_object_handler))) {
+ if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr)) &&
+ ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_free_object_handler))) {
+ objhdl = ptr;
+ if (objhdl->aod_class != NULL) {
+ atomic_dec_32(&objhdl->aod_class->adc_refcnt);
+ objhdl->aod_class = NULL;
+ }
kmem_free(ptr, sizeof (acpidev_data_handle_t));
}
}
@@ -502,14 +519,14 @@ void
acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag)
{
ASSERT(hdl != NULL);
- hdl->aod_eflag |= flag;
+ atomic_or_32(&hdl->aod_eflag, flag);
}
void
acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag)
{
ASSERT(hdl != NULL);
- hdl->aod_eflag &= ~flag;
+ atomic_and_32(&hdl->aod_eflag, ~flag);
}
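
These hunks turn plain read-modify-write updates of aod_eflag into atomic_or_32()/atomic_and_32() calls so that racing callers cannot lose each other's bits. A minimal illustration of the pattern, using a hypothetical flags word rather than the driver's field:

    #include <sys/atomic.h>

    static volatile uint32_t flags;

    /*
     * A plain "flags |= bit" compiles to load/modify/store; two racing
     * callers may both load the same old value and one bit is lost.
     * atomic_or_32() and atomic_and_32() make the update indivisible.
     */
    void
    set_bit(uint32_t bit)
    {
            atomic_or_32(&flags, bit);
    }

    void
    clear_bit(uint32_t bit)
    {
            atomic_and_32(&flags, ~bit);
    }
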
uint32_t
@@ -519,6 +536,27 @@ acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag)
return (hdl->aod_eflag & flag);
}
+boolean_t
+acpidev_data_dr_capable(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE);
+}
+
+boolean_t
+acpidev_data_dr_ready(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY);
+}
+
+boolean_t
+acpidev_data_dr_failed(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED);
+}
+
static char *
acpidev_generate_pseudo_unitaddr(char *uid, acpidev_class_id_t cid,
char *buf, size_t len)
@@ -591,7 +629,7 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len)
}
if (cnt != 1 && cnt != 2) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid uid format string '%s'.", fmt);
+ "!acpidev: invalid uid format string '%s'.", fmt);
return (NULL);
}
@@ -605,11 +643,11 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len)
*/
if (cnt == 2 && snprintf(buf, len, "%u,%u", id2, id1) >= len) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: generated unitaddr is too long.");
+ "!acpidev: generated unitaddr is too long.");
return (NULL);
} else if (cnt == 1 && snprintf(buf, len, "%u", id1) >= len) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: generated unitaddr is too long.");
+ "!acpidev: generated unitaddr is too long.");
return (NULL);
}
@@ -681,7 +719,7 @@ acpidev_set_unitaddr(acpidev_walk_info_t *infop, char **fmts, size_t nfmt,
if (infop == NULL || infop->awi_dip == NULL ||
infop->awi_info == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameters in acpidev_set_unitaddr().");
+ "!acpidev: invalid parameters in acpidev_set_unitaddr().");
return (AE_BAD_PARAMETER);
}
@@ -758,7 +796,7 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
ASSERT(compat != NULL || acount == 0);
if (infop == NULL || infop->awi_dip == NULL ||
infop->awi_info == NULL || (compat == NULL && acount != 0)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters "
"in acpidev_set_compatible().");
return (AE_BAD_PARAMETER);
}
@@ -802,3 +840,90 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
return (AE_OK);
}
+
+/* Evaluate the _OST method under an object to report hotplug event status. */
+ACPI_STATUS
+acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code, uint32_t status,
+ char *bufp, size_t len)
+{
+ ACPI_STATUS rc;
+ ACPI_OBJECT args[3];
+ ACPI_OBJECT_LIST arglist;
+
+ args[0].Type = ACPI_TYPE_INTEGER;
+ args[0].Integer.Value = code;
+ args[1].Type = ACPI_TYPE_INTEGER;
+ args[1].Integer.Value = status;
+ args[2].Type = ACPI_TYPE_BUFFER;
+ args[2].Buffer.Pointer = (UINT8 *)bufp;
+ args[2].Buffer.Length = (UINT32)len;
+ if (bufp == NULL || len == 0) {
+ arglist.Count = 2;
+ } else {
+ arglist.Count = 3;
+ }
+ arglist.Pointer = args;
+ rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_OST, &arglist, NULL);
+ if (rc != AE_OK && rc != AE_NOT_FOUND) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _OST method, code 0x%x.", rc);
+ }
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_eval_ej0(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc;
+ ACPI_OBJECT args[1];
+ ACPI_OBJECT_LIST arglist;
+
+ /*
+ * Quotation from ACPI spec 4.0 section 6.3.3.
+ * Arg0 An Integer containing a device ejection control
+ * 0 Cancel a mark for ejection request (EJ0 will never be called
+ * with this value)
+ * 1 Hot eject or mark for ejection
+ */
+ args[0].Type = ACPI_TYPE_INTEGER;
+ args[0].Integer.Value = 1;
+ arglist.Count = 1;
+ arglist.Pointer = args;
+ rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_EJ0, &arglist, NULL);
+ if (rc != AE_OK) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _EJ0 method, code 0x%x.", rc);
+ }
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp)
+{
+ int pxmid;
+
+ ASSERT(idp != NULL);
+
+ /*
+ * Try to evaluate the ACPI _PXM method to get the proximity domain id.
+ * Quotation from ACPI spec 4.0:
+ * If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a
+ * dynamically added processor is not present in the System Resource
+ * Affinity Table (SRAT), a _PXM object must exist for the processor's
+ * device or one of its ancestors in the ACPI Namespace.
+ */
+ while (hdl != NULL) {
+ if (ACPI_SUCCESS(acpica_eval_int(hdl,
+ ACPIDEV_METHOD_NAME_PXM, &pxmid))) {
+ *idp = (uint32_t)pxmid;
+ return (AE_OK);
+ }
+ if (ACPI_FAILURE(AcpiGetParent(hdl, &hdl))) {
+ break;
+ }
+ }
+
+ return (AE_NOT_FOUND);
+}
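
For illustration only, the three helpers added above compose naturally in a hot-remove path: look up the proximity domain for bookkeeping, run _EJ0, then report the outcome through _OST. A hedged sketch; example_eject() is hypothetical and the success code 0 is assumed from the ACPI 4.0 _OST definition:

    static void
    example_eject(ACPI_HANDLE hdl)
    {
            uint32_t pxm = 0;
            uint32_t code;

            /* _PXM is optional; AE_NOT_FOUND simply means "no domain". */
            (void) acpidev_eval_pxm(hdl, &pxm);

            if (ACPI_SUCCESS(acpidev_eval_ej0(hdl))) {
                    code = 0;       /* 0 denotes success per the ACPI spec */
            } else {
                    code = ACPI_OST_STA_FAILURE;
            }
            (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST, code,
                NULL, 0);
    }
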
diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
index d9908aec59..35fb4b6af5 100644
--- a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
+++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
/*
@@ -40,6 +40,7 @@
#include <sys/ddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddifm.h>
+#include <sys/note.h>
#include <sys/ndifm.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
@@ -175,6 +176,9 @@ _init(void)
return (error);
}
+ /* Initialize event subsystem. */
+ acpinex_event_init();
+
/* Install the module. */
if ((error = mod_install(&modlinkage)) != 0) {
cmn_err(CE_WARN, "acpinex: failed to install module.");
@@ -197,6 +201,9 @@ _fini(void)
return (error);
}
+ /* Shut down event subsystem. */
+ acpinex_event_fini();
+
/* Free the soft state info. */
ddi_soft_state_fini(&acpinex_softstates);
@@ -211,10 +218,11 @@ _info(struct modinfo *modinfop)
return (mod_info(&modlinkage, modinfop));
}
-/* ARGSUSED */
static int
acpinex_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
+ _NOTE(ARGUNUSED(dip));
+
dev_t dev;
int instance;
@@ -268,16 +276,24 @@ acpinex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
(void) ddi_pathname(devi, softsp->ans_path);
if (ACPI_FAILURE(acpica_get_handle(devi, &softsp->ans_hdl))) {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: failed to get ACPI handle for %s.",
+ "!acpinex: failed to get ACPI handle for %s.",
softsp->ans_path);
ddi_soft_state_free(acpinex_softstates, instance);
return (DDI_FAILURE);
}
mutex_init(&softsp->ans_lock, NULL, MUTEX_DRIVER, NULL);
+ /* Install event handler for child/descendant objects. */
+ if (acpinex_event_scan(softsp, B_TRUE) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpinex: failed to install event handler "
+ "for children of %s.", softsp->ans_path);
+ }
+
/* nothing to suspend/resume here */
(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
"pm-hardware-state", "no-suspend-resume");
+ (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi,
+ DDI_NO_AUTODETACH, 1);
acpinex_fm_init(softsp);
ddi_report_dev(devi);
@@ -301,17 +317,24 @@ acpinex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_detach()", instance);
return (DDI_FAILURE);
}
switch (cmd) {
case DDI_DETACH:
+ if (acpinex_event_scan(softsp, B_FALSE) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpinex: failed to uninstall event "
+ "handler for children of %s.", softsp->ans_path);
+ return (DDI_FAILURE);
+ }
ddi_remove_minor_node(devi, NULL);
acpinex_fm_fini(softsp);
mutex_destroy(&softsp->ans_lock);
ddi_soft_state_free(acpinex_softstates, instance);
+ (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi,
+ DDI_NO_AUTODETACH, 0);
return (DDI_SUCCESS);
case DDI_SUSPEND:
@@ -332,13 +355,11 @@ name_child(dev_info_t *child, char *name, int namelen)
name[0] = '\0';
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
ACPIDEV_PROP_NAME_UNIT_ADDR, &unitaddr) == DDI_SUCCESS) {
- (void) strncpy(name, unitaddr, namelen - 1);
- name[namelen - 1] = '\0';
+ (void) strlcpy(name, unitaddr, namelen);
ddi_prop_free(unitaddr);
} else {
- ACPINEX_DEBUG(CE_NOTE,
- "acpinex: failed to lookup child unit-address prop for %p.",
- (void *)child);
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to lookup child "
+ "unit-address prop for %p.", (void *)child);
}
return (DDI_SUCCESS);
@@ -406,21 +427,22 @@ acpinex_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
off_t offset, off_t len, caddr_t *vaddrp)
{
ACPINEX_DEBUG(CE_WARN,
- "acpinex: acpinex_bus_map called and it's unimplemented.");
+ "!acpinex: acpinex_bus_map called and it's unimplemented.");
return (DDI_ME_UNIMPLEMENTED);
}
-/* ARGSUSED */
static int
acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
{
+ _NOTE(ARGUNUSED(flags, otyp, credp));
+
minor_t minor, instance;
acpinex_softstate_t *softsp;
minor = getminor(*devi);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of "
"range in acpinex_open, max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
@@ -428,7 +450,7 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_open().", instance);
return (EINVAL);
}
@@ -437,23 +459,24 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
return (0);
} else {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_open().",
+ "!acpinex: invalid minor number %d in acpinex_open().",
minor);
return (EINVAL);
}
}
-/* ARGSUSED */
static int
acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
{
+ _NOTE(ARGUNUSED(flags, otyp, credp));
+
minor_t minor, instance;
acpinex_softstate_t *softsp;
minor = getminor(dev);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of "
"range in acpinex_close(), max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
@@ -461,7 +484,7 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_close().", instance);
return (EINVAL);
}
@@ -470,17 +493,18 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
return (0);
} else {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_close().",
+ "!acpinex: invalid minor number %d in acpinex_close().",
minor);
return (EINVAL);
}
}
-/* ARGSUSED */
static int
acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
int *rvalp)
{
+ _NOTE(ARGUNUSED(cmd, arg, mode, credp, rvalp));
+
int rv = 0;
minor_t minor, instance;
acpinex_softstate_t *softsp;
@@ -488,21 +512,21 @@ acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
minor = getminor(dev);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_NOTE, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: instance number %d out of "
"range in acpinex_ioctl(), max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
}
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_ioctl().", instance);
return (EINVAL);
}
rv = ENOTSUP;
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_ioctl().", minor);
+ "!acpinex: invalid minor number %d in acpinex_ioctl().", minor);
return (rv);
}
@@ -512,11 +536,12 @@ acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
* Register error handling callback with our parent. We will just call
* our children's error callbacks and return their status.
*/
-/*ARGSUSED*/
static int
acpinex_err_callback(dev_info_t *dip, ddi_fm_error_t *derr,
const void *impl_data)
{
+ _NOTE(ARGUNUSED(impl_data));
+
/* Call our children's error handlers. */
return (ndi_fm_handler_dispatch(dip, NULL, derr));
}
@@ -560,11 +585,12 @@ acpinex_fm_fini(acpinex_softstate_t *softsp)
* Initialize FMA resources for child devices.
* Called when child calls ddi_fm_init().
*/
-/*ARGSUSED*/
static int
acpinex_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
ddi_iblock_cookie_t *ibc)
{
+ _NOTE(ARGUNUSED(tdip, cap));
+
acpinex_softstate_t *softsp = ddi_get_soft_state(acpinex_softstates,
ddi_get_instance(dip));
diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c
new file mode 100644
index 0000000000..633c99b442
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c
@@ -0,0 +1,712 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/bitmap.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+#include <sys/sunndi.h>
+#include <sys/fastboot_impl.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpinex.h>
+
+int acpinex_event_support_remove = 0;
+
+static volatile uint_t acpinex_dr_event_cnt = 0;
+static ulong_t acpinex_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
+
+/*
+ * Generate DR_REQ event to syseventd.
+ * Please refer to sys/sysevent/dr.h for message definition.
+ */
+static int
+acpinex_event_generate_event(dev_info_t *dip, ACPI_HANDLE hdl, int req,
+ int event, char *objname)
+{
+ int rv = 0;
+ sysevent_id_t eid;
+ sysevent_value_t evnt_val;
+ sysevent_attr_list_t *evnt_attr_list = NULL;
+ char *attach_pnt;
+ char event_type[32];
+
+ /* Add "attachment point" attribute. */
+ attach_pnt = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ if (ACPI_FAILURE(acpidev_dr_get_attachment_point(hdl,
+ attach_pnt, MAXPATHLEN))) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to generate AP name for %s.", objname);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (-1);
+ }
+ ASSERT(attach_pnt[0] != '\0');
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = attach_pnt;
+ rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val, KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ DR_AP_ID, EC_DR);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ /* Add "request type" attribute. */
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = SE_REQ2STR(req);
+ rv = sysevent_add_attr(&evnt_attr_list, DR_REQ_TYPE, &evnt_val,
+ KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ DR_REQ_TYPE, EC_DR);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ /* Add "acpi-event-type" attribute. */
+ switch (event) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_BUS_CHECK);
+ break;
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_DEVICE_CHECK);
+ break;
+ case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT);
+ break;
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_EJECT_REQUEST);
+ break;
+ default:
+ cmn_err(CE_WARN,
+ "!acpinex: unknown ACPI event type %d.", event);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (-1);
+ }
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = event_type;
+ rv = sysevent_add_attr(&evnt_attr_list, ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ &evnt_val, KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME, EC_DR);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ rv = ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DR, ESC_DR_REQ,
+ evnt_attr_list, &eid, KM_SLEEP);
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to log DR_REQ event for %s.", objname);
+ rv = -1;
+ }
+
+ nvlist_free(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+
+ return (rv);
+}
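
On the consumer side (not part of this patch), a userland daemon could watch for these ESC_DR_REQ events through the stock libsysevent interfaces; a minimal sketch, assuming only those documented interfaces:

    #include <stdio.h>
    #include <unistd.h>
    #include <libsysevent.h>
    #include <libnvpair.h>
    #include <sys/sysevent/dr.h>
    #include <sys/sysevent/eventdefs.h>

    static void
    dr_req_handler(sysevent_t *ev)
    {
            nvlist_t *attrs;
            char *apid;

            if (sysevent_get_attr_list(ev, &attrs) != 0)
                    return;
            /* DR_AP_ID carries the attachment point added by the driver. */
            if (nvlist_lookup_string(attrs, DR_AP_ID, &apid) == 0)
                    (void) printf("DR request on %s\n", apid);
            nvlist_free(attrs);
    }

    int
    main(void)
    {
            const char *subclasses[] = { ESC_DR_REQ };
            sysevent_handle_t *shp;

            if ((shp = sysevent_bind_handle(dr_req_handler)) == NULL)
                    return (1);
            if (sysevent_subscribe_event(shp, EC_DR, subclasses, 1) != 0) {
                    sysevent_unbind_handle(shp);
                    return (1);
            }
            (void) pause();         /* events arrive on a library thread */
            sysevent_unbind_handle(shp);
            return (0);
    }
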
+
+/*
+ * Event handler for ACPI EJECT_REQUEST notifications.
+ * EJECT_REQUEST notifications should be generated on the device to be
+ * ejected, so there is no need to scan its subtree.
+ * It also invokes the ACPI _OST method to update the event status if
+ * call_ost is true.
+ */
+static void
+acpinex_event_handle_eject_request(ACPI_HANDLE hdl, acpinex_softstate_t *sp,
+ boolean_t call_ost)
+{
+ int code;
+ char *objname;
+
+ ASSERT(hdl != NULL);
+ objname = acpidev_get_object_name(hdl);
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL);
+ if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) {
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
+ ACPI_OST_STA_FAILURE, NULL, 0);
+ }
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: softstate data structure is invalid.");
+ cmn_err(CE_WARN,
+ "!acpinex: failed to handle EJECT_REQUEST event from %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return;
+ }
+
+ if (acpinex_event_support_remove == 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: hot-removing of device %s is unsupported.",
+ objname);
+ code = ACPI_OST_STA_EJECT_NOT_SUPPORT;
+ } else if (acpinex_event_generate_event(sp->ans_dip, hdl,
+ SE_OUTGOING_RES, ACPI_NOTIFY_EJECT_REQUEST, objname) != 0) {
+ cmn_err(CE_WARN, "!acpinex: failed to generate ESC_DR_REQ "
+ "event for device eject request from %s.", objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else {
+ cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event for "
+ "device eject request from %s.", objname);
+ code = ACPI_OST_STA_EJECT_IN_PROGRESS;
+ }
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
+ code, NULL, 0);
+ }
+
+ acpidev_free_object_name(objname);
+}
+
+struct acpinex_event_check_arg {
+ acpinex_softstate_t *softstatep;
+ int event_type;
+ uint32_t device_insert;
+ uint32_t device_remove;
+ uint32_t device_fail;
+};
+
+static ACPI_STATUS
+acpinex_event_handle_check_one(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(lvl, retval));
+
+ char *objname;
+ int status, psta, csta;
+ acpidev_data_handle_t dhdl;
+ struct acpinex_event_check_arg *argp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ argp = (struct acpinex_event_check_arg *)ctx;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ /* Skip subtree if failed to get the data handle. */
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: failed to get data associated with %p.", hdl);
+ return (AE_CTRL_DEPTH);
+ } else if (!acpidev_data_dr_capable(dhdl)) {
+ return (AE_OK);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+
+ status = 0;
+ /* Query previous device status. */
+ psta = acpidev_data_get_status(dhdl);
+ if (acpidev_check_device_enabled(psta)) {
+ status |= 0x1;
+ }
+ /* Query current device status. */
+ csta = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_enabled(csta)) {
+ status |= 0x2;
+ }
+
+ switch (status) {
+ case 0x0:
+ /*FALLTHROUGH*/
+ case 0x3:
+ /* No status changes, keep on walking. */
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+
+ case 0x1:
+ /* Surprise removal. */
+ cmn_err(CE_WARN,
+ "!acpinex: device %s has been unexpectedly removed.",
+ objname);
+ if (argp->event_type == ACPI_NOTIFY_BUS_CHECK) {
+ /*
+ * According to ACPI spec, BUS_CHECK notification
+ * should be triggered for hot-adding events only.
+ */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: device %s has been surprisingly removed "
+ "when handling BUS_CHECK event.", objname);
+ }
+ acpidev_free_object_name(objname);
+ argp->device_remove++;
+ return (AE_CTRL_DEPTH);
+
+ case 0x2:
+ /* Hot-adding. */
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: device %s has been inserted.", objname);
+ argp->device_insert++;
+ if (acpinex_event_generate_event(argp->softstatep->ans_dip, hdl,
+ SE_INCOMING_RES, argp->event_type, objname) != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to generate ESC_DR_REQ event for "
+ "device insert request from %s.", objname);
+ argp->device_fail++;
+ } else {
+ cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event "
+ "for device insert request from %s.", objname);
+ }
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ return (AE_ERROR);
+}
+
+/*
+ * Event handler for BUS_CHECK/DEVICE_CHECK/DEVICE_CHECK_LIGHT notifications.
+ * These events may be signaled on parent/ancestor of devices to be hot-added,
+ * so need to scan ACPI namespace to figure out devices in question.
+ * It also invokes ACPI _OST method to update event status if call_ost is true.
+ */
+static void
+acpinex_event_handle_check_request(int event, ACPI_HANDLE hdl,
+ acpinex_softstate_t *sp, boolean_t call_ost)
+{
+ ACPI_STATUS rv;
+ int code;
+ char *objname;
+ struct acpinex_event_check_arg arg;
+
+ ASSERT(hdl != NULL);
+ objname = acpidev_get_object_name(hdl);
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL);
+ if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) {
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, event,
+ ACPI_OST_STA_FAILURE, NULL, 0);
+ }
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: softstate data structure is invalid.");
+ cmn_err(CE_WARN, "!acpinex: failed to handle "
+ "BUS/DEVICE_CHECK event from %s.", objname);
+ acpidev_free_object_name(objname);
+ return;
+ }
+
+ bzero(&arg, sizeof (arg));
+ arg.event_type = event;
+ arg.softstatep = sp;
+ rv = acpinex_event_handle_check_one(hdl, 0, &arg, NULL);
+ if (ACPI_SUCCESS(rv)) {
+ rv = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ ACPIDEV_MAX_ENUM_LEVELS,
+ &acpinex_event_handle_check_one, NULL, &arg, NULL);
+ }
+
+ if (ACPI_FAILURE(rv)) {
+ /* Failed to scan the ACPI namespace. */
+ cmn_err(CE_WARN, "!acpinex: failed to handle event %d from %s.",
+ event, objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else if (arg.device_remove != 0) {
+ /* A surprise removal happened. */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: some devices were unexpectedly removed.");
+ code = ACPI_OST_STA_NOT_SUPPORT;
+ } else if (arg.device_fail != 0) {
+ /* Failed to handle some devices. */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: failed to check status of some devices.");
+ code = ACPI_OST_STA_FAILURE;
+ } else if (arg.device_insert == 0) {
+ /* No hot-added devices found. */
+ cmn_err(CE_WARN,
+ "!acpinex: no hot-added devices under %s found.", objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else {
+ code = ACPI_OST_STA_INSERT_IN_PROGRESS;
+ }
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, event, code, NULL, 0);
+ }
+
+ acpidev_free_object_name(objname);
+}
+
+static void
+acpinex_event_system_handler(ACPI_HANDLE hdl, UINT32 type, void *arg)
+{
+ acpinex_softstate_t *sp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(arg != NULL);
+ sp = (acpinex_softstate_t *)arg;
+
+ acpidev_dr_lock_all();
+ mutex_enter(&sp->ans_lock);
+
+ switch (type) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ /*
+ * Bus Check. This notification is performed on a device object
+ * to indicate to OSPM that it needs to perform the Plug and
+ * Play re-enumeration operation on the device tree starting
+ * from the point where it has been notified. OSPM will only
+ * perform this operation at boot, and when notified. It is
+ * the responsibility of the ACPI AML code to notify OSPM at
+ * any other times that this operation is required. The more
+ * accurately and closer to the actual device tree change the
+ * notification can be done, the more efficient the operating
+ * system response will be; however, it can also be an issue
+ * when a device change cannot be confirmed. For example, if
+ * the hardware cannot notice a device change for a particular
+ * location during a system sleeping state, it issues a Bus
+ * Check notification on wake to inform OSPM that it needs to
+ * check the configuration for a device change.
+ */
+ /*FALLTHROUGH*/
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ /*
+ * Device Check. Used to notify OSPM that the device either
+ * appeared or disappeared. If the device has appeared, OSPM
+ * will re-enumerate from the parent. If the device has
+ * disappeared, OSPM will invalidate the state of the device.
+ * OSPM may optimize out re-enumeration. If _DCK is present,
+ * then Notify(object,1) is assumed to indicate an undock
+ * request.
+ */
+ /*FALLTHROUGH*/
+ case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ /*
+ * Device Check Light. Used to notify OSPM that the device
+ * either appeared or disappeared. If the device has appeared,
+ * OSPM will re-enumerate from the device itself, not the
+ * parent. If the device has disappeared, OSPM will invalidate
+ * the state of the device.
+ */
+ atomic_inc_uint(&acpinex_dr_event_cnt);
+ acpinex_event_handle_check_request(type, hdl, sp, B_TRUE);
+ break;
+
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ /*
+ * Eject Request. Used to notify OSPM that the device should
+ * be ejected, and that OSPM needs to perform the Plug and Play
+ * ejection operation. OSPM will run the _EJx method.
+ */
+ atomic_inc_uint(&acpinex_dr_event_cnt);
+ acpinex_event_handle_eject_request(hdl, sp, B_TRUE);
+ break;
+
+ default:
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: unhandled event(%d) on hdl %p under %s.",
+ type, hdl, sp->ans_path);
+ (void) acpidev_eval_ost(hdl, type, ACPI_OST_STA_NOT_SUPPORT,
+ NULL, 0);
+ break;
+ }
+
+ if (acpinex_dr_event_cnt != 0) {
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't tolerate such inconsistency between stale ACPI tables
+ * and the real hardware configuration yet.
+ *
+ * A temporary solution is introduced to disable fast reboot if
+ * a CPU/MEM/IOH hotplug event happens. This solution should be
+ * revised when fast reboot is enhanced to support CPU/MEM/IOH
+ * DR operations.
+ */
+ fastreboot_disable(FBNS_HOTPLUG);
+ }
+
+ mutex_exit(&sp->ans_lock);
+ acpidev_dr_unlock_all();
+}
+
+/*
+ * Install event handler for ACPI system events.
+ * The acpinex driver handles ACPI system events on behalf of its children;
+ * device-specific events are handled by the corresponding device drivers.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_install_handler(ACPI_HANDLE hdl, void *arg,
+ ACPI_DEVICE_INFO *infop, acpidev_data_handle_t dhdl)
+{
+ int rc = DDI_SUCCESS;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(infop != NULL);
+
+ /*
+ * Check whether the event handler has already been installed on the
+ * device object. With the introduction of ACPI Alias objects, which are
+ * similar to symlinks in file systems, there may be multiple name
+ * objects in the ACPI namespace pointing to the same underlying device
+ * object. Those Alias objects need to be filtered out; otherwise we would
+ * attempt to install the event handler multiple times on the same device
+ * object, which would fail.
+ */
+ if (acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) {
+ return (DDI_SUCCESS);
+ }
+ if (ACPI_SUCCESS(AcpiInstallNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY,
+ acpinex_event_system_handler, arg))) {
+ acpidev_data_set_flag(dhdl, ACPIDEV_DATA_HANDLER_READY);
+ } else {
+ char *objname;
+
+ objname = acpidev_get_object_name(hdl);
+ cmn_err(CE_WARN,
+ "!acpinex: failed to install system event handler for %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ rc = DDI_FAILURE;
+ }
+
+ return (rc);
+}
+
+/*
+ * Uninstall event handler for ACPI system events.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_uninstall_handler(ACPI_HANDLE hdl, ACPI_DEVICE_INFO *infop,
+ acpidev_data_handle_t dhdl)
+{
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(infop != NULL);
+
+ if (!acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) {
+ return (DDI_SUCCESS);
+ }
+ if (ACPI_SUCCESS(AcpiRemoveNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY,
+ acpinex_event_system_handler))) {
+ acpidev_data_clear_flag(dhdl, ACPIDEV_DATA_HANDLER_READY);
+ } else {
+ char *objname;
+
+ objname = acpidev_get_object_name(hdl);
+ cmn_err(CE_WARN, "!acpinex: failed to uninstall system event "
+ "handler for %s.", objname);
+ acpidev_free_object_name(objname);
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Install/uninstall ACPI system event handler for child objects of hdl.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_walk(boolean_t init, acpinex_softstate_t *sp, ACPI_HANDLE hdl)
+{
+ int rc;
+ int retval = DDI_SUCCESS;
+ dev_info_t *dip;
+ ACPI_HANDLE child = NULL;
+ ACPI_OBJECT_TYPE type;
+ ACPI_DEVICE_INFO *infop;
+ acpidev_data_handle_t dhdl;
+
+ /* Walk all child objects. */
+ ASSERT(hdl != NULL);
+ while (ACPI_SUCCESS(AcpiGetNextObject(ACPI_TYPE_ANY, hdl, child,
+ &child))) {
+ /* Skip unwanted object types. */
+ if (ACPI_FAILURE(AcpiGetType(child, &type)) ||
+ type > ACPI_TYPE_NS_NODE_MAX ||
+ BT_TEST(acpinex_object_type_mask, type) == 0) {
+ continue;
+ }
+
+ /* Get data associated with the object; skip it if this fails. */
+ dhdl = acpidev_data_get_handle(child);
+ if (dhdl == NULL) {
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to get data "
+ "associated with %p, skip.", child);
+ continue;
+ }
+
+ /* Query ACPI object info for the object. */
+ if (ACPI_FAILURE(AcpiGetObjectInfo(child, &infop))) {
+ cmn_err(CE_WARN,
+ "!acpidnex: failed to get object info for %p.",
+ child);
+ continue;
+ }
+
+ if (init) {
+ rc = acpinex_event_install_handler(child, sp, infop,
+ dhdl);
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "install handler for child %p of %s.",
+ child, sp->ans_path);
+ retval = DDI_FAILURE;
+ /*
+ * Try to handle descendants if both of the
+ * following conditions are true:
+ * 1) The device corresponding to the current object
+ *    is enabled. If the device is absent or disabled,
+ *    no notification should be generated from its
+ *    descendant objects.
+ * 2) No Solaris device node has been created for the
+ *    current object yet. Once a device node exists,
+ *    notification events from child objects are
+ *    handled by the corresponding driver.
+ */
+ } else if (acpidev_check_device_enabled(
+ acpidev_data_get_status(dhdl)) &&
+ ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+ rc = acpinex_event_walk(B_TRUE, sp, child);
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: failed to install "
+ "handler for descendants of %s.",
+ sp->ans_path);
+ retval = DDI_FAILURE;
+ }
+ }
+ } else {
+ rc = DDI_SUCCESS;
+ /* Uninstall handler for descendants if needed. */
+ if (ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+ rc = acpinex_event_walk(B_FALSE, sp, child);
+ }
+ if (rc == DDI_SUCCESS) {
+ rc = acpinex_event_uninstall_handler(child,
+ infop, dhdl);
+ }
+ /* Undo will be done by caller in case of failure. */
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "uninstall handler for descendants of %s.",
+ sp->ans_path);
+ AcpiOsFree(infop);
+ retval = DDI_FAILURE;
+ break;
+ }
+ }
+
+ /* Release cached resources. */
+ AcpiOsFree(infop);
+ }
+
+ return (retval);
+}
+
+int
+acpinex_event_scan(acpinex_softstate_t *sp, boolean_t init)
+{
+ int rc;
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_hdl != NULL);
+ ASSERT(sp->ans_dip != NULL);
+ if (sp == NULL || sp->ans_hdl == NULL || sp->ans_dip == NULL) {
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: invalid parameter to acpinex_event_scan().");
+ return (DDI_FAILURE);
+ }
+
+ /* Lock current device node and walk all child device nodes of it. */
+ mutex_enter(&sp->ans_lock);
+
+ rc = acpinex_event_walk(init, sp, sp->ans_hdl);
+ if (rc != DDI_SUCCESS) {
+ if (init) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "configure child objects of %s.", sp->ans_path);
+ rc = DDI_FAILURE;
+ } else {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "unconfigure child objects of %s.", sp->ans_path);
+ /* Undo in case of errors */
+ (void) acpinex_event_walk(B_TRUE, sp, sp->ans_hdl);
+ rc = DDI_FAILURE;
+ }
+ }
+
+ mutex_exit(&sp->ans_lock);
+
+ return (rc);
+}
+
+void
+acpinex_event_init(void)
+{
+ /*
+ * According to the ACPI specification, notification is only supported on
+ * Device, Processor and ThermalZone objects. Currently we only need to
+ * handle Device and Processor objects.
+ */
+ BT_SET(acpinex_object_type_mask, ACPI_TYPE_PROCESSOR);
+ BT_SET(acpinex_object_type_mask, ACPI_TYPE_DEVICE);
+}
+
+void
+acpinex_event_fini(void)
+{
+ bzero(acpinex_object_type_mask, sizeof (acpinex_object_type_mask));
+}
diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c
new file mode 100644
index 0000000000..35ad4353aa
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c
@@ -0,0 +1,2927 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/note.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/varargs.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/mem_config.h>
+#include <sys/mem_cage.h>
+#include <sys/memnode.h>
+#include <sys/callb.h>
+#include <sys/ontrap.h>
+#include <sys/obpdefs.h>
+#include <sys/promif.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/x_call.h>
+#include <sys/x86_archext.h>
+#include <sys/fastboot_impl.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+#include "drmach_acpi.h"
+
+/* utility */
+#define MBYTE (1048576ull)
+#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
+#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
+
+static int drmach_init(void);
+static void drmach_fini(void);
+static int drmach_name2type_idx(char *);
+static sbd_error_t *drmach_mem_update_lgrp(drmachid_t);
+
+#ifdef DEBUG
+int drmach_debug = 1; /* set to non-zero to enable debug messages */
+#endif /* DEBUG */
+
+drmach_domain_info_t drmach_domain;
+
+static char *drmach_ie_fmt = "drmach_acpi.c %d";
+static drmach_array_t *drmach_boards;
+
+/* rwlock to protect drmach_boards. */
+static krwlock_t drmach_boards_rwlock;
+
+/* rwlock to block out CPR thread. */
+static krwlock_t drmach_cpr_rwlock;
+
+/* CPR callb id. */
+static callb_id_t drmach_cpr_cid;
+
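+/*
+ * Table mapping acpidev node names to drmach device types and their
+ * constructor functions.
+ */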
+static struct {
+ const char *name;
+ const char *type;
+ sbd_error_t *(*new)(drmach_device_t *, drmachid_t *);
+} drmach_name2type[] = {
+ { ACPIDEV_NODE_NAME_CPU, DRMACH_DEVTYPE_CPU, drmach_cpu_new },
+ { ACPIDEV_NODE_NAME_MEMORY, DRMACH_DEVTYPE_MEM, drmach_mem_new },
+ { ACPIDEV_NODE_NAME_PCI, DRMACH_DEVTYPE_PCI, drmach_io_new },
+};
+
+/*
+ * drmach autoconfiguration data structures and interfaces
+ */
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "ACPI based DR v1.0"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = drmach_init()) != 0) {
+ return (err);
+ }
+
+ if ((err = mod_install(&modlinkage)) != 0) {
+ drmach_fini();
+ }
+
+ return (err);
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) == 0) {
+ drmach_fini();
+ }
+
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Internal support functions.
+ */
+static DRMACH_HANDLE
+drmach_node_acpi_get_dnode(drmach_node_t *np)
+{
+ return ((DRMACH_HANDLE)(uintptr_t)np->here);
+}
+
+static dev_info_t *
+drmach_node_acpi_get_dip(drmach_node_t *np)
+{
+ dev_info_t *dip = NULL;
+
+ if (ACPI_FAILURE(acpica_get_devinfo((DRMACH_HANDLE)(np->here), &dip))) {
+ return (NULL);
+ }
+
+ return (dip);
+}
+
+static int
+drmach_node_acpi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
+{
+ int rv = 0;
+ DRMACH_HANDLE hdl;
+
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_get_prop: NULL handle");
+ rv = -1;
+ } else {
+ rv = acpidev_dr_device_getprop(hdl, name, buf, len);
+ if (rv >= 0) {
+ ASSERT(rv <= len);
+ rv = 0;
+ }
+ }
+
+ return (rv);
+}
+
+static int
+drmach_node_acpi_get_proplen(drmach_node_t *np, char *name, int *len)
+{
+ int rv = 0;
+ DRMACH_HANDLE hdl;
+
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_get_proplen: NULL handle");
+ rv = -1;
+ } else {
+ rv = acpidev_dr_device_getprop(hdl, name, NULL, 0);
+ if (rv >= 0) {
+ *len = rv;
+ return (0);
+ }
+ }
+
+ return (-1);
+}
+
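+/*
+ * Per-object callback used by drmach_node_acpi_walk(). It skips subtrees
+ * that are powered off, invokes the caller-supplied walker on objects that
+ * have device nodes, and prunes descendants of PCI/PCIex host bridges,
+ * which are managed by pcihp instead of drmach.
+ */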
+static ACPI_STATUS
+drmach_node_acpi_callback(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ int rv;
+ dev_info_t *dip;
+ drmach_node_walk_args_t *argp = ctx;
+ int (*cb)(drmach_node_walk_args_t *args);
+ acpidev_class_id_t clsid;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ ASSERT(retval != NULL);
+
+ /* Skip subtree if the device is not powered. */
+ if (!acpidev_dr_device_is_powered(hdl)) {
+ return (AE_CTRL_DEPTH);
+ }
+
+ /*
+ * Keep scanning the subtree if the device node lookup fails;
+ * some ACPI objects may not have device nodes created for them.
+ */
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ return (AE_OK);
+ }
+
+ argp->node->here = hdl;
+ cb = (int (*)(drmach_node_walk_args_t *args))argp->func;
+ rv = (*cb)(argp);
+ argp->node->here = NULL;
+ if (rv) {
+ *(int *)retval = rv;
+ return (AE_CTRL_TERMINATE);
+ }
+
+ /*
+ * Skip descendants of PCI/PCIex host bridges.
+ * PCI/PCIex devices will be handled by pcihp.
+ */
+ clsid = acpidev_dr_device_get_class(hdl);
+ if (clsid == ACPIDEV_CLASS_ID_PCI || clsid == ACPIDEV_CLASS_ID_PCIEX) {
+ return (AE_CTRL_DEPTH);
+ }
+
+ return (AE_OK);
+}
+
+static int
+drmach_node_acpi_walk(drmach_node_t *np, void *data,
+ int (*cb)(drmach_node_walk_args_t *args))
+{
+ DRMACH_HANDLE hdl;
+ int rv = 0;
+ drmach_node_walk_args_t args;
+
+ /* initialize the args structure for callback */
+ args.node = np;
+ args.data = data;
+ args.func = (void *)cb;
+
+ /* Save the handle; it will be modified while walking the tree. */
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_walk: failed to get device node.");
+ return (EX86_INAPPROP);
+ }
+
+ if (ACPI_FAILURE(acpidev_dr_device_walk_device(hdl,
+ ACPIDEV_MAX_ENUM_LEVELS, drmach_node_acpi_callback,
+ &args, (void *)&rv))) {
+ /*
+ * If acpidev_dr_device_walk_device() itself fails, rv won't
+ * be set to a suitable error code. Set it here.
+ */
+ if (rv == 0) {
+ cmn_err(CE_WARN, "!drmach_node_acpi_walk: failed to "
+ "walk ACPI namespace.");
+ rv = EX86_ACPIWALK;
+ }
+ }
+
+ /* Restore the handle to its original value after walking the tree. */
+ np->here = (void *)hdl;
+
+ return ((int)rv);
+}
+
+static drmach_node_t *
+drmach_node_new(void)
+{
+ drmach_node_t *np;
+
+ np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
+
+ np->get_dnode = drmach_node_acpi_get_dnode;
+ np->getdip = drmach_node_acpi_get_dip;
+ np->getproplen = drmach_node_acpi_get_proplen;
+ np->getprop = drmach_node_acpi_get_prop;
+ np->walk = drmach_node_acpi_walk;
+
+ return (np);
+}
+
+static drmachid_t
+drmach_node_dup(drmach_node_t *np)
+{
+ drmach_node_t *dup;
+
+ dup = drmach_node_new();
+ dup->here = np->here;
+ dup->get_dnode = np->get_dnode;
+ dup->getdip = np->getdip;
+ dup->getproplen = np->getproplen;
+ dup->getprop = np->getprop;
+ dup->walk = np->walk;
+
+ return (dup);
+}
+
+static void
+drmach_node_dispose(drmach_node_t *np)
+{
+ kmem_free(np, sizeof (*np));
+}
+
+static int
+drmach_node_walk(drmach_node_t *np, void *param,
+ int (*cb)(drmach_node_walk_args_t *args))
+{
+ return (np->walk(np, param, cb));
+}
+
+static DRMACH_HANDLE
+drmach_node_get_dnode(drmach_node_t *np)
+{
+ return (np->get_dnode(np));
+}
+
+/*
+ * drmach_array provides convenient array construction, access,
+ * bounds checking and array destruction logic.
+ */
+static drmach_array_t *
+drmach_array_new(uint_t min_index, uint_t max_index)
+{
+ drmach_array_t *arr;
+
+ arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
+
+ arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
+ if (arr->arr_sz > 0) {
+ arr->min_index = min_index;
+ arr->max_index = max_index;
+
+ arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
+ return (arr);
+ } else {
+ kmem_free(arr, sizeof (*arr));
+ return (NULL);
+ }
+}
+
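+/*
+ * Store val at index idx.
+ * Return 0 on success, or -1 if idx is out of the array bounds.
+ */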
+static int
+drmach_array_set(drmach_array_t *arr, uint_t idx, drmachid_t val)
+{
+ if (idx < arr->min_index || idx > arr->max_index)
+ return (-1);
+ arr->arr[idx - arr->min_index] = val;
+ return (0);
+}
+
+/*
+ * Get the item with index idx.
+ * Return 0 with the value stored in *val on success; otherwise return -1.
+ */
+static int
+drmach_array_get(drmach_array_t *arr, uint_t idx, drmachid_t *val)
+{
+ if (idx < arr->min_index || idx > arr->max_index)
+ return (-1);
+ *val = arr->arr[idx - arr->min_index];
+ return (0);
+}
+
+static int
+drmach_array_first(drmach_array_t *arr, uint_t *idx, drmachid_t *val)
+{
+ int rv;
+
+ *idx = arr->min_index;
+ while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
+ *idx += 1;
+
+ return (rv);
+}
+
+static int
+drmach_array_next(drmach_array_t *arr, uint_t *idx, drmachid_t *val)
+{
+ int rv;
+
+ *idx += 1;
+ while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
+ *idx += 1;
+
+ return (rv);
+}
+
+static void
+drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
+{
+ drmachid_t val;
+ uint_t idx;
+ int rv;
+
+ rv = drmach_array_first(arr, &idx, &val);
+ while (rv == 0) {
+ (*disposer)(val);
+ rv = drmach_array_next(arr, &idx, &val);
+ }
+
+ kmem_free(arr->arr, arr->arr_sz);
+ kmem_free(arr, sizeof (*arr));
+}
+
+static drmach_board_t *
+drmach_get_board_by_bnum(uint_t bnum)
+{
+ drmachid_t id;
+
+ if (drmach_array_get(drmach_boards, bnum, &id) == 0)
+ return ((drmach_board_t *)id);
+ else
+ return (NULL);
+}
+
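+/*
+ * Create a drmach device id for the given node. The node's device name
+ * property selects a constructor from the drmach_name2type[] table; a node
+ * of no interest to drmach yields a NULL id and no error.
+ */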
+sbd_error_t *
+drmach_device_new(drmach_node_t *node,
+ drmach_board_t *bp, int portid, drmachid_t *idp)
+{
+ int i;
+ int rv;
+ drmach_device_t proto;
+ sbd_error_t *err;
+ char name[OBP_MAXDRVNAME];
+
+ rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME, name, OBP_MAXDRVNAME);
+ if (rv) {
+ /* every node is expected to have a name */
+ err = drerr_new(1, EX86_GETPROP, "device node %s: property %s",
+ ddi_node_name(node->getdip(node)),
+ ACPIDEV_DR_PROP_DEVNAME);
+ return (err);
+ }
+
+ /*
+ * If the node currently being examined is not listed in the
+ * drmach_name2type[] array, it is of no interest to drmach. In that
+ * case yield nothing: neither a device id nor an error structure.
+ */
+ i = drmach_name2type_idx(name);
+ if (i < 0) {
+ *idp = (drmachid_t)0;
+ return (NULL);
+ }
+
+ /* device specific new function will set unum */
+ bzero(&proto, sizeof (proto));
+ proto.type = drmach_name2type[i].type;
+ proto.bp = bp;
+ proto.node = node;
+ proto.portid = portid;
+
+ return (drmach_name2type[i].new(&proto, idp));
+}
+
+static void
+drmach_device_dispose(drmachid_t id)
+{
+ drmach_device_t *self = id;
+
+ self->cm.dispose(id);
+}
+
+static sbd_error_t *
+drmach_device_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_common_t *cp;
+
+ if (!DRMACH_IS_ID(id))
+ return (drerr_new(0, EX86_NOTID, NULL));
+ cp = id;
+
+ return (cp->status(id, stat));
+}
+
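+/*
+ * Allocate and initialize a drmach board structure for board bnum and
+ * register it in the drmach_boards array. boot_board is non-zero for
+ * boards present at boot, which start out assigned. Returns NULL on
+ * failure.
+ */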
+drmach_board_t *
+drmach_board_new(uint_t bnum, int boot_board)
+{
+ static void drmach_board_dispose(drmachid_t id);
+ static sbd_error_t *drmach_board_release(drmachid_t);
+ static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
+
+ sbd_error_t *err;
+ drmach_board_t *bp;
+ dev_info_t *dip = NULL;
+
+ bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
+ bp->cm.isa = (void *)drmach_board_new;
+ bp->cm.release = drmach_board_release;
+ bp->cm.status = drmach_board_status;
+
+ bp->bnum = bnum;
+ bp->devices = NULL;
+ bp->tree = drmach_node_new();
+
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &bp->tree->here))) {
+ acpidev_dr_unlock_all();
+ drmach_board_dispose(bp);
+ return (NULL);
+ }
+ acpidev_dr_unlock_all();
+ ASSERT(bp->tree->here != NULL);
+
+ err = drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
+ if (err != NULL) {
+ sbd_err_clear(&err);
+ drmach_board_dispose(bp);
+ return (NULL);
+ }
+
+ if (acpidev_dr_device_is_powered(bp->tree->here)) {
+ bp->boot_board = boot_board;
+ bp->powered = 1;
+ } else {
+ bp->boot_board = 0;
+ bp->powered = 0;
+ }
+ bp->assigned = boot_board;
+ if (ACPI_SUCCESS(acpica_get_devinfo(bp->tree->here, &dip))) {
+ bp->connected = 1;
+ } else {
+ bp->connected = 0;
+ }
+
+ (void) drmach_array_set(drmach_boards, bnum, bp);
+
+ return (bp);
+}
+
+static void
+drmach_board_dispose(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ ASSERT(DRMACH_IS_BOARD_ID(id));
+ bp = id;
+
+ if (bp->tree)
+ drmach_node_dispose(bp->tree);
+
+ if (bp->devices)
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+
+ kmem_free(bp, sizeof (drmach_board_t));
+}
+
+static sbd_error_t *
+drmach_board_release(drmachid_t id)
+{
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static int
+drmach_board_check_power(drmach_board_t *bp)
+{
+ DRMACH_HANDLE hdl;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+
+ return (acpidev_dr_device_is_powered(hdl));
+}
+
+struct drmach_board_list_dep_arg {
+ int count;
+ size_t len;
+ ssize_t off;
+ char *buf;
+ char temp[MAXPATHLEN];
+};
+
+static ACPI_STATUS
+drmach_board_generate_name(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ struct drmach_board_list_dep_arg *argp = ctx;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(ctx != NULL);
+
+ /* Skip non-board devices. */
+ if (!acpidev_dr_device_is_board(hdl)) {
+ return (AE_OK);
+ }
+
+ if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, argp->temp,
+ sizeof (argp->temp)))) {
+ DRMACH_PR("!drmach_board_generate_name: failed to "
+ "generate board name for handle %p.", hdl);
+ /* Keep on walking. */
+ return (AE_OK);
+ }
+ argp->count++;
+ argp->off += snprintf(argp->buf + argp->off, argp->len - argp->off,
+ " %s", argp->temp);
+ if (argp->off >= argp->len) {
+ return (AE_CTRL_TERMINATE);
+ }
+
+ return (AE_OK);
+}
+
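+/*
+ * Format the list of boards related to hdl into buf, preceded by prefix.
+ * When edl is true the eject device list (_EDL) is walked, otherwise the
+ * eject dependency (_EJD) chain. Return the resulting string length, 0 if
+ * no dependency exists, or -1 on failure or buffer overflow.
+ */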
+static ssize_t
+drmach_board_list_dependency(ACPI_HANDLE hdl, boolean_t edl, char *prefix,
+ char *buf, size_t len)
+{
+ ACPI_STATUS rc;
+ ssize_t off;
+ struct drmach_board_list_dep_arg *ap;
+
+ ASSERT(buf != NULL && len != 0);
+ if (buf == NULL || len == 0) {
+ return (-1);
+ }
+
+ ap = kmem_zalloc(sizeof (*ap), KM_SLEEP);
+ ap->buf = buf;
+ ap->len = len;
+ ap->off = snprintf(buf, len, "%s", prefix);
+ if (ap->off >= len) {
+ *buf = '\0';
+ kmem_free(ap, sizeof (*ap));
+ return (-1);
+ }
+
+ /* Generate the device dependency list. */
+ if (edl) {
+ rc = acpidev_dr_device_walk_edl(hdl,
+ drmach_board_generate_name, ap, NULL);
+ } else {
+ rc = acpidev_dr_device_walk_ejd(hdl,
+ drmach_board_generate_name, ap, NULL);
+ }
+ if (ACPI_FAILURE(rc)) {
+ *buf = '\0';
+ ap->off = -1;
+ } else if (ap->count == 0) {
+ /* No device has a dependency on this board. */
+ *buf = '\0';
+ ap->off = 0;
+ }
+
+ off = ap->off;
+ kmem_free(ap, sizeof (*ap));
+
+ return (off);
+}
+
+static sbd_error_t *
+drmach_board_status(drmachid_t id, drmach_status_t *stat)
+{
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+ size_t off;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ if (bp->tree == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ stat->busy = 0; /* assume not busy */
+ stat->configured = 0; /* assume not configured */
+ stat->assigned = bp->assigned;
+ stat->powered = bp->powered = acpidev_dr_device_is_powered(hdl);
+ stat->empty = !acpidev_dr_device_is_present(hdl);
+ if (ACPI_SUCCESS(acpidev_dr_device_check_status(hdl))) {
+ stat->cond = bp->cond = SBD_COND_OK;
+ } else {
+ stat->cond = bp->cond = SBD_COND_FAILED;
+ }
+ stat->info[0] = '\0';
+
+ /* Generate the eject device list. */
+ if (drmach_board_list_dependency(hdl, B_TRUE, "EDL:",
+ stat->info, sizeof (stat->info)) < 0) {
+ DRMACH_PR("!drmach_board_status: failed to generate "
+ "eject device list for board %d.", bp->bnum);
+ stat->info[0] = '\0';
+ }
+ off = strlen(stat->info);
+ if (off < sizeof (stat->info)) {
+ if (drmach_board_list_dependency(hdl, B_FALSE,
+ off ? ", EJD:" : "EJD:",
+ stat->info + off, sizeof (stat->info) - off) < 0) {
+ DRMACH_PR("!drmach_board_status: failed to generate "
+ "eject dependent device for board %d.", bp->bnum);
+ stat->info[off] = '\0';
+ }
+ }
+
+ switch (acpidev_dr_get_board_type(bp->tree->get_dnode(bp->tree))) {
+ case ACPIDEV_CPU_BOARD:
+ (void) strlcpy(stat->type, "CPU Board", sizeof (stat->type));
+ break;
+ case ACPIDEV_MEMORY_BOARD:
+ (void) strlcpy(stat->type, "MemoryBoard", sizeof (stat->type));
+ break;
+ case ACPIDEV_IO_BOARD:
+ (void) strlcpy(stat->type, "IO Board", sizeof (stat->type));
+ break;
+ case ACPIDEV_SYSTEM_BOARD:
+ /*FALLTHROUGH*/
+ default:
+ (void) strlcpy(stat->type, "SystemBoard", sizeof (stat->type));
+ break;
+ }
+
+ if (bp->devices) {
+ int rv;
+ uint_t d_idx;
+ drmachid_t d_id;
+
+ rv = drmach_array_first(bp->devices, &d_idx, &d_id);
+ while (rv == 0) {
+ drmach_status_t d_stat;
+
+ err = drmach_device_status(d_id, &d_stat);
+ if (err)
+ break;
+
+ stat->busy |= d_stat.busy;
+ stat->configured |= d_stat.configured;
+
+ rv = drmach_array_next(bp->devices, &d_idx, &d_id);
+ }
+ }
+
+ return (err);
+}
+
+/*
+ * When DR is initialized, we walk the device tree and acquire a hold on
+ * all the nodes that are interesting to DR. This is so that the corresponding
+ * branches cannot be deleted.
+ */
+static int
+drmach_hold_rele_devtree(dev_info_t *rdip, void *arg)
+{
+ int *holdp = (int *)arg;
+ ACPI_HANDLE hdl = NULL;
+ acpidev_data_handle_t dhdl;
+
+ /* Skip nodes and subtrees which are not created by acpidev. */
+ if (ACPI_FAILURE(acpica_get_handle(rdip, &hdl))) {
+ return (DDI_WALK_PRUNECHILD);
+ }
+ ASSERT(hdl != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (DDI_WALK_PRUNECHILD);
+ }
+
+ /* Hold/release devices which are interesting to DR operations. */
+ if (acpidev_data_dr_ready(dhdl)) {
+ if (*holdp) {
+ ASSERT(!e_ddi_branch_held(rdip));
+ e_ddi_branch_hold(rdip);
+ } else {
+ ASSERT(e_ddi_branch_held(rdip));
+ e_ddi_branch_rele(rdip);
+ }
+ }
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+drmach_hold_devtree(void)
+{
+ dev_info_t *dip;
+ int circ;
+ int hold = 1;
+
+ dip = ddi_root_node();
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold);
+ ndi_devi_exit(dip, circ);
+}
+
+static void
+drmach_release_devtree(void)
+{
+ dev_info_t *dip;
+ int circ;
+ int hold = 0;
+
+ dip = ddi_root_node();
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold);
+ ndi_devi_exit(dip, circ);
+}
+
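+/*
+ * CPR (suspend/resume) callback. Taking drmach_cpr_rwlock as writer at
+ * checkpoint time blocks until in-flight DR operations, which hold the
+ * lock as reader, have completed; the lock is dropped again on resume.
+ */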
+static boolean_t
+drmach_cpr_callb(void *arg, int code)
+{
+ _NOTE(ARGUNUSED(arg));
+
+ if (code == CB_CODE_CPR_CHKPT) {
+ /*
+ * Temporarily block CPR operations if there are DR operations
+ * ongoing.
+ */
+ rw_enter(&drmach_cpr_rwlock, RW_WRITER);
+ } else {
+ rw_exit(&drmach_cpr_rwlock);
+ }
+
+ return (B_TRUE);
+}
+
+static int
+drmach_init(void)
+{
+ DRMACH_HANDLE hdl;
+ drmachid_t id;
+ uint_t bnum;
+
+ if (MAX_BOARDS > SHRT_MAX) {
+ cmn_err(CE_WARN, "!drmach_init: system has too many (%d) "
+ "hotplug capable boards.", MAX_BOARDS);
+ return (ENXIO);
+ } else if (MAX_CMP_UNITS_PER_BOARD > 1) {
+ cmn_err(CE_WARN, "!drmach_init: DR doesn't support multiple "
+ "(%d) physical processors on one board.",
+ MAX_CMP_UNITS_PER_BOARD);
+ return (ENXIO);
+ } else if (MAX_CORES_PER_CMP & (MAX_CORES_PER_CMP - 1)) {
+ cmn_err(CE_WARN, "!drmach_init: number of logical CPUs (%d) in "
+ "physical processor is not power of 2.",
+ MAX_CORES_PER_CMP);
+ return (ENXIO);
+ } else if (MAX_CPU_UNITS_PER_BOARD > DEVSET_CPU_NUMBER ||
+ MAX_MEM_UNITS_PER_BOARD > DEVSET_MEM_NUMBER ||
+ MAX_IO_UNITS_PER_BOARD > DEVSET_IO_NUMBER) {
+ cmn_err(CE_WARN, "!drmach_init: system has more CPU/memory/IO "
+ "units than the DR driver can handle.");
+ return (ENXIO);
+ }
+
+ rw_init(&drmach_cpr_rwlock, NULL, RW_DEFAULT, NULL);
+ drmach_cpr_cid = callb_add(drmach_cpr_callb, NULL,
+ CB_CL_CPR_PM, "drmach");
+
+ rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
+ drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
+ drmach_domain.allow_dr = acpidev_dr_capable();
+
+ for (bnum = 0; bnum < MAX_BOARDS; bnum++) {
+ hdl = NULL;
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl)) ||
+ hdl == NULL) {
+ cmn_err(CE_WARN, "!drmach_init: failed to lookup ACPI "
+ "handle for board %d.", bnum);
+ continue;
+ }
+ if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
+ DRMACH_PR("!drmach_init: failed to get handle "
+ "for board %d.", bnum);
+ ASSERT(0);
+ goto error;
+ } else if (id == NULL) {
+ (void) drmach_board_new(bnum, 1);
+ }
+ }
+
+ /*
+ * Walk descendants of the devinfo root node and hold
+ * all devinfo branches of interest.
+ */
+ drmach_hold_devtree();
+
+ return (0);
+
+error:
+ drmach_array_dispose(drmach_boards, drmach_board_dispose);
+ rw_destroy(&drmach_boards_rwlock);
+ rw_destroy(&drmach_cpr_rwlock);
+ return (ENXIO);
+}
+
+static void
+drmach_fini(void)
+{
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ if (drmach_boards != NULL) {
+ drmach_array_dispose(drmach_boards, drmach_board_dispose);
+ drmach_boards = NULL;
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ /*
+ * Walk descendants of the root devinfo node
+ * release holds acquired on branches in drmach_init()
+ */
+ drmach_release_devtree();
+
+ (void) callb_delete(drmach_cpr_cid);
+ rw_destroy(&drmach_cpr_rwlock);
+ rw_destroy(&drmach_boards_rwlock);
+}
+
+sbd_error_t *
+drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_io_dispose(drmachid_t);
+ static sbd_error_t *drmach_io_release(drmachid_t);
+ static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
+
+ drmach_io_t *ip;
+ int portid;
+
+ portid = proto->portid;
+ ASSERT(portid != -1);
+ proto->unum = portid;
+
+ ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
+ bcopy(proto, &ip->dev, sizeof (ip->dev));
+ ip->dev.node = drmach_node_dup(proto->node);
+ ip->dev.cm.isa = (void *)drmach_io_new;
+ ip->dev.cm.dispose = drmach_io_dispose;
+ ip->dev.cm.release = drmach_io_release;
+ ip->dev.cm.status = drmach_io_status;
+ (void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
+ ip->dev.type, ip->dev.unum);
+
+ *idp = (drmachid_t)ip;
+
+ return (NULL);
+}
+
+static void
+drmach_io_dispose(drmachid_t id)
+{
+ drmach_io_t *self;
+
+ ASSERT(DRMACH_IS_IO_ID(id));
+
+ self = id;
+ if (self->dev.node)
+ drmach_node_dispose(self->dev.node);
+
+ kmem_free(self, sizeof (*self));
+}
+
+static sbd_error_t *
+drmach_io_release(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_io_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_device_t *dp;
+ sbd_error_t *err;
+ int configured;
+
+ ASSERT(DRMACH_IS_IO_ID(id));
+ dp = id;
+
+ err = drmach_io_is_attached(id, &configured);
+ if (err)
+ return (err);
+
+ stat->assigned = dp->bp->assigned;
+ stat->powered = dp->bp->powered;
+ stat->configured = (configured != 0);
+ stat->busy = dp->busy;
+ (void) strlcpy(stat->type, dp->type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_cpu_dispose(drmachid_t);
+ static sbd_error_t *drmach_cpu_release(drmachid_t);
+ static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
+
+ int portid;
+ processorid_t cpuid;
+ drmach_cpu_t *cp = NULL;
+
+ /* The portid is the APIC ID of the node. */
+ portid = proto->portid;
+ ASSERT(portid != -1);
+
+ /*
+ * Assume all CPUs are homogeneous and have the same number of
+ * cores/threads.
+ */
+ proto->unum = portid % MAX_CPU_UNITS_PER_BOARD;
+
+ cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
+ bcopy(proto, &cp->dev, sizeof (cp->dev));
+ cp->dev.node = drmach_node_dup(proto->node);
+ cp->dev.cm.isa = (void *)drmach_cpu_new;
+ cp->dev.cm.dispose = drmach_cpu_dispose;
+ cp->dev.cm.release = drmach_cpu_release;
+ cp->dev.cm.status = drmach_cpu_status;
+ (void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
+ cp->dev.type, cp->dev.unum);
+
+ cp->apicid = portid;
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(
+ drmach_node_get_dnode(proto->node), &cpuid))) {
+ cp->cpuid = cpuid;
+ } else {
+ cp->cpuid = -1;
+ }
+
+ /* Mark CPU0 as busy; many other components depend on it. */
+ if (cp->cpuid == 0) {
+ cp->dev.busy = 1;
+ }
+
+ *idp = (drmachid_t)cp;
+
+ return (NULL);
+}
+
+static void
+drmach_cpu_dispose(drmachid_t id)
+{
+ drmach_cpu_t *self;
+
+ ASSERT(DRMACH_IS_CPU_ID(id));
+
+ self = id;
+ if (self->dev.node)
+ drmach_node_dispose(self->dev.node);
+
+ kmem_free(self, sizeof (*self));
+}
+
+static sbd_error_t *
+drmach_cpu_release(drmachid_t id)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_cpu_t *cp;
+ drmach_device_t *dp;
+
+ ASSERT(DRMACH_IS_CPU_ID(id));
+ cp = (drmach_cpu_t *)id;
+ dp = &cp->dev;
+
+ stat->assigned = dp->bp->assigned;
+ stat->powered = dp->bp->powered;
+ mutex_enter(&cpu_lock);
+ stat->configured = (cpu_get(cp->cpuid) != NULL);
+ mutex_exit(&cpu_lock);
+ stat->busy = dp->busy;
+ (void) strlcpy(stat->type, dp->type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
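+/*
+ * Gather memory controller information for the memory device hdl: the
+ * required alignment, the physical address range (slice) spanned by its
+ * regspecs, and a memlist of the memory it contributes. For boards
+ * configured at boot, the memlist is derived by intersecting the board's
+ * address range with phys_install. Return 0 on success and -1 on failure.
+ */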
+static int
+drmach_setup_mc_info(DRMACH_HANDLE hdl, drmach_mem_t *mp)
+{
+ uint_t i, j, count;
+ struct memlist *ml = NULL, *ml2 = NULL;
+ acpidev_regspec_t *regp;
+ uint64_t align, addr_min, addr_max, total_size, skipped_size;
+
+ if (hdl == NULL) {
+ return (-1);
+ } else if (ACPI_FAILURE(acpidev_dr_get_mem_alignment(hdl, &align))) {
+ return (-1);
+ } else {
+ ASSERT((align & (align - 1)) == 0);
+ mp->mem_alignment = align;
+ }
+
+ addr_min = UINT64_MAX;
+ addr_max = 0;
+ total_size = 0;
+ skipped_size = 0;
+ /*
+ * There's a memory hole just below 4G on x86, which needs special
+ * handling. All other addresses assigned to a specific memory device
+ * should be contiguous.
+ */
+ if (ACPI_FAILURE(acpidev_dr_device_get_regspec(hdl, TRUE, &regp,
+ &count))) {
+ return (-1);
+ }
+ for (i = 0, j = 0; i < count; i++) {
+ uint64_t addr, size;
+
+ addr = (uint64_t)regp[i].phys_mid << 32;
+ addr |= (uint64_t)regp[i].phys_low;
+ size = (uint64_t)regp[i].size_hi << 32;
+ size |= (uint64_t)regp[i].size_low;
+ if (size == 0)
+ continue;
+ else
+ j++;
+
+ total_size += size;
+ if (addr < addr_min)
+ addr_min = addr;
+ if (addr + size > addr_max)
+ addr_max = addr + size;
+ if (mp->dev.bp->boot_board ||
+ j <= acpidev_dr_max_segments_per_mem_device()) {
+ ml = memlist_add_span(ml, addr, size);
+ } else {
+ skipped_size += size;
+ }
+ }
+ acpidev_dr_device_free_regspec(regp, count);
+
+ if (skipped_size != 0) {
+ cmn_err(CE_WARN, "!drmach: too many (%d) segments on memory "
+ "device, max (%d) segments supported, 0x%" PRIx64 " bytes "
+ "of memory skipped.",
+ j, acpidev_dr_max_segments_per_mem_device(), skipped_size);
+ }
+
+ mp->slice_base = addr_min;
+ mp->slice_top = addr_max;
+ mp->slice_size = total_size;
+
+ if (mp->dev.bp->boot_board) {
+ uint64_t endpa = _ptob64(physmax + 1);
+
+ /*
+ * We intersect with phys_install to get base_pa.
+ * This only works at boot time.
+ */
+ memlist_read_lock();
+ ml2 = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+ ml2 = memlist_del_span(ml2, 0ull, mp->slice_base);
+ if (ml2 && endpa > addr_max) {
+ ml2 = memlist_del_span(ml2, addr_max, endpa - addr_max);
+ }
+ }
+
+ /*
+ * Create a memlist for the memory board.
+ * The created memlist contains only configured memory if there's
+ * configured memory on the board; otherwise it contains all memory
+ * on the board.
+ */
+ if (ml2) {
+ uint64_t nbytes = 0;
+ struct memlist *p;
+
+ for (p = ml2; p; p = p->ml_next) {
+ nbytes += p->ml_size;
+ }
+ if (nbytes == 0) {
+ memlist_delete(ml2);
+ ml2 = NULL;
+ } else {
+ /* Node has configured memory at boot time. */
+ mp->base_pa = ml2->ml_address;
+ mp->nbytes = nbytes;
+ mp->memlist = ml2;
+ if (ml)
+ memlist_delete(ml);
+ }
+ }
+ if (ml2 == NULL) {
+ /* Not configured at boot time. */
+ mp->base_pa = UINT64_MAX;
+ mp->nbytes = 0;
+ mp->memlist = ml;
+ }
+
+ return (0);
+}
+
+sbd_error_t *
+drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_mem_dispose(drmachid_t);
+ static sbd_error_t *drmach_mem_release(drmachid_t);
+ static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
+
+ DRMACH_HANDLE hdl;
+ drmach_mem_t *mp;
+ int portid;
+
+ mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
+ portid = proto->portid;
+ ASSERT(portid != -1);
+ proto->unum = portid;
+
+ bcopy(proto, &mp->dev, sizeof (mp->dev));
+ mp->dev.node = drmach_node_dup(proto->node);
+ mp->dev.cm.isa = (void *)drmach_mem_new;
+ mp->dev.cm.dispose = drmach_mem_dispose;
+ mp->dev.cm.release = drmach_mem_release;
+ mp->dev.cm.status = drmach_mem_status;
+
+ (void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s%d",
+ mp->dev.type, proto->unum);
+ hdl = mp->dev.node->get_dnode(mp->dev.node);
+ ASSERT(hdl != NULL);
+ if (drmach_setup_mc_info(hdl, mp) != 0) {
+ kmem_free(mp, sizeof (drmach_mem_t));
+ *idp = (drmachid_t)NULL;
+ return (drerr_new(1, EX86_MC_SETUP, NULL));
+ }
+
+ /* make sure we do not create memoryless nodes */
+ if (mp->nbytes == 0 && mp->slice_size == 0) {
+ kmem_free(mp, sizeof (drmach_mem_t));
+ *idp = (drmachid_t)NULL;
+ } else
+ *idp = (drmachid_t)mp;
+
+ return (NULL);
+}
+
+static void
+drmach_mem_dispose(drmachid_t id)
+{
+ drmach_mem_t *mp;
+
+ ASSERT(DRMACH_IS_MEM_ID(id));
+
+ mp = id;
+
+ if (mp->dev.node)
+ drmach_node_dispose(mp->dev.node);
+
+ if (mp->memlist) {
+ memlist_delete(mp->memlist);
+ mp->memlist = NULL;
+ }
+
+ kmem_free(mp, sizeof (*mp));
+}
+
+static sbd_error_t *
+drmach_mem_release(drmachid_t id)
+{
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_mem_status(drmachid_t id, drmach_status_t *stat)
+{
+ uint64_t pa;
+ drmach_mem_t *dp;
+ struct memlist *ml = NULL;
+
+ ASSERT(DRMACH_IS_MEM_ID(id));
+ dp = id;
+
+ /* get starting physical address of target memory */
+ pa = dp->base_pa;
+ /* round down to slice boundary */
+ pa &= ~(dp->mem_alignment - 1);
+
+ /* stop at first span that is in slice */
+ memlist_read_lock();
+ for (ml = phys_install; ml; ml = ml->ml_next)
+ if (ml->ml_address >= pa && ml->ml_address < dp->slice_top)
+ break;
+ memlist_read_unlock();
+
+ stat->assigned = dp->dev.bp->assigned;
+ stat->powered = dp->dev.bp->powered;
+ stat->configured = (ml != NULL);
+ stat->busy = dp->dev.busy;
+ (void) strlcpy(stat->type, dp->dev.type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
+/*
+ * Public interfaces exported to support platform independent dr driver.
+ */
+uint_t
+drmach_max_boards(void)
+{
+ return (acpidev_dr_max_boards());
+}
+
+uint_t
+drmach_max_io_units_per_board(void)
+{
+ return (acpidev_dr_max_io_units_per_board());
+}
+
+uint_t
+drmach_max_cmp_units_per_board(void)
+{
+ return (acpidev_dr_max_cmp_units_per_board());
+}
+
+uint_t
+drmach_max_mem_units_per_board(void)
+{
+ return (acpidev_dr_max_mem_units_per_board());
+}
+
+uint_t
+drmach_max_core_per_cmp(void)
+{
+ return (acpidev_dr_max_cpu_units_per_cmp());
+}
+
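+/*
+ * Called by the dr driver before each DR command. Status and ncm queries
+ * succeed unconditionally; all other commands block out the CPR thread
+ * (the lock is dropped in drmach_post_op()), must carry the platform
+ * option string, and are checked against the current board state. Device
+ * types whose configure/unconfigure support is incomplete on x86 are
+ * masked out of the devset here.
+ */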
+sbd_error_t *
+drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts, void *argp)
+{
+ drmach_board_t *bp = (drmach_board_t *)id;
+ sbd_error_t *err = NULL;
+
+ /* allow status and ncm operations to always succeed */
+ if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
+ return (NULL);
+ }
+
+ switch (cmd) {
+ case SBD_CMD_POWERON:
+ case SBD_CMD_POWEROFF:
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't tolerate such inconsistency between stale ACPI tables
+ * and the real hardware configuration yet.
+ *
+ * A temporary solution is introduced to disable fast reboot if
+ * a CPU/MEM/IOH hotplug event happens. This solution should be
+ * revised when fast reboot is enhanced to support CPU/MEM/IOH
+ * DR operations.
+ */
+ fastreboot_disable(FBNS_HOTPLUG);
+ /*FALLTHROUGH*/
+
+ default:
+ /* Block out the CPR thread. */
+ rw_enter(&drmach_cpr_rwlock, RW_READER);
+ break;
+ }
+
+ /* check all other commands for the required option string */
+ if ((opts->size > 0) && (opts->copts != NULL)) {
+ if (strstr(opts->copts, ACPIDEV_CMD_OST_PREFIX) == NULL) {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+ } else {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+
+ if (!err && id && DRMACH_IS_BOARD_ID(id)) {
+ switch (cmd) {
+ case SBD_CMD_TEST:
+ break;
+ case SBD_CMD_CONNECT:
+ if (bp->connected)
+ err = drerr_new(0, ESBD_STATE, NULL);
+ else if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+ case SBD_CMD_DISCONNECT:
+ if (!bp->connected)
+ err = drerr_new(0, ESBD_STATE, NULL);
+ else if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+ default:
+ if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+
+ }
+ }
+
+ /*
+ * CPU/memory/IO DR operations will be supported in stages on x86.
+ * In early versions, some operations are blocked here.
+ * This temporary hook will be removed when all CPU/memory/IO DR
+ * operations are supported on x86 systems.
+ *
+ * We only need to filter unsupported device types for
+ * SBD_CMD_CONFIGURE/SBD_CMD_UNCONFIGURE commands, all other
+ * commands are supported by all device types.
+ */
+ if (!err && (cmd == SBD_CMD_CONFIGURE || cmd == SBD_CMD_UNCONFIGURE)) {
+ int i;
+ dr_devset_t *devsetp = (dr_devset_t *)argp;
+ dr_devset_t devset = *devsetp;
+
+ switch (cmd) {
+ case SBD_CMD_CONFIGURE:
+ if (!plat_dr_support_cpu()) {
+ DEVSET_DEL(devset, SBD_COMP_CPU,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_CPU_UNITS_PER_BOARD;
+ i < DEVSET_CPU_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, i);
+ }
+ }
+
+ if (!plat_dr_support_memory()) {
+ DEVSET_DEL(devset, SBD_COMP_MEM,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_MEM_UNITS_PER_BOARD;
+ i < DEVSET_MEM_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_MEM, i);
+ }
+ }
+
+ /* No support of configuring IOH devices yet. */
+ DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT);
+ break;
+
+ case SBD_CMD_UNCONFIGURE:
+ if (!plat_dr_support_cpu()) {
+ DEVSET_DEL(devset, SBD_COMP_CPU,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_CPU_UNITS_PER_BOARD;
+ i < DEVSET_CPU_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, i);
+ }
+ }
+
+ /* No support of unconfiguring MEM/IOH devices yet. */
+ DEVSET_DEL(devset, SBD_COMP_MEM, DEVSET_ANYUNIT);
+ DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT);
+ break;
+ }
+
+ *devsetp = devset;
+ if (DEVSET_IS_NULL(devset)) {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts, int rv)
+{
+ _NOTE(ARGUNUSED(id, opts, rv));
+
+ switch (cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ break;
+
+ default:
+ rw_exit(&drmach_cpr_rwlock);
+ break;
+ }
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_configure(drmachid_t id, int flags)
+{
+ _NOTE(ARGUNUSED(flags));
+
+ drmach_device_t *dp;
+ sbd_error_t *err = NULL;
+ dev_info_t *rdip;
+ dev_info_t *fdip = NULL;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ rdip = dp->node->getdip(dp->node);
+ ASSERT(rdip);
+ ASSERT(e_ddi_branch_held(rdip));
+
+ /* allocate cpu id for the CPU device. */
+ if (DRMACH_IS_CPU_ID(id)) {
+ DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node);
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(acpidev_dr_allocate_cpuid(hdl, NULL))) {
+ err = drerr_new(1, EX86_ALLOC_CPUID, NULL);
+ }
+ return (err);
+ }
+
+ if (DRMACH_IS_MEM_ID(id)) {
+ err = drmach_mem_update_lgrp(id);
+ if (err)
+ return (err);
+ }
+
+ if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
+
+ (void) ddi_pathname(dip, path);
+ err = drerr_new(1, EX86_DRVFAIL, path);
+ kmem_free(path, MAXPATHLEN);
+
+ /* If non-NULL, fdip is returned held and must be released */
+ if (fdip != NULL)
+ ddi_release_devi(fdip);
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_unconfigure(drmachid_t id, int flags)
+{
+ _NOTE(ARGUNUSED(flags));
+
+ drmach_device_t *dp;
+ sbd_error_t *err = NULL;
+ dev_info_t *rdip, *fdip = NULL;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ rdip = dp->node->getdip(dp->node);
+ ASSERT(rdip);
+ ASSERT(e_ddi_branch_held(rdip));
+
+ if (DRMACH_IS_CPU_ID(id)) {
+ DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node);
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(acpidev_dr_free_cpuid(hdl))) {
+ err = drerr_new(1, EX86_FREE_CPUID, NULL);
+ }
+ return (err);
+ }
+
+ /*
+ * Note: FORCE flag is no longer necessary under devfs
+ */
+ if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * If non-NULL, fdip is returned held and must be released.
+ */
+ if (fdip != NULL) {
+ (void) ddi_pathname(fdip, path);
+ ndi_rele_devi(fdip);
+ } else {
+ (void) ddi_pathname(rdip, path);
+ }
+
+ err = drerr_new(1, EX86_DRVFAIL, path);
+
+ kmem_free(path, MAXPATHLEN);
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_get_dip(drmachid_t id, dev_info_t **dip)
+{
+ drmach_device_t *dp;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ *dip = dp->node->getdip(dp->node);
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_release(drmachid_t id)
+{
+ drmach_common_t *cp;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ cp = id;
+
+ return (cp->release(id));
+}
+
+sbd_error_t *
+drmach_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_common_t *cp;
+ sbd_error_t *err;
+
+ rw_enter(&drmach_boards_rwlock, RW_READER);
+ if (!DRMACH_IS_ID(id)) {
+ rw_exit(&drmach_boards_rwlock);
+ return (drerr_new(0, EX86_NOTID, NULL));
+ }
+ cp = (drmach_common_t *)id;
+ err = cp->status(id, stat);
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
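+/*
+ * Passthru handler that reports DR progress back to the ACPI platform via
+ * the _OST method. The option string is expected to begin with one of the
+ * ACPIDEV_CMD_OST_* status keywords, followed by an
+ * ACPIDEV_EVENT_TYPE_ATTR_NAME=<event type> attribute naming the event
+ * being acknowledged.
+ */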
+static sbd_error_t *
+drmach_update_acpi_status(drmachid_t id, drmach_opts_t *opts)
+{
+ char *copts;
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+ int event, code;
+ boolean_t inprogress = B_FALSE;
+
+ if (DRMACH_NULL_ID(id) || !DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+ hdl = drmach_node_get_dnode(bp->tree);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Get the status code. */
+ copts = opts->copts;
+ if (strncmp(copts, ACPIDEV_CMD_OST_INPROGRESS,
+ strlen(ACPIDEV_CMD_OST_INPROGRESS)) == 0) {
+ inprogress = B_TRUE;
+ code = ACPI_OST_STA_INSERT_IN_PROGRESS;
+ copts += strlen(ACPIDEV_CMD_OST_INPROGRESS);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_SUCCESS,
+ strlen(ACPIDEV_CMD_OST_SUCCESS)) == 0) {
+ code = ACPI_OST_STA_SUCCESS;
+ copts += strlen(ACPIDEV_CMD_OST_SUCCESS);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_FAILURE,
+ strlen(ACPIDEV_CMD_OST_FAILURE)) == 0) {
+ code = ACPI_OST_STA_FAILURE;
+ copts += strlen(ACPIDEV_CMD_OST_FAILURE);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_NOOP,
+ strlen(ACPIDEV_CMD_OST_NOOP)) == 0) {
+ return (NULL);
+ } else {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+
+ /* Get the event type. */
+ copts = strstr(copts, ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ if (copts == NULL) {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+ copts += strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ if (copts[0] != '=') {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+ copts += strlen("=");
+ if (strncmp(copts, ACPIDEV_EVENT_TYPE_BUS_CHECK,
+ strlen(ACPIDEV_EVENT_TYPE_BUS_CHECK)) == 0) {
+ event = ACPI_NOTIFY_BUS_CHECK;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK,
+ strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK)) == 0) {
+ event = ACPI_NOTIFY_DEVICE_CHECK;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT,
+ strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT)) == 0) {
+ event = ACPI_NOTIFY_DEVICE_CHECK_LIGHT;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_EJECT_REQUEST,
+ strlen(ACPIDEV_EVENT_TYPE_EJECT_REQUEST)) == 0) {
+ event = ACPI_NOTIFY_EJECT_REQUEST;
+ if (inprogress) {
+ code = ACPI_OST_STA_EJECT_IN_PROGRESS;
+ }
+ } else {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+
+ (void) acpidev_eval_ost(hdl, event, code, NULL, 0);
+
+ return (NULL);
+}
+
+static struct {
+ const char *name;
+ sbd_error_t *(*handler)(drmachid_t id, drmach_opts_t *opts);
+} drmach_pt_arr[] = {
+ { ACPIDEV_CMD_OST_PREFIX, &drmach_update_acpi_status },
+ /* the following line must always be last */
+ { NULL, NULL }
+};
+
+sbd_error_t *
+drmach_passthru(drmachid_t id, drmach_opts_t *opts)
+{
+ int i;
+ sbd_error_t *err;
+
+ i = 0;
+ while (drmach_pt_arr[i].name != NULL) {
+ int len = strlen(drmach_pt_arr[i].name);
+
+ if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
+ break;
+
+ i += 1;
+ }
+
+ if (drmach_pt_arr[i].name == NULL)
+ err = drerr_new(0, EX86_UNKPTCMD, opts->copts);
+ else
+ err = (*drmach_pt_arr[i].handler)(id, opts);
+
+ return (err);
+}
+
+/*
+ * Board specific interfaces to support dr driver
+ */
+static int
+drmach_get_portid(drmach_node_t *np)
+{
+ uint32_t portid;
+
+ if (np->getprop(np, ACPIDEV_DR_PROP_PORTID,
+ &portid, sizeof (portid)) == 0) {
+ /*
+ * acpidev returns the portid as a uint32_t; validate it.
+ */
+ if (portid > INT_MAX) {
+ return (-1);
+ } else {
+ return (portid);
+ }
+ }
+
+ return (-1);
+}
+
+/*
+ * Helper function to determine whether a given node should be considered
+ * for a dr operation, based on the predefined dr type table and the
+ * node's name.
+ * Parameter: the name of a device node.
+ * Return value: -1 if the name does not map to a valid dr type; an index
+ * greater than or equal to 0 if it does.
+ */
+static int
+drmach_name2type_idx(char *name)
+{
+ int index, ntypes;
+
+ if (name == NULL)
+ return (-1);
+
+ /*
+ * Determine how many possible types are currently supported
+ * for dr.
+ */
+ ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
+
+ /* Determine if the node's name corresponds to a predefined type. */
+ for (index = 0; index < ntypes; index++) {
+ if (strcmp(drmach_name2type[index].name, name) == 0)
+ /* The node is an allowed type for dr. */
+ return (index);
+ }
+
+ /*
+ * If the name of the node does not map to any of the
+ * types in the array drmach_name2type then the node is not of
+ * interest to dr.
+ */
+ return (-1);
+}
+
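+/*
+ * Per-node callback for drmach_board_find_devices(). It filters nodes by
+ * board number and device type, creates a drmach device id for each match,
+ * records the id in the board's device array, and reports it through the
+ * caller's found() callback.
+ */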
+static int
+drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
+{
+ drmach_node_t *node = args->node;
+ drmach_board_cb_data_t *data = args->data;
+ drmach_board_t *obj = data->obj;
+
+ int rv, portid;
+ uint32_t bnum;
+ drmachid_t id;
+ drmach_device_t *device;
+ char name[OBP_MAXDRVNAME];
+
+ portid = drmach_get_portid(node);
+ rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME,
+ name, OBP_MAXDRVNAME);
+ if (rv)
+ return (0);
+
+ rv = node->getprop(node, ACPIDEV_DR_PROP_BOARDNUM,
+ &bnum, sizeof (bnum));
+ if (rv) {
+ return (0);
+ }
+ if (bnum > INT_MAX) {
+ return (0);
+ }
+
+ if (bnum != obj->bnum)
+ return (0);
+
+ if (drmach_name2type_idx(name) < 0) {
+ return (0);
+ }
+
+ /*
+ * Create a device data structure from this node data.
+ * The call may yield nothing if the node is not of interest
+ * to drmach.
+ */
+ data->err = drmach_device_new(node, obj, portid, &id);
+ if (data->err)
+ return (-1);
+ else if (!id) {
+ /*
+ * drmach_device_new examined the node we passed in
+ * and determined that it was not of interest to
+ * drmach, so it is skipped.
+ */
+ return (0);
+ }
+
+ rv = drmach_array_set(obj->devices, data->ndevs++, id);
+ if (rv) {
+ data->err = DRMACH_INTERNAL_ERROR();
+ return (-1);
+ }
+ device = id;
+
+ data->err = (*data->found)(data->a, device->type, device->unum, id);
+
+ return (data->err == NULL ? 0 : -1);
+}
+
+sbd_error_t *
+drmach_board_find_devices(drmachid_t id, void *a,
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
+{
+ drmach_board_t *bp = (drmach_board_t *)id;
+ sbd_error_t *err;
+ int max_devices;
+ int rv;
+ drmach_board_cb_data_t data;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ max_devices = MAX_CPU_UNITS_PER_BOARD;
+ max_devices += MAX_MEM_UNITS_PER_BOARD;
+ max_devices += MAX_IO_UNITS_PER_BOARD;
+
+ if (bp->devices == NULL)
+ bp->devices = drmach_array_new(0, max_devices);
+ ASSERT(bp->tree != NULL);
+
+ data.obj = bp;
+ data.ndevs = 0;
+ data.found = found;
+ data.a = a;
+ data.err = NULL;
+
+ acpidev_dr_lock_all();
+ rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
+ acpidev_dr_unlock_all();
+ if (rv == 0) {
+ err = NULL;
+ } else {
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+ bp->devices = NULL;
+
+ if (data.err)
+ err = data.err;
+ else
+ err = DRMACH_INTERNAL_ERROR();
+ }
+
+ return (err);
+}
+
+int
+drmach_board_lookup(int bnum, drmachid_t *id)
+{
+ int rv = 0;
+
+ if (bnum < 0) {
+ *id = 0;
+ return (-1);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_READER);
+ if (drmach_array_get(drmach_boards, (uint_t)bnum, id)) {
+ *id = 0;
+ rv = -1;
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (rv);
+}
+
+sbd_error_t *
+drmach_board_name(int bnum, char *buf, int buflen)
+{
+ ACPI_HANDLE hdl;
+ sbd_error_t *err = NULL;
+
+ if (bnum < 0) {
+ return (drerr_new(1, EX86_BNUM, "%d", bnum));
+ }
+
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl))) {
+ DRMACH_PR("!drmach_board_name: failed to lookup ACPI handle "
+ "for board %d.", bnum);
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, buf, buflen))) {
+ DRMACH_PR("!drmach_board_name: failed to generate board name "
+ "for board %d.", bnum);
+ err = drerr_new(0, EX86_INVALID_ARG,
+ ": buffer is too small for board name.");
+ }
+ acpidev_dr_unlock_all();
+
+ return (err);
+}
+
+int
+drmach_board_is_floating(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (0);
+
+ bp = (drmach_board_t *)id;
+
+ return ((drmach_domain.floating & (1ULL << bp->bnum)) ? 1 : 0);
+}
+
+static ACPI_STATUS
+drmach_board_check_dependent_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ uint32_t bdnum;
+ drmach_board_t *bp;
+ ACPI_STATUS rc = AE_OK;
+ int cmd = (int)(intptr_t)ctx;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(retval != NULL);
+
+ /* Skip non-board devices. */
+ if (!acpidev_dr_device_is_board(hdl)) {
+ return (AE_OK);
+ } else if (ACPI_FAILURE(acpidev_dr_get_board_number(hdl, &bdnum))) {
+ DRMACH_PR("!drmach_board_check_dependent_cb: failed to get "
+ "board number for object %p.\n", hdl);
+ return (AE_ERROR);
+ } else if (bdnum > MAX_BOARDS) {
+ DRMACH_PR("!drmach_board_check_dependent_cb: board number %u "
+ "is too big, max %u.", bdnum, MAX_BOARDS);
+ return (AE_ERROR);
+ }
+
+ bp = drmach_get_board_by_bnum(bdnum);
+ switch (cmd) {
+ case SBD_CMD_CONNECT:
+ /*
+ * Its parent board should be present, assigned, powered and
+ * connected when connecting the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (!bp->connected || !bp->powered || !bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_POWERON:
+ /*
+ * Its parent board should be present, assigned and powered when
+ * powering on the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (!bp->powered || !bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_ASSIGN:
+ /*
+ * Its parent board should be present and assigned when
+ * assigning the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else if (!bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ break;
+
+ case SBD_CMD_DISCONNECT:
+ /*
+ * The child board should be disconnected if present when
+ * disconnecting its parent board.
+ */
+ if (bp != NULL && bp->connected) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ break;
+
+ case SBD_CMD_POWEROFF:
+ /*
+ * The child board should be disconnected and powered off if
+ * present when powering off its parent board.
+ */
+ if (bp != NULL) {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (bp->connected || bp->powered) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ /*
+ * The child board should be disconnected, powered off and
+ * unassigned if present when unassigning its parent board.
+ */
+ if (bp != NULL) {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (bp->connected || bp->powered || bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ default:
+ /* Return success for all other commands. */
+ break;
+ }
+
+ return (rc);
+}
+
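+/*
+ * Check that boards related to 'bp' are in a state compatible with 'cmd'
+ * before it is carried out. For ASSIGN/POWERON/CONNECT the walk follows
+ * the ACPI eject-dependency (_EJD) chain to the parent boards bp depends
+ * on; for UNASSIGN/POWEROFF/DISCONNECT it walks the eject device list
+ * (_EDL) of child boards that depend on bp. Returns NULL on success, or
+ * an error naming the offending dependency.
+ */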
+sbd_error_t *
+drmach_board_check_dependent(int cmd, drmach_board_t *bp)
+{
+ int reverse;
+ char *name;
+ sbd_error_t *err = NULL;
+ DRMACH_HANDLE hdl;
+ DRMACH_HANDLE dp = NULL;
+
+ ASSERT(bp != NULL);
+ ASSERT(DRMACH_IS_BOARD_ID(bp));
+ ASSERT(RW_LOCK_HELD(&drmach_boards_rwlock));
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ switch (cmd) {
+ case SBD_CMD_ASSIGN:
+ case SBD_CMD_POWERON:
+ case SBD_CMD_CONNECT:
+ if (ACPI_SUCCESS(acpidev_dr_device_walk_ejd(hdl,
+ &drmach_board_check_dependent_cb,
+ (void *)(intptr_t)cmd, &dp))) {
+ return (NULL);
+ }
+ reverse = 0;
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ case SBD_CMD_POWEROFF:
+ case SBD_CMD_DISCONNECT:
+ if (ACPI_SUCCESS(acpidev_dr_device_walk_edl(hdl,
+ &drmach_board_check_dependent_cb,
+ (void *)(intptr_t)cmd, &dp))) {
+ return (NULL);
+ }
+ reverse = 1;
+ break;
+
+ default:
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ }
+
+ if (dp == NULL) {
+ return (drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name));
+ }
+ name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ if (ACPI_FAILURE(acpidev_dr_get_board_name(dp, name, MAXPATHLEN))) {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name);
+ } else if (reverse == 0) {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY,
+ "%s, depends on board %s", bp->cm.name, name);
+ } else {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY,
+ "board %s depends on %s", name, bp->cm.name);
+ }
+ kmem_free(name, MAXPATHLEN);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_assign(int bnum, drmachid_t *id)
+{
+ sbd_error_t *err = NULL;
+
+ if (bnum < 0) {
+ return (drerr_new(1, EX86_BNUM, "%d", bnum));
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+
+ if (drmach_array_get(drmach_boards, bnum, id) == -1) {
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else {
+ drmach_board_t *bp;
+
+ /*
+		 * If the board object already exists we only need read
+		 * access, so downgrade to a reader lock; otherwise keep
+		 * the write lock while creating and inserting the board.
+ */
+ if (*id)
+ rw_downgrade(&drmach_boards_rwlock);
+
+ bp = *id;
+ if (!(*id))
+ bp = *id =
+ (drmachid_t)drmach_board_new(bnum, 0);
+
+ if (bp == NULL) {
+ DRMACH_PR("!drmach_board_assign: failed to create "
+ "object for board %d.", bnum);
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else {
+ err = drmach_board_check_dependent(SBD_CMD_ASSIGN, bp);
+ if (err == NULL)
+ bp->assigned = 1;
+ }
+ }
+
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_unassign(drmachid_t id)
+{
+ drmach_board_t *bp;
+ sbd_error_t *err;
+ drmach_status_t stat;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id)) {
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ }
+ bp = id;
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+
+ err = drmach_board_status(id, &stat);
+ if (err) {
+ rw_exit(&drmach_boards_rwlock);
+ return (err);
+ }
+
+ if (stat.configured || stat.busy) {
+ err = drerr_new(0, EX86_CONFIGBUSY, bp->cm.name);
+ } else if (bp->connected) {
+ err = drerr_new(0, EX86_CONNECTBUSY, bp->cm.name);
+ } else if (stat.powered) {
+ err = drerr_new(0, EX86_POWERBUSY, bp->cm.name);
+ } else {
+ err = drmach_board_check_dependent(SBD_CMD_UNASSIGN, bp);
+ if (err == NULL) {
+ if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
+ err = DRMACH_INTERNAL_ERROR();
+ else
+ drmach_board_dispose(bp);
+ }
+ }
+
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_poweron(drmachid_t id)
+{
+ drmach_board_t *bp;
+ sbd_error_t *err = NULL;
+ DRMACH_HANDLE hdl;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ bp->powered = drmach_board_check_power(bp);
+ if (bp->powered) {
+ return (NULL);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_POWERON, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_poweron(hdl)))
+ err = drerr_new(0, EX86_POWERON, NULL);
+ acpidev_dr_unlock_all();
+
+ /* Check whether the board is powered on. */
+ bp->powered = drmach_board_check_power(bp);
+ if (err == NULL && bp->powered == 0)
+ err = drerr_new(0, EX86_POWERON, NULL);
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_poweroff(drmachid_t id)
+{
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp;
+ drmach_status_t stat;
+ DRMACH_HANDLE hdl;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Check whether the board is busy, configured or connected. */
+ err = drmach_board_status(id, &stat);
+ if (err != NULL)
+ return (err);
+ if (stat.configured || stat.busy) {
+ return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name));
+ } else if (bp->connected) {
+ return (drerr_new(0, EX86_CONNECTBUSY, bp->cm.name));
+ }
+
+ bp->powered = drmach_board_check_power(bp);
+ if (bp->powered == 0) {
+ return (NULL);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_POWEROFF, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_poweroff(hdl)))
+ err = drerr_new(0, EX86_POWEROFF, NULL);
+ acpidev_dr_unlock_all();
+
+ bp->powered = drmach_board_check_power(bp);
+ if (err == NULL && bp->powered != 0)
+ err = drerr_new(0, EX86_POWEROFF, NULL);
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
+{
+ _NOTE(ARGUNUSED(opts, force));
+
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ if (ACPI_FAILURE(acpidev_dr_device_check_status(hdl)))
+ return (drerr_new(0, EX86_IN_FAILURE, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
+{
+ _NOTE(ARGUNUSED(opts));
+
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp = (drmach_board_t *)id;
+ DRMACH_HANDLE hdl;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_CONNECT, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_insert(hdl))) {
+ (void) acpidev_dr_device_remove(hdl);
+ err = drerr_new(1, EX86_PROBE, NULL);
+ } else {
+ bp->connected = 1;
+ }
+ acpidev_dr_unlock_all();
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
+{
+ _NOTE(ARGUNUSED(opts));
+
+ DRMACH_HANDLE hdl;
+ drmach_board_t *bp;
+ drmach_status_t stat;
+ sbd_error_t *err = NULL;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Check whether the board is busy or configured. */
+ err = drmach_board_status(id, &stat);
+ if (err != NULL)
+ return (err);
+ if (stat.configured || stat.busy)
+ return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name));
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_DISCONNECT, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_SUCCESS(acpidev_dr_device_remove(hdl))) {
+ bp->connected = 0;
+ } else {
+ err = drerr_new(1, EX86_DEPROBE, bp->cm.name);
+ }
+ acpidev_dr_unlock_all();
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_deprobe(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
+
+ if (bp->devices) {
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+ bp->devices = NULL;
+ }
+
+ bp->boot_board = 0;
+
+ return (NULL);
+}
+
+/*
+ * CPU specific interfaces to support dr driver
+ */
+sbd_error_t *
+drmach_cpu_disconnect(drmachid_t id)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
+{
+ drmach_cpu_t *cpu;
+
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ cpu = (drmach_cpu_t *)id;
+
+ if (cpu->cpuid == -1) {
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(
+ drmach_node_get_dnode(cpu->dev.node), cpuid))) {
+ cpu->cpuid = *cpuid;
+ } else {
+ *cpuid = -1;
+ }
+ } else {
+ *cpuid = cpu->cpuid;
+ }
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_get_impl(drmachid_t id, int *ip)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+	/* Assume all CPUs in the system are homogeneous. */
+ *ip = X86_CPU_IMPL_UNKNOWN;
+
+ kpreempt_disable();
+ if (cpuid_getvendor(CPU) == X86_VENDOR_Intel) {
+ /* NHM-EX CPU */
+ if (cpuid_getfamily(CPU) == 0x6 &&
+ cpuid_getmodel(CPU) == 0x2e) {
+ *ip = X86_CPU_IMPL_NEHALEM_EX;
+ }
+ }
+ kpreempt_enable();
+
+ return (NULL);
+}
+
+/*
+ * Memory specific interfaces to support dr driver
+ */
+
+/*
+ * When drmach_mem_new() is called, the mp->base_pa field is set to the base
+ * address of configured memory if there's configured memory on the board,
+ * otherwise it is set to UINT64_MAX. For a hot-added memory board there is
+ * no configured memory when drmach_mem_new() is called, so mp->base_pa is
+ * set to UINT64_MAX and a correct value must be computed after the memory
+ * hot-add operation completes.
+ * A hot-added memory board may contain multiple memory segments and
+ * drmach_mem_add_span() is called once for each segment, so we can't rely
+ * on the basepa argument. It's also possible that only part of a memory
+ * segment is added to the OS, so we need to intersect with the phys_install
+ * list to get the real base address of configured memory on the board.
+ */
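+/*
+ * Hypothetical example: if a board's slice spans [4GB, 8GB) and phys_install
+ * holds configured segments [4GB, 5GB) and [6GB, 7GB) within that slice,
+ * the intersection below yields base_pa = 4GB, the lowest configured
+ * address in the slice, independent of which segment's basepa triggered
+ * this call.
+ */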
+sbd_error_t *
+drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
+{
+ _NOTE(ARGUNUSED(basepa));
+
+ uint64_t nbytes = 0;
+ uint64_t endpa;
+ drmach_mem_t *mp;
+ struct memlist *ml2;
+ struct memlist *p;
+
+ ASSERT(size != 0);
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ /* Compute basepa and size of installed memory. */
+ endpa = _ptob64(physmax + 1);
+ memlist_read_lock();
+ ml2 = memlist_dup(phys_install);
+ memlist_read_unlock();
+ ml2 = memlist_del_span(ml2, 0ull, mp->slice_base);
+ if (ml2 && endpa > mp->slice_top) {
+ ml2 = memlist_del_span(ml2, mp->slice_top,
+ endpa - mp->slice_top);
+ }
+
+ ASSERT(ml2);
+ if (ml2) {
+ for (p = ml2; p; p = p->ml_next) {
+ nbytes += p->ml_size;
+ if (mp->base_pa > p->ml_address)
+ mp->base_pa = p->ml_address;
+ }
+ ASSERT(nbytes > 0);
+ mp->nbytes += nbytes;
+ memlist_delete(ml2);
+ }
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_mem_update_lgrp(drmachid_t id)
+{
+ ACPI_STATUS rc;
+ DRMACH_HANDLE hdl;
+ void *hdlp;
+ drmach_mem_t *mp;
+ update_membounds_t umb;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+ /* No need to update lgrp if memory is already installed. */
+ if (mp->nbytes != 0)
+ return (NULL);
+ /* No need to update lgrp if lgrp is disabled. */
+ if (max_mem_nodes == 1)
+ return (NULL);
+
+ /* Add memory to lgroup */
+ hdl = mp->dev.node->get_dnode(mp->dev.node);
+ rc = acpidev_dr_device_get_memory_index(hdl, &umb.u_device_id);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "drmach: failed to get device id of memory, "
+ "can't update lgrp information.");
+ return (drerr_new(0, EX86_INTERNAL, NULL));
+ }
+ rc = acpidev_dr_get_mem_numa_info(hdl, mp->memlist, &hdlp,
+ &umb.u_domain, &umb.u_sli_cnt, &umb.u_sli_ptr);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "drmach: failed to get lgrp info of memory, "
+ "can't update lgrp information.");
+ return (drerr_new(0, EX86_INTERNAL, NULL));
+ }
+ umb.u_base = (uint64_t)mp->slice_base;
+ umb.u_length = (uint64_t)(mp->slice_top - mp->slice_base);
+ lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
+ acpidev_dr_free_mem_numa_info(hdlp);
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_enable(drmachid_t id)
+{
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ else
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
+{
+ drmach_mem_t *mp;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ /*
+ * This is only used by dr to round up/down the memory
+ * for copying.
+ */
+ mem->mi_alignment_mask = mp->mem_alignment - 1;
+ mem->mi_basepa = mp->base_pa;
+ mem->mi_size = mp->nbytes;
+ mem->mi_slice_base = mp->slice_base;
+ mem->mi_slice_top = mp->slice_top;
+ mem->mi_slice_size = mp->slice_size;
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_slice_info(drmachid_t id,
+ uint64_t *bp, uint64_t *ep, uint64_t *sp)
+{
+ drmach_mem_t *mp;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ if (bp)
+ *bp = mp->slice_base;
+ if (ep)
+ *ep = mp->slice_top;
+ if (sp)
+ *sp = mp->slice_size;
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
+{
+#ifdef DEBUG
+ int rv;
+#endif
+ drmach_mem_t *mem;
+ struct memlist *mlist;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mem = (drmach_mem_t *)id;
+
+ mlist = memlist_dup(mem->memlist);
+ *ml = mlist;
+
+#ifdef DEBUG
+ /*
+ * Make sure the incoming memlist doesn't already
+ * intersect with what's present in the system (phys_install).
+ */
+ memlist_read_lock();
+ rv = memlist_intersect(phys_install, mlist);
+ memlist_read_unlock();
+ if (rv) {
+ DRMACH_PR("Derived memlist intersects with phys_install\n");
+ memlist_dump(mlist);
+
+ DRMACH_PR("phys_install memlist:\n");
+ memlist_dump(phys_install);
+
+ memlist_delete(mlist);
+ return (DRMACH_INTERNAL_ERROR());
+ }
+
+ DRMACH_PR("Derived memlist:");
+ memlist_dump(mlist);
+#endif
+
+ return (NULL);
+}
+
+processorid_t
+drmach_mem_cpu_affinity(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (CPU_CURRENT);
+}
+
+int
+drmach_copy_rename_need_suspend(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (0);
+}
+
+/*
+ * IO specific interfaces to support dr driver
+ */
+sbd_error_t *
+drmach_io_pre_release(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_unrelease(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_post_release(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_post_attach(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_is_attached(drmachid_t id, int *yes)
+{
+ drmach_device_t *dp;
+ dev_info_t *dip;
+ int state;
+
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ dip = dp->node->getdip(dp->node);
+ if (dip == NULL) {
+ *yes = 0;
+ return (NULL);
+ }
+
+ state = ddi_get_devstate(dip);
+ *yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
+ (state == DDI_DEVSTATE_UP));
+
+ return (NULL);
+}
+
+/*
+ * Miscellaneous interfaces to support dr driver
+ */
+int
+drmach_verify_sr(dev_info_t *dip, int sflag)
+{
+ _NOTE(ARGUNUSED(dip, sflag));
+
+ return (0);
+}
+
+void
+drmach_suspend_last(void)
+{
+}
+
+void
+drmach_resume_first(void)
+{
+}
+
+/*
+ * Log a DR sysevent.
+ * Return value: 0 success, non-zero failure.
+ */
+int
+drmach_log_sysevent(int board, char *hint, int flag, int verbose)
+{
+ sysevent_t *ev = NULL;
+ sysevent_id_t eid;
+ int rv, km_flag;
+ sysevent_value_t evnt_val;
+ sysevent_attr_list_t *evnt_attr_list = NULL;
+ sbd_error_t *err;
+ char attach_pnt[MAXNAMELEN];
+
+ km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
+ attach_pnt[0] = '\0';
+ err = drmach_board_name(board, attach_pnt, MAXNAMELEN);
+ if (err != NULL) {
+ sbd_err_clear(&err);
+ rv = -1;
+ goto logexit;
+ }
+ if (verbose) {
+ DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
+ attach_pnt, hint, flag, verbose);
+ }
+
+ if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
+ SUNW_KERN_PUB"dr", km_flag)) == NULL) {
+ rv = -2;
+ goto logexit;
+ }
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = attach_pnt;
+ if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
+ km_flag)) != 0)
+ goto logexit;
+
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = hint;
+ if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
+ km_flag)) != 0) {
+ sysevent_free_attr(evnt_attr_list);
+ goto logexit;
+ }
+
+ (void) sysevent_attach_attributes(ev, evnt_attr_list);
+
+ /*
+ * Log the event but do not sleep waiting for its
+ * delivery. This provides insulation from syseventd.
+ */
+ rv = log_sysevent(ev, SE_NOSLEEP, &eid);
+
+logexit:
+ if (ev)
+ sysevent_free(ev);
+ if ((rv != 0) && verbose)
+		cmn_err(CE_WARN, "!drmach_log_sysevent failed (rv %d) for %s "
+		    "%s\n", rv, attach_pnt, hint);
+
+ return (rv);
+}
diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h
new file mode 100644
index 0000000000..abbc264ef0
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h
@@ -0,0 +1,199 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DRMACH_ACPI_H
+#define _SYS_DRMACH_ACPI_H
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/param.h>
+#include <sys/sunddi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/drmach.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Use ACPI handle as DRMACH handle on x86 systems. */
+#define DRMACH_HANDLE ACPI_HANDLE
+
+/* Macros to deal with object type. */
+#define DRMACH_OBJ(id) ((drmach_common_t *)id)
+
+#define DRMACH_NULL_ID(id) ((id) == 0)
+
+#define DRMACH_IS_BOARD_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_board_new))
+
+#define DRMACH_IS_CPU_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))
+
+#define DRMACH_IS_MEM_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))
+
+#define DRMACH_IS_IO_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
+
+#define DRMACH_IS_DEVICE_ID(id) \
+ ((id != 0) && \
+ (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
+
+#define DRMACH_IS_ID(id) \
+ ((id != 0) && \
+ (DRMACH_OBJ(id)->isa == (void *)drmach_board_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
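+
+/*
+ * These checks rely on the convention that each constructor
+ * (drmach_board_new(), drmach_cpu_new(), etc.) stores its own address in
+ * the isa field of the object's embedded drmach_common_t; an object's type
+ * is identified by comparing isa against those constructor addresses.
+ */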
+
+#define DRMACH_INTERNAL_ERROR() \
+ drerr_new(1, EX86_INTERNAL, drmach_ie_fmt, __LINE__)
+
+#ifdef DEBUG
+extern int drmach_debug;
+
+#define DRMACH_PR if (drmach_debug) printf
+#else
+#define DRMACH_PR _NOTE(CONSTANTCONDITION) if (0) printf
+#endif /* DEBUG */
+
+typedef struct {
+ struct drmach_node *node;
+ void *data;
+ void *func;
+} drmach_node_walk_args_t;
+
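+/*
+ * Ops vector abstracting a device tree node: on this platform get_dnode()
+ * returns the underlying ACPI handle and getdip() the corresponding
+ * dev_info_t, while getprop()/getproplen()/walk() provide uniform property
+ * access and subtree traversal.
+ */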
+typedef struct drmach_node {
+ void *here;
+
+ DRMACH_HANDLE (*get_dnode)(struct drmach_node *node);
+ dev_info_t *(*getdip)(struct drmach_node *node);
+ int (*getproplen)(struct drmach_node *node, char *name,
+ int *len);
+ int (*getprop)(struct drmach_node *node, char *name,
+ void *buf, int len);
+ int (*walk)(struct drmach_node *node, void *data,
+ int (*cb)(drmach_node_walk_args_t *args));
+} drmach_node_t;
+
+typedef struct {
+ int min_index;
+ int max_index;
+ int arr_sz;
+ drmachid_t *arr;
+} drmach_array_t;
+
+typedef struct {
+ void *isa;
+
+ void (*dispose)(drmachid_t);
+ sbd_error_t *(*release)(drmachid_t);
+ sbd_error_t *(*status)(drmachid_t, drmach_status_t *);
+
+ char name[MAXNAMELEN];
+} drmach_common_t;
+
+typedef struct {
+ drmach_common_t cm;
+ uint_t bnum;
+ int assigned;
+ int powered;
+ int connected;
+ int cond;
+ drmach_node_t *tree;
+ drmach_array_t *devices;
+ int boot_board; /* if board exists on bootup */
+} drmach_board_t;
+
+typedef struct {
+ drmach_common_t cm;
+ drmach_board_t *bp;
+ int unum;
+ uint_t portid;
+ int busy;
+ int powered;
+ const char *type;
+ drmach_node_t *node;
+} drmach_device_t;
+
+typedef struct drmach_cpu {
+ drmach_device_t dev;
+ processorid_t cpuid;
+ uint32_t apicid;
+} drmach_cpu_t;
+
+typedef struct drmach_mem {
+ drmach_device_t dev;
+ uint64_t mem_alignment;
+ uint64_t slice_base;
+ uint64_t slice_top;
+ uint64_t slice_size;
+ uint64_t base_pa; /* lowest installed memory base */
+ uint64_t nbytes; /* size of installed memory */
+ struct memlist *memlist;
+} drmach_mem_t;
+
+typedef struct drmach_io {
+ drmach_device_t dev;
+} drmach_io_t;
+
+typedef struct drmach_domain_info {
+ uint64_t floating;
+ int allow_dr;
+} drmach_domain_info_t;
+
+typedef struct {
+ drmach_board_t *obj;
+ int ndevs;
+ void *a;
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t);
+ sbd_error_t *err;
+} drmach_board_cb_data_t;
+
+extern drmach_domain_info_t drmach_domain;
+
+extern drmach_board_t *drmach_board_new(uint_t, int);
+extern sbd_error_t *drmach_device_new(drmach_node_t *,
+ drmach_board_t *, int, drmachid_t *);
+extern sbd_error_t *drmach_cpu_new(drmach_device_t *, drmachid_t *);
+extern sbd_error_t *drmach_mem_new(drmach_device_t *, drmachid_t *);
+extern sbd_error_t *drmach_io_new(drmach_device_t *, drmachid_t *);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DRMACH_ACPI_H */
diff --git a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
index 3a237509ba..cece3f77f2 100644
--- a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
+++ b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
@@ -919,7 +919,7 @@ acpi_drv_obj_copy(ACPI_OBJECT *op, char *bp, struct obj_desc *dp)
ep->Type == dp->type) {
ACPI_DRV_DBG(CE_NOTE, NULL, "\t%s: \"%s\"",
dp->name, ep->String.Pointer);
- (void) strncpy(fp, ep->String.Pointer, dp->size);
+ (void) strlcpy(fp, ep->String.Pointer, dp->size);
} else if (dp->type == ACPI_TYPE_STRING &&
ep->Type == ACPI_TYPE_BUFFER) {
#ifdef DEBUG
@@ -1559,9 +1559,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p)
if ((info->Valid & ACPI_VALID_HID) == 0) {
ACPI_DRV_DBG(CE_WARN, NULL,
"AcpiGetObjectInfo(): _HID not available");
- (void) strncpy(p->hid, "\0", ID_LEN);
+ p->hid[0] = 0;
} else {
- (void) strncpy(p->hid, info->HardwareId.String, ID_LEN);
+ (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN);
}
/*
@@ -1572,9 +1572,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p)
ACPI_DRV_DBG(CE_WARN, NULL,
"AcpiGetObjectInfo(): _UID not available");
/* Use 0 as the default _UID */
- (void) strncpy(p->uid, "\0", ID_LEN);
+ p->uid[0] = 0;
} else {
- (void) strncpy(p->uid, info->UniqueId.String, ID_LEN);
+ (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN);
}
AcpiOsFree(info);
@@ -2064,9 +2064,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p)
if ((info->Valid & ACPI_VALID_HID) == 0) {
ACPI_DRV_DBG(CE_WARN, NULL,
"!AcpiGetObjectInfo(): _HID not available");
- (void) strncpy(p->hid, "\0", ID_LEN);
+ p->hid[0] = 0;
} else {
- (void) strncpy(p->hid, info->HardwareId.String, ID_LEN);
+ (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN);
}
/*
@@ -2077,9 +2077,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p)
ACPI_DRV_DBG(CE_WARN, NULL,
"!AcpiGetObjectInfo(): _UID not available");
/* Use 0 as the default _UID */
- (void) strncpy(p->uid, "\0", ID_LEN);
+ p->uid[0] = 0;
} else {
- (void) strncpy(p->uid, info->UniqueId.String, ID_LEN);
+ (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN);
}
if (info->Valid & ACPI_VALID_ADR) {
diff --git a/usr/src/uts/i86pc/io/cbe.c b/usr/src/uts/i86pc/io/cbe.c
index d5e08c63bc..87ec248717 100644
--- a/usr/src/uts/i86pc/io/cbe.c
+++ b/usr/src/uts/i86pc/io/cbe.c
@@ -40,6 +40,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/ddi_intr.h>
#include <sys/avintr.h>
+#include <sys/note.h>
static int cbe_vector;
static int cbe_ticks = 0;
@@ -207,6 +208,13 @@ cbe_configure(cpu_t *cpu)
return (cpu);
}
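+
+/*
+ * Counterpart of cbe_configure(), installed below as the cyclic backend's
+ * cyb_unconfigure op; by the time the cyclic subsystem invokes it for a
+ * DR-unconfigured CPU, the CPU must already be out of the cbe_enabled set.
+ */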
+void
+cbe_unconfigure(void *arg)
+{
+ _NOTE(ARGUNUSED(arg));
+ ASSERT(!CPU_IN_SET(cbe_enabled, ((cpu_t *)arg)->cpu_id));
+}
+
#ifndef __xpv
/*
* declarations needed for time adjustment
@@ -318,7 +326,7 @@ cbe_init(void)
{
cyc_backend_t cbe = {
cbe_configure, /* cyb_configure */
- NULL, /* cyb_unconfigure */
+ cbe_unconfigure, /* cyb_unconfigure */
cbe_enable, /* cyb_enable */
cbe_disable, /* cyb_disable */
cbe_reprogram, /* cyb_reprogram */
diff --git a/usr/src/uts/i86pc/io/dr/dr.c b/usr/src/uts/i86pc/io/dr/dr.c
new file mode 100644
index 0000000000..90f3a80adf
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr.c
@@ -0,0 +1,3329 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * PIM-DR layer of DR driver. Provides interface between user
+ * level applications and the PSM-DR layer.
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/dditypes.h>
+#include <sys/devops.h>
+#include <sys/modctl.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/processor.h>
+#include <sys/cpuvar.h>
+#include <sys/mem_config.h>
+
+#include <sys/autoconf.h>
+#include <sys/cmn_err.h>
+
+#include <sys/ddi_impldefs.h>
+#include <sys/promif.h>
+#include <sys/machsystm.h>
+
+#include <sys/dr.h>
+#include <sys/drmach.h>
+#include <sys/dr_util.h>
+
+extern int nulldev();
+extern int nodev();
+extern struct memlist *phys_install;
+
+#ifdef DEBUG
+uint_t dr_debug = 0; /* dr.h for bit values */
+#endif /* DEBUG */
+
+/*
+ * NOTE: state_str, nt_str and SBD_CMD_STR are only used in a debug
+ * kernel. They are, however, referenced during both debug and non-debug
+ * compiles.
+ */
+
+static char *state_str[] = {
+ "EMPTY", "OCCUPIED", "CONNECTED", "UNCONFIGURED",
+ "PARTIAL", "CONFIGURED", "RELEASE", "UNREFERENCED",
+ "FATAL"
+};
+
+#define SBD_CMD_STR(c) \
+ (((c) == SBD_CMD_ASSIGN) ? "ASSIGN" : \
+ ((c) == SBD_CMD_UNASSIGN) ? "UNASSIGN" : \
+ ((c) == SBD_CMD_POWERON) ? "POWERON" : \
+ ((c) == SBD_CMD_POWEROFF) ? "POWEROFF" : \
+ ((c) == SBD_CMD_TEST) ? "TEST" : \
+ ((c) == SBD_CMD_CONNECT) ? "CONNECT" : \
+ ((c) == SBD_CMD_DISCONNECT) ? "DISCONNECT" : \
+ ((c) == SBD_CMD_CONFIGURE) ? "CONFIGURE" : \
+ ((c) == SBD_CMD_UNCONFIGURE) ? "UNCONFIGURE" : \
+ ((c) == SBD_CMD_GETNCM) ? "GETNCM" : \
+ ((c) == SBD_CMD_PASSTHRU) ? "PASSTHRU" : \
+ ((c) == SBD_CMD_STATUS) ? "STATUS" : "unknown")
+
+#define DR_GET_BOARD_DEVUNIT(sb, ut, un) (&((sb)->b_dev[DEVSET_NIX(ut)][un]))
+
+#define DR_MAKE_MINOR(i, b) (((i) << 16) | (b))
+#define DR_MINOR2INST(m) (((m) >> 16) & 0xffff)
+#define DR_MINOR2BNUM(m) ((m) & 0xffff)
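+
+/*
+ * Example: instance 2, board 5 gives minor number (2 << 16) | 5 == 0x20005;
+ * DR_MINOR2INST() and DR_MINOR2BNUM() recover the two halves.
+ */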
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr.c %d";
+
+/* struct for drmach device name to sbd_comp_type_t mapping */
+typedef struct {
+ char *s_devtype;
+ sbd_comp_type_t s_nodetype;
+} dr_devname_t;
+
+/* struct to map starfire device attributes - name:sbd_comp_type_t */
+static dr_devname_t dr_devattr[] = {
+ { DRMACH_DEVTYPE_MEM, SBD_COMP_MEM },
+ { DRMACH_DEVTYPE_CPU, SBD_COMP_CPU },
+ { DRMACH_DEVTYPE_PCI, SBD_COMP_IO },
+#if defined(DRMACH_DEVTYPE_SBUS)
+ { DRMACH_DEVTYPE_SBUS, SBD_COMP_IO },
+#endif
+#if defined(DRMACH_DEVTYPE_WCI)
+ { DRMACH_DEVTYPE_WCI, SBD_COMP_IO },
+#endif
+ /* last s_devtype must be NULL, s_nodetype must be SBD_COMP_UNKNOWN */
+ { NULL, SBD_COMP_UNKNOWN }
+};
+
+/*
+ * Per instance soft-state structure.
+ */
+typedef struct dr_softstate {
+ dev_info_t *dip;
+ dr_board_t *boards;
+ kmutex_t i_lock;
+ int dr_initialized;
+} dr_softstate_t;
+
+/*
+ * dr Global data elements
+ */
+struct dr_global {
+	dr_softstate_t	*softsp;	/* soft-state handle */
+ kmutex_t lock;
+} dr_g;
+
+dr_unsafe_devs_t dr_unsafe_devs;
+
+/*
+ * Table of known passthru commands.
+ */
+struct {
+ char *pt_name;
+ int (*pt_func)(dr_handle_t *);
+} pt_arr[] = {
+	{ "quiesce",	dr_pt_test_suspend },
+};
+
+int dr_modunload_okay = 0; /* set to non-zero to allow unload */
+
+/*
+ * State transition table: lists the valid transitions for each "board"
+ * state. Recall that a non-zero x_rv value terminates the operation;
+ * however, it is the x_err value that really indicates the error, if any.
+ */
+static int
+_cmd2index(int c)
+{
+ /*
+ * Translate DR CMD to index into dr_state_transition.
+ */
+ switch (c) {
+ case SBD_CMD_CONNECT: return (0);
+ case SBD_CMD_DISCONNECT: return (1);
+ case SBD_CMD_CONFIGURE: return (2);
+ case SBD_CMD_UNCONFIGURE: return (3);
+ case SBD_CMD_ASSIGN: return (4);
+ case SBD_CMD_UNASSIGN: return (5);
+ case SBD_CMD_POWERON: return (6);
+ case SBD_CMD_POWEROFF: return (7);
+ case SBD_CMD_TEST: return (8);
+ default: return (-1);
+ }
+}
+
+#define CMD2INDEX(c) _cmd2index(c)
+
+static struct dr_state_trans {
+ int x_cmd;
+ struct {
+ int x_rv; /* return value of pre_op */
+ int x_err; /* error, if any */
+ } x_op[DR_STATE_MAX];
+} dr_state_transition[] = {
+ { SBD_CMD_CONNECT,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_DISCONNECT,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { 0, 0 }, /* occupied */
+ { 0, 0 }, /* connected */
+ { 0, 0 }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_CONFIGURE,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { -1, ESBD_STATE }, /* occupied */
+ { 0, 0 }, /* connected */
+ { 0, 0 }, /* unconfigured */
+ { 0, 0 }, /* partial */
+ { 0, 0 }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_UNCONFIGURE,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { -1, ESBD_STATE }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { 0, 0 }, /* partial */
+ { 0, 0 }, /* configured */
+ { 0, 0 }, /* release */
+ { 0, 0 }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_ASSIGN,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_UNASSIGN,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_POWERON,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_POWEROFF,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_TEST,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+};
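+
+/*
+ * Example: dr_state_transition[CMD2INDEX(SBD_CMD_CONNECT)] permits
+ * SBD_CMD_CONNECT only from the EMPTY and OCCUPIED states; from any other
+ * state dr_pre_op() fails with ESBD_STATE (ESBD_FATAL_STATE from FATAL).
+ */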
+
+/*
+ * Global R/W lock to synchronize access across
+ * multiple boards. Users wanting multi-board access
+ * must grab WRITE lock, others must grab READ lock.
+ */
+krwlock_t dr_grwlock;
+
+/*
+ * Head of the boardlist used as a reference point for
+ * locating board structs.
+ * TODO: eliminate dr_boardlist
+ */
+dr_board_t *dr_boardlist;
+
+/*
+ * DR support functions.
+ */
+static dr_devset_t dr_dev2devset(sbd_comp_id_t *cid);
+static int dr_check_transition(dr_board_t *bp,
+ dr_devset_t *devsetp,
+ struct dr_state_trans *transp,
+ int cmd);
+static int dr_check_unit_attached(dr_common_unit_t *dp);
+static sbd_error_t *dr_init_devlists(dr_board_t *bp);
+static void dr_board_discovery(dr_board_t *bp);
+static int dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd);
+static void dr_board_destroy(dr_board_t *bp);
+static void dr_board_transition(dr_board_t *bp, dr_state_t st);
+
+/*
+ * DR driver (DDI) entry points.
+ */
+static int dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
+ void *arg, void **result);
+static int dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int dr_probe(dev_info_t *dip);
+static int dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *cred_p, int *rval_p);
+static int dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p);
+static int dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p);
+
+/*
+ * DR command processing operations.
+ */
+static int dr_copyin_iocmd(dr_handle_t *hp);
+static int dr_copyout_iocmd(dr_handle_t *hp);
+static int dr_copyout_errs(dr_handle_t *hp);
+static int dr_pre_op(dr_handle_t *hp);
+static int dr_post_op(dr_handle_t *hp, int rv);
+static int dr_exec_op(dr_handle_t *hp);
+static void dr_assign_board(dr_handle_t *hp);
+static void dr_unassign_board(dr_handle_t *hp);
+static void dr_connect(dr_handle_t *hp);
+static int dr_disconnect(dr_handle_t *hp);
+static void dr_dev_configure(dr_handle_t *hp);
+static void dr_dev_release(dr_handle_t *hp);
+static int dr_dev_unconfigure(dr_handle_t *hp);
+static void dr_dev_cancel(dr_handle_t *hp);
+static int dr_dev_status(dr_handle_t *hp);
+static int dr_get_ncm(dr_handle_t *hp);
+static int dr_pt_ioctl(dr_handle_t *hp);
+static void dr_poweron_board(dr_handle_t *hp);
+static void dr_poweroff_board(dr_handle_t *hp);
+static void dr_test_board(dr_handle_t *hp);
+
+/*
+ * Autoconfiguration data structures
+ */
+struct cb_ops dr_cb_ops = {
+ dr_open, /* open */
+ dr_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ dr_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* struct streamtab */
+ D_NEW | D_MP | D_MTSAFE, /* compatibility flags */
+ CB_REV, /* Rev */
+ nodev, /* cb_aread */
+ nodev /* cb_awrite */
+};
+
+struct dev_ops dr_dev_ops = {
+ DEVO_REV, /* build version */
+ 0, /* dev ref count */
+ dr_getinfo, /* getinfo */
+ nulldev, /* identify */
+ dr_probe, /* probe */
+ dr_attach, /* attach */
+ dr_detach, /* detach */
+ nodev, /* reset */
+ &dr_cb_ops, /* cb_ops */
+ (struct bus_ops *)NULL, /* bus ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+extern struct mod_ops mod_driverops;
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "Dynamic Reconfiguration",
+ &dr_dev_ops
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/*
+ * Driver entry points.
+ */
+int
+_init(void)
+{
+ int err;
+
+ /*
+ * If you need to support multiple nodes (instances), then
+	 * whatever the maximum number of supported nodes is would need
+	 * to be passed as the third parameter to ddi_soft_state_init().
+	 * An alternative would be to dynamically fini and re-init the
+ * soft state structure each time a node is attached.
+ */
+ err = ddi_soft_state_init((void **)&dr_g.softsp,
+ sizeof (dr_softstate_t), 1);
+ if (err)
+ return (err);
+
+ mutex_init(&dr_g.lock, NULL, MUTEX_DRIVER, NULL);
+ rw_init(&dr_grwlock, NULL, RW_DEFAULT, NULL);
+
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) != 0)
+ return (err);
+
+ mutex_destroy(&dr_g.lock);
+ rw_destroy(&dr_grwlock);
+
+ ddi_soft_state_fini((void **)&dr_g.softsp);
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*ARGSUSED1*/
+static int
+dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p)
+{
+ int instance;
+ dr_softstate_t *softsp;
+ dr_board_t *bp;
+
+ /*
+ * Don't open unless we've attached.
+ */
+ instance = DR_MINOR2INST(getminor(*dev));
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&softsp->i_lock);
+ if (!softsp->dr_initialized) {
+ int bd;
+ int rv = 0;
+
+ bp = softsp->boards;
+
+ /* initialize each array element */
+ for (bd = 0; bd < MAX_BOARDS; bd++, bp++) {
+ rv = dr_board_init(bp, softsp->dip, bd);
+ if (rv)
+ break;
+ }
+
+ if (rv == 0) {
+ softsp->dr_initialized = 1;
+ } else {
+ /* destroy elements initialized thus far */
+ while (--bp >= softsp->boards)
+ dr_board_destroy(bp);
+
+			/* TODO: should this be another errno val? */
+ mutex_exit(&softsp->i_lock);
+ return (ENXIO);
+ }
+ }
+ mutex_exit(&softsp->i_lock);
+
+ bp = &softsp->boards[DR_MINOR2BNUM(getminor(*dev))];
+
+ /*
+	 * Prevent opening of a dynamic attachment point for a board
+	 * that does not exist.
+ */
+ if (!bp->b_assigned) {
+ if (drmach_board_lookup(bp->b_num, &bp->b_id) != 0)
+ return (ENODEV);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+/*
+ * Enable/disable DR features.
+ */
+int dr_enable = 1;
+
+/*ARGSUSED3*/
+static int
+dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *cred_p, int *rval_p)
+{
+ static int dr_dev_type_to_nt(char *);
+
+ int rv = 0;
+ int instance;
+ int bd;
+ dr_handle_t *hp;
+ dr_softstate_t *softsp;
+ static fn_t f = "dr_ioctl";
+
+ PR_ALL("%s...\n", f);
+
+ instance = DR_MINOR2INST(getminor(dev));
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL) {
+ cmn_err(CE_WARN, "dr%d: module not yet attached", instance);
+ return (ENXIO);
+ }
+
+ if (!dr_enable) {
+ switch (cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ case SBD_CMD_PASSTHRU:
+ break;
+ default:
+ return (ENOTSUP);
+ }
+ }
+
+ bd = DR_MINOR2BNUM(getminor(dev));
+ if (bd >= MAX_BOARDS)
+ return (ENXIO);
+
+ /* get and initialize storage for new handle */
+ hp = GETSTRUCT(dr_handle_t, 1);
+ hp->h_bd = &softsp->boards[bd];
+ hp->h_err = NULL;
+ hp->h_dev = getminor(dev);
+ hp->h_cmd = cmd;
+ hp->h_mode = mode;
+ hp->h_iap = (sbd_ioctl_arg_t *)arg;
+
+ /* copy sbd command into handle */
+ rv = dr_copyin_iocmd(hp);
+ if (rv) {
+ FREESTRUCT(hp, dr_handle_t, 1);
+ return (EINVAL);
+ }
+
+ /* translate canonical name to component type */
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_name[0] != '\0') {
+ hp->h_sbdcmd.cmd_cm.c_id.c_type =
+ dr_dev_type_to_nt(hp->h_sbdcmd.cmd_cm.c_id.c_name);
+
+ PR_ALL("%s: c_name = %s, c_type = %d\n",
+ f,
+ hp->h_sbdcmd.cmd_cm.c_id.c_name,
+ hp->h_sbdcmd.cmd_cm.c_id.c_type);
+ } else {
+ /*EMPTY*/
+ PR_ALL("%s: c_name is NULL\n", f);
+ }
+
+ /* determine scope of operation */
+ hp->h_devset = dr_dev2devset(&hp->h_sbdcmd.cmd_cm.c_id);
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ /* no locks needed for these commands */
+ break;
+
+ default:
+ rw_enter(&dr_grwlock, RW_WRITER);
+ mutex_enter(&hp->h_bd->b_lock);
+
+ /*
+ * If we're dealing with memory at all, then we have
+ * to keep the "exclusive" global lock held. This is
+ * necessary since we will probably need to look at
+ * multiple board structs. Otherwise, we only have
+ * to deal with the board in question and so can drop
+ * the global lock to "shared".
+ */
+ rv = DEVSET_IN_SET(hp->h_devset, SBD_COMP_MEM, DEVSET_ANYUNIT);
+ if (rv == 0)
+ rw_downgrade(&dr_grwlock);
+ break;
+ }
+
+	rv = dr_pre_op(hp);
+ if (rv == 0) {
+ rv = dr_exec_op(hp);
+ rv = dr_post_op(hp, rv);
+ }
+
+ if (rv == -1)
+ rv = EIO;
+
+ if (hp->h_err != NULL)
+ if (!(rv = dr_copyout_errs(hp)))
+ rv = EIO;
+
+ /* undo locking, if any, done before dr_pre_op */
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ break;
+
+ case SBD_CMD_ASSIGN:
+ case SBD_CMD_UNASSIGN:
+ case SBD_CMD_POWERON:
+ case SBD_CMD_POWEROFF:
+ case SBD_CMD_CONNECT:
+ case SBD_CMD_CONFIGURE:
+ case SBD_CMD_UNCONFIGURE:
+ case SBD_CMD_DISCONNECT:
+ /* Board changed state. Log a sysevent. */
+ if (rv == 0)
+ (void) drmach_log_sysevent(hp->h_bd->b_num, "",
+ SE_SLEEP, 0);
+ /* Fall through */
+
+ default:
+ mutex_exit(&hp->h_bd->b_lock);
+ rw_exit(&dr_grwlock);
+ }
+
+ if (hp->h_opts.size != 0)
+ FREESTRUCT(hp->h_opts.copts, char, hp->h_opts.size);
+
+ FREESTRUCT(hp, dr_handle_t, 1);
+
+ return (rv);
+}
+
+/*ARGSUSED*/
+static int
+dr_probe(dev_info_t *dip)
+{
+ return (DDI_PROBE_SUCCESS);
+}
+
+static int
+dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int rv, rv2;
+ int bd;
+ int instance;
+ sbd_error_t *err;
+ dr_softstate_t *softsp;
+
+ instance = ddi_get_instance(dip);
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ rw_enter(&dr_grwlock, RW_WRITER);
+
+ rv = ddi_soft_state_zalloc(dr_g.softsp, instance);
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "dr%d: failed to alloc soft-state",
+ instance);
+ return (DDI_FAILURE);
+ }
+
+ /* initialize softstate structure */
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ softsp->dip = dip;
+
+ mutex_init(&softsp->i_lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* allocate board array (aka boardlist) */
+ softsp->boards = GETSTRUCT(dr_board_t, MAX_BOARDS);
+
+ /* TODO: eliminate dr_boardlist */
+ dr_boardlist = softsp->boards;
+
+ /* initialize each array element */
+ rv = DDI_SUCCESS;
+ for (bd = 0; bd < MAX_BOARDS; bd++) {
+ dr_board_t *bp = &softsp->boards[bd];
+ char *p, *name;
+ int l, minor_num;
+
+ /*
+			 * Initialize the board attachment point path
+			 * (relative to pseudo) in a form immediately
+			 * reusable as a cfgadm command argument.
+ * TODO: clean this up
+ */
+ p = bp->b_path;
+ l = sizeof (bp->b_path);
+ (void) snprintf(p, l, "dr@%d:", instance);
+ while (*p != '\0') {
+ l--;
+ p++;
+ }
+
+ name = p;
+ err = drmach_board_name(bd, p, l);
+ if (err) {
+ sbd_err_clear(&err);
+ rv = DDI_FAILURE;
+ break;
+ }
+
+ minor_num = DR_MAKE_MINOR(instance, bd);
+ rv = ddi_create_minor_node(dip, name, S_IFCHR,
+ minor_num, DDI_NT_SBD_ATTACHMENT_POINT, NULL);
+ if (rv != DDI_SUCCESS)
+ rv = DDI_FAILURE;
+ }
+
+ if (rv == DDI_SUCCESS) {
+ /*
+ * Announce the node's presence.
+ */
+ ddi_report_dev(dip);
+ } else {
+ ddi_remove_minor_node(dip, NULL);
+ }
+ /*
+ * Init registered unsafe devs.
+ */
+ dr_unsafe_devs.devnames = NULL;
+ rv2 = ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ "unsupported-io-drivers", &dr_unsafe_devs.devnames,
+ &dr_unsafe_devs.ndevs);
+
+ if (rv2 != DDI_PROP_SUCCESS)
+ dr_unsafe_devs.ndevs = 0;
+
+ rw_exit(&dr_grwlock);
+ return (rv);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ /*NOTREACHED*/
+}
+
+static int
+dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance;
+ dr_softstate_t *softsp;
+
+ switch (cmd) {
+ case DDI_DETACH:
+ if (!dr_modunload_okay)
+ return (DDI_FAILURE);
+
+ rw_enter(&dr_grwlock, RW_WRITER);
+
+ instance = ddi_get_instance(dip);
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+
+ /* TODO: eliminate dr_boardlist */
+ ASSERT(softsp->boards == dr_boardlist);
+
+ /* remove all minor nodes */
+ ddi_remove_minor_node(dip, NULL);
+
+ if (softsp->dr_initialized) {
+ int bd;
+
+ for (bd = 0; bd < MAX_BOARDS; bd++)
+ dr_board_destroy(&softsp->boards[bd]);
+ }
+
+ FREESTRUCT(softsp->boards, dr_board_t, MAX_BOARDS);
+ mutex_destroy(&softsp->i_lock);
+ ddi_soft_state_free(dr_g.softsp, instance);
+
+ rw_exit(&dr_grwlock);
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+ /*NOTREACHED*/
+}
+
+static int
+dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ _NOTE(ARGUNUSED(dip))
+
+ dev_t dev = (dev_t)arg;
+ int instance, error;
+ dr_softstate_t *softsp;
+
+ *result = NULL;
+ error = DDI_SUCCESS;
+ instance = DR_MINOR2INST(getminor(dev));
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL)
+ return (DDI_FAILURE);
+ *result = (void *)softsp->dip;
+ break;
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)instance;
+ break;
+
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * DR operations.
+ */
+
+static int
+dr_copyin_iocmd(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyin_iocmd";
+ sbd_cmd_t *scp = &hp->h_sbdcmd;
+
+ if (hp->h_iap == NULL)
+ return (EINVAL);
+
+ bzero((caddr_t)scp, sizeof (sbd_cmd_t));
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_cmd32_t scmd32;
+
+ bzero((caddr_t)&scmd32, sizeof (sbd_cmd32_t));
+
+ if (ddi_copyin((void *)hp->h_iap, (void *)&scmd32,
+ sizeof (sbd_cmd32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyin "
+ "sbdcmd-struct", f);
+ return (EFAULT);
+ }
+ scp->cmd_cm.c_id.c_type = scmd32.cmd_cm.c_id.c_type;
+ scp->cmd_cm.c_id.c_unit = scmd32.cmd_cm.c_id.c_unit;
+ bcopy(&scmd32.cmd_cm.c_id.c_name[0],
+ &scp->cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME);
+ scp->cmd_cm.c_flags = scmd32.cmd_cm.c_flags;
+ scp->cmd_cm.c_len = scmd32.cmd_cm.c_len;
+ scp->cmd_cm.c_opts = (caddr_t)(uintptr_t)scmd32.cmd_cm.c_opts;
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ scp->cmd_stat.s_nbytes = scmd32.cmd_stat.s_nbytes;
+ scp->cmd_stat.s_statp =
+ (caddr_t)(uintptr_t)scmd32.cmd_stat.s_statp;
+ break;
+ default:
+ break;
+
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyin((void *)hp->h_iap, (void *)scp,
+ sizeof (sbd_cmd_t), hp->h_mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyin sbdcmd-struct", f);
+ return (EFAULT);
+ }
+
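+	/*
+	 * Copy in the options string, if any. One extra byte is allocated
+	 * (GETSTRUCT zeroes its allocation) so the buffer is always
+	 * NUL-terminated.
+	 */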
+ if ((hp->h_opts.size = scp->cmd_cm.c_len) != 0) {
+ hp->h_opts.copts = GETSTRUCT(char, scp->cmd_cm.c_len + 1);
+ ++hp->h_opts.size;
+ if (ddi_copyin((void *)scp->cmd_cm.c_opts,
+ (void *)hp->h_opts.copts,
+ scp->cmd_cm.c_len, hp->h_mode) != 0) {
+ cmn_err(CE_WARN, "%s: failed to copyin options", f);
+ return (EFAULT);
+ }
+ }
+
+ return (0);
+}
+
+static int
+dr_copyout_iocmd(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyout_iocmd";
+ sbd_cmd_t *scp = &hp->h_sbdcmd;
+
+ if (hp->h_iap == NULL)
+ return (EINVAL);
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_cmd32_t scmd32;
+
+ scmd32.cmd_cm.c_id.c_type = scp->cmd_cm.c_id.c_type;
+ scmd32.cmd_cm.c_id.c_unit = scp->cmd_cm.c_id.c_unit;
+ bcopy(&scp->cmd_cm.c_id.c_name[0],
+ &scmd32.cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME);
+
+ scmd32.cmd_cm.c_flags = scp->cmd_cm.c_flags;
+ scmd32.cmd_cm.c_len = scp->cmd_cm.c_len;
+ scmd32.cmd_cm.c_opts = (caddr32_t)(uintptr_t)scp->cmd_cm.c_opts;
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_GETNCM:
+ scmd32.cmd_getncm.g_ncm = scp->cmd_getncm.g_ncm;
+ break;
+ default:
+ break;
+ }
+
+ if (ddi_copyout((void *)&scmd32, (void *)hp->h_iap,
+ sizeof (sbd_cmd32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyout "
+ "sbdcmd-struct", f);
+ return (EFAULT);
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyout((void *)scp, (void *)hp->h_iap,
+ sizeof (sbd_cmd_t), hp->h_mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout sbdcmd-struct", f);
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+dr_copyout_errs(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyout_errs";
+
+ if (hp->h_err == NULL)
+ return (0);
+
+ if (hp->h_err->e_code) {
+ PR_ALL("%s: error %d %s",
+ f, hp->h_err->e_code, hp->h_err->e_rsc);
+ }
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_error32_t *serr32p;
+
+ serr32p = GETSTRUCT(sbd_error32_t, 1);
+
+ serr32p->e_code = hp->h_err->e_code;
+ bcopy(&hp->h_err->e_rsc[0], &serr32p->e_rsc[0],
+ MAXPATHLEN);
+ if (ddi_copyout((void *)serr32p,
+ (void *)&((sbd_ioctl_arg32_t *)hp->h_iap)->i_err,
+ sizeof (sbd_error32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyout", f);
+ return (EFAULT);
+ }
+ FREESTRUCT(serr32p, sbd_error32_t, 1);
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyout((void *)hp->h_err,
+ (void *)&hp->h_iap->i_err,
+ sizeof (sbd_error_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout", f);
+ return (EFAULT);
+ }
+
+ sbd_err_clear(&hp->h_err);
+
+ return (0);
+
+}
+
+/*
+ * The pre-op entry point must set hp->h_err (via sbd_err_set_c()) if
+ * needed; a non-zero return value indicates failure.
+ */
+static int
+dr_pre_op(dr_handle_t *hp)
+{
+ int rv = 0, t;
+ int cmd, serr = 0;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ dr_handle_t *shp = hp;
+ static fn_t f = "dr_pre_op";
+
+ cmd = hp->h_cmd;
+ devset = shp->h_devset;
+
+ PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd));
+
+ devset = DEVSET_AND(devset, DR_DEVS_PRESENT(bp));
+ hp->h_err = drmach_pre_op(cmd, bp->b_id, &hp->h_opts, &devset);
+ if (hp->h_err != NULL) {
+ PR_ALL("drmach_pre_op failed for cmd %s(%d)\n",
+ SBD_CMD_STR(cmd), cmd);
+ return (-1);
+ }
+
+ /*
+ * Check for valid state transitions.
+ */
+ if ((t = CMD2INDEX(cmd)) != -1) {
+ struct dr_state_trans *transp;
+ int state_err;
+
+ transp = &dr_state_transition[t];
+ ASSERT(transp->x_cmd == cmd);
+
+ state_err = dr_check_transition(bp, &devset, transp, cmd);
+
+ if (state_err < 0) {
+ /*
+ * Invalidate device.
+ */
+ dr_op_err(CE_IGNORE, hp, ESBD_INVAL, NULL);
+ serr = -1;
+ PR_ALL("%s: invalid devset (0x%x)\n",
+ f, (uint_t)devset);
+ } else if (state_err != 0) {
+ /*
+ * State transition is not a valid one.
+ */
+ dr_op_err(CE_IGNORE, hp,
+ transp->x_op[state_err].x_err, NULL);
+
+ serr = transp->x_op[state_err].x_rv;
+
+ PR_ALL("%s: invalid state %s(%d) for cmd %s(%d)\n",
+ f, state_str[state_err], state_err,
+ SBD_CMD_STR(cmd), cmd);
+ } else {
+ shp->h_devset = devset;
+ }
+ }
+
+ if (serr) {
+ rv = -1;
+ }
+
+ return (rv);
+}
+
+static int
+dr_post_op(dr_handle_t *hp, int rv)
+{
+ int cmd;
+ sbd_error_t *err;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_post_op";
+
+ cmd = hp->h_cmd;
+
+ PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd));
+
+ err = drmach_post_op(cmd, bp->b_id, &hp->h_opts, rv);
+ if (err != NULL) {
+ PR_ALL("drmach_post_op failed for cmd %s(%d)\n",
+ SBD_CMD_STR(cmd), cmd);
+ if (rv == 0) {
+ ASSERT(hp->h_err == NULL);
+ hp->h_err = err;
+ rv = -1;
+ } else if (hp->h_err == NULL) {
+ hp->h_err = err;
+ } else {
+ sbd_err_clear(&err);
+ }
+ }
+
+ return (rv);
+}
+
+static int
+dr_exec_op(dr_handle_t *hp)
+{
+ int rv = 0;
+ static fn_t f = "dr_exec_op";
+
+ /* errors should have been caught by now */
+ ASSERT(hp->h_err == NULL);
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_ASSIGN:
+ dr_assign_board(hp);
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ dr_unassign_board(hp);
+ break;
+
+ case SBD_CMD_POWEROFF:
+ dr_poweroff_board(hp);
+ break;
+
+ case SBD_CMD_POWERON:
+ dr_poweron_board(hp);
+ break;
+
+ case SBD_CMD_TEST:
+ dr_test_board(hp);
+ break;
+
+ case SBD_CMD_CONNECT:
+ dr_connect(hp);
+ break;
+
+ case SBD_CMD_CONFIGURE:
+ dr_dev_configure(hp);
+ break;
+
+ case SBD_CMD_UNCONFIGURE:
+ dr_dev_release(hp);
+ if (hp->h_err == NULL)
+ rv = dr_dev_unconfigure(hp);
+ else
+ dr_dev_cancel(hp);
+ break;
+
+ case SBD_CMD_DISCONNECT:
+ rv = dr_disconnect(hp);
+ break;
+
+ case SBD_CMD_STATUS:
+ rv = dr_dev_status(hp);
+ break;
+
+ case SBD_CMD_GETNCM:
+ hp->h_sbdcmd.cmd_getncm.g_ncm = dr_get_ncm(hp);
+ rv = dr_copyout_iocmd(hp);
+ break;
+
+ case SBD_CMD_PASSTHRU:
+ rv = dr_pt_ioctl(hp);
+ break;
+
+ default:
+ cmn_err(CE_WARN,
+ "%s: unknown command (%d)",
+ f, hp->h_cmd);
+ break;
+ }
+
+ if (hp->h_err != NULL) {
+ rv = -1;
+ }
+
+ return (rv);
+}
+
+static void
+dr_assign_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_assign(bp->b_num, &bp->b_id);
+ if (hp->h_err == NULL) {
+ bp->b_assigned = 1;
+ }
+}
+
+static void
+dr_unassign_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+ * Block out status during unassign.
+	 * We do not use cv_wait_sig here because the starfire SSP
+	 * software ignores unassign failures and removes the board
+	 * from the domain mask, causing a system panic.
+	 * TODO: Change cv_wait to cv_wait_sig once the SSP software
+	 * handles unassign failures.
+ */
+ dr_lock_status(bp);
+
+ hp->h_err = drmach_board_unassign(bp->b_id);
+ if (hp->h_err == NULL) {
+ /*
+ * clear drmachid_t handle; not valid after board unassign
+ */
+ bp->b_id = 0;
+ bp->b_assigned = 0;
+ }
+
+ dr_unlock_status(bp);
+}
+
+static void
+dr_poweron_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_poweron(bp->b_id);
+}
+
+static void
+dr_poweroff_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_poweroff(bp->b_id);
+}
+
+static void
+dr_test_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ hp->h_err = drmach_board_test(bp->b_id, &hp->h_opts,
+ dr_cmd_flags(hp) & SBD_FLAG_FORCE);
+}
+
+/*
+ * Create and populate the component nodes for a board. Assumes that the
+ * devlists for the board have been initialized.
+ */
+static void
+dr_make_comp_nodes(dr_board_t *bp)
+{
+ int i;
+
+ /*
+ * Make nodes for the individual components on the board.
+ * First we need to initialize memory unit data structures of board
+ * structure.
+ */
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+
+ mp = dr_get_mem_unit(bp, i);
+ dr_init_mem_unit(mp);
+ }
+
+ /*
+ * Initialize cpu unit data structures.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+
+ cp = dr_get_cpu_unit(bp, i);
+ dr_init_cpu_unit(cp);
+ }
+
+ /*
+ * Initialize io unit data structures.
+ */
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ ip = dr_get_io_unit(bp, i);
+ dr_init_io_unit(ip);
+ }
+
+ dr_board_transition(bp, DR_STATE_CONNECTED);
+
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_cond = SBD_COND_OK;
+	(void) drv_getparm(TIME, (void *)&bp->b_time);
+}
+
+/*
+ * Only do work if called to operate on an entire board
+ * which doesn't already have components present.
+ */
+static void
+dr_connect(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_connect";
+
+ PR_ALL("%s...\n", f);
+
+ if (DR_DEVS_PRESENT(bp)) {
+ /*
+ * Board already has devices present.
+ */
+ PR_ALL("%s: devices already present (" DEVSET_FMT_STR ")\n",
+ f, DEVSET_FMT_ARG(DR_DEVS_PRESENT(bp)));
+ return;
+ }
+
+ hp->h_err = drmach_board_connect(bp->b_id, &hp->h_opts);
+ if (hp->h_err)
+ return;
+
+ hp->h_err = dr_init_devlists(bp);
+ if (hp->h_err)
+ return;
+ else if (bp->b_ndev == 0) {
+ dr_op_err(CE_WARN, hp, ESBD_EMPTY_BD, bp->b_path);
+ return;
+ } else {
+ dr_make_comp_nodes(bp);
+ return;
+ }
+ /*NOTREACHED*/
+}
+
+static int
+dr_disconnect(dr_handle_t *hp)
+{
+ int i;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_disconnect";
+
+ PR_ALL("%s...\n", f);
+
+ /*
+	 * Only devices which are present but unattached
+	 * can be disconnected.
+ */
+ devset = hp->h_devset & DR_DEVS_PRESENT(bp) &
+ DR_DEVS_UNATTACHED(bp);
+
+ if ((devset == 0) && DR_DEVS_PRESENT(bp)) {
+ dr_op_err(CE_IGNORE, hp, ESBD_EMPTY_BD, bp->b_path);
+ return (0);
+ }
+
+ /*
+ * Block out status during disconnect.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ hp->h_err = drmach_board_disconnect(bp->b_id, &hp->h_opts);
+ if (hp->h_err && hp->h_err->e_code == EX86_WALK_DEPENDENCY) {
+ /*
+ * Other boards have dependency on this board. No device nodes
+ * have been destroyed so keep current board status.
+ */
+ goto disconnect_done;
+ }
+
+ DR_DEVS_DISCONNECT(bp, devset);
+
+ ASSERT((DR_DEVS_ATTACHED(bp) & devset) == 0);
+
+ /*
+ * Update per-device state transitions.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, i);
+ if (dr_disconnect_cpu(cp) == 0)
+ dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY);
+ else if (cp->sbc_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &cp->sbc_cm.sbdev_error);
+
+ ASSERT(cp->sbc_cm.sbdev_error == NULL);
+ }
+
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i))
+ continue;
+
+ mp = dr_get_mem_unit(bp, i);
+ if (dr_disconnect_mem(mp) == 0)
+ dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY);
+ else if (mp->sbm_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &mp->sbm_cm.sbdev_error);
+
+ ASSERT(mp->sbm_cm.sbdev_error == NULL);
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i))
+ continue;
+
+ ip = dr_get_io_unit(bp, i);
+ if (dr_disconnect_io(ip) == 0)
+ dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY);
+ else if (ip->sbi_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &ip->sbi_cm.sbdev_error);
+
+ ASSERT(ip->sbi_cm.sbdev_error == NULL);
+ }
+
+ if (hp->h_err) {
+ /*
+ * For certain errors, drmach_board_disconnect will mark
+ * the board as unusable; in these cases the devtree must
+ * be purged so that status calls will succeed.
+ * XXX
+ * This implementation checks for discrete error codes -
+ * someday, the i/f to drmach_board_disconnect should be
+ * changed to avoid the e_code testing.
+ */
+ if (hp->h_err->e_code == EX86_DEPROBE) {
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ if (drmach_board_deprobe(bp->b_id))
+ goto disconnect_done;
+ else
+ bp->b_ndev = 0;
+ }
+ }
+
+ /*
+	 * Once all the components on a board have been disconnected,
+	 * the board's state can transition to disconnected and
+	 * the deprobe can take place.
+ */
+ if (hp->h_err == NULL && DR_DEVS_PRESENT(bp) == 0) {
+ dr_board_transition(bp, DR_STATE_OCCUPIED);
+ bp->b_rstate = SBD_STAT_DISCONNECTED;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ hp->h_err = drmach_board_deprobe(bp->b_id);
+
+ if (hp->h_err == NULL) {
+ bp->b_ndev = 0;
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+ }
+
+disconnect_done:
+ dr_unlock_status(bp);
+
+ return (0);
+}
+
+/*
+ * Check if a particular device is a valid target of the current
+ * operation. Return 1 if it is a valid target, and 0 otherwise.
+ */
+static int
+dr_dev_is_target(dr_dev_unit_t *dp, int present_only, uint_t uset)
+{
+ dr_common_unit_t *cp;
+ int is_present;
+ int is_attached;
+
+ cp = &dp->du_common;
+
+ /* check if the user requested this device */
+ if ((uset & (1 << cp->sbdev_unum)) == 0) {
+ return (0);
+ }
+
+ is_present = DR_DEV_IS_PRESENT(cp) ? 1 : 0;
+ is_attached = DR_DEV_IS_ATTACHED(cp) ? 1 : 0;
+
+ /*
+ * If the present_only flag is set, a valid target
+ * must be present but not attached. Otherwise, it
+ * must be both present and attached.
+ */
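+	/*
+	 * Truth table for the test below (is_present must be set):
+	 *
+	 *	present_only	is_attached	valid target?
+	 *	     1		     0		    yes
+	 *	     1		     1		    no
+	 *	     0		     1		    yes
+	 *	     0		     0		    no
+	 */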
+ if (is_present && (present_only ^ is_attached)) {
+ /* sanity check */
+ ASSERT(cp->sbdev_id != (drmachid_t)0);
+
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+dr_dev_make_list(dr_handle_t *hp, sbd_comp_type_t type, int present_only,
+ dr_common_unit_t ***devlist, int *devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int unum;
+ int nunits;
+ uint_t uset;
+ int len;
+ dr_common_unit_t **list, **wp;
+
+ switch (type) {
+ case SBD_COMP_CPU:
+ nunits = MAX_CPU_UNITS_PER_BOARD;
+ break;
+ case SBD_COMP_MEM:
+ nunits = MAX_MEM_UNITS_PER_BOARD;
+ break;
+ case SBD_COMP_IO:
+ nunits = MAX_IO_UNITS_PER_BOARD;
+ break;
+ default:
+ /* catch this in debug kernels */
+ ASSERT(0);
+ break;
+ }
+
+ /* allocate list storage. */
+ len = sizeof (dr_common_unit_t *) * (nunits + 1);
+ list = kmem_zalloc(len, KM_SLEEP);
+
+ /* record length of storage in first element */
+ *list++ = (dr_common_unit_t *)(uintptr_t)len;
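+	/*
+	 * Resulting buffer layout; the hidden length element is what
+	 * lets dr_dev_clean_up() free the allocation later:
+	 *
+	 *	list[-1]		encoded allocation length (len)
+	 *	list[0 .. nunits-1]	matching unit pointers, NULL-padded
+	 */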
+
+ /* get bit array signifying which units are to be involved */
+ uset = DEVSET_GET_UNITSET(hp->h_devset, type);
+
+ /*
+ * Adjust the loop count for CPU devices since all cores
+ * in a CMP will be examined in a single iteration.
+ */
+ if (type == SBD_COMP_CPU) {
+ nunits = MAX_CMP_UNITS_PER_BOARD;
+ }
+
+ /* populate list */
+ for (wp = list, unum = 0; unum < nunits; unum++) {
+ dr_dev_unit_t *dp;
+ int core;
+ int cunum;
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, type, unum);
+ if (dr_dev_is_target(dp, present_only, uset)) {
+ *wp++ = &dp->du_common;
+ }
+
+ /* further processing is only required for CPUs */
+ if (type != SBD_COMP_CPU) {
+ continue;
+ }
+
+ /*
+ * Add any additional cores from the current CPU
+ * device. This is to ensure that all the cores
+ * are grouped together in the device list, and
+ * consequently sequenced together during the actual
+ * operation.
+ */
+ for (core = 1; core < MAX_CORES_PER_CMP; core++) {
+ cunum = DR_CMP_CORE_UNUM(unum, core);
+ dp = DR_GET_BOARD_DEVUNIT(bp, type, cunum);
+
+ if (dr_dev_is_target(dp, present_only, uset)) {
+ *wp++ = &dp->du_common;
+ }
+ }
+ }
+
+ /* calculate number of units in list, return result and list pointer */
+ *devnum = wp - list;
+ *devlist = list;
+}
+
+static void
+dr_dev_clean_up(dr_handle_t *hp, dr_common_unit_t **list, int devnum)
+{
+ int len;
+ int n = 0;
+ dr_common_unit_t *cp, **rp = list;
+
+ /*
+ * move first encountered unit error to handle if handle
+ * does not yet have a recorded error.
+ */
+ if (hp->h_err == NULL) {
+ while (n++ < devnum) {
+ cp = *rp++;
+ if (cp->sbdev_error != NULL) {
+ hp->h_err = cp->sbdev_error;
+ cp->sbdev_error = NULL;
+ break;
+ }
+ }
+ }
+
+ /* free remaining unit errors */
+ while (n++ < devnum) {
+ cp = *rp++;
+ if (cp->sbdev_error != NULL) {
+ sbd_err_clear(&cp->sbdev_error);
+ cp->sbdev_error = NULL;
+ }
+ }
+
+ /* free list */
+ list -= 1;
+ len = (int)(uintptr_t)list[0];
+ kmem_free(list, len);
+}
+
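+/*
+ * Generic walker over a board's units of a single type, used by the
+ * attach, release and detach paths below. The contract: pre_op may
+ * veto the whole batch; op is applied to each unit in turn; post_op
+ * and board_op each run once over the full list; dr_dev_clean_up()
+ * then harvests any per-unit errors into the handle and frees the
+ * list.
+ */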
+static int
+dr_dev_walk(dr_handle_t *hp, sbd_comp_type_t type, int present_only,
+ int (*pre_op)(dr_handle_t *, dr_common_unit_t **, int),
+ void (*op)(dr_handle_t *, dr_common_unit_t *),
+ int (*post_op)(dr_handle_t *, dr_common_unit_t **, int),
+ void (*board_op)(dr_handle_t *, dr_common_unit_t **, int))
+{
+ int devnum, rv;
+ dr_common_unit_t **devlist;
+
+ dr_dev_make_list(hp, type, present_only, &devlist, &devnum);
+
+ rv = 0;
+ if (devnum > 0) {
+ rv = (*pre_op)(hp, devlist, devnum);
+ if (rv == 0) {
+ int n;
+
+ for (n = 0; n < devnum; n++)
+ (*op)(hp, devlist[n]);
+
+ rv = (*post_op)(hp, devlist, devnum);
+
+ (*board_op)(hp, devlist, devnum);
+ }
+ }
+
+ dr_dev_clean_up(hp, devlist, devnum);
+ return (rv);
+}
+
+/*ARGSUSED*/
+static int
+dr_dev_noop(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (0);
+}
+
+static void
+dr_attach_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int i;
+ dr_devset_t devs_unattached, devs_present;
+ static fn_t f = "dr_attach_update_state";
+
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+
+ if (dr_check_unit_attached(cp) == -1) {
+ PR_ALL("%s: ERROR %s not attached\n",
+ f, cp->sbdev_path);
+ continue;
+ }
+
+ DR_DEV_SET_ATTACHED(cp);
+
+ dr_device_transition(cp, DR_STATE_CONFIGURED);
+ cp->sbdev_cond = SBD_COND_OK;
+ }
+
+ devs_present = DR_DEVS_PRESENT(bp);
+ devs_unattached = DR_DEVS_UNATTACHED(bp);
+
+ switch (bp->b_state) {
+ case DR_STATE_CONNECTED:
+ case DR_STATE_UNCONFIGURED:
+ ASSERT(devs_present);
+
+ if (devs_unattached == 0) {
+ /*
+ * All devices finally attached.
+ */
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_cond = SBD_COND_OK;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ } else if (devs_present != devs_unattached) {
+ /*
+ * Only some devices are fully attached.
+ */
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+ break;
+
+ case DR_STATE_PARTIAL:
+ ASSERT(devs_present);
+ /*
+ * All devices finally attached.
+ */
+ if (devs_unattached == 0) {
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_cond = SBD_COND_OK;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
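+/*
+ * Configure devices in a fixed order: CPUs first, then memory, then
+ * IO. Each subsequent class is walked only if the previous walk did
+ * not return a negative value.
+ */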
+static void
+dr_dev_configure(dr_handle_t *hp)
+{
+ int rv;
+
+ rv = dr_dev_walk(hp, SBD_COMP_CPU, 1,
+ dr_pre_attach_cpu,
+ dr_attach_cpu,
+ dr_post_attach_cpu,
+ dr_attach_update_state);
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_MEM, 1,
+ dr_pre_attach_mem,
+ dr_attach_mem,
+ dr_post_attach_mem,
+ dr_attach_update_state);
+ }
+
+ if (rv >= 0) {
+ (void) dr_dev_walk(hp, SBD_COMP_IO, 1,
+ dr_pre_attach_io,
+ dr_attach_io,
+ dr_post_attach_io,
+ dr_attach_update_state);
+ }
+}
+
+static void
+dr_release_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(devlist))
+ _NOTE(ARGUNUSED(devnum))
+
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+	 * If all of the board's attached components have been
+	 * released, transfer the board to the RELEASE state.
+ */
+ if ((bp->b_state != DR_STATE_RELEASE) &&
+ (DR_DEVS_RELEASED(bp) == DR_DEVS_ATTACHED(bp))) {
+ dr_board_transition(bp, DR_STATE_RELEASE);
+ hp->h_bd->b_busy = 1;
+ }
+}
+
+/* called by dr_release_done [below] and dr_release_mem_done [dr_mem.c] */
+int
+dr_release_dev_done(dr_common_unit_t *cp)
+{
+ if (cp->sbdev_state == DR_STATE_RELEASE) {
+ ASSERT(DR_DEV_IS_RELEASED(cp));
+
+ DR_DEV_SET_UNREFERENCED(cp);
+
+ dr_device_transition(cp, DR_STATE_UNREFERENCED);
+
+ return (0);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+dr_release_done(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ dr_board_t *bp;
+ static fn_t f = "dr_release_done";
+
+ PR_ALL("%s...\n", f);
+
+ /* get board pointer & sanity check */
+ bp = cp->sbdev_bp;
+ ASSERT(bp == hp->h_bd);
+
+ /*
+ * Transfer the device which just completed its release
+ * to the UNREFERENCED state.
+ */
+ switch (cp->sbdev_type) {
+ case SBD_COMP_MEM:
+ dr_release_mem_done(cp);
+ break;
+
+ default:
+ DR_DEV_SET_RELEASED(cp);
+
+ dr_device_transition(cp, DR_STATE_RELEASE);
+
+ (void) dr_release_dev_done(cp);
+ break;
+ }
+
+ /*
+	 * If the board is in the RELEASE state and every released
+	 * device is now unreferenced, transfer the board to the
+	 * UNREFERENCED state.
+ */
+ if ((bp->b_state == DR_STATE_RELEASE) &&
+ (DR_DEVS_RELEASED(bp) == DR_DEVS_UNREFERENCED(bp))) {
+ dr_board_transition(bp, DR_STATE_UNREFERENCED);
+ bp->b_busy = 1;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+}
+
+static void
+dr_dev_release_mem(dr_handle_t *hp, dr_common_unit_t *dv)
+{
+ dr_release_mem(dv);
+ dr_release_done(hp, dv);
+}
+
+static void
+dr_dev_release(dr_handle_t *hp)
+{
+ int rv;
+
+ hp->h_bd->b_busy = 1;
+
+ rv = dr_dev_walk(hp, SBD_COMP_CPU, 0,
+ dr_pre_release_cpu,
+ dr_release_done,
+ dr_dev_noop,
+ dr_release_update_state);
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_MEM, 0,
+ dr_pre_release_mem,
+ dr_dev_release_mem,
+ dr_dev_noop,
+ dr_release_update_state);
+ }
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_IO, 0,
+ dr_pre_release_io,
+ dr_release_done,
+ dr_dev_noop,
+		    dr_release_update_state);
+	}
+
+ if (rv < 0)
+ hp->h_bd->b_busy = 0;
+ /* else, b_busy will be cleared in dr_detach_update_state() */
+}
+
+static void
+dr_detach_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int i;
+ dr_state_t bstate;
+ static fn_t f = "dr_detach_update_state";
+
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+
+ if (dr_check_unit_attached(cp) >= 0) {
+ /*
+ * Device is still attached probably due
+ * to an error. Need to keep track of it.
+ */
+ PR_ALL("%s: ERROR %s not detached\n",
+ f, cp->sbdev_path);
+
+ continue;
+ }
+
+ DR_DEV_CLR_ATTACHED(cp);
+ DR_DEV_CLR_RELEASED(cp);
+ DR_DEV_CLR_UNREFERENCED(cp);
+ dr_device_transition(cp, DR_STATE_UNCONFIGURED);
+ }
+
+ bstate = bp->b_state;
+ if (bstate != DR_STATE_UNCONFIGURED) {
+ if (DR_DEVS_PRESENT(bp) == DR_DEVS_UNATTACHED(bp)) {
+ /*
+ * All devices are finally detached.
+ */
+ dr_board_transition(bp, DR_STATE_UNCONFIGURED);
+ hp->h_bd->b_ostate = SBD_STAT_UNCONFIGURED;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ } else if ((bp->b_state != DR_STATE_PARTIAL) &&
+ (DR_DEVS_ATTACHED(bp) !=
+ DR_DEVS_PRESENT(bp))) {
+ /*
+ * Some devices remain attached.
+ */
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+
+ if ((hp->h_devset & DR_DEVS_UNATTACHED(bp)) == hp->h_devset)
+ hp->h_bd->b_busy = 0;
+ }
+}
+
+static int
+dr_dev_unconfigure(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+ * Block out status during IO unconfig.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ (void) dr_dev_walk(hp, SBD_COMP_IO, 0,
+ dr_pre_detach_io,
+ dr_detach_io,
+ dr_post_detach_io,
+ dr_detach_update_state);
+
+ dr_unlock_status(bp);
+
+ (void) dr_dev_walk(hp, SBD_COMP_CPU, 0,
+ dr_pre_detach_cpu,
+ dr_detach_cpu,
+ dr_post_detach_cpu,
+ dr_detach_update_state);
+
+ (void) dr_dev_walk(hp, SBD_COMP_MEM, 0,
+ dr_pre_detach_mem,
+ dr_detach_mem,
+ dr_post_detach_mem,
+ dr_detach_update_state);
+
+ return (0);
+}
+
+static void
+dr_dev_cancel(dr_handle_t *hp)
+{
+ int i;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_dev_cancel";
+
+ PR_ALL("%s...\n", f);
+
+ /*
+ * Only devices which have been "released" are
+ * subject to cancellation.
+ */
+ devset = hp->h_devset & DR_DEVS_RELEASED(bp);
+
+ /*
+	 * Nothing to do for CPUs or IO other than changing
+	 * their state back.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+ dr_state_t nstate;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, i);
+ if (dr_cancel_cpu(cp) == 0)
+ nstate = DR_STATE_CONFIGURED;
+ else
+ nstate = DR_STATE_FATAL;
+
+ dr_device_transition(&cp->sbc_cm, nstate);
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i))
+ continue;
+ ip = dr_get_io_unit(bp, i);
+ dr_device_transition(&ip->sbi_cm, DR_STATE_CONFIGURED);
+ }
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+ dr_state_t nstate;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i))
+ continue;
+
+ mp = dr_get_mem_unit(bp, i);
+ if (dr_cancel_mem(mp) == 0)
+ nstate = DR_STATE_CONFIGURED;
+ else
+ nstate = DR_STATE_FATAL;
+
+ dr_device_transition(&mp->sbm_cm, nstate);
+ }
+
+ PR_ALL("%s: unreleasing devset (0x%x)\n", f, (uint_t)devset);
+
+ DR_DEVS_CANCEL(bp, devset);
+
+ if (DR_DEVS_RELEASED(bp) == 0) {
+ dr_state_t new_state;
+ /*
+ * If the board no longer has any released devices
+		 * then transfer it back to the CONFIG/PARTIAL state.
+ */
+ if (DR_DEVS_ATTACHED(bp) == DR_DEVS_PRESENT(bp))
+ new_state = DR_STATE_CONFIGURED;
+ else
+ new_state = DR_STATE_PARTIAL;
+ if (bp->b_state != new_state) {
+ dr_board_transition(bp, new_state);
+ }
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+}
+
+static int
+dr_dev_status(dr_handle_t *hp)
+{
+ int nstat, mode, ncm, sz, pbsz, pnstat;
+ dr_handle_t *shp;
+ dr_devset_t devset = 0;
+ sbd_stat_t *dstatp = NULL;
+ sbd_dev_stat_t *devstatp;
+ dr_board_t *bp;
+ drmach_status_t pstat;
+ int rv = 0;
+
+#ifdef _MULTI_DATAMODEL
+ int sz32 = 0;
+#endif /* _MULTI_DATAMODEL */
+
+ static fn_t f = "dr_dev_status";
+
+ PR_ALL("%s...\n", f);
+
+ mode = hp->h_mode;
+ shp = hp;
+ devset = shp->h_devset;
+ bp = hp->h_bd;
+
+ /*
+ * Block out disconnect, unassign, IO unconfigure and
+ * devinfo branch creation during status.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ ncm = 1;
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_type == SBD_COMP_NONE) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_ALLCMP) {
+ /*
+ * Calculate the maximum number of components possible
+ * for a board. This number will be used to size the
+ * status scratch buffer used by board and component
+ * status functions.
+ * This buffer may differ in size from what is provided
+ * by the plugin, since the known component set on the
+ * board may change between the plugin's GETNCM call, and
+			 * board may change between the plugin's GETNCM call and
+			 * the status call. Sizing will be adjusted to the plugin's
+ */
+ ncm = MAX_CPU_UNITS_PER_BOARD +
+ MAX_MEM_UNITS_PER_BOARD +
+ MAX_IO_UNITS_PER_BOARD;
+
+ } else {
+ /*
+ * In the case of c_type == SBD_COMP_NONE, and
+ * SBD_FLAG_ALLCMP not specified, only the board
+ * info is to be returned, no components.
+ */
+ ncm = 0;
+ devset = 0;
+ }
+ }
+
+ sz = sizeof (sbd_stat_t);
+ if (ncm > 1)
+ sz += sizeof (sbd_dev_stat_t) * (ncm - 1);
+
+ pbsz = (int)hp->h_sbdcmd.cmd_stat.s_nbytes;
+ pnstat = (pbsz - sizeof (sbd_stat_t)) / sizeof (sbd_dev_stat_t);
+
+ /*
+ * s_nbytes describes the size of the preallocated user
+	 * buffer into which the application is expecting to
+ * receive the sbd_stat_t and sbd_dev_stat_t structures.
+ */
+
+#ifdef _MULTI_DATAMODEL
+
+ /*
+ * More buffer space is required for the 64bit to 32bit
+ * conversion of data structures.
+ */
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ sz32 = sizeof (sbd_stat32_t);
+ if (ncm > 1)
+ sz32 += sizeof (sbd_dev_stat32_t) * (ncm - 1);
+ pnstat = (pbsz - sizeof (sbd_stat32_t))/
+ sizeof (sbd_dev_stat32_t);
+ }
+
+ sz += sz32;
+#endif
+ /*
+ * Since one sbd_dev_stat_t is included in the sbd_stat_t,
+ * increment the plugin's nstat count.
+ */
+ ++pnstat;
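+	/*
+	 * Illustrative sizing: if the plugin preallocated
+	 * pbsz = sizeof (sbd_stat_t) + 3 * sizeof (sbd_dev_stat_t),
+	 * the arithmetic above yields pnstat = 4, since the first
+	 * sbd_dev_stat_t is embedded in the sbd_stat_t itself.
+	 */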
+
+ if (bp->b_id == 0) {
+ bzero(&pstat, sizeof (pstat));
+ } else {
+ sbd_error_t *err;
+
+ err = drmach_status(bp->b_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&hp->h_err, &err);
+ rv = EIO;
+ goto status_done;
+ }
+ }
+
+ dstatp = (sbd_stat_t *)(void *)GETSTRUCT(char, sz);
+
+ devstatp = &dstatp->s_stat[0];
+
+ dstatp->s_board = bp->b_num;
+
+ /*
+ * Detect transitions between empty and disconnected.
+ */
+ if (!pstat.empty && (bp->b_rstate == SBD_STAT_EMPTY))
+ bp->b_rstate = SBD_STAT_DISCONNECTED;
+ else if (pstat.empty && (bp->b_rstate == SBD_STAT_DISCONNECTED))
+ bp->b_rstate = SBD_STAT_EMPTY;
+
+ dstatp->s_rstate = bp->b_rstate;
+ dstatp->s_ostate = bp->b_ostate;
+ dstatp->s_cond = bp->b_cond = pstat.cond;
+ dstatp->s_busy = bp->b_busy | pstat.busy;
+ dstatp->s_time = bp->b_time;
+ dstatp->s_power = pstat.powered;
+ dstatp->s_assigned = bp->b_assigned = pstat.assigned;
+ dstatp->s_nstat = nstat = 0;
+ bcopy(&pstat.type[0], &dstatp->s_type[0], SBD_TYPE_LEN);
+ bcopy(&pstat.info[0], &dstatp->s_info[0], SBD_MAX_INFO);
+
+ devset &= DR_DEVS_PRESENT(bp);
+ if (devset == 0) {
+ /*
+ * No device chosen.
+ */
+ PR_ALL("%s: no device present\n", f);
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT))
+ if ((nstat = dr_cpu_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT))
+ if ((nstat = dr_mem_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT))
+ if ((nstat = dr_io_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ /*
+ * Due to a possible change in number of components between
+ * the time of plugin's GETNCM call and now, there may be
+ * more or less components than the plugin's buffer can
+ * hold. Adjust s_nstat accordingly.
+ */
+
+ dstatp->s_nstat = dstatp->s_nstat > pnstat ? pnstat : dstatp->s_nstat;
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ int i, j;
+ sbd_stat32_t *dstat32p;
+
+ dstat32p = (sbd_stat32_t *)devstatp;
+
+ /* Alignment Paranoia */
+ if ((ulong_t)dstat32p & 0x1) {
+ PR_ALL("%s: alignment: sz=0x%lx dstat32p=0x%p\n",
+ f, sizeof (sbd_stat32_t), (void *)dstat32p);
+ DR_OP_INTERNAL_ERROR(hp);
+ rv = EINVAL;
+ goto status_done;
+ }
+
+ /* paranoia: detect buffer overrun */
+ if ((caddr_t)&dstat32p->s_stat[dstatp->s_nstat] >
+ ((caddr_t)dstatp) + sz) {
+ DR_OP_INTERNAL_ERROR(hp);
+ rv = EINVAL;
+ goto status_done;
+ }
+
+ /* copy sbd_stat_t structure members */
+#define _SBD_STAT(t, m) dstat32p->m = (t)dstatp->m
+ _SBD_STAT(int32_t, s_board);
+ _SBD_STAT(int32_t, s_rstate);
+ _SBD_STAT(int32_t, s_ostate);
+ _SBD_STAT(int32_t, s_cond);
+ _SBD_STAT(int32_t, s_busy);
+ _SBD_STAT(time32_t, s_time);
+ _SBD_STAT(uint32_t, s_power);
+ _SBD_STAT(uint32_t, s_assigned);
+ _SBD_STAT(int32_t, s_nstat);
+ bcopy(&dstatp->s_type[0], &dstat32p->s_type[0],
+ SBD_TYPE_LEN);
+ bcopy(&dstatp->s_info[0], &dstat32p->s_info[0],
+ SBD_MAX_INFO);
+#undef _SBD_STAT
+
+ for (i = 0; i < dstatp->s_nstat; i++) {
+ sbd_dev_stat_t *dsp = &dstatp->s_stat[i];
+ sbd_dev_stat32_t *ds32p = &dstat32p->s_stat[i];
+#define _SBD_DEV_STAT(t, m) ds32p->m = (t)dsp->m
+
+ /* copy sbd_cm_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, ds_type);
+ _SBD_DEV_STAT(int32_t, ds_unit);
+ _SBD_DEV_STAT(int32_t, ds_ostate);
+ _SBD_DEV_STAT(int32_t, ds_cond);
+ _SBD_DEV_STAT(int32_t, ds_busy);
+ _SBD_DEV_STAT(int32_t, ds_suspend);
+ _SBD_DEV_STAT(time32_t, ds_time);
+ bcopy(&dsp->ds_name[0], &ds32p->ds_name[0],
+ OBP_MAXPROPNAME);
+
+ switch (dsp->ds_type) {
+ case SBD_COMP_CPU:
+ /* copy sbd_cpu_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_isbootproc);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_cpuid);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_speed);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_ecache);
+ break;
+
+ case SBD_COMP_MEM:
+ /* copy sbd_mem_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_mem.ms_interleave);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_basepfn);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_totpages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_detpages);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_pageslost);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_managed_pages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_pages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_first);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_last);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_cage_enabled);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_peer_is_target);
+ bcopy(&dsp->d_mem.ms_peer_ap_id[0],
+ &ds32p->d_mem.ms_peer_ap_id[0],
+ sizeof (ds32p->d_mem.ms_peer_ap_id));
+ break;
+
+ case SBD_COMP_IO:
+ /* copy sbd_io_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_io.is_referenced);
+ _SBD_DEV_STAT(int32_t, d_io.is_unsafe_count);
+
+ for (j = 0; j < SBD_MAX_UNSAFE; j++)
+ _SBD_DEV_STAT(int32_t,
+ d_io.is_unsafe_list[j]);
+
+ bcopy(&dsp->d_io.is_pathname[0],
+ &ds32p->d_io.is_pathname[0], MAXPATHLEN);
+ break;
+
+ case SBD_COMP_CMP:
+ /* copy sbd_cmp_stat_t structure members */
+ bcopy(&dsp->d_cmp.ps_cpuid[0],
+ &ds32p->d_cmp.ps_cpuid[0],
+ sizeof (ds32p->d_cmp.ps_cpuid));
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_ncores);
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_speed);
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_ecache);
+ break;
+
+ default:
+ cmn_err(CE_WARN, "%s: unknown dev type (%d)",
+ f, (int)dsp->ds_type);
+ rv = EFAULT;
+ goto status_done;
+ }
+#undef _SBD_DEV_STAT
+ }
+
+ if (ddi_copyout((void *)dstat32p,
+ hp->h_sbdcmd.cmd_stat.s_statp, pbsz, mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout status "
+ "for board %d", f, bp->b_num);
+ rv = EFAULT;
+ goto status_done;
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+
+ if (ddi_copyout((void *)dstatp, hp->h_sbdcmd.cmd_stat.s_statp,
+ pbsz, mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout status for board %d",
+ f, bp->b_num);
+ rv = EFAULT;
+ goto status_done;
+ }
+
+status_done:
+ if (dstatp != NULL)
+ FREESTRUCT(dstatp, char, sz);
+
+ dr_unlock_status(bp);
+
+ return (rv);
+}
+
+static int
+dr_get_ncm(dr_handle_t *hp)
+{
+ int i;
+ int ncm = 0;
+ dr_devset_t devset;
+
+ devset = DR_DEVS_PRESENT(hp->h_bd);
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_type != SBD_COMP_NONE)
+ devset &= DEVSET(hp->h_sbdcmd.cmd_cm.c_id.c_type,
+ DEVSET_ANYUNIT);
+
+ /*
+ * Handle CPUs first to deal with possible CMP
+ * devices. If the CPU is a CMP, we need to only
+ * increment ncm once even if there are multiple
+ * cores for that CMP present in the devset.
+ */
+ for (i = 0; i < MAX_CMP_UNITS_PER_BOARD; i++) {
+ if (devset & DEVSET(SBD_COMP_CMP, i)) {
+ ncm++;
+ }
+ }
+
+ /* eliminate the CPU information from the devset */
+ devset &= ~(DEVSET(SBD_COMP_CMP, DEVSET_ANYUNIT));
+
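+	/* population count: one component per remaining set bit */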
+ for (i = 0; i < (sizeof (dr_devset_t) * 8); i++) {
+ ncm += devset & 0x1;
+ devset >>= 1;
+ }
+
+ return (ncm);
+}
+
+/* used by dr_mem.c */
+/* TODO: eliminate dr_boardlist */
+dr_board_t *
+dr_lookup_board(int board_num)
+{
+ dr_board_t *bp;
+
+ ASSERT(board_num >= 0 && board_num < MAX_BOARDS);
+
+ bp = &dr_boardlist[board_num];
+ ASSERT(bp->b_num == board_num);
+
+ return (bp);
+}
+
+static dr_dev_unit_t *
+dr_get_dev_unit(dr_board_t *bp, sbd_comp_type_t nt, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, nt, unit_num);
+ ASSERT(dp->du_common.sbdev_bp == bp);
+ ASSERT(dp->du_common.sbdev_unum == unit_num);
+ ASSERT(dp->du_common.sbdev_type == nt);
+
+ return (dp);
+}
+
+dr_cpu_unit_t *
+dr_get_cpu_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_CPU_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_CPU, unit_num);
+ return (&dp->du_cpu);
+}
+
+dr_mem_unit_t *
+dr_get_mem_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_MEM_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_MEM, unit_num);
+ return (&dp->du_mem);
+}
+
+dr_io_unit_t *
+dr_get_io_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_IO_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_IO, unit_num);
+ return (&dp->du_io);
+}
+
+dr_common_unit_t *
+dr_get_common_unit(dr_board_t *bp, sbd_comp_type_t nt, int unum)
+{
+ dr_dev_unit_t *dp;
+
+ dp = dr_get_dev_unit(bp, nt, unum);
+ return (&dp->du_common);
+}
+
+static dr_devset_t
+dr_dev2devset(sbd_comp_id_t *cid)
+{
+ static fn_t f = "dr_dev2devset";
+
+ dr_devset_t devset;
+ int unit = cid->c_unit;
+
+ switch (cid->c_type) {
+ case SBD_COMP_NONE:
+ devset = DEVSET(SBD_COMP_CPU, DEVSET_ANYUNIT);
+ devset |= DEVSET(SBD_COMP_MEM, DEVSET_ANYUNIT);
+ devset |= DEVSET(SBD_COMP_IO, DEVSET_ANYUNIT);
+ PR_ALL("%s: COMP_NONE devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_CPU:
+ if ((unit > MAX_CPU_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid cpu unit# = %d",
+ f, unit);
+ devset = 0;
+ } else {
+ /*
+ * Generate a devset that includes all the
+ * cores of a CMP device. If this is not a
+ * CMP, the extra cores will be eliminated
+ * later since they are not present. This is
+ * also true for CMP devices that do not have
+ * all cores active.
+ */
+ devset = DEVSET(SBD_COMP_CMP, unit);
+ }
+
+ PR_ALL("%s: CPU devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_MEM:
+ if (unit == SBD_NULL_UNIT) {
+ unit = 0;
+ cid->c_unit = 0;
+ }
+
+ if ((unit > MAX_MEM_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid mem unit# = %d",
+ f, unit);
+ devset = 0;
+ } else
+ devset = DEVSET(cid->c_type, unit);
+
+ PR_ALL("%s: MEM devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_IO:
+ if ((unit > MAX_IO_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid io unit# = %d",
+ f, unit);
+ devset = 0;
+ } else
+ devset = DEVSET(cid->c_type, unit);
+
+ PR_ALL("%s: IO devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ default:
+ case SBD_COMP_UNKNOWN:
+ devset = 0;
+ break;
+ }
+
+ return (devset);
+}
+
+/*
+ * Converts a dynamic attachment point name to an SBD_COMP_* type.
+ * Returns SBD_COMP_UNKNOWN if the name is not recognized.
+ */
+static int
+dr_dev_type_to_nt(char *type)
+{
+ int i;
+
+ for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++)
+ if (strcmp(dr_devattr[i].s_devtype, type) == 0)
+ break;
+
+ return (dr_devattr[i].s_nodetype);
+}
+
+/*
+ * Converts an SBD_COMP_* type to a dynamic attachment point name.
+ * Returns NULL if the SBD_COMP_* type is not recognized.
+ */
+char *
+dr_nt_to_dev_type(int nt)
+{
+ int i;
+
+ for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++)
+ if (dr_devattr[i].s_nodetype == nt)
+ break;
+
+ return (dr_devattr[i].s_devtype);
+}
+
+/*
+ * State transition policy is that if there is some component for which
+ * the state transition is valid, then let it through. The exception is
+ * SBD_CMD_DISCONNECT. On disconnect, the state transition must be valid
+ * for ALL components.
+ * Returns the state that is in error, if any.
+ */
+static int
+dr_check_transition(dr_board_t *bp, dr_devset_t *devsetp,
+ struct dr_state_trans *transp, int cmd)
+{
+ int s, ut;
+ int state_err = 0;
+ dr_devset_t devset;
+ dr_common_unit_t *cp;
+ static fn_t f = "dr_check_transition";
+
+ devset = *devsetp;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_CPU, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_CPU, ut);
+ }
+ }
+ }
+ }
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_MEM, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_MEM, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_MEM, ut);
+ }
+ }
+ }
+ }
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_IO, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_IO, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_IO, ut);
+ }
+ }
+ }
+ }
+
+ PR_ALL("%s: requested devset = 0x%x, final devset = 0x%x\n",
+ f, (uint_t)*devsetp, (uint_t)devset);
+
+ *devsetp = devset;
+ /*
+ * If there are some remaining components for which
+ * this state transition is valid, then allow them
+ * through, otherwise if none are left then return
+ * the state error. The exception is SBD_CMD_DISCONNECT.
+ * On disconnect, the state transition must be valid for ALL
+ * components.
+ */
+ if (cmd == SBD_CMD_DISCONNECT)
+ return (state_err);
+ return (devset ? 0 : state_err);
+}
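+
+/*
+ * Illustrative case: a request naming two CPUs, one in a state valid
+ * for the transition and one not, proceeds for the valid unit alone;
+ * the same mix on SBD_CMD_DISCONNECT fails outright, since disconnect
+ * demands a valid transition for every named component.
+ */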
+
+void
+dr_device_transition(dr_common_unit_t *cp, dr_state_t st)
+{
+ PR_STATE("%s STATE %s(%d) -> %s(%d)\n",
+ cp->sbdev_path,
+ state_str[cp->sbdev_state], cp->sbdev_state,
+ state_str[st], st);
+
+ cp->sbdev_state = st;
+ if (st == DR_STATE_CONFIGURED) {
+ cp->sbdev_ostate = SBD_STAT_CONFIGURED;
+ if (cp->sbdev_bp->b_ostate != SBD_STAT_CONFIGURED) {
+ cp->sbdev_bp->b_ostate = SBD_STAT_CONFIGURED;
+ (void) drv_getparm(TIME,
+ (void *) &cp->sbdev_bp->b_time);
+ }
+ } else
+ cp->sbdev_ostate = SBD_STAT_UNCONFIGURED;
+
+ (void) drv_getparm(TIME, (void *) &cp->sbdev_time);
+}
+
+static void
+dr_board_transition(dr_board_t *bp, dr_state_t st)
+{
+ PR_STATE("BOARD %d STATE: %s(%d) -> %s(%d)\n",
+ bp->b_num,
+ state_str[bp->b_state], bp->b_state,
+ state_str[st], st);
+
+ bp->b_state = st;
+}
+
+void
+dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...)
+{
+ sbd_error_t *err;
+ va_list args;
+
+ va_start(args, fmt);
+ err = drerr_new_v(code, fmt, args);
+ va_end(args);
+
+ if (ce != CE_IGNORE)
+ sbd_err_log(err, ce);
+
+ DRERR_SET_C(&hp->h_err, &err);
+}
+
+void
+dr_dev_err(int ce, dr_common_unit_t *cp, int code)
+{
+ sbd_error_t *err;
+
+ err = drerr_new(0, code, cp->sbdev_path, NULL);
+
+ if (ce != CE_IGNORE)
+ sbd_err_log(err, ce);
+
+ DRERR_SET_C(&cp->sbdev_error, &err);
+}
+
+/*
+ * A callback routine. Called from the drmach layer as a result of a
+ * call to drmach_board_find_devices() from dr_init_devlists().
+ */
+static sbd_error_t *
+dr_dev_found(void *data, const char *name, int unum, drmachid_t id)
+{
+ dr_board_t *bp = data;
+ dr_dev_unit_t *dp;
+ int nt;
+ static fn_t f = "dr_dev_found";
+
+ PR_ALL("%s (board = %d, name = %s, unum = %d, id = %p)...\n",
+ f, bp->b_num, name, unum, id);
+
+ nt = dr_dev_type_to_nt((char *)name);
+ if (nt == SBD_COMP_UNKNOWN) {
+ /*
+		 * This should not happen. When it does, it indicates
+		 * a mismatch between the devices supported by the
+		 * drmach layer and those supported by this layer.
+ */
+ return (DR_INTERNAL_ERROR());
+ }
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, nt, unum);
+
+ /* sanity check */
+ ASSERT(dp->du_common.sbdev_bp == bp);
+ ASSERT(dp->du_common.sbdev_unum == unum);
+ ASSERT(dp->du_common.sbdev_type == nt);
+
+ /* render dynamic attachment point path of this unit */
+ (void) snprintf(dp->du_common.sbdev_path,
+ sizeof (dp->du_common.sbdev_path), "%s::%s%d",
+ bp->b_path, name, DR_UNUM2SBD_UNUM(unum, nt));
+
+ dp->du_common.sbdev_id = id;
+ DR_DEV_SET_PRESENT(&dp->du_common);
+
+ bp->b_ndev++;
+
+ return (NULL);
+}
+
+static sbd_error_t *
+dr_init_devlists(dr_board_t *bp)
+{
+ int i;
+ sbd_error_t *err;
+ dr_dev_unit_t *dp;
+ static fn_t f = "dr_init_devlists";
+
+ PR_ALL("%s (%s)...\n", f, bp->b_path);
+
+ /* sanity check */
+ ASSERT(bp->b_ndev == 0);
+
+ DR_DEVS_DISCONNECT(bp, (uint_t)-1);
+
+ /*
+ * This routine builds the board's devlist and initializes
+ * the common portion of the unit data structures.
+ * Note: because the common portion is considered
+ * uninitialized, the dr_get_*_unit() routines can not
+ * be used.
+ */
+
+ /*
+ * Clear out old entries, if any.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_CPU, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_CPU;
+ }
+
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_MEM, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_MEM;
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_IO, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_IO;
+ }
+
+ err = NULL;
+ if (bp->b_id) {
+ /* find devices on this board */
+ err = drmach_board_find_devices(
+ bp->b_id, bp, dr_dev_found);
+ }
+
+ return (err);
+}
+
+/*
+ * Check whether the unit represented by the given drmachid is
+ * attached. Returns 0 if it is attached, -1 otherwise.
+ */
+static int
+dr_check_unit_attached(dr_common_unit_t *cp)
+{
+ int rv = 0;
+ processorid_t cpuid;
+ uint64_t basepa, endpa;
+ struct memlist *ml;
+ extern struct memlist *phys_install;
+ sbd_error_t *err;
+ int yes;
+ static fn_t f = "dr_check_unit_attached";
+
+ switch (cp->sbdev_type) {
+ case SBD_COMP_CPU:
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+ mutex_enter(&cpu_lock);
+ if (cpu_get(cpuid) == NULL)
+ rv = -1;
+ mutex_exit(&cpu_lock);
+ break;
+
+ case SBD_COMP_MEM:
+ err = drmach_mem_get_slice_info(cp->sbdev_id,
+ &basepa, &endpa, NULL);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+
+ /*
+		 * Check whether the slice [basepa, endpa) overlaps
+		 * any segment of phys_install.
+ */
+ memlist_read_lock();
+ for (ml = phys_install; ml; ml = ml->ml_next)
+ if ((endpa <= ml->ml_address) ||
+ (basepa >= (ml->ml_address + ml->ml_size)))
+ continue;
+ else
+ break;
+ memlist_read_unlock();
+ if (ml == NULL)
+ rv = -1;
+ break;
+
+ case SBD_COMP_IO:
+ err = drmach_io_is_attached(cp->sbdev_id, &yes);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ } else if (!yes)
+ rv = -1;
+ break;
+
+ default:
+ PR_ALL("%s: unexpected nodetype(%d) for id 0x%p\n",
+ f, cp->sbdev_type, cp->sbdev_id);
+ rv = -1;
+ break;
+ }
+
+ return (rv);
+}
+
+/*
+ * See if drmach recognizes the passthru command. DRMACH expects the
+ * id to identify the thing to which the command is being applied. Using
+ * nonsense SBD terms, that information has been perversely encoded in the
+ * c_id member of the sbd_cmd_t structure. This logic reads those tea
+ * leaves, finds the associated drmach id, then calls drmach to process
+ * the passthru command.
+ */
+static int
+dr_pt_try_drmach(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ sbd_comp_id_t *comp_id = &hp->h_sbdcmd.cmd_cm.c_id;
+ drmachid_t id;
+
+ if (comp_id->c_type == SBD_COMP_NONE) {
+ id = bp->b_id;
+ } else {
+ sbd_comp_type_t nt;
+
+ nt = dr_dev_type_to_nt(comp_id->c_name);
+ if (nt == SBD_COMP_UNKNOWN) {
+ dr_op_err(CE_IGNORE, hp, ESBD_INVAL, comp_id->c_name);
+ id = 0;
+ } else {
+ /* pt command applied to dynamic attachment point */
+ dr_common_unit_t *cp;
+ cp = dr_get_common_unit(bp, nt, comp_id->c_unit);
+ id = cp->sbdev_id;
+ }
+ }
+
+ if (hp->h_err == NULL)
+ hp->h_err = drmach_passthru(id, &hp->h_opts);
+
+ return (hp->h_err == NULL ? 0 : -1);
+}
+
+static int
+dr_pt_ioctl(dr_handle_t *hp)
+{
+ int cmd, rv, len;
+ int32_t sz;
+ int found;
+ char *copts;
+ static fn_t f = "dr_pt_ioctl";
+
+ PR_ALL("%s...\n", f);
+
+ sz = hp->h_opts.size;
+ copts = hp->h_opts.copts;
+
+ if (sz == 0 || copts == (char *)NULL) {
+ cmn_err(CE_WARN, "%s: invalid passthru args", f);
+ return (EINVAL);
+ }
+
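+	/*
+	 * Match the option string against the known passthru commands
+	 * by name prefix; anything unrecognized is handed to drmach.
+	 */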
+ found = 0;
+ for (cmd = 0; cmd < (sizeof (pt_arr) / sizeof (pt_arr[0])); cmd++) {
+ len = strlen(pt_arr[cmd].pt_name);
+ found = (strncmp(pt_arr[cmd].pt_name, copts, len) == 0);
+ if (found)
+ break;
+ }
+
+ if (found)
+ rv = (*pt_arr[cmd].pt_func)(hp);
+ else
+ rv = dr_pt_try_drmach(hp);
+
+ return (rv);
+}
+
+/*
+ * Called at driver load time to determine the state and condition
+ * of an existing board in the system.
+ */
+static void
+dr_board_discovery(dr_board_t *bp)
+{
+ int i;
+ dr_devset_t devs_lost, devs_attached = 0;
+ dr_cpu_unit_t *cp;
+ dr_mem_unit_t *mp;
+ dr_io_unit_t *ip;
+ static fn_t f = "dr_board_discovery";
+
+ if (DR_DEVS_PRESENT(bp) == 0) {
+ PR_ALL("%s: board %d has no devices present\n",
+ f, bp->b_num);
+ return;
+ }
+
+ /*
+ * Check for existence of cpus.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ cp = dr_get_cpu_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&cp->sbc_cm))
+ continue;
+
+ if (dr_check_unit_attached(&cp->sbc_cm) >= 0) {
+ DR_DEV_SET_ATTACHED(&cp->sbc_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_CPU, i);
+ PR_ALL("%s: board %d, cpu-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_cpu_unit(cp);
+ }
+
+ /*
+ * Check for existence of memory.
+ */
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ mp = dr_get_mem_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&mp->sbm_cm))
+ continue;
+
+ if (dr_check_unit_attached(&mp->sbm_cm) >= 0) {
+ DR_DEV_SET_ATTACHED(&mp->sbm_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_MEM, i);
+ PR_ALL("%s: board %d, mem-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_mem_unit(mp);
+ }
+
+ /*
+ * Check for i/o state.
+ */
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ ip = dr_get_io_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&ip->sbi_cm))
+ continue;
+
+ if (dr_check_unit_attached(&ip->sbi_cm) >= 0) {
+ /*
+ * Found it!
+ */
+ DR_DEV_SET_ATTACHED(&ip->sbi_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_IO, i);
+ PR_ALL("%s: board %d, io-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_io_unit(ip);
+ }
+
+ DR_DEVS_CONFIGURE(bp, devs_attached);
+ if (devs_attached && ((devs_lost = DR_DEVS_UNATTACHED(bp)) != 0)) {
+ int ut;
+
+ /*
+ * It is not legal on board discovery to have a
+ * board that is only partially attached. A board
+ * is either all attached or all connected. If a
+ * board has at least one attached device, then
+		 * the remaining devices, if any, must have
+ * been lost or disconnected. These devices can
+ * only be recovered by a full attach from scratch.
+ * Note that devices previously in the unreferenced
+ * state are subsequently lost until the next full
+ * attach. This is necessary since the driver unload
+ * that must have occurred would have wiped out the
+ * information necessary to re-configure the device
+ * back online, e.g. memlist.
+ */
+ PR_ALL("%s: some devices LOST (" DEVSET_FMT_STR ")...\n",
+ f, DEVSET_FMT_ARG(devs_lost));
+
+ for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_CPU, ut))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, ut);
+ dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY);
+ }
+
+ for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_MEM, ut))
+ continue;
+
+ mp = dr_get_mem_unit(bp, ut);
+ dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY);
+ }
+
+ for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_IO, ut))
+ continue;
+
+ ip = dr_get_io_unit(bp, ut);
+ dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY);
+ }
+
+ DR_DEVS_DISCONNECT(bp, devs_lost);
+ }
+}
+
+static int
+dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd)
+{
+ sbd_error_t *err;
+
+ mutex_init(&bp->b_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&bp->b_slock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&bp->b_scv, NULL, CV_DRIVER, NULL);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_cond = SBD_COND_UNKNOWN;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ (void) drmach_board_lookup(bd, &bp->b_id);
+ bp->b_num = bd;
+ bp->b_dip = dip;
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_CPU_UNITS_PER_BOARD);
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_MEM_UNITS_PER_BOARD);
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_IO_UNITS_PER_BOARD);
+
+ /*
+ * Initialize the devlists
+ */
+ err = dr_init_devlists(bp);
+ if (err) {
+ sbd_err_clear(&err);
+ dr_board_destroy(bp);
+ return (-1);
+ } else if (bp->b_ndev == 0) {
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ } else {
+ /*
+ * Couldn't have made it down here without
+ * having found at least one device.
+ */
+ ASSERT(DR_DEVS_PRESENT(bp) != 0);
+ /*
+ * Check the state of any possible devices on the
+ * board.
+ */
+ dr_board_discovery(bp);
+
+ bp->b_assigned = 1;
+
+ if (DR_DEVS_UNATTACHED(bp) == 0) {
+ /*
+ * The board has no unattached devices, therefore
+ * by reason of insanity it must be configured!
+ */
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ bp->b_ostate = SBD_STAT_CONFIGURED;
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_cond = SBD_COND_OK;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ } else if (DR_DEVS_ATTACHED(bp)) {
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ bp->b_ostate = SBD_STAT_CONFIGURED;
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_cond = SBD_COND_OK;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ } else {
+ dr_board_transition(bp, DR_STATE_CONNECTED);
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+ }
+
+ return (0);
+}
+
+static void
+dr_board_destroy(dr_board_t *bp)
+{
+ PR_ALL("dr_board_destroy: num %d, path %s\n",
+ bp->b_num, bp->b_path);
+
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ /*
+ * Free up MEM unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)],
+ dr_dev_unit_t, MAX_MEM_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = NULL;
+ /*
+ * Free up CPU unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)],
+ dr_dev_unit_t, MAX_CPU_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = NULL;
+ /*
+ * Free up IO unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_IO)],
+ dr_dev_unit_t, MAX_IO_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = NULL;
+
+ mutex_destroy(&bp->b_lock);
+ mutex_destroy(&bp->b_slock);
+ cv_destroy(&bp->b_scv);
+
+ /*
+ * Reset the board structure to its initial state, otherwise it will
+ * cause trouble on the next call to dr_board_init() for the same board.
+ * dr_board_init() may be called multiple times for the same board
+ * if DR driver fails to initialize some boards.
+ */
+ bzero(bp, sizeof (*bp));
+}
+
+void
+dr_lock_status(dr_board_t *bp)
+{
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK)
+ cv_wait(&bp->b_scv, &bp->b_slock);
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+}
+
+void
+dr_unlock_status(dr_board_t *bp)
+{
+ mutex_enter(&bp->b_slock);
+ bp->b_sflags &= ~DR_BSLOCK;
+ cv_signal(&bp->b_scv);
+ mutex_exit(&bp->b_slock);
+}
+
+/*
+ * Extract flags passed via ioctl.
+ */
+int
+dr_cmd_flags(dr_handle_t *hp)
+{
+ return (hp->h_sbdcmd.cmd_cm.c_flags);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr.conf b/usr/src/uts/i86pc/io/dr/dr.conf
new file mode 100644
index 0000000000..c694f5573f
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, Intel Corporation.
+# All rights reserved.
+#
+
+# Force-attach the driver so that DR is enabled at boot time.
+ddi-forceattach=1;
diff --git a/usr/src/uts/i86pc/io/dr/dr_cpu.c b/usr/src/uts/i86pc/io/dr/dr_cpu.c
new file mode 100644
index 0000000000..a893310977
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_cpu.c
@@ -0,0 +1,885 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * CPU support routines for DR
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/kmem.h>
+#include <sys/processor.h>
+#include <sys/cpuvar.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/cpu_module.h>
+#include <sys/cmn_err.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr_cpu.c %d";
+
+int
+dr_cpu_unit_is_sane(dr_board_t *bp, dr_cpu_unit_t *cp)
+{
+#ifdef DEBUG
+ ASSERT(cp->sbc_cm.sbdev_bp == bp);
+ ASSERT(cp->sbc_cm.sbdev_type == SBD_COMP_CPU);
+#else
+ _NOTE(ARGUNUSED(bp))
+ _NOTE(ARGUNUSED(cp))
+#endif
+
+ return (1);
+}
+
+static int
+dr_errno2ecode(int error)
+{
+ int rv;
+
+ switch (error) {
+ case EBUSY:
+ rv = ESBD_BUSY;
+ break;
+ case EINVAL:
+ rv = ESBD_INVAL;
+ break;
+ case EALREADY:
+ rv = ESBD_ALREADY;
+ break;
+ case ENODEV:
+ rv = ESBD_NODEV;
+ break;
+ case ENOMEM:
+ rv = ESBD_NOMEM;
+ break;
+ default:
+ rv = ESBD_INVAL;
+ }
+
+ return (rv);
+}
+
+/*
+ * On x86, the "clock-frequency" and cache size device properties may be
+ * unavailable before the CPU starts. If they are unavailable, just set
+ * them to zero.
+ */
+static void
+dr_cpu_set_prop(dr_cpu_unit_t *cp)
+{
+ sbd_error_t *err;
+ dev_info_t *dip;
+ uint64_t clock_freq;
+ int ecache_size = 0;
+ char *cache_str = NULL;
+
+ err = drmach_get_dip(cp->sbc_cm.sbdev_id, &dip);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ return;
+ }
+
+ if (dip == NULL) {
+ DR_DEV_INTERNAL_ERROR(&cp->sbc_cm);
+ return;
+ }
+
+ /* read in the CPU speed */
+ clock_freq = (unsigned int)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "clock-frequency", 0);
+
+ /*
+ * The ecache property string is not the same
+ * for all CPU implementations.
+ */
+ switch (cp->sbc_cpu_impl) {
+ case X86_CPU_IMPL_NEHALEM_EX:
+ cache_str = "l3-cache-size";
+ break;
+ default:
+ cmn_err(CE_WARN, "Unknown cpu implementation=0x%x",
+ cp->sbc_cpu_impl);
+ break;
+ }
+
+ if (cache_str != NULL) {
+		/*
+		 * Read in the ecache size. If the property is not
+		 * found in the CPU node, it has to be kept in the
+		 * core or cmp node, so we just keep looking.
+		 */
+ ecache_size = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+ cache_str, 0);
+ }
+
+	/* convert to the proper units: Hz to MHz, bytes to MB */
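+	/*
+	 * Illustrative values: clock_freq = 2933000000 Hz gives
+	 * sbc_speed = 2933 MHz; ecache_size = 25165824 bytes gives
+	 * sbc_ecache = 24 MB.
+	 */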
+ cp->sbc_speed = (clock_freq + 500000) / 1000000;
+ cp->sbc_ecache = ecache_size / (1024 * 1024);
+}
+
+void
+dr_init_cpu_unit(dr_cpu_unit_t *cp)
+{
+ sbd_error_t *err;
+ dr_state_t new_state;
+ int cpuid;
+ int impl;
+
+ if (DR_DEV_IS_ATTACHED(&cp->sbc_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ cp->sbc_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ cp->sbc_cm.sbdev_cond = SBD_COND_OK;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ cp->sbc_cm.sbdev_cond = SBD_COND_UNKNOWN;
+ }
+
+ if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) {
+ err = drmach_cpu_get_id(cp->sbc_cm.sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ new_state = DR_STATE_FATAL;
+ goto done;
+ }
+
+ err = drmach_cpu_get_impl(cp->sbc_cm.sbdev_id, &impl);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ new_state = DR_STATE_FATAL;
+ goto done;
+ }
+ } else {
+ cp->sbc_cpu_id = -1;
+ cp->sbc_cpu_impl = -1;
+ goto done;
+ }
+
+ cp->sbc_cpu_id = cpuid;
+ cp->sbc_cpu_impl = impl;
+
+ /* if true at init time, it must always be true */
+ ASSERT(dr_cpu_unit_is_sane(cp->sbc_cm.sbdev_bp, cp));
+
+ mutex_enter(&cpu_lock);
+ if ((cpuid >= 0) && cpu[cpuid])
+ cp->sbc_cpu_flags = cpu[cpuid]->cpu_flags;
+ else
+ cp->sbc_cpu_flags = P_OFFLINE | P_POWEROFF;
+ mutex_exit(&cpu_lock);
+
+ dr_cpu_set_prop(cp);
+
+done:
+ /* delay transition until fully initialized */
+ dr_device_transition(&cp->sbc_cm, new_state);
+}
+
+int
+dr_pre_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int i;
+ static fn_t f = "dr_pre_attach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ /*
+ * Print a console message for each attachment
+ * point. For CMP devices, this means that only
+ * one message should be printed, no matter how
+ * many cores are actually present.
+ */
+ if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) {
+ cmn_err(CE_CONT, "OS configure %s",
+ up->sbc_cm.sbdev_path);
+ }
+ }
+
+ /*
+ * Block out status threads while creating
+ * devinfo tree branches
+ */
+ dr_lock_status(hp->h_bd);
+ ndi_devi_enter(ddi_root_node(), (int *)(&hp->h_ndi));
+ mutex_enter(&cpu_lock);
+
+ return (0);
+}
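+
+/*
+ * Note on lock ordering (descriptive only): dr_pre_attach_cpu()
+ * acquires the board status lock, enters the root devinfo node and
+ * takes cpu_lock, in that order; the locks remain held across the
+ * per-unit dr_attach_cpu() calls and are dropped in the reverse
+ * order by dr_post_attach_cpu().
+ */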
+
+/*ARGSUSED*/
+void
+dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+ processorid_t cpuid;
+ int rv;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ err = drmach_configure(cp->sbdev_id, 0);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ return;
+ }
+
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err)
+ sbd_err_clear(&err);
+ } else if ((rv = cpu_configure(cpuid)) != 0) {
+ dr_dev_err(CE_WARN, cp, dr_errno2ecode(rv));
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err)
+ sbd_err_clear(&err);
+ } else {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp;
+ up->sbc_cpu_id = cpuid;
+ }
+}
+
+/*
+ * dr_post_attach_cpu
+ *
+ * sbd error policy: Does not stop on error. Processes all units in list.
+ */
+int
+dr_post_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int i;
+ int errflag = 0;
+ static fn_t f = "dr_post_attach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ /* Startup and online newly-attached CPUs */
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+ struct cpu *cp;
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ cp = cpu_get(up->sbc_cpu_id);
+ if (cp == NULL) {
+ cmn_err(CE_WARN, "%s: cpu_get failed for cpu %d",
+ f, up->sbc_cpu_id);
+ continue;
+ }
+
+ if (cpu_is_poweredoff(cp)) {
+ if (cpu_poweron(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTART);
+ errflag = 1;
+ }
+ PR_CPU("%s: cpu %d powered ON\n", f, up->sbc_cpu_id);
+ }
+
+ if (cpu_is_offline(cp)) {
+ PR_CPU("%s: onlining cpu %d...\n", f, up->sbc_cpu_id);
+
+ if (cpu_online(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_ONLINE);
+ errflag = 1;
+ }
+ }
+
+ }
+
+ mutex_exit(&cpu_lock);
+ ndi_devi_exit(ddi_root_node(), hp->h_ndi);
+ dr_unlock_status(hp->h_bd);
+
+ if (errflag)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * dr_pre_release_cpu
+ *
+ * sbd error policy: Stops on first error.
+ */
+int
+dr_pre_release_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int c, cix, i, lastoffline = -1, rv = 0;
+ processorid_t cpuid;
+ struct cpu *cp;
+ dr_cpu_unit_t *up;
+ dr_devset_t devset;
+ sbd_dev_stat_t *ds;
+ static fn_t f = "dr_pre_release_cpu";
+ int cpu_flags = 0;
+
+ devset = DR_DEVS_PRESENT(hp->h_bd);
+
+ /* allocate status struct storage. */
+ ds = (sbd_dev_stat_t *) kmem_zalloc(sizeof (sbd_dev_stat_t) *
+ MAX_CPU_UNITS_PER_BOARD, KM_SLEEP);
+
+ cix = dr_cpu_status(hp, devset, ds);
+
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < devnum; i++) {
+ up = (dr_cpu_unit_t *)devlist[i];
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ continue;
+ }
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ /*
+ * On x86 systems, some CPUs can't be unconfigured.
+ * For example, CPU0 can't be unconfigured because many other
+ * components have a dependency on it.
+ * This check determines if a CPU is currently in use and
+ * returns a "Device busy" error if so.
+ */
+ for (c = 0; c < cix; c++) {
+ if (ds[c].d_cpu.cs_unit == up->sbc_cm.sbdev_unum) {
+ if (ds[c].d_cpu.cs_busy) {
+ dr_dev_err(CE_WARN, &up->sbc_cm,
+ ESBD_BUSY);
+ rv = -1;
+ break;
+ }
+ }
+ }
+ if (c < cix)
+ break;
+
+ cpuid = up->sbc_cpu_id;
+ if ((cp = cpu_get(cpuid)) == NULL) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ rv = -1;
+ break;
+ }
+
+ /* used by dr_cancel_cpu during error flow */
+ up->sbc_cpu_flags = cp->cpu_flags;
+
+ if (CPU_ACTIVE(cp)) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
+ cpu_flags = CPU_FORCED;
+
+ PR_CPU("%s: offlining cpu %d\n", f, cpuid);
+ if (cpu_offline(cp, cpu_flags)) {
+ PR_CPU("%s: failed to offline cpu %d\n", f,
+ cpuid);
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ if (disp_bound_threads(cp, 0)) {
+ cmn_err(CE_WARN, "%s: thread(s) bound "
+ "to cpu %d", f, cp->cpu_id);
+ }
+ rv = -1;
+ break;
+ } else
+ lastoffline = i;
+ }
+
+ if (!rv) {
+ sbd_error_t *err;
+
+ err = drmach_release(up->sbc_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&up->sbc_cm.sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+ }
+ }
+
+ mutex_exit(&cpu_lock);
+
+ if (rv) {
+ /*
+ * Need to unwind others since at this level (pre-release)
+ * the device state has not yet transitioned and failures
+ * will prevent us from reaching the "post" release
+ * function where states are normally transitioned.
+ */
+ for (i = lastoffline; i >= 0; i--) {
+ up = (dr_cpu_unit_t *)devlist[i];
+ (void) dr_cancel_cpu(up);
+ }
+ }
+
+ kmem_free(ds, sizeof (sbd_dev_stat_t) * MAX_CPU_UNITS_PER_BOARD);
+ return (rv);
+}
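+
+/*
+ * Unwind example (illustrative): with devnum == 4, if units 0 and 1
+ * are offlined successfully and unit 2 fails in cpu_offline(), then
+ * lastoffline == 1 and the loop above re-onlines units 1 and 0 via
+ * dr_cancel_cpu() before the error is returned.
+ */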
+
+/*
+ * dr_pre_detach_cpu
+ *
+ * sbd error policy: Stops on first error.
+ */
+int
+dr_pre_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+
+ int i;
+ int cpu_flags = 0;
+ static fn_t f = "dr_pre_detach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ /*
+ * Block out status threads while destroying devinfo tree
+ * branches
+ */
+ dr_lock_status(hp->h_bd);
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+ struct cpu *cp;
+
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ continue;
+ }
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ cp = cpu_get(up->sbc_cpu_id);
+ if (cp == NULL)
+ continue;
+
+ /*
+ * Print a console message for each attachment
+ * point. For CMP devices, this means that only
+ * one message should be printed, no matter how
+ * many cores are actually present.
+ */
+ if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) {
+ cmn_err(CE_CONT, "OS unconfigure %s\n",
+ up->sbc_cm.sbdev_path);
+ }
+
+ /*
+ * CPUs were offlined during Release.
+ */
+ if (cpu_is_poweredoff(cp)) {
+ PR_CPU("%s: cpu %d already powered OFF\n",
+ f, up->sbc_cpu_id);
+ continue;
+ }
+
+ if (!cpu_is_offline(cp)) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
+ cpu_flags = CPU_FORCED;
+ /* cpu was onlined after release. Offline it again */
+ PR_CPU("%s: offlining cpu %d\n", f, up->sbc_cpu_id);
+ if (cpu_offline(cp, cpu_flags)) {
+ PR_CPU("%s: failed to offline cpu %d\n",
+ f, up->sbc_cpu_id);
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ if (disp_bound_threads(cp, 0)) {
+ cmn_err(CE_WARN, "%s: thread(s) bound "
+ "to cpu %d", f, cp->cpu_id);
+ }
+ goto err;
+ }
+ }
+ if (cpu_poweroff(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTOP);
+ goto err;
+ } else {
+ PR_CPU("%s: cpu %d powered OFF\n", f, up->sbc_cpu_id);
+ }
+ }
+
+ return (0);
+
+err:
+ mutex_exit(&cpu_lock);
+ dr_unlock_status(hp->h_bd);
+ return (-1);
+}
+
+/*ARGSUSED*/
+void
+dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+ processorid_t cpuid;
+ int rv;
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ return;
+ }
+
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ } else if ((rv = cpu_unconfigure(cpuid)) != 0) {
+ dr_dev_err(CE_IGNORE, cp, dr_errno2ecode(rv));
+ } else {
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ } else {
+ up->sbc_cpu_id = -1;
+ }
+ }
+}
+
+/*ARGSUSED1*/
+int
+dr_post_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ static fn_t f = "dr_post_detach_cpu";
+
+ PR_CPU("%s...\n", f);
+ hp->h_ndi = 0;
+
+ mutex_exit(&cpu_lock);
+ dr_unlock_status(hp->h_bd);
+
+ return (0);
+}
+
+static void
+dr_fill_cpu_stat(dr_cpu_unit_t *cp, drmach_status_t *pstat, sbd_cpu_stat_t *csp)
+{
+ ASSERT(cp && pstat && csp);
+
+ /* Fill in the common status information */
+ bzero((caddr_t)csp, sizeof (*csp));
+ csp->cs_type = cp->sbc_cm.sbdev_type;
+ csp->cs_unit = cp->sbc_cm.sbdev_unum;
+ (void) strlcpy(csp->cs_name, pstat->type, sizeof (csp->cs_name));
+ csp->cs_cond = cp->sbc_cm.sbdev_cond;
+ csp->cs_busy = cp->sbc_cm.sbdev_busy | pstat->busy;
+ csp->cs_time = cp->sbc_cm.sbdev_time;
+ csp->cs_ostate = cp->sbc_cm.sbdev_ostate;
+ csp->cs_suspend = 0;
+
+ /* CPU specific status data */
+ csp->cs_cpuid = cp->sbc_cpu_id;
+
+ /*
+ * If the speed and ecache properties have not been
+ * cached yet, read them in from the device tree.
+ */
+ if ((cp->sbc_speed == 0) || (cp->sbc_ecache == 0))
+ dr_cpu_set_prop(cp);
+
+ /* use the cached speed and ecache values */
+ csp->cs_speed = cp->sbc_speed;
+ csp->cs_ecache = cp->sbc_ecache;
+
+ mutex_enter(&cpu_lock);
+ if (!cpu_get(csp->cs_cpuid)) {
+ /* ostate must be UNCONFIGURED */
+ csp->cs_cm.c_ostate = SBD_STAT_UNCONFIGURED;
+ }
+ mutex_exit(&cpu_lock);
+}
+
+/*ARGSUSED2*/
+static void
+dr_fill_cmp_stat(sbd_cpu_stat_t *csp, int ncores, int impl, sbd_cmp_stat_t *psp)
+{
+ int core;
+
+ ASSERT(csp && psp && (ncores >= 1));
+
+ bzero((caddr_t)psp, sizeof (*psp));
+
+ /*
+ * Fill in the common status information based
+ * on the data for the first core.
+ */
+ psp->ps_type = SBD_COMP_CMP;
+ psp->ps_unit = DR_UNUM2SBD_UNUM(csp->cs_unit, SBD_COMP_CMP);
+ (void) strlcpy(psp->ps_name, csp->cs_name, sizeof (psp->ps_name));
+ psp->ps_cond = csp->cs_cond;
+ psp->ps_busy = csp->cs_busy;
+ psp->ps_time = csp->cs_time;
+ psp->ps_ostate = csp->cs_ostate;
+ psp->ps_suspend = csp->cs_suspend;
+
+ /* CMP specific status data */
+ *psp->ps_cpuid = csp->cs_cpuid;
+ psp->ps_ncores = 1;
+ psp->ps_speed = csp->cs_speed;
+ psp->ps_ecache = csp->cs_ecache;
+
+ /*
+ * Walk through the data for the remaining cores.
+ * Make any adjustments to the common status data,
+ * or the shared CMP specific data if necessary.
+ */
+ for (core = 1; core < ncores; core++) {
+ /*
+ * The following properties should be the same
+ * for all the cores of the CMP.
+ */
+ ASSERT(psp->ps_unit == DR_UNUM2SBD_UNUM(csp[core].cs_unit,
+ SBD_COMP_CMP));
+
+ if (csp[core].cs_speed > psp->ps_speed)
+ psp->ps_speed = csp[core].cs_speed;
+ if (csp[core].cs_ecache > psp->ps_ecache)
+ psp->ps_ecache = csp[core].cs_ecache;
+
+ psp->ps_cpuid[core] = csp[core].cs_cpuid;
+ psp->ps_ncores++;
+
+ /* adjust time if necessary */
+ if (csp[core].cs_time > psp->ps_time) {
+ psp->ps_time = csp[core].cs_time;
+ }
+
+ psp->ps_busy |= csp[core].cs_busy;
+
+ /*
+ * If any of the cores are configured, the
+ * entire CMP is marked as configured.
+ */
+ if (csp[core].cs_ostate == SBD_STAT_CONFIGURED) {
+ psp->ps_ostate = csp[core].cs_ostate;
+ }
+ }
+}
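+
+/*
+ * Aggregation example (values are illustrative): for a two-core CMP
+ * where core 0 is {speed 2933, ecache 24, unconfigured} and core 1
+ * is {speed 2933, ecache 24, configured}, the merged stat reports
+ * ps_ncores == 2, the maximum speed and ecache, and an ostate of
+ * SBD_STAT_CONFIGURED, since any configured core marks the whole
+ * CMP configured.
+ */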
+
+int
+dr_cpu_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int cmp;
+ int core;
+ int ncpu;
+ dr_board_t *bp;
+ sbd_cpu_stat_t *cstat;
+ int impl;
+
+ bp = hp->h_bd;
+ ncpu = 0;
+
+ devset &= DR_DEVS_PRESENT(bp);
+ cstat = kmem_zalloc(sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP,
+ KM_SLEEP);
+
+ /*
+ * Treat every CPU as a CMP. In the case where the
+ * device is not a CMP, treat it as a CMP with only
+ * one core.
+ */
+ for (cmp = 0; cmp < MAX_CMP_UNITS_PER_BOARD; cmp++) {
+ int ncores;
+ dr_cpu_unit_t *cp;
+ drmach_status_t pstat;
+ sbd_error_t *err;
+ sbd_cmp_stat_t *psp;
+
+ if ((devset & DEVSET(SBD_COMP_CMP, cmp)) == 0) {
+ continue;
+ }
+
+ ncores = 0;
+
+ for (core = 0; core < MAX_CORES_PER_CMP; core++) {
+
+ cp = dr_get_cpu_unit(bp, DR_CMP_CORE_UNUM(cmp, core));
+
+ if (cp->sbc_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, cp));
+
+ /* skip if not present */
+ if (cp->sbc_cm.sbdev_id == (drmachid_t)0) {
+ continue;
+ }
+
+ /* fetch platform status */
+ err = drmach_status(cp->sbc_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ continue;
+ }
+
+ dr_fill_cpu_stat(cp, &pstat, &cstat[ncores++]);
+ /*
+			 * Set impl from this core, because a later core
+			 * might be EMPTY or not present.
+ */
+ impl = cp->sbc_cpu_impl;
+ }
+
+ if (ncores == 0) {
+ continue;
+ }
+
+ /*
+ * Store the data to the outgoing array. If the
+ * device is a CMP, combine all the data for the
+ * cores into a single stat structure.
+ *
+ * The check for a CMP device uses the last core
+ * found, assuming that all cores will have the
+ * same implementation.
+ */
+ if (CPU_IMPL_IS_CMP(impl)) {
+ psp = (sbd_cmp_stat_t *)dsp;
+ dr_fill_cmp_stat(cstat, ncores, impl, psp);
+ } else {
+ ASSERT(ncores == 1);
+ bcopy(cstat, dsp, sizeof (sbd_cpu_stat_t));
+ }
+
+ dsp++;
+ ncpu++;
+ }
+
+ kmem_free(cstat, sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP);
+
+ return (ncpu);
+}
+
+/*
+ * Cancel a previous release operation for a cpu.
+ * For cpus this simply means bringing cpus that
+ * were offlined back online. Note that they had
+ * to have been online at the time they were
+ * released.
+ */
+int
+dr_cancel_cpu(dr_cpu_unit_t *up)
+{
+ int rv = 0;
+ static fn_t f = "dr_cancel_cpu";
+
+ ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up));
+
+ if (cpu_flagged_active(up->sbc_cpu_flags)) {
+ struct cpu *cp;
+
+ /*
+ * CPU had been online, go ahead
+ * bring it back online.
+ */
+ PR_CPU("%s: bringing cpu %d back ONLINE\n", f, up->sbc_cpu_id);
+
+ mutex_enter(&cpu_lock);
+ cp = cpu[up->sbc_cpu_id];
+
+ if (cpu_is_poweredoff(cp)) {
+ if (cpu_poweron(cp)) {
+ cmn_err(CE_WARN, "%s: failed to power-on "
+ "cpu %d", f, up->sbc_cpu_id);
+ rv = -1;
+ }
+ }
+
+ if (rv == 0 && cpu_is_offline(cp)) {
+ if (cpu_online(cp)) {
+ cmn_err(CE_WARN, "%s: failed to online cpu %d",
+ f, up->sbc_cpu_id);
+ rv = -1;
+ }
+ }
+
+ if (rv == 0 && cpu_is_online(cp)) {
+ if (cpu_flagged_nointr(up->sbc_cpu_flags)) {
+ if (cpu_intr_disable(cp) != 0) {
+ cmn_err(CE_WARN, "%s: failed to "
+ "disable interrupts on cpu %d", f,
+ up->sbc_cpu_id);
+ }
+ }
+ }
+
+ mutex_exit(&cpu_lock);
+ }
+
+ return (rv);
+}
+
+int
+dr_disconnect_cpu(dr_cpu_unit_t *up)
+{
+ sbd_error_t *err;
+ static fn_t f = "dr_disconnect_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ ASSERT((up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) ||
+ (up->sbc_cm.sbdev_state == DR_STATE_UNCONFIGURED));
+
+ ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up));
+
+ if (up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) {
+ /*
+		 * CPUs were never brought in, so they are still
+		 * effectively disconnected and there is nothing to do here.
+ */
+ PR_CPU("%s: cpu %d never brought in\n", f, up->sbc_cpu_id);
+ return (0);
+ }
+
+ err = drmach_cpu_disconnect(up->sbc_cm.sbdev_id);
+ if (err == NULL)
+ return (0);
+ else {
+ DRERR_SET_C(&up->sbc_cm.sbdev_error, &err);
+ return (-1);
+ }
+ /*NOTREACHED*/
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_io.c b/usr/src/uts/i86pc/io/dr/dr_io.c
new file mode 100644
index 0000000000..96e0e458cb
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_io.c
@@ -0,0 +1,389 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * I/O support routines for DR
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/kmem.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+void
+dr_init_io_unit(dr_io_unit_t *ip)
+{
+ dr_state_t new_state;
+
+ if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ }
+ dr_device_transition(&ip->sbi_cm, new_state);
+}
+
+/*ARGSUSED*/
+void
+dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_configure(cp->sbdev_id, 0);
+ dr_unlock_status(hp->h_bd);
+
+ if (!err)
+ err = drmach_io_post_attach(cp->sbdev_id);
+
+ if (err)
+ DRERR_SET_C(&cp->sbdev_error, &err);
+}
+
+/*
+ * remove device nodes for the branch indicated by cp
+ */
+/*ARGSUSED*/
+void
+dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+
+ err = drmach_unconfigure(cp->sbdev_id, 0);
+
+ if (!err)
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+
+ if (!err)
+ err = drmach_io_post_release(cp->sbdev_id);
+
+ if (err) {
+ dr_device_transition(cp, DR_STATE_CONFIGURED);
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ }
+}
+
+/*ARGSUSED*/
+int
+dr_disconnect_io(dr_io_unit_t *ip)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+
+ for (d = 0; d < devnum; d++) {
+ dr_common_unit_t *cp = devlist[d];
+
+ cmn_err(CE_CONT, "OS configure %s", cp->sbdev_path);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ return (0);
+}
+
+static int
+dr_check_io_refs(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ register int i, reftotal = 0;
+ static fn_t f = "dr_check_io_refs";
+
+ for (i = 0; i < devnum; i++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[i];
+ dev_info_t *dip;
+ int ref;
+ int refcount_non_gldv3;
+ sbd_error_t *err;
+
+ err = drmach_get_dip(ip->sbi_cm.sbdev_id, &dip);
+ if (err)
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ else if (dip != NULL) {
+ ref = 0;
+ refcount_non_gldv3 = 0;
+ ASSERT(e_ddi_branch_held(dip));
+ dr_check_devices(dip, &ref, hp, NULL, NULL,
+ 0, &refcount_non_gldv3);
+ ASSERT(refcount_non_gldv3 >= 0);
+ ASSERT(ref >= refcount_non_gldv3);
+ /*
+ * Ignore reference counts of non-gldv3 network devices
+ * as Crossbow creates reference counts for non-active
+			 * (unplumbed) instances. The reference count check in
+			 * detach() is known to prevent devices from detaching
+			 * as necessary.
+ */
+ ref -= refcount_non_gldv3;
+ hp->h_err = NULL;
+ if (ref) {
+ dr_dev_err(CE_WARN, &ip->sbi_cm, ESBD_BUSY);
+ }
+ PR_IO("%s: dip(%s) ref = %d\n",
+ f, ddi_get_name(dip), ref);
+ reftotal += ref;
+ } else {
+ PR_IO("%s: NO dip for id (0x%x)\n",
+ f, (uint_t)(uintptr_t)ip->sbi_cm.sbdev_id);
+ }
+ }
+
+ return (reftotal);
+}
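+
+/*
+ * Example of the non-gldv3 adjustment above (illustrative): if a
+ * branch reports ref == 3 but two of those references come from
+ * unplumbed non-GLDv3 NIC instances, the effective count is
+ * 3 - 2 == 1 and the unit is still flagged ESBD_BUSY; had all three
+ * been non-GLDv3, the unit would be considered unreferenced.
+ */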
+
+int
+dr_pre_release_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ static fn_t f = "dr_pre_release_io";
+ int d;
+
+ ASSERT(devnum > 0);
+
+ /* fail if any I/O device pre-release fails */
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+
+ if ((hp->h_err = drmach_io_pre_release(
+ ip->sbi_cm.sbdev_id)) != 0) {
+ return (-1);
+ }
+ }
+
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+ sbd_error_t *err;
+
+ err = drmach_release(ip->sbi_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error,
+ &err);
+ return (-1);
+ }
+ }
+
+ /* fail if any I/O devices are still referenced */
+ if (dr_check_io_refs(hp, devlist, devnum) > 0) {
+ PR_IO("%s: failed - I/O devices ref'd\n", f);
+
+		/* recover before returning the error */
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+ sbd_error_t *err;
+ err = drmach_io_unrelease(ip->sbi_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ return (-1);
+ }
+ }
+ return (-1);
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+
+ ASSERT(devnum > 0);
+
+ for (d = 0; d < devnum; d++) {
+ dr_common_unit_t *cp = devlist[d];
+
+ cmn_err(CE_CONT, "OS unconfigure %s", cp->sbdev_path);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_detach_io(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ register int i;
+ int rv = 0;
+ static fn_t f = "dr_post_detach_io";
+
+ ASSERT(devnum > 0);
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+ if (cp->sbdev_error != NULL) {
+ PR_IO("%s: Failed\n", f);
+ rv = -1;
+ break;
+ }
+ }
+ return (rv);
+}
+
+static void
+dr_get_comp_cond(dr_io_unit_t *ip, dev_info_t *dip)
+{
+ if (dip == NULL) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_UNKNOWN;
+ return;
+ }
+
+ if (DEVI(dip)->devi_flags & DEVI_RETIRED) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_FAILED;
+ return;
+ }
+
+ if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ }
+}
+
+int
+dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int i, ix;
+ dr_board_t *bp;
+ sbd_io_stat_t *isp;
+ dr_io_unit_t *ip;
+
+ bp = hp->h_bd;
+
+ /*
+ * Only look for requested devices that are actually present.
+ */
+ devset &= DR_DEVS_PRESENT(bp);
+
+ for (i = ix = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ drmachid_t id;
+ dev_info_t *dip;
+ sbd_error_t *err;
+ drmach_status_t pstat;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, i) == 0)
+ continue;
+
+ ip = dr_get_io_unit(bp, i);
+
+ if (ip->sbi_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ id = ip->sbi_cm.sbdev_id;
+ if (id == (drmachid_t)0)
+ continue;
+
+ err = drmach_status(ip->sbi_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ return (-1);
+ }
+
+ dip = NULL;
+ err = drmach_get_dip(id, &dip);
+ if (err) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+
+ sbd_err_clear(&err);
+ continue;
+ }
+
+ isp = &dsp->d_io;
+ bzero((caddr_t)isp, sizeof (*isp));
+
+ isp->is_cm.c_id.c_type = ip->sbi_cm.sbdev_type;
+ isp->is_cm.c_id.c_unit = ip->sbi_cm.sbdev_unum;
+ (void) strlcpy(isp->is_cm.c_id.c_name, pstat.type,
+ sizeof (isp->is_cm.c_id.c_name));
+
+ dr_get_comp_cond(ip, dip);
+ isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond;
+ isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy | pstat.busy;
+ isp->is_cm.c_time = ip->sbi_cm.sbdev_time;
+ isp->is_cm.c_ostate = ip->sbi_cm.sbdev_ostate;
+ isp->is_cm.c_sflags = 0;
+
+ if (dip == NULL) {
+ isp->is_pathname[0] = '\0';
+ isp->is_referenced = 0;
+ isp->is_unsafe_count = 0;
+ } else {
+ int refcount = 0, idx = 0;
+ uint64_t unsafe_devs[SBD_MAX_UNSAFE];
+
+ ASSERT(e_ddi_branch_held(dip));
+ (void) ddi_pathname(dip, isp->is_pathname);
+
+ /* check reference and unsafe counts on devices */
+ isp->is_unsafe_count = 0;
+ dr_check_devices(dip, &refcount, hp, unsafe_devs,
+ &idx, SBD_MAX_UNSAFE, NULL);
+ while (idx > 0) {
+ isp->is_unsafe_list[idx-1] = unsafe_devs[idx-1];
+ --idx;
+ }
+
+ isp->is_referenced = (refcount == 0) ? 0 : 1;
+
+ hp->h_err = NULL;
+ }
+ ix++;
+ dsp++;
+ }
+
+ return (ix);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c
new file mode 100644
index 0000000000..90ceb84044
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c
@@ -0,0 +1,639 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * DR memory support routines.
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/kobj.h>
+#include <sys/conf.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/sysmacros.h>
+#include <sys/machsystm.h>
+#include <sys/promif.h>
+#include <sys/lgrp.h>
+#include <sys/mem_config.h>
+#include <vm/seg_kmem.h>
+#include <vm/page.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+extern struct memlist *phys_install;
+
+/* TODO: push this reference below drmach line */
+extern int kcage_on;
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr_mem_acpi.c %d";
+
+static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
+
+/*
+ * dr_mem_unit_t.sbm_flags
+ */
+#define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
+#define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
+#define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
+#define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
+#define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
+
+/* helper macros */
+#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
+#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
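+
+/*
+ * Example (assuming the x86 PAGESHIFT of 12, i.e. 4K pages):
+ * _ptob64(0x100) == 0x100000 and _b64top(0x100000) == 0x100.
+ */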
+
+static struct memlist *
+dr_get_memlist(dr_mem_unit_t *mp)
+{
+ struct memlist *mlist = NULL;
+ sbd_error_t *err;
+ static fn_t f = "dr_get_memlist";
+
+ PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
+
+ /*
+ * Return cached memlist, if present.
+ * This memlist will be present following an
+	 * unconfigure (a.k.a. detach) of this memunit.
+	 * It should only be used in the case where a configure
+ * is bringing this memunit back in without going
+ * through the disconnect and connect states.
+ */
+ if (mp->sbm_mlist) {
+ PR_MEM("%s: found cached memlist\n", f);
+
+ mlist = memlist_dup(mp->sbm_mlist);
+ } else {
+ uint64_t basepa = _ptob64(mp->sbm_basepfn);
+
+ /* attempt to construct a memlist using phys_install */
+
+ /* round down to slice base address */
+ basepa &= ~mp->sbm_alignment_mask;
+
+ /* get a copy of phys_install to edit */
+ memlist_read_lock();
+ mlist = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+ /* trim lower irrelevant span */
+ if (mlist)
+ mlist = memlist_del_span(mlist, 0ull, basepa);
+
+ /* trim upper irrelevant span */
+ if (mlist) {
+ uint64_t endpa, toppa;
+
+ toppa = mp->sbm_slice_top;
+ endpa = _ptob64(physmax + 1);
+ if (endpa > toppa)
+ mlist = memlist_del_span(
+ mlist, toppa,
+ endpa - toppa);
+ }
+
+ if (mlist) {
+ /* successfully built a memlist */
+ PR_MEM("%s: derived memlist from phys_install\n", f);
+ }
+
+ /* if no mlist yet, try platform layer */
+ if (!mlist) {
+ err = drmach_mem_get_memlist(
+ mp->sbm_cm.sbdev_id, &mlist);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mlist = NULL; /* paranoia */
+ }
+ }
+ }
+
+ PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mlist);
+
+ return (mlist);
+}
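+
+/*
+ * Trimming example (addresses are illustrative): with a 4GB slice,
+ * an sbm_alignment_mask of 4GB-1 and a base pfn inside the second
+ * slice, basepa rounds down to 0x100000000; the copied phys_install
+ * list then has [0, 0x100000000) deleted below it and, if physmax
+ * extends beyond sbm_slice_top, the span above sbm_slice_top deleted
+ * as well, leaving only this unit's slice.
+ */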
+
+/*ARGSUSED*/
+void
+dr_release_mem(dr_common_unit_t *cp)
+{
+}
+
+void
+dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
+ struct memlist *ml, *mc;
+ sbd_error_t *err;
+ static fn_t f = "dr_attach_mem";
+ uint64_t dr_physmax;
+
+ PR_MEM("%s...\n", f);
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_configure(cp->sbdev_id, 0);
+ dr_unlock_status(hp->h_bd);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ return;
+ }
+
+ ml = dr_get_memlist(mp);
+
+ /* Skip memory with address above plat_dr_physmax or kpm_size */
+ dr_physmax = plat_dr_physmax ? ptob(plat_dr_physmax) : UINT64_MAX;
+ if (kpm_size < dr_physmax)
+ dr_physmax = kpm_size;
+ ml = memlist_del_span(ml, dr_physmax, UINT64_MAX - dr_physmax);
+
+ for (mc = ml; mc; mc = mc->ml_next) {
+ int rv;
+ sbd_error_t *err;
+
+ rv = kphysm_add_memory_dynamic(
+ (pfn_t)btop(mc->ml_address),
+ (pgcnt_t)btop(mc->ml_size));
+ if (rv != KPHYSM_OK) {
+ /*
+ * translate kphysm error and
+ * store in devlist error
+ */
+ switch (rv) {
+ case KPHYSM_ERESOURCE:
+ rv = ESBD_NOMEM;
+ break;
+
+ case KPHYSM_EFAULT:
+ rv = ESBD_FAULT;
+ break;
+
+ default:
+ rv = ESBD_INTERNAL;
+ break;
+ }
+
+ if (rv == ESBD_INTERNAL) {
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ } else
+ dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
+ break;
+ }
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ break;
+ }
+ }
+
+ memlist_delete(ml);
+ dr_init_mem_unit_data(mp);
+
+ /* back out if configure failed */
+ if (mp->sbm_cm.sbdev_error != NULL) {
+ dr_lock_status(hp->h_bd);
+ err = drmach_unconfigure(cp->sbdev_id, 0);
+ if (err)
+ sbd_err_clear(&err);
+ dr_unlock_status(hp->h_bd);
+ }
+}
+
+/*ARGSUSED*/
+void
+dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+}
+
+/*
+ * This routine is intended as a wrapper for kphysm_del_span_query, to
+ * support potential memory holes in a board's physical address space.
+ * In this ACPI-based implementation it is currently a stub: it reports
+ * zero counts in *mp rather than querying each node of the memlist.
+ */
+static int
+dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
+{
+ int rv = 0;
+
+ if (mlist == NULL)
+ cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
+
+ mp->phys_pages = 0;
+ mp->managed = 0;
+ mp->nonrelocatable = 0;
+ mp->first_nonrelocatable = 0;
+ mp->last_nonrelocatable = 0;
+
+ return (rv);
+}
+
+/*
+ * NOTE: This routine is only partially smart about multiple
+ * mem-units. Need to make mem-status structure smart
+ * about them also.
+ */
+int
+dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int m, mix;
+ memquery_t mq;
+ dr_board_t *bp;
+ dr_mem_unit_t *mp;
+ sbd_mem_stat_t *msp;
+ static fn_t f = "dr_mem_status";
+
+ bp = hp->h_bd;
+ devset &= DR_DEVS_PRESENT(bp);
+
+ for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
+ int rv;
+ sbd_error_t *err;
+ drmach_status_t pstat;
+ dr_mem_unit_t *p_mp;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
+ continue;
+
+ mp = dr_get_mem_unit(bp, m);
+
+ if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
+ continue;
+
+ /* fetch platform status */
+ err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ continue;
+ }
+
+ msp = &dsp->d_mem;
+ bzero((caddr_t)msp, sizeof (*msp));
+
+ (void) strlcpy(msp->ms_cm.c_id.c_name, pstat.type,
+ sizeof (msp->ms_cm.c_id.c_name));
+ msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
+ msp->ms_cm.c_id.c_unit = mp->sbm_cm.sbdev_unum;
+ msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
+ msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
+ msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
+ msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
+
+ msp->ms_totpages = mp->sbm_npages;
+ msp->ms_basepfn = mp->sbm_basepfn;
+ msp->ms_pageslost = mp->sbm_pageslost;
+ msp->ms_cage_enabled = kcage_on;
+
+ if (mp->sbm_flags & DR_MFLAG_RESERVED)
+ p_mp = mp->sbm_peer;
+ else
+ p_mp = NULL;
+
+ if (p_mp == NULL) {
+ msp->ms_peer_is_target = 0;
+ msp->ms_peer_ap_id[0] = '\0';
+ } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ char *minor;
+
+ /*
+ * b_dip doesn't have to be held for ddi_pathname()
+ * because the board struct (dr_board_t) will be
+ * destroyed before b_dip detaches.
+ */
+ (void) ddi_pathname(bp->b_dip, path);
+ minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
+
+ (void) snprintf(msp->ms_peer_ap_id,
+ sizeof (msp->ms_peer_ap_id), "%s%s",
+ path, (minor == NULL) ? "" : minor);
+
+ kmem_free(path, MAXPATHLEN);
+
+ if (p_mp->sbm_flags & DR_MFLAG_TARGET)
+ msp->ms_peer_is_target = 1;
+ }
+
+ /*
+ * kphysm_del_span_query can report non-reloc pages = total
+ * pages for memory that is not yet configured
+ */
+ if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
+ struct memlist *ml;
+
+ ml = dr_get_memlist(mp);
+ rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
+ memlist_delete(ml);
+
+ if (rv == KPHYSM_OK) {
+ msp->ms_managed_pages = mq.managed;
+ msp->ms_noreloc_pages = mq.nonrelocatable;
+ msp->ms_noreloc_first =
+ mq.first_nonrelocatable;
+ msp->ms_noreloc_last =
+ mq.last_nonrelocatable;
+ msp->ms_cm.c_sflags = 0;
+ if (mq.nonrelocatable &&
+ drmach_copy_rename_need_suspend(
+ mp->sbm_cm.sbdev_id)) {
+ SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
+ msp->ms_cm.c_sflags);
+ }
+ } else {
+ PR_MEM("%s: kphysm_del_span_query() = %d\n",
+ f, rv);
+ }
+ }
+
+ /*
+ * Check source unit state during copy-rename
+ */
+ if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
+ (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
+ mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
+ msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
+
+ mix++;
+ dsp++;
+ }
+
+ return (mix);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int err_flag = 0;
+ int d;
+ sbd_error_t *err;
+ static fn_t f = "dr_pre_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ dr_state_t state;
+
+ cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
+
+ state = mp->sbm_cm.sbdev_state;
+ switch (state) {
+ case DR_STATE_UNCONFIGURED:
+ PR_MEM("%s: recovering from UNCONFIG for %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ /* use memlist cached by dr_post_detach_mem_unit */
+ ASSERT(mp->sbm_mlist != NULL);
+ PR_MEM("%s: re-configuring cached memlist for %s:\n",
+ f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mp->sbm_mlist);
+
+			/* kphysm del handle should have been freed */
+ ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
+
+ /*FALLTHROUGH*/
+
+ case DR_STATE_CONNECTED:
+ PR_MEM("%s: reprogramming mem hardware on %s\n",
+ f, mp->sbm_cm.sbdev_bp->b_path);
+
+ PR_MEM("%s: enabling %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ err_flag = 1;
+ }
+ break;
+
+ default:
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
+ err_flag = 1;
+ break;
+ }
+
+ /* exit for loop if error encountered */
+ if (err_flag)
+ break;
+ }
+
+ return (err_flag ? -1 : 0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+ static fn_t f = "dr_post_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ struct memlist *mlist, *ml;
+
+ mlist = dr_get_memlist(mp);
+
+ /*
+ * Verify the memory really did successfully attach
+ * by checking for its existence in phys_install.
+ */
+ memlist_read_lock();
+ if (memlist_intersect(phys_install, mlist) == 0) {
+ memlist_read_unlock();
+
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+
+ PR_MEM("%s: %s memlist not in phys_install",
+ f, mp->sbm_cm.sbdev_path);
+
+ memlist_delete(mlist);
+ continue;
+ }
+ memlist_read_unlock();
+
+ for (ml = mlist; ml != NULL; ml = ml->ml_next) {
+ sbd_error_t *err;
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id,
+ ml->ml_address,
+ ml->ml_size);
+ if (err)
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ }
+
+ memlist_delete(mlist);
+
+ /*
+ * Destroy cached memlist, if any.
+ * There will be a cached memlist in sbm_mlist if
+ * this board is being configured directly after
+ * an unconfigure.
+ * To support this transition, dr_post_detach_mem
+ * left a copy of the last known memlist in sbm_mlist.
+ * This memlist could differ from any derived from
+ * hardware if while this memunit was last configured
+ * the system detected and deleted bad pages from
+ * phys_install. The location of those bad pages
+ * will be reflected in the cached memlist.
+ */
+ if (mp->sbm_mlist) {
+ memlist_delete(mp->sbm_mlist);
+ mp->sbm_mlist = NULL;
+ }
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+int
+dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
+
+/*
+ * Successful return from this function will have the memory
+ * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
+ * and waiting. This routine's job is to select the memory that
+ * actually has to be released (detached) which may not necessarily
+ * be the same memory node that came in via devlist[],
+ * i.e. a copy-rename is needed.
+ */
+/*ARGSUSED*/
+int
+dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+void
+dr_release_mem_done(dr_common_unit_t *cp)
+{
+}
+
+/*ARGSUSED*/
+int
+dr_disconnect_mem(dr_mem_unit_t *mp)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+int
+dr_cancel_mem(dr_mem_unit_t *s_mp)
+{
+ return (-1);
+}
+
+void
+dr_init_mem_unit(dr_mem_unit_t *mp)
+{
+ dr_state_t new_state;
+
+ if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
+ new_state = DR_STATE_OCCUPIED;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ }
+
+ if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
+ dr_init_mem_unit_data(mp);
+
+ /* delay transition until fully initialized */
+ dr_device_transition(&mp->sbm_cm, new_state);
+}
+
+static void
+dr_init_mem_unit_data(dr_mem_unit_t *mp)
+{
+ drmachid_t id = mp->sbm_cm.sbdev_id;
+ drmach_mem_info_t minfo;
+ sbd_error_t *err;
+ static fn_t f = "dr_init_mem_unit_data";
+
+ PR_MEM("%s...\n", f);
+
+ /* a little sanity checking */
+ ASSERT(mp->sbm_peer == NULL);
+ ASSERT(mp->sbm_flags == 0);
+
+ if (err = drmach_mem_get_info(id, &minfo)) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ return;
+ }
+ mp->sbm_basepfn = _b64top(minfo.mi_basepa);
+ mp->sbm_npages = _b64top(minfo.mi_size);
+ mp->sbm_alignment_mask = minfo.mi_alignment_mask;
+ mp->sbm_slice_base = minfo.mi_slice_base;
+ mp->sbm_slice_top = minfo.mi_slice_top;
+ mp->sbm_slice_size = minfo.mi_slice_size;
+
+ PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
+ f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_quiesce.c b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
new file mode 100644
index 0000000000..f3467f6eb1
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
@@ -0,0 +1,1050 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * A CPR derivative specifically for starfire/starcat.
+ * X86 doesn't make use of the quiesce interfaces; it's kept for simplicity.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/machparam.h>
+#include <sys/machsystm.h>
+#include <sys/ddi.h>
+#define SUNDDI_IMPL
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/devctl.h>
+#include <sys/time.h>
+#include <sys/kmem.h>
+#include <nfs/lm.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/obpdefs.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/callb.h>
+#include <sys/clock.h>
+#include <sys/x_call.h>
+#include <sys/cpuvar.h>
+#include <sys/epm.h>
+#include <sys/vfs.h>
+#include <sys/promif.h>
+#include <sys/conf.h>
+#include <sys/cyclic.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+
+extern void e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
+extern void e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
+extern int is_pseudo_device(dev_info_t *dip);
+
+extern kmutex_t cpu_lock;
+extern dr_unsafe_devs_t dr_unsafe_devs;
+
+static int dr_is_real_device(dev_info_t *dip);
+static int dr_is_unsafe_major(major_t major);
+static int dr_bypass_device(char *dname);
+static int dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
+static int dr_resolve_devname(dev_info_t *dip, char *buffer,
+ char *alias);
+static sbd_error_t *drerr_int(int e_code, uint64_t *arr, int idx,
+ int majors);
+static int dr_add_int(uint64_t *arr, int idx, int len,
+ uint64_t val);
+
+int dr_pt_test_suspend(dr_handle_t *hp);
+
+/*
+ * dr_quiesce.c interface
+ * NOTE: states used internally by dr_suspend and dr_resume
+ */
+typedef enum dr_suspend_state {
+ DR_SRSTATE_BEGIN = 0,
+ DR_SRSTATE_USER,
+ DR_SRSTATE_DRIVER,
+ DR_SRSTATE_FULL
+} suspend_state_t;
+
+struct dr_sr_handle {
+ dr_handle_t *sr_dr_handlep;
+ dev_info_t *sr_failed_dip;
+ suspend_state_t sr_suspend_state;
+ uint_t sr_flags;
+ uint64_t sr_err_ints[DR_MAX_ERR_INT];
+ int sr_err_idx;
+};
+
+#define SR_FLAG_WATCHDOG 0x1
+
+/*
+ * XXX
+ * This hack will go away before RTI. Just for testing.
+ * List of drivers to bypass when performing a suspend.
+ */
+static char *dr_bypass_list[] = {
+ ""
+};
+
+
+#define SKIP_SYNC /* bypass sync ops in dr_suspend */
+
+/*
+ * dr_skip_user_threads is used to control whether user threads should
+ * be suspended. If dr_skip_user_threads is true, the rest of the
+ * flags are not used; if it is false, dr_check_user_stop_result
+ * controls whether or not we need to check the suspend result, and
+ * dr_allow_blocked_threads controls whether or not we allow the
+ * suspend to continue if there are blocked threads. We allow all
+ * combinations of dr_check_user_stop_result and
+ * dr_allow_blocked_threads, even though it might not make much sense
+ * to disallow blocked threads when we don't even check the stop
+ * result.
+ */
+static int dr_skip_user_threads = 0; /* default to FALSE */
+static int dr_check_user_stop_result = 1; /* default to TRUE */
+static int dr_allow_blocked_threads = 1; /* default to TRUE */
+
+#define DR_CPU_LOOP_MSEC 1000
+
+static void
+dr_stop_intr(void)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ kpreempt_disable();
+ cyclic_suspend();
+}
+
+static void
+dr_enable_intr(void)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ cyclic_resume();
+ kpreempt_enable();
+}
+
+dr_sr_handle_t *
+dr_get_sr_handle(dr_handle_t *hp)
+{
+ dr_sr_handle_t *srh;
+
+ srh = GETSTRUCT(dr_sr_handle_t, 1);
+ srh->sr_dr_handlep = hp;
+
+ return (srh);
+}
+
+void
+dr_release_sr_handle(dr_sr_handle_t *srh)
+{
+ ASSERT(srh->sr_failed_dip == NULL);
+ FREESTRUCT(srh, dr_sr_handle_t, 1);
+}
+
+static int
+dr_is_real_device(dev_info_t *dip)
+{
+ struct regspec *regbuf = NULL;
+ int length = 0;
+ int rc;
+
+ if (ddi_get_driver(dip) == NULL)
+ return (0);
+
+ if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
+ return (1);
+ if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
+ return (0);
+
+ /*
+ * now the general case
+ */
+ rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
+ (caddr_t)&regbuf, &length);
+ ASSERT(rc != DDI_PROP_NO_MEMORY);
+ if (rc != DDI_PROP_SUCCESS) {
+ return (0);
+ } else {
+ if ((length > 0) && (regbuf != NULL))
+ kmem_free(regbuf, length);
+ return (1);
+ }
+}
+
+static int
+dr_is_unsafe_major(major_t major)
+{
+ char *dname, **cpp;
+ int i, ndevs;
+
+ if ((dname = ddi_major_to_name(major)) == NULL) {
+ PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
+ return (0);
+ }
+
+ ndevs = dr_unsafe_devs.ndevs;
+ for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
+ if (strcmp(dname, *cpp++) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+static int
+dr_bypass_device(char *dname)
+{
+	int i;
+
+	/* check the bypass list; it is terminated by an empty string */
+	for (i = 0; dr_bypass_list[i][0] != '\0'; i++) {
+		if (strcmp(dname, dr_bypass_list[i]) == 0)
+			return (1);
+	}
+ return (0);
+}
+
+static int
+dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
+{
+ major_t devmajor;
+ char *aka, *name;
+
+ *buffer = *alias = 0;
+
+ if (dip == NULL)
+ return (-1);
+
+ if ((name = ddi_get_name(dip)) == NULL)
+ name = "<null name>";
+
+ aka = name;
+
+ if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
+ aka = ddi_major_to_name(devmajor);
+
+ (void) strcpy(buffer, name);
+
+ if (strcmp(name, aka))
+ (void) strcpy(alias, aka);
+ else
+ *alias = 0;
+
+ return (0);
+}
+
+struct dr_ref {
+ int *refcount;
+ int *refcount_non_gldv3;
+ uint64_t *arr;
+ int *idx;
+ int len;
+};
+
+/* ARGSUSED */
+static int
+dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
+{
+ major_t major;
+ char *dname;
+ struct dr_ref *rp = (struct dr_ref *)arg;
+
+ if (dip == NULL)
+ return (DDI_WALK_CONTINUE);
+
+ if (!dr_is_real_device(dip))
+ return (DDI_WALK_CONTINUE);
+
+ dname = ddi_binding_name(dip);
+
+ if (dr_bypass_device(dname))
+ return (DDI_WALK_CONTINUE);
+
+ if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
+ if (ref && rp->refcount) {
+ *rp->refcount += ref;
+ PR_QR("\n %s (major# %d) is referenced(%u)\n", dname,
+ major, ref);
+ }
+ if (ref && rp->refcount_non_gldv3) {
+ if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
+ *rp->refcount_non_gldv3 += ref;
+ }
+ if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
+ PR_QR("\n %s (major# %d) not hotpluggable\n", dname,
+ major);
+ if (rp->arr != NULL && rp->idx != NULL)
+ *rp->idx = dr_add_int(rp->arr, *rp->idx,
+ rp->len, (uint64_t)major);
+ }
+ }
+ return (DDI_WALK_CONTINUE);
+}
+
+static int
+dr_check_unsafe_major(dev_info_t *dip, void *arg)
+{
+ return (dr_check_dip(dip, arg, 0));
+}
+
+
+/*ARGSUSED*/
+void
+dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
+ uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
+{
+ struct dr_ref bref = {0};
+
+ if (dip == NULL)
+ return;
+
+ bref.refcount = refcount;
+ bref.refcount_non_gldv3 = refcount_non_gldv3;
+ bref.arr = arr;
+ bref.idx = idx;
+ bref.len = len;
+
+ ASSERT(e_ddi_branch_held(dip));
+ (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
+}
+
+/*
+ * The "dip" argument's parent (if it exists) must be held busy.
+ */
+static int
+dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ major_t major;
+ char *dname;
+ int circ;
+
+ /*
+ * If dip is the root node, it has no siblings and it is
+ * always held. If dip is not the root node, dr_suspend_devices()
+ * will be invoked with the parent held busy.
+ */
+ for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
+ char d_name[40], d_alias[40], *d_info;
+
+ ndi_devi_enter(dip, &circ);
+ if (dr_suspend_devices(ddi_get_child(dip), srh)) {
+ ndi_devi_exit(dip, circ);
+ return (ENXIO);
+ }
+ ndi_devi_exit(dip, circ);
+
+ if (!dr_is_real_device(dip))
+ continue;
+
+ major = (major_t)-1;
+ if ((dname = ddi_binding_name(dip)) != NULL)
+ major = ddi_name_to_major(dname);
+
+ if (dr_bypass_device(dname)) {
+ PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
+ major);
+ continue;
+ }
+
+ if (drmach_verify_sr(dip, 1)) {
+ PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
+ major);
+ continue;
+ }
+
+ if ((d_info = ddi_get_name_addr(dip)) == NULL)
+ d_info = "<null>";
+
+ d_name[0] = 0;
+ if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
+ if (d_alias[0] != 0) {
+ prom_printf("\tsuspending %s@%s (aka %s)\n",
+ d_name, d_info, d_alias);
+ } else {
+ prom_printf("\tsuspending %s@%s\n", d_name,
+ d_info);
+ }
+ } else {
+ prom_printf("\tsuspending %s@%s\n", dname, d_info);
+ }
+
+ if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
+ prom_printf("\tFAILED to suspend %s@%s\n",
+ d_name[0] ? d_name : dname, d_info);
+
+ srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
+
+ ndi_hold_devi(dip);
+ srh->sr_failed_dip = dip;
+
+ handle = srh->sr_dr_handlep;
+ dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
+ d_name[0] ? d_name : dname, d_info);
+
+ return (DDI_FAILURE);
+ }
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static void
+dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ dev_info_t *dip, *next, *last = NULL;
+ major_t major;
+ char *bn;
+ int circ;
+
+ major = (major_t)-1;
+
+ /* attach in reverse device tree order */
+ while (last != start) {
+ dip = start;
+ next = ddi_get_next_sibling(dip);
+ while (next != last && dip != srh->sr_failed_dip) {
+ dip = next;
+ next = ddi_get_next_sibling(dip);
+ }
+ if (dip == srh->sr_failed_dip) {
+ /* release hold acquired in dr_suspend_devices() */
+ srh->sr_failed_dip = NULL;
+ ndi_rele_devi(dip);
+ } else if (dr_is_real_device(dip) &&
+ srh->sr_failed_dip == NULL) {
+
+ if ((bn = ddi_binding_name(dip)) != NULL) {
+ major = ddi_name_to_major(bn);
+ } else {
+ bn = "<null>";
+ }
+ if (!dr_bypass_device(bn) &&
+ !drmach_verify_sr(dip, 0)) {
+ char d_name[40], d_alias[40], *d_info;
+
+ d_name[0] = 0;
+ d_info = ddi_get_name_addr(dip);
+ if (d_info == NULL)
+ d_info = "<null>";
+
+ if (!dr_resolve_devname(dip, d_name, d_alias)) {
+ if (d_alias[0] != 0) {
+ prom_printf("\tresuming "
+ "%s@%s (aka %s)\n", d_name,
+ d_info, d_alias);
+ } else {
+ prom_printf("\tresuming "
+ "%s@%s\n", d_name, d_info);
+ }
+ } else {
+ prom_printf("\tresuming %s@%s\n", bn,
+ d_info);
+ }
+
+ if (devi_attach(dip, DDI_RESUME) !=
+ DDI_SUCCESS) {
+ /*
+ * Print a console warning,
+ * set an e_code of ESBD_RESUME,
+ * and save the driver major
+ * number in the e_rsc.
+ */
+ prom_printf("\tFAILED to resume %s@%s",
+ d_name[0] ? d_name : bn, d_info);
+
+ srh->sr_err_idx =
+ dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT,
+ (uint64_t)major);
+
+ handle = srh->sr_dr_handlep;
+
+ dr_op_err(CE_IGNORE, handle,
+ ESBD_RESUME, "%s@%s",
+ d_name[0] ? d_name : bn, d_info);
+ }
+ }
+ }
+
+ /* Hold parent busy while walking its children */
+ ndi_devi_enter(dip, &circ);
+ dr_resume_devices(ddi_get_child(dip), srh);
+ ndi_devi_exit(dip, circ);
+ last = dip;
+ }
+}
+
+/*
+ * True if thread is virtually stopped. Similar to CPR_VSTOPPED
+ * but from DR point of view. These user threads are waiting in
+ * the kernel. Once they complete in the kernel, they will process
+ * the stop signal and stop.
+ */
+#define DR_VSTOPPED(t) \
+ ((t)->t_state == TS_SLEEP && \
+ (t)->t_wchan != NULL && \
+ (t)->t_astflag && \
+ ((t)->t_proc_flag & TP_CHKPT))
+
+/* ARGSUSED */
+static int
+dr_stop_user_threads(dr_sr_handle_t *srh)
+{
+ int count;
+ int bailout;
+ dr_handle_t *handle = srh->sr_dr_handlep;
+ static fn_t f = "dr_stop_user_threads";
+ kthread_id_t tp;
+
+ extern void add_one_utstop();
+ extern void utstop_timedwait(clock_t);
+ extern void utstop_init(void);
+
+#define DR_UTSTOP_RETRY 4
+#define DR_UTSTOP_WAIT hz
+
+ if (dr_skip_user_threads)
+ return (DDI_SUCCESS);
+
+ utstop_init();
+
+ /* we need to try a few times to get past fork, etc. */
+ srh->sr_err_idx = 0;
+ for (count = 0; count < DR_UTSTOP_RETRY; count++) {
+ /* walk the entire threadlist */
+ mutex_enter(&pidlock);
+ for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* handle kernel threads separately */
+ if (p->p_as == &kas || p->p_stat == SZOMB)
+ continue;
+
+ mutex_enter(&p->p_lock);
+ thread_lock(tp);
+
+ if (tp->t_state == TS_STOPPED) {
+ /* add another reason to stop this thread */
+ tp->t_schedflag &= ~TS_RESUME;
+ } else {
+ tp->t_proc_flag |= TP_CHKPT;
+
+ thread_unlock(tp);
+ mutex_exit(&p->p_lock);
+ add_one_utstop();
+ mutex_enter(&p->p_lock);
+ thread_lock(tp);
+
+ aston(tp);
+
+ if (ISWAKEABLE(tp) || ISWAITING(tp)) {
+ setrun_locked(tp);
+ }
+
+ }
+
+ /* grab thread if needed */
+ if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
+ poke_cpu(tp->t_cpu->cpu_id);
+
+
+ thread_unlock(tp);
+ mutex_exit(&p->p_lock);
+ }
+ mutex_exit(&pidlock);
+
+
+ /* let everything catch up */
+ utstop_timedwait(count * count * DR_UTSTOP_WAIT);
+
+
+ /* now, walk the threadlist again to see if we are done */
+ mutex_enter(&pidlock);
+ for (tp = curthread->t_next, bailout = 0;
+ tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* handle kernel threads separately */
+ if (p->p_as == &kas || p->p_stat == SZOMB)
+ continue;
+
+ /*
+ * If this thread didn't stop, and we don't allow
+ * unstopped blocked threads, bail.
+ */
+ thread_lock(tp);
+ if (!CPR_ISTOPPED(tp) &&
+ !(dr_allow_blocked_threads &&
+ DR_VSTOPPED(tp))) {
+ bailout = 1;
+ if (count == DR_UTSTOP_RETRY - 1) {
+ /*
+ * save the pid for later reporting
+ */
+ srh->sr_err_idx =
+ dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT,
+ (uint64_t)p->p_pid);
+
+ cmn_err(CE_WARN, "%s: "
+ "failed to stop thread: "
+ "process=%s, pid=%d",
+ f, p->p_user.u_psargs, p->p_pid);
+
+ PR_QR("%s: failed to stop thread: "
+ "process=%s, pid=%d, t_id=0x%p, "
+ "t_state=0x%x, t_proc_flag=0x%x, "
+ "t_schedflag=0x%x\n",
+ f, p->p_user.u_psargs, p->p_pid,
+ (void *)tp, tp->t_state,
+ tp->t_proc_flag, tp->t_schedflag);
+ }
+
+ }
+ thread_unlock(tp);
+ }
+ mutex_exit(&pidlock);
+
+ /* were all the threads stopped? */
+ if (!bailout)
+ break;
+ }
+
+ /* were we unable to stop all threads after a few tries? */
+ if (bailout) {
+ handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
+ srh->sr_err_idx, 0);
+ return (ESRCH);
+ }
+
+ return (DDI_SUCCESS);
+}
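+
+/*
+ * Backoff example: with DR_UTSTOP_RETRY == 4 and DR_UTSTOP_WAIT == hz,
+ * utstop_timedwait() above waits count * count * hz ticks between
+ * passes, i.e. roughly 0, 1, 4 and 9 seconds, giving threads stuck
+ * in fork() and the like progressively longer to reach a stop point.
+ */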
+
+static void
+dr_start_user_threads(void)
+{
+ kthread_id_t tp;
+
+ mutex_enter(&pidlock);
+
+ /* walk all threads and release them */
+ for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* skip kernel threads */
+ if (ttoproc(tp)->p_as == &kas)
+ continue;
+
+ mutex_enter(&p->p_lock);
+ tp->t_proc_flag &= ~TP_CHKPT;
+ mutex_exit(&p->p_lock);
+
+ thread_lock(tp);
+ if (CPR_ISTOPPED(tp)) {
+ /* back on the runq */
+ tp->t_schedflag |= TS_RESUME;
+ setrun_locked(tp);
+ }
+ thread_unlock(tp);
+ }
+
+ mutex_exit(&pidlock);
+}
+
+static void
+dr_signal_user(int sig)
+{
+ struct proc *p;
+
+ mutex_enter(&pidlock);
+
+ for (p = practive; p != NULL; p = p->p_next) {
+ /* only user threads */
+ if (p->p_exec == NULL || p->p_stat == SZOMB ||
+ p == proc_init || p == ttoproc(curthread))
+ continue;
+
+ mutex_enter(&p->p_lock);
+ sigtoproc(p, NULL, sig);
+ mutex_exit(&p->p_lock);
+ }
+
+ mutex_exit(&pidlock);
+
+ /* add a bit of delay */
+ delay(hz);
+}
+
+void
+dr_resume(dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+
+ handle = srh->sr_dr_handlep;
+
+ switch (srh->sr_suspend_state) {
+ case DR_SRSTATE_FULL:
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Prevent false alarm in tod_validate() due to tod
+ * value change between suspend and resume
+ */
+ mutex_enter(&tod_lock);
+ tod_status_set(TOD_DR_RESUME_DONE);
+ mutex_exit(&tod_lock);
+
+ dr_enable_intr(); /* enable intr & clock */
+
+ start_cpus();
+ mutex_exit(&cpu_lock);
+
+ /*
+ * This should only be called if drmach_suspend_last()
+ * was called and state transitioned to DR_SRSTATE_FULL
+ * to prevent resume attempts on device instances that
+ * were not previously suspended.
+ */
+ drmach_resume_first();
+
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_DRIVER:
+ /*
+ * resume drivers
+ */
+ srh->sr_err_idx = 0;
+
+ /* no parent dip to hold busy */
+ dr_resume_devices(ddi_root_node(), srh);
+
+ if (srh->sr_err_idx && srh->sr_dr_handlep) {
+ (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
+ srh->sr_err_ints, srh->sr_err_idx, 1);
+ }
+
+ /*
+ * resume the lock manager
+ */
+ lm_cprresume();
+
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_USER:
+ /*
+ * finally, resume user threads
+ */
+ if (!dr_skip_user_threads) {
+ prom_printf("DR: resuming user threads...\n");
+ dr_start_user_threads();
+ }
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_BEGIN:
+ default:
+ /*
+ * let those who care know that we've just resumed
+ */
+ PR_QR("sending SIGTHAW...\n");
+ dr_signal_user(SIGTHAW);
+ break;
+ }
+
+ i_ndi_allow_device_tree_changes(handle->h_ndi);
+
+ prom_printf("DR: resume COMPLETED\n");
+}
+
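+/*
+ * Quiesce the system in preparation for a DR operation: stop user threads,
+ * check for unsafe devices unless forced, sync filesystems, suspend drivers
+ * and the lock manager, and finally pause all other CPUs with interrupts
+ * disabled.  On failure, the stages already completed are undone via
+ * dr_resume().
+ */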
+int
+dr_suspend(dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ int force;
+ int dev_errs_idx;
+ uint64_t dev_errs[DR_MAX_ERR_INT];
+ int rc = DDI_SUCCESS;
+
+ handle = srh->sr_dr_handlep;
+
+ force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
+
+ i_ndi_block_device_tree_changes(&handle->h_ndi);
+
+ prom_printf("\nDR: suspending user threads...\n");
+ srh->sr_suspend_state = DR_SRSTATE_USER;
+ if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
+ dr_check_user_stop_result) {
+ dr_resume(srh);
+ return (rc);
+ }
+
+ if (!force) {
+ struct dr_ref drc = {0};
+
+ prom_printf("\nDR: checking devices...\n");
+ dev_errs_idx = 0;
+
+ drc.arr = dev_errs;
+ drc.idx = &dev_errs_idx;
+ drc.len = DR_MAX_ERR_INT;
+
+ /*
+ * Since the root node can never go away, it
+ * doesn't have to be held.
+ */
+ ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
+ if (dev_errs_idx) {
+ handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
+ dev_errs_idx, 1);
+ dr_resume(srh);
+ return (DDI_FAILURE);
+ }
+ PR_QR("done\n");
+ } else {
+ prom_printf("\nDR: dr_suspend invoked with force flag\n");
+ }
+
+#ifndef SKIP_SYNC
+ /*
+	 * This sync swaps out all user pages.
+ */
+ vfs_sync(SYNC_ALL);
+#endif
+
+ /*
+ * special treatment for lock manager
+ */
+ lm_cprsuspend();
+
+#ifndef SKIP_SYNC
+ /*
+ * sync the file system in case we never make it back
+ */
+ sync();
+#endif
+
+ /*
+ * now suspend drivers
+ */
+ prom_printf("DR: suspending drivers...\n");
+ srh->sr_suspend_state = DR_SRSTATE_DRIVER;
+ srh->sr_err_idx = 0;
+ /* No parent to hold busy */
+ if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
+ if (srh->sr_err_idx && srh->sr_dr_handlep) {
+ (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
+ srh->sr_err_ints, srh->sr_err_idx, 1);
+ }
+ dr_resume(srh);
+ return (rc);
+ }
+
+ drmach_suspend_last();
+
+ /*
+ * finally, grab all cpus
+ */
+ srh->sr_suspend_state = DR_SRSTATE_FULL;
+
+ mutex_enter(&cpu_lock);
+ pause_cpus(NULL);
+ dr_stop_intr();
+
+ return (rc);
+}
+
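+/*
+ * Passthru test entry point: exercise the suspend/resume path by suspending
+ * the system, immediately resuming it, and reporting any errors seen.
+ */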
+int
+dr_pt_test_suspend(dr_handle_t *hp)
+{
+ dr_sr_handle_t *srh;
+ int err;
+ uint_t psmerr;
+ static fn_t f = "dr_pt_test_suspend";
+
+ PR_QR("%s...\n", f);
+
+ srh = dr_get_sr_handle(hp);
+ if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
+ dr_resume(srh);
+ if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
+ PR_QR("%s: error on dr_resume()", f);
+ switch (psmerr) {
+ case ESBD_RESUME:
+ PR_QR("Couldn't resume devices: %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_KTHREAD:
+ PR_ALL("psmerr is ESBD_KTHREAD\n");
+ break;
+ default:
+ PR_ALL("Resume error unknown = %d\n", psmerr);
+ break;
+ }
+ }
+ } else {
+ PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
+ psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
+ switch (psmerr) {
+ case ESBD_UNSAFE:
+ PR_ALL("Unsafe devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_RTTHREAD:
+ PR_ALL("RT threads (PIDs): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_UTHREAD:
+ PR_ALL("User threads (PIDs): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_SUSPEND:
+ PR_ALL("Non-suspendable devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_RESUME:
+ PR_ALL("Could not resume devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_KTHREAD:
+ PR_ALL("psmerr is ESBD_KTHREAD\n");
+ break;
+
+ case ESBD_NOERROR:
+ PR_ALL("sbd_error_t error code not set\n");
+ break;
+
+ default:
+ PR_ALL("Unknown error psmerr = %d\n", psmerr);
+ break;
+ }
+ }
+ dr_release_sr_handle(srh);
+
+ return (0);
+}
+
+/*
+ * Add a new integer value to the end of an array. Don't allow duplicates to
+ * appear in the array, and don't allow the array to overflow. Return the new
+ * total number of entries in the array.
+ */
+static int
+dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
+{
+ int i;
+
+ if (arr == NULL)
+ return (0);
+
+ if (idx >= len)
+ return (idx);
+
+ for (i = 0; i < idx; i++) {
+ if (arr[i] == val)
+ return (idx);
+ }
+
+ arr[idx++] = val;
+
+ return (idx);
+}
+
+/*
+ * Construct an sbd_error_t featuring a string representation of an array of
+ * integers as its e_rsc.
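+ * E.g., a majors array might render as "sd, st, ..." (driver names here are
+ * purely illustrative).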
+ */
+static sbd_error_t *
+drerr_int(int e_code, uint64_t *arr, int idx, int majors)
+{
+ int i, n, buf_len, buf_idx, buf_avail;
+ char *dname;
+ char *buf;
+ sbd_error_t *new_sbd_err;
+ static char s_ellipsis[] = "...";
+
+ if (arr == NULL || idx <= 0)
+ return (NULL);
+
+ /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
+ buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * This is the total working area of the buffer. It must be computed
+ * as the size of 'buf', minus reserved space for the null terminator
+ * and the ellipsis string.
+ */
+ buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
+
+ /* Construct a string representation of the array values */
+ for (buf_idx = 0, i = 0; i < idx; i++) {
+ buf_avail = buf_len - buf_idx;
+ if (majors) {
+ dname = ddi_major_to_name(arr[i]);
+ if (dname) {
+ n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
+ dname);
+ } else {
+ n = snprintf(&buf[buf_idx], buf_avail,
+ "major %" PRIu64 ", ", arr[i]);
+ }
+ } else {
+ n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
+ arr[i]);
+ }
+
+ /* An ellipsis gets appended when no more values fit */
+ if (n >= buf_avail) {
+ (void) strcpy(&buf[buf_idx], s_ellipsis);
+ break;
+ }
+
+ buf_idx += n;
+ }
+
+	/* If all the contents fit, remove the trailing ", " separator */
+ if (n < buf_avail) {
+ buf[--buf_idx] = '\0';
+ buf[--buf_idx] = '\0';
+ }
+
+ /* Return an sbd_error_t with the buffer and e_code */
+ new_sbd_err = drerr_new(1, e_code, buf);
+ kmem_free(buf, MAXPATHLEN);
+ return (new_sbd_err);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_util.c b/usr/src/uts/i86pc/io/dr/dr_util.c
new file mode 100644
index 0000000000..d60dc7f7b8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_util.c
@@ -0,0 +1,457 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/param.h> /* for NULL */
+#include <sys/sbd_ioctl.h>
+#include <sys/dr_util.h>
+#include <sys/varargs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+
+/* sbd_etab[] and sbd_etab_len provided by sbdgenerr.pl */
+extern sbd_etab_t sbd_etab[];
+extern int sbd_etab_len;
+
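+/*
+ * Allocate an sbd_error_t and, if a format string is given, render its
+ * e_rsc resource string from the supplied va_list.
+ */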
+sbd_error_t *
+sbd_err_new(int e_code, char *fmt, va_list args)
+{
+ sbd_error_t *new;
+
+ new = GETSTRUCT(sbd_error_t, 1);
+ new->e_code = e_code;
+
+ if (fmt)
+ (void) vsnprintf(new->e_rsc, sizeof (new->e_rsc), fmt, args);
+
+ return (new);
+}
+
+void
+sbd_err_log(sbd_error_t *ep, int ce)
+{
+ char buf[32];
+ char *fmt;
+ char *txt;
+ int i;
+ sbd_etab_t *tp;
+
+ if (!ep)
+ return;
+
+ if (ep->e_rsc[0] == '\0')
+ fmt = "%s";
+ else
+ fmt = "%s: %s";
+
+ for (tp = sbd_etab, i = 0; i < sbd_etab_len; i++, tp++)
+ if (ep->e_code >= tp->t_base && ep->e_code <= tp->t_bnd)
+ break;
+
+ if (i < sbd_etab_len)
+ txt = tp->t_text[ep->e_code - tp->t_base];
+ else {
+ (void) snprintf(buf, sizeof (buf), "error %d", ep->e_code);
+ txt = buf;
+ }
+
+ cmn_err(ce, fmt, txt, ep->e_rsc);
+}
+
+void
+sbd_err_clear(sbd_error_t **ep)
+{
+ FREESTRUCT(*ep, sbd_error_t, 1);
+ *ep = NULL;
+}
+
+void
+sbd_err_set_c(sbd_error_t **ep, int ce, int e_code, char *fmt, ...)
+{
+ sbd_error_t *tmp;
+ va_list args;
+
+ va_start(args, fmt);
+
+ tmp = sbd_err_new(e_code, fmt, args);
+
+ sbd_err_log(tmp, ce);
+
+ if (*ep == NULL)
+ *ep = tmp;
+ else
+ sbd_err_clear(&tmp);
+
+ va_end(args);
+}
+
+void
+sbd_err_set(sbd_error_t **ep, int ce, int e_code, char *fmt, ...)
+{
+ sbd_error_t *tmp;
+ va_list args;
+
+ va_start(args, fmt);
+
+ tmp = sbd_err_new(e_code, fmt, args);
+
+ sbd_err_log(tmp, ce);
+
+ *ep = tmp;
+
+ va_end(args);
+}
+
+sbd_error_t *
+drerr_new_v(int e_code, char *fmt, va_list args)
+{
+ return (sbd_err_new(e_code, fmt, args));
+}
+
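+/*
+ * Allocate an sbd_error_t from varargs; if log is set, also report it via
+ * sbd_err_log() at CE_WARN severity.
+ */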
+sbd_error_t *
+drerr_new(int log, int e_code, char *fmt, ...)
+{
+ sbd_error_t *ep;
+ va_list args;
+
+ va_start(args, fmt);
+ ep = sbd_err_new(e_code, fmt, args);
+ va_end(args);
+
+ if (log)
+ sbd_err_log(ep, CE_WARN);
+
+ return (ep);
+}
+
+void
+drerr_set_c(int log, sbd_error_t **ep, int e_code, char *fmt, ...)
+{
+ sbd_error_t *err;
+ va_list args;
+
+ va_start(args, fmt);
+ err = sbd_err_new(e_code, fmt, args);
+ va_end(args);
+
+ if (log)
+ sbd_err_log(err, CE_WARN);
+
+ if (*ep == NULL)
+ *ep = err;
+ else
+ sbd_err_clear(&err);
+}
+
+
+/*
+ * Memlist support.
+ */
+void
+dr_memlist_delete(struct memlist *mlist)
+{
+ register struct memlist *ml;
+
+ for (ml = mlist; ml; ml = mlist) {
+ mlist = ml->ml_next;
+ FREESTRUCT(ml, struct memlist, 1);
+ }
+}
+
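+/*
+ * Return 1 if the two address-sorted memlists overlap anywhere, else 0.
+ */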
+int
+dr_memlist_intersect(struct memlist *al, struct memlist *bl)
+{
+ uint64_t astart, aend, bstart, bend;
+
+ if ((al == NULL) || (bl == NULL))
+ return (0);
+
+ aend = al->ml_address + al->ml_size;
+ bstart = bl->ml_address;
+ bend = bl->ml_address + bl->ml_size;
+
+ while (al && bl) {
+ while (al && (aend <= bstart))
+ if ((al = al->ml_next) != NULL)
+ aend = al->ml_address + al->ml_size;
+ if (al == NULL)
+ return (0);
+
+ if ((astart = al->ml_address) <= bstart)
+ return (1);
+
+ while (bl && (bend <= astart))
+ if ((bl = bl->ml_next) != NULL)
+ bend = bl->ml_address + bl->ml_size;
+ if (bl == NULL)
+ return (0);
+
+ if ((bstart = bl->ml_address) <= astart)
+ return (1);
+ }
+
+ return (0);
+}
+
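+/*
+ * Merge adjacent or overlapping entries of an address-sorted memlist in
+ * place, freeing each entry that gets absorbed.
+ */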
+void
+dr_memlist_coalesce(struct memlist *mlist)
+{
+ uint64_t end, nend;
+
+ if ((mlist == NULL) || (mlist->ml_next == NULL))
+ return;
+
+ while (mlist->ml_next) {
+ end = mlist->ml_address + mlist->ml_size;
+ if (mlist->ml_next->ml_address <= end) {
+ struct memlist *nl;
+
+ nend = mlist->ml_next->ml_address +
+ mlist->ml_next->ml_size;
+ if (nend > end)
+ mlist->ml_size += (nend - end);
+ nl = mlist->ml_next;
+ mlist->ml_next = mlist->ml_next->ml_next;
+ if (nl) {
+ FREESTRUCT(nl, struct memlist, 1);
+ }
+ if (mlist->ml_next)
+ mlist->ml_next->ml_prev = mlist;
+ } else {
+ mlist = mlist->ml_next;
+ }
+ }
+}
+
+#ifdef DEBUG
+void
+memlist_dump(struct memlist *mlist)
+{
+ register struct memlist *ml;
+
+ if (mlist == NULL)
+ printf("memlist> EMPTY\n");
+ else for (ml = mlist; ml; ml = ml->ml_next)
+ printf("memlist> 0x%" PRIx64 ", 0x%" PRIx64 "\n",
+ ml->ml_address, ml->ml_size);
+}
+#endif
+
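+/*
+ * Return a newly allocated copy of the given memlist.
+ */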
+struct memlist *
+dr_memlist_dup(struct memlist *mlist)
+{
+ struct memlist *hl = NULL, *tl, **mlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ mlp = &hl;
+ tl = *mlp;
+ for (; mlist; mlist = mlist->ml_next) {
+ *mlp = GETSTRUCT(struct memlist, 1);
+ (*mlp)->ml_address = mlist->ml_address;
+ (*mlp)->ml_size = mlist->ml_size;
+ (*mlp)->ml_prev = tl;
+ tl = *mlp;
+ mlp = &((*mlp)->ml_next);
+ }
+ *mlp = NULL;
+
+ return (hl);
+}
+
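+/*
+ * Insert the span [base, base + len) into the address-sorted memlist and
+ * merge any entries it touches via dr_memlist_coalesce().
+ */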
+struct memlist *
+dr_memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ struct memlist *ml, *tl, *nl;
+
+ if (len == 0ull)
+ return (NULL);
+
+ if (mlist == NULL) {
+ mlist = GETSTRUCT(struct memlist, 1);
+ mlist->ml_address = base;
+ mlist->ml_size = len;
+ mlist->ml_next = mlist->ml_prev = NULL;
+
+ return (mlist);
+ }
+
+ for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
+ if (base < ml->ml_address) {
+ if ((base + len) < ml->ml_address) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = ml;
+ if ((nl->ml_prev = ml->ml_prev) != NULL)
+ nl->ml_prev->ml_next = nl;
+ ml->ml_prev = nl;
+ if (mlist == ml)
+ mlist = nl;
+ } else {
+ ml->ml_size = MAX((base + len),
+ (ml->ml_address + ml->ml_size)) - base;
+ ml->ml_address = base;
+ }
+ break;
+
+ } else if (base <= (ml->ml_address + ml->ml_size)) {
+ ml->ml_size = MAX((base + len),
+ (ml->ml_address + ml->ml_size)) -
+ MIN(ml->ml_address, base);
+ ml->ml_address = MIN(ml->ml_address, base);
+ break;
+ }
+ }
+ if (ml == NULL) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = NULL;
+ nl->ml_prev = tl;
+ tl->ml_next = nl;
+ }
+
+ dr_memlist_coalesce(mlist);
+
+ return (mlist);
+}
+
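+/*
+ * Remove the span [base, base + len) from the memlist, trimming, splitting
+ * or freeing entries as needed; returns the possibly-changed list head.
+ */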
+struct memlist *
+dr_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ uint64_t end;
+ struct memlist *ml, *tl, *nlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ end = base + len;
+ if ((end <= mlist->ml_address) || (base == end))
+ return (mlist);
+
+ for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
+ uint64_t mend;
+
+ nlp = ml->ml_next;
+
+ if (end <= ml->ml_address)
+ break;
+
+ mend = ml->ml_address + ml->ml_size;
+ if (base < mend) {
+ if (base <= ml->ml_address) {
+ ml->ml_address = end;
+ if (end >= mend)
+ ml->ml_size = 0ull;
+ else
+ ml->ml_size = mend - ml->ml_address;
+ } else {
+ ml->ml_size = base - ml->ml_address;
+ if (end < mend) {
+ struct memlist *nl;
+ /*
+				 * splitting a memlist entry.
+ */
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = end;
+ nl->ml_size = mend - nl->ml_address;
+ if ((nl->ml_next = nlp) != NULL)
+ nlp->ml_prev = nl;
+ nl->ml_prev = ml;
+ ml->ml_next = nl;
+ nlp = nl;
+ }
+ }
+ if (ml->ml_size == 0ull) {
+ if (ml == mlist) {
+ if ((mlist = nlp) != NULL)
+ nlp->ml_prev = NULL;
+ FREESTRUCT(ml, struct memlist, 1);
+ if (mlist == NULL)
+ break;
+ ml = nlp;
+ } else {
+ if ((tl->ml_next = nlp) != NULL)
+ nlp->ml_prev = tl;
+ FREESTRUCT(ml, struct memlist, 1);
+ ml = tl;
+ }
+ }
+ }
+ }
+
+ return (mlist);
+}
+
+/*
+ * add span without merging
+ */
+struct memlist *
+dr_memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ struct memlist *ml, *tl, *nl;
+
+ if (len == 0ull)
+ return (NULL);
+
+ if (mlist == NULL) {
+ mlist = GETSTRUCT(struct memlist, 1);
+ mlist->ml_address = base;
+ mlist->ml_size = len;
+ mlist->ml_next = mlist->ml_prev = NULL;
+
+ return (mlist);
+ }
+
+ for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
+ if (base < ml->ml_address) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = ml;
+ if ((nl->ml_prev = ml->ml_prev) != NULL)
+ nl->ml_prev->ml_next = nl;
+ ml->ml_prev = nl;
+ if (mlist == ml)
+ mlist = nl;
+ break;
+ }
+ }
+
+ if (ml == NULL) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = NULL;
+ nl->ml_prev = tl;
+ tl->ml_next = nl;
+ }
+
+ return (mlist);
+}
diff --git a/usr/src/uts/i86pc/io/dr/sbdgenerr.pl b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl
new file mode 100644
index 0000000000..9980c96459
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl
@@ -0,0 +1,90 @@
+#!/usr/bin/perl
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
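+#
+# Reads "#define E..." error code definitions from stdin and emits C source
+# defining the sbd_etab[] tables and sbd_etab_len, which map each contiguous
+# range of error codes to its text descriptions (consumed by dr_util.c's
+# sbd_err_log()).  Any command-line arguments act as a prefix filter on the
+# codes included; e.g. "sbdgenerr.pl ESBD < sbd_ioctl.h" (invocation shown
+# for illustration only).
+#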
+sub trim {
+ my ($line) = @_;
+ $line =~ s#/\*|\*/##g;
+ $line =~ s#^\s+|\s+$##g;
+ return $line;
+}
+
+my $filter = 0;
+my %prefix;
+while ($#ARGV >= 0) {
+ $prefix{$ARGV[0]} = 0;
+ shift @ARGV;
+ $filter = 1;
+}
+
+my $base;
+my $bnd;
+my @text;
+my @sets;
+while (<STDIN>) {
+ my $n = m@^#define\s(E\w\w\w)\w+\s+(\d+)(.*)@;
+ next unless ($n > 0);
+ next unless ($filter == 0 || defined $prefix{$1});
+ my $txt = trim($3);
+ if (length($txt) == 0) {
+ my $l = <STDIN>;
+ $txt = trim($l);
+ }
+
+ $base = $2 if (!defined $base);
+ if (defined $bnd && $2 != $bnd + 1) {
+ push(@sets, { base => $base, bnd => $bnd });
+ $base = $2;
+ }
+ $bnd = $2;
+ push(@text, $txt);
+}
+
+push(@sets, { base => $base, bnd => $bnd });
+
+printf "#include <sys/sbd_ioctl.h>\n";
+
+my $i = 0;
+my $s = 0;
+do {
+ my $set = $sets[$s];
+
+ printf "static char *sbd_t%d[] = {\n", $set->{base};
+ my $n = $set->{bnd} - $set->{base} + 1;
+ while ($n--) {
+ printf "\t\"%s\",\n", $text[$i++];
+ }
+ printf "};\n";
+} while (++$s <= $#sets);
+
+printf "sbd_etab_t sbd_etab[] = {\n";
+$s = 0;
+do {
+ my $set = $sets[$s];
+ printf "\t{ %d, %d, sbd_t%d },\n",
+ $set->{base}, $set->{bnd}, $set->{base};
+} while (++$s <= $#sets);
+printf "};\n";
+printf "int sbd_etab_len = %d;\n", $s;
diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c
index 319193bcc0..a3afe84d07 100644
--- a/usr/src/uts/i86pc/io/mp_platform_common.c
+++ b/usr/src/uts/i86pc/io/mp_platform_common.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* PSMI 1.1 extensions are supported only in 2.6 and later versions.
@@ -29,8 +33,9 @@
* PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
* PSMI 1.5 extensions are supported in Solaris Nevada.
* PSMI 1.6 extensions are supported in Solaris Nevada.
+ * PSMI 1.7 extensions are supported in Solaris Nevada.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/processor.h>
#include <sys/time.h>
@@ -93,6 +98,7 @@ static int apic_setup_irq_table(dev_info_t *dip, int irqno,
struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
int type);
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
+static void apic_free_apic_cpus(void);
static void apic_try_deferred_reprogram(int ipl, int vect);
static void delete_defer_repro_ent(int which_irq);
static void apic_ioapic_wait_pending_clear(int ioapicindex,
@@ -194,6 +200,11 @@ int apic_redist_cpu_skip = 0;
int apic_num_imbalance = 0;
int apic_num_rebind = 0;
+/*
+ * Maximum number of APIC CPUs in the system; -1 indicates that dynamic
+ * allocation of CPU ids is disabled.
+ */
+int apic_max_nproc = -1;
int apic_nproc = 0;
size_t apic_cpus_size = 0;
int apic_defconf = 0;
@@ -597,6 +608,16 @@ apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
}
}
+static void
+apic_free_apic_cpus(void)
+{
+ if (apic_cpus != NULL) {
+ kmem_free(apic_cpus, apic_cpus_size);
+ apic_cpus = NULL;
+ apic_cpus_size = 0;
+ }
+}
+
static int
acpi_probe(char *modname)
{
@@ -639,7 +660,6 @@ acpi_probe(char *modname)
id = apic_reg_ops->apic_read(APIC_LID_REG);
local_ids[0] = (uchar_t)(id >> 24);
apic_nproc = index = 1;
- CPUSET_ONLY(apic_cpumask, 0);
apic_io_max = 0;
ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1);
@@ -652,16 +672,12 @@ acpi_probe(char *modname)
mpa = (ACPI_MADT_LOCAL_APIC *) ap;
if (mpa->LapicFlags & ACPI_MADT_ENABLED) {
if (mpa->Id == local_ids[0]) {
+ ASSERT(index == 1);
proc_ids[0] = mpa->ProcessorId;
- (void) acpica_map_cpu(0,
- mpa->ProcessorId);
} else if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
local_ids[index] = mpa->Id;
proc_ids[index] = mpa->ProcessorId;
- CPUSET_ADD(apic_cpumask, index);
- (void) acpica_map_cpu(index,
- mpa->ProcessorId);
index++;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
@@ -770,9 +786,7 @@ acpi_probe(char *modname)
if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
local_ids[index] = mpx2a->LocalApicId;
- CPUSET_ADD(apic_cpumask, index);
- (void) acpica_map_cpu(index,
- mpx2a->Uid);
+				proc_ids[index] = mpx2a->Uid;
index++;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
@@ -813,7 +827,14 @@ acpi_probe(char *modname)
ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length);
}
- apic_cpus_size = apic_nproc * sizeof (*apic_cpus);
+ /*
+	 * Allocate enough space for possible hot-adding of CPUs.
+	 * max_ncpus may be less than apic_nproc if it's set by the user.
+ */
+ if (plat_dr_support_cpu()) {
+ apic_max_nproc = max_ncpus;
+ }
+ apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus);
if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL)
goto cleanup;
@@ -825,6 +846,47 @@ acpi_probe(char *modname)
for (i = 0; i < apic_nproc; i++) {
apic_cpus[i].aci_local_id = local_ids[i];
apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
+ apic_cpus[i].aci_processor_id = proc_ids[i];
+ /* Only build mapping info for CPUs present at boot. */
+ if (i < boot_ncpus)
+ (void) acpica_map_cpu(i, proc_ids[i]);
+ }
+
+ /*
+ * To support CPU dynamic reconfiguration, the apic CPU info structure
+ * for each possible CPU will be pre-allocated at boot time.
+ * The state for each apic CPU info structure will be assigned according
+ * to the following rules:
+ * Rule 1:
+ * Slot index range: [0, min(apic_nproc, boot_ncpus))
+ * State flags: 0
+ * Note: cpu exists and will be configured/enabled at boot time
+ * Rule 2:
+ * Slot index range: [boot_ncpus, apic_nproc)
+ * State flags: APIC_CPU_FREE | APIC_CPU_DIRTY
+ * Note: cpu exists but won't be configured/enabled at boot time
+ * Rule 3:
+ * Slot index range: [apic_nproc, boot_ncpus)
+ * State flags: APIC_CPU_FREE
+ * Note: cpu doesn't exist at boot time
+ * Rule 4:
+ * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus)
+ * State flags: APIC_CPU_FREE
+ * Note: cpu doesn't exist at boot time
+ */
+ CPUSET_ZERO(apic_cpumask);
+ for (i = 0; i < min(boot_ncpus, apic_nproc); i++) {
+ CPUSET_ADD(apic_cpumask, i);
+ apic_cpus[i].aci_status = 0;
+ }
+ for (i = boot_ncpus; i < apic_nproc; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
+ }
+ for (i = apic_nproc; i < boot_ncpus; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE;
+ }
+ for (i = max(boot_ncpus, apic_nproc); i < max_ncpus; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE;
}
for (i = 0; i < apic_io_max; i++) {
@@ -926,10 +988,12 @@ acpi_probe(char *modname)
/* if setting APIC mode failed above, we fall through to cleanup */
cleanup:
+ apic_free_apic_cpus();
if (apicadr != NULL) {
mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
apicadr = NULL;
}
+ apic_max_nproc = -1;
apic_nproc = 0;
for (i = 0; i < apic_io_max; i++) {
mapout_ioapic((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
@@ -954,6 +1018,11 @@ apic_handle_defconf()
{
uint_t lid;
+ /* Failed to probe ACPI MADT tables, disable CPU DR. */
+ apic_max_nproc = -1;
+ apic_free_apic_cpus();
+ plat_dr_disable_cpu();
+
/*LINTED: pointer cast may result in improper alignment */
apicioadr[0] = mapin_ioapic(APIC_IO_ADDR,
APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
@@ -998,8 +1067,7 @@ apic_handle_defconf()
return (PSM_SUCCESS);
apic_handle_defconf_fail:
- if (apic_cpus)
- kmem_free(apic_cpus, apic_cpus_size);
+ apic_free_apic_cpus();
if (apicadr)
mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
if (apicioadr[0])
@@ -1187,7 +1255,51 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
boolean_t
apic_cpu_in_range(int cpu)
{
- return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ cpu &= ~IRQ_USER_BOUND;
+ /* Check whether cpu id is in valid range. */
+ if (cpu < 0 || cpu >= apic_nproc) {
+ return (B_FALSE);
+ } else if (apic_max_nproc != -1 && cpu >= apic_max_nproc) {
+ /*
+ * Check whether cpuid is in valid range if CPU DR is enabled.
+ */
+ return (B_FALSE);
+ } else if (!CPU_IN_SET(apic_cpumask, cpu)) {
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Must be called with interrupts disabled and the apic_ioapic_lock held.
+ */
+processorid_t
+apic_find_next_cpu_intr(void)
+{
+ int i, count;
+ processorid_t cpuid = 0;
+
+ ASSERT(LOCK_HELD(&apic_ioapic_lock));
+
+ /*
+ * Find next CPU with INTR_ENABLE flag set.
+ * Assume that there is at least one CPU with interrupts enabled.
+ */
+ for (count = 0; count < apic_nproc; count++) {
+ if (apic_next_bind_cpu >= apic_nproc) {
+ apic_next_bind_cpu = 0;
+ }
+ i = apic_next_bind_cpu++;
+ if (apic_cpu_in_range(i) &&
+ (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
+ cpuid = i;
+ break;
+ }
+ }
+ ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_INTR_ENABLE) != 0);
+
+ return (cpuid);
}
uint16_t
@@ -1770,7 +1882,7 @@ apic_delspl_common(int irqno, int ipl, int min_ipl, int max_ipl)
bind_cpu = irqptr->airq_temp_cpu;
if (((uint32_t)bind_cpu != IRQ_UNBOUND) &&
((uint32_t)bind_cpu != IRQ_UNINIT)) {
- ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ ASSERT(apic_cpu_in_range(bind_cpu));
if (bind_cpu & IRQ_USER_BOUND) {
/* If hardbound, temp_cpu == cpu */
bind_cpu &= ~IRQ_USER_BOUND;
@@ -2392,6 +2504,7 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
major_t major;
char *name, *drv_name, *prop_val, *cptr;
char prop_name[32];
+ ulong_t iflag;
if (apic_intr_policy == INTR_LOWEST_PRIORITY)
@@ -2475,26 +2588,28 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
i++;
bind_cpu = stoi(&cptr);
kmem_free(prop_val, prop_len);
- /* if specific cpu is bogus, then default to cpu 0 */
- if (bind_cpu >= apic_nproc) {
+ /* if specific CPU is bogus, then default to next cpu */
+ if (!apic_cpu_in_range(bind_cpu)) {
cmn_err(CE_WARN, "%s: %s=%s: CPU %d not present",
psm_name, prop_name, prop_val, bind_cpu);
- bind_cpu = 0;
+ rc = DDI_PROP_NOT_FOUND;
} else {
/* indicate that we are bound at user request */
bind_cpu |= IRQ_USER_BOUND;
}
/*
- * no need to check apic_cpus[].aci_status, if specific cpu is
+ * no need to check apic_cpus[].aci_status, if specific CPU is
* not up, then post_cpu_start will handle it.
*/
- } else {
- bind_cpu = apic_next_bind_cpu++;
- if (bind_cpu >= apic_nproc) {
- apic_next_bind_cpu = 1;
- bind_cpu = 0;
- }
}
+ if (rc != DDI_PROP_SUCCESS) {
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+ bind_cpu = apic_find_next_cpu_intr();
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ }
+
if (drv_name != NULL)
cmn_err(CE_CONT, "!%s: %s (%s) instance %d irq 0x%x "
"vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
@@ -2925,7 +3040,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
/* Mask off high bit so it can be used as array index */
airq_temp_cpu &= ~IRQ_USER_BOUND;
- ASSERT(airq_temp_cpu < apic_nproc);
+ ASSERT(apic_cpu_in_range(airq_temp_cpu));
}
/*
@@ -2999,7 +3114,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
} else {
cpu_infop->aci_temp_bound++;
}
- ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ ASSERT(apic_cpu_in_range(bind_cpu));
if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
apic_cpus[airq_temp_cpu].aci_temp_bound--;
@@ -3477,7 +3592,8 @@ apic_intr_redistribute()
* we are consistent.
*/
for (i = 0; i < apic_nproc; i++) {
- if (!(apic_redist_cpu_skip & (1 << i)) &&
+ if (apic_cpu_in_range(i) &&
+ !(apic_redist_cpu_skip & (1 << i)) &&
(apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
cpu_infop = &apic_cpus[i];
@@ -3639,7 +3755,9 @@ apic_intr_redistribute()
apic_redist_cpu_skip = 0;
}
for (i = 0; i < apic_nproc; i++) {
- apic_cpus[i].aci_busy = 0;
+ if (apic_cpu_in_range(i)) {
+ apic_cpus[i].aci_busy = 0;
+ }
}
}
@@ -3650,7 +3768,9 @@ apic_cleanup_busy()
apic_irq_t *irq_ptr;
for (i = 0; i < apic_nproc; i++) {
- apic_cpus[i].aci_busy = 0;
+ if (apic_cpu_in_range(i)) {
+ apic_cpus[i].aci_busy = 0;
+ }
}
for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c
index 939ac73dab..13ceaef674 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* PSMI 1.1 extensions are supported only in 2.6 and later versions.
@@ -30,8 +34,9 @@
* PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
* PSMI 1.5 extensions are supported in Solaris Nevada.
* PSMI 1.6 extensions are supported in Solaris Nevada.
+ * PSMI 1.7 extensions are supported in Solaris Nevada.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/processor.h>
#include <sys/time.h>
@@ -88,7 +93,11 @@ static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
-static int apic_cpu_start(processorid_t, caddr_t);
+static int apic_cpu_start(processorid_t cpuid, caddr_t ctx);
+static int apic_cpu_stop(processorid_t cpuid, caddr_t ctx);
+static int apic_cpu_add(psm_cpu_request_t *reqp);
+static int apic_cpu_remove(psm_cpu_request_t *reqp);
+static int apic_cpu_ops(psm_cpu_request_t *reqp);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_idlecpu(processorid_t cpun);
@@ -96,6 +105,7 @@ static void apic_unset_idlecpu(processorid_t cpun);
static int apic_intr_enter(int ipl, int *vect);
static void apic_setspl(int ipl);
static void x2apic_setspl(int ipl);
+static void apic_switch_ipi_callback(boolean_t enter);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
@@ -137,6 +147,9 @@ int apic_error_display_delay = 100;
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;
+/* maximum loop count when sending Start IPIs. */
+int apic_sipi_max_loop_count = 0x1000;
+
/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
@@ -145,6 +158,10 @@ extern void cmi_cmci_trap(void);
static kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */
static int cmci_cpu_setup_registered;
+/* number of CPUs in power-on transition state */
+static int apic_poweron_cnt = 0;
+static lock_t apic_mode_switch_lock;
+
/*
* The following vector assignments influence the value of ipltopri and
* vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
@@ -209,6 +226,7 @@ int apic_verbose = 0;
/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
+
/*
* Local static data
*/
@@ -252,11 +270,12 @@ static struct psm_ops apic_ops = {
apic_preshutdown,
apic_intr_ops, /* Advanced DDI Interrupt framework */
apic_state, /* save, restore apic state for S3 */
+ apic_cpu_ops, /* CPU control interface. */
};
static struct psm_info apic_psm_info = {
- PSM_INFO_VER01_6, /* version */
+ PSM_INFO_VER01_7, /* version */
PSM_OWN_EXCLUSIVE, /* ownership */
(struct psm_ops *)&apic_ops, /* operation */
APIC_PCPLUSMP_NAME, /* machine name */
@@ -786,6 +805,7 @@ apic_picinit(void)
LOCK_INIT_CLEAR(&apic_gethrtime_lock);
LOCK_INIT_CLEAR(&apic_ioapic_lock);
LOCK_INIT_CLEAR(&apic_error_lock);
+ LOCK_INIT_CLEAR(&apic_mode_switch_lock);
picsetup(); /* initialise the 8259 */
@@ -814,29 +834,27 @@ apic_picinit(void)
ioapic_init_intr(IOAPIC_MASK);
}
-
-/*ARGSUSED1*/
-static int
-apic_cpu_start(processorid_t cpun, caddr_t arg)
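+/*
+ * Send the INIT/SIPI sequence to the given CPU.  With start == B_TRUE the
+ * CMOS warm-reset shutdown byte is set up first (the apic_cpu_start() path);
+ * with B_FALSE the CMOS step is skipped, as done by apic_cpu_stop() when
+ * taking a CPU down.
+ */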
+static void
+apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
int loop_count;
uint32_t vector;
- uint_t cpu_id;
+ uint_t apicid;
ulong_t iflag;
- cpu_id = apic_cpus[cpun].aci_local_id;
-
- apic_cmos_ssb_set = 1;
+ apicid = apic_cpus[cpun].aci_local_id;
/*
- * Interrupts on BSP cpu will be disabled during these startup
+ * Interrupts on current CPU will be disabled during the
* steps in order to avoid unwanted side effects from
* executing interrupt handlers on a problematic BIOS.
*/
-
iflag = intr_clear();
- outb(CMOS_ADDR, SSB);
- outb(CMOS_DATA, BIOS_SHUTDOWN);
+
+ if (start) {
+ outb(CMOS_ADDR, SSB);
+ outb(CMOS_DATA, BIOS_SHUTDOWN);
+ }
/*
* According to X2APIC specification in section '2.3.5.1' of
@@ -860,10 +878,10 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
/* for integrated - make sure there is one INIT IPI in buffer */
/* for external - it will wake up the cpu */
- apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
+ apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
/* If only 1 CPU is installed, PENDING bit will not go low */
- for (loop_count = 0x1000; loop_count; loop_count--) {
+ for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
if (apic_mode == LOCAL_APIC &&
apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
apic_ret();
@@ -871,8 +889,7 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
break;
}
- apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
-
+ apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
drv_usecwait(20000); /* 20 milli sec */
if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
@@ -882,18 +899,111 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
(APIC_VECTOR_MASK | APIC_IPL_MASK);
/* to offset the INIT IPI queue up in the buffer */
- apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
+ apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
drv_usecwait(200); /* 20 micro sec */
- apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
- drv_usecwait(200); /* 20 micro sec */
+ /*
+	 * Send the second SIPI (Startup IPI) as recommended by the Intel
+	 * Software Developer's Manual.
+ */
+ apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
+ drv_usecwait(200); /* 20 micro sec */
}
+
intr_restore(iflag);
+}
+
+/*ARGSUSED1*/
+static int
+apic_cpu_start(processorid_t cpun, caddr_t arg)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!apic_cpu_in_range(cpun)) {
+ return (EINVAL);
+ }
+
+ /*
+ * Switch to apic_common_send_ipi for safety during starting other CPUs.
+ */
+ if (apic_mode == LOCAL_X2APIC) {
+ apic_switch_ipi_callback(B_TRUE);
+ }
+
+ apic_cmos_ssb_set = 1;
+ apic_cpu_send_SIPI(cpun, B_TRUE);
+
return (0);
}
+/*
+ * Put CPU into halted state with interrupts disabled.
+ */
+/*ARGSUSED1*/
+static int
+apic_cpu_stop(processorid_t cpun, caddr_t arg)
+{
+ int rc;
+ cpu_t *cp;
+ extern cpuset_t cpu_ready_set;
+ extern void cpu_idle_intercept_cpu(cpu_t *cp);
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!apic_cpu_in_range(cpun)) {
+ return (EINVAL);
+ }
+ if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
+ return (ENOTSUP);
+ }
+
+ cp = cpu_get(cpun);
+ ASSERT(cp != NULL);
+ ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
+ ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
+ ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
+
+ /* Clear CPU_READY flag to disable cross calls. */
+ cp->cpu_flags &= ~CPU_READY;
+ CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
+ rc = xc_flush_cpu(cp);
+ if (rc != 0) {
+ CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
+ cp->cpu_flags |= CPU_READY;
+ return (rc);
+ }
+
+ /* Intercept target CPU at a safe point before powering it off. */
+ cpu_idle_intercept_cpu(cp);
+
+ apic_cpu_send_SIPI(cpun, B_FALSE);
+ cp->cpu_flags &= ~CPU_RUNNING;
+
+ return (0);
+}
+
+static int
+apic_cpu_ops(psm_cpu_request_t *reqp)
+{
+ if (reqp == NULL) {
+ return (EINVAL);
+ }
+
+ switch (reqp->pcr_cmd) {
+ case PSM_CPU_ADD:
+ return (apic_cpu_add(reqp));
+
+ case PSM_CPU_REMOVE:
+ return (apic_cpu_remove(reqp));
+
+ case PSM_CPU_STOP:
+ return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
+ reqp->req.cpu_stop.ctx));
+
+ default:
+ return (ENOTSUP);
+ }
+}
#ifdef DEBUG
int apic_break_on_cpu = 9;
@@ -1377,7 +1487,6 @@ apic_post_cpu_start()
{
int cpun;
static int cpus_started = 1;
- struct psm_ops *pops = &apic_ops;
/* We know this CPU + BSP started successfully. */
cpus_started++;
@@ -1395,23 +1504,11 @@ apic_post_cpu_start()
}
/*
- * We change psm_send_ipi and send_dirintf only if Solaris
- * is booted in kmdb & the current CPU is the last CPU being
- * brought up. We don't need to do anything if Solaris is running
- * in MMIO mode (xAPIC).
+ * Switch back to x2apic IPI sending method for performance when target
+ * CPU has entered x2apic mode.
*/
- if ((boothowto & RB_DEBUG) &&
- (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
- apic_mode == LOCAL_X2APIC) {
- /*
- * We no longer need help from apic_common_send_ipi()
- * since we will not start any more CPUs.
- *
- * We will need to revisit this if we start supporting
- * hot-plugging of CPUs.
- */
- pops->psm_send_ipi = x2apic_send_ipi;
- send_dirintf = pops->psm_send_ipi;
+ if (apic_mode == LOCAL_X2APIC) {
+ apic_switch_ipi_callback(B_FALSE);
}
splx(ipltospl(LOCK_LEVEL));
@@ -1452,13 +1549,230 @@ apic_get_next_processorid(processorid_t cpu_id)
return ((processorid_t)0);
for (i = cpu_id + 1; i < NCPU; i++) {
- if (CPU_IN_SET(apic_cpumask, i))
+ if (apic_cpu_in_range(i))
return (i);
}
return ((processorid_t)-1);
}
+static int
+apic_cpu_add(psm_cpu_request_t *reqp)
+{
+ int i, rv = 0;
+ ulong_t iflag;
+ boolean_t first = B_TRUE;
+ uchar_t localver;
+ uint32_t localid, procid;
+ processorid_t cpuid = (processorid_t)-1;
+ mach_cpu_add_arg_t *ap;
+
+ ASSERT(reqp != NULL);
+ reqp->req.cpu_add.cpuid = (processorid_t)-1;
+
+ /* Check whether CPU hotplug is supported. */
+ if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
+ return (ENOTSUP);
+ }
+
+ ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
+ switch (ap->type) {
+ case MACH_CPU_ARG_LOCAL_APIC:
+ localid = ap->arg.apic.apic_id;
+ procid = ap->arg.apic.proc_id;
+ if (localid >= 255 || procid > 255) {
+ cmn_err(CE_WARN,
+ "!apic: apicid(%u) or procid(%u) is invalid.",
+ localid, procid);
+ return (EINVAL);
+ }
+ break;
+
+ case MACH_CPU_ARG_LOCAL_X2APIC:
+ localid = ap->arg.apic.apic_id;
+ procid = ap->arg.apic.proc_id;
+ if (localid >= UINT32_MAX) {
+ cmn_err(CE_WARN,
+ "!apic: x2apicid(%u) is invalid.", localid);
+ return (EINVAL);
+ } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
+ cmn_err(CE_WARN, "!apic: system is in APIC mode, "
+ "can't support x2APIC processor.");
+ return (ENOTSUP);
+ }
+ break;
+
+ default:
+ cmn_err(CE_WARN,
+ "!apic: unknown argument type %d to apic_cpu_add().",
+ ap->type);
+ return (EINVAL);
+ }
+
+ /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+
+ /* Check whether local APIC id already exists. */
+ for (i = 0; i < apic_nproc; i++) {
+ if (!CPU_IN_SET(apic_cpumask, i))
+ continue;
+ if (apic_cpus[i].aci_local_id == localid) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: local apic id %u already exists.",
+ localid);
+ return (EEXIST);
+ } else if (apic_cpus[i].aci_processor_id == procid) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: processor id %u already exists.",
+ (int)procid);
+ return (EEXIST);
+ }
+
+ /*
+		 * There's no local APIC version number available in the MADT table,
+		 * so assume that all CPUs are homogeneous and use the local APIC
+ * version number of the first existing CPU.
+ */
+ if (first) {
+ first = B_FALSE;
+ localver = apic_cpus[i].aci_local_ver;
+ }
+ }
+ ASSERT(first == B_FALSE);
+
+ /*
+ * Try to assign the same cpuid if APIC id exists in the dirty cache.
+ */
+ for (i = 0; i < apic_max_nproc; i++) {
+ if (CPU_IN_SET(apic_cpumask, i)) {
+ ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
+ continue;
+ }
+ ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
+ if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
+ apic_cpus[i].aci_local_id == localid &&
+ apic_cpus[i].aci_processor_id == procid) {
+ cpuid = i;
+ break;
+ }
+ }
+
+	/* Avoid the dirty cache and allocate a fresh slot if possible. */
+ if (cpuid == (processorid_t)-1) {
+ for (i = 0; i < apic_max_nproc; i++) {
+ if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
+ (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
+ cpuid = i;
+ break;
+ }
+ }
+ }
+
+ /* Try to find any free slot as last resort. */
+ if (cpuid == (processorid_t)-1) {
+ for (i = 0; i < apic_max_nproc; i++) {
+ if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
+ cpuid = i;
+ break;
+ }
+ }
+ }
+
+ if (cpuid == (processorid_t)-1) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_NOTE,
+ "!apic: failed to allocate cpu id for processor %u.",
+ procid);
+ rv = EAGAIN;
+ } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_NOTE,
+ "!apic: failed to build mapping for processor %u.",
+ procid);
+ rv = EBUSY;
+ } else {
+ ASSERT(cpuid >= 0 && cpuid < NCPU);
+ ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
+ bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
+ apic_cpus[cpuid].aci_processor_id = procid;
+ apic_cpus[cpuid].aci_local_id = localid;
+ apic_cpus[cpuid].aci_local_ver = localver;
+ CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
+ if (cpuid >= apic_nproc) {
+ apic_nproc = cpuid + 1;
+ }
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ reqp->req.cpu_add.cpuid = cpuid;
+ }
+
+ return (rv);
+}
+
+static int
+apic_cpu_remove(psm_cpu_request_t *reqp)
+{
+ int i;
+ ulong_t iflag;
+ processorid_t cpuid;
+
+ /* Check whether CPU hotplug is supported. */
+ if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
+ return (ENOTSUP);
+ }
+
+ cpuid = reqp->req.cpu_remove.cpuid;
+
+ /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+
+ if (!apic_cpu_in_range(cpuid)) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: cpuid %d doesn't exist in apic_cpus array.",
+ cpuid);
+ return (ENODEV);
+ }
+ ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
+
+ if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ return (ENOENT);
+ }
+
+ if (cpuid == apic_nproc - 1) {
+ /*
+ * We are removing the highest numbered cpuid so we need to
+ * find the next highest cpuid as the new value for apic_nproc.
+ */
+ for (i = apic_nproc; i > 0; i--) {
+ if (CPU_IN_SET(apic_cpumask, i - 1)) {
+ apic_nproc = i;
+ break;
+ }
+ }
+ /* at least one CPU left */
+ ASSERT(i > 0);
+ }
+ CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
+ /* mark slot as free and keep it in the dirty cache */
+ apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
+
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+
+ return (0);
+}
/*
* type == -1 indicates it is an internal request. Do not change
@@ -1898,8 +2212,7 @@ apic_disable_intr(processorid_t cpun)
if ((irq_ptr = apic_irq_table[i]) != NULL) {
ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
(irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
- ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
- apic_nproc));
+ (apic_cpu_in_range(irq_ptr->airq_temp_cpu)));
if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
hardbound = 1;
@@ -1908,12 +2221,7 @@ apic_disable_intr(processorid_t cpun)
if (irq_ptr->airq_temp_cpu == cpun) {
do {
- bind_cpu = apic_next_bind_cpu++;
- if (bind_cpu >= apic_nproc) {
- apic_next_bind_cpu = 1;
- bind_cpu = 0;
-
- }
+ bind_cpu = apic_find_next_cpu_intr();
} while (apic_rebind_all(irq_ptr, bind_cpu));
}
}
@@ -2141,6 +2449,8 @@ apic_redistribute_compute(void)
}
max_busy = 0;
for (i = 0; i < apic_nproc; i++) {
+ if (!apic_cpu_in_range(i))
+ continue;
/*
* Check if curipl is non zero & if ISR is in
@@ -2481,24 +2791,6 @@ ioapic_write_eoi(int ioapic_ix, uint32_t value)
ioapic[APIC_IO_EOI] = value;
}
-static processorid_t
-apic_find_cpu(int flag)
-{
- processorid_t acid = 0;
- int i;
-
- /* Find the first CPU with the passed-in flag set */
- for (i = 0; i < apic_nproc; i++) {
- if (apic_cpus[i].aci_status & flag) {
- acid = i;
- break;
- }
- }
-
- ASSERT((apic_cpus[acid].aci_status & flag) != 0);
- return (acid);
-}
-
/*
* Call rebind to do the actual programming.
* Must be called with interrupts disabled and apic_ioapic_lock held
@@ -2508,8 +2800,8 @@ apic_find_cpu(int flag)
* p is of the type 'apic_irq_t *'.
*
* apic_ioapic_lock must be held across this call, as it protects apic_rebind
- * and it protects apic_find_cpu() from a race in which a CPU can be taken
- * offline after a cpu is selected, but before apic_rebind is called to
+ * and it protects apic_find_next_cpu_intr() from a race in which a CPU can be
+ * taken offline after a cpu is selected, but before apic_rebind is called to
* bind interrupts to it.
*/
int
@@ -2534,8 +2826,7 @@ apic_setup_io_intr(void *p, int irq, boolean_t deferred)
* CPU is not up or interrupts are disabled. Fall back to
* the first available CPU
*/
- rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
- drep);
+ rv = apic_rebind(irqptr, apic_find_next_cpu_intr(), drep);
}
return (rv);
@@ -2555,6 +2846,45 @@ apic_get_apic_type()
return (apic_psm_info.p_mach_idstring);
}
+/*
+ * Switch between the safe and x2APIC IPI sending methods.
+ * A CPU may power on in xAPIC or x2APIC mode.  If a CPU needs to send IPIs
+ * to other CPUs before it has entered x2APIC mode, it must still use the
+ * xAPIC method.  Before a StartIPI is sent to a target CPU, psm_send_ipi is
+ * switched to apic_common_send_ipi, which detects the current local APIC
+ * mode and uses the right method to send the IPI.  If some CPUs fail to
+ * start up, apic_poweron_cnt won't return to zero, so apic_common_send_ipi
+ * remains in use.  psm_send_ipi can't simply be switched back to
+ * x2apic_send_ipi in that case, because the failed CPUs may recover by
+ * themselves later at an unpredictable time.
+ */
+static void
+apic_switch_ipi_callback(boolean_t enter)
+{
+ ulong_t iflag;
+ struct psm_ops *pops = &apic_ops;
+
+ iflag = intr_clear();
+ lock_set(&apic_mode_switch_lock);
+ if (enter) {
+ ASSERT(apic_poweron_cnt >= 0);
+ if (apic_poweron_cnt == 0) {
+ pops->psm_send_ipi = apic_common_send_ipi;
+ send_dirintf = pops->psm_send_ipi;
+ }
+ apic_poweron_cnt++;
+ } else {
+ ASSERT(apic_poweron_cnt > 0);
+ apic_poweron_cnt--;
+ if (apic_poweron_cnt == 0) {
+ pops->psm_send_ipi = x2apic_send_ipi;
+ send_dirintf = pops->psm_send_ipi;
+ }
+ }
+ lock_clear(&apic_mode_switch_lock);
+ intr_restore(iflag);
+}
+
void
x2apic_update_psm()
{
@@ -2562,28 +2892,9 @@ x2apic_update_psm()
ASSERT(pops != NULL);
- /*
- * We don't need to do any magic if one of the following
- * conditions is true :
- * - Not being run under kernel debugger.
- * - MP is not set.
- * - Booted with one CPU only.
- * - One CPU configured.
- *
- * We set apic_common_send_ipi() since kernel debuggers
- * attempt to send IPIs to other slave CPUs during
- * entry (exit) from (to) debugger.
- */
- if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
- apic_nproc == 1 || boot_ncpus == 1) {
- pops->psm_send_ipi = x2apic_send_ipi;
- } else {
- pops->psm_send_ipi = apic_common_send_ipi;
- }
-
pops->psm_intr_exit = x2apic_intr_exit;
pops->psm_setspl = x2apic_setspl;
-
+ pops->psm_send_ipi = x2apic_send_ipi;
send_dirintf = pops->psm_send_ipi;
apic_mode = LOCAL_X2APIC;
diff --git a/usr/src/uts/i86pc/io/psm/uppc.c b/usr/src/uts/i86pc/io/psm/uppc.c
index 0b6faee267..ad4d378f76 100644
--- a/usr/src/uts/i86pc/io/psm/uppc.c
+++ b/usr/src/uts/i86pc/io/psm/uppc.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -173,12 +173,13 @@ static struct psm_ops uppc_ops = {
(int (*)(dev_info_t *, ddi_intr_handle_impl_t *,
psm_intr_op_t, int *))NULL, /* psm_intr_ops */
- uppc_state /* psm_state */
+ uppc_state, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info uppc_info = {
- PSM_INFO_VER01_6, /* version */
+ PSM_INFO_VER01_7, /* version */
PSM_OWN_SYS_DEFAULT, /* ownership */
(struct psm_ops *)&uppc_ops, /* operation */
"uppc", /* machine name */
diff --git a/usr/src/uts/i86pc/ml/mpcore.s b/usr/src/uts/i86pc/ml/mpcore.s
index 721761f01d..25a943870e 100644
--- a/usr/src/uts/i86pc/ml/mpcore.s
+++ b/usr/src/uts/i86pc/ml/mpcore.s
@@ -22,9 +22,11 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
@@ -44,6 +46,7 @@
* - The GDT, IDT, ktss and page directory has been built for us
*
* Our actions:
+ * Start CPU:
* - We start using our GDT by loading correct values in the
* selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
* gs=KGS_SEL).
@@ -51,20 +54,30 @@
* - We load the default LDT into the hardware LDT register.
* - We load the default TSS into the hardware task register.
* - call mp_startup(void) indirectly through the T_PC
+ * Stop CPU:
+ * - Put CPU into halted state with interrupts disabled
*
*/
#if defined(__lint)
void
-real_mode_start(void)
+real_mode_start_cpu(void)
+{}
+
+void
+real_mode_stop_cpu_stage1(void)
+{}
+
+void
+real_mode_stop_cpu_stage2(void)
{}
#else /* __lint */
#if defined(__amd64)
- ENTRY_NP(real_mode_start)
+ ENTRY_NP(real_mode_start_cpu)
#if !defined(__GNUC_AS__)
@@ -229,8 +242,8 @@ long_mode_64:
pushq $KCS_SEL
pushq $kernel_cs_code
lretq
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
kernel_cs_code:
@@ -484,8 +497,8 @@ long_mode_64:
pushq $KCS_SEL
pushq $kernel_cs_code
lretq
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
kernel_cs_code:
@@ -575,11 +588,11 @@ kernel_cs_code:
int $20 /* whoops, returned somehow! */
#endif /* !__GNUC_AS__ */
- SET_SIZE(real_mode_start)
+ SET_SIZE(real_mode_start_cpu)
#elif defined(__i386)
- ENTRY_NP(real_mode_start)
+ ENTRY_NP(real_mode_start_cpu)
#if !defined(__GNUC_AS__)
@@ -621,8 +634,8 @@ pestart:
D16 pushl $KCS_SEL
D16 pushl $kernel_cs_code
D16 lret
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
.globl kernel_cs_code
@@ -714,8 +727,8 @@ pestart:
D16 pushl $KCS_SEL
D16 pushl $kernel_cs_code
D16 lret
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
.globl kernel_cs_code
kernel_cs_code:
@@ -767,7 +780,120 @@ kernel_cs_code:
int $20 /* whoops, returned somehow! */
#endif
- SET_SIZE(real_mode_start)
+ SET_SIZE(real_mode_start_cpu)
#endif /* __amd64 */
+
+#if defined(__amd64)
+
+ ENTRY_NP(real_mode_stop_cpu_stage1)
+
+#if !defined(__GNUC_AS__)
+
+ /*
+ * For vulcan as we need to do a .code32 and mentally invert the
+ * meaning of the addr16 and data16 prefixes to get 32-bit access when
+ * generating code to be executed in 16-bit mode (sigh...)
+ */
+ .code32
+ cli
+ movw %cs, %ax
+ movw %ax, %ds /* load cs into ds */
+ movw %ax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ .byte 0xff, 0xe0 /* jmp *%ax */
+
+#else /* __GNUC_AS__ */
+
+ /*
+ * NOTE: The GNU assembler automatically does the right thing to
+ * generate data size operand prefixes based on the code size
+ * generation mode (e.g. .code16, .code32, .code64) and as such
+ * prefixes need not be used on instructions EXCEPT in the case
+ * of address prefixes for code for which the reference is not
+ * automatically of the default operand size.
+ */
+ .code16
+ cli
+ movw %cs, %ax
+ movw %ax, %ds /* load cs into ds */
+ movw %ax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ jmp *%ax
+
+#endif /* !__GNUC_AS__ */
+
+ .globl real_mode_stop_cpu_stage1_end
+real_mode_stop_cpu_stage1_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage1)
+
+#elif defined(__i386)
+
+ ENTRY_NP(real_mode_stop_cpu_stage1)
+
+#if !defined(__GNUC_AS__)
+
+ cli
+ D16 movw %cs, %eax
+ movw %eax, %ds /* load cs into ds */
+ movw %eax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ .byte 0xff, 0xe0 /* jmp *%ax */
+
+#else /* __GNUC_AS__ */
+
+ cli
+ mov %cs, %ax
+ mov %eax, %ds /* load cs into ds */
+ mov %eax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ jmp *%ax
+
+#endif /* !__GNUC_AS__ */
+
+ .globl real_mode_stop_cpu_stage1_end
+real_mode_stop_cpu_stage1_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage1)
+
+#endif /* __amd64 */
+
+ ENTRY_NP(real_mode_stop_cpu_stage2)
+
+ movw $0xdead, %ax
+ movw %ax, CPUHALTEDOFF
+
+real_mode_stop_cpu_loop:
+ /*
+ * Put CPU into halted state.
+	 * Only an INIT, SMI, or NMI can break the loop.
+ */
+ hlt
+ jmp real_mode_stop_cpu_loop
+
+ .globl real_mode_stop_cpu_stage2_end
+real_mode_stop_cpu_stage2_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage2)
+
#endif /* __lint */
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index f857617beb..029193b35e 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -295,6 +295,8 @@ rm_platter
rm_cpu CPUNOFF
rm_cr4 CR4OFF
rm_x86feature X86FEATURE
+ rm_cpu_halt_code CPUHALTCODEOFF
+ rm_cpu_halted CPUHALTEDOFF
ddi_acc_impl
ahi_acc_attr ACC_ATTR
diff --git a/usr/src/uts/i86pc/os/acpi_fw.h b/usr/src/uts/i86pc/os/acpi_fw.h
index 032e231f3f..54a34ef591 100644
--- a/usr/src/uts/i86pc/os/acpi_fw.h
+++ b/usr/src/uts/i86pc/os/acpi_fw.h
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _ACPI_FW_H
#define _ACPI_FW_H
@@ -161,6 +165,28 @@ struct slit {
*/
extern struct slit *slit_ptr;
+struct msct_proximity_domain {
+ uint8_t revision;
+ uint8_t length;
+ uint32_t domain_min;
+ uint32_t domain_max;
+ uint32_t processor_max;
+ uint64_t memory_max;
+};
+
+struct msct {
+ struct table_header hdr;
+ uint32_t proximity_domain_offset;
+ uint32_t maximum_proximity_domains;
+ uint32_t maximum_power_domains;
+ uint64_t maximum_physical_address;
+};
+
+/*
+ * Pointer to Maximum System Capability Table (MSCT)
+ */
+extern struct msct *msct_ptr;
+
struct cfg_base_addr_alloc {
uint64_t base_addr;
uint16_t segment;
diff --git a/usr/src/uts/i86pc/os/cmi.c b/usr/src/uts/i86pc/os/cmi.c
index f9d722f031..4d27eb61d3 100644
--- a/usr/src/uts/i86pc/os/cmi.c
+++ b/usr/src/uts/i86pc/os/cmi.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,6 +100,8 @@ typedef struct cmi {
} cmi_t;
static cmi_t *cmi_list;
+static const cmi_mc_ops_t *cmi_mc_global_ops;
+static void *cmi_mc_global_data;
static kmutex_t cmi_load_lock;
/*
@@ -107,6 +109,7 @@ static kmutex_t cmi_load_lock;
* interface.
*/
extern cmi_hdl_t cmi_hdl_create(enum cmi_hdl_class, uint_t, uint_t, uint_t);
+extern void cmi_hdl_destroy(cmi_hdl_t ophdl);
extern void cmi_hdl_setcmi(cmi_hdl_t, void *, void *);
extern void *cmi_hdl_getcmi(cmi_hdl_t);
extern void cmi_hdl_setmc(cmi_hdl_t, const struct cmi_mc_ops *, void *);
@@ -484,9 +487,8 @@ cmi_init(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
}
/*
- * cmi_fini is not called at the moment. It is intended to be called
- * on DR deconfigure of a cpu resource. It should not be called at
- * simple offline of a cpu.
+ * cmi_fini is called on DR deconfigure of a cpu resource.
+ * It should not be called at simple offline of a cpu.
*/
void
cmi_fini(cmi_hdl_t hdl)
@@ -499,7 +501,7 @@ cmi_fini(cmi_hdl_t hdl)
if (CMI_OP_PRESENT(cmi, cmi_fini))
CMI_OPS(cmi)->cmi_fini(hdl);
- cmi_hdl_rele(hdl); /* release hold obtained in cmi_hdl_create */
+ cmi_hdl_destroy(hdl);
}
/*
@@ -806,6 +808,21 @@ cmi_mc_register(cmi_hdl_t hdl, const cmi_mc_ops_t *mcops, void *mcdata)
cmi_hdl_setmc(hdl, mcops, mcdata);
}
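+/*
+ * Register a single system-wide set of memory-controller ops, consulted by
+ * cmi_mc_patounum()/cmi_mc_unumtopa() ahead of any per-handle ops set up
+ * through cmi_mc_register().
+ */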
+cmi_errno_t
+cmi_mc_register_global(const cmi_mc_ops_t *mcops, void *mcdata)
+{
+ if (!cmi_no_mca_init) {
+ if (cmi_mc_global_ops != NULL || cmi_mc_global_data != NULL ||
+ mcops == NULL || mcops->cmi_mc_patounum == NULL ||
+ mcops->cmi_mc_unumtopa == NULL) {
+ return (CMIERR_UNKNOWN);
+ }
+ cmi_mc_global_data = mcdata;
+ cmi_mc_global_ops = mcops;
+ }
+ return (CMI_SUCCESS);
+}
+
void
cmi_mc_sw_memscrub_disable(void)
{
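
cmi_mc_register_global() above accepts exactly one system-wide ops vector: when MCA init is enabled it rejects a second registration and any vector missing either translation op. A hedged sketch of a caller follows; the my_* names are invented, bodies are omitted, and the positional cmi_mc_ops_t layout (patounum, unumtopa, logout) is an assumption inferred from how the ops are dereferenced in this file.

/* Hypothetical global memory-controller driver registration (sketch). */
static cmi_errno_t my_patounum(void *, uint64_t, uint8_t, uint8_t,
    uint32_t, int, mc_unum_t *);
static cmi_errno_t my_unumtopa(void *, mc_unum_t *, nvlist_t *, uint64_t *);

static const cmi_mc_ops_t my_mc_ops = {
	my_patounum,	/* cmi_mc_patounum: PA -> unum, must be non-NULL */
	my_unumtopa,	/* cmi_mc_unumtopa: unum -> PA, must be non-NULL */
	NULL		/* cmi_mc_logout: optional */
};

static int
my_mc_attach(void *softstate)
{
	/* Duplicate registrations and NULL translation ops are rejected. */
	if (cmi_mc_register_global(&my_mc_ops, softstate) != CMI_SUCCESS)
		return (-1);
	return (0);
}
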
@@ -820,8 +837,17 @@ cmi_mc_patounum(uint64_t pa, uint8_t valid_hi, uint8_t valid_lo, uint32_t synd,
cmi_hdl_t hdl;
cmi_errno_t rv;
- if (cmi_no_mca_init ||
- (hdl = cmi_hdl_any()) == NULL) /* short-term hold */
+ if (cmi_no_mca_init)
+ return (CMIERR_MC_ABSENT);
+
+ if (cmi_mc_global_ops != NULL) {
+ if (cmi_mc_global_ops->cmi_mc_patounum == NULL)
+ return (CMIERR_MC_NOTSUP);
+ return (cmi_mc_global_ops->cmi_mc_patounum(cmi_mc_global_data,
+ pa, valid_hi, valid_lo, synd, syndtype, up));
+ }
+
+ if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */
return (CMIERR_MC_ABSENT);
if ((mcops = cmi_hdl_getmcops(hdl)) == NULL ||
@@ -849,8 +875,17 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap)
if (up != NULL && nvl != NULL)
return (CMIERR_API); /* convert from just one form */
- if (cmi_no_mca_init ||
- (hdl = cmi_hdl_any()) == NULL) /* short-term hold */
+ if (cmi_no_mca_init)
+ return (CMIERR_MC_ABSENT);
+
+ if (cmi_mc_global_ops != NULL) {
+ if (cmi_mc_global_ops->cmi_mc_unumtopa == NULL)
+ return (CMIERR_MC_NOTSUP);
+ return (cmi_mc_global_ops->cmi_mc_unumtopa(cmi_mc_global_data,
+ up, nvl, pap));
+ }
+
+ if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */
return (CMIERR_MC_ABSENT);
if ((mcops = cmi_hdl_getmcops(hdl)) == NULL ||
@@ -865,7 +900,7 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap)
pap) == 0)) {
return (CMIERR_MC_PARTIALUNUMTOPA);
} else {
- return (mcops && mcops->cmi_mc_unumtopa ?
+ return (mcops && mcops->cmi_mc_unumtopa == NULL ?
CMIERR_MC_NOTSUP : CMIERR_MC_ABSENT);
}
}
@@ -882,10 +917,15 @@ cmi_mc_logout(cmi_hdl_t hdl, boolean_t ismc, boolean_t sync)
{
const struct cmi_mc_ops *mcops;
- if (cmi_no_mca_init || (mcops = cmi_hdl_getmcops(hdl)) == NULL)
+ if (cmi_no_mca_init)
return;
- if (mcops->cmi_mc_logout != NULL)
+ if (cmi_mc_global_ops != NULL)
+ mcops = cmi_mc_global_ops;
+ else
+ mcops = cmi_hdl_getmcops(hdl);
+
+ if (mcops != NULL && mcops->cmi_mc_logout != NULL)
mcops->cmi_mc_logout(hdl, ismc, sync);
}
diff --git a/usr/src/uts/i86pc/os/cmi_hw.c b/usr/src/uts/i86pc/os/cmi_hw.c
index 1c2dbeaad6..8e3ae0f0af 100644
--- a/usr/src/uts/i86pc/os/cmi_hw.c
+++ b/usr/src/uts/i86pc/os/cmi_hw.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* CPU Module Interface - hardware abstraction.
@@ -94,6 +98,7 @@ typedef struct cmi_hdl_impl {
#define HDLOPS(hdl) ((hdl)->cmih_ops)
#define CMIH_F_INJACTV 0x1ULL
+#define CMIH_F_DEAD 0x2ULL
/*
* Ops structure for handle operations.
@@ -847,9 +852,18 @@ ntv_int(cmi_hdl_impl_t *hdl, int int_no)
static int
ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
{
+ int rc;
processorid_t cpuid = HDLPRIV(hdl)->cpu_id;
- return (p_online_internal(cpuid, new_status, old_status));
+ while (mutex_tryenter(&cpu_lock) == 0) {
+ if (hdl->cmih_flags & CMIH_F_DEAD)
+ return (EBUSY);
+ delay(1);
+ }
+ rc = p_online_internal_locked(cpuid, new_status, old_status);
+ mutex_exit(&cpu_lock);
+
+ return (rc);
}
#else /* __xpv */
@@ -1387,15 +1401,32 @@ void
cmi_hdl_rele(cmi_hdl_t ophdl)
{
cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
- cmi_hdl_ent_t *ent;
ASSERT(*hdl->cmih_refcntp > 0);
+ (void) atomic_dec_32_nv(hdl->cmih_refcntp);
+}
- if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
- return;
+void
+cmi_hdl_destroy(cmi_hdl_t ophdl)
+{
+ cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
+ cmi_hdl_ent_t *ent;
+
+ /* Release the reference count held by cmi_hdl_create(). */
+ ASSERT(*hdl->cmih_refcntp > 0);
+ (void) atomic_dec_32_nv(hdl->cmih_refcntp);
+ hdl->cmih_flags |= CMIH_F_DEAD;
ent = cmi_hdl_ent_lookup(hdl->cmih_chipid, hdl->cmih_coreid,
hdl->cmih_strandid);
+ /*
+ * Use busy polling instead of a condition variable here because
+ * cmi_hdl_rele() may be called from the #MC handler.
+ */
+ while (cmi_hdl_canref(ent)) {
+ cmi_hdl_rele(ophdl);
+ delay(1);
+ }
ent->cmae_hdlp = NULL;
kmem_free(hdl, sizeof (*hdl));
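
Taken together, the CMIH_F_DEAD flag, the tryenter loop in ntv_online() earlier in this file, and the drain loop above form one protocol: the destroyer marks the handle dead before draining, so a thread spinning for cpu_lock can bail out instead of deadlocking against the teardown. A standalone model of the same drain-then-free idea, using C11 atomics in place of the kernel's refcount and flag (illustrative only; handle_hold() and handle_drain() are invented names):

#include <stdatomic.h>
#include <stdbool.h>

/* Standalone model: transient holds vs. a one-way "dead" flag. */
typedef struct handle {
	atomic_uint	refcnt;	/* short-term holds */
	atomic_bool	dead;	/* set once, never cleared */
} handle_t;

/* Transient user: refuses to take a hold once teardown has begun. */
static bool
handle_hold(handle_t *h)
{
	if (atomic_load(&h->dead))
		return (false);
	atomic_fetch_add(&h->refcnt, 1);
	if (atomic_load(&h->dead)) {
		/* Lost the race with the destroyer; back out. */
		atomic_fetch_sub(&h->refcnt, 1);
		return (false);
	}
	return (true);
}

/*
 * Destroyer: publish "dead", then busy-poll until all transient holds
 * drain. The busy poll stands in for delay(1) above; a condition
 * variable is unusable when releases can come from the #MC handler.
 */
static void
handle_drain(handle_t *h)
{
	atomic_store(&h->dead, true);
	while (atomic_load(&h->refcnt) != 0)
		continue;
}
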
@@ -1628,6 +1659,33 @@ cmi_ntv_hwstrandid(cpu_t *cp)
return (cpuid_get_clogid(cp) % strands_per_core);
}
+
+static void
+cmi_ntv_hwdisable_mce_xc(void)
+{
+ ulong_t cr4;
+
+ cr4 = getcr4();
+ cr4 = cr4 & (~CR4_MCE);
+ setcr4(cr4);
+}
+
+void
+cmi_ntv_hwdisable_mce(cmi_hdl_t hdl)
+{
+ cpuset_t set;
+ cmi_hdl_impl_t *thdl = IMPLHDL(hdl);
+ cpu_t *cp = HDLPRIV(thdl);
+
+ if (CPU->cpu_id == cp->cpu_id) {
+ cmi_ntv_hwdisable_mce_xc();
+ } else {
+ CPUSET_ONLY(set, cp->cpu_id);
+ xc_call(NULL, NULL, NULL, CPUSET2BV(set),
+ (xc_func_t)cmi_ntv_hwdisable_mce_xc);
+ }
+}
+
#endif /* __xpv */
void
diff --git a/usr/src/uts/i86pc/os/cms.c b/usr/src/uts/i86pc/os/cms.c
index 18d3b046f6..1fe03d3f30 100644
--- a/usr/src/uts/i86pc/os/cms.c
+++ b/usr/src/uts/i86pc/os/cms.c
@@ -23,6 +23,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/cpu_module_ms_impl.h>
@@ -421,9 +425,19 @@ void
cms_fini(cmi_hdl_t hdl)
{
cms_t *cms = HDL2CMS(hdl);
+ struct cms_ctl *cdp;
if (CMS_OP_PRESENT(cms, cms_fini))
CMS_OPS(cms)->cms_fini(hdl);
+
+ mutex_enter(&cms_load_lock);
+ cdp = (struct cms_ctl *)cmi_hdl_getspecific(hdl);
+ if (cdp != NULL) {
+ if (cdp->cs_cms != NULL)
+ cms_rele(cdp->cs_cms);
+ kmem_free(cdp, sizeof (*cdp));
+ }
+ mutex_exit(&cms_load_lock);
}
boolean_t
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 2599d3530a..103955a097 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -337,7 +337,7 @@ i_cpr_pre_resume_cpus()
CPUSET_ONLY(cpu_ready_set, boot_cpuid);
CPUSET_ONLY(procset, boot_cpuid);
- for (who = 0; who < ncpus; who++) {
+ for (who = 0; who < max_ncpus; who++) {
wc_cpu_t *cpup = wc_other_cpus + who;
wc_desctbr_t gdt;
@@ -363,7 +363,10 @@ i_cpr_pre_resume_cpus()
init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
- if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
+ mutex_enter(&cpu_lock);
+ err = mach_cpuid_start(who, rm_platter_va);
+ mutex_exit(&cpu_lock);
+ if (err != 0) {
cmn_err(CE_WARN, "cpu%d: failed to start during "
"suspend/resume error %d", who, err);
continue;
@@ -481,10 +484,13 @@ prt_other_cpus()
return;
}
- for (who = 0; who < ncpus; who++) {
+ for (who = 0; who < max_ncpus; who++) {
wc_cpu_t *cpup = wc_other_cpus + who;
+ if (!CPU_IN_SET(mp_cpus, who))
+ continue;
+
PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
"idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
AFMT ", sp=%lx\n", who,
@@ -1009,7 +1015,7 @@ i_cpr_alloc_cpus(void)
*/
if (wc_other_cpus == NULL) {
- wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
+ wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
KM_SLEEP);
}
@@ -1022,7 +1028,7 @@ i_cpr_free_cpus(void)
wc_cpu_t *wc_cpu;
if (wc_other_cpus != NULL) {
- for (index = 0; index < ncpus; index++) {
+ for (index = 0; index < max_ncpus; index++) {
wc_cpu = wc_other_cpus + index;
if (wc_cpu->wc_saved_stack != NULL) {
kmem_free(wc_cpu->wc_saved_stack,
@@ -1030,7 +1036,8 @@ i_cpr_free_cpus(void)
}
}
- kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
+ kmem_free((void *) wc_other_cpus,
+ max_ncpus * sizeof (wc_cpu_t));
wc_other_cpus = NULL;
}
}
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 8a14fccaa8..d47d680939 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -430,6 +430,9 @@ uint32_t cpuid_feature_ecx_exclude;
uint32_t cpuid_feature_edx_include;
uint32_t cpuid_feature_edx_exclude;
+/*
+ * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
+ */
void
cpuid_alloc_space(cpu_t *cpu)
{
@@ -439,6 +442,7 @@ cpuid_alloc_space(cpu_t *cpu)
* their cpuid_info struct allocated here.
*/
ASSERT(cpu->cpu_id != 0);
+ ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
cpu->cpu_m.mcpu_cpi =
kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
}
@@ -449,7 +453,8 @@ cpuid_free_space(cpu_t *cpu)
struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
int i;
- ASSERT(cpu->cpu_id != 0);
+ ASSERT(cpi != NULL);
+ ASSERT(cpi != &cpuid_info0);
/*
* Free up any function 4 related dynamic storage
@@ -460,7 +465,8 @@ cpuid_free_space(cpu_t *cpu)
kmem_free(cpi->cpi_std_4,
cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
- kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi));
+ kmem_free(cpi, sizeof (*cpi));
+ cpu->cpu_m.mcpu_cpi = NULL;
}
#if !defined(__xpv)
@@ -721,9 +727,9 @@ cpuid_pass1(cpu_t *cpu)
determine_platform();
#endif
/*
- * Space statically allocated for cpu0, ensure pointer is set
+ * Space statically allocated for BSP, ensure pointer is set
*/
- if (cpu->cpu_id == 0)
+ if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL)
cpu->cpu_m.mcpu_cpi = &cpuid_info0;
cpi = cpu->cpu_m.mcpu_cpi;
ASSERT(cpi != NULL);
@@ -3977,9 +3983,9 @@ cpuid_mwait_alloc(cpu_t *cpu)
uint32_t *ret;
size_t mwait_size;
- ASSERT(cpuid_checkpass(cpu, 2));
+ ASSERT(cpuid_checkpass(CPU, 2));
- mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
+ mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
if (mwait_size == 0)
return (NULL);
@@ -4016,7 +4022,9 @@ cpuid_mwait_alloc(cpu_t *cpu)
void
cpuid_mwait_free(cpu_t *cpu)
{
- ASSERT(cpuid_checkpass(cpu, 2));
+ if (cpu->cpu_m.mcpu_cpi == NULL) {
+ return;
+ }
if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
index a092046071..896b110e0e 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
@@ -733,7 +733,8 @@ cpu_acpi_cache_cst(cpu_acpi_handle_t handle)
CPU_ACPI_CSTATES_COUNT(handle) = (uint32_t)cnt;
alloc_size = CPU_ACPI_CSTATES_SIZE(cnt);
- CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP);
+ if (CPU_ACPI_CSTATES(handle) == NULL)
+ CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP);
cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
p = cstate;
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
index 3dd75f00a5..483e8865c7 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -47,6 +47,8 @@
#define CSTATE_USING_HPET 1
#define CSTATE_USING_LAT 2
+#define CPU_IDLE_STOP_TIMEOUT 1000
+
extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
@@ -677,7 +679,7 @@ cpu_idle_init(cpu_t *cp)
/*
* Allocate, initialize and install cstate kstat
*/
- cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
+ cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
name, "misc",
KSTAT_TYPE_NAMED,
sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
@@ -692,8 +694,8 @@ cpu_idle_init(cpu_t *cp)
cstate->cs_ksp->ks_data_size += MAXNAMELEN;
cstate->cs_ksp->ks_private = cstate;
kstat_install(cstate->cs_ksp);
- cstate++;
}
+ cstate++;
}
cpupm_alloc_domains(cp, CPUPM_C_STATES);
@@ -771,6 +773,19 @@ cpu_idle_fini(cpu_t *cp)
mutex_exit(&cpu_idle_callb_mutex);
}
+/*
+ * This function is introduced here to solve a race condition
+ * between the master and the slave CPUs touching the c-state data
+ * structure. After the slave calls this idle function to switch to
+ * the non-deep idle function, the master can go on to reclaim the
+ * resource.
+ */
+static void
+cpu_idle_stop_sync(void)
+{
+ /* switch to the non deep idle function */
+ CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
+}
+
static void
cpu_idle_stop(cpu_t *cp)
{
@@ -778,15 +793,28 @@ cpu_idle_stop(cpu_t *cp)
(cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
cpu_acpi_cstate_t *cstate;
- uint_t cpu_max_cstates, i;
+ uint_t cpu_max_cstates, i = 0;
- /*
- * place the CPUs in a safe place so that we can disable
- * deep c-state on them.
- */
- pause_cpus(NULL);
- cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
- start_cpus();
+ mutex_enter(&cpu_idle_callb_mutex);
+ if (idle_cpu == cpu_idle_adaptive) {
+ /*
+ * invoke the slave to call the synchronous idle function.
+ */
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
+ poke_cpu(cp->cpu_id);
+
+ /*
+ * wait until the slave switches to the non-deep idle function,
+ * so that the master can safely go on to reclaim the resource.
+ */
+ while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
+ drv_usecwait(10);
+ if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
+ cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
+ " idle stop timeout");
+ }
+ }
+ mutex_exit(&cpu_idle_callb_mutex);
cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
if (cstate) {
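
The handshake above is small but easy to misread: the master installs cpu_idle_stop_sync() as the slave's idle routine, pokes the slave out of its current idle state, then polls until the slave itself has downgraded to non_deep_idle_cpu; only then may the master free the c-state data the deep-idle path reads. A standalone model of the same protocol via a function-pointer swap follows; all names and timings are invented, and poke_cpu() has no analogue here because the modeled slave never halts.

#include <pthread.h>
#include <stdatomic.h>
#include <unistd.h>

static void deep_idle(void);
static void shallow_idle(void);
static void idle_stop_sync(void);

static _Atomic(void (*)(void)) idle_fn = deep_idle;
static atomic_bool done;

static void deep_idle(void) { usleep(100); }	/* stand-in for deep C-state */
static void shallow_idle(void) { usleep(100); }	/* stand-in for halt */

/* Trampoline run by the slave itself, mirroring cpu_idle_stop_sync(). */
static void idle_stop_sync(void) { atomic_store(&idle_fn, shallow_idle); }

static void *
slave(void *arg)
{
	(void) arg;
	while (!atomic_load(&done))
		atomic_load(&idle_fn)();	/* the idle loop */
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	(void) pthread_create(&t, NULL, slave, NULL);

	/* Master: install the trampoline, then wait for the switch. */
	atomic_store(&idle_fn, idle_stop_sync);
	while (atomic_load(&idle_fn) != shallow_idle)
		usleep(10);	/* mirrors the drv_usecwait(10) poll */

	/* Only now is it safe to reclaim deep-idle resources. */
	atomic_store(&done, true);
	(void) pthread_join(t, NULL);
	return (0);
}
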
@@ -937,7 +965,6 @@ cpuidle_cstate_instance(cpu_t *cp)
cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
" object Instance: %d", cpu_id);
}
- mutex_enter(&cpu_lock);
mcpu = &(cp->cpu_m);
mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
if (mcpu->max_cstates > CPU_ACPI_C1) {
@@ -950,7 +977,6 @@ cpuidle_cstate_instance(cpu_t *cp)
cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
}
- mutex_exit(&cpu_lock);
CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
} while (result < 0);
diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
index 04113a9485..66b2acc7bf 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
@@ -93,8 +93,8 @@ cpupm_intel_init(cpu_t *cp)
if (x86_vendor != X86_VENDOR_Intel)
return (B_FALSE);
- family = cpuid_getfamily(CPU);
- model = cpuid_getmodel(CPU);
+ family = cpuid_getfamily(cp);
+ model = cpuid_getmodel(cp);
cpupm_intel_pdccap = CPUPM_INTEL_PDC_MP;
diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
index 78ad028890..72302fcf07 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
@@ -240,7 +240,7 @@ cpupm_init(cpu_t *cp)
if (mcpu->max_cstates > CPU_ACPI_C1) {
(void) cstate_timer_callback(
CST_EVENT_MULTIPLE_CSTATES);
- CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
+ cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
mcpu->mcpu_idle_type = CPU_ACPI_C1;
disp_enq_thread = cstate_wakeup;
} else {
@@ -439,9 +439,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_PSD(handle).sd_domain;
type = CPU_ACPI_PSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_chipid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_chipid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_chipid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_pstate_domains;
@@ -452,9 +456,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_TSD(handle).sd_domain;
type = CPU_ACPI_TSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_chipid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_chipid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_chipid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_tstate_domains;
@@ -465,9 +473,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_CSD(handle).sd_domain;
type = CPU_ACPI_CSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_coreid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_coreid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_coreid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_cstate_domains;
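
The three arms of cpupm_alloc_domains() now repeat the same conditional-locking pattern. A possible consolidation, as a hypothetical helper that is not part of this patch (it assumes the cpuid_get_chipid()/cpuid_get_coreid() getters can be passed as int-returning function pointers):

/* Hypothetical helper: fetch a domain id, taking cpu_lock only if needed. */
static int
cpupm_domain_id(cpu_t *cp, int (*getter)(cpu_t *))
{
	int id;

	if (MUTEX_HELD(&cpu_lock))
		return (getter(cp));

	mutex_enter(&cpu_lock);
	id = getter(cp);
	mutex_exit(&cpu_lock);
	return (id);
}

Each arm would then reduce to something like domain = cpupm_domain_id(cp, cpuid_get_chipid), keeping the lock dance in one place if more domain types are added.
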
diff --git a/usr/src/uts/i86pc/os/fakebop.c b/usr/src/uts/i86pc/os/fakebop.c
index 1c23b7643c..d38bcb046f 100644
--- a/usr/src/uts/i86pc/os/fakebop.c
+++ b/usr/src/uts/i86pc/os/fakebop.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* This file contains the functionality that mimics the boot operations
@@ -40,6 +44,7 @@
#include <sys/varargs.h>
#include <sys/param.h>
#include <sys/machparam.h>
+#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/boot_console.h>
#include <sys/cmn_err.h>
@@ -151,12 +156,13 @@ char fastreboot_onpanic_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
static const char fastreboot_onpanic_args[] = " -B fastreboot_onpanic=0";
/*
- * Pointers to where System Resource Affinity Table (SRAT) and
- * System Locality Information Table (SLIT) are mapped into virtual memory
+ * Pointers to where System Resource Affinity Table (SRAT), System Locality
+ * Information Table (SLIT) and Maximum System Capability Table (MSCT)
+ * are mapped into virtual memory
*/
struct srat *srat_ptr = NULL;
struct slit *slit_ptr = NULL;
-
+struct msct *msct_ptr = NULL;
/*
* Allocate aligned physical memory at boot time. This allocator allocates
@@ -2077,6 +2083,7 @@ process_madt(struct madt *tp)
{
struct madt_processor *cpu, *end;
uint32_t cpu_count = 0;
+ uint32_t cpu_possible_count = 0;
uint8_t cpu_apicid_array[UINT8_MAX + 1];
if (tp != NULL) {
@@ -2094,6 +2101,7 @@ process_madt(struct madt *tp)
cpu->apic_id;
cpu_count++;
}
+ cpu_possible_count++;
}
cpu = (struct madt_processor *)
@@ -2109,17 +2117,44 @@ process_madt(struct madt *tp)
}
/*
+ * Check whether property plat-max-ncpus is already set.
+ */
+ if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
+ /*
+ * Set plat-max-ncpus to the maximum possible number of CPUs given
+ * in the MADT if it hasn't been set yet.
+ * There's no formal way to detect the maximum number of CPUs
+ * supported by a platform according to ACPI spec 3.0b, so the
+ * current CPU hotplug implementation expects all possible CPUs to
+ * have an entry in the MADT and sets plat-max-ncpus to the number
+ * of entries in the MADT.
+ * ACPI 4.0 introduces the Maximum System Capability Table (MSCT),
+ * which provides the maximum number of CPUs supported by the
+ * platform. If the MSCT is unavailable, fall back to the old way.
+ */
+ if (tp != NULL)
+ bsetpropsi(PLAT_MAX_NCPUS_NAME, cpu_possible_count);
+ }
+
+ /*
+ * Set boot property boot-max-ncpus to the number of CPUs present at
+ * boot time. boot-max-ncpus is mainly used for optimization.
+ */
+ if (tp != NULL)
+ bsetpropsi(BOOT_MAX_NCPUS_NAME, cpu_count);
+
+ /*
* User-set boot-ncpus overrides firmware count
*/
- if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+ if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
return;
/*
- * Set boot property for boot-ncpus to number of CPUs given in MADT
- * if user hasn't set the property already
+ * Set boot property boot-ncpus to the number of active CPUs given in
+ * the MADT if it hasn't been set yet.
*/
if (tp != NULL)
- bsetpropsi("boot-ncpus", cpu_count);
+ bsetpropsi(BOOT_NCPUS_NAME, cpu_count);
}
static void
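
Three distinct CPU-count properties come out of this function and process_msct() below, and the precedence is easy to lose track of. A standalone model of the resulting policy follows; the numbers are invented, and the property names mirror BOOT_NCPUS_NAME, BOOT_MAX_NCPUS_NAME and PLAT_MAX_NCPUS_NAME.

#include <stdio.h>

/* Standalone model of the CPU-count property precedence (values invented). */
int
main(void)
{
	int user_boot_ncpus = -1;	/* -1: not set on the command line */
	int madt_enabled = 16;		/* MADT entries with the enable flag */
	int madt_total = 32;		/* all MADT processor entries */
	int msct_max = 64;		/* from the MSCT; -1 if table absent */

	/* plat-max-ncpus: MSCT wins, else total MADT entry count. */
	int plat_max_ncpus = (msct_max > 0) ? msct_max : madt_total;

	/* boot-max-ncpus: CPUs actually present at boot. */
	int boot_max_ncpus = madt_enabled;

	/* boot-ncpus: an explicit user setting always wins. */
	int boot_ncpus = (user_boot_ncpus > 0) ? user_boot_ncpus :
	    madt_enabled;

	(void) printf("plat-max=%d boot-max=%d boot=%d\n",
	    plat_max_ncpus, boot_max_ncpus, boot_ncpus);
	return (0);
}
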
@@ -2146,6 +2181,7 @@ process_srat(struct srat *tp)
} memory;
#pragma pack()
char prop_name[30];
+ uint64_t maxmem = 0;
if (tp == NULL)
return;
@@ -2181,6 +2217,10 @@ process_srat(struct srat *tp)
mem_num);
bsetprop(prop_name, strlen(prop_name), &memory,
sizeof (memory));
+ if ((item->i.m.flags & SRAT_HOT_PLUG) &&
+ (memory.addr + memory.length > maxmem)) {
+ maxmem = memory.addr + memory.length;
+ }
mem_num++;
break;
case SRAT_X2APIC:
@@ -2199,6 +2239,14 @@ process_srat(struct srat *tp)
item = (struct srat_item *)
(item->len + (caddr_t)item);
}
+
+ /*
+ * The maximum physical address calculated from the SRAT table is more
+ * accurate than that calculated from the MSCT table.
+ */
+ if (maxmem != 0) {
+ plat_dr_physmax = btop(maxmem);
+ }
}
static void
@@ -2224,6 +2272,99 @@ process_slit(struct slit *tp)
tp->number * tp->number);
}
+static struct msct *
+process_msct(struct msct *tp)
+{
+ int last_seen = 0;
+ int proc_num = 0;
+ struct msct_proximity_domain *item, *end;
+ extern uint64_t plat_dr_options;
+
+ ASSERT(tp != NULL);
+
+ end = (void *)(tp->hdr.len + (uintptr_t)tp);
+ for (item = (void *)((uintptr_t)tp + tp->proximity_domain_offset);
+ item < end;
+ item = (void *)(item->length + (uintptr_t)item)) {
+ /*
+ * Sanity check according to section 5.2.19.1 of ACPI 4.0.
+ * Revision 1
+ * Length 22
+ */
+ if (item->revision != 1 || item->length != 22) {
+ cmn_err(CE_CONT,
+ "?boot: unknown proximity domain structure in MSCT "
+ "with rev(%d), len(%d).\n",
+ (int)item->revision, (int)item->length);
+ return (NULL);
+ } else if (item->domain_min > item->domain_max) {
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT "
+ "with domain_min(%u), domain_max(%u).\n",
+ item->domain_min, item->domain_max);
+ return (NULL);
+ } else if (item->domain_min != last_seen) {
+ /*
+ * Items must be organized in ascending order of the
+ * proximity domain enumerations.
+ */
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT,"
+ " items are not orginized in ascending order.\n");
+ return (NULL);
+ }
+
+ /*
+ * If processor_max is 0 then there would be no CPUs in this
+ * domain.
+ */
+ if (item->processor_max != 0) {
+ proc_num += (item->domain_max - item->domain_min + 1) *
+ item->processor_max;
+ }
+
+ last_seen = item->domain_max - item->domain_min + 1;
+ /*
+ * Break out if all proximity domains have been processed.
+ * Some BIOSes may have unused items at the end of MSCT table.
+ */
+ if (last_seen > tp->maximum_proximity_domains) {
+ break;
+ }
+ }
+ if (last_seen != tp->maximum_proximity_domains + 1) {
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT, "
+ "proximity domain count doesn't match.\n");
+ return (NULL);
+ }
+
+ /*
+ * Set plat-max-ncpus property if it hasn't been set yet.
+ */
+ if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
+ if (proc_num != 0) {
+ bsetpropsi(PLAT_MAX_NCPUS_NAME, proc_num);
+ }
+ }
+
+ /*
+ * Use Maximum Physical Address from the MSCT table as upper limit for
+ * memory hot-adding by default. It may be overridden by value from
+ * the SRAT table or the "plat-dr-physmax" boot option.
+ */
+ plat_dr_physmax = btop(tp->maximum_physical_address + 1);
+
+ /*
+ * Existence of the MSCT implies CPU/memory hotplug capability for the
+ * platform.
+ */
+ plat_dr_options |= PLAT_DR_FEATURE_CPU;
+ plat_dr_options |= PLAT_DR_FEATURE_MEMORY;
+
+ return (tp);
+}
+
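
A quick worked example of the accounting above, for the common single-entry case: an MSCT whose only proximity-domain structure has domain_min = 0, domain_max = 3 and processor_max = 4 yields proc_num = (3 - 0 + 1) * 4 = 16 and last_seen = 3 - 0 + 1 = 4. With maximum_proximity_domains = 3, the final check 4 == 3 + 1 passes, so plat-max-ncpus is set to 16 and plat_dr_physmax is derived from maximum_physical_address + 1.
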
#else /* __xpv */
static void
enumerate_xen_cpus()
@@ -2233,7 +2374,7 @@ enumerate_xen_cpus()
/*
* User-set boot-ncpus overrides enumeration
*/
- if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+ if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
return;
/*
@@ -2246,7 +2387,7 @@ enumerate_xen_cpus()
if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
max_id = id;
- bsetpropsi("boot-ncpus", max_id+1);
+ bsetpropsi(BOOT_NCPUS_NAME, max_id+1);
}
#endif /* __xpv */
@@ -2257,6 +2398,9 @@ build_firmware_properties(void)
struct table_header *tp = NULL;
#ifndef __xpv
+ if ((msct_ptr = (struct msct *)find_fw_table("MSCT")) != NULL)
+ msct_ptr = process_msct(msct_ptr);
+
if ((tp = find_fw_table("APIC")) != NULL)
process_madt((struct madt *)tp);
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index 1525b7859d..02596478c0 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* LOCALITY GROUP (LGROUP) PLATFORM SUPPORT FOR X86/AMD64 PLATFORMS
@@ -88,14 +92,18 @@
* - lgrp_plat_lat_stats.latencies[][] Table of latencies between same and
* different nodes indexed by node ID
*
- * - lgrp_plat_node_cnt Number of NUMA nodes in system
+ * - lgrp_plat_node_cnt Number of NUMA nodes in system for
+ * non-DR-capable systems,
+ * maximum possible number of NUMA nodes
+ * in system for DR capable systems.
*
* - lgrp_plat_node_domain[] Node ID to proximity domain ID mapping
* table indexed by node ID (only used
* for SRAT)
*
- * - lgrp_plat_node_memory[] Table with physical address range for
- * each node indexed by node ID
+ * - lgrp_plat_memnode_info[] Table with physical address range for
+ * each memory node indexed by memory node
+ * ID
*
* The code is implemented to make the following always be true:
*
@@ -113,26 +121,41 @@
* equivalent node ID since we want to keep the node IDs numbered from 0 to
* <number of nodes - 1> to minimize cost of searching and potentially space.
*
- * The code below really tries to do the above. However, the virtual memory
- * system expects the memnodes which describe the physical address range for
- * each NUMA node to be arranged in ascending order by physical address. (:-(
- * Otherwise, the kernel will panic in different semi-random places in the VM
- * system.
+ * With the introduction of support for memory DR operations on x86 platforms,
+ * things get a little complicated. The addresses of hot-added memory may not
+ * be contiguous with other memory connected to the same lgrp node. In other
+ * words, memory addresses may get interleaved among lgrp nodes after memory
+ * DR operations. To work around this limitation, we have extended the
+ * relationship between lgrp node and memory node from a 1:1 to a 1:N map,
+ * which means there may be multiple memory nodes associated with an lgrp
+ * node after memory DR operations.
*
- * Consequently, this module has to try to sort the nodes in ascending order by
- * each node's starting physical address to try to meet this "constraint" in
- * the VM system (see lgrp_plat_node_sort()). Also, the lowest numbered
- * proximity domain ID in the system is deteremined and used to make the lowest
- * numbered proximity domain map to node 0 in hopes that the proximity domains
- * are sorted in ascending order by physical address already even if their IDs
- * don't start at 0 (see NODE_DOMAIN_HASH() and lgrp_plat_srat_domains()).
- * Finally, it is important to note that these workarounds may not be
- * sufficient if/when memory hotplugging is supported and the VM system may
- * ultimately need to be fixed to handle this....
+ * To minimize the code changes to support memory DR operations, the
+ * following policies have been adopted.
+ * 1) On non-DR-capable systems, the relationship among lgroup platform handle,
+ * node ID and memnode ID is still kept as:
+ * lgroup platform handle == node ID == memnode ID
+ * 2) For memory present at boot time on DR-capable platforms, the relationship
+ * is still kept as is:
+ * lgroup platform handle == node ID == memnode ID
+ * 3) For hot-added memory, the relationship between lgrp ID and memnode ID has
+ * been changed from a 1:1 to a 1:N map. Memnode IDs [0 - lgrp_plat_node_cnt)
+ * are reserved for memory present at boot time, and memnode IDs
+ * [lgrp_plat_node_cnt, max_mem_nodes) are used to dynamically allocate
+ * memnode IDs for hot-added memory.
+ * 4) All boot code having the assumption "node ID == memnode ID" can remain
+ * as is, because node ID is always equal to memnode ID at boot time.
+ * 5) The lgrp_plat_memnode_info_update(), plat_pfn_to_mem_node() and
+ * lgrp_plat_mem_size() related logic has been enhanced to deal with the
+ * 1:N map relationship (see the sketch following this comment block).
+ * 6) The latency probing logic, which assumes "node ID == memnode ID" and may
+ * be called at run time, is disabled if memory DR operations are enabled.
*/
#include <sys/archsystm.h> /* for {in,out}{b,w,l}() */
+#include <sys/atomic.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/controlregs.h>
@@ -143,6 +166,7 @@
#include <sys/memlist.h>
#include <sys/memnode.h>
#include <sys/mman.h>
+#include <sys/note.h>
#include <sys/pci_cfgspace.h>
#include <sys/pci_impl.h>
#include <sys/param.h>
@@ -158,7 +182,8 @@
#include <vm/seg_kmem.h>
#include <vm/vm_dep.h>
-#include "acpi_fw.h" /* for SRAT and SLIT */
+#include <sys/acpidev.h>
+#include "acpi_fw.h" /* for SRAT, SLIT and MSCT */
#define MAX_NODES 8
@@ -187,7 +212,6 @@
((lgrp_plat_prox_domain_min == UINT32_MAX) ? (domain) % node_cnt : \
((domain) - lgrp_plat_prox_domain_min) % node_cnt)
-
/*
* CPU to node ID mapping structure (only used with SRAT)
*/
@@ -239,14 +263,16 @@ typedef struct node_domain_map {
} node_domain_map_t;
/*
- * Node ID and starting and ending page for physical memory in node
+ * Node ID and starting and ending page for physical memory in memory node
*/
-typedef struct node_phys_addr_map {
+typedef struct memnode_phys_addr_map {
pfn_t start;
pfn_t end;
int exists;
uint32_t prox_domain;
-} node_phys_addr_map_t;
+ uint32_t device_id;
+ uint_t lgrphand;
+} memnode_phys_addr_map_t;
/*
* Number of CPUs for which we got APIC IDs
@@ -278,7 +304,7 @@ static node_domain_map_t lgrp_plat_node_domain[MAX_NODES];
/*
* Physical address range for memory in each node
*/
-static node_phys_addr_map_t lgrp_plat_node_memory[MAX_NODES];
+static memnode_phys_addr_map_t lgrp_plat_memnode_info[MAX_MEM_NODES];
/*
* Statistics gotten from probing
@@ -306,6 +332,17 @@ static int lgrp_plat_srat_error = 0;
static int lgrp_plat_slit_error = 0;
/*
+ * Whether lgrp topology has been flattened to 2 levels.
+ */
+static int lgrp_plat_topo_flatten = 0;
+
+
+/*
+ * Maximum memory node ID in use.
+ */
+static uint_t lgrp_plat_max_mem_node;
+
+/*
* Allocate lgroup array statically
*/
static lgrp_t lgrp_space[NLGRP];
@@ -318,7 +355,7 @@ static int nlgrps_alloc;
int lgrp_plat_domain_min_enable = 1;
/*
- * Number of nodes in system
+ * Maximum possible number of nodes in system
*/
uint_t lgrp_plat_node_cnt = 1;
@@ -343,11 +380,12 @@ int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES;
int lgrp_plat_probe_nreads = LGRP_PLAT_PROBE_NREADS;
/*
- * Enable use of ACPI System Resource Affinity Table (SRAT) and System
- * Locality Information Table (SLIT)
+ * Enable use of ACPI System Resource Affinity Table (SRAT), System
+ * Locality Information Table (SLIT) and Maximum System Capability Table (MSCT)
*/
int lgrp_plat_srat_enable = 1;
int lgrp_plat_slit_enable = 1;
+int lgrp_plat_msct_enable = 1;
/*
* mnode_xwa: set to non-zero value to initiate workaround if large pages are
@@ -370,12 +408,10 @@ struct lgrp_stats lgrp_stats[NLGRP];
*/
void plat_build_mem_nodes(struct memlist *list);
-int plat_lgrphand_to_mem_node(lgrp_handle_t hand);
+int plat_mnode_xcheck(pfn_t pfncnt);
lgrp_handle_t plat_mem_node_to_lgrphand(int mnode);
-int plat_mnode_xcheck(pfn_t pfncnt);
-
int plat_pfn_to_mem_node(pfn_t pfn);
/*
@@ -420,11 +456,11 @@ static int lgrp_plat_domain_to_node(node_domain_map_t *node_domain,
static void lgrp_plat_get_numa_config(void);
-static void lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
+static void lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats,
lgrp_plat_probe_stats_t *probe_stats);
-static int lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
+static int lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats);
static void lgrp_plat_main_init(void);
@@ -434,13 +470,13 @@ static pgcnt_t lgrp_plat_mem_size_default(lgrp_handle_t, lgrp_mem_query_t);
static int lgrp_plat_node_domain_update(node_domain_map_t *node_domain,
int node_cnt, uint32_t domain);
-static int lgrp_plat_node_memory_update(node_domain_map_t *node_domain,
- int node_cnt, node_phys_addr_map_t *node_memory, uint64_t start,
- uint64_t end, uint32_t domain);
+static int lgrp_plat_memnode_info_update(node_domain_map_t *node_domain,
+ int node_cnt, memnode_phys_addr_map_t *memnode_info, int memnode_cnt,
+ uint64_t start, uint64_t end, uint32_t domain, uint32_t device_id);
static void lgrp_plat_node_sort(node_domain_map_t *node_domain,
int node_cnt, cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
int cpu_node_nentries, lgrp_plat_probe_mem_config_t *probe_mem_config,
@@ -448,24 +484,32 @@ static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
static int lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node);
-static int lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
- node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats);
+static int lgrp_plat_process_slit(struct slit *tp,
+ node_domain_map_t *node_domain, uint_t node_cnt,
+ memnode_phys_addr_map_t *memnode_info,
+ lgrp_plat_latency_stats_t *lat_stats);
+
+static int lgrp_plat_process_sli(uint32_t domain, uchar_t *sli_info,
+ uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt,
+ lgrp_plat_latency_stats_t *lat_stats);
-static int lgrp_plat_process_srat(struct srat *tp,
+static int lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
uint32_t *prox_domain_min, node_domain_map_t *node_domain,
cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static void lgrp_plat_release_bootstrap(void);
static int lgrp_plat_srat_domains(struct srat *tp,
uint32_t *prox_domain_min);
-static void lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
- lgrp_plat_latency_stats_t *lat_stats);
+static int lgrp_plat_msct_domains(struct msct *tp,
+ uint32_t *prox_domain_min);
+
+static void lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats);
static void opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static hrtime_t opt_probe_vendor(int dest_node, int nreads);
@@ -529,10 +573,10 @@ plat_build_mem_nodes(struct memlist *list)
* This shouldn't happen and rest of code can't deal
* with this if it does.
*/
- if (node < 0 || node >= lgrp_plat_node_cnt ||
- !lgrp_plat_node_memory[node].exists ||
- cur_start < lgrp_plat_node_memory[node].start ||
- cur_start > lgrp_plat_node_memory[node].end) {
+ if (node < 0 || node >= lgrp_plat_max_mem_node ||
+ !lgrp_plat_memnode_info[node].exists ||
+ cur_start < lgrp_plat_memnode_info[node].start ||
+ cur_start > lgrp_plat_memnode_info[node].end) {
cmn_err(CE_PANIC, "Don't know which memnode "
"to add installed memory address 0x%lx\n",
cur_start);
@@ -543,9 +587,9 @@ plat_build_mem_nodes(struct memlist *list)
*/
cur_end = end;
endcnt = 0;
- if (lgrp_plat_node_memory[node].exists &&
- cur_end > lgrp_plat_node_memory[node].end) {
- cur_end = lgrp_plat_node_memory[node].end;
+ if (lgrp_plat_memnode_info[node].exists &&
+ cur_end > lgrp_plat_memnode_info[node].end) {
+ cur_end = lgrp_plat_memnode_info[node].end;
if (mnode_xwa > 1) {
/*
* sacrifice the last page in each
@@ -572,16 +616,6 @@ plat_build_mem_nodes(struct memlist *list)
}
-int
-plat_lgrphand_to_mem_node(lgrp_handle_t hand)
-{
- if (max_mem_nodes == 1)
- return (0);
-
- return ((int)hand);
-}
-
-
/*
* plat_mnode_xcheck: checks the node memory ranges to see if there is a pfncnt
* range of pages aligned on pfncnt that crosses an node boundary. Returns 1 if
@@ -593,9 +627,9 @@ plat_mnode_xcheck(pfn_t pfncnt)
int node, prevnode = -1, basenode;
pfn_t ea, sa;
- for (node = 0; node < lgrp_plat_node_cnt; node++) {
+ for (node = 0; node < lgrp_plat_max_mem_node; node++) {
- if (lgrp_plat_node_memory[node].exists == 0)
+ if (lgrp_plat_memnode_info[node].exists == 0)
continue;
if (prevnode == -1) {
@@ -605,23 +639,23 @@ plat_mnode_xcheck(pfn_t pfncnt)
}
/* assume x86 node pfn ranges are in increasing order */
- ASSERT(lgrp_plat_node_memory[node].start >
- lgrp_plat_node_memory[prevnode].end);
+ ASSERT(lgrp_plat_memnode_info[node].start >
+ lgrp_plat_memnode_info[prevnode].end);
/*
* continue if the starting address of node is not contiguous
* with the previous node.
*/
- if (lgrp_plat_node_memory[node].start !=
- (lgrp_plat_node_memory[prevnode].end + 1)) {
+ if (lgrp_plat_memnode_info[node].start !=
+ (lgrp_plat_memnode_info[prevnode].end + 1)) {
basenode = node;
prevnode = node;
continue;
}
/* check if the starting address of node is pfncnt aligned */
- if ((lgrp_plat_node_memory[node].start & (pfncnt - 1)) != 0) {
+ if ((lgrp_plat_memnode_info[node].start & (pfncnt - 1)) != 0) {
/*
* at this point, node starts at an unaligned boundary
@@ -630,14 +664,14 @@ plat_mnode_xcheck(pfn_t pfncnt)
* range of length pfncnt that crosses this boundary.
*/
- sa = P2ALIGN(lgrp_plat_node_memory[prevnode].end,
+ sa = P2ALIGN(lgrp_plat_memnode_info[prevnode].end,
pfncnt);
- ea = P2ROUNDUP((lgrp_plat_node_memory[node].start),
+ ea = P2ROUNDUP((lgrp_plat_memnode_info[node].start),
pfncnt);
ASSERT((ea - sa) == pfncnt);
- if (sa >= lgrp_plat_node_memory[basenode].start &&
- ea <= (lgrp_plat_node_memory[node].end + 1)) {
+ if (sa >= lgrp_plat_memnode_info[basenode].start &&
+ ea <= (lgrp_plat_memnode_info[node].end + 1)) {
/*
* large page found to cross mnode boundary.
* Return Failure if workaround not enabled.
@@ -659,9 +693,10 @@ plat_mem_node_to_lgrphand(int mnode)
if (max_mem_nodes == 1)
return (LGRP_DEFAULT_HANDLE);
- return ((lgrp_handle_t)mnode);
-}
+ ASSERT(0 <= mnode && mnode < lgrp_plat_max_mem_node);
+ return ((lgrp_handle_t)(lgrp_plat_memnode_info[mnode].lgrphand));
+}
int
plat_pfn_to_mem_node(pfn_t pfn)
@@ -671,22 +706,23 @@ plat_pfn_to_mem_node(pfn_t pfn)
if (max_mem_nodes == 1)
return (0);
- for (node = 0; node < lgrp_plat_node_cnt; node++) {
+ for (node = 0; node < lgrp_plat_max_mem_node; node++) {
/*
* Skip nodes with no memory
*/
- if (!lgrp_plat_node_memory[node].exists)
+ if (!lgrp_plat_memnode_info[node].exists)
continue;
- if (pfn >= lgrp_plat_node_memory[node].start &&
- pfn <= lgrp_plat_node_memory[node].end)
+ membar_consumer();
+ if (pfn >= lgrp_plat_memnode_info[node].start &&
+ pfn <= lgrp_plat_memnode_info[node].end)
return (node);
}
/*
* Didn't find memnode where this PFN lives which should never happen
*/
- ASSERT(node < lgrp_plat_node_cnt);
+ ASSERT(node < lgrp_plat_max_mem_node);
return (-1);
}
@@ -698,7 +734,6 @@ plat_pfn_to_mem_node(pfn_t pfn)
/*
* Allocate additional space for an lgroup.
*/
-/* ARGSUSED */
lgrp_t *
lgrp_plat_alloc(lgrp_id_t lgrpid)
{
@@ -713,23 +748,198 @@ lgrp_plat_alloc(lgrp_id_t lgrpid)
/*
* Platform handling for (re)configuration changes
+ *
+ * Mechanism to protect lgrp_plat_cpu_node[] during CPU hotplug:
+ * 1) Use cpu_lock to synchronize between lgrp_plat_config() and
+ * lgrp_plat_cpu_to_hand().
+ * 2) Disable the latency probing logic by making sure that the flag
+ * LGRP_PLAT_PROBE_ENABLE is cleared.
+ *
+ * Mechanism to protect lgrp_plat_memnode_info[] during memory hotplug:
+ * 1) Only insertions into lgrp_plat_memnode_info at memory hotplug, no
+ * removals.
+ * 2) Only expansion of existing entries, no shrinking.
+ * 3) On the writing side, the DR framework ensures that lgrp_plat_config()
+ * is called in a single-threaded context, and membar_producer() is used to
+ * ensure that all changes are visible to other CPUs before the "exists"
+ * flag is set.
+ * 4) On the reading side, a membar_consumer() after checking the "exists"
+ * flag ensures that the right values are retrieved.
+ *
+ * Mechanism to protect lgrp_plat_node_domain[] during hotplug:
+ * 1) Only insertions into lgrp_plat_node_domain at hotplug, no removals.
+ * 2) On the writing side, it's single-threaded and membar_producer() is
+ * used to ensure that all changes are visible to other CPUs before the
+ * "exists" flag is set.
+ * 3) On the reading side, a membar_consumer() after checking the "exists"
+ * flag ensures that the right values are retrieved (sketched below).
*/
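
The node_domain mechanism above describes a classic one-way publication. A condensed sketch of the two sides, mirroring lgrp_plat_node_domain_update() and lgrp_plat_domain_to_node() further down (illustrative helpers, not patch code):

/* Writer: single-threaded under the DR framework. */
static void
publish_domain(node_domain_map_t *ndp, uint32_t domain)
{
	ndp->prox_domain = domain;	/* fill in the payload first */
	membar_producer();		/* make the payload visible ... */
	ndp->exists = 1;		/* ... before raising the flag */
}

/* Reader: lock-free on any CPU. */
static int
read_domain(node_domain_map_t *ndp, uint32_t *domainp)
{
	if (!ndp->exists)
		return (0);
	membar_consumer();		/* order flag read before payload */
	*domainp = ndp->prox_domain;
	return (1);
}
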
-/* ARGSUSED */
void
lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg)
{
+#ifdef __xpv
+ _NOTE(ARGUNUSED(flag, arg));
+#else
+ int rc, node;
+ cpu_t *cp;
+ void *hdl = NULL;
+ uchar_t *sliptr = NULL;
+ uint32_t domain, apicid, slicnt = 0;
+ update_membounds_t *mp;
+
+ extern int acpidev_dr_get_cpu_numa_info(cpu_t *, void **, uint32_t *,
+ uint32_t *, uint32_t *, uchar_t **);
+ extern void acpidev_dr_free_cpu_numa_info(void *);
+
+ /*
+ * This interface is used to support CPU/memory DR operations.
+ * Don't bother here if it's still during boot or only one lgrp node
+ * is supported.
+ */
+ if (!lgrp_topo_initialized || lgrp_plat_node_cnt == 1)
+ return;
+
+ switch (flag) {
+ case LGRP_CONFIG_CPU_ADD:
+ cp = (cpu_t *)arg;
+ ASSERT(cp != NULL);
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* Check whether CPU already exists. */
+ ASSERT(!lgrp_plat_cpu_node[cp->cpu_id].exists);
+ if (lgrp_plat_cpu_node[cp->cpu_id].exists) {
+ cmn_err(CE_WARN,
+ "!lgrp: CPU(%d) already exists in cpu_node map.",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Query CPU lgrp information. */
+ rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain,
+ &slicnt, &sliptr);
+ ASSERT(rc == 0);
+ if (rc != 0) {
+ cmn_err(CE_WARN,
+ "!lgrp: failed to query lgrp info for CPU(%d).",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Update node to proximity domain mapping */
+ node = lgrp_plat_domain_to_node(lgrp_plat_node_domain,
+ lgrp_plat_node_cnt, domain);
+ if (node == -1) {
+ node = lgrp_plat_node_domain_update(
+ lgrp_plat_node_domain, lgrp_plat_node_cnt, domain);
+ ASSERT(node != -1);
+ if (node == -1) {
+ acpidev_dr_free_cpu_numa_info(hdl);
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "node_domain map for domain(%u).", domain);
+ break;
+ }
+ }
+
+ /* Update latency information among lgrps. */
+ if (slicnt != 0 && sliptr != NULL) {
+ if (lgrp_plat_process_sli(domain, sliptr, slicnt,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt,
+ &lgrp_plat_lat_stats) != 0) {
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "latency information for domain (%u).",
+ domain);
+ }
+ }
+
+ /* Update CPU to node mapping. */
+ lgrp_plat_cpu_node[cp->cpu_id].prox_domain = domain;
+ lgrp_plat_cpu_node[cp->cpu_id].node = node;
+ lgrp_plat_cpu_node[cp->cpu_id].apicid = apicid;
+ lgrp_plat_cpu_node[cp->cpu_id].exists = 1;
+ lgrp_plat_apic_ncpus++;
+
+ acpidev_dr_free_cpu_numa_info(hdl);
+ break;
+
+ case LGRP_CONFIG_CPU_DEL:
+ cp = (cpu_t *)arg;
+ ASSERT(cp != NULL);
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* Check whether CPU exists. */
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].exists);
+ if (!lgrp_plat_cpu_node[cp->cpu_id].exists) {
+ cmn_err(CE_WARN,
+ "!lgrp: CPU(%d) doesn't exist in cpu_node map.",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Query CPU lgrp information. */
+ rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain,
+ NULL, NULL);
+ ASSERT(rc == 0);
+ if (rc != 0) {
+ cmn_err(CE_WARN,
+ "!lgrp: failed to query lgrp info for CPU(%d).",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Update map. */
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].apicid == apicid);
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].prox_domain == domain);
+ lgrp_plat_cpu_node[cp->cpu_id].exists = 0;
+ lgrp_plat_cpu_node[cp->cpu_id].apicid = UINT32_MAX;
+ lgrp_plat_cpu_node[cp->cpu_id].prox_domain = UINT32_MAX;
+ lgrp_plat_cpu_node[cp->cpu_id].node = UINT_MAX;
+ lgrp_plat_apic_ncpus--;
+
+ acpidev_dr_free_cpu_numa_info(hdl);
+ break;
+
+ case LGRP_CONFIG_MEM_ADD:
+ mp = (update_membounds_t *)arg;
+ ASSERT(mp != NULL);
+
+ /* Update latency information among lgrps. */
+ if (mp->u_sli_cnt != 0 && mp->u_sli_ptr != NULL) {
+ if (lgrp_plat_process_sli(mp->u_domain,
+ mp->u_sli_ptr, mp->u_sli_cnt,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt,
+ &lgrp_plat_lat_stats) != 0) {
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "latency information for domain (%u).",
+ domain);
+ }
+ }
+
+ if (lgrp_plat_memnode_info_update(lgrp_plat_node_domain,
+ lgrp_plat_node_cnt, lgrp_plat_memnode_info, max_mem_nodes,
+ mp->u_base, mp->u_base + mp->u_length,
+ mp->u_domain, mp->u_device_id) < 0) {
+ cmn_err(CE_WARN,
+ "!lgrp: failed to update latency information for "
+ "memory (0x%" PRIx64 " - 0x%" PRIx64 ").",
+ mp->u_base, mp->u_base + mp->u_length);
+ }
+ break;
+
+ default:
+ break;
+ }
+#endif /* __xpv */
}
/*
* Return the platform handle for the lgroup containing the given CPU
*/
-/* ARGSUSED */
lgrp_handle_t
lgrp_plat_cpu_to_hand(processorid_t id)
{
lgrp_handle_t hand;
+ ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));
+
if (lgrp_plat_node_cnt == 1)
return (LGRP_DEFAULT_HANDLE);
@@ -763,6 +973,7 @@ lgrp_plat_init(lgrp_init_stages_t stage)
*/
lgrp_plat_node_cnt = max_mem_nodes = 1;
#else /* __xpv */
+
/*
* Get boot property for lgroup topology height limit
*/
@@ -782,14 +993,32 @@ lgrp_plat_init(lgrp_init_stages_t stage)
lgrp_plat_slit_enable = (int)value;
/*
+ * Get boot property for enabling/disabling MSCT
+ */
+ if (bootprop_getval(BP_LGRP_MSCT_ENABLE, &value) == 0)
+ lgrp_plat_msct_enable = (int)value;
+
+ /*
* Initialize as a UMA machine
*/
if (lgrp_topo_ht_limit() == 1) {
lgrp_plat_node_cnt = max_mem_nodes = 1;
+ lgrp_plat_max_mem_node = 1;
return;
}
lgrp_plat_get_numa_config();
+
+ /*
+ * Each lgrp node needs MAX_MEM_NODES_PER_LGROUP memnodes
+ * to support memory DR operations if memory DR is enabled.
+ */
+ lgrp_plat_max_mem_node = lgrp_plat_node_cnt;
+ if (plat_dr_support_memory() && lgrp_plat_node_cnt != 1) {
+ max_mem_nodes = MAX_MEM_NODES_PER_LGROUP *
+ lgrp_plat_node_cnt;
+ ASSERT(max_mem_nodes <= MAX_MEM_NODES);
+ }
#endif /* __xpv */
break;
@@ -817,7 +1046,6 @@ lgrp_plat_init(lgrp_init_stages_t stage)
* specific, so platform gets to decide its value. It would be nice if the
* number was at least proportional to make comparisons more meaningful though.
*/
-/* ARGSUSED */
int
lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
{
@@ -844,13 +1072,29 @@ lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
/*
* Probe from current CPU if its lgroup latencies haven't been set yet
- * and we are trying to get latency from current CPU to some node
+ * and we are trying to get latency from current CPU to some node.
+ * Avoid probing if CPU/memory DR is enabled.
*/
- node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node,
- lgrp_plat_cpu_node_nentries);
- ASSERT(node >= 0 && node < lgrp_plat_node_cnt);
- if (lgrp_plat_lat_stats.latencies[src][src] == 0 && node == src)
- lgrp_plat_probe();
+ if (lgrp_plat_lat_stats.latencies[src][src] == 0) {
+ /*
+ * Latency information should be updated by lgrp_plat_config()
+ * for DR operations. Something is wrong if we reach here.
+ * For safety, flatten lgrp topology to two levels.
+ */
+ if (plat_dr_support_cpu() || plat_dr_support_memory()) {
+ ASSERT(lgrp_plat_lat_stats.latencies[src][src]);
+ cmn_err(CE_WARN,
+ "lgrp: failed to get latency information, "
+ "fall back to two-level topology.");
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
+ } else {
+ node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node,
+ lgrp_plat_cpu_node_nentries);
+ ASSERT(node >= 0 && node < lgrp_plat_node_cnt);
+ if (node == src)
+ lgrp_plat_probe();
+ }
+ }
return (lgrp_plat_lat_stats.latencies[src][dest]);
}
@@ -859,21 +1103,45 @@ lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
/*
* Return the maximum number of lgrps supported by the platform.
* Before lgrp topology is known it returns an estimate based on the number of
- * nodes. Once topology is known it returns the actual maximim number of lgrps
- * created. Since x86/x64 doesn't support Dynamic Reconfiguration (DR) and
- * dynamic addition of new nodes, this number may not grow during system
- * lifetime (yet).
+ * nodes. Once topology is known it returns:
+ * 1) the actual maximum number of lgrps created if CPU/memory DR operations
+ * are not supported.
+ * 2) the maximum possible number of lgrps if CPU/memory DR operations are
+ * supported.
*/
int
lgrp_plat_max_lgrps(void)
{
- return (lgrp_topo_initialized ?
- lgrp_alloc_max + 1 :
- lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1);
+ if (!lgrp_topo_initialized || plat_dr_support_cpu() ||
+ plat_dr_support_memory()) {
+ return (lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1);
+ } else {
+ return (lgrp_alloc_max + 1);
+ }
}
/*
+ * Count number of memory pages (_t) based on mnode id (_n) and query type (_q).
+ */
+#define _LGRP_PLAT_MEM_SIZE(_n, _q, _t) \
+ if (mem_node_config[_n].exists) { \
+ switch (_q) { \
+ case LGRP_MEM_SIZE_FREE: \
+ _t += MNODE_PGCNT(_n); \
+ break; \
+ case LGRP_MEM_SIZE_AVAIL: \
+ _t += mem_node_memlist_pages(_n, phys_avail); \
+ break; \
+ case LGRP_MEM_SIZE_INSTALL: \
+ _t += mem_node_memlist_pages(_n, phys_install); \
+ break; \
+ default: \
+ break; \
+ } \
+ }
+
+/*
* Return the number of free pages in an lgroup.
*
* For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize
@@ -896,25 +1164,19 @@ lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query)
return (lgrp_plat_mem_size_default(plathand, query));
if (plathand != LGRP_NULL_HANDLE) {
- mnode = plat_lgrphand_to_mem_node(plathand);
- if (mnode >= 0 && mem_node_config[mnode].exists) {
- switch (query) {
- case LGRP_MEM_SIZE_FREE:
- npgs = MNODE_PGCNT(mnode);
- break;
- case LGRP_MEM_SIZE_AVAIL:
- npgs = mem_node_memlist_pages(mnode,
- phys_avail);
- break;
- case LGRP_MEM_SIZE_INSTALL:
- npgs = mem_node_memlist_pages(mnode,
- phys_install);
- break;
- default:
- break;
- }
+ /* Count memory node present at boot. */
+ mnode = (int)plathand;
+ ASSERT(mnode < lgrp_plat_node_cnt);
+ _LGRP_PLAT_MEM_SIZE(mnode, query, npgs);
+
+ /* Count possible hot-added memory nodes. */
+ for (mnode = lgrp_plat_node_cnt;
+ mnode < lgrp_plat_max_mem_node; mnode++) {
+ if (lgrp_plat_memnode_info[mnode].lgrphand == plathand)
+ _LGRP_PLAT_MEM_SIZE(mnode, query, npgs);
}
}
+
return (npgs);
}
@@ -923,7 +1185,6 @@ lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query)
* Return the platform handle of the lgroup that contains the physical memory
* corresponding to the given page frame number
*/
-/* ARGSUSED */
lgrp_handle_t
lgrp_plat_pfn_to_hand(pfn_t pfn)
{
@@ -979,6 +1240,10 @@ lgrp_plat_probe(void)
max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2)
return;
+ /* SRAT and SLIT should be enabled if DR operations are enabled. */
+ if (plat_dr_support_cpu() || plat_dr_support_memory())
+ return;
+
/*
* Determine ID of node containing current CPU
*/
@@ -1051,14 +1316,13 @@ lgrp_plat_probe(void)
* - Fallback to just optimizing for local and remote if
* latencies didn't look right
*/
- lgrp_plat_latency_adjust(lgrp_plat_node_memory, &lgrp_plat_lat_stats,
+ lgrp_plat_latency_adjust(lgrp_plat_memnode_info, &lgrp_plat_lat_stats,
&lgrp_plat_probe_stats);
lgrp_plat_probe_stats.probe_error_code =
- lgrp_plat_latency_verify(lgrp_plat_node_memory,
+ lgrp_plat_latency_verify(lgrp_plat_memnode_info,
&lgrp_plat_lat_stats);
if (lgrp_plat_probe_stats.probe_error_code)
- lgrp_plat_2level_setup(lgrp_plat_node_memory,
- &lgrp_plat_lat_stats);
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
}
@@ -1078,8 +1342,11 @@ lgrp_plat_root_hand(void)
/*
- * Update CPU to node mapping for given CPU and proximity domain (and returns
- * negative numbers for errors and positive ones for success)
+ * Update CPU to node mapping for given CPU and proximity domain.
+ * Return values:
+ * - zero for success
+ * - positive numbers for warnings
+ * - negative numbers for errors
*/
static int
lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
@@ -1119,6 +1386,13 @@ lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
return (1);
/*
+ * It's invalid to have more than one entry with the same
+ * local APIC ID in SRAT table.
+ */
+ if (cpu_node[i].node != UINT_MAX)
+ return (-2);
+
+ /*
* Fill in node and proximity domain IDs
*/
cpu_node[i].prox_domain = domain;
@@ -1128,9 +1402,11 @@ lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
}
/*
- * Return error when entry for APIC ID wasn't found in table
+ * It's possible that an apicid doesn't exist in the cpu_node map because
+ * the user limited the number of CPUs powered on at boot by specifying
+ * the boot-ncpus kernel option.
*/
- return (-2);
+ return (2);
}
@@ -1189,9 +1465,11 @@ lgrp_plat_domain_to_node(node_domain_map_t *node_domain, int node_cnt,
*/
node = start = NODE_DOMAIN_HASH(domain, node_cnt);
do {
- if (node_domain[node].prox_domain == domain &&
- node_domain[node].exists)
- return (node);
+ if (node_domain[node].exists) {
+ membar_consumer();
+ if (node_domain[node].prox_domain == domain)
+ return (node);
+ }
node = (node + 1) % node_cnt;
} while (node != start);
return (-1);
@@ -1219,19 +1497,26 @@ lgrp_plat_get_numa_config(void)
if (lgrp_plat_apic_ncpus > 0) {
int retval;
+ /* Reserve enough resources if CPU DR is enabled. */
+ if (plat_dr_support_cpu() && max_ncpus > lgrp_plat_apic_ncpus)
+ lgrp_plat_cpu_node_nentries = max_ncpus;
+ else
+ lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus;
+
/*
* Temporarily allocate boot memory to use for CPU to node
* mapping since kernel memory allocator isn't alive yet
*/
lgrp_plat_cpu_node = (cpu_node_map_t *)BOP_ALLOC(bootops,
- NULL, lgrp_plat_apic_ncpus * sizeof (cpu_node_map_t),
+ NULL, lgrp_plat_cpu_node_nentries * sizeof (cpu_node_map_t),
sizeof (int));
ASSERT(lgrp_plat_cpu_node != NULL);
if (lgrp_plat_cpu_node) {
- lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus;
bzero(lgrp_plat_cpu_node, lgrp_plat_cpu_node_nentries *
sizeof (cpu_node_map_t));
+ } else {
+ lgrp_plat_cpu_node_nentries = 0;
}
/*
@@ -1240,10 +1525,10 @@ lgrp_plat_get_numa_config(void)
*/
(void) lgrp_plat_process_cpu_apicids(lgrp_plat_cpu_node);
- retval = lgrp_plat_process_srat(srat_ptr,
+ retval = lgrp_plat_process_srat(srat_ptr, msct_ptr,
&lgrp_plat_prox_domain_min,
lgrp_plat_node_domain, lgrp_plat_cpu_node,
- lgrp_plat_apic_ncpus, lgrp_plat_node_memory);
+ lgrp_plat_apic_ncpus, lgrp_plat_memnode_info);
if (retval <= 0) {
lgrp_plat_srat_error = retval;
lgrp_plat_node_cnt = 1;
@@ -1260,15 +1545,12 @@ lgrp_plat_get_numa_config(void)
if ((lgrp_plat_apic_ncpus <= 0 || lgrp_plat_srat_error != 0) &&
is_opteron())
opt_get_numa_config(&lgrp_plat_node_cnt, &lgrp_plat_mem_intrlv,
- lgrp_plat_node_memory);
+ lgrp_plat_memnode_info);
/*
* Don't bother to setup system for multiple lgroups and only use one
* memory node when memory is interleaved between any nodes or there is
* only one NUMA node
- *
- * NOTE: May need to change this for Dynamic Reconfiguration (DR)
- * when and if it happens for x86/x64
*/
if (lgrp_plat_mem_intrlv || lgrp_plat_node_cnt == 1) {
lgrp_plat_node_cnt = max_mem_nodes = 1;
@@ -1287,7 +1569,7 @@ lgrp_plat_get_numa_config(void)
/*
* There should be one memnode (physical page free list(s)) for
- * each node
+ * each node if memory DR is disabled.
*/
max_mem_nodes = lgrp_plat_node_cnt;
@@ -1303,8 +1585,35 @@ lgrp_plat_get_numa_config(void)
* exists
*/
lgrp_plat_slit_error = lgrp_plat_process_slit(slit_ptr,
- lgrp_plat_node_cnt, lgrp_plat_node_memory,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt, lgrp_plat_memnode_info,
&lgrp_plat_lat_stats);
+
+ /*
+ * Disable support of CPU/memory DR operations if multiple locality
+ * domains exist in the system and either of the following is true:
+ * 1) Failed to process the SLIT table.
+ * 2) Latency probing is enabled by the user.
+ */
+ if (lgrp_plat_node_cnt > 1 &&
+ (plat_dr_support_cpu() || plat_dr_support_memory())) {
+ if (!lgrp_plat_slit_enable || lgrp_plat_slit_error != 0 ||
+ !lgrp_plat_srat_enable || lgrp_plat_srat_error != 0 ||
+ lgrp_plat_apic_ncpus <= 0) {
+ cmn_err(CE_CONT,
+ "?lgrp: failed to process ACPI SRAT/SLIT table, "
+ "disable support of CPU/memory DR operations.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ } else if (lgrp_plat_probe_flags & LGRP_PLAT_PROBE_ENABLE) {
+ cmn_err(CE_CONT,
+ "?lgrp: latency probing enabled by user, "
+ "disable support of CPU/memory DR operations.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ }
+ }
+
+ /* Done if the SLIT table was processed successfully. */
if (lgrp_plat_slit_error == 0)
return;
@@ -1366,7 +1675,7 @@ int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT;
* latencies be same
*/
static void
-lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
+lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats, lgrp_plat_probe_stats_t *probe_stats)
{
int i;
@@ -1387,7 +1696,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
if (max_mem_nodes == 1)
return;
- ASSERT(node_memory != NULL && lat_stats != NULL &&
+ ASSERT(memnode_info != NULL && lat_stats != NULL &&
probe_stats != NULL);
/*
@@ -1395,11 +1704,11 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
* (ie. latency(node0, node1) == latency(node1, node0))
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t1 = lat_stats->latencies[i][j];
@@ -1452,7 +1761,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
/*
* Pick one pair of nodes (i, j)
@@ -1469,7 +1778,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
for (k = 0; k < lgrp_plat_node_cnt; k++) {
for (l = 0; l < lgrp_plat_node_cnt; l++) {
- if (!node_memory[l].exists)
+ if (!memnode_info[l].exists)
continue;
/*
* Pick another pair of nodes (k, l)
@@ -1545,7 +1854,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
min = -1;
max = 0;
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
t = lat_stats->latencies[i][i];
if (t == 0)
@@ -1559,7 +1868,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
for (i = 0; i < lgrp_plat_node_cnt; i++) {
int local;
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
local = lat_stats->latencies[i][i];
@@ -1590,7 +1899,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
lat_stats->latency_max = 0;
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t = lat_stats->latencies[i][j];
if (t > lat_stats->latency_max)
@@ -1616,7 +1925,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
* -3 Local >= remote
*/
static int
-lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
+lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats)
{
int i;
@@ -1624,7 +1933,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
u_longlong_t t1;
u_longlong_t t2;
- ASSERT(node_memory != NULL && lat_stats != NULL);
+ ASSERT(memnode_info != NULL && lat_stats != NULL);
/*
* Nothing to do when this is an UMA machine, lgroup topology is
@@ -1639,10 +1948,10 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
* (ie. latency(node0, node1) == latency(node1, node0))
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t1 = lat_stats->latencies[i][j];
t2 = lat_stats->latencies[j][i];
@@ -1659,7 +1968,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
*/
t1 = lat_stats->latencies[0][0];
for (i = 1; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
t2 = lat_stats->latencies[i][i];
@@ -1681,7 +1990,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
if (t1) {
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t2 = lat_stats->latencies[i][j];
if (i == j || t2 == 0)
@@ -1729,8 +2038,7 @@ lgrp_plat_main_init(void)
*/
if (ht_limit == 2 && lgrp_plat_lat_stats.latency_min == -1 &&
lgrp_plat_lat_stats.latency_max == 0)
- lgrp_plat_2level_setup(lgrp_plat_node_memory,
- &lgrp_plat_lat_stats);
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
return;
}
@@ -1769,7 +2077,7 @@ lgrp_plat_main_init(void)
* Skip this node and leave its probe page NULL
* if it doesn't have any memory
*/
- mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i);
+ mnode = i;
if (!mem_node_config[mnode].exists) {
lgrp_plat_probe_mem_config.probe_va[i] = NULL;
continue;
@@ -1815,10 +2123,11 @@ lgrp_plat_main_init(void)
* This is a copy of the MAX_MEM_NODES == 1 version of the routine
* used when MPO is disabled (i.e. single lgroup) or this is the root lgroup
*/
-/* ARGSUSED */
static pgcnt_t
lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query)
{
+ _NOTE(ARGUNUSED(lgrphand));
+
struct memlist *mlist;
pgcnt_t npgs = 0;
extern struct memlist *phys_avail;
@@ -1866,8 +2175,9 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt,
* domain and return node ID which is index into mapping table.
*/
if (!node_domain[node].exists) {
- node_domain[node].exists = 1;
node_domain[node].prox_domain = domain;
+ membar_producer();
+ node_domain[node].exists = 1;
return (node);
}
@@ -1887,18 +2197,17 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt,
return (-1);
}
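The reordering above (initialize prox_domain first, then membar_producer(), then set the exists flag) publishes a node_domain_map_t entry safely to lock-free readers. A minimal sketch of the matching consumer side; node_domain_lookup_sketch() is hypothetical, the real readers are the lookup paths elsewhere in lgrpplat.c:

	static int
	node_domain_lookup_sketch(node_domain_map_t *node_domain, int node_cnt,
	    uint32_t domain)
	{
		int i;

		for (i = 0; i < node_cnt; i++) {
			/* Observe the published exists flag first ... */
			if (!node_domain[i].exists)
				continue;
			/* ... then order the dependent load of prox_domain. */
			membar_consumer();
			if (node_domain[i].prox_domain == domain)
				return (i);
		}
		return (-1);
	}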
-
/*
* Update node memory information for given proximity domain with specified
* starting and ending physical address range (and return positive numbers for
* success and negative ones for errors)
*/
static int
-lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
- node_phys_addr_map_t *node_memory, uint64_t start, uint64_t end,
- uint32_t domain)
+lgrp_plat_memnode_info_update(node_domain_map_t *node_domain, int node_cnt,
+ memnode_phys_addr_map_t *memnode_info, int memnode_cnt, uint64_t start,
+ uint64_t end, uint32_t domain, uint32_t device_id)
{
- int node;
+ int node, mnode;
/*
* Get node number for proximity domain
@@ -1912,13 +2221,53 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
}
/*
+ * This function is called during boot if device_id is
+ * ACPI_MEMNODE_DEVID_BOOT, otherwise it's called at runtime for
+ * memory DR operations.
+ */
+ if (device_id != ACPI_MEMNODE_DEVID_BOOT) {
+ ASSERT(lgrp_plat_max_mem_node <= memnode_cnt);
+
+ for (mnode = lgrp_plat_node_cnt;
+ mnode < lgrp_plat_max_mem_node; mnode++) {
+ if (memnode_info[mnode].exists &&
+ memnode_info[mnode].prox_domain == domain &&
+ memnode_info[mnode].device_id == device_id) {
+ if (btop(start) < memnode_info[mnode].start)
+ memnode_info[mnode].start = btop(start);
+ if (btop(end) > memnode_info[mnode].end)
+ memnode_info[mnode].end = btop(end);
+ return (1);
+ }
+ }
+
+ if (lgrp_plat_max_mem_node >= memnode_cnt) {
+ return (-3);
+ } else {
+ lgrp_plat_max_mem_node++;
+ memnode_info[mnode].start = btop(start);
+ memnode_info[mnode].end = btop(end);
+ memnode_info[mnode].prox_domain = domain;
+ memnode_info[mnode].device_id = device_id;
+ memnode_info[mnode].lgrphand = node;
+ membar_producer();
+ memnode_info[mnode].exists = 1;
+ return (0);
+ }
+ }
+
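The runtime branch above grows memnode_info[] past the boot-time entries. A layout sketch under assumed counts (lgrp_plat_node_cnt == 2 and memnode_cnt == 4 are hypothetical):

	index 0..1: boot-time SRAT entries (device_id == ACPI_MEMNODE_DEVID_BOOT)
	index 2..3: hot-added ranges, claimed in order by bumping
	            lgrp_plat_max_mem_node

Each hot-added range gets a memnode of its own, while the lgrphand field still ties it to the lgroup of its proximity domain.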
+ /*
* Create entry in table for node if it doesn't exist
*/
- if (!node_memory[node].exists) {
- node_memory[node].exists = 1;
- node_memory[node].start = btop(start);
- node_memory[node].end = btop(end);
- node_memory[node].prox_domain = domain;
+ ASSERT(node < memnode_cnt);
+ if (!memnode_info[node].exists) {
+ memnode_info[node].start = btop(start);
+ memnode_info[node].end = btop(end);
+ memnode_info[node].prox_domain = domain;
+ memnode_info[node].device_id = device_id;
+ memnode_info[node].lgrphand = node;
+ membar_producer();
+ memnode_info[node].exists = 1;
return (0);
}
@@ -1928,11 +2277,11 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
* There may be more than one SRAT memory entry for a domain, so we may
* need to update existing start or end address for the node.
*/
- if (node_memory[node].prox_domain == domain) {
- if (btop(start) < node_memory[node].start)
- node_memory[node].start = btop(start);
- if (btop(end) > node_memory[node].end)
- node_memory[node].end = btop(end);
+ if (memnode_info[node].prox_domain == domain) {
+ if (btop(start) < memnode_info[node].start)
+ memnode_info[node].start = btop(start);
+ if (btop(end) > memnode_info[node].end)
+ memnode_info[node].end = btop(end);
return (1);
}
return (-2);
@@ -1940,16 +2289,14 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
/*
- * Have to sort node by starting physical address because VM system (physical
- * page free list management) assumes and expects memnodes to be sorted in
- * ascending order by physical address. If not, the kernel will panic in
- * potentially a number of different places. (:-(
- * NOTE: This workaround will not be sufficient if/when hotplugging memory is
- * supported on x86/x64.
+ * Have to sort nodes by starting physical address because plat_mnode_xcheck()
+ * assumes and expects memnodes to be sorted in ascending order by physical
+ * address.
*/
static void
lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
- cpu_node_map_t *cpu_node, int cpu_count, node_phys_addr_map_t *node_memory)
+ cpu_node_map_t *cpu_node, int cpu_count,
+ memnode_phys_addr_map_t *memnode_info)
{
boolean_t found;
int i;
@@ -1959,7 +2306,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
boolean_t swapped;
if (!lgrp_plat_node_sort_enable || node_cnt <= 1 ||
- node_domain == NULL || node_memory == NULL)
+ node_domain == NULL || memnode_info == NULL)
return;
/*
@@ -1970,7 +2317,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Skip entries that don't exist
*/
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
/*
@@ -1978,7 +2325,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
*/
found = B_FALSE;
for (j = i + 1; j < node_cnt; j++) {
- if (node_memory[j].exists) {
+ if (memnode_info[j].exists) {
found = B_TRUE;
break;
}
@@ -1994,7 +2341,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
* Not sorted if starting address of current entry is bigger
* than starting address of next existing entry
*/
- if (node_memory[i].start > node_memory[j].start) {
+ if (memnode_info[i].start > memnode_info[j].start) {
sorted = B_FALSE;
break;
}
@@ -2017,7 +2364,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Skip entries that don't exist
*/
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
/*
@@ -2025,7 +2372,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
*/
found = B_FALSE;
for (j = i + 1; j <= n; j++) {
- if (node_memory[j].exists) {
+ if (memnode_info[j].exists) {
found = B_TRUE;
break;
}
@@ -2037,8 +2384,8 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
if (found == B_FALSE)
break;
- if (node_memory[i].start > node_memory[j].start) {
- node_phys_addr_map_t save_addr;
+ if (memnode_info[i].start > memnode_info[j].start) {
+ memnode_phys_addr_map_t save_addr;
node_domain_map_t save_node;
/*
@@ -2054,12 +2401,12 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Swap node to physical memory assignments
*/
- bcopy(&node_memory[i], &save_addr,
- sizeof (node_phys_addr_map_t));
- bcopy(&node_memory[j], &node_memory[i],
- sizeof (node_phys_addr_map_t));
- bcopy(&save_addr, &node_memory[j],
- sizeof (node_phys_addr_map_t));
+ bcopy(&memnode_info[i], &save_addr,
+ sizeof (memnode_phys_addr_map_t));
+ bcopy(&memnode_info[j], &memnode_info[i],
+ sizeof (memnode_phys_addr_map_t));
+ bcopy(&save_addr, &memnode_info[j],
+ sizeof (memnode_phys_addr_map_t));
swapped = B_TRUE;
}
}
@@ -2226,11 +2573,12 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
return (-1);
/*
- * Calculate number of entries in array and return when there's just
- * one CPU since that's not very interesting for NUMA
+ * Calculate number of entries in array and return when the system is
+	 * not very interesting for NUMA. It's not interesting for NUMA if the
+	 * system has only one CPU and doesn't support CPU hotplug.
*/
n = boot_prop_len / sizeof (uint8_t);
- if (n == 1)
+ if (n == 1 && !plat_dr_support_cpu())
return (-2);
/*
@@ -2243,21 +2591,31 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
* Just return number of CPU APIC IDs if CPU to node mapping table is
* NULL
*/
- if (cpu_node == NULL)
- return (n);
+ if (cpu_node == NULL) {
+ if (plat_dr_support_cpu() && n >= boot_ncpus) {
+ return (boot_ncpus);
+ } else {
+ return (n);
+ }
+ }
/*
* Fill in CPU to node ID mapping table with APIC ID for each CPU
*/
for (i = 0; i < n; i++) {
+ /* Only add boot CPUs into the map if CPU DR is enabled. */
+ if (plat_dr_support_cpu() && i >= boot_ncpus)
+ break;
cpu_node[i].exists = 1;
cpu_node[i].apicid = cpu_apicid_array[i];
+ cpu_node[i].prox_domain = UINT32_MAX;
+ cpu_node[i].node = UINT_MAX;
}
/*
* Return number of CPUs based on number of APIC IDs
*/
- return (n);
+ return (i);
}
@@ -2266,11 +2624,14 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
* NUMA node is from each other
*/
static int
-lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
- node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats)
+lgrp_plat_process_slit(struct slit *tp,
+ node_domain_map_t *node_domain, uint_t node_cnt,
+ memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats)
{
int i;
int j;
+ int src;
+ int dst;
int localities;
hrtime_t max;
hrtime_t min;
@@ -2284,8 +2645,6 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
return (2);
localities = tp->number;
- if (localities != node_cnt)
- return (3);
min = lat_stats->latency_min;
max = lat_stats->latency_max;
@@ -2295,11 +2654,21 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
*/
slit_entries = tp->entry;
for (i = 0; i < localities; i++) {
+ src = lgrp_plat_domain_to_node(node_domain,
+ node_cnt, i);
+ if (src == -1)
+ continue;
+
for (j = 0; j < localities; j++) {
uint8_t latency;
+ dst = lgrp_plat_domain_to_node(node_domain,
+ node_cnt, j);
+ if (dst == -1)
+ continue;
+
latency = slit_entries[(i * localities) + j];
- lat_stats->latencies[i][j] = latency;
+ lat_stats->latencies[src][dst] = latency;
if (latency < min || min == -1)
min = latency;
if (latency > max)
@@ -2310,7 +2679,7 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
/*
* Verify that latencies/distances given in SLIT look reasonable
*/
- retval = lgrp_plat_latency_verify(node_memory, lat_stats);
+ retval = lgrp_plat_latency_verify(memnode_info, lat_stats);
if (retval) {
/*
@@ -2334,20 +2703,110 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
/*
+ * Update lgrp latencies according to information returned by ACPI _SLI method.
+ */
+static int
+lgrp_plat_process_sli(uint32_t domain_id, uchar_t *sli_info,
+ uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt,
+ lgrp_plat_latency_stats_t *lat_stats)
+{
+ int i;
+ int src, dst;
+ uint8_t latency;
+ hrtime_t max, min;
+
+ if (lat_stats == NULL || sli_info == NULL ||
+ sli_cnt == 0 || domain_id >= sli_cnt)
+ return (-1);
+
+ src = lgrp_plat_domain_to_node(node_domain, node_cnt, domain_id);
+ if (src == -1) {
+ src = lgrp_plat_node_domain_update(node_domain, node_cnt,
+ domain_id);
+ if (src == -1)
+ return (-1);
+ }
+
+ /*
+ * Don't update latency info if topology has been flattened to 2 levels.
+ */
+ if (lgrp_plat_topo_flatten != 0) {
+ return (0);
+ }
+
+ /*
+	 * Latency information for this proximity domain is already present.
+ * TODO: support adjusting latency information at runtime.
+ */
+ if (lat_stats->latencies[src][src] != 0) {
+ return (0);
+ }
+
+ /* Validate latency information. */
+ for (i = 0; i < sli_cnt; i++) {
+ if (i == domain_id) {
+ if (sli_info[i] != ACPI_SLIT_SELF_LATENCY ||
+ sli_info[sli_cnt + i] != ACPI_SLIT_SELF_LATENCY) {
+ return (-1);
+ }
+ } else {
+ if (sli_info[i] <= ACPI_SLIT_SELF_LATENCY ||
+ sli_info[sli_cnt + i] <= ACPI_SLIT_SELF_LATENCY ||
+ sli_info[i] != sli_info[sli_cnt + i]) {
+ return (-1);
+ }
+ }
+ }
+
+ min = lat_stats->latency_min;
+ max = lat_stats->latency_max;
+ for (i = 0; i < sli_cnt; i++) {
+ dst = lgrp_plat_domain_to_node(node_domain, node_cnt, i);
+ if (dst == -1)
+ continue;
+
+ ASSERT(sli_info[i] == sli_info[sli_cnt + i]);
+
+ /* Update row in latencies matrix. */
+ latency = sli_info[i];
+ lat_stats->latencies[src][dst] = latency;
+ if (latency < min || min == -1)
+ min = latency;
+ if (latency > max)
+ max = latency;
+
+ /* Update column in latencies matrix. */
+ latency = sli_info[sli_cnt + i];
+ lat_stats->latencies[dst][src] = latency;
+ if (latency < min || min == -1)
+ min = latency;
+ if (latency > max)
+ max = latency;
+ }
+ lat_stats->latency_min = min;
+ lat_stats->latency_max = max;
+
+ return (0);
+}
+
+
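For reference, the _SLI buffer validated above is assumed to hold 2 * sli_cnt bytes: the first sli_cnt entries are latencies from domain_id to each domain (the matrix row), the next sli_cnt entries are latencies from each domain back to domain_id (the column). A worked example with hypothetical values, sli_cnt == 2, domain_id == 0, and ACPI_SLIT_SELF_LATENCY == 10:

	uchar_t ok[]  = { 10, 21, 10, 21 };	/* symmetric, accepted */
	uchar_t bad[] = { 10, 21, 10, 17 };	/* bad[1] != bad[3], rejected */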
+/*
* Read ACPI System Resource Affinity Table (SRAT) to determine which CPUs
* and memory are local to each other in the same NUMA node and return number
* of nodes
*/
static int
-lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
- node_domain_map_t *node_domain, cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory)
+lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
+ uint32_t *prox_domain_min, node_domain_map_t *node_domain,
+ cpu_node_map_t *cpu_node, int cpu_count,
+ memnode_phys_addr_map_t *memnode_info)
{
struct srat_item *srat_end;
int i;
struct srat_item *item;
int node_cnt;
int proc_entry_count;
+ int rc;
/*
* Nothing to do when no SRAT or disabled
@@ -2356,11 +2815,21 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
return (-1);
/*
- * Determine number of nodes by counting number of proximity domains in
- * SRAT and return if number of nodes is 1 or less since don't need to
- * read SRAT then
+	 * Try to get domain information from the MSCT table.
+	 * ACPI 4.0: OSPM will use information provided by the MSCT only
+ * when the System Resource Affinity Table (SRAT) exists.
+ */
+ node_cnt = lgrp_plat_msct_domains(mp, prox_domain_min);
+ if (node_cnt <= 0) {
+ /*
+ * Determine number of nodes by counting number of proximity
+ * domains in SRAT.
+ */
+ node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min);
+ }
+ /*
+	 * Return if the node count is 1 or less; no need to read the SRAT.
*/
- node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min);
if (node_cnt == 1)
return (1);
else if (node_cnt <= 0)
@@ -2397,16 +2866,17 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
}
apic_id = item->i.p.apic_id;
- if (lgrp_plat_cpu_node_update(node_domain, node_cnt,
- cpu_node, cpu_count, apic_id, domain) < 0)
+ rc = lgrp_plat_cpu_node_update(node_domain, node_cnt,
+ cpu_node, cpu_count, apic_id, domain);
+ if (rc < 0)
return (-3);
-
- proc_entry_count++;
+ else if (rc == 0)
+ proc_entry_count++;
break;
case SRAT_MEMORY: /* memory entry */
if (!(item->i.m.flags & SRAT_ENABLED) ||
- node_memory == NULL)
+ memnode_info == NULL)
break;
/*
@@ -2418,10 +2888,52 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
length = item->i.m.len;
end = start + length - 1;
- if (lgrp_plat_node_memory_update(node_domain, node_cnt,
- node_memory, start, end, domain) < 0)
+ /*
+			 * According to ACPI 4.0, both the ENABLE and HOTPLUG
+			 * flags may be set for memory address range entries
+			 * in the SRAT which are reserved for memory hot plug,
+			 * so intersect each such range with the ranges in
+			 * physinstalled to filter out the portions that are
+			 * not yet populated.
+ */
+ if (item->i.m.flags & SRAT_HOT_PLUG) {
+ uint64_t rstart = UINT64_MAX;
+ uint64_t rend = 0;
+ struct memlist *ml;
+ extern struct bootops *bootops;
+
+ memlist_read_lock();
+ for (ml = bootops->boot_mem->physinstalled;
+ ml; ml = ml->ml_next) {
+ uint64_t tstart = ml->ml_address;
+ uint64_t tend;
+
+ tend = ml->ml_address + ml->ml_size;
+ if (tstart > end || tend < start)
+ continue;
+ if (start > tstart)
+ tstart = start;
+ if (rstart > tstart)
+ rstart = tstart;
+ if (end < tend)
+ tend = end;
+ if (rend < tend)
+ rend = tend;
+ }
+ memlist_read_unlock();
+ start = rstart;
+ end = rend;
+				/* Skip this entry if no memory is installed. */
+ if (start > end)
+ break;
+ }
+
+ if (lgrp_plat_memnode_info_update(node_domain,
+ node_cnt, memnode_info, node_cnt,
+ start, end, domain, ACPI_MEMNODE_DEVID_BOOT) < 0)
return (-4);
break;
+
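A worked example of the intersection loop above (all numbers hypothetical): given an SRAT hot-plug range of [4 GB, 16 GB) and physinstalled segments [1 GB, 6 GB) and [8 GB, 10 GB), the loop clips the first segment to rstart = 4 GB and extends rend to 10 GB, so [4 GB, 10 GB) is handed to lgrp_plat_memnode_info_update(). Note that the result is the bounding hull of the installed pieces: the unpopulated hole at [6 GB, 8 GB) stays inside the range, while the empty tail at [10 GB, 16 GB) is left for runtime memory DR.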
case SRAT_X2APIC: /* x2apic CPU entry */
if (!(item->i.xp.flags & SRAT_ENABLED) ||
cpu_node == NULL)
@@ -2434,11 +2946,12 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
domain = item->i.xp.domain;
apic_id = item->i.xp.x2apic_id;
- if (lgrp_plat_cpu_node_update(node_domain, node_cnt,
- cpu_node, cpu_count, apic_id, domain) < 0)
+ rc = lgrp_plat_cpu_node_update(node_domain, node_cnt,
+ cpu_node, cpu_count, apic_id, domain);
+ if (rc < 0)
return (-3);
-
- proc_entry_count++;
+ else if (rc == 0)
+ proc_entry_count++;
break;
default:
@@ -2460,7 +2973,7 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
* physical address
*/
lgrp_plat_node_sort(node_domain, node_cnt, cpu_node, cpu_count,
- node_memory);
+ memnode_info);
return (node_cnt);
}
@@ -2665,27 +3178,70 @@ lgrp_plat_srat_domains(struct srat *tp, uint32_t *prox_domain_min)
/*
+ * Parse domain information in the ACPI Maximum System Characteristics Table
+ * (MSCT). The MSCT has been verified by process_msct() in fakebop.c.
+ */
+static int
+lgrp_plat_msct_domains(struct msct *tp, uint32_t *prox_domain_min)
+{
+ int last_seen = 0;
+ uint32_t proxmin = UINT32_MAX;
+ struct msct_proximity_domain *item, *end;
+
+ if (tp == NULL || lgrp_plat_msct_enable == 0)
+ return (-1);
+
+ if (tp->maximum_proximity_domains >= MAX_NODES) {
+ cmn_err(CE_CONT,
+ "?lgrp: too many proximity domains (%d), max %d supported, "
+		    "disabling support of CPU/memory DR operations.",
+ tp->maximum_proximity_domains + 1, MAX_NODES);
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ return (-1);
+ }
+
+ if (prox_domain_min != NULL) {
+ end = (void *)(tp->hdr.len + (uintptr_t)tp);
+ for (item = (void *)((uintptr_t)tp +
+ tp->proximity_domain_offset); item < end;
+ item = (void *)(item->length + (uintptr_t)item)) {
+ if (item->domain_min < proxmin) {
+ proxmin = item->domain_min;
+ }
+
+ last_seen = item->domain_max - item->domain_min + 1;
+ /*
+ * Break out if all proximity domains have been
+ * processed. Some BIOSes may have unused items
+ * at the end of MSCT table.
+ */
+ if (last_seen > tp->maximum_proximity_domains) {
+ break;
+ }
+ }
+ *prox_domain_min = proxmin;
+ }
+
+ return (tp->maximum_proximity_domains + 1);
+}
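The item walk above follows the usual ACPI variable-length subtable pattern. A minimal standalone sketch; msct_max_domain() is a hypothetical helper, the field names are those of the structures used above:

	static uint32_t
	msct_max_domain(struct msct *tp)
	{
		uint32_t maxdom = 0;
		struct msct_proximity_domain *item, *end;

		/* The table ends hdr.len bytes after its start. */
		end = (void *)((uintptr_t)tp + tp->hdr.len);
		for (item = (void *)((uintptr_t)tp +
		    tp->proximity_domain_offset); item < end;
		    item = (void *)((uintptr_t)item + item->length)) {
			if (item->domain_max > maxdom)
				maxdom = item->domain_max;
		}
		return (maxdom);
	}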
+
+
+/*
* Set lgroup latencies for 2 level lgroup topology
*/
static void
-lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
- lgrp_plat_latency_stats_t *lat_stats)
+lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats)
{
- int i;
+ int i, j;
- ASSERT(node_memory != NULL && lat_stats != NULL);
+ ASSERT(lat_stats != NULL);
if (lgrp_plat_node_cnt >= 4)
cmn_err(CE_NOTE,
"MPO only optimizing for local and remote\n");
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- int j;
-
- if (!node_memory[i].exists)
- continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
- continue;
if (i == j)
lat_stats->latencies[i][j] = 2;
else
@@ -2694,7 +3250,9 @@ lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
}
lat_stats->latency_min = 2;
lat_stats->latency_max = 3;
+ /* TODO: check it. */
lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0);
+ lgrp_plat_topo_flatten = 1;
}
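With node_cnt == 2, for example, the matrix set above becomes

	latencies = { { 2, 3 },
	              { 3, 2 } }

which is all a two-level (local/remote) topology can express: every remote node is equally far away, at 1.5x the local latency.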
@@ -2884,7 +3442,7 @@ is_opteron(void)
*/
static void
opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
- node_phys_addr_map_t *node_memory)
+ memnode_phys_addr_map_t *memnode_info)
{
uint_t bus;
uint_t dev;
@@ -2999,10 +3557,10 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
(base_lo & OPT_DRAMBASE_LO_MASK_WE) == 0) {
/*
* Mark node memory as non-existent and set start and
- * end addresses to be same in node_memory[]
+ * end addresses to be same in memnode_info[]
*/
- node_memory[node].exists = 0;
- node_memory[node].start = node_memory[node].end =
+ memnode_info[node].exists = 0;
+ memnode_info[node].start = memnode_info[node].end =
(pfn_t)-1;
continue;
}
@@ -3011,11 +3569,11 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
* Mark node memory as existing and remember physical address
* range of each node for use later
*/
- node_memory[node].exists = 1;
+ memnode_info[node].exists = 1;
- node_memory[node].start = btop(OPT_DRAMADDR(base_hi, base_lo));
+ memnode_info[node].start = btop(OPT_DRAMADDR(base_hi, base_lo));
- node_memory[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) |
+ memnode_info[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) |
OPT_DRAMADDR_LO_MASK_OFF);
}
diff --git a/usr/src/uts/i86pc/os/machdep.c b/usr/src/uts/i86pc/os/machdep.c
index c9f97fc046..e9d08a9fa7 100644
--- a/usr/src/uts/i86pc/os/machdep.c
+++ b/usr/src/uts/i86pc/os/machdep.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/t_lock.h>
@@ -147,6 +151,16 @@ int force_shutdown_method = AD_UNKNOWN;
char panicbuf[PANICBUFSIZE];
/*
+ * Flags to control Dynamic Reconfiguration features.
+ */
+uint64_t plat_dr_options;
+
+/*
+ * Maximum physical address for memory DR operations.
+ */
+uint64_t plat_dr_physmax;
+
+/*
* maxphys - used during physio
* klustsize - used for klustering by swapfs and specfs
*/
@@ -1390,3 +1404,33 @@ lbolt_softint_post(void)
{
(*setsoftint)(CBE_LOCK_PIL, lbolt_softint_hdl.ih_pending);
}
+
+boolean_t
+plat_dr_check_capability(uint64_t features)
+{
+ return ((plat_dr_options & features) == features);
+}
+
+boolean_t
+plat_dr_support_cpu(void)
+{
+ return (plat_dr_options & PLAT_DR_FEATURE_CPU);
+}
+
+boolean_t
+plat_dr_support_memory(void)
+{
+ return (plat_dr_options & PLAT_DR_FEATURE_MEMORY);
+}
+
+void
+plat_dr_enable_capability(uint64_t features)
+{
+ atomic_or_64(&plat_dr_options, features);
+}
+
+void
+plat_dr_disable_capability(uint64_t features)
+{
+ atomic_and_64(&plat_dr_options, ~features);
+}
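A minimal caller sketch for the predicates above (dr_try_memory_op() is hypothetical; PLAT_DR_FEATURE_ENABLED is the driver-set enable bit masked off in mlsetup.c below):

	static int
	dr_try_memory_op(void)
	{
		/* Require both the memory DR feature and the enable bit. */
		if (!plat_dr_check_capability(PLAT_DR_FEATURE_MEMORY |
		    PLAT_DR_FEATURE_ENABLED))
			return (ENOTSUP);
		/* ... proceed with the memory DR operation ... */
		return (0);
	}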
diff --git a/usr/src/uts/i86pc/os/mem_config_arch.c b/usr/src/uts/i86pc/os/mem_config_arch.c
new file mode 100644
index 0000000000..191e8cb641
--- /dev/null
+++ b/usr/src/uts/i86pc/os/mem_config_arch.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1997-1998 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/page.h>
+#include <sys/mem_config.h>
+
+/*ARGSUSED*/
+int
+arch_kphysm_del_span_ok(pfn_t base, pgcnt_t npgs)
+{
+ ASSERT(npgs != 0);
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+arch_kphysm_relocate(pfn_t base, pgcnt_t npgs)
+{
+ ASSERT(npgs != 0);
+ return (ENOTSUP);
+}
+
+int
+arch_kphysm_del_supported(void)
+{
+ return (0);
+}
diff --git a/usr/src/uts/i86pc/os/memnode.c b/usr/src/uts/i86pc/os/memnode.c
index 908b2e8cd2..35185bcd84 100644
--- a/usr/src/uts/i86pc/os/memnode.c
+++ b/usr/src/uts/i86pc/os/memnode.c
@@ -84,7 +84,7 @@ mem_node_add_slice(pfn_t start, pfn_t end)
}
mnode = PFN_2_MEM_NODE(start);
- ASSERT(mnode < max_mem_nodes);
+ ASSERT(mnode >= 0 && mnode < max_mem_nodes);
if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
/*
@@ -130,7 +130,7 @@ mem_node_del_slice(pfn_t start, pfn_t end)
}
mnode = PFN_2_MEM_NODE(start);
- ASSERT(mnode < max_mem_nodes);
+ ASSERT(mnode >= 0 && mnode < max_mem_nodes);
ASSERT(mem_node_config[mnode].exists == 1);
delta_pgcnt = end - start;
diff --git a/usr/src/uts/i86pc/os/microcode.c b/usr/src/uts/i86pc/os/microcode.c
index 6f32a86c71..90d9585640 100644
--- a/usr/src/uts/i86pc/os/microcode.c
+++ b/usr/src/uts/i86pc/os/microcode.c
@@ -141,6 +141,7 @@ void
ucode_alloc_space(cpu_t *cp)
{
ASSERT(cp->cpu_id != 0);
+ ASSERT(cp->cpu_m.mcpu_ucode_info == NULL);
cp->cpu_m.mcpu_ucode_info =
kmem_zalloc(sizeof (*cp->cpu_m.mcpu_ucode_info), KM_SLEEP);
}
@@ -148,9 +149,11 @@ ucode_alloc_space(cpu_t *cp)
void
ucode_free_space(cpu_t *cp)
{
- ASSERT(cp->cpu_id != 0);
+ ASSERT(cp->cpu_m.mcpu_ucode_info != NULL);
+ ASSERT(cp->cpu_m.mcpu_ucode_info != &cpu_ucode_info0);
kmem_free(cp->cpu_m.mcpu_ucode_info,
sizeof (*cp->cpu_m.mcpu_ucode_info));
+ cp->cpu_m.mcpu_ucode_info = NULL;
}
/*
@@ -1126,7 +1129,10 @@ ucode_check(cpu_t *cp)
uint32_t new_rev = 0;
ASSERT(cp);
- if (cp->cpu_id == 0)
+ /*
+	 * Space is statically allocated for the BSP; ensure the pointer is set.
+ */
+ if (cp->cpu_id == 0 && cp->cpu_m.mcpu_ucode_info == NULL)
cp->cpu_m.mcpu_ucode_info = &cpu_ucode_info0;
uinfop = cp->cpu_m.mcpu_ucode_info;
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 3f8752638f..bf57dd3c50 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -107,6 +111,7 @@ mlsetup(struct regs *rp)
extern char t0stack[];
extern int post_fastreboot;
extern int console;
+ extern uint64_t plat_dr_options;
ASSERT_STACK_ALIGNED();
@@ -354,15 +359,70 @@ mlsetup(struct regs *rp)
prom_init("kernel", (void *)NULL);
- if (bootprop_getval("boot-ncpus", &prop_value) != 0)
+ /* User-set option overrides firmware value. */
+ if (bootprop_getval(PLAT_DR_OPTIONS_NAME, &prop_value) == 0) {
+ plat_dr_options = (uint64_t)prop_value;
+ }
+#if defined(__xpv)
+ /* No support of DR operations on xpv */
+ plat_dr_options = 0;
+#else /* __xpv */
+ /* Flag PLAT_DR_FEATURE_ENABLED should only be set by DR driver. */
+ plat_dr_options &= ~PLAT_DR_FEATURE_ENABLED;
+#ifndef __amd64
+	/* Only enable CPU/memory DR on the 64-bit kernel. */
+ plat_dr_options &= ~PLAT_DR_FEATURE_MEMORY;
+ plat_dr_options &= ~PLAT_DR_FEATURE_CPU;
+#endif /* __amd64 */
+#endif /* __xpv */
+
+ /*
+ * Get value of "plat_dr_physmax" boot option.
+	 * It overrides values calculated from the MSCT or SRAT tables.
+ */
+ if (bootprop_getval(PLAT_DR_PHYSMAX_NAME, &prop_value) == 0) {
+ plat_dr_physmax = ((uint64_t)prop_value) >> PAGESHIFT;
+ }
+
+ /* Get value of boot_ncpus. */
+ if (bootprop_getval(BOOT_NCPUS_NAME, &prop_value) != 0) {
boot_ncpus = NCPU;
- else {
+ } else {
boot_ncpus = (int)prop_value;
if (boot_ncpus <= 0 || boot_ncpus > NCPU)
boot_ncpus = NCPU;
}
- max_ncpus = boot_max_ncpus = boot_ncpus;
+ /*
+ * Set max_ncpus and boot_max_ncpus to boot_ncpus if platform doesn't
+ * support CPU DR operations.
+ */
+ if (plat_dr_support_cpu() == 0) {
+ max_ncpus = boot_max_ncpus = boot_ncpus;
+ } else {
+ if (bootprop_getval(PLAT_MAX_NCPUS_NAME, &prop_value) != 0) {
+ max_ncpus = NCPU;
+ } else {
+ max_ncpus = (int)prop_value;
+ if (max_ncpus <= 0 || max_ncpus > NCPU) {
+ max_ncpus = NCPU;
+ }
+ if (boot_ncpus > max_ncpus) {
+ boot_ncpus = max_ncpus;
+ }
+ }
+
+ if (bootprop_getval(BOOT_MAX_NCPUS_NAME, &prop_value) != 0) {
+ boot_max_ncpus = boot_ncpus;
+ } else {
+ boot_max_ncpus = (int)prop_value;
+ if (boot_max_ncpus <= 0 || boot_max_ncpus > NCPU) {
+ boot_max_ncpus = boot_ncpus;
+ } else if (boot_max_ncpus > max_ncpus) {
+ boot_max_ncpus = max_ncpus;
+ }
+ }
+ }
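A worked example of the clamping above (property values hypothetical): with NCPU == 256, BOOT_NCPUS_NAME set to 4, PLAT_MAX_NCPUS_NAME set to 32, BOOT_MAX_NCPUS_NAME unset, and CPU DR supported, the result is boot_ncpus == 4, max_ncpus == 32 and boot_max_ncpus == 4: four CPUs spin up at boot while CPU IDs up to 31 stay reserved for hot-add.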
/*
* Initialize the lgrp framework
diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c
index b5742b2c69..3bded8f124 100644
--- a/usr/src/uts/i86pc/os/mp_implfuncs.c
+++ b/usr/src/uts/i86pc/os/mp_implfuncs.c
@@ -22,12 +22,12 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+#define PSMI_1_7
#include <sys/vmem.h>
#include <vm/hat.h>
#include <sys/modctl.h>
#include <vm/seg_kmem.h>
-#define PSMI_1_6
#include <sys/psm.h>
#include <sys/psm_modctl.h>
#include <sys/smp_impldefs.h>
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 80e371850b..dc4023007c 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -24,11 +24,11 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/smp_impldefs.h>
#include <sys/psm.h>
#include <sys/psm_modctl.h>
@@ -157,6 +157,7 @@ void (*notify_error)(int, char *) = (void (*)(int, char *))return_instr;
void (*hrtime_tick)(void) = return_instr;
int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **) = mach_cpu_create_devinfo;
+int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **) = NULL;
/*
* True if the generic TSC code is our source of hrtime, rather than whatever
@@ -864,9 +865,17 @@ mach_get_platform(int owner)
total_ops = OFFSETOF(struct psm_ops, psm_preshutdown) /
sizeof (void (*)(void));
else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_4)
- /* no psm_preshutdown function */
+ /* no psm_intr_ops function */
total_ops = OFFSETOF(struct psm_ops, psm_intr_ops) /
sizeof (void (*)(void));
+ else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_5)
+ /* no psm_state function */
+ total_ops = OFFSETOF(struct psm_ops, psm_state) /
+ sizeof (void (*)(void));
+ else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_6)
+ /* no psm_cpu_ops function */
+ total_ops = OFFSETOF(struct psm_ops, psm_cpu_ops) /
+ sizeof (void (*)(void));
else
total_ops = sizeof (struct psm_ops) / sizeof (void (*)(void));
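The version ladder above relies on struct psm_ops being, in effect, a table of function pointers: OFFSETOF(struct psm_ops, psm_cpu_ops) / sizeof (void (*)(void)) counts the slots that predate psm_cpu_ops, so a PSM_INFO_VER01_6 module has every older operation copied while the new psm_cpu_ops slot stays NULL, which the mach_cpu_stop()/mach_cpu_add()/mach_cpu_remove() wrappers below check before dispatching.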
@@ -1042,7 +1051,11 @@ mach_smpinit(void)
cpu_id = -1;
cpu_id = (*pops->psm_get_next_processorid)(cpu_id);
- for (cnt = 0; cpu_id != -1; cnt++) {
+ /*
+ * Only add boot_ncpus CPUs to mp_cpus. Other CPUs will be handled
+	 * by the CPU DR driver at runtime.
+ */
+ for (cnt = 0; cpu_id != -1 && cnt < boot_ncpus; cnt++) {
CPUSET_ADD(cpumask, cpu_id);
cpu_id = (*pops->psm_get_next_processorid)(cpu_id);
}
@@ -1092,7 +1105,7 @@ mach_smpinit(void)
psm_enable_intr = pops->psm_enable_intr;
/* check for multiple CPUs */
- if (cnt < 2)
+ if (cnt < 2 && plat_dr_support_cpu() == B_FALSE)
return;
/* check for MP platforms */
@@ -1480,6 +1493,63 @@ mach_cpuid_start(processorid_t id, void *ctx)
return ((*pops->psm_cpu_start)(id, ctx));
}
+int
+mach_cpu_stop(cpu_t *cp, void *ctx)
+{
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ ASSERT(cp->cpu_id != -1);
+ request.pcr_cmd = PSM_CPU_STOP;
+ request.req.cpu_stop.cpuid = cp->cpu_id;
+ request.req.cpu_stop.ctx = ctx;
+
+ return ((*pops->psm_cpu_ops)(&request));
+}
+
+int
+mach_cpu_add(mach_cpu_add_arg_t *argp, processorid_t *cpuidp)
+{
+ int rc;
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ request.pcr_cmd = PSM_CPU_ADD;
+ request.req.cpu_add.argp = argp;
+ request.req.cpu_add.cpuid = -1;
+ rc = (*pops->psm_cpu_ops)(&request);
+ if (rc == 0) {
+ ASSERT(request.req.cpu_add.cpuid != -1);
+ *cpuidp = request.req.cpu_add.cpuid;
+ }
+
+ return (rc);
+}
+
+int
+mach_cpu_remove(processorid_t cpuid)
+{
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ request.pcr_cmd = PSM_CPU_REMOVE;
+ request.req.cpu_remove.cpuid = cpuid;
+
+ return ((*pops->psm_cpu_ops)(&request));
+}
+
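A minimal caller sketch for the wrappers above (dr_attach_cpu_sketch() is hypothetical; the real sequencing lives in the DR driver):

	static int
	dr_attach_cpu_sketch(mach_cpu_add_arg_t *argp)
	{
		processorid_t cpuid;
		int rc;

		/* ENOTSUP is returned when the PSM lacks psm_cpu_ops. */
		rc = mach_cpu_add(argp, &cpuid);
		if (rc != 0)
			return (rc);

		/*
		 * cpuid now names the hot-added CPU; a real caller would
		 * continue with mp_cpu_configure() and mp_cpu_poweron().
		 */
		return (0);
	}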
/*
* Default handler to create device node for CPU.
* One reference count will be held on created device node.
@@ -1569,6 +1639,24 @@ mach_cpu_create_device_node(struct cpu *cp, dev_info_t **dipp)
return (rv);
}
+/*
+ * On return, *dipp contains one of the following values:
+ * - NULL if no device node was found
+ * - a pointer to the device node if found
+ */
+int
+mach_cpu_get_device_node(struct cpu *cp, dev_info_t **dipp)
+{
+ *dipp = NULL;
+ if (psm_cpu_get_devinfo != NULL) {
+ if (psm_cpu_get_devinfo(cp, dipp) == PSM_SUCCESS) {
+ return (PSM_SUCCESS);
+ }
+ }
+
+ return (PSM_FAILURE);
+}
+
/*ARGSUSED*/
static int
mach_translate_irq(dev_info_t *dip, int irqno)
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index 6fc571b445..bc21812187 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* Welcome to the world of the "real mode platter".
@@ -31,6 +35,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
+#include <sys/cpu_module.h>
#include <sys/kmem.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
@@ -41,10 +46,21 @@
#include <sys/mach_mmu.h>
#include <sys/promif.h>
#include <sys/cpu.h>
+#include <sys/cpu_event.h>
+#include <sys/sunndi.h>
+#include <sys/fs/dv_node.h>
#include <vm/hat_i86.h>
+#include <vm/as.h>
+
+extern cpuset_t cpu_ready_set;
-extern void real_mode_start(void);
-extern void real_mode_end(void);
+extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
+extern void real_mode_start_cpu(void);
+extern void real_mode_start_cpu_end(void);
+extern void real_mode_stop_cpu_stage1(void);
+extern void real_mode_stop_cpu_stage1_end(void);
+extern void real_mode_stop_cpu_stage2(void);
+extern void real_mode_stop_cpu_stage2_end(void);
extern void *(*cpu_pause_func)(void *);
void rmp_gdt_init(rm_platter_t *);
@@ -61,22 +77,36 @@ int
mach_cpucontext_init(void)
{
ushort_t *vec;
+ ulong_t addr;
+ struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
sizeof (vec), PROT_READ | PROT_WRITE)))
return (-1);
+
/*
* setup secondary cpu bios boot up vector
+	 * Write the vector offset to 0x467 and the real-mode segment to 0x469.
*/
- *vec = (ushort_t)((caddr_t)
- ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
- + ((ulong_t)rm_platter_va & 0xf));
- vec[1] = (ushort_t)(rm_platter_pa >> 4);
+ addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
+ vec[0] = (ushort_t)(addr & PAGEOFFSET);
+ vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
warm_reset_vector = vec;
- bcopy((caddr_t)real_mode_start,
- (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
- (size_t)real_mode_end - (size_t)real_mode_start);
+ /* Map real mode platter into kas so kernel can access it. */
+ hat_devload(kas.a_hat,
+ (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
+ btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
+ HAT_LOAD_NOCONSIST);
+
+ /* Copy CPU startup code to rm_platter if it's still during boot. */
+ if (!plat_dr_enabled()) {
+ ASSERT((size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
+ bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
+ (size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu);
+ }
return (0);
}
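A worked example of the 0x467/0x469 split above (the platter address is hypothetical): for addr == 0x9f030, vec[0] = addr & PAGEOFFSET == 0x030 and vec[1] = (addr & 0xff000) >> 4 == 0x9f00, so the BIOS warm-reset path jumps to 9f00:0030, which is physical 0x9f00 * 16 + 0x30 == 0x9f030.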
@@ -95,12 +125,53 @@ mach_cpucontext_fini(void)
extern void *long_mode_64(void);
#endif /* __amd64 */
-void *
-mach_cpucontext_alloc(struct cpu *cp)
+/*ARGSUSED*/
+void
+rmp_gdt_init(rm_platter_t *rm)
+{
+
+#if defined(__amd64)
+ /* Use the kas address space for the CPU startup thread. */
+ if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
+ panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
+ "located above 4G in physical memory (@ 0x%lx)",
+ MAKECR3(kas.a_hat->hat_htable->ht_pfn));
+
+ /*
+ * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
+ * by code in real_mode_start_cpu():
+ *
+ * GDT[0]: NULL selector
+ * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
+ *
+ * Clear the IDT as interrupts will be off and a limit of 0 will cause
+ * the CPU to triple fault and reset on an NMI, seemingly as reasonable
+ * a course of action as any other, though it may cause the entire
+ * platform to reset in some cases...
+ */
+ rm->rm_temp_gdt[0] = 0;
+ rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
+
+ rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
+ rm->rm_temp_gdt_base = rm_platter_pa +
+ (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
+ rm->rm_temp_idt_lim = 0;
+ rm->rm_temp_idt_base = 0;
+
+ /*
+ * Since the CPU needs to jump to protected mode using an identity
+ * mapped address, we need to calculate it here.
+ */
+ rm->rm_longmode64_addr = rm_platter_pa +
+ ((uint32_t)long_mode_64 - (uint32_t)real_mode_start_cpu);
+#endif /* __amd64 */
+}
+
+static void *
+mach_cpucontext_alloc_tables(struct cpu *cp)
{
- rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
- struct cpu_tables *ct;
struct tss *ntss;
+ struct cpu_tables *ct;
/*
* Allocate space for stack, tss, gdt and idt. We round the size
@@ -109,7 +180,8 @@ mach_cpucontext_alloc(struct cpu *cp)
*/
ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
if ((uintptr_t)ct & PAGEOFFSET)
- panic("mp_startup_init: cpu%d misaligned tables", cp->cpu_id);
+ panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
+ cp->cpu_id);
ntss = cp->cpu_tss = &ct->ct_tss;
@@ -149,93 +221,174 @@ mach_cpucontext_alloc(struct cpu *cp)
set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
+ return (ct);
+}
+
+void *
+mach_cpucontext_xalloc(struct cpu *cp, int optype)
+{
+ size_t len;
+ struct cpu_tables *ct;
+ rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+ static int cpu_halt_code_ready;
+
+ if (optype == MACH_CPUCONTEXT_OP_STOP) {
+ ASSERT(plat_dr_enabled());
+
+ /*
+ * The WARM_RESET_VECTOR has a limitation that the physical
+	 * this limitation, the CPU stop code has been split into
+ * this limitation, the CPU stop code has been splitted into
+ * two stages.
+ * The stage 2 code, which implements the real logic to halt
+ * CPUs, is copied to the rm_cpu_halt_code field in the real
+ * mode platter. The stage 1 code, which simply jumps to the
+ * stage 2 code in the rm_cpu_halt_code field, is copied to
+ * rm_code field in the real mode platter and it may be
+ * overwritten after the CPU has been stopped.
+ */
+ if (!cpu_halt_code_ready) {
+ /*
+ * The rm_cpu_halt_code field in the real mode platter
+ * is used by the CPU stop code only. So only copy the
+ * CPU stop stage 2 code into the rm_cpu_halt_code
+ * field on the first call.
+ */
+ len = (size_t)real_mode_stop_cpu_stage2_end -
+ (size_t)real_mode_stop_cpu_stage2;
+ ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
+ bcopy((caddr_t)real_mode_stop_cpu_stage2,
+ (caddr_t)rm->rm_cpu_halt_code, len);
+ cpu_halt_code_ready = 1;
+ }
+
+ /*
+ * The rm_code field in the real mode platter is shared by
+ * the CPU start, CPU stop, CPR and fast reboot code. So copy
+ * the CPU stop stage 1 code into the rm_code field every time.
+ */
+ len = (size_t)real_mode_stop_cpu_stage1_end -
+ (size_t)real_mode_stop_cpu_stage1;
+ ASSERT(len <= RM_PLATTER_CODE_SIZE);
+ bcopy((caddr_t)real_mode_stop_cpu_stage1,
+ (caddr_t)rm->rm_code, len);
+ rm->rm_cpu_halted = 0;
+
+ return (cp->cpu_m.mcpu_mach_ctx_ptr);
+ } else if (optype != MACH_CPUCONTEXT_OP_START) {
+ return (NULL);
+ }
+
+ /*
+	 * Tables only need to be allocated when starting a CPU; they are
+	 * reused later when stopping that CPU.
+ */
+ ct = mach_cpucontext_alloc_tables(cp);
+ if (ct == NULL) {
+ return (NULL);
+ }
+
+ /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
+ if (plat_dr_enabled()) {
+ bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
+ (size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu);
+ }
+
/*
* Now copy all that we've set up onto the real mode platter
* for the real mode code to digest as part of starting the cpu.
*/
-
rm->rm_idt_base = cp->cpu_idt;
rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
rm->rm_gdt_base = cp->cpu_gdt;
rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
- rm->rm_pdbr = getcr3();
+ /*
+	 * The CPU needs to access the kernel address space after powering on.
+	 * When hot-adding a CPU at runtime, use the top level page table of
+	 * kas directly instead of the return value of getcr3(). getcr3()
+	 * returns the current process's top level page table, which may
+	 * differ from that of kas.
+ */
+ rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
rm->rm_cpu = cp->cpu_id;
rm->rm_x86feature = x86_feature;
+
+ /*
+	 * For hot-adding a CPU at runtime, Machine Check and the Performance
+	 * Counters should be disabled. They will be re-enabled on demand
+	 * after the CPU powers on successfully.
+ */
rm->rm_cr4 = getcr4();
+ rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
rmp_gdt_init(rm);
return (ct);
}
-/*ARGSUSED*/
void
-rmp_gdt_init(rm_platter_t *rm)
+mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
{
+ struct cpu_tables *ct = arg;
-#if defined(__amd64)
-
- if (getcr3() > 0xffffffffUL)
- panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
- "located above 4G in physical memory (@ 0x%lx)", getcr3());
-
- /*
- * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
- * by code in real_mode_start():
- *
- * GDT[0]: NULL selector
- * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
- *
- * Clear the IDT as interrupts will be off and a limit of 0 will cause
- * the CPU to triple fault and reset on an NMI, seemingly as reasonable
- * a course of action as any other, though it may cause the entire
- * platform to reset in some cases...
- */
- rm->rm_temp_gdt[0] = 0;
- rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
-
- rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
- rm->rm_temp_gdt_base = rm_platter_pa +
- (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
- rm->rm_temp_idt_lim = 0;
- rm->rm_temp_idt_base = 0;
+ ASSERT(&ct->ct_tss == cp->cpu_tss);
+ if (optype == MACH_CPUCONTEXT_OP_START) {
+ switch (err) {
+ case 0:
+ /*
+ * Save pointer for reuse when stopping CPU.
+ */
+ cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+ break;
+ case ETIMEDOUT:
+ /*
+ * The processor was poked, but failed to start before
+ * we gave up waiting for it. In case it starts later,
+ * don't free anything.
+ */
+ cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+ break;
+ default:
+ /*
+ * Some other, passive, error occurred.
+ */
+ kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+ cp->cpu_tss = NULL;
+ break;
+ }
+ } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
+ switch (err) {
+ case 0:
+ /*
+ * Free resources allocated when starting CPU.
+ */
+ kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+ cp->cpu_tss = NULL;
+ cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
+ break;
+ default:
+ /*
+ * Don't touch table pointer in case of failure.
+ */
+ break;
+ }
+ } else {
+ ASSERT(0);
+ }
+}
- /*
- * Since the CPU needs to jump to protected mode using an identity
- * mapped address, we need to calculate it here.
- */
- rm->rm_longmode64_addr = rm_platter_pa +
- ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
-#endif /* __amd64 */
+void *
+mach_cpucontext_alloc(struct cpu *cp)
+{
+ return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}
-/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
- struct cpu_tables *ct = arg;
-
- ASSERT(&ct->ct_tss == cp->cpu_tss);
-
- switch (err) {
- case 0:
- break;
- case ETIMEDOUT:
- /*
- * The processor was poked, but failed to start before
- * we gave up waiting for it. In case it starts later,
- * don't free anything.
- */
- break;
- default:
- /*
- * Some other, passive, error occurred.
- */
- kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
- cp->cpu_tss = NULL;
- break;
- }
+ mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}
/*
@@ -276,25 +429,221 @@ mach_cpu_pause(volatile char *safe)
}
/*
- * Power on CPU.
+ * Power on the target CPU.
*/
-/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
- ASSERT(MUTEX_HELD(&cpu_lock));
- return (ENOTSUP); /* not supported */
+ int error;
+ cpuset_t tempset;
+ processorid_t cpuid;
+
+ ASSERT(cp != NULL);
+ cpuid = cp->cpu_id;
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ } else if (cpuid < 0 || cpuid >= max_ncpus) {
+ return (EINVAL);
+ }
+
+ /*
+	 * The current x86 implementation of mp_cpu_configure() and
+	 * mp_cpu_poweron() has a limitation that mp_cpu_poweron() may only
+	 * be called once after calling mp_cpu_configure() for a specific CPU,
+	 * because mp_cpu_poweron() destroys data structures created by
+	 * mp_cpu_configure(). So reject the request if the CPU has already
+	 * been powered on once after calling mp_cpu_configure().
+	 * This limitation only affects the p_online syscall; the DR driver
+	 * is unaffected because it always invokes the public CPU
+ * management interfaces in the predefined order:
+ * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
+ */
+ if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
+ /*
+ * Kick off a reap in case that helps us with
+ * later attempts ..
+ */
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ affinity_set(CPU->cpu_id);
+
+ /*
+ * Start the target CPU. No need to call mach_cpucontext_fini()
+ * if mach_cpucontext_init() fails.
+ */
+ if ((error = mach_cpucontext_init()) == 0) {
+ error = mp_start_cpu_common(cp, B_FALSE);
+ mach_cpucontext_fini();
+ }
+ if (error != 0) {
+ affinity_clear();
+ return (error);
+ }
+
+ /* Wait for the target cpu to reach READY state. */
+ tempset = cpu_ready_set;
+ while (!CPU_IN_SET(tempset, cpuid)) {
+ delay(1);
+ tempset = *((volatile cpuset_t *)&cpu_ready_set);
+ }
+
+ /* Mark the target CPU as available for mp operation. */
+ CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
+
+ /* Free the space allocated to hold the microcode file */
+ ucode_cleanup();
+
+ affinity_clear();
+
+ return (0);
+}
+
+#define MP_CPU_DETACH_MAX_TRIES 5
+#define MP_CPU_DETACH_DELAY 100
+
+static int
+mp_cpu_detach_driver(dev_info_t *dip)
+{
+ int i;
+ int rv = EBUSY;
+ dev_info_t *pdip;
+
+ pdip = ddi_get_parent(dip);
+ ASSERT(pdip != NULL);
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks in
+ * e_ddi_branch_unconfigure(), which calls devfs_clean().
+ */
+ if (DEVI_BUSY_OWNED(pdip)) {
+ return (EDEADLOCK);
+ }
+
+ for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
+ if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
+ rv = 0;
+ break;
+ }
+ DELAY(MP_CPU_DETACH_DELAY);
+ }
+
+ return (rv);
}
/*
- * Power off CPU.
+ * Power off the target CPU.
+ * Note: cpu_lock will be released and then reacquired.
*/
-/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
- ASSERT(MUTEX_HELD(&cpu_lock));
- return (ENOTSUP); /* not supported */
+ int rv = 0;
+ void *ctx;
+ dev_info_t *dip = NULL;
+ rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+ extern void cpupm_start(cpu_t *);
+ extern void cpupm_stop(cpu_t *);
+
+ ASSERT(cp != NULL);
+ ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
+ ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ }
+ /*
+ * There is no support for powering off cpu0 yet.
+ * There are many pieces of code which have a hard dependency on cpu0.
+ */
+ if (cp->cpu_id == 0) {
+ return (ENOTSUP);
+	}
+
+ if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
+ return (ENXIO);
+ }
+ ASSERT(dip != NULL);
+ if (mp_cpu_detach_driver(dip) != 0) {
+ rv = EBUSY;
+ goto out_online;
+ }
+
+ /* Allocate CPU context for stopping */
+ if (mach_cpucontext_init() != 0) {
+ rv = ENXIO;
+ goto out_online;
+ }
+ ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
+ if (ctx == NULL) {
+ rv = ENXIO;
+ goto out_context_fini;
+ }
+
+ cpupm_stop(cp);
+ cpu_event_fini_cpu(cp);
+
+ if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
+ cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
+ cp->cpu_m.mcpu_cmi_hdl = NULL;
+ }
+
+ rv = mach_cpu_stop(cp, ctx);
+ if (rv != 0) {
+ goto out_enable_cmi;
+ }
+
+ /* Wait until the target CPU has been halted. */
+ while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
+ delay(1);
+ }
+ rm->rm_cpu_halted = 0xffff;
+
+ /* CPU_READY has been cleared by mach_cpu_stop. */
+ ASSERT((cp->cpu_flags & CPU_READY) == 0);
+ ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
+ CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
+
+ mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
+ mach_cpucontext_fini();
+
+ return (0);
+
+out_enable_cmi:
+ {
+ cmi_hdl_t hdl;
+
+ if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
+ if (x86_feature & X86_MCA)
+ cmi_mca_init(hdl);
+ cp->cpu_m.mcpu_cmi_hdl = hdl;
+ }
+ }
+ cpu_event_init_cpu(cp);
+ cpupm_start(cp);
+ mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
+
+out_context_fini:
+ mach_cpucontext_fini();
+
+out_online:
+ (void) e_ddi_branch_configure(dip, NULL, 0);
+
+ if (rv != EAGAIN && rv != ETIME) {
+ rv = ENXIO;
+ }
+
+ return (rv);
}
/*
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 0ba377f67c..146f879a66 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -23,10 +23,15 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
+#include <sys/cpu.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
@@ -34,6 +39,7 @@
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
+#include <sys/note.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
@@ -60,6 +66,7 @@
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
+#include <vm/vm_dep.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
@@ -71,8 +78,11 @@
struct cpu cpus[1]; /* CPU data */
struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */
+struct cpu *cpu_free_list; /* list for released CPUs */
cpu_core_t cpu_core[NCPU]; /* cpu_core structures */
+#define cpu_next_free cpu_prev
+
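The free list above threads released cpu_t structures through the cpu_prev link (aliased as cpu_next_free). A sketch of both ends, matching the code in mp_cpu_configure_common() and mp_cpu_unconfigure_common() below; both run under cpu_lock, so no atomics are needed:

	/* push, unconfigure side */
	cp->cpu_next_free = cpu_free_list;
	cpu_free_list = cp;

	/* pop, configure side */
	if (cpu_free_list != NULL) {
		cp = cpu_free_list;
		cpu_free_list = cp->cpu_next_free;
	}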
/*
* Useful for disabling MP bring-up on a MP capable system.
*/
@@ -94,7 +104,8 @@ int flushes_require_xcalls;
cpuset_t cpu_ready_set; /* initialized in startup() */
-static void mp_startup(void);
+static void mp_startup_boot(void);
+static void mp_startup_hotplug(void);
static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
@@ -108,7 +119,6 @@ void
init_cpu_info(struct cpu *cp)
{
processor_info_t *pi = &cp->cpu_type_info;
- char buf[CPU_IDSTRLEN];
/*
* Get clock-frequency property for the CPU.
@@ -131,14 +141,18 @@ init_cpu_info(struct cpu *cp)
if (fpu_exists)
(void) strcpy(pi->pi_fputypes, "i387 compatible");
- (void) cpuid_getidstr(cp, buf, sizeof (buf));
+ cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
+ cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
- cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
- (void) strcpy(cp->cpu_idstr, buf);
-
- (void) cpuid_getbrandstr(cp, buf, sizeof (buf));
- cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
- (void) strcpy(cp->cpu_brandstr, buf);
+ /*
+ * If called for the BSP, cp is equal to current CPU.
+	 * If called for the BSP, cp is equal to the current CPU.
+	 * For non-BSPs, the cpuid info of cp is not ready yet, so use the
+	 * cpuid info of the current CPU as default values for cpu_idstr and
+	 * cpu_brandstr. They will be corrected in mp_startup_common() after
+	 * cpuid_pass1() has been invoked on the target CPU.
+ (void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN);
+ (void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN);
}
/*
@@ -228,9 +242,11 @@ init_cpu_syscall(struct cpu *cp)
*
* Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
* startup and idle threads for the specified CPU.
+ * Parameter boot is true for boot time operations and is false for CPU
+ * DR operations.
*/
-struct cpu *
-mp_startup_init(int cpun)
+static struct cpu *
+mp_cpu_configure_common(int cpun, boolean_t boot)
{
struct cpu *cp;
kthread_id_t tp;
@@ -247,27 +263,25 @@ mp_startup_init(int cpun)
trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
#endif
+ ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(cpun < NCPU && cpu[cpun] == NULL);
- cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
-#if !defined(__xpv)
- if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
- cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
- cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
- } else
-#endif
- cp->cpu_m.mcpu_idle_cpu = cpu_idle;
+ if (cpu_free_list == NULL) {
+ cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
+ } else {
+ cp = cpu_free_list;
+ cpu_free_list = cp->cpu_next_free;
+ }
cp->cpu_m.mcpu_istamp = cpun << 16;
- procp = curthread->t_procp;
+	/* Create per-CPU threads in the process p0. */
+ procp = &p0;
- mutex_enter(&cpu_lock);
/*
* Initialize the dispatcher first.
*/
disp_cpu_init(cp);
- mutex_exit(&cpu_lock);
cpu_vm_data_init(cp);
@@ -294,14 +308,21 @@ mp_startup_init(int cpun)
tp->t_disp_queue = cp->cpu_disp;
/*
- * Setup thread to start in mp_startup.
+ * Setup thread to start in mp_startup_common.
*/
sp = tp->t_stk;
- tp->t_pc = (uintptr_t)mp_startup;
tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
tp->t_sp -= STACK_ENTRY_ALIGN; /* fake a call */
#endif
+ /*
+ * Setup thread start entry point for boot or hotplug.
+ */
+ if (boot) {
+ tp->t_pc = (uintptr_t)mp_startup_boot;
+ } else {
+ tp->t_pc = (uintptr_t)mp_startup_hotplug;
+ }
cp->cpu_id = cpun;
cp->cpu_self = cp;
@@ -312,12 +333,13 @@ mp_startup_init(int cpun)
/*
* cpu_base_spl must be set explicitly here to prevent any blocking
- * operations in mp_startup from causing the spl of the cpu to drop
- * to 0 (allowing device interrupts before we're ready) in resume().
+ * operations in mp_startup_common from causing the spl of the cpu
+ * to drop to 0 (allowing device interrupts before we're ready) in
+ * resume().
* cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
* As an extra bit of security on DEBUG kernels, this is enforced with
- * an assertion in mp_startup() -- before cpu_base_spl is set to its
- * proper value.
+ * an assertion in mp_startup_common() -- before cpu_base_spl is set
+ * to its proper value.
*/
cp->cpu_base_spl = ipltospl(LOCK_LEVEL);
@@ -392,6 +414,15 @@ mp_startup_init(int cpun)
* alloc space for cpuid info
*/
cpuid_alloc_space(cp);
+#if !defined(__xpv)
+ if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
+ cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
+ } else
+#endif
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle;
+
+ init_cpu_info(cp);
/*
* alloc space for ucode_info
@@ -408,31 +439,34 @@ mp_startup_init(int cpun)
ttc->ttc_next = ttc->ttc_first;
ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif
+
/*
* Record that we have another CPU.
*/
- mutex_enter(&cpu_lock);
/*
* Initialize the interrupt threads for this CPU
*/
cpu_intr_alloc(cp, NINTR_THREADS);
+
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
+ cpu_set_state(cp);
+
/*
* Add CPU to list of available CPUs. It'll be on the active list
- * after mp_startup().
+ * after mp_startup_common().
*/
cpu_add_unit(cp);
- mutex_exit(&cpu_lock);
return (cp);
}
/*
- * Undo what was done in mp_startup_init
+ * Undo what was done in mp_cpu_configure_common
*/
static void
-mp_startup_fini(struct cpu *cp, int error)
+mp_cpu_unconfigure_common(struct cpu *cp, int error)
{
- mutex_enter(&cpu_lock);
+ ASSERT(MUTEX_HELD(&cpu_lock));
/*
* Remove the CPU from the list of available CPUs.
@@ -451,7 +485,6 @@ mp_startup_fini(struct cpu *cp, int error)
* leave the fundamental data structures intact.
*/
cp->cpu_flags = 0;
- mutex_exit(&cpu_lock);
return;
}
@@ -468,8 +501,7 @@ mp_startup_fini(struct cpu *cp, int error)
*/
segkp_release(segkp,
cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
-
- mutex_exit(&cpu_lock);
+ cp->cpu_intr_stack = NULL;
#ifdef TRAPTRACE
/*
@@ -485,10 +517,26 @@ mp_startup_fini(struct cpu *cp, int error)
hat_cpu_offline(cp);
- cpuid_free_space(cp);
-
ucode_free_space(cp);
+ /* Free CPU ID string and brand string. */
+ if (cp->cpu_idstr) {
+ kmem_free(cp->cpu_idstr, CPU_IDSTRLEN);
+ cp->cpu_idstr = NULL;
+ }
+ if (cp->cpu_brandstr) {
+ kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN);
+ cp->cpu_brandstr = NULL;
+ }
+
+#if !defined(__xpv)
+ if (cp->cpu_m.mcpu_mwait != NULL) {
+ cpuid_mwait_free(cp);
+ cp->cpu_m.mcpu_mwait = NULL;
+ }
+#endif
+ cpuid_free_space(cp);
+
if (cp->cpu_idt != CPU->cpu_idt)
kmem_free(cp->cpu_idt, PAGESIZE);
cp->cpu_idt = NULL;
@@ -496,6 +544,12 @@ mp_startup_fini(struct cpu *cp, int error)
kmem_free(cp->cpu_gdt, PAGESIZE);
cp->cpu_gdt = NULL;
+ if (cp->cpu_supp_freqs != NULL) {
+ size_t len = strlen(cp->cpu_supp_freqs) + 1;
+ kmem_free(cp->cpu_supp_freqs, len);
+ cp->cpu_supp_freqs = NULL;
+ }
+
teardown_vaddr_for_ppcopy(cp);
kcpc_hw_fini(cp);
@@ -505,15 +559,13 @@ mp_startup_fini(struct cpu *cp, int error)
cpu_vm_data_destroy(cp);
- mutex_enter(&cpu_lock);
+ xc_fini_cpu(cp);
disp_cpu_fini(cp);
- mutex_exit(&cpu_lock);
-#if !defined(__xpv)
- if (cp->cpu_m.mcpu_mwait != NULL)
- cpuid_mwait_free(cp);
-#endif
- kmem_free(cp, sizeof (*cp));
+ ASSERT(cp != CPU0);
+ bzero(cp, sizeof (*cp));
+ cp->cpu_next_free = cpu_free_list;
+ cpu_free_list = cp;
}
/*
@@ -529,8 +581,8 @@ mp_startup_fini(struct cpu *cp, int error)
* system.
*
* workaround_errata is invoked early in mlsetup() for CPU 0, and in
- * mp_startup() for all slave CPUs. Slaves process workaround_errata prior
- * to acknowledging their readiness to the master, so this routine will
+ * mp_startup_common() for all slave CPUs. Slaves process workaround_errata
+ * prior to acknowledging their readiness to the master, so this routine will
* never be executed by multiple CPUs in parallel, thus making updates to
* global data safe.
*
@@ -1214,77 +1266,82 @@ workaround_errata_end()
#endif
}
-static cpuset_t procset;
-
/*
- * Start a single cpu, assuming that the kernel context is available
- * to successfully start another cpu.
- *
- * (For example, real mode code is mapped into the right place
- * in memory and is ready to be run.)
+ * The procset_slave and procset_master are used to synchronize
+ * between the control CPU and the target CPU when starting CPUs.
*/
+static cpuset_t procset_slave, procset_master;
+
+static void
+mp_startup_wait(cpuset_t *sp, processorid_t cpuid)
+{
+ cpuset_t tempset;
+
+ for (tempset = *sp; !CPU_IN_SET(tempset, cpuid);
+ tempset = *(volatile cpuset_t *)sp) {
+ SMT_PAUSE();
+ }
+ CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid);
+}
+
+static void
+mp_startup_signal(cpuset_t *sp, processorid_t cpuid)
+{
+ cpuset_t tempset;
+
+ CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid);
+ for (tempset = *sp; CPU_IN_SET(tempset, cpuid);
+ tempset = *(volatile cpuset_t *)sp) {
+ SMT_PAUSE();
+ }
+}
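
The two helpers above form a simple rendezvous protocol on a shared CPU set: the signaling side atomically adds its CPU id and spins until the peer deletes it, while the waiting side spins until the id appears and deletes it as the acknowledgment. A minimal user-level model of the same pattern, assuming C11 atomics and POSIX threads stand in for cpuset_t, CPUSET_ATOMIC_*() and SMT_PAUSE() (all names below are illustrative, not kernel APIs):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static _Atomic unsigned long procset;	/* stands in for procset_slave */

static void
startup_signal(int id)			/* models mp_startup_signal() */
{
	atomic_fetch_or(&procset, 1UL << id);		/* announce */
	while (atomic_load(&procset) & (1UL << id))
		;					/* spin until consumed */
}

static void
startup_wait(int id)			/* models mp_startup_wait() */
{
	while (!(atomic_load(&procset) & (1UL << id)))
		;					/* spin until signaled */
	atomic_fetch_and(&procset, ~(1UL << id));	/* consume as the ack */
}

static void *
target_cpu(void *arg)
{
	startup_signal(1);		/* "cpuid probing is done" */
	return (arg);
}

int
main(void)
{
	pthread_t t;

	(void) pthread_create(&t, NULL, target_cpu, NULL);
	startup_wait(1);		/* control side unblocks here */
	(void) pthread_join(t, NULL);
	(void) printf("rendezvous complete\n");
	return (0);
}
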
+
int
-start_cpu(processorid_t who)
+mp_start_cpu_common(cpu_t *cp, boolean_t boot)
{
+ _NOTE(ARGUNUSED(boot));
+
void *ctx;
- cpu_t *cp;
int delays;
int error = 0;
+ cpuset_t tempset;
+ processorid_t cpuid;
+#ifndef __xpv
+ extern void cpupm_init(cpu_t *);
+#endif
- ASSERT(who != 0);
-
- /*
- * Check if there's at least a Mbyte of kmem available
- * before attempting to start the cpu.
- */
- if (kmem_avail() < 1024 * 1024) {
- /*
- * Kick off a reap in case that helps us with
- * later attempts ..
- */
- kmem_reap();
- return (ENOMEM);
+ ASSERT(cp != NULL);
+ cpuid = cp->cpu_id;
+ ctx = mach_cpucontext_alloc(cp);
+ if (ctx == NULL) {
+ cmn_err(CE_WARN,
+ "cpu%d: failed to allocate context", cp->cpu_id);
+ return (EAGAIN);
}
-
- cp = mp_startup_init(who);
- if ((ctx = mach_cpucontext_alloc(cp)) == NULL ||
- (error = mach_cpu_start(cp, ctx)) != 0) {
-
- /*
- * Something went wrong before we even started it
- */
- if (ctx)
- cmn_err(CE_WARN,
- "cpu%d: failed to start error %d",
- cp->cpu_id, error);
- else
- cmn_err(CE_WARN,
- "cpu%d: failed to allocate context", cp->cpu_id);
-
- if (ctx)
- mach_cpucontext_free(cp, ctx, error);
- else
- error = EAGAIN; /* hmm. */
- mp_startup_fini(cp, error);
+ error = mach_cpu_start(cp, ctx);
+ if (error != 0) {
+ cmn_err(CE_WARN,
+ "cpu%d: failed to start, error %d", cp->cpu_id, error);
+ mach_cpucontext_free(cp, ctx, error);
return (error);
}
- for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
+ for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid);
+ delays++) {
if (delays == 500) {
/*
* After five seconds, things are probably looking
* a bit bleak - explain the hang.
*/
cmn_err(CE_NOTE, "cpu%d: started, "
- "but not running in the kernel yet", who);
+ "but not running in the kernel yet", cpuid);
} else if (delays > 2000) {
/*
* We waited at least 20 seconds, bail ..
*/
error = ETIMEDOUT;
- cmn_err(CE_WARN, "cpu%d: timed out", who);
+ cmn_err(CE_WARN, "cpu%d: timed out", cpuid);
mach_cpucontext_free(cp, ctx, error);
- mp_startup_fini(cp, error);
return (error);
}
@@ -1292,37 +1349,105 @@ start_cpu(processorid_t who)
* wait at least 10ms, then check again..
*/
delay(USEC_TO_TICK_ROUNDUP(10000));
+ tempset = *((volatile cpuset_t *)&procset_slave);
}
+ CPUSET_ATOMIC_DEL(procset_slave, cpuid);
mach_cpucontext_free(cp, ctx, 0);
#ifndef __xpv
if (tsc_gethrtime_enable)
- tsc_sync_master(who);
+ tsc_sync_master(cpuid);
#endif
if (dtrace_cpu_init != NULL) {
+ (*dtrace_cpu_init)(cpuid);
+ }
+
+ /*
+	 * During CPU DR operations, the cpu_lock is held by the current
+	 * (control) thread. We can't release the cpu_lock here
+ * because that will break the CPU DR logic.
+ * On the other hand, CPUPM and processor group initialization
+ * routines need to access the cpu_lock. So we invoke those
+ * routines here on behalf of mp_startup_common().
+ *
+ * CPUPM and processor group initialization routines depend
+ * on the cpuid probing results. Wait for mp_startup_common()
+ * to signal that cpuid probing is done.
+ */
+ mp_startup_wait(&procset_slave, cpuid);
+#ifndef __xpv
+ cpupm_init(cp);
+#endif
+ (void) pg_cpu_init(cp, B_FALSE);
+ cpu_set_state(cp);
+ mp_startup_signal(&procset_master, cpuid);
+
+ return (0);
+}
+
+/*
+ * Start a single cpu, assuming that the kernel context is available
+ * to successfully start another cpu.
+ *
+ * (For example, real mode code is mapped into the right place
+ * in memory and is ready to be run.)
+ */
+int
+start_cpu(processorid_t who)
+{
+ cpu_t *cp;
+ int error = 0;
+ cpuset_t tempset;
+
+ ASSERT(who != 0);
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
/*
- * DTrace CPU initialization expects cpu_lock to be held.
+ * Kick off a reap in case that helps us with
+ * later attempts ..
*/
- mutex_enter(&cpu_lock);
- (*dtrace_cpu_init)(who);
- mutex_exit(&cpu_lock);
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ /*
+ * First configure cpu.
+ */
+ cp = mp_cpu_configure_common(who, B_TRUE);
+ ASSERT(cp != NULL);
+
+ /*
+ * Then start cpu.
+ */
+ error = mp_start_cpu_common(cp, B_TRUE);
+ if (error != 0) {
+ mp_cpu_unconfigure_common(cp, error);
+ return (error);
}
- while (!CPU_IN_SET(cpu_ready_set, who))
- delay(1);
+ mutex_exit(&cpu_lock);
+ tempset = cpu_ready_set;
+ while (!CPU_IN_SET(tempset, who)) {
+ drv_usecwait(1);
+ tempset = *((volatile cpuset_t *)&cpu_ready_set);
+ }
+ mutex_enter(&cpu_lock);
return (0);
}
-
-/*ARGSUSED*/
void
start_other_cpus(int cprboot)
{
+ _NOTE(ARGUNUSED(cprboot));
+
uint_t who;
- uint_t skipped = 0;
uint_t bootcpuid = 0;
/*
@@ -1347,9 +1472,12 @@ start_other_cpus(int cprboot)
CPUSET_ADD(cpu_ready_set, bootcpuid);
/*
- * if only 1 cpu or not using MP, skip the rest of this
+ * skip the rest of this if
+	 * . only 1 cpu detected and system isn't hotplug-capable
+ * . not using MP
*/
- if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
+ if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
+ use_mp == 0) {
if (use_mp == 0)
cmn_err(CE_CONT, "?***** Not in MP mode\n");
goto done;
@@ -1375,18 +1503,13 @@ start_other_cpus(int cprboot)
affinity_set(CPU_CURRENT);
for (who = 0; who < NCPU; who++) {
-
if (!CPU_IN_SET(mp_cpus, who))
continue;
ASSERT(who != bootcpuid);
- if (ncpus >= max_ncpus) {
- skipped = who;
- continue;
- }
- if (start_cpu(who) != 0)
- CPUSET_DEL(mp_cpus, who);
mutex_enter(&cpu_lock);
+ if (start_cpu(who) != 0)
+ CPUSET_DEL(mp_cpus, who);
cpu_state_change_notify(who, CPU_SETUP);
mutex_exit(&cpu_lock);
}
@@ -1396,59 +1519,92 @@ start_other_cpus(int cprboot)
affinity_clear();
- if (skipped) {
+ mach_cpucontext_fini();
+
+done:
+ if (get_hwenv() == HW_NATIVE)
+ workaround_errata_end();
+ cmi_post_mpstartup();
+
+ if (use_mp && ncpus != boot_max_ncpus) {
cmn_err(CE_NOTE,
"System detected %d cpus, but "
"only %d cpu(s) were enabled during boot.",
- skipped + 1, ncpus);
+ boot_max_ncpus, ncpus);
cmn_err(CE_NOTE,
"Use \"boot-ncpus\" parameter to enable more CPU(s). "
"See eeprom(1M).");
}
-
-done:
- if (get_hwenv() == HW_NATIVE)
- workaround_errata_end();
- mach_cpucontext_fini();
-
- cmi_post_mpstartup();
}
-/*
- * Dummy functions - no i86pc platforms support dynamic cpu allocation.
- */
-/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
- return (ENOTSUP); /* not supported */
+ cpu_t *cp;
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ }
+
+ cp = cpu_get(cpuid);
+ if (cp != NULL) {
+ return (EALREADY);
+ }
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
+ /*
+ * Kick off a reap in case that helps us with
+ * later attempts ..
+ */
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ cp = mp_cpu_configure_common(cpuid, B_FALSE);
+ ASSERT(cp != NULL && cpu_get(cpuid) == cp);
+
+ return (cp != NULL ? 0 : EAGAIN);
}
-/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
- return (ENOTSUP); /* not supported */
+ cpu_t *cp;
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ } else if (cpuid < 0 || cpuid >= max_ncpus) {
+ return (EINVAL);
+ }
+
+ cp = cpu_get(cpuid);
+ if (cp == NULL) {
+ return (ENODEV);
+ }
+ mp_cpu_unconfigure_common(cp, 0);
+
+ return (0);
}
/*
* Startup function for 'other' CPUs (besides boot cpu).
* Called from real_mode_start.
*
- * WARNING: until CPU_READY is set, mp_startup and routines called by
- * mp_startup should not call routines (e.g. kmem_free) that could call
+ * WARNING: until CPU_READY is set, mp_startup_common and routines called by
+ * mp_startup_common should not call routines (e.g. kmem_free) that could call
* hat_unload which requires CPU_READY to be set.
*/
-void
-mp_startup(void)
+static void
+mp_startup_common(boolean_t boot)
{
- struct cpu *cp = CPU;
+ cpu_t *cp = CPU;
uint_t new_x86_feature;
- extern void cpu_event_init_cpu(cpu_t *);
-#ifndef __xpv
- extern void cpupm_init(cpu_t *);
-#endif
const char *fmt = "?cpu%d: %b\n";
+ extern void cpu_event_init_cpu(cpu_t *);
/*
* We need to get TSC on this proc synced (i.e., any delta
@@ -1457,8 +1613,8 @@ mp_startup(void)
* interrupts, cmn_err, etc.
*/
- /* Let cpu0 continue into tsc_sync_master() */
- CPUSET_ATOMIC_ADD(procset, cp->cpu_id);
+ /* Let the control CPU continue into tsc_sync_master() */
+ mp_startup_signal(&procset_slave, cp->cpu_id);
#ifndef __xpv
if (tsc_gethrtime_enable)
@@ -1537,39 +1693,42 @@ mp_startup(void)
if (workaround_errata(cp) != 0)
panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
+ /*
+	 * We can touch cpu_flags without acquiring the cpu_lock here
+ * because the cpu_lock is held by the control CPU which is running
+ * mp_start_cpu_common().
+ * Need to clear CPU_QUIESCED flag before calling any function which
+ * may cause thread context switching, such as kmem_alloc() etc.
+	 * The idle thread checks for the CPU_QUIESCED flag and loops forever if
+ * it's set. So the startup thread may have no chance to switch back
+ * again if it's switched away with CPU_QUIESCED set.
+ */
+ cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
+
cpuid_pass2(cp);
cpuid_pass3(cp);
(void) cpuid_pass4(cp);
- init_cpu_info(cp);
-
- mutex_enter(&cpu_lock);
-
- cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;
-
- cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
- cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
-
- if (dtrace_cpu_init != NULL) {
- (*dtrace_cpu_init)(cp->cpu_id);
- }
-
/*
- * Fill out cpu_ucode_info. Update microcode if necessary.
+ * Correct cpu_idstr and cpu_brandstr on target CPU after
+ * cpuid_pass1() is done.
*/
- ucode_check(cp);
+ (void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN);
+ (void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN);
- mutex_exit(&cpu_lock);
+ cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;
post_startup_cpu_fixups();
+ cpu_event_init_cpu(cp);
+
/*
* Enable preemption here so that contention for any locks acquired
- * later in mp_startup may be preempted if the thread owning those
- * locks is continuously executing on other CPUs (for example, this
- * CPU must be preemptible to allow other CPUs to pause it during their
- * startup phases). It's safe to enable preemption here because the
- * CPU state is pretty-much fully constructed.
+ * later in mp_startup_common may be preempted if the thread owning
+ * those locks is continuously executing on other CPUs (for example,
+ * this CPU must be preemptible to allow other CPUs to pause it during
+ * their startup phases). It's safe to enable preemption here because
+ * the CPU state is pretty-much fully constructed.
*/
curthread->t_preempt = 0;
@@ -1577,29 +1736,31 @@ mp_startup(void)
ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
set_base_spl(); /* Restore the spl to its proper value */
- cpu_event_init_cpu(cp);
-#ifndef __xpv
- cpupm_init(cp);
-#endif
-
+ pghw_physid_create(cp);
/*
- * Processor group initialization for this CPU is dependent on the
- * cpuid probing, which must be done in the context of the current
- * CPU, as well as the CPU's device node initialization (for ACPI).
+ * Delegate initialization tasks, which need to access the cpu_lock,
+ * to mp_start_cpu_common() because we can't acquire the cpu_lock here
+ * during CPU DR operations.
*/
- mutex_enter(&cpu_lock);
- pghw_physid_create(cp);
- (void) pg_cpu_init(cp, B_FALSE);
+ mp_startup_signal(&procset_slave, cp->cpu_id);
+ mp_startup_wait(&procset_master, cp->cpu_id);
pg_cmt_cpu_startup(cp);
- mutex_exit(&cpu_lock);
+
+ if (boot) {
+ mutex_enter(&cpu_lock);
+ cp->cpu_flags &= ~CPU_OFFLINE;
+ cpu_enable_intr(cp);
+ cpu_add_active(cp);
+ mutex_exit(&cpu_lock);
+ }
/* Enable interrupts */
(void) spl0();
- mutex_enter(&cpu_lock);
- cpu_enable_intr(cp);
- cpu_add_active(cp);
- mutex_exit(&cpu_lock);
+ /*
+ * Fill out cpu_ucode_info. Update microcode if necessary.
+ */
+ ucode_check(cp);
#ifndef __xpv
{
@@ -1616,6 +1777,7 @@ mp_startup(void)
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
if (x86_feature & X86_MCA)
cmi_mca_init(hdl);
+ cp->cpu_m.mcpu_cmi_hdl = hdl;
}
}
#endif /* __xpv */
@@ -1630,16 +1792,13 @@ mp_startup(void)
*/
CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
- /*
- * Because mp_startup() gets fired off after init() starts, we
- * can't use the '?' trick to do 'boot -v' printing - so we
- * always direct the 'cpu .. online' messages to the log.
- */
- cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
- cp->cpu_id);
-
(void) mach_cpu_create_device_node(cp, NULL);
+ cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
+ cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
+ cmn_err(CE_CONT, "?cpu%d initialization complete - online\n",
+ cp->cpu_id);
+
/*
* Now we are done with the startup thread, so free it up.
*/
@@ -1648,6 +1807,23 @@ mp_startup(void)
/*NOTREACHED*/
}
+/*
+ * Startup function for 'other' CPUs at boot time (besides boot cpu).
+ */
+static void
+mp_startup_boot(void)
+{
+ mp_startup_common(B_TRUE);
+}
+
+/*
+ * Startup function for hotplug CPUs at runtime.
+ */
+void
+mp_startup_hotplug(void)
+{
+ mp_startup_common(B_FALSE);
+}
/*
* Start CPU on user request.
@@ -1663,7 +1839,6 @@ mp_cpu_start(struct cpu *cp)
/*
* Stop CPU on user request.
*/
-/* ARGSUSED */
int
mp_cpu_stop(struct cpu *cp)
{
@@ -1713,31 +1888,42 @@ cpu_enable_intr(struct cpu *cp)
psm_enable_intr(cp->cpu_id);
}
-
-/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
-#ifndef __xpv
- cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
- cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+#ifdef __xpv
+ _NOTE(ARGUNUSED(cp));
+#else
+ cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
if (hdl != NULL) {
+ cmi_hdl_hold(hdl);
+ } else {
+ hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+ }
+ if (hdl != NULL) {
cmi_faulted_enter(hdl);
cmi_hdl_rele(hdl);
}
#endif
}
-/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
-#ifndef __xpv
- cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
- cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+#ifdef __xpv
+ _NOTE(ARGUNUSED(cp));
+#else
+ cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
if (hdl != NULL) {
+ cmi_hdl_hold(hdl);
+ } else {
+ hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+ }
+ if (hdl != NULL) {
cmi_faulted_exit(hdl);
cmi_hdl_rele(hdl);
}
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b4b235cf5c..c0e9c2404b 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/t_lock.h>
@@ -63,6 +67,7 @@
#include <sys/dumphdr.h>
#include <sys/bootconf.h>
+#include <sys/memlist_plat.h>
#include <sys/varargs.h>
#include <sys/promif.h>
#include <sys/modctl.h>
@@ -116,7 +121,7 @@
#include <sys/systeminfo.h>
#include <sys/multiboot.h>
-#ifdef __xpv
+#ifdef __xpv
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
@@ -130,6 +135,10 @@ extern void xen_late_startup(void);
struct xen_evt_data cpu0_evt_data;
+#else /* __xpv */
+#include <sys/memlist_impl.h>
+
+extern void mem_config_init(void);
#endif /* __xpv */
extern void progressbar_init(void);
@@ -315,6 +324,17 @@ pgcnt_t segkpsize = 0;
pgcnt_t segziosize = 0; /* size of zio segment in pages */
/*
+ * A static DR page_t VA map is reserved that can map the page structures
+ * for a domain's entire RA space. The pages that back this space are
+ * dynamically allocated and need not be physically contiguous. The DR
+ * map size is derived from KPM size.
+ * This mechanism isn't used by x86 yet, so these are just stubs.
+ */
+int ppvm_enable = 0; /* Static virtual map for page structs */
+page_t *ppvm_base = NULL; /* Base of page struct map */
+pgcnt_t ppvm_size = 0; /* Size of page struct map */
+
+/*
* VA range available to the debugger
*/
const caddr_t kdi_segdebugbase = (const caddr_t)SEGDEBUGBASE;
@@ -965,20 +985,38 @@ startup_memlist(void)
if (prom_debug)
print_memlist("boot physinstalled",
bootops->boot_mem->physinstalled);
- installed_top_size(bootops->boot_mem->physinstalled, &physmax,
+ installed_top_size_ex(bootops->boot_mem->physinstalled, &physmax,
&physinstalled, &memblocks);
PRM_DEBUG(physmax);
PRM_DEBUG(physinstalled);
PRM_DEBUG(memblocks);
/*
+ * Compute maximum physical address for memory DR operations.
+ * Memory DR operations are unsupported on xpv or 32bit OSes.
+ */
+#ifdef __amd64
+ if (plat_dr_support_memory()) {
+ if (plat_dr_physmax == 0) {
+ uint_t pabits = UINT_MAX;
+
+ cpuid_get_addrsize(CPU, &pabits, NULL);
+ plat_dr_physmax = btop(1ULL << pabits);
+ }
+ if (plat_dr_physmax > PHYSMEM_MAX64)
+ plat_dr_physmax = PHYSMEM_MAX64;
+ } else
+#endif
+ plat_dr_physmax = 0;
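
As a worked example of the computation above: a hypothetical CPU reporting 46 physical address bits with 4K pages gives btop(1ULL << 46) == 1ULL << 34 page frames, i.e. a 64 TB DR ceiling before the PHYSMEM_MAX64 clamp. A standalone sketch (PAGESHIFT and btop() are simplified stand-ins for the kernel definitions):

#include <stdio.h>
#include <stdint.h>

#define	PAGESHIFT	12				/* 4K pages */
#define	btop(x)		((uint64_t)(x) >> PAGESHIFT)	/* bytes to pages */

int
main(void)
{
	unsigned pabits = 46;	/* as if reported by cpuid_get_addrsize() */
	uint64_t dr_physmax = btop(1ULL << pabits);

	/* 17179869184 pages == 1ULL << 34 */
	(void) printf("%llu pages\n", (unsigned long long)dr_physmax);
	return (0);
}
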
+
+ /*
* Examine the bios reserved memory to find out:
* - the number of discontiguous segments of memory.
*/
if (prom_debug)
print_memlist("boot reserved mem",
bootops->boot_mem->rsvdmem);
- installed_top_size(bootops->boot_mem->rsvdmem, &rsvd_high_pfn,
+ installed_top_size_ex(bootops->boot_mem->rsvdmem, &rsvd_high_pfn,
&rsvd_pgcnt, &rsvdmemblocks);
PRM_DEBUG(rsvd_high_pfn);
PRM_DEBUG(rsvd_pgcnt);
@@ -1137,9 +1175,14 @@ startup_memlist(void)
* memory is at addresses above 1 TB. When adjusted, segkpm_base must
* be aligned on KERNEL_REDZONE_SIZE boundary (span of top level pte).
*/
- if (physmax + 1 > mmu_btop(TERABYTE)) {
+ if (physmax + 1 > mmu_btop(TERABYTE) ||
+ plat_dr_physmax > mmu_btop(TERABYTE)) {
uint64_t kpm_resv_amount = mmu_ptob(physmax + 1);
+ if (kpm_resv_amount < mmu_ptob(plat_dr_physmax)) {
+ kpm_resv_amount = mmu_ptob(plat_dr_physmax);
+ }
+
segkpm_base = -(P2ROUNDUP((2 * kpm_resv_amount),
KERNEL_REDZONE_SIZE)); /* down from top VA */
@@ -1186,6 +1229,16 @@ startup_memlist(void)
panic("physavail was too big!");
if (prom_debug)
print_memlist("phys_avail", phys_avail);
+#ifndef __xpv
+ /*
+ * Free unused memlist items, which may be used by memory DR driver
+ * at runtime.
+ */
+ if ((caddr_t)current < (caddr_t)memlist + memlist_sz) {
+ memlist_free_block((caddr_t)current,
+ (caddr_t)memlist + memlist_sz - (caddr_t)current);
+ }
+#endif
/*
* Build bios reserved memspace
@@ -1196,6 +1249,16 @@ startup_memlist(void)
panic("bios_rsvd was too big!");
if (prom_debug)
print_memlist("bios_rsvd", bios_rsvd);
+#ifndef __xpv
+ /*
+ * Free unused memlist items, which may be used by memory DR driver
+ * at runtime.
+ */
+ if ((caddr_t)current < (caddr_t)bios_rsvd + rsvdmemlist_sz) {
+ memlist_free_block((caddr_t)current,
+ (caddr_t)bios_rsvd + rsvdmemlist_sz - (caddr_t)current);
+ }
+#endif
/*
* setup page coloring
@@ -1391,7 +1454,11 @@ startup_kmem(void)
}
#endif
-#ifdef __xpv
+#ifndef __xpv
+ if (plat_dr_support_memory()) {
+ mem_config_init();
+ }
+#else /* __xpv */
/*
* Some of the xen start information has to be relocated up
* into the kernel's permanent address space.
@@ -1406,7 +1473,7 @@ startup_kmem(void)
*/
CPU->cpu_m.mcpu_vcpu_info =
&HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id];
-#endif
+#endif /* __xpv */
PRM_POINT("startup_kmem() done");
}
@@ -1591,8 +1658,10 @@ startup_modules(void)
if ((get_hwenv() != HW_XEN_HVM) &&
(hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL &&
- (x86_feature & X86_MCA))
+ (x86_feature & X86_MCA)) {
cmi_mca_init(hdl);
+ CPU->cpu_m.mcpu_cmi_hdl = hdl;
+ }
#endif /* __xpv */
/*
@@ -1701,7 +1770,11 @@ layout_kernel_va(void)
#if defined(__amd64)
kpm_vbase = (caddr_t)segkpm_base;
- kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1));
+ if (physmax + 1 < plat_dr_physmax) {
+ kpm_size = ROUND_UP_LPAGE(mmu_ptob(plat_dr_physmax));
+ } else {
+ kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1));
+ }
if ((uintptr_t)kpm_vbase + kpm_size > (uintptr_t)valloc_base)
panic("not enough room for kpm!");
PRM_DEBUG(kpm_size);
@@ -2342,10 +2415,6 @@ release_bootstrap(void)
rm_platter_va = i86devmap(pfn, 1,
PROT_READ | PROT_WRITE | PROT_EXEC);
rm_platter_pa = ptob(pfn);
- hat_devload(kas.a_hat,
- (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
- pfn, PROT_READ | PROT_WRITE | PROT_EXEC,
- HAT_LOAD_NOCONSIST);
break;
}
if (pfn == btop(1*1024*1024) && use_mp)
diff --git a/usr/src/uts/i86pc/os/x_call.c b/usr/src/uts/i86pc/os/x_call.c
index 33b75c1a5a..17d5123508 100644
--- a/usr/src/uts/i86pc/os/x_call.c
+++ b/usr/src/uts/i86pc/os/x_call.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/param.h>
@@ -186,11 +190,11 @@ xc_extract(xc_msg_t **queue)
return (old_head);
}
-
/*
* Initialize the machcpu fields used for cross calls
*/
static uint_t xc_initialized = 0;
+
void
xc_init_cpu(struct cpu *cpup)
{
@@ -198,34 +202,100 @@ xc_init_cpu(struct cpu *cpup)
int c;
/*
- * add a new msg to each existing CPU's free list, as well as one for
- * my list for each of them. ncpus has an inconsistent value when this
- * function is called, so use cpup->cpu_id.
+ * Allocate message buffers for the new CPU.
*/
- for (c = 0; c < cpup->cpu_id; ++c) {
- if (cpu[c] == NULL)
- continue;
- msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
- msg->xc_command = XC_MSG_FREE;
- msg->xc_master = c;
- xc_insert(&cpu[c]->cpu_m.xc_free, msg);
+ for (c = 0; c < max_ncpus; ++c) {
+ if (plat_dr_support_cpu()) {
+ /*
+ * Allocate a message buffer for every CPU possible
+ * in system, including our own, and add them to our xc
+			 * in the system, including our own, and add them to our xc
+ */
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = cpup->cpu_id;
+ xc_insert(&cpup->cpu_m.xc_free, msg);
+ } else if (cpu[c] != NULL && cpu[c] != cpup) {
+ /*
+ * Add a new message buffer to each existing CPU's free
+ * list, as well as one for my list for each of them.
+ * Note: cpu0 is statically inserted into cpu[] array,
+ * so need to check cpu[c] isn't cpup itself to avoid
+			 * so we need to check that cpu[c] isn't cpup itself to avoid
+ */
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = c;
+ xc_insert(&cpu[c]->cpu_m.xc_free, msg);
+
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = cpup->cpu_id;
+ xc_insert(&cpup->cpu_m.xc_free, msg);
+ }
+ }
+ if (!plat_dr_support_cpu()) {
+ /*
+ * Add one for self messages if CPU hotplug is disabled.
+ */
msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
msg->xc_command = XC_MSG_FREE;
msg->xc_master = cpup->cpu_id;
xc_insert(&cpup->cpu_m.xc_free, msg);
}
+ if (!xc_initialized)
+ xc_initialized = 1;
+}
+
+void
+xc_fini_cpu(struct cpu *cpup)
+{
+ xc_msg_t *msg;
+
+ ASSERT((cpup->cpu_flags & CPU_READY) == 0);
+ ASSERT(cpup->cpu_m.xc_msgbox == NULL);
+ ASSERT(cpup->cpu_m.xc_work_cnt == 0);
+
+ while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
+ kmem_free(msg, sizeof (*msg));
+ }
+}
+
+#define XC_FLUSH_MAX_WAITS 1000
+
+/* Flush inflight message buffers. */
+int
+xc_flush_cpu(struct cpu *cpup)
+{
+ int i;
+
+ ASSERT((cpup->cpu_flags & CPU_READY) == 0);
+
/*
- * Add one for self messages
+ * Pause all working CPUs, which ensures that there's no CPU in
+ * function xc_common().
+ * This is used to work around a race condition window in xc_common()
+	 * between checking the CPU_READY flag and increasing the working item count.
*/
- msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
- msg->xc_command = XC_MSG_FREE;
- msg->xc_master = cpup->cpu_id;
- xc_insert(&cpup->cpu_m.xc_free, msg);
+ pause_cpus(cpup);
+ start_cpus();
- if (!xc_initialized)
- xc_initialized = 1;
+ for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
+ if (cpup->cpu_m.xc_work_cnt == 0) {
+ break;
+ }
+ DELAY(1);
+ }
+ for (; i < XC_FLUSH_MAX_WAITS; i++) {
+ if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
+ break;
+ }
+ DELAY(1);
+ }
+
+ return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}
/*
@@ -416,7 +486,7 @@ xc_common(
* Post messages to all CPUs involved that are CPU_READY
*/
CPU->cpu_m.xc_wait_cnt = 0;
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
if (!BT_TEST(set, c))
continue;
cpup = cpu[c];
@@ -487,7 +557,7 @@ xc_priority_common(
/*
* Wait briefly for any previous xc_priority to have finished.
*/
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
cpup = cpu[c];
if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
continue;
@@ -527,7 +597,7 @@ xc_priority_common(
* Post messages to all CPUs involved that are CPU_READY
* We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
*/
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
if (!BT_TEST(set, c))
continue;
cpup = cpu[c];
diff --git a/usr/src/uts/i86pc/sys/Makefile b/usr/src/uts/i86pc/sys/Makefile
index 11ebb58187..7fa9f8f6ef 100644
--- a/usr/src/uts/i86pc/sys/Makefile
+++ b/usr/src/uts/i86pc/sys/Makefile
@@ -60,6 +60,7 @@ HDRS= \
psm_types.h \
rm_platter.h \
smp_impldefs.h \
+ sbd_ioctl.h \
vm_machparam.h \
x_call.h \
xc_levels.h \
diff --git a/usr/src/uts/i86pc/sys/acpidev.h b/usr/src/uts/i86pc/sys/acpidev.h
index e0fbaa4999..6d11277aaf 100644
--- a/usr/src/uts/i86pc/sys/acpidev.h
+++ b/usr/src/uts/i86pc/sys/acpidev.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -28,8 +28,10 @@
#include <sys/types.h>
#include <sys/obpdefs.h>
#include <sys/sunddi.h>
+#ifdef _KERNEL
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -55,23 +57,19 @@ extern "C" {
#define ACPIDEV_HID_CPU "ACPI0007"
#define ACPIDEV_HID_PCI_HOSTBRIDGE "PNP0A03"
#define ACPIDEV_HID_PCIE_HOSTBRIDGE "PNP0A08"
+#define ACPIDEV_HID_PCIEX_HOSTBRIDGE "PNP0A08"
#define ACPIDEV_HID_MEMORY "PNP0C80"
-/* Common ACPI object names. */
-#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
-#define ACPIDEV_OBJECT_NAME_PR "_PR_"
-
-/* Common ACPI method names. */
-#define ACPIDEV_METHOD_NAME_MAT "_MAT"
-
/* Device names for ACPI objects. */
#define ACPIDEV_NODE_NAME_ROOT "fw"
+#define ACPIDEV_NODE_NAME_ACPIDR "acpidr"
#define ACPIDEV_NODE_NAME_CONTAINER "container"
#define ACPIDEV_NODE_NAME_MODULE_SBD "sb"
#define ACPIDEV_NODE_NAME_MODULE_CPU "socket"
#define ACPIDEV_NODE_NAME_CPU "cpu"
#define ACPIDEV_NODE_NAME_PROCESSOR "cpus"
#define ACPIDEV_NODE_NAME_MEMORY "mem"
+#define ACPIDEV_NODE_NAME_PCI "pci"
/* Device types for ACPI objects. */
#define ACPIDEV_TYPE_ROOTNEX "acpirootnex"
@@ -81,15 +79,31 @@ extern "C" {
#define ACPIDEV_TYPE_CONTAINER "acpicontainer"
#define ACPIDEV_TYPE_CPU "acpicpu"
#define ACPIDEV_TYPE_MEMORY "acpimemory"
+#define ACPIDEV_TYPE_PCI "pci"
+#define ACPIDEV_TYPE_PCIEX "pciex"
/* Device property names for ACPI objects. */
#define ACPIDEV_PROP_NAME_UNIT_ADDR "unit-address"
#define ACPIDEV_PROP_NAME_ACPI_UID "acpi-uid"
#define ACPIDEV_PROP_NAME_PROCESSOR_ID "acpi-processor-id"
#define ACPIDEV_PROP_NAME_LOCALAPIC_ID "apic-id"
+#define ACPIDEV_PROP_NAME_PROXIMITY_ID "proximity-id"
#define ACPIDEV_PROP_NAME_UID_FORMAT "acpidev-uid-format"
+/* Miscellaneous strings. */
+#define ACPIDEV_CMD_OST_PREFIX "acpi-update-status"
+#define ACPIDEV_CMD_OST_INPROGRESS "acpi-update-status=inprogress"
+#define ACPIDEV_CMD_OST_SUCCESS "acpi-update-status=success"
+#define ACPIDEV_CMD_OST_FAILURE "acpi-update-status=failure"
+#define ACPIDEV_CMD_OST_NOOP "acpi-update-status=noop"
+
+#define ACPIDEV_EVENT_TYPE_ATTR_NAME "acpi-event-type"
+#define ACPIDEV_EVENT_TYPE_BUS_CHECK "bus_check"
+#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK "device_check"
+#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT "device_check_light"
+#define ACPIDEV_EVENT_TYPE_EJECT_REQUEST "eject_request"
+
/* ACPI device class Id. */
typedef enum acpidev_class_id {
ACPIDEV_CLASS_ID_INVALID = 0,
@@ -99,6 +113,8 @@ typedef enum acpidev_class_id {
ACPIDEV_CLASS_ID_CONTAINER = 4,
ACPIDEV_CLASS_ID_CPU = 5,
ACPIDEV_CLASS_ID_MEMORY = 6,
+ ACPIDEV_CLASS_ID_PCI = 7,
+ ACPIDEV_CLASS_ID_PCIEX = 8,
ACPIDEV_CLASS_ID_MAX
} acpidev_class_id_t;
@@ -106,9 +122,88 @@ typedef enum acpidev_class_id {
#define ACPIDEV_OUSER_NO_CPU 0x1
#define ACPIDEV_OUSER_NO_MEM 0x2
#define ACPIDEV_OUSER_NO_CONTAINER 0x4
+#define ACPIDEV_OUSER_NO_PCI 0x8
+#define ACPIDEV_OUSER_NO_CACHE 0x10000
#ifdef _KERNEL
+/* Common ACPI object names. */
+#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
+#define ACPIDEV_OBJECT_NAME_PR "_PR_"
+
+/* Common ACPI method names. */
+#define ACPIDEV_METHOD_NAME_MAT "_MAT"
+#define ACPIDEV_METHOD_NAME_EJ0 "_EJ0"
+#define ACPIDEV_METHOD_NAME_EDL "_EDL"
+#define ACPIDEV_METHOD_NAME_EJD "_EJD"
+#define ACPIDEV_METHOD_NAME_OST "_OST"
+#define ACPIDEV_METHOD_NAME_PXM "_PXM"
+#define ACPIDEV_METHOD_NAME_SLI "_SLI"
+
+/* Source event code for _OST. */
+#define ACPI_OST_EVENT_EJECTING 0x103
+#define ACPI_OST_EVENT_INSERTING 0x200
+
+/* Status code for _OST. */
+#define ACPI_OST_STA_SUCCESS 0x0
+
+/* Non-specific failure. */
+#define ACPI_OST_STA_FAILURE 0x1
+
+/* Unrecognized Notify Code. */
+#define ACPI_OST_STA_NOT_SUPPORT 0x2
+
+/* Device ejection not supported by OSPM. */
+#define ACPI_OST_STA_EJECT_NOT_SUPPORT 0x80
+
+/* Device in use by application. */
+#define ACPI_OST_STA_EJECT_IN_USE 0x81
+
+/* Device Busy. */
+#define ACPI_OST_STA_EJECT_BUSY 0x82
+
+/* Ejection dependency is busy or not supported for ejection by OSPM. */
+#define ACPI_OST_STA_EJECT_DEPENDENCY 0x83
+
+/* Ejection is in progress (pending). */
+#define ACPI_OST_STA_EJECT_IN_PROGRESS 0x84
+
+/* Device insertion in progress (pending). */
+#define ACPI_OST_STA_INSERT_IN_PROGRESS 0x80
+
+/* Device driver load failure. */
+#define ACPI_OST_STA_INSERT_DRIVER 0x81
+
+/* Device insertion not supported by OSPM. */
+#define ACPI_OST_STA_INSERT_NOT_SUPPORT 0x82
+
+/*
+ * Insertion failure
+ * Resources Unavailable as described by the following bit encodings:
+ * Bit[3] Bus Numbers
+ * Bit[2] Interrupts
+ * Bit[1] I/O
+ * Bit[0] Memory
+ */
+#define ACPI_OST_STA_INSERT_NO_RESOURCE 0x90
+#define ACPI_OST_STA_INSERT_NO_BUS 0x8
+#define ACPI_OST_STA_INSERT_NO_INTR 0x4
+#define ACPI_OST_STA_INSERT_NO_IO 0x2
+#define ACPI_OST_STA_INSERT_NO_MEM 0x1
+
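For illustration, the resource-unavailable insertion failure is reported by OR-ing the base code with the bit encodings above; missing bus numbers plus missing memory gives 0x90 | 0x8 | 0x1 == 0x99. A standalone sketch (the defines are copied from above; this is not kernel code):

#include <stdio.h>
#include <stdint.h>

#define	ACPI_OST_STA_INSERT_NO_RESOURCE	0x90
#define	ACPI_OST_STA_INSERT_NO_BUS	0x8
#define	ACPI_OST_STA_INSERT_NO_MEM	0x1

int
main(void)
{
	/* Insertion failed: no bus numbers and no memory available. */
	uint32_t status = ACPI_OST_STA_INSERT_NO_RESOURCE |
	    ACPI_OST_STA_INSERT_NO_BUS | ACPI_OST_STA_INSERT_NO_MEM;

	(void) printf("_OST status: 0x%x\n", status);	/* prints 0x99 */
	return (0);
}
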
+/*
+ * According to the ACPI specification, self latency (entry[n][n]) in the
+ * SLIT table should be 10.
+ */
+#define ACPI_SLIT_SELF_LATENCY 10
+
+/*
+ * The DR driver assigns a unique device id for each hot-added memory device.
+ * ACPI_MEMNODE_DEVID_BOOT is assigned to memory devices present at boot,
+ * to distinguish them from device ids assigned by the DR driver.
+ */
+#define ACPI_MEMNODE_DEVID_BOOT UINT32_MAX
+
/* Forward declaration */
typedef struct acpidev_data_impl *acpidev_data_handle_t;
typedef struct acpidev_walk_info acpidev_walk_info_t;
@@ -197,7 +292,7 @@ typedef ACPI_STATUS (* acpidev_probe_t)(acpidev_walk_info_t *);
typedef acpidev_filter_result_t (* acpidev_filter_t)(acpidev_walk_info_t *,
char *, int);
typedef ACPI_STATUS (* acpidev_init_t)(acpidev_walk_info_t *);
-typedef ACPI_STATUS (* acpidev_fini_t)(ACPI_HANDLE, dev_info_t *,
+typedef void (* acpidev_fini_t)(ACPI_HANDLE, acpidev_data_handle_t,
acpidev_class_t *);
/* Device class driver interface. */
@@ -237,6 +332,7 @@ extern acpidev_class_t acpidev_class_device;
extern acpidev_class_t acpidev_class_container;
extern acpidev_class_t acpidev_class_cpu;
extern acpidev_class_t acpidev_class_memory;
+extern acpidev_class_t acpidev_class_pci;
/*
* Class driver lists.
@@ -309,6 +405,7 @@ extern ACPI_STATUS acpidev_process_object(acpidev_walk_info_t *infop,
#define ACPIDEV_PROCESS_FLAG_OFFLINE 0x400 /* Put device into offline. */
#define ACPIDEV_PROCESS_FLAG_NOTAG 0x800 /* Skip tag dip with object. */
#define ACPIDEV_PROCESS_FLAG_SYNCSTATUS 0x1000 /* Sync object status. */
+#define ACPIDEV_PROCESS_FLAG_HOLDBRANCH 0x10000 /* Hold device branch. */
/*
* Filter ACPI objects according to filter rules, generate devname if needed.
@@ -346,10 +443,16 @@ extern void acpidev_data_destroy_handle(ACPI_HANDLE hdl);
extern ACPI_HANDLE acpidev_data_get_object(acpidev_data_handle_t hdl);
extern dev_info_t *acpidev_data_get_devinfo(acpidev_data_handle_t hdl);
extern int acpidev_data_get_status(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_capable(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_ready(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_failed(acpidev_data_handle_t hdl);
extern void acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag);
extern void acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag);
extern uint32_t acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag);
+/* ACPI system event handler has been registered. */
+#define ACPIDEV_DATA_HANDLER_READY 0x1
+
/*
* Try to generate meaningful device unit address from uid.
* Return buf on success and NULL on failure.
@@ -430,6 +533,26 @@ typedef ACPI_STATUS (* acpidev_apic_walker_t)(ACPI_SUBTABLE_HEADER *, void *);
extern ACPI_STATUS acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl,
char *method, acpidev_apic_walker_t func, void *context);
+/*
+ * Evaluate _OST method under object, which is used to support hotplug event.
+ * Evaluate _OST method under object, which is used to support hotplug events.
+ * hdl: object handle
+ * code: _OST source event code
+ * status: _OST result status code
+ */
+extern ACPI_STATUS acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code,
+ uint32_t status, char *bufp, size_t len);
+
+/*
+ * Evaluate _EJ0 method under object.
+ */
+extern ACPI_STATUS acpidev_eval_ej0(ACPI_HANDLE hdl);
+
+/*
+ * Evaluate _PXM method under object.
+ */
+extern ACPI_STATUS acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/acpidev_dr.h b/usr/src/uts/i86pc/sys/acpidev_dr.h
new file mode 100644
index 0000000000..6219f98bf1
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/acpidev_dr.h
@@ -0,0 +1,252 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+/*
+ * Interfaces to support System Board Dynamic Reconfiguration.
+ */
+
+#ifndef _SYS_ACPIDEV_DR_H
+#define _SYS_ACPIDEV_DR_H
+#include <sys/types.h>
+#include <sys/obpdefs.h>
+#include <sys/cpuvar.h>
+#include <sys/memlist.h>
+#include <sys/sunddi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Maximum number of DR capable system boards supported. */
+#define ACPIDEV_DR_MAX_BOARDS 0x40
+#define ACPIDEV_DR_SEGS_PER_MEM_DEV 0x10
+#define ACPIDEV_DR_MEMLISTS_PER_SEG 0x10
+#define ACPIDEV_DR_MAX_MEMLIST_ENTRIES 0x10000
+
+#define ACPIDEV_DR_PROP_PORTID "portid"
+#define ACPIDEV_DR_PROP_BOARDNUM OBP_BOARDNUM
+#define ACPIDEV_DR_PROP_DEVNAME OBP_NAME
+
+/*
+ * Format strings for DR capable system boards.
+ * They will be used as attachment point names.
+ */
+#define ACPIDEV_DR_CPU_BD_FMT "CPU%u"
+#define ACPIDEV_DR_MEMORY_BD_FMT "MEM%u"
+#define ACPIDEV_DR_IO_BD_FMT "IO%u"
+#define ACPIDEV_DR_SYSTEM_BD_FMT "SB%u"
+
+typedef enum {
+ ACPIDEV_INVALID_BOARD = 0,
+ ACPIDEV_CPU_BOARD,
+ ACPIDEV_MEMORY_BOARD,
+ ACPIDEV_IO_BOARD,
+ ACPIDEV_SYSTEM_BOARD
+} acpidev_board_type_t;
+
+/* Check whether the system is DR capable. */
+extern int acpidev_dr_capable(void);
+
+extern uint32_t acpidev_dr_max_boards(void);
+extern uint32_t acpidev_dr_max_mem_units_per_board(void);
+extern uint32_t acpidev_dr_max_io_units_per_board(void);
+extern uint32_t acpidev_dr_max_cmp_units_per_board(void);
+extern uint32_t acpidev_dr_max_cpu_units_per_cmp(void);
+extern uint32_t acpidev_dr_max_segments_per_mem_device(void);
+extern uint32_t acpidev_dr_max_memlists_per_segment(void);
+extern ACPI_STATUS acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap);
+
+/* Initialize support of DR operations. */
+extern void acpidev_dr_init(void);
+
+/* Scan for DR capable boards and setup environment for DR operations. */
+extern void acpidev_dr_check(acpidev_walk_info_t *infop);
+
+/*
+ * Initialize DR interfaces to enable DR operations.
+ */
+extern ACPI_STATUS acpidev_dr_initialize(dev_info_t *pdip);
+
+/* Get ACPI handle of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_handle(uint_t board,
+ ACPI_HANDLE *hdlp);
+
+/* Get board type of the DR capable board. */
+extern acpidev_board_type_t acpidev_dr_get_board_type(ACPI_HANDLE hdl);
+
+/* Get board number of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_number(ACPI_HANDLE hdl,
+ uint32_t *bnump);
+
+/* Get board name of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_name(ACPI_HANDLE hdl,
+ char *buf, size_t len);
+
+/* Get attachment point of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_attachment_point(ACPI_HANDLE hdl,
+ char *buf, size_t len);
+
+/*
+ * Figure out device type of the object/device.
+ * It only supports device types which may be involved in DR operations.
+ */
+extern acpidev_class_id_t acpidev_dr_device_get_class(ACPI_HANDLE hdl);
+
+/* Get memory device index/id. */
+extern ACPI_STATUS acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl,
+ uint32_t *idxp);
+
+/* Check whether the device is a DR capable board or not. */
+extern int acpidev_dr_device_is_board(ACPI_HANDLE hdl);
+
+/* Check whether the device is present or not. */
+extern int acpidev_dr_device_is_present(ACPI_HANDLE hdl);
+
+/* Check whether the device is powered-on or not. */
+extern int acpidev_dr_device_is_powered(ACPI_HANDLE hdl);
+
+/* Check whether the device is DR capable. */
+extern int acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl);
+
+/* Check whether the device has an eject device list. */
+extern int acpidev_dr_device_has_edl(ACPI_HANDLE hdl);
+
+/*
+ * Simulate OBP property interfaces to support drmach driver,
+ * so we can keep drmach consistent with the SPARC version.
+ * Return size of data copied to buf if it's big enough,
+ * otherwise return size of buffer needed.
+ */
+extern int acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name,
+ caddr_t buf, size_t len);
+
+/*
+ * Get "reg" or "assigned-address" property of the device.
+ * Return "assigned-address" property if assigned is non-zero,
+ * otherwise return "reg" property.
+ * Caller needs to release returned resources by calling
+ * acpidev_dr_device_free_regspec().
+ */
+extern ACPI_STATUS acpidev_dr_device_get_regspec(ACPI_HANDLE hdl,
+ boolean_t assigned, acpidev_regspec_t **regpp, uint_t *cntp);
+
+/* Free resources returned by acpidev_dr_device_get_regspec(). */
+extern void acpidev_dr_device_free_regspec(acpidev_regspec_t *regp,
+ uint_t count);
+
+/* Walk devices in eject device list (ACPI _EDL method). */
+extern ACPI_STATUS acpidev_dr_device_walk_edl(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/* Walk devices in eject dependency list (ACPI _EJD method). */
+extern ACPI_STATUS acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/*
+ * Walk child and dependent devices which may be involved in DR operations.
+ * PCI host bridges embedded in physical processors may be presented in eject
+ * device list instead of as children of processors.
+ */
+extern ACPI_STATUS acpidev_dr_device_walk_device(ACPI_HANDLE hdl,
+ uint_t max_lvl, ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/* Check whether the device is in working state without any error. */
+extern ACPI_STATUS acpidev_dr_device_check_status(ACPI_HANDLE hdl);
+
+/* Power on the device. */
+extern ACPI_STATUS acpidev_dr_device_poweron(ACPI_HANDLE hdl);
+
+/* Power off the device. */
+extern ACPI_STATUS acpidev_dr_device_poweroff(ACPI_HANDLE hdl);
+
+/*
+ * Create device nodes for hot-added devices under hdl.
+ * Return:
+ * AE_OK: on success
+ *	AE_SUPPORT: if it's not capable of DR operations.
+ * AE_ERROR: for other errors
+ */
+extern ACPI_STATUS acpidev_dr_device_insert(ACPI_HANDLE hdl);
+
+/*
+ * Destroy device nodes to be removed under hdl.
+ * Return:
+ *	AE_OK: on success
+ *	AE_SUPPORT: if it's not capable of DR operations.
+ *	AE_ERROR: for other errors
+ */
+extern ACPI_STATUS acpidev_dr_device_remove(ACPI_HANDLE hdl);
+
+/* Block dynamic reconfiguration operations. */
+extern void acpidev_dr_lock_all(void);
+
+/* Unblock dynamic reconfiguration operations. */
+extern void acpidev_dr_unlock_all(void);
+
+extern ACPI_STATUS acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl,
+ processorid_t *idp);
+extern ACPI_STATUS acpidev_dr_free_cpuid(ACPI_HANDLE hdl);
+
+/*
+ * Query NUMA relative information for the CPU device.
+ * It returns APIC id, Proximity id and latency information of the CPU device.
+ * Latency information is retrieved from the ACPI _SLI method or the ACPI SLIT
+ * table.
+ */
+extern int acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp,
+ uint32_t *apicidp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp);
+
+/*
+ * Release resources allocated by acpidev_dr_get_cpu_numa_info().
+ */
+extern void acpidev_dr_free_cpu_numa_info(void *hdlp);
+
+/*
+ * Query NUMA relative information for a memory device.
+ * It returns proximity id and latency information of the memory device.
+ * Latency information is obtained from the ACPI _SLI method or the ACPI
+ * SLIT table.
+ */
+extern ACPI_STATUS acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl,
+ struct memlist *ml, void **hdlpp, uint32_t *pxmidp,
+ uint32_t *slicntp, uchar_t **slipp);
+
+/*
+ * Release resources allocated by acpidev_dr_get_mem_numa_info().
+ */
+extern void acpidev_dr_free_mem_numa_info(void *hdlp);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ACPIDEV_DR_H */
diff --git a/usr/src/uts/i86pc/sys/acpidev_impl.h b/usr/src/uts/i86pc/sys/acpidev_impl.h
index 666ab5578d..c01ea9f5cb 100644
--- a/usr/src/uts/i86pc/sys/acpidev_impl.h
+++ b/usr/src/uts/i86pc/sys/acpidev_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -27,10 +27,13 @@
#define _SYS_ACPIDEV_IMPL_H
#include <sys/types.h>
#include <sys/cmn_err.h>
+#include <sys/bitmap.h>
+#include <sys/synch.h>
#include <sys/sunddi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#ifdef __cplusplus
extern "C" {
@@ -54,12 +57,52 @@ struct acpidev_data_impl {
dev_info_t *aod_dip;
acpidev_class_t *aod_class;
acpidev_class_list_t **aod_class_list;
+ acpidev_board_type_t aod_bdtype; /* Type of board. */
+ uint32_t aod_bdnum; /* Board # for DR. */
+ uint32_t aod_portid; /* Port id for DR. */
+ uint32_t aod_bdidx; /* Index # of AP */
+ volatile uint32_t aod_chidx; /* Index # of child */
+ uint32_t aod_memidx; /* Index # of memory */
+ acpidev_class_id_t aod_class_id; /* Dev type for DR. */
};
#define ACPIDEV_ODF_STATUS_VALID 0x1
#define ACPIDEV_ODF_DEVINFO_CREATED 0x2
#define ACPIDEV_ODF_DEVINFO_TAGGED 0x4
-#define ACPIDEV_ODF_DEVINFO_OFFLINE 0x8
+#define ACPIDEV_ODF_HOTPLUG_CAPABLE 0x100
+#define ACPIDEV_ODF_HOTPLUG_READY 0x200
+#define ACPIDEV_ODF_HOTPLUG_FAILED 0x400
+
+#define ACPIDEV_DR_IS_BOARD(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE)
+
+#define ACPIDEV_DR_SET_BOARD(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_CAPABLE
+
+#define ACPIDEV_DR_IS_READY(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY)
+
+#define ACPIDEV_DR_SET_READY(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_READY
+
+#define ACPIDEV_DR_IS_FAILED(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED)
+
+#define ACPIDEV_DR_SET_FAILED(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_FAILED
+
+#define ACPIDEV_DR_IS_WORKING(hdl) \
+ (((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \
+ ACPIDEV_ODF_HOTPLUG_FAILED)) == ACPIDEV_ODF_HOTPLUG_READY)
+
+#define ACPIDEV_DR_IS_PROCESSED(hdl) \
+ ((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \
+ ACPIDEV_ODF_HOTPLUG_FAILED | ACPIDEV_ODF_HOTPLUG_CAPABLE))
+
+#define ACPIDEV_DR_BOARD_READY(hdl) \
+ (((hdl)->aod_iflag & \
+ (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE)) == \
+ (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE))
/*
* List of registered device class drivers.
@@ -83,6 +126,23 @@ typedef struct acpidev_pseudo_uid_head {
acpidev_pseudo_uid_t *apuh_first;
} acpidev_pseudo_uid_head_t;
+typedef struct acpidev_dr_capacity {
+ uint_t cpu_vendor;
+ uint_t cpu_family;
+ uint_t cpu_model_min;
+ uint_t cpu_model_max;
+ uint_t cpu_step_min;
+ uint_t cpu_step_max;
+ boolean_t hotplug_supported;
+ uint64_t memory_alignment;
+} acpidev_dr_capacity_t;
+
+extern int acpidev_dr_enable;
+extern krwlock_t acpidev_class_lock;
+extern ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
+extern ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr;
+extern ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/acpinex.h b/usr/src/uts/i86pc/sys/acpinex.h
index be21c8e0e2..4aa6f19636 100644
--- a/usr/src/uts/i86pc/sys/acpinex.h
+++ b/usr/src/uts/i86pc/sys/acpinex.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -62,6 +62,10 @@ typedef struct {
char ans_path[MAXPATHLEN];
} acpinex_softstate_t;
+extern void acpinex_event_init(void);
+extern void acpinex_event_fini(void);
+extern int acpinex_event_scan(acpinex_softstate_t *, boolean_t);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index d0aea5d877..48500c4396 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -502,7 +502,7 @@ typedef struct apic_irq {
* adjust APIC_PADSZ as we add/modify any member of apic_cpus_info. We also
* don't want the compiler to optimize apic_cpus_info.
*/
-#define APIC_PADSZ 19
+#define APIC_PADSZ 15
#pragma pack(1)
typedef struct apic_cpus_info {
@@ -517,6 +517,7 @@ typedef struct apic_cpus_info {
uchar_t aci_current[MAXIPL]; /* Current IRQ at each IPL */
uint32_t aci_bound; /* # of user requested binds ? */
uint32_t aci_temp_bound; /* # of non user IRQ binds */
+ uint32_t aci_processor_id; /* Only used in ACPI mode. */
uchar_t aci_idle; /* The CPU is idle */
/*
* Fill to make sure each struct is in separate 64-byte cache line.
@@ -527,6 +528,8 @@ typedef struct apic_cpus_info {
#define APIC_CPU_ONLINE 1
#define APIC_CPU_INTR_ENABLE 2
+#define APIC_CPU_FREE 4 /* APIC CPU slot is free */
+#define APIC_CPU_DIRTY 8 /* Slot was once used */
/*
* APIC ops to support various flavors of APIC like APIC and x2APIC.
@@ -784,6 +787,7 @@ extern int apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
psm_intr_op_t intr_op, int *result);
extern int apic_state(psm_state_request_t *);
extern boolean_t apic_cpu_in_range(int cpu);
+extern processorid_t apic_find_next_cpu_intr(void);
extern int apic_check_msi_support();
extern apic_irq_t *apic_find_irq(dev_info_t *dip, struct intrspec *ispec,
int type);
@@ -849,6 +853,7 @@ extern int apic_diff_for_redistribution;
extern int apic_poweroff_method;
extern int apic_enable_acpi;
extern int apic_nproc;
+extern int apic_max_nproc;
extern int apic_next_bind_cpu;
extern int apic_redistribute_sample_interval;
extern int apic_multi_msi_enable;
diff --git a/usr/src/uts/i86pc/sys/dr.h b/usr/src/uts/i86pc/sys/dr.h
new file mode 100644
index 0000000000..492eaeaae9
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/dr.h
@@ -0,0 +1,500 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DR_H
+#define _SYS_DR_H
+#include <sys/types.h>
+#include <sys/note.h>
+#include <sys/processor.h>
+#include <sys/obpdefs.h>
+#include <sys/memlist.h>
+#include <sys/mem_config.h>
+#include <sys/param.h> /* for MAXPATHLEN */
+#include <sys/varargs.h>
+#include <sys/sbd_ioctl.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * helper macros for constructing and reporting internal error messages.
+ * NOTE: each module which uses one or more this these macros is expected
+ * NOTE: each module which uses one or more of these macros is expected
+ * expansion macro (percent M percent) and a sprintf %d to render the
+ * line number argument.
+ */
+#define DR_INTERNAL_ERROR(hp) \
+ drerr_new(1, ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+#define DR_OP_INTERNAL_ERROR(hp) \
+ drerr_set_c(CE_WARN, &(hp)->h_err, \
+ ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+#define DR_DEV_INTERNAL_ERROR(cp) \
+ drerr_set_c(CE_WARN, &(cp)->sbdev_error, \
+ ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+/*
+ * Macros for keeping an error code and an associated list of integers.
+ */
+#define DR_MAX_ERR_INT (32)
+#define DR_GET_E_CODE(sep) ((sep)->e_code)
+#define DR_SET_E_CODE(sep, en) ((sep)->e_code = (en))
+#define DR_GET_E_RSC(sep) ((sep)->e_rsc)
+
+/* Number of device node types. */
+#define DR_MAXNUM_NT 3
+
+/* used to map sbd_comp_type_t to array index */
+#define DEVSET_NIX(t) \
+ (((t) == SBD_COMP_CPU) ? 0 : \
+ ((t) == SBD_COMP_MEM) ? 1 : \
+ ((t) == SBD_COMP_IO) ? 2 : \
+ ((t) == SBD_COMP_CMP) ? 0 : DR_MAXNUM_NT)
+
+/*
+ * Format of dr_devset_t bit masks:
+ *
+ *  64   56   48   40   32   24   16   8    0
+ * |....|....|IIII|IIII|IIII|IIII|MMMM|MMMM|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|
+ *
+ * 1 = indicates respective component present/attached.
+ * I = I/O, M = Memory, C = CPU.
+ */
+#define DEVSET_CPU_OFFSET 0
+#define DEVSET_CPU_NUMBER 32
+#define DEVSET_MEM_OFFSET (DEVSET_CPU_OFFSET + DEVSET_CPU_NUMBER)
+#define DEVSET_MEM_NUMBER 8
+#define DEVSET_IO_OFFSET (DEVSET_MEM_OFFSET + DEVSET_MEM_NUMBER)
+#define DEVSET_IO_NUMBER 16
+#define DEVSET_MAX_BITS (DEVSET_IO_OFFSET + DEVSET_IO_NUMBER)
+
+#define DEVSET_BIX(t) \
+ (((t) == SBD_COMP_CPU) ? DEVSET_CPU_OFFSET : \
+ ((t) == SBD_COMP_MEM) ? DEVSET_MEM_OFFSET : \
+ ((t) == SBD_COMP_IO) ? DEVSET_IO_OFFSET : \
+ ((t) == SBD_COMP_CMP) ? DEVSET_CPU_OFFSET : 0)
+
+#define DEVSET_NT2DEVPOS(t, u) (((t) == SBD_COMP_CMP) ?\
+ (DEVSET_BIX(t) + (u) * MAX_CORES_PER_CMP) : DEVSET_BIX(t) + (u))
+
+#if (DEVSET_MAX_BITS <= 64)
+typedef uint64_t dr_devset_t;
+
+#define DEVSET_ONEUNIT ((dr_devset_t)1)
+#define DEVSET_ANYUNIT ((dr_devset_t)(-1))
+#define DEVSET_CPU_NMASK ((dr_devset_t)((1ULL << DEVSET_CPU_NUMBER) - 1))
+#define DEVSET_MEM_NMASK ((dr_devset_t)((1ULL << DEVSET_MEM_NUMBER) - 1))
+#define DEVSET_IO_NMASK ((dr_devset_t)((1ULL << DEVSET_IO_NUMBER) - 1))
+#define DEVSET_CMP_NMASK ((dr_devset_t)((1ULL << MAX_CORES_PER_CMP) - 1))
+
+#define DEVSET_NMASK(t) \
+ (((t) == SBD_COMP_CPU) ? DEVSET_CPU_NMASK : \
+ ((t) == SBD_COMP_MEM) ? DEVSET_MEM_NMASK : \
+ ((t) == SBD_COMP_IO) ? DEVSET_IO_NMASK : \
+ ((t) == SBD_COMP_CMP) ? DEVSET_CPU_NMASK : 0)
+
+#define DEVSET_MASK \
+ ((DEVSET_CPU_NMASK << DEVSET_CPU_OFFSET) | \
+ (DEVSET_MEM_NMASK << DEVSET_MEM_OFFSET) | \
+ (DEVSET_IO_NMASK << DEVSET_IO_OFFSET))
+
+#define DEVSET(t, u) \
+ (((u) == DEVSET_ANYUNIT) ? \
+ ((DEVSET_NMASK(t) << DEVSET_NT2DEVPOS((t), 0)) & \
+ DEVSET_MASK) : \
+ ((t) == SBD_COMP_CMP) ? \
+ (DEVSET_CMP_NMASK << DEVSET_NT2DEVPOS((t), (u))) : \
+ (DEVSET_ONEUNIT << DEVSET_NT2DEVPOS((t), (u))))
+
+#define DEVSET_IS_NULL(ds) ((ds) == 0)
+#define DEVSET_IN_SET(ds, t, u) (((ds) & DEVSET((t), (u))) != 0)
+#define DEVSET_ADD(ds, t, u) ((ds) |= DEVSET((t), (u)))
+#define DEVSET_DEL(ds, t, u) ((ds) &= ~DEVSET((t), (u)))
+#define DEVSET_AND(ds1, ds2) ((ds1) & (ds2))
+#define DEVSET_OR(ds1, ds2) ((ds1) | (ds2))
+#define DEVSET_NAND(ds1, ds2) ((ds1) & ~(ds2))
+#define DEVSET_GET_UNITSET(ds, t) \
+ (((ds) & DEVSET((t), DEVSET_ANYUNIT)) >> DEVSET_NT2DEVPOS((t), 0))
+#define DEVSET_FMT_STR "0x%" PRIx64 ""
+#define DEVSET_FMT_ARG(ds) (ds)
+#else /* DEVSET_MAX_BITS <= 64 */
+#error please implement devset with bitmap to support more than 64 devices
+#endif /* DEVSET_MAX_BITS <= 64 */
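+
+/*
+ * Example (illustrative only): composing and testing a devset.  The
+ * unit numbers used here are hypothetical.
+ *
+ *	dr_devset_t ds = 0;
+ *
+ *	DEVSET_ADD(ds, SBD_COMP_CPU, 3);	(sets bit 3)
+ *	DEVSET_ADD(ds, SBD_COMP_MEM, 0);	(sets bit 32)
+ *	ASSERT(DEVSET_IN_SET(ds, SBD_COMP_CPU, 3));
+ *	DEVSET_DEL(ds, SBD_COMP_MEM, 0);
+ */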
+
+/*
+ * Ops for dr_board_t.b_dev_*
+ */
+#define DR_DEV_IS(ds, cp) DEVSET_IN_SET( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+#define DR_DEV_ADD(ds, cp) DEVSET_ADD( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+#define DR_DEV_DEL(ds, cp) DEVSET_DEL( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+/*
+ * Ops for dr_board_t.b_dev_present
+ */
+#define DR_DEV_IS_PRESENT(cp) DR_DEV_IS(present, cp)
+#define DR_DEV_SET_PRESENT(cp) DR_DEV_ADD(present, cp)
+#define DR_DEV_CLR_PRESENT(cp) DR_DEV_DEL(present, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_attached
+ */
+#define DR_DEV_IS_ATTACHED(cp) DR_DEV_IS(attached, cp)
+#define DR_DEV_SET_ATTACHED(cp) DR_DEV_ADD(attached, cp)
+#define DR_DEV_CLR_ATTACHED(cp) DR_DEV_DEL(attached, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_released
+ */
+#define DR_DEV_IS_RELEASED(cp) DR_DEV_IS(released, cp)
+#define DR_DEV_SET_RELEASED(cp) DR_DEV_ADD(released, cp)
+#define DR_DEV_CLR_RELEASED(cp) DR_DEV_DEL(released, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_unreferenced
+ */
+#define DR_DEV_IS_UNREFERENCED(cp) DR_DEV_IS(unreferenced, cp)
+#define DR_DEV_SET_UNREFERENCED(cp) DR_DEV_ADD(unreferenced, cp)
+#define DR_DEV_CLR_UNREFERENCED(cp) DR_DEV_DEL(unreferenced, cp)
+
+#define DR_DEVS_PRESENT(bp) \
+ ((bp)->b_dev_present)
+#define DR_DEVS_ATTACHED(bp) \
+ ((bp)->b_dev_attached)
+#define DR_DEVS_RELEASED(bp) \
+ ((bp)->b_dev_released)
+#define DR_DEVS_UNREFERENCED(bp) \
+ ((bp)->b_dev_unreferenced)
+#define DR_DEVS_UNATTACHED(bp) \
+ ((bp)->b_dev_present & ~(bp)->b_dev_attached)
+#define DR_DEVS_CONFIGURE(bp, devs) \
+ ((bp)->b_dev_attached = (devs))
+#define DR_DEVS_DISCONNECT(bp, devs) \
+ ((bp)->b_dev_present &= ~(devs))
+#define DR_DEVS_CANCEL(bp, devs) \
+ ((bp)->b_dev_released &= ~(devs), \
+ (bp)->b_dev_unreferenced &= ~(devs))
+
+/*
+ * CMP Specific Helpers
+ */
+#define DR_CMP_CORE_UNUM(cmp, core) ((cmp) * MAX_CORES_PER_CMP + (core))
+
+/*
+ * For CPU and CMP devices, DR_UNUM2SBD_UNUM is used to extract the physical
+ * CPU/CMP id from the device id.
+ */
+#define DR_UNUM2SBD_UNUM(n, d) \
+ ((d) == SBD_COMP_CPU ? ((n) / MAX_CORES_PER_CMP) : \
+ (d) == SBD_COMP_CMP ? ((n) / MAX_CORES_PER_CMP) : (n))
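+
+/*
+ * For example (hypothetical values): with MAX_CORES_PER_CMP == 4, CPU
+ * device id 6 maps to physical CMP id 1 (6 / 4); conversely,
+ * DR_CMP_CORE_UNUM(1, 2) == 6.
+ */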
+
+/*
+ * Definitions to assist in debugging.
+ */
+#ifdef DEBUG
+#define DRDBG_STATE 0x00000001
+#define DRDBG_QR 0x00000002
+#define DRDBG_CPU 0x00000004
+#define DRDBG_MEM 0x00000008
+#define DRDBG_IO 0x00000010
+
+#define PR_ALL if (dr_debug) printf
+#define PR_STATE if (dr_debug & DRDBG_STATE) printf
+#define PR_QR if (dr_debug & DRDBG_QR) prom_printf
+#define PR_CPU if (dr_debug & DRDBG_CPU) printf
+#define PR_MEM if (dr_debug & DRDBG_MEM) printf
+#define PR_IO if (dr_debug & DRDBG_IO) printf
+#define PR_MEMLIST_DUMP if (dr_debug & DRDBG_MEM) MEMLIST_DUMP
+
+extern uint_t dr_debug;
+#else /* DEBUG */
+#define PR_ALL _NOTE(CONSTANTCONDITION) if (0) printf
+#define PR_STATE PR_ALL
+#define PR_QR PR_ALL
+#define PR_CPU PR_ALL
+#define PR_MEM PR_ALL
+#define PR_IO PR_ALL
+#define PR_MEMLIST_DUMP _NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
+
+#endif /* DEBUG */
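+
+/*
+ * Example (illustrative): tracing CPU operations on a DEBUG kernel.
+ * dr_debug is normally set via /etc/system; the statements below are
+ * a hypothetical sketch.
+ *
+ *	dr_debug |= DRDBG_CPU;
+ *	PR_CPU("%s: cpu %d\n", f, cp->sbc_cpu_id);
+ */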
+
+/*
+ * dr_board_t b_sflags.
+ */
+#define DR_BSLOCK 0x01 /* for blocking status (protected by b_slock) */
+
+typedef const char *fn_t;
+
+/*
+ * Unsafe devices based on dr.conf prop "unsupported-io-drivers"
+ */
+typedef struct {
+ char **devnames;
+ uint_t ndevs;
+} dr_unsafe_devs_t;
+
+/*
+ * Device states.
+ * PARTIAL state is really only relevant for board state.
+ */
+typedef enum {
+ DR_STATE_EMPTY = 0,
+ DR_STATE_OCCUPIED,
+ DR_STATE_CONNECTED,
+ DR_STATE_UNCONFIGURED,
+ DR_STATE_PARTIAL, /* part connected, part configured */
+ DR_STATE_CONFIGURED,
+ DR_STATE_RELEASE,
+ DR_STATE_UNREFERENCED,
+ DR_STATE_FATAL,
+ DR_STATE_MAX
+} dr_state_t;
+
+typedef struct dr_handle {
+ struct dr_board *h_bd;
+ sbd_error_t *h_err;
+ int h_op_intr; /* nz if op interrupted */
+ dev_t h_dev; /* dev_t of opened device */
+ int h_cmd; /* PIM ioctl argument */
+ int h_mode; /* device open mode */
+ sbd_cmd_t h_sbdcmd; /* copied-in ioctl cmd struct */
+ sbd_ioctl_arg_t *h_iap; /* ptr to caller-space cmd struct */
+ dr_devset_t h_devset; /* based on h_dev */
+ uint_t h_ndi;
+ drmach_opts_t h_opts; /* command-line platform options */
+} dr_handle_t;
+
+typedef struct dr_common_unit {
+ dr_state_t sbdev_state;
+ sbd_state_t sbdev_ostate;
+ sbd_cond_t sbdev_cond;
+ time_t sbdev_time;
+ int sbdev_busy;
+ struct dr_board *sbdev_bp;
+ int sbdev_unum;
+ sbd_comp_type_t sbdev_type;
+ drmachid_t sbdev_id;
+ char sbdev_path[MAXNAMELEN];
+ sbd_error_t *sbdev_error;
+} dr_common_unit_t;
+
+typedef struct dr_mem_unit {
+ dr_common_unit_t sbm_cm; /* mem-unit state */
+ uint_t sbm_flags;
+ pfn_t sbm_basepfn;
+ pgcnt_t sbm_npages;
+ pgcnt_t sbm_pageslost;
+ struct memlist *sbm_dyn_segs; /* kphysm_add_dynamic segs */
+ /*
+ * The following fields are used during
+ * the memory detach process only. sbm_mlist
+ * will be used to store the board memlist
+ * following a detach. The memlist will be
+ * used to re-attach the board when configuring
+ * the unit directly after an unconfigure.
+ */
+ struct dr_mem_unit *sbm_peer;
+ struct memlist *sbm_mlist;
+ struct memlist *sbm_del_mlist;
+ memhandle_t sbm_memhandle;
+ uint64_t sbm_alignment_mask;
+ uint64_t sbm_slice_base;
+ uint64_t sbm_slice_top;
+ uint64_t sbm_slice_size;
+} dr_mem_unit_t;
+
+/*
+ * State information is currently maintained only for individual
+ * components.
+ */
+typedef struct dr_cpu_unit {
+ dr_common_unit_t sbc_cm; /* cpu-unit state */
+ processorid_t sbc_cpu_id;
+ cpu_flag_t sbc_cpu_flags; /* snapshot of CPU flags */
+ ushort_t sbc_pad1; /* padded for compatibility */
+ int sbc_speed;
+ int sbc_ecache;
+ int sbc_cpu_impl;
+} dr_cpu_unit_t;
+
+typedef struct dr_io_unit {
+ dr_common_unit_t sbi_cm; /* io-unit state */
+} dr_io_unit_t;
+
+typedef union {
+ dr_common_unit_t du_common;
+ dr_mem_unit_t du_mem;
+ dr_cpu_unit_t du_cpu;
+ dr_io_unit_t du_io;
+} dr_dev_unit_t;
+
+typedef struct dr_board {
+ kmutex_t b_lock; /* lock for this board struct */
+ kmutex_t b_slock; /* lock for status on the board */
+ kcondvar_t b_scv; /* condvar for status on the board */
+ int b_sflags; /* for serializing status */
+ sbd_state_t b_rstate; /* board's cfgadm receptacle state */
+ sbd_state_t b_ostate; /* board's cfgadm occupant state */
+ sbd_cond_t b_cond; /* cfgadm condition */
+ int b_busy;
+ int b_assigned;
+ time_t b_time; /* time of last board operation */
+ char b_type[MAXNAMELEN];
+ drmachid_t b_id;
+ int b_num; /* board number */
+ int b_ndev; /* # of devices on board */
+ dev_info_t *b_dip; /* dip for make-nodes */
+ dr_state_t b_state; /* board DR state */
+ dr_devset_t b_dev_present; /* present mask */
+ dr_devset_t b_dev_attached; /* attached mask */
+ dr_devset_t b_dev_released; /* released mask */
+ dr_devset_t b_dev_unreferenced; /* unreferenced mask */
+ char b_path[MAXNAMELEN];
+ dr_dev_unit_t *b_dev[DR_MAXNUM_NT];
+} dr_board_t;
+
+/*
+ * dr_quiesce.c interfaces
+ */
+struct dr_sr_handle;
+typedef struct dr_sr_handle dr_sr_handle_t;
+
+extern dr_sr_handle_t *dr_get_sr_handle(dr_handle_t *handle);
+extern void dr_release_sr_handle(dr_sr_handle_t *srh);
+extern int dr_suspend(dr_sr_handle_t *srh);
+extern void dr_resume(dr_sr_handle_t *srh);
+extern void dr_check_devices(dev_info_t *dip, int *refcount,
+ dr_handle_t *handle, uint64_t *arr, int *idx,
+ int len, int *refcount_non_gldv3);
+extern int dr_pt_test_suspend(dr_handle_t *hp);
+
+/*
+ * dr_cpu.c interface
+ */
+extern void dr_init_cpu_unit(dr_cpu_unit_t *cp);
+extern int dr_pre_attach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_detach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_detach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_cpu_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_cancel_cpu(dr_cpu_unit_t *cp);
+extern int dr_disconnect_cpu(dr_cpu_unit_t *cp);
+
+
+/*
+ * dr_mem.c interface
+ */
+extern void dr_init_mem_unit(dr_mem_unit_t *mp);
+extern int dr_pre_attach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_release_mem(dr_common_unit_t *cp);
+extern void dr_release_mem_done(dr_common_unit_t *cp);
+extern int dr_pre_detach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_mem(dr_handle_t *, dr_common_unit_t *);
+extern int dr_post_detach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_mem_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_cancel_mem(dr_mem_unit_t *mp);
+extern int dr_disconnect_mem(dr_mem_unit_t *mp);
+
+/*
+ * dr_io.c interface
+ */
+extern void dr_init_io_unit(dr_io_unit_t *io);
+extern int dr_pre_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_io_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_disconnect_io(dr_io_unit_t *ip);
+
+
+/*
+ * dr.c interface
+ */
+extern void dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...);
+extern void dr_dev_err(int ce, dr_common_unit_t *cp, int code);
+
+extern dr_cpu_unit_t *dr_get_cpu_unit(dr_board_t *bp, int unit_num);
+extern dr_mem_unit_t *dr_get_mem_unit(dr_board_t *bp, int unit_num);
+extern dr_io_unit_t *dr_get_io_unit(dr_board_t *bp, int unit_num);
+
+extern dr_board_t *dr_lookup_board(int board_num);
+extern int dr_release_dev_done(dr_common_unit_t *cp);
+extern char *dr_nt_to_dev_type(int type);
+extern void dr_device_transition(dr_common_unit_t *cp,
+ dr_state_t new_state);
+extern void dr_lock_status(dr_board_t *bp);
+extern void dr_unlock_status(dr_board_t *bp);
+extern int dr_cmd_flags(dr_handle_t *hp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DR_H */
diff --git a/usr/src/uts/i86pc/sys/dr_util.h b/usr/src/uts/i86pc/sys/dr_util.h
new file mode 100644
index 0000000000..46a66c759b
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/dr_util.h
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DR_UTIL_H_
+#define _SYS_DR_UTIL_H_
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/memlist.h>
+#include <sys/varargs.h>
+#include <sys/sbd_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GETSTRUCT(t, n) \
+ ((t *)kmem_zalloc((size_t)(n) * sizeof (t), KM_SLEEP))
+#define FREESTRUCT(p, t, n) \
+ (kmem_free((caddr_t)(p), sizeof (t) * (size_t)(n)))
+
+#define DRERR_SET_C(epps, eppn) \
+ if (*(epps) == NULL) \
+ *(epps) = *(eppn); \
+ else \
+ sbd_err_clear(eppn)
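+
+/*
+ * Example (illustrative only): GETSTRUCT/FREESTRUCT allocate and free
+ * zeroed arrays, and DRERR_SET_C keeps the first error while clearing
+ * any later one.  The type, count, and error arguments below are
+ * hypothetical.
+ *
+ *	sbd_error_t *err = NULL;
+ *	sbd_error_t *new = drerr_new(0, ESBD_INTERNAL, NULL);
+ *	struct memlist *ml = GETSTRUCT(struct memlist, 4);
+ *
+ *	DRERR_SET_C(&err, &new);	(err takes new only if still NULL)
+ *	FREESTRUCT(ml, struct memlist, 4);
+ */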
+
+#ifdef DEBUG
+#define MEMLIST_DUMP(ml) memlist_dump(ml)
+#else
+#define MEMLIST_DUMP(ml)
+#endif
+
+extern sbd_error_t *sbd_err_new(int e_code, char *fmt, va_list args);
+extern void sbd_err_log(sbd_error_t *ep, int ce);
+extern void sbd_err_clear(sbd_error_t **ep);
+extern void sbd_err_set_c(sbd_error_t **ep, int ce,
+ int e_code, char *fmt, ...);
+extern void sbd_err_set(sbd_error_t **ep, int ce,
+ int e_code, char *fmt, ...);
+
+extern sbd_error_t *drerr_new(int log, int e_code, char *fmt, ...);
+extern sbd_error_t *drerr_new_v(int e_code, char *fmt, va_list args);
+extern void drerr_set_c(int log, sbd_error_t **ep,
+ int e_code, char *fmt, ...);
+
+extern void dr_memlist_delete(struct memlist *mlist);
+extern void memlist_dump(struct memlist *mlist);
+extern int dr_memlist_intersect(struct memlist *al,
+ struct memlist *bl);
+extern void dr_memlist_coalesce(struct memlist *mlist);
+extern struct memlist *dr_memlist_dup(struct memlist *mlist);
+extern struct memlist *dr_memlist_add_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+extern struct memlist *dr_memlist_del_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+extern struct memlist *dr_memlist_cat_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+
+/*
+ * These are all utilities internal to DR.  Functions with similar
+ * names exist in common/os; ours are renamed here to avoid name
+ * space conflicts.
+ */
+#define memlist_delete dr_memlist_delete
+#define memlist_intersect dr_memlist_intersect
+#define memlist_coalesce dr_memlist_coalesce
+#define memlist_dup dr_memlist_dup
+#define memlist_add_span dr_memlist_add_span
+#define memlist_del_span dr_memlist_del_span
+#define memlist_cat_span dr_memlist_cat_span
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DR_UTIL_H_ */
diff --git a/usr/src/uts/i86pc/sys/drmach.h b/usr/src/uts/i86pc/sys/drmach.h
new file mode 100644
index 0000000000..2d2a2cff7b
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/drmach.h
@@ -0,0 +1,177 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DRMACH_H_
+#define _SYS_DRMACH_H_
+#include <sys/types.h>
+#include <sys/memlist.h>
+#include <sys/ddi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/sysevent.h>
+#include <sys/x86_archext.h>
+#include <sys/sbd_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BOARDS drmach_max_boards()
+#define MAX_MEM_UNITS_PER_BOARD drmach_max_mem_units_per_board()
+#define MAX_IO_UNITS_PER_BOARD drmach_max_io_units_per_board()
+#define MAX_CMP_UNITS_PER_BOARD drmach_max_cmp_units_per_board()
+
+/* DR uses MAX_CORES_PER_CMP as the number of logical CPUs within a CMP. */
+#define MAX_CORES_PER_CMP drmach_max_core_per_cmp()
+
+/* Maximum possible logical CPUs per board. */
+#define MAX_CPU_UNITS_PER_BOARD (MAX_CMP_UNITS_PER_BOARD * MAX_CORES_PER_CMP)
+
+/* Check whether CPU is CMP. True if chip has more than one core/thread. */
+#define CPU_IMPL_IS_CMP(impl) (MAX_CORES_PER_CMP > 1)
+
+/* CPU implementation ID for Intel Nehalem CPU. */
+#define X86_CPU_IMPL_NEHALEM_EX 0x062E0000
+#define X86_CPU_IMPL_UNKNOWN 0x00000000
+
+/* returned with drmach_board_find_devices callback */
+#define DRMACH_DEVTYPE_CPU "cpu"
+#define DRMACH_DEVTYPE_MEM "memory"
+#define DRMACH_DEVTYPE_PCI "pci"
+
+/*
+ * x86 platform-specific routines, currently defined only in
+ * drmach_acpi.c and referenced by DR.
+ */
+
+typedef void *drmachid_t;
+
+typedef struct {
+ boolean_t assigned;
+ boolean_t powered;
+ boolean_t configured;
+ boolean_t busy;
+ boolean_t empty;
+ sbd_cond_t cond;
+ char type[SBD_TYPE_LEN];
+ char info[SBD_MAX_INFO];
+} drmach_status_t;
+
+typedef struct {
+ int size;
+ char *copts;
+} drmach_opts_t;
+
+typedef struct {
+ uint64_t mi_basepa;
+ uint64_t mi_size;
+ uint64_t mi_slice_base;
+ uint64_t mi_slice_top;
+ uint64_t mi_slice_size;
+ uint64_t mi_alignment_mask;
+} drmach_mem_info_t;
+
+extern uint_t drmach_max_boards(void);
+extern uint_t drmach_max_io_units_per_board(void);
+extern uint_t drmach_max_cmp_units_per_board(void);
+extern uint_t drmach_max_mem_units_per_board(void);
+extern uint_t drmach_max_core_per_cmp(void);
+
+extern sbd_error_t *drmach_get_dip(drmachid_t id, dev_info_t **dip);
+extern sbd_error_t *drmach_release(drmachid_t id);
+extern sbd_error_t *drmach_pre_op(int cmd, drmachid_t id,
+ drmach_opts_t *opts, void *devsetp);
+extern sbd_error_t *drmach_post_op(int cmd, drmachid_t id,
+ drmach_opts_t *opts, int rv);
+extern sbd_error_t *drmach_configure(drmachid_t id, int flags);
+extern sbd_error_t *drmach_unconfigure(drmachid_t id, int flags);
+extern sbd_error_t *drmach_status(drmachid_t id, drmach_status_t *stat);
+extern sbd_error_t *drmach_passthru(drmachid_t id,
+ drmach_opts_t *opts);
+
+extern sbd_error_t *drmach_board_find_devices(drmachid_t id, void *a,
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t));
+extern int drmach_board_lookup(int bnum, drmachid_t *id);
+extern sbd_error_t *drmach_board_name(int bnum, char *buf, int buflen);
+extern sbd_error_t *drmach_board_assign(int bnum, drmachid_t *id);
+extern sbd_error_t *drmach_board_unassign(drmachid_t id);
+extern sbd_error_t *drmach_board_poweroff(drmachid_t id);
+extern sbd_error_t *drmach_board_poweron(drmachid_t id);
+extern sbd_error_t *drmach_board_test(drmachid_t id, drmach_opts_t *opts,
+ int force);
+extern sbd_error_t *drmach_board_connect(drmachid_t id,
+ drmach_opts_t *opts);
+extern sbd_error_t *drmach_board_disconnect(drmachid_t id,
+ drmach_opts_t *opts);
+extern sbd_error_t *drmach_board_deprobe(drmachid_t id);
+extern int drmach_board_is_floating(drmachid_t);
+
+extern sbd_error_t *drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid);
+extern sbd_error_t *drmach_cpu_get_impl(drmachid_t id, int *ip);
+extern sbd_error_t *drmach_cpu_disconnect(drmachid_t id);
+
+extern sbd_error_t *drmach_io_is_attached(drmachid_t id, int *yes);
+extern sbd_error_t *drmach_io_post_attach(drmachid_t id);
+extern sbd_error_t *drmach_io_pre_release(drmachid_t id);
+extern sbd_error_t *drmach_io_unrelease(drmachid_t id);
+extern sbd_error_t *drmach_io_post_release(drmachid_t id);
+
+extern sbd_error_t *drmach_mem_get_slice_info(drmachid_t id,
+ uint64_t *basepa, uint64_t *endpa,
+ uint64_t *sizep);
+extern sbd_error_t *drmach_mem_get_memlist(drmachid_t id,
+ struct memlist **ml);
+extern sbd_error_t *drmach_mem_get_info(drmachid_t, drmach_mem_info_t *);
+extern sbd_error_t *drmach_mem_enable(drmachid_t id);
+extern sbd_error_t *drmach_mem_disable(drmachid_t id);
+extern sbd_error_t *drmach_mem_add_span(drmachid_t id,
+ uint64_t basepa, uint64_t size);
+extern sbd_error_t *drmach_mem_del_span(drmachid_t id,
+ uint64_t basepa, uint64_t size);
+extern sbd_error_t *drmach_copy_rename_init(
+ drmachid_t dst_id, drmachid_t src_id,
+ struct memlist *src_copy_ml,
+ drmachid_t *pgm_id);
+extern sbd_error_t *drmach_copy_rename_fini(drmachid_t id);
+extern void drmach_copy_rename(drmachid_t id);
+extern int drmach_copy_rename_need_suspend(drmachid_t id);
+
+extern int drmach_log_sysevent(int board, char *hint, int flag,
+ int verbose);
+
+extern int drmach_verify_sr(dev_info_t *dip, int sflag);
+extern void drmach_suspend_last(void);
+extern void drmach_resume_first(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DRMACH_H_ */
diff --git a/usr/src/uts/i86pc/sys/fastboot_msg.h b/usr/src/uts/i86pc/sys/fastboot_msg.h
index a35fcf2f67..4c7ee7f8b5 100644
--- a/usr/src/uts/i86pc/sys/fastboot_msg.h
+++ b/usr/src/uts/i86pc/sys/fastboot_msg.h
@@ -45,6 +45,7 @@
fastboot_nosup_msg(FBNS_DEFAULT, "")
fastboot_nosup_msg(FBNS_SUSPEND, " after suspend/resume")
fastboot_nosup_msg(FBNS_FMAHWERR, " due to FMA recovery from hardware error")
+fastboot_nosup_msg(FBNS_HOTPLUG, " after DR operations")
/*
* Should ALWAYS be the last one.
diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h
index b652331715..67b9a07439 100644
--- a/usr/src/uts/i86pc/sys/machcpuvar.h
+++ b/usr/src/uts/i86pc/sys/machcpuvar.h
@@ -63,6 +63,7 @@ typedef void *cpu_pri_lev_t;
struct cpuid_info;
struct cpu_ucode_info;
+struct cmi_hdl;
/*
* A note about the hypervisor affinity bits: a one bit in the affinity mask
@@ -135,7 +136,9 @@ struct machcpu {
struct cpu_ucode_info *mcpu_ucode_info;
- void *mcpu_pm_mach_state;
+ void *mcpu_pm_mach_state;
+ struct cmi_hdl *mcpu_cmi_hdl;
+ void *mcpu_mach_ctx_ptr;
/*
* A stamp that is unique per processor and changes
diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h
index b98a48326e..8c79243a29 100644
--- a/usr/src/uts/i86pc/sys/machsystm.h
+++ b/usr/src/uts/i86pc/sys/machsystm.h
@@ -23,6 +23,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_MACHSYSTM_H
#define _SYS_MACHSYSTM_H
@@ -51,11 +55,30 @@ extern "C" {
#ifdef _KERNEL
+typedef enum mach_cpu_add_arg_type {
+ MACH_CPU_ARG_LOCAL_APIC,
+ MACH_CPU_ARG_LOCAL_X2APIC,
+} mach_cpu_add_arg_type_t;
+
+typedef struct mach_cpu_add_arg {
+ mach_cpu_add_arg_type_t type;
+ union {
+ struct {
+ uint32_t apic_id;
+ uint32_t proc_id;
+ } apic;
+ } arg;
+} mach_cpu_add_arg_t;
+
extern void mach_cpu_idle(void);
extern void mach_cpu_halt(char *);
extern int mach_cpu_start(cpu_t *, void *);
extern int mach_cpuid_start(processorid_t, void *);
+extern int mach_cpu_stop(cpu_t *, void *);
+extern int mach_cpu_add(mach_cpu_add_arg_t *, processorid_t *);
+extern int mach_cpu_remove(processorid_t);
extern int mach_cpu_create_device_node(cpu_t *, dev_info_t **);
+extern int mach_cpu_get_device_node(cpu_t *, dev_info_t **);
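+
+/*
+ * Example (illustrative): hot-adding a CPU described by its local APIC
+ * id.  The ids are hypothetical and mach_cpu_add() is assumed to
+ * return 0 on success.
+ *
+ *	mach_cpu_add_arg_t arg;
+ *	processorid_t cpuid;
+ *
+ *	arg.type = MACH_CPU_ARG_LOCAL_APIC;
+ *	arg.arg.apic.apic_id = 0x20;
+ *	arg.arg.apic.proc_id = 8;
+ *	if (mach_cpu_add(&arg, &cpuid) == 0)
+ *		cmn_err(CE_NOTE, "added cpu %d", cpuid);
+ */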
extern int Cpudelay;
extern void setcpudelay(void);
@@ -120,10 +143,16 @@ extern int use_mp;
extern struct cpu cpus[]; /* pointer to other cpus */
extern struct cpu *cpu[]; /* pointer to all cpus */
+/* Operation types for extended mach_cpucontext interfaces */
+#define MACH_CPUCONTEXT_OP_START 0
+#define MACH_CPUCONTEXT_OP_STOP 1
+
extern int mach_cpucontext_init(void);
extern void mach_cpucontext_fini(void);
extern void *mach_cpucontext_alloc(struct cpu *);
extern void mach_cpucontext_free(struct cpu *, void *, int);
+extern void *mach_cpucontext_xalloc(struct cpu *, int);
+extern void mach_cpucontext_xfree(struct cpu *, void *, int, int);
extern void rmp_gdt_init(rm_platter_t *);
extern uintptr_t hole_start, hole_end;
@@ -145,6 +174,52 @@ extern int dtrace_linear_pc(struct regs *rp, proc_t *p, caddr_t *linearp);
extern int force_shutdown_method;
+/* Dynamic Reconfiguration capability interface. */
+#define PLAT_DR_OPTIONS_NAME "plat-dr-options"
+#define PLAT_DR_PHYSMAX_NAME "plat-dr-physmax"
+#define PLAT_MAX_NCPUS_NAME "plat-max-ncpus"
+#define BOOT_MAX_NCPUS_NAME "boot-max-ncpus"
+#define BOOT_NCPUS_NAME "boot-ncpus"
+
+#define PLAT_DR_FEATURE_CPU 0x1
+#define PLAT_DR_FEATURE_MEMORY 0x2
+#define PLAT_DR_FEATURE_ENABLED 0x1000000
+
+#define plat_dr_enabled() \
+ plat_dr_check_capability(PLAT_DR_FEATURE_ENABLED)
+
+#define plat_dr_enable() \
+ plat_dr_enable_capability(PLAT_DR_FEATURE_ENABLED)
+
+#define plat_dr_disable_cpu() \
+ plat_dr_disable_capability(PLAT_DR_FEATURE_CPU)
+#define plat_dr_disable_memory() \
+ plat_dr_disable_capability(PLAT_DR_FEATURE_MEMORY)
+
+extern boolean_t plat_dr_support_cpu(void);
+extern boolean_t plat_dr_support_memory(void);
+extern boolean_t plat_dr_check_capability(uint64_t features);
+extern void plat_dr_enable_capability(uint64_t features);
+extern void plat_dr_disable_capability(uint64_t features);
+
+#pragma weak plat_dr_support_cpu
+#pragma weak plat_dr_support_memory
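+
+/*
+ * Example (illustrative): the plat_dr_support_*() routines are weak
+ * symbols, so a sketch of a caller would test the symbol first:
+ *
+ *	if (&plat_dr_support_memory != NULL && plat_dr_support_memory())
+ *		plat_dr_enable_capability(PLAT_DR_FEATURE_MEMORY);
+ */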
+
+/*
+ * Used to communicate DR updates to platform lgroup framework
+ */
+typedef struct {
+ uint64_t u_base;
+ uint64_t u_length;
+ uint32_t u_domain;
+ uint32_t u_device_id;
+ uint32_t u_sli_cnt;
+ uchar_t *u_sli_ptr;
+} update_membounds_t;
+
+/* Maximum physical page number (PFN) for memory DR operations. */
+extern uint64_t plat_dr_physmax;
+
#ifdef __xpv
#include <sys/xen_mmu.h>
extern page_t *page_get_high_mfn(mfn_t);
diff --git a/usr/src/uts/i86pc/sys/memnode.h b/usr/src/uts/i86pc/sys/memnode.h
index dde3931627..2bcb623617 100644
--- a/usr/src/uts/i86pc/sys/memnode.h
+++ b/usr/src/uts/i86pc/sys/memnode.h
@@ -41,8 +41,9 @@ extern "C" {
* memory slice ID and the high-order bits are the SSM nodeid.
*/
+#define MAX_MEM_NODES_PER_LGROUP 3
#ifndef MAX_MEM_NODES
-#define MAX_MEM_NODES (8)
+#define MAX_MEM_NODES (8 * MAX_MEM_NODES_PER_LGROUP)
#endif /* MAX_MEM_NODES */
#define PFN_2_MEM_NODE(pfn) \
@@ -57,14 +58,12 @@ extern "C" {
*/
extern int plat_pfn_to_mem_node(pfn_t);
-extern int plat_lgrphand_to_mem_node(lgrp_handle_t);
extern void plat_assign_lgrphand_to_mem_node(lgrp_handle_t, int);
extern lgrp_handle_t plat_mem_node_to_lgrphand(int);
extern void plat_slice_add(pfn_t, pfn_t);
extern void plat_slice_del(pfn_t, pfn_t);
#pragma weak plat_pfn_to_mem_node
-#pragma weak plat_lgrphand_to_mem_node
#pragma weak plat_mem_node_to_lgrphand
#pragma weak plat_slice_add
#pragma weak plat_slice_del
diff --git a/usr/src/uts/i86pc/sys/psm_types.h b/usr/src/uts/i86pc/sys/psm_types.h
index 17e9db17b1..24ad7d700f 100644
--- a/usr/src/uts/i86pc/sys/psm_types.h
+++ b/usr/src/uts/i86pc/sys/psm_types.h
@@ -23,12 +23,14 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_PSM_TYPES_H
#define _SYS_PSM_TYPES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Platform Specific Module Types
*/
@@ -88,6 +90,31 @@ typedef struct psm_state_req {
} req;
} psm_state_request_t;
+typedef enum psm_cpu_op_e {
+ PSM_CPU_ADD = 1,
+ PSM_CPU_REMOVE,
+ PSM_CPU_STOP
+} psm_cpu_op_t;
+
+typedef struct psm_cpu_request {
+ psm_cpu_op_t pcr_cmd;
+ union {
+ struct {
+ processorid_t cpuid;
+ void *argp;
+ } cpu_add;
+
+ struct {
+ processorid_t cpuid;
+ } cpu_remove;
+
+ struct {
+ processorid_t cpuid;
+ void *ctx;
+ } cpu_stop;
+ } req;
+} psm_cpu_request_t;
+
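+/*
+ * Example (illustrative): asking the PSM to stop a CPU during removal.
+ * "pops" stands for a pointer to the active struct psm_ops and, like
+ * the cpu and ctx values below, is hypothetical; the return convention
+ * is a sketch, not definitive.
+ *
+ *	psm_cpu_request_t req;
+ *
+ *	req.pcr_cmd = PSM_CPU_STOP;
+ *	req.req.cpu_stop.cpuid = cp->cpu_id;
+ *	req.req.cpu_stop.ctx = ctx;
+ *	if ((*pops->psm_cpu_ops)(&req) != 0)
+ *		(handle failure)
+ */
+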
struct psm_ops {
int (*psm_probe)(void);
@@ -106,7 +133,7 @@ struct psm_ops {
void (*psm_unset_idlecpu)(processorid_t cpun);
#if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \
- defined(PSMI_1_6)
+ defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_clkinit)(int hertz);
#else
void (*psm_clkinit)(int hertz);
@@ -117,14 +144,14 @@ struct psm_ops {
hrtime_t (*psm_gethrtime)(void);
processorid_t (*psm_get_next_processorid)(processorid_t cpu_id);
-#if defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_cpu_start)(processorid_t cpun, caddr_t ctxt);
#else
void (*psm_cpu_start)(processorid_t cpun, caddr_t rm_code);
#endif
int (*psm_post_cpu_start)(void);
#if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \
- defined(PSMI_1_5) || defined(PSMI_1_6)
+ defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_shutdown)(int cmd, int fcn);
#else
void (*psm_shutdown)(void);
@@ -140,26 +167,30 @@ struct psm_ops {
#endif
void (*psm_notify_error)(int level, char *errmsg);
#if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \
- defined(PSMI_1_5) || defined(PSMI_1_6)
+ defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_notify_func)(int msg);
#endif
#if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \
- defined(PSMI_1_6)
+ defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_timer_reprogram)(hrtime_t time);
void (*psm_timer_enable)(void);
void (*psm_timer_disable)(void);
void (*psm_post_cyclic_setup)(void *arg);
#endif
-#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6) || \
+ defined(PSMI_1_7)
void (*psm_preshutdown)(int cmd, int fcn);
#endif
-#if defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_intr_ops)(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
psm_intr_op_t op, int *result);
#endif
-#if defined(PSMI_1_6)
+#if defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_state)(psm_state_request_t *request);
#endif
+#if defined(PSMI_1_7)
+ int (*psm_cpu_ops)(psm_cpu_request_t *reqp);
+#endif
};
@@ -183,7 +214,8 @@ struct psm_info {
#define PSM_INFO_VER01_3 0x8604
#define PSM_INFO_VER01_4 0x8605
#define PSM_INFO_VER01_5 0x8606
-#define PSM_INFO_VER01_6 0x8706
+#define PSM_INFO_VER01_6 0x8607
+#define PSM_INFO_VER01_7 0x8608
#define PSM_INFO_VER01_X (PSM_INFO_VER01_1 & 0xFFF0) /* ver 1.X */
/*
diff --git a/usr/src/uts/i86pc/sys/rm_platter.h b/usr/src/uts/i86pc/sys/rm_platter.h
index b21c54dd63..48e141126b 100644
--- a/usr/src/uts/i86pc/sys/rm_platter.h
+++ b/usr/src/uts/i86pc/sys/rm_platter.h
@@ -22,12 +22,14 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_RM_PLATTER_H
#define _SYS_RM_PLATTER_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/tss.h>
#include <sys/segments.h>
@@ -36,8 +38,12 @@
extern "C" {
#endif
+#define RM_PLATTER_CODE_SIZE 0x400
+#define RM_PLATTER_CPU_HALT_CODE_SIZE 0x100
+
typedef struct rm_platter {
- char rm_code[1024];
+ char rm_code[RM_PLATTER_CODE_SIZE];
+ char rm_cpu_halt_code[RM_PLATTER_CPU_HALT_CODE_SIZE];
#if defined(__amd64)
/*
* The compiler will want to 64-bit align the 64-bit rm_gdt_base
@@ -59,7 +65,7 @@ typedef struct rm_platter {
*/
uint32_t rm_idt_pad;
#endif /* __amd64 */
- ushort_t rm_filler2; /* till I am sure that pragma works */
+ ushort_t rm_cpu_halted; /* non-zero if CPU has been halted */
ushort_t rm_idt_lim; /* stuff for lidt */
gate_desc_t *rm_idt_base;
uint_t rm_pdbr; /* cr3 value */
diff --git a/usr/src/uts/i86pc/sys/sbd_ioctl.h b/usr/src/uts/i86pc/sys/sbd_ioctl.h
new file mode 100644
index 0000000000..1eab543aa3
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/sbd_ioctl.h
@@ -0,0 +1,655 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SBD_IOCTL_H
+#define _SBD_IOCTL_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/obpdefs.h>
+#include <sys/processor.h>
+#include <sys/param.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ASM
+typedef enum {
+ SBD_COMP_NONE,
+ SBD_COMP_CPU,
+ SBD_COMP_MEM,
+ SBD_COMP_IO,
+ SBD_COMP_CMP,
+ SBD_COMP_UNKNOWN
+} sbd_comp_type_t;
+
+typedef enum {
+ SBD_STAT_NONE = 0,
+ SBD_STAT_EMPTY,
+ SBD_STAT_DISCONNECTED,
+ SBD_STAT_CONNECTED,
+ SBD_STAT_UNCONFIGURED,
+ SBD_STAT_CONFIGURED
+} sbd_state_t;
+
+typedef enum {
+ SBD_COND_UNKNOWN = 0,
+ SBD_COND_OK,
+ SBD_COND_FAILING,
+ SBD_COND_FAILED,
+ SBD_COND_UNUSABLE
+} sbd_cond_t;
+
+typedef int sbd_busy_t;
+
+#define SBD_MAX_UNSAFE 16
+#define SBD_TYPE_LEN 12
+#define SBD_NULL_UNIT -1
+
+typedef struct {
+ sbd_comp_type_t c_type;
+ int c_unit;
+ char c_name[OBP_MAXPROPNAME];
+} sbd_comp_id_t;
+
+typedef struct {
+ sbd_comp_id_t c_id;
+ sbd_state_t c_ostate;
+ sbd_cond_t c_cond;
+ sbd_busy_t c_busy;
+ uint_t c_sflags;
+ time_t c_time;
+} sbd_cm_stat_t;
+
+#define ci_type c_id.c_type
+#define ci_unit c_id.c_unit
+#define ci_name c_id.c_name
+
+typedef struct {
+ sbd_cm_stat_t cs_cm;
+ int cs_isbootproc;
+ processorid_t cs_cpuid;
+ int cs_speed;
+ int cs_ecache;
+} sbd_cpu_stat_t;
+
+#define cs_type cs_cm.ci_type
+#define cs_unit cs_cm.ci_unit
+#define cs_name cs_cm.ci_name
+#define cs_ostate cs_cm.c_ostate
+#define cs_cond cs_cm.c_cond
+#define cs_busy cs_cm.c_busy
+#define cs_suspend cs_cm.c_sflags
+#define cs_time cs_cm.c_time
+
+typedef struct {
+ sbd_cm_stat_t ms_cm;
+ int ms_interleave;
+ pfn_t ms_basepfn;
+ pgcnt_t ms_totpages;
+ pgcnt_t ms_detpages;
+ pgcnt_t ms_pageslost;
+ pgcnt_t ms_managed_pages;
+ pgcnt_t ms_noreloc_pages;
+ pgcnt_t ms_noreloc_first;
+ pgcnt_t ms_noreloc_last;
+ int ms_cage_enabled;
+ int ms_peer_is_target; /* else peer is source */
+ char ms_peer_ap_id[MAXPATHLEN]; /* board's AP name */
+} sbd_mem_stat_t;
+
+#define ms_type ms_cm.ci_type
+#define ms_unit ms_cm.ci_unit
+#define ms_name ms_cm.ci_name
+#define ms_ostate ms_cm.c_ostate
+#define ms_cond ms_cm.c_cond
+#define ms_busy ms_cm.c_busy
+#define ms_suspend ms_cm.c_sflags
+#define ms_time ms_cm.c_time
+
+typedef struct {
+ sbd_cm_stat_t is_cm;
+ int is_referenced;
+ int is_unsafe_count;
+ int is_unsafe_list[SBD_MAX_UNSAFE];
+ char is_pathname[MAXPATHLEN];
+} sbd_io_stat_t;
+
+#define is_type is_cm.ci_type
+#define is_unit is_cm.ci_unit
+#define is_name is_cm.ci_name
+#define is_ostate is_cm.c_ostate
+#define is_cond is_cm.c_cond
+#define is_busy is_cm.c_busy
+#define is_suspend is_cm.c_sflags
+#define is_time is_cm.c_time
+
+/* This constant must be >= the maximum cores per CMP on any platform. */
+#define SBD_MAX_CORES_PER_CMP 64
+
+typedef struct {
+ sbd_cm_stat_t ps_cm;
+ processorid_t ps_cpuid[SBD_MAX_CORES_PER_CMP];
+ int ps_ncores;
+ int ps_speed;
+ int ps_ecache;
+} sbd_cmp_stat_t;
+
+#define ps_type ps_cm.ci_type
+#define ps_unit ps_cm.ci_unit
+#define ps_name ps_cm.ci_name
+#define ps_ostate ps_cm.c_ostate
+#define ps_cond ps_cm.c_cond
+#define ps_busy ps_cm.c_busy
+#define ps_suspend ps_cm.c_sflags
+#define ps_time ps_cm.c_time
+
+typedef union {
+ sbd_cm_stat_t d_cm;
+ sbd_cpu_stat_t d_cpu;
+ sbd_mem_stat_t d_mem;
+ sbd_io_stat_t d_io;
+ sbd_cmp_stat_t d_cmp;
+} sbd_dev_stat_t;
+
+#define ds_type d_cm.ci_type
+#define ds_unit d_cm.ci_unit
+#define ds_name d_cm.ci_name
+#define ds_ostate d_cm.c_ostate
+#define ds_cond d_cm.c_cond
+#define ds_busy d_cm.c_busy
+#define ds_suspend d_cm.c_sflags
+#define ds_time d_cm.c_time
+
+#define SBD_MAX_INFO 256
+
+typedef struct {
+ int s_board;
+ char s_type[SBD_TYPE_LEN];
+ char s_info[SBD_MAX_INFO];
+ sbd_state_t s_rstate;
+ sbd_state_t s_ostate;
+ sbd_cond_t s_cond;
+ sbd_busy_t s_busy;
+ time_t s_time;
+ uint_t s_power:1;
+ uint_t s_assigned:1;
+ uint_t s_platopts;
+ int s_nstat;
+ sbd_dev_stat_t s_stat[1];
+} sbd_stat_t;
+
+typedef struct {
+ sbd_comp_id_t c_id;
+ uint_t c_flags;
+ int c_len;
+ caddr_t c_opts;
+} sbd_cm_cmd_t;
+
+typedef struct {
+ sbd_cm_cmd_t g_cm;
+ int g_ncm;
+} sbd_getncm_cmd_t;
+
+typedef struct {
+ sbd_cm_cmd_t s_cm;
+ int s_nbytes;
+ caddr_t s_statp;
+} sbd_stat_cmd_t;
+
+typedef union {
+ sbd_cm_cmd_t cmd_cm;
+ sbd_getncm_cmd_t cmd_getncm;
+ sbd_stat_cmd_t cmd_stat;
+} sbd_cmd_t;
+
+typedef struct {
+ int e_code;
+ char e_rsc[MAXPATHLEN];
+} sbd_error_t;
+
+typedef struct {
+ sbd_cmd_t i_cmd;
+ sbd_error_t i_err;
+} sbd_ioctl_arg_t;
+
+typedef struct {
+ int t_base;
+ int t_bnd;
+ char **t_text;
+} sbd_etab_t;
+
+#define i_flags i_cmd.cmd_cm.c_flags
+#define i_len i_cmd.cmd_cm.c_len
+#define i_opts i_cmd.cmd_cm.c_opts
+#define ic_type i_cmd.cmd_cm.ci_type
+#define ic_name i_cmd.cmd_cm.ci_name
+#define ic_unit i_cmd.cmd_cm.ci_unit
+#define ie_code i_err.e_code
+#define ie_rsc i_err.e_rsc
+
+#define _SBD_IOC (('D' << 16) | ('R' << 8))
+
+#define SBD_CMD_ASSIGN (_SBD_IOC | 0x01)
+#define SBD_CMD_UNASSIGN (_SBD_IOC | 0x02)
+#define SBD_CMD_POWERON (_SBD_IOC | 0x03)
+#define SBD_CMD_POWEROFF (_SBD_IOC | 0x04)
+#define SBD_CMD_TEST (_SBD_IOC | 0x05)
+#define SBD_CMD_CONNECT (_SBD_IOC | 0x06)
+#define SBD_CMD_CONFIGURE (_SBD_IOC | 0x07)
+#define SBD_CMD_UNCONFIGURE (_SBD_IOC | 0x08)
+#define SBD_CMD_DISCONNECT (_SBD_IOC | 0x09)
+#define SBD_CMD_STATUS (_SBD_IOC | 0x0a)
+#define SBD_CMD_GETNCM (_SBD_IOC | 0x0b)
+#define SBD_CMD_PASSTHRU (_SBD_IOC | 0x0c)
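+
+/*
+ * Example (illustrative, userland): issuing SBD_CMD_GETNCM against an
+ * attachment point to query its component count.  ap_path (the
+ * attachment point device path) is hypothetical and error handling is
+ * elided.
+ *
+ *	sbd_ioctl_arg_t ia;
+ *	int fd = open(ap_path, O_RDWR);
+ *
+ *	bzero(&ia, sizeof (ia));
+ *	if (ioctl(fd, SBD_CMD_GETNCM, &ia) == 0)
+ *		(void) printf("%d components\n", ia.i_cmd.cmd_getncm.g_ncm);
+ */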
+
+#define SBD_CHECK_SUSPEND(cmd, c_sflags) \
+ (((c_sflags) >> (((cmd) & 0xf) - 1)) & 0x01)
+
+#define SBD_SET_SUSPEND(cmd, c_sflags) \
+ ((c_sflags) |= (0x01 << (((cmd) & 0xf) - 1)))
+
+#define SBD_CHECK_PLATOPTS(cmd, c_platopts) \
+ (((c_platopts) >> (((cmd) & 0xf) - 1)) & 0x01)
+
+#define SBD_SET_PLATOPTS(cmd, c_platopts) \
+ ((c_platopts) &= ~(0x01 << (((cmd) & 0xf) - 1)))
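+
+/*
+ * Example (illustrative): for SBD_CMD_UNCONFIGURE, (cmd & 0xf) is 0x8,
+ * so bit 7 of the flag word carries that command's suspend flag.
+ *
+ *	uint_t sflags = 0;
+ *
+ *	SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE, sflags);
+ *	ASSERT(SBD_CHECK_SUSPEND(SBD_CMD_UNCONFIGURE, sflags) == 1);
+ */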
+
+#define SBD_FLAG_FORCE 0x1
+#define SBD_FLAG_ALLCMP 0x2
+#define SBD_FLAG_QUIESCE_OKAY 0x4
+
+#if defined(_SYSCALL32)
+
+typedef struct {
+ int32_t c_type;
+ int32_t c_unit;
+ char c_name[OBP_MAXPROPNAME];
+} sbd_comp_id32_t;
+
+typedef struct {
+ sbd_comp_id32_t c_id;
+ int32_t c_ostate;
+ int32_t c_cond;
+ int32_t c_busy;
+ uint32_t c_sflags;
+ time32_t c_time;
+} sbd_cm_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t cs_cm;
+ int32_t cs_isbootproc;
+ int32_t cs_cpuid;
+ int32_t cs_speed;
+ int32_t cs_ecache;
+} sbd_cpu_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t ms_cm;
+ int32_t ms_interleave;
+ uint32_t ms_basepfn;
+ uint32_t ms_totpages;
+ uint32_t ms_detpages;
+ int32_t ms_pageslost;
+ uint32_t ms_managed_pages;
+ uint32_t ms_noreloc_pages;
+ uint32_t ms_noreloc_first;
+ uint32_t ms_noreloc_last;
+ int32_t ms_cage_enabled;
+ int32_t ms_peer_is_target;
+ char ms_peer_ap_id[MAXPATHLEN];
+} sbd_mem_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t is_cm;
+ int32_t is_referenced;
+ int32_t is_unsafe_count;
+ int32_t is_unsafe_list[SBD_MAX_UNSAFE];
+ char is_pathname[MAXPATHLEN];
+} sbd_io_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t ps_cm;
+ int32_t ps_cpuid[SBD_MAX_CORES_PER_CMP];
+ int32_t ps_ncores;
+ int32_t ps_speed;
+ int32_t ps_ecache;
+} sbd_cmp_stat32_t;
+
+typedef union {
+ sbd_cm_stat32_t d_cm;
+ sbd_cpu_stat32_t d_cpu;
+ sbd_mem_stat32_t d_mem;
+ sbd_io_stat32_t d_io;
+ sbd_cmp_stat32_t d_cmp;
+} sbd_dev_stat32_t;
+
+typedef struct {
+ int32_t s_board;
+ char s_type[SBD_TYPE_LEN];
+ char s_info[SBD_MAX_INFO];
+ int32_t s_rstate;
+ int32_t s_ostate;
+ int32_t s_cond;
+ int32_t s_busy;
+ time32_t s_time;
+ uint32_t s_power:1;
+ uint32_t s_assigned:1;
+ uint32_t s_platopts;
+ int32_t s_nstat;
+ sbd_dev_stat32_t s_stat[1];
+} sbd_stat32_t;
+
+typedef struct {
+ int32_t e_code;
+ char e_rsc[MAXPATHLEN];
+} sbd_error32_t;
+
+typedef struct {
+ sbd_comp_id32_t c_id;
+ uint32_t c_flags;
+ int32_t c_len;
+ caddr32_t c_opts;
+} sbd_cm_cmd32_t;
+
+typedef struct {
+ sbd_cm_cmd32_t g_cm;
+ int32_t g_ncm;
+} sbd_getncm_cmd32_t;
+
+typedef struct {
+ sbd_cm_cmd32_t s_cm;
+ int32_t s_nbytes;
+ caddr32_t s_statp;
+} sbd_stat_cmd32_t;
+
+typedef union {
+ sbd_cm_cmd32_t cmd_cm;
+ sbd_getncm_cmd32_t cmd_getncm;
+ sbd_stat_cmd32_t cmd_stat;
+} sbd_cmd32_t;
+
+typedef struct {
+ sbd_cmd32_t i_cmd;
+ sbd_error32_t i_err;
+} sbd_ioctl_arg32_t;
+
+typedef struct {
+ int32_t t_base;
+ int32_t t_bnd;
+ char **t_text;
+} sbd_etab32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _ASM */
+
+/* Common error codes */
+
+#define ESBD_NOERROR 0 /* no error */
+#define ESBD_INTERNAL 1 /* Internal error */
+#define ESBD_NOMEM 2 /* Insufficient memory */
+#define ESBD_PROTO 3 /* Protocol error */
+#define ESBD_BUSY 4 /* Device busy */
+#define ESBD_NODEV 5 /* No such device */
+#define ESBD_ALREADY 6 /* Operation already in progress */
+#define ESBD_IO 7 /* I/O error */
+#define ESBD_FAULT 8 /* Bad address */
+#define ESBD_EMPTY_BD 9 /* No device(s) on board */
+#define ESBD_INVAL 10 /* Invalid argument */
+#define ESBD_STATE 11 /* Invalid state transition */
+#define ESBD_FATAL_STATE 12 /* Device in fatal state */
+#define ESBD_OUTSTANDING 13 /* Outstanding error */
+#define ESBD_SUSPEND 14 /* Device failed to suspend */
+#define ESBD_RESUME 15 /* Device failed to resume */
+#define ESBD_UTHREAD 16 /* Cannot stop user thread */
+#define ESBD_RTTHREAD 17 /* Cannot quiesce realtime thread */
+#define ESBD_KTHREAD 18 /* Cannot stop kernel thread */
+#define ESBD_OFFLINE 19 /* Failed to off-line */
+#define ESBD_ONLINE 20 /* Failed to on-line */
+#define ESBD_CPUSTART 21 /* Failed to start CPU */
+#define ESBD_CPUSTOP 22 /* Failed to stop CPU */
+#define ESBD_INVAL_COMP 23 /* Invalid component type */
+#define ESBD_KCAGE_OFF 24 /* Kernel cage is disabled */
+#define ESBD_NO_TARGET 25 /* No available memory target */
+#define ESBD_HW_PROGRAM 26 /* Hardware programming error */
+#define ESBD_MEM_NOTVIABLE 27 /* VM viability test failed */
+#define ESBD_MEM_REFUSED 28 /* Memory operation refused */
+#define ESBD_MEM_NONRELOC 29 /* Non-relocatable pages in span */
+#define ESBD_MEM_CANCELLED 30 /* Memory operation cancelled */
+#define ESBD_MEMFAIL 31 /* Memory operation failed */
+#define ESBD_MEMONLINE 32 /* Can't unconfig cpu if mem online */
+#define ESBD_QUIESCE_REQD 33
+ /* Operator confirmation for quiesce is required */
+#define ESBD_MEMINTLV 34
+ /* Memory is interleaved across boards */
+#define ESBD_CPUONLINE 35
+ /* Can't config memory if not all cpus are online */
+#define ESBD_UNSAFE 36 /* Unsafe driver present */
+#define ESBD_INVAL_OPT 37 /* option invalid */
+
+/* Starcat error codes */
+
+#define ESTC_NONE 1000 /* No error */
+#define ESTC_GETPROP 1001 /* Cannot read property value */
+#define ESTC_BNUM 1002 /* Invalid board number */
+#define ESTC_CONFIGBUSY 1003
+ /* Cannot proceed; Board is configured or busy */
+#define ESTC_PROBE 1004 /* Solaris failed to probe */
+#define ESTC_DEPROBE 1005 /* Solaris failed to deprobe */
+#define ESTC_MOVESIGB 1006 /* Firmware move-cpu0 failed */
+#define ESTC_SUPPORT 1007 /* Operation not supported */
+#define ESTC_DRVFAIL 1008 /* Device driver failure */
+#define ESTC_UNKPTCMD 1012 /* Unrecognized platform command */
+#define ESTC_NOTID 1013
+ /* drmach parameter is not a valid ID */
+#define ESTC_INAPPROP 1014
+ /* drmach parameter is inappropriate for operation */
+#define ESTC_INTERNAL 1015 /* Unexpected internal condition */
+#define ESTC_MBXRQST 1016
+ /* Mailbox framework failure: outgoing */
+#define ESTC_MBXRPLY 1017
+ /* Mailbox framework failure: incoming */
+#define ESTC_NOACL 1018 /* Board is not in domain ACL */
+#define ESTC_NOT_ASSIGNED 1019 /* Board is not assigned to domain */
+#define ESTC_NOT_ACTIVE 1020 /* Board is not active */
+#define ESTC_EMPTY_SLOT 1021 /* Slot is empty */
+#define ESTC_POWER_OFF 1022 /* Board is powered off */
+#define ESTC_TEST_IN_PROGRESS 1023 /* Board is already being tested */
+#define ESTC_TESTING_BUSY 1024
+ /* Wait: All SC test resources are in use */
+#define ESTC_TEST_REQUIRED 1025 /* Board requires test prior to use */
+#define ESTC_TEST_ABORTED 1026 /* Board test has been aborted */
+#define ESTC_MBOX_UNKNOWN 1027
+ /* Unknown error type received from SC */
+#define ESTC_TEST_STATUS_UNKNOWN 1028
+ /* Test completed with unknown status */
+#define ESTC_TEST_RESULT_UNKNOWN 1029
+ /* Unknown test result returned by SC */
+#define ESTC_TEST_FAILED 1030
+ /* SMS hpost reported error, see POST log for details */
+#define ESTC_UNAVAILABLE 1031 /* Slot is unavailable to the domain */
+#define ESTC_NZ_LPA 1032 /* Nonzero LPA not yet supported */
+#define ESTC_IOSWITCH 1033
+ /* Cannot unconfigure I/O board: tunnel switch failed */
+#define ESTC_IOCAGE_NO_CPU_AVAIL 1034
+ /* No CPU available for I/O cage test. */
+#define ESTC_SMS_ERR_RECOVERABLE 1035
+ /* SMS reported recoverable error: check SMS status and Retry */
+#define ESTC_SMS_ERR_UNRECOVERABLE 1036
+ /* SMS reported unrecoverable error: Board is Unusable */
+#define ESTC_NWSWITCH 1037
+ /* Cannot unconfigure I/O board: network switch failed */
+
+/* Starfire error codes */
+
+#define ESTF_NONE 2000 /* No error */
+#define ESTF_GETPROP 2001 /* Cannot read property value */
+#define ESTF_GETPROPLEN 2002 /* Cannot determine property length */
+#define ESTF_BNUM 2003 /* Invalid board number */
+#define ESTF_CONFIGBUSY 2004
+ /* Cannot proceed; Board is configured or busy */
+#define ESTF_NOCPUID 2005 /* No CPU specified for connect */
+#define ESTF_PROBE 2006 /* Firmware probe failed */
+#define ESTF_DEPROBE 2007 /* Firmware deprobe failed */
+#define ESTF_MOVESIGB 2008 /* Firmware move-cpu0 failed */
+#define ESTF_JUGGLE 2009 /* Cannot move SIGB assignment */
+#define ESTF_HASSIGB 2010
+ /* Cannot disconnect CPU; SIGB is currently assigned */
+#define ESTF_SUPPORT 2011 /* Operation not supported */
+#define ESTF_DRVFAIL 2012 /* Device driver failure */
+#define ESTF_SETCPUVAL 2013
+ /* Must specify a CPU on the given board */
+#define ESTF_NODEV 2014 /* No such device */
+#define ESTF_INTERBOARD 2015
+ /* Memory configured with inter-board interleaving */
+#define ESTF_UNKPTCMD 2016 /* Unrecognized platform command */
+#define ESTF_NOTID 2017 /* drmach parameter is not a valid ID */
+#define ESTF_INAPPROP 2018
+ /* drmach parameter is inappropriate for operation */
+#define ESTF_INTERNAL 2019 /* Unexpected internal condition */
+
+/* Daktari error codes */
+
+#define EDAK_NONE 3000 /* no error */
+#define EDAK_INTERNAL 3001 /* Internal error */
+#define EDAK_NOFRUINFO 3002 /* Didn't receive fru info */
+#define EDAK_NONDR_BOARD 3003
+ /* DR is not supported on this board type */
+#define EDAK_POWERON 3004 /* Power on request failed */
+#define EDAK_POWEROK 3005 /* Failed to power on */
+#define EDAK_INTERRUPTED 3006 /* Operation interrupted */
+#define EDAK_BOARDINIT 3007 /* Board initialization failed */
+#define EDAK_CPUINIT 3008 /* CPU initialization failed */
+#define EDAK_MEMFAIL 3009 /* Memory operation failed */
+
+/* Serengeti error codes */
+
+#define ESGT_NONE 4000 /* no error */
+#define ESGT_INTERNAL 4001 /* Internal error */
+#define ESGT_INVAL 4002 /* Invalid argument */
+#define ESGT_MEMFAIL 4003 /* Memory operation failed */
+#define ESGT_PROBE 4004 /* Board probe failed */
+#define ESGT_DEPROBE 4005 /* Board deprobe failed */
+#define ESGT_JUGGLE_BOOTPROC 4006 /* Failed to juggle bootproc */
+#define ESGT_NOT_CPUTYPE 4007 /* Not a cpu device */
+#define ESGT_NO_DEV_TYPE 4008 /* Cannot find device type */
+#define ESGT_BAD_PORTID 4009 /* Bad port id */
+#define ESGT_RESUME 4010 /* Failed to resume device */
+#define ESGT_SUSPEND 4011 /* Failed to suspend device */
+#define ESGT_KTHREAD 4012 /* failed to stop kernel thd */
+#define ESGT_UNSAFE 4013 /* unsafe */
+#define ESGT_RTTHREAD 4014 /* real time threads */
+#define ESGT_UTHREAD 4015 /* failed to stop user thd */
+#define ESGT_PROM_ATTACH 4016 /* prom failed attach board */
+#define ESGT_PROM_DETACH 4017 /* prom failed detach board */
+#define ESGT_SC_ERR 4018 /* sc return a failure */
+#define ESGT_GET_BOARD_STAT 4019 /* Failed to obtain board information */
+#define ESGT_WAKEUPCPU 4020 /* Failed to wake up cpu */
+#define ESGT_STOPCPU 4021 /* Failed to stop cpu */
+/* Serengeti SC return codes */
+#define ESGT_HW_FAIL 4022 /* Hardware Failure */
+#define ESGT_BD_ACCESS 4023 /* Board access denied */
+#define ESGT_STALE_CMP 4024 /* Stale components */
+#define ESGT_STALE_OBJ 4025 /* Stale objects */
+#define ESGT_NO_SEPROM_SPACE 4026 /* No SEPROM space */
+#define ESGT_NOT_SUPP 4027 /* Operation not supported */
+#define ESGT_NO_MEM 4028 /* No Memory */
+
+/* OPL error codes */
+
+#define EOPL_GETPROP 5001 /* Cannot read property value */
+#define EOPL_BNUM 5002 /* Invalid board number */
+#define EOPL_CONFIGBUSY 5003
+ /* Cannot proceed; Board is configured or busy */
+#define EOPL_PROBE 5004 /* Firmware probe failed */
+#define EOPL_DEPROBE 5005 /* Firmware deprobe failed */
+#define EOPL_SUPPORT 5006 /* Operation not supported */
+#define EOPL_DRVFAIL 5007 /* Device driver failure */
+#define EOPL_UNKPTCMD 5008 /* Unrecognized platform command */
+#define EOPL_NOTID 5009 /* drmach parameter is not a valid ID */
+#define EOPL_INAPPROP 5010
+ /* drmach parameter is inappropriate for operation */
+#define EOPL_INTERNAL 5011 /* Unexpected internal condition */
+#define EOPL_FINDDEVICE 5012 /* Firmware cannot find node. */
+#define EOPL_MC_SETUP 5013 /* Cannot setup memory node */
+#define EOPL_CPU_STATE 5014 /* Invalid CPU/core state */
+#define EOPL_MC_OPL 5015 /* Cannot find mc-opl interface */
+#define EOPL_SCF_FMEM 5016 /* Cannot find scf_fmem interface */
+#define EOPL_FMEM_SETUP 5017 /* Error setting up FMEM buffer */
+#define EOPL_SCF_FMEM_START 5018 /* scf_fmem_start error */
+#define EOPL_FMEM_ERROR 5019 /* FMEM error */
+#define EOPL_SCF_FMEM_CANCEL 5020 /* scf_fmem_cancel error */
+#define EOPL_FMEM_XC_TIMEOUT 5021 /* xcall timeout */
+#define EOPL_FMEM_COPY_TIMEOUT 5022 /* DR parellel copy timeout */
+#define EOPL_FMEM_SCF_BUSY 5023 /* SCF busy */
+#define EOPL_FMEM_RETRY_OUT 5024 /* SCF IO Retry Error */
+#define EOPL_FMEM_TIMEOUT 5025 /* FMEM command timeout */
+#define EOPL_FMEM_HW_ERROR 5026 /* Hardware error */
+#define EOPL_FMEM_TERMINATE 5027 /* FMEM operation Terminated */
+#define EOPL_FMEM_COPY_ERROR 5028 /* Memory copy error */
+#define EOPL_FMEM_SCF_ERR 5029 /* SCF error */
+#define EOPL_MIXED_CPU 5030
+ /* Cannot add SPARC64-VI to domain booted with all SPARC64-VII CPUs */
+#define EOPL_FMEM_SCF_OFFLINE 5031 /* SCF OFFLINE */
+
+/* X86 error codes */
+
+#define EX86_GETPROP 10001 /* Cannot read property value */
+#define EX86_BNUM 10002 /* Invalid board number */
+#define EX86_NOTID 10003 /* drmach parameter is not a valid ID */
+#define EX86_INAPPROP 10004
+ /* drmach parameter is inappropriate for operation */
+#define EX86_PROBE 10005 /* Firmware probe failed */
+#define EX86_DEPROBE 10006 /* Firmware deprobe failed */
+#define EX86_SUPPORT 10007 /* Operation not supported */
+#define EX86_INTERNAL 10008 /* Unexpected internal condition */
+#define EX86_CONFIGBUSY 10009
+ /* Cannot proceed, board is configured or busy */
+#define EX86_POWERBUSY 10010 /* Cannot proceed, board is powered */
+#define EX86_CONNECTBUSY 10011 /* Cannot proceed, board is connected */
+#define EX86_INVALID_ARG 10012 /* Invalid argument */
+#define EX86_DRVFAIL 10013 /* Device driver failure */
+#define EX86_UNKPTCMD 10014 /* Unrecognized platform command */
+#define EX86_ALLOC_CPUID 10015 /* Failed to allocate processor id */
+#define EX86_FREE_CPUID 10016 /* Failed to release processor id */
+#define EX86_POWERON 10017 /* Failed to power on board */
+#define EX86_POWEROFF 10018 /* Failed to power off board */
+#define EX86_MC_SETUP 10019 /* Cannot setup memory node */
+#define EX86_ACPIWALK 10020 /* Cannot walk ACPI namespace */
+#define EX86_WALK_DEPENDENCY 10021
+ /* Failed to check dependency for board */
+#define EX86_IN_FAILURE 10022 /* Board is in failure state */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SBD_IOCTL_H */
diff --git a/usr/src/uts/i86pc/sys/x_call.h b/usr/src/uts/i86pc/sys/x_call.h
index ca778a4c21..65b5813b95 100644
--- a/usr/src/uts/i86pc/sys/x_call.h
+++ b/usr/src/uts/i86pc/sys/x_call.h
@@ -68,6 +68,8 @@ typedef struct xc_msg {
#if defined(_KERNEL)
extern void xc_init_cpu(struct cpu *);
+extern void xc_fini_cpu(struct cpu *);
+extern int xc_flush_cpu(struct cpu *);
extern uint_t xc_serv(caddr_t, caddr_t);
#define CPUSET2BV(set) ((ulong_t *)(void *)&(set))
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index ade54caab2..6ff1216d76 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
@@ -188,6 +192,10 @@ int pt_kern;
extern void atomic_orb(uchar_t *addr, uchar_t val);
extern void atomic_andb(uchar_t *addr, uchar_t val);
+#ifndef __xpv
+extern pfn_t memseg_get_start(struct memseg *);
+#endif
+
#define PP_GETRM(pp, rmmask) (pp->p_nrm & rmmask)
#define PP_ISMOD(pp) PP_GETRM(pp, P_MOD)
#define PP_ISREF(pp) PP_GETRM(pp, P_REF)
@@ -4050,8 +4058,8 @@ hat_cpu_offline(struct cpu *cpup)
ASSERT(cpup != CPU);
CPUSET_ATOMIC_DEL(khat_cpuset, cpup->cpu_id);
- x86pte_cpu_fini(cpup);
hat_vlp_teardown(cpup);
+ x86pte_cpu_fini(cpup);
}
/*
@@ -4363,7 +4371,96 @@ void
hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
{}
-#ifdef __xpv
+#ifndef __xpv
+void
+hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
+ offset_t kpm_pages_off)
+{
+ _NOTE(ARGUNUSED(nkpmpgs, kpm_pages_off));
+ pfn_t base, end;
+
+ /*
+ * kphysm_add_memory_dynamic() does not set nkpmpgs
+ * when page_t memory is externally allocated. That
+ * code must properly calculate nkpmpgs in all cases
+ * if nkpmpgs needs to be used at some point.
+ */
+
+ /*
+ * The meta (page_t) pages for dynamically added memory are allocated
+ * either from the incoming memory itself or from existing memory.
+	 * In the former case the base of the incoming pages will differ
+	 * from the base of the dynamic segment, so call memseg_get_start()
+	 * to obtain the actual base of the incoming memory in either case.
+ */
+
+ base = memseg_get_start(msp);
+ end = msp->pages_end;
+
+ hat_devload(kas.a_hat, kpm_vbase + mmu_ptob(base),
+ mmu_ptob(end - base), base, PROT_READ | PROT_WRITE,
+ HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
+}
+
+void
+hat_kpm_addmem_mseg_insert(struct memseg *msp)
+{
+ _NOTE(ARGUNUSED(msp));
+}
+
+void
+hat_kpm_addmem_memsegs_update(struct memseg *msp)
+{
+ _NOTE(ARGUNUSED(msp));
+}
+
+/*
+ * Return end of metadata for an already setup memseg.
+ * X86 platforms don't need per-page meta data to support kpm.
+ */
+caddr_t
+hat_kpm_mseg_reuse(struct memseg *msp)
+{
+ return ((caddr_t)msp->epages);
+}
+
+void
+hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
+{
+ _NOTE(ARGUNUSED(msp, mspp));
+ ASSERT(0);
+}
+
+void
+hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
+ struct memseg *lo, struct memseg *mid, struct memseg *hi)
+{
+ _NOTE(ARGUNUSED(msp, mspp, lo, mid, hi));
+ ASSERT(0);
+}
+
+/*
+ * Walk the memsegs chain, applying func to each memseg span.
+ */
+void
+hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
+{
+ pfn_t pbase, pend;
+ void *base;
+ size_t size;
+ struct memseg *msp;
+
+ for (msp = memsegs; msp; msp = msp->next) {
+ pbase = msp->pages_base;
+ pend = msp->pages_end;
+ base = ptob(pbase) + kpm_vbase;
+ size = ptob(pend - pbase);
+ func(arg, base, size);
+ }
+}
+
+#else /* __xpv */
+
/*
* There are specific Hypervisor calls to establish and remove mappings
* to grant table references and the privcmd driver. We have to ensure
@@ -4406,5 +4503,5 @@ hat_release_mapping(hat_t *hat, caddr_t addr)
htable_release(ht);
htable_release(ht);
XPV_ALLOW_MIGRATE();
- }
-#endif
+}
+#endif /* __xpv */
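On the non-xpv side these hooks let kpm track hot-added memsegs: hat_kpm_addmem_mseg_update() simply maps the span at kpm_vbase + mmu_ptob(pfn), and hat_kpm_walk() hands each memseg span to a callback. A sketch of a walker callback matching the func(arg, base, size) signature above:

    /* Illustrative hat_kpm_walk() callback: total up the kpm spans. */
    static void
    kpm_span_count(void *arg, void *base, size_t size)
    {
            uint64_t *total = arg;

            *total += size;
            cmn_err(CE_CONT, "kpm span at %p, %lu bytes\n", base, size);
    }

    /* Usage: uint64_t bytes = 0; hat_kpm_walk(kpm_span_count, &bytes); */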
diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h
index ae2b6c12c1..753aa9d146 100644
--- a/usr/src/uts/i86pc/vm/vm_dep.h
+++ b/usr/src/uts/i86pc/vm/vm_dep.h
@@ -22,6 +22,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* UNIX machine dependent virtual memory support.
@@ -71,7 +75,14 @@ extern void plcnt_inc_dec(page_t *, int, int, long, int);
*/
#define PLCNT_XFER_NORELOC(pp)
-#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt)
+/*
+ * macro to modify the page list max counts when memory is added to
+ * the page lists during startup (add_physmem) or during a DR operation
+ * when memory is added (kphysm_add_memory_dynamic) or deleted
+ * (kphysm_del_cleanup).
+ */
+#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, cnt)
+
extern int memrange_num(pfn_t);
extern int pfn_2_mtype(pfn_t);
extern int mtype_func(int, int, uint_t);
@@ -82,7 +93,7 @@ extern int mnode_range_cnt(int);
/*
* candidate counters in vm_pagelist.c are indexed by color and range
*/
-#define NUM_MEM_RANGES 4 /* memory range types */
+#define NUM_MEM_RANGES 4 /* memory range types */
#define MAX_MNODE_MRANGES NUM_MEM_RANGES
#define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode)
#define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase)
@@ -161,6 +172,25 @@ extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
rw_exit(&page_ctrs_rwlock[(mnode)]); \
}
+/*
+ * macro to call page_ctrs_adjust() when memory is added
+ * during a DR operation.
+ */
+#define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \
+ spgcnt_t _cnt = (spgcnt_t)(cnt); \
+ int _mn; \
+ pgcnt_t _np; \
+ pfn_t _pfn = (pfn); \
+ pfn_t _endpfn = _pfn + _cnt; \
+ while (_pfn < _endpfn) { \
+ _mn = PFN_2_MEM_NODE(_pfn); \
+ _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \
+ _pfn += _np; \
+ if ((rv = page_ctrs_adjust(_mn)) != 0) \
+ break; \
+ } \
+}
+
#define PAGE_GET_COLOR_SHIFT(szc, nszc) \
(hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift)
@@ -524,6 +554,7 @@ extern page_t *page_freelist_split(uchar_t,
uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *);
extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int,
pfn_t);
+extern void page_freelist_coalesce_all(int);
extern uint_t page_get_pagecolors(uint_t);
extern void pfnzero(pfn_t, uint_t, uint_t);
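PAGE_CTRS_ADJUST() resizes the page-counter arrays of every memory node a pfn span touches, stopping at the first page_ctrs_adjust() failure, which it leaves in rv. A hedged sketch of the expected call pattern during hot-add (the function name is illustrative):

    /*
     * Sketch: grow the page counters for a hot-added pfn span before
     * its pages are released to the free lists.
     */
    static int
    dr_grow_page_ctrs(pfn_t base_pfn, pgcnt_t npages)
    {
            int rv = 0;

            PAGE_CTRS_ADJUST(base_pfn, npages, rv);
            return (rv);    /* non-zero: some node could not grow */
    }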
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index 63c8129ddf..79c0ee073e 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
@@ -127,7 +131,6 @@ int largepagesupport = 0;
extern uint_t page_create_new;
extern uint_t page_create_exists;
extern uint_t page_create_putbacks;
-extern uint_t page_create_putbacks;
/*
* Allow users to disable the kernel's use of SSE.
*/
@@ -142,6 +145,8 @@ typedef struct {
pfn_t mnr_pfnhi;
int mnr_mnode;
int mnr_memrange; /* index into memranges[] */
+ int mnr_next; /* next lower PA mnoderange */
+ int mnr_exists;
/* maintain page list stats */
pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */
pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */
@@ -175,6 +180,11 @@ typedef struct {
*/
#define PFN_4GIG 0x100000
#define PFN_16MEG 0x1000
+/* Indices into the memory range (arch_memranges) array. */
+#define MRI_4G 0
+#define MRI_2G 1
+#define MRI_16M 2
+#define MRI_0 3
static pfn_t arch_memranges[NUM_MEM_RANGES] = {
PFN_4GIG, /* pfn range for 4G and above */
0x80000, /* pfn range for 2G-4G */
@@ -191,6 +201,8 @@ int nranges = NUM_MEM_RANGES;
mnoderange_t *mnoderanges;
int mnoderangecnt;
int mtype4g;
+int mtype16m;
+int mtypetop; /* index of highest pfn'ed mnoderange */
/*
* 4g memory management variables for systems with more than 4g of memory:
@@ -220,7 +232,6 @@ int mtype4g;
* for requests that don't specifically require it.
*/
-#define LOTSFREE4G (maxmem4g >> lotsfree4gshift)
#define DESFREE4G (maxmem4g >> desfree4gshift)
#define RESTRICT4G_ALLOC \
@@ -230,7 +241,6 @@ static pgcnt_t maxmem4g;
static pgcnt_t freemem4g;
static int physmax4g;
static int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */
-static int lotsfree4gshift = 3;
/*
* 16m memory management:
@@ -246,7 +256,7 @@ static int lotsfree4gshift = 3;
* are not restricted.
*/
-#define FREEMEM16M MTYPE_FREEMEM(0)
+#define FREEMEM16M MTYPE_FREEMEM(mtype16m)
#define DESFREE16M desfree16m
#define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \
((freemem != 0) && ((flags & PG_PANIC) == 0) && \
@@ -337,6 +347,9 @@ hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];
kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];
+/* Lock to protect mnoderanges array for memory DR operations. */
+static kmutex_t mnoderange_lock;
+
/*
* Only let one thread at a time try to coalesce large pages, to
* prevent them from working against each other.
@@ -951,7 +964,8 @@ pfn_2_mtype(pfn_t pfn)
#else
int n;
- for (n = mnoderangecnt - 1; n >= 0; n--) {
+ /* Always start from highest pfn and work our way down */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
if (pfn >= mnoderanges[n].mnr_pfnlo) {
break;
}
@@ -1063,7 +1077,7 @@ retry:
* verify that pages being returned from allocator have correct DMA attribute
*/
#ifndef DEBUG
-#define check_dma(a, b, c) (0)
+#define check_dma(a, b, c) (void)(0)
#else
static void
check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt)
@@ -1253,7 +1267,12 @@ mnode_range_cnt(int mnode)
void
mnode_range_setup(mnoderange_t *mnoderanges)
{
+ mnoderange_t *mp = mnoderanges;
int mnode, mri;
+ int mindex = 0; /* current index into mnoderanges array */
+ int i, j;
+ pfn_t hipfn;
+ int last, hi;
for (mnode = 0; mnode < max_mem_nodes; mnode++) {
if (mem_node_config[mnode].exists == 0)
@@ -1272,26 +1291,168 @@ mnode_range_setup(mnoderange_t *mnoderanges)
mem_node_config[mnode].physmax);
mnoderanges->mnr_mnode = mnode;
mnoderanges->mnr_memrange = mri;
+ mnoderanges->mnr_exists = 1;
mnoderanges++;
+ mindex++;
if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
mri--;
else
break;
}
}
+
+ /*
+ * For now do a simple sort of the mnoderanges array to fill in
+ * the mnr_next fields. Since mindex is expected to be relatively
+ * small, using a simple O(N^2) algorithm.
+ */
+ for (i = 0; i < mindex; i++) {
+ if (mp[i].mnr_pfnlo == 0) /* find lowest */
+ break;
+ }
+ ASSERT(i < mindex);
+ last = i;
+ mtype16m = last;
+ mp[last].mnr_next = -1;
+ for (i = 0; i < mindex - 1; i++) {
+ hipfn = (pfn_t)(-1);
+ hi = -1;
+ /* find next highest mnode range */
+ for (j = 0; j < mindex; j++) {
+ if (mp[j].mnr_pfnlo > mp[last].mnr_pfnlo &&
+ mp[j].mnr_pfnlo < hipfn) {
+ hipfn = mp[j].mnr_pfnlo;
+ hi = j;
+ }
+ }
+ mp[hi].mnr_next = last;
+ last = hi;
+ }
+ mtypetop = last;
}
+#ifndef __xpv
+/*
+ * Update mnoderanges for memory hot-add DR operations.
+ */
+static void
+mnode_range_add(int mnode)
+{
+ int *prev;
+ int n, mri;
+ pfn_t start, end;
+ extern void membar_sync(void);
+
+ ASSERT(0 <= mnode && mnode < max_mem_nodes);
+ ASSERT(mem_node_config[mnode].exists);
+ start = mem_node_config[mnode].physbase;
+ end = mem_node_config[mnode].physmax;
+ ASSERT(start <= end);
+ mutex_enter(&mnoderange_lock);
+
+#ifdef DEBUG
+ /* Check whether the new range interleaves with other memory nodes. */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
+ ASSERT(mnoderanges[n].mnr_exists);
+ if (mnoderanges[n].mnr_mnode == mnode)
+ continue;
+ ASSERT(start > mnoderanges[n].mnr_pfnhi ||
+ end < mnoderanges[n].mnr_pfnlo);
+ }
+#endif /* DEBUG */
+
+ mri = nranges - 1;
+ while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
+ mri--;
+ while (mri >= 0 && mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
+ /* Check whether mtype already exists. */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
+ if (mnoderanges[n].mnr_mnode == mnode &&
+ mnoderanges[n].mnr_memrange == mri) {
+ mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri),
+ start);
+ mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri),
+ end);
+ break;
+ }
+ }
+
+ /* Add a new entry if it doesn't exist yet. */
+ if (n == -1) {
+ /* Try to find an unused entry in mnoderanges array. */
+ for (n = 0; n < mnoderangecnt; n++) {
+ if (mnoderanges[n].mnr_exists == 0)
+ break;
+ }
+ ASSERT(n < mnoderangecnt);
+ mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri), start);
+ mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri), end);
+ mnoderanges[n].mnr_mnode = mnode;
+ mnoderanges[n].mnr_memrange = mri;
+ mnoderanges[n].mnr_exists = 1;
+ /* Page 0 should always be present. */
+ for (prev = &mtypetop;
+ mnoderanges[*prev].mnr_pfnlo > start;
+ prev = &mnoderanges[*prev].mnr_next) {
+ ASSERT(mnoderanges[*prev].mnr_next >= 0);
+ ASSERT(mnoderanges[*prev].mnr_pfnlo > end);
+ }
+ mnoderanges[n].mnr_next = *prev;
+ membar_sync();
+ *prev = n;
+ }
+
+ if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
+ mri--;
+ else
+ break;
+ }
+
+ mutex_exit(&mnoderange_lock);
+}
+
+/*
+ * Update mnoderanges for memory hot-removal DR operations.
+ */
+static void
+mnode_range_del(int mnode)
+{
+ _NOTE(ARGUNUSED(mnode));
+ ASSERT(0 <= mnode && mnode < max_mem_nodes);
+ /* TODO: support deletion operation. */
+ ASSERT(0);
+}
+
+void
+plat_slice_add(pfn_t start, pfn_t end)
+{
+ mem_node_add_slice(start, end);
+ if (plat_dr_enabled()) {
+ mnode_range_add(PFN_2_MEM_NODE(start));
+ }
+}
+
+void
+plat_slice_del(pfn_t start, pfn_t end)
+{
+ ASSERT(PFN_2_MEM_NODE(start) == PFN_2_MEM_NODE(end));
+ ASSERT(plat_dr_enabled());
+ mnode_range_del(PFN_2_MEM_NODE(start));
+ mem_node_del_slice(start, end);
+}
+#endif /* __xpv */
+
/*ARGSUSED*/
int
mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz)
{
- int mtype = mnoderangecnt - 1;
+ int mtype = mtypetop;
#if !defined(__xpv)
#if defined(__i386)
/*
* set the mtype range
- * - kmem requests needs to be below 4g if restricted_kmemalloc is set.
+ * - kmem requests need to be below 4g if restricted_kmemalloc is set.
* - for non kmem requests, set range to above 4g if memory below 4g
* runs low.
*/
@@ -1334,8 +1495,8 @@ mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz)
int
mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt)
{
- int mtype = mnoderangecnt - 1;
-#if !defined(__ixpv)
+ int mtype = mtypetop;
+#if !defined(__xpv)
if (RESTRICT16M_ALLOC(freemem, pgcnt, *flags)) {
*flags |= PGI_MT_RANGE16M;
} else {
@@ -1349,7 +1510,7 @@ mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt)
/*
* Determine if the mnode range specified in mtype contains memory belonging
* to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains
- * the range of indices from high pfn to 0, 16m or 4g.
+ * the range from high pfn to 0, 16m or 4g.
*
* Return first mnode range type index found otherwise return -1 if none found.
*/
@@ -1357,18 +1518,20 @@ int
mtype_func(int mnode, int mtype, uint_t flags)
{
if (flags & PGI_MT_RANGE) {
- int mtlim = 0;
+ int mnr_lim = MRI_0;
- if (flags & PGI_MT_NEXT)
- mtype--;
+ if (flags & PGI_MT_NEXT) {
+ mtype = mnoderanges[mtype].mnr_next;
+ }
if (flags & PGI_MT_RANGE4G)
- mtlim = mtype4g + 1; /* exclude 0-4g range */
+ mnr_lim = MRI_4G; /* exclude 0-4g range */
else if (flags & PGI_MT_RANGE16M)
- mtlim = 1; /* exclude 0-16m range */
- while (mtype >= mtlim) {
+ mnr_lim = MRI_16M; /* exclude 0-16m range */
+ while (mtype != -1 &&
+ mnoderanges[mtype].mnr_memrange <= mnr_lim) {
if (mnoderanges[mtype].mnr_mnode == mnode)
return (mtype);
- mtype--;
+ mtype = mnoderanges[mtype].mnr_next;
}
} else if (mnoderanges[mtype].mnr_mnode == mnode) {
return (mtype);
@@ -1378,34 +1541,40 @@ mtype_func(int mnode, int mtype, uint_t flags)
/*
* Update the page list max counts with the pfn range specified by the
- * input parameters. Called from add_physmem() when physical memory with
- * page_t's are initially added to the page lists.
+ * input parameters.
*/
void
mtype_modify_max(pfn_t startpfn, long cnt)
{
- int mtype = 0;
- pfn_t endpfn = startpfn + cnt, pfn;
- pgcnt_t inc;
-
- ASSERT(cnt > 0);
+ int mtype;
+ pgcnt_t inc;
+ spgcnt_t scnt = (spgcnt_t)(cnt);
+ pgcnt_t acnt = ABS(scnt);
+ pfn_t endpfn = startpfn + acnt;
+ pfn_t pfn, lo;
if (!physmax4g)
return;
- for (pfn = startpfn; pfn < endpfn; ) {
- if (pfn <= mnoderanges[mtype].mnr_pfnhi) {
- if (endpfn < mnoderanges[mtype].mnr_pfnhi) {
- inc = endpfn - pfn;
+ mtype = mtypetop;
+ for (pfn = endpfn; pfn > startpfn; ) {
+ ASSERT(mtype != -1);
+ lo = mnoderanges[mtype].mnr_pfnlo;
+ if (pfn > lo) {
+ if (startpfn >= lo) {
+ inc = pfn - startpfn;
} else {
- inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1;
+ inc = pfn - lo;
+ }
+ if (mnoderanges[mtype].mnr_memrange != MRI_4G) {
+ if (scnt > 0)
+ maxmem4g += inc;
+ else
+ maxmem4g -= inc;
}
- if (mtype <= mtype4g)
- maxmem4g += inc;
- pfn += inc;
+ pfn -= inc;
}
- mtype++;
- ASSERT(mtype < mnoderangecnt || pfn >= endpfn);
+ mtype = mnoderanges[mtype].mnr_next;
}
}
@@ -1418,6 +1587,7 @@ mtype_2_mrange(int mtype)
void
mnodetype_2_pfn(int mnode, int mtype, pfn_t *pfnlo, pfn_t *pfnhi)
{
+ _NOTE(ARGUNUSED(mnode));
ASSERT(mnoderanges[mtype].mnr_mnode == mnode);
*pfnlo = mnoderanges[mtype].mnr_pfnlo;
*pfnhi = mnoderanges[mtype].mnr_pfnhi;
@@ -1462,6 +1632,7 @@ plcnt_init(caddr_t addr)
void
plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
{
+ _NOTE(ARGUNUSED(pp));
#ifdef DEBUG
int bin = PP_2_BIN(pp);
@@ -1470,7 +1641,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
cnt);
#endif
ASSERT(mtype == PP_2_MTYPE(pp));
- if (physmax4g && mtype <= mtype4g)
+ if (physmax4g && mnoderanges[mtype].mnr_memrange != MRI_4G)
atomic_add_long(&freemem4g, cnt);
if (flags & PG_CACHE_LIST)
atomic_add_long(&mnoderanges[mtype].mnr_mt_clpgcnt, cnt);
@@ -1485,7 +1656,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
int
mnode_pgcnt(int mnode)
{
- int mtype = mnoderangecnt - 1;
+ int mtype = mtypetop;
int flags = PGI_MT_RANGE0;
pgcnt_t pgcnt = 0;
@@ -1505,6 +1676,7 @@ mnode_pgcnt(int mnode)
size_t
page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
{
+ _NOTE(ARGUNUSED(l2_linesz));
size_t colorsz = 0;
int i;
int colors;
@@ -1519,14 +1691,19 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
/*
* Reduce the memory ranges lists if we don't have large amounts
* of memory. This avoids searching known empty free lists.
+ * To support memory DR operations, we need to keep memory ranges
+ * for possible memory hot-add operations.
*/
- i = memrange_num(physmax);
+ if (plat_dr_physmax > physmax)
+ i = memrange_num(plat_dr_physmax);
+ else
+ i = memrange_num(physmax);
#if defined(__i386)
- if (i > 0)
+ if (i > MRI_4G)
restricted_kmemalloc = 0;
#endif
/* physmax greater than 4g */
- if (i == 0)
+ if (i == MRI_4G)
physmax4g = 1;
#endif /* !__xpv */
memranges += i;
@@ -1619,6 +1796,16 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
/* size for mnoderanges */
for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++)
mnoderangecnt += mnode_range_cnt(i);
+ if (plat_dr_support_memory()) {
+ /*
+ * Reserve enough space for memory DR operations.
+ * Two extra mnoderanges for possible fragmentation:
+ * one for the 2G boundary and the other for the 4G boundary.
+ * We don't expect a memory board crossing the 16M boundary
+ * for memory hot-add operations on x86 platforms.
+ */
+ mnoderangecnt += 2 + max_mem_nodes - lgrp_plat_node_cnt;
+ }
colorsz = mnoderangecnt * sizeof (mnoderange_t);
/* size for fpc_mutex and cpc_mutex */
@@ -3122,8 +3309,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
* ordering.
*/
if (dma_attr == NULL) {
- n = 0;
- m = mnoderangecnt - 1;
+ n = mtype16m;
+ m = mtypetop;
fullrange = 1;
VM_STAT_ADD(pga_vmstats.pga_nulldmaattr);
} else {
@@ -3136,6 +3323,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
if (dma_attr->dma_attr_align > MMU_PAGESIZE)
return (NULL);
+ /* Sanity check the dma_attr */
+ if (pfnlo > pfnhi)
+ return (NULL);
+
n = pfn_2_mtype(pfnlo);
m = pfn_2_mtype(pfnhi);
@@ -3144,13 +3335,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
}
VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);
- if (n > m)
- return (NULL);
-
szc = 0;
- /* cylcing thru mtype handled by RANGE0 if n == 0 */
- if (n == 0) {
+ /* cycling thru mtype handled by RANGE0 if n == mtype16m */
+ if (n == mtype16m) {
flags |= PGI_MT_RANGE0;
n = m;
}
@@ -3164,7 +3352,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
/*
* allocate pages from high pfn to low.
*/
- for (mtype = m; mtype >= n; mtype--) {
+ mtype = m;
+ do {
if (fullrange != 0) {
pp = page_get_mnode_freelist(mnode,
bin, mtype, szc, flags);
@@ -3181,7 +3370,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
check_dma(dma_attr, pp, 1);
return (pp);
}
- }
+ } while (mtype != n &&
+ (mtype = mnoderanges[mtype].mnr_next) != -1);
if (!local_failed_stat) {
lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
local_failed_stat = 1;
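The recurring pattern in this file is that mnoderanges is no longer a dense array ordered by pfn: DR can populate entries out of order, so mnr_next threads the live entries from mtypetop (the highest pfn range) down to the 0-16M entry (mtype16m), terminated by -1. Every former "for (n = mnoderangecnt - 1; n >= 0; n--)" loop becomes the walk sketched here:

    /* Sketch: visit every live mnoderange from highest pfn down. */
    int n;

    for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
            ASSERT(mnoderanges[n].mnr_exists);
            /* examine mnoderanges[n].mnr_pfnlo .. mnr_pfnhi here */
    }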
diff --git a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
index f0a257d1fd..8bc46f8e3e 100644
--- a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
+++ b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
@@ -161,6 +161,12 @@ gcpu_mca_poll_init(cmi_hdl_t hdl)
}
}
+/* Undo the effects of gcpu_mca_poll_init(). */
+void
+gcpu_mca_poll_fini(cmi_hdl_t hdl)
+{
+}
+
void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
diff --git a/usr/src/uts/i86xpv/io/psm/xpv_psm.c b/usr/src/uts/i86xpv/io/psm/xpv_psm.c
index 07db030ed5..c59d1015a0 100644
--- a/usr/src/uts/i86xpv/io/psm/xpv_psm.c
+++ b/usr/src/uts/i86xpv/io/psm/xpv_psm.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -1672,7 +1672,8 @@ static struct psm_ops xen_psm_ops = {
(void (*)(void *arg))NULL, /* psm_post_cyclic_setup */
(void (*)(int, int))NULL, /* psm_preshutdown */
xen_intr_ops, /* Advanced DDI Interrupt framework */
- (int (*)(psm_state_request_t *))NULL /* psm_state */
+ (int (*)(psm_state_request_t *))NULL, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info xen_psm_info = {
diff --git a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
index 9427ead955..06049ed9e0 100644
--- a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
+++ b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -891,7 +891,8 @@ static struct psm_ops xen_uppc_ops = {
(int (*)(dev_info_t *, ddi_intr_handle_impl_t *,
psm_intr_op_t, int *))NULL, /* psm_intr_ops */
- (int (*)(psm_state_request_t *))NULL /* psm_state */
+ (int (*)(psm_state_request_t *))NULL, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info xen_uppc_info = {
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index 87e1323772..807cb8f790 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -1359,6 +1359,17 @@ fcnname/**/_info: \
END_MODULE(acpica);
#endif
+/*
+ * Stubs for acpidev
+ */
+#ifndef ACPIDEV_MODULE
+ MODULE(acpidev,misc);
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info,
+ nomod_minus_one) ;
+ END_MODULE(acpidev);
+#endif
+
#ifndef IPNET_MODULE
MODULE(ipnet,drv);
STUB(ipnet, ipnet_if_getdev, nomod_zero);
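The NO_UNLOAD_STUB entries make the two acpidev NUMA routines safe to call before the acpidev misc module loads: the stub resolves to nomod_minus_one, so callers simply see -1. A toy C model of that contract (all names below are hypothetical; the real mechanism is the assembler macro above):

    /* Toy model: the stub falls back to -1 until the module installs. */
    static int (*acpidev_numa_impl)(int) = NULL;  /* set at mod_install */

    int
    acpidev_numa_stub(int cpuid)
    {
            if (acpidev_numa_impl == NULL)
                    return (-1);    /* nomod_minus_one behavior */
            return (acpidev_numa_impl(cpuid));
    }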
diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c
index 9aca9f4ea1..13f04658aa 100644
--- a/usr/src/uts/intel/ia32/os/sundep.c
+++ b/usr/src/uts/intel/ia32/os/sundep.c
@@ -115,9 +115,10 @@ check_boot_version(int boots_version)
* 1) the pfn of the highest installed physical page,
* 2) the number of pages installed
* 3) the number of distinct contiguous regions these pages fall into.
+ * 4) the number of contiguous memory ranges
*/
void
-installed_top_size(
+installed_top_size_ex(
struct memlist *list, /* pointer to start of installed list */
pfn_t *high_pfn, /* return ptr for top value */
pgcnt_t *pgcnt, /* return ptr for sum of installed pages */
@@ -141,6 +142,21 @@ installed_top_size(
*ranges = cnt;
}
+void
+installed_top_size(
+ struct memlist *list, /* pointer to start of installed list */
+ pfn_t *high_pfn, /* return ptr for top value */
+ pgcnt_t *pgcnt) /* return ptr for sum of installed pages */
+{
+ int ranges;
+
+ installed_top_size_ex(list, high_pfn, pgcnt, &ranges);
+}
+
+void
+phys_install_has_changed(void)
+{}
+
/*
* Copy in a memory list from boot to kernel, with a filter function
* to remove pages. The filter function can increase the address and/or
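installed_top_size() keeps its old three-output behavior as a wrapper, while the new _ex variant also reports how many contiguous ranges the installed list spans, which DR code can use to size per-range bookkeeping. A short usage sketch (phys_install is assumed to be the kernel's global installed-memory memlist):

    pfn_t top;
    pgcnt_t npages;
    int nranges;

    installed_top_size_ex(phys_install, &top, &npages, &nranges);
    /* nranges now counts the contiguous installed memory ranges */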
diff --git a/usr/src/uts/intel/io/acpica/acpica.c b/usr/src/uts/intel/io/acpica/acpica.c
index 088b1f0eb4..30a8d79be1 100644
--- a/usr/src/uts/intel/io/acpica/acpica.c
+++ b/usr/src/uts/intel/io/acpica/acpica.c
@@ -36,6 +36,7 @@
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/stat.h>
+#include <sys/spl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
@@ -121,8 +122,11 @@ _init(void)
int error = EBUSY;
int status;
extern int (*acpi_fp_setwake)();
+ extern kmutex_t cpu_map_lock;
mutex_init(&acpica_module_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&cpu_map_lock, NULL, MUTEX_SPIN,
+ (ddi_iblock_cookie_t)ipltospl(DISP_LEVEL));
if ((error = mod_install(&modlinkage)) != 0) {
mutex_destroy(&acpica_module_lock);
diff --git a/usr/src/uts/intel/io/acpica/osl.c b/usr/src/uts/intel/io/acpica/osl.c
index 7593bde61a..6cb08ee27e 100644
--- a/usr/src/uts/intel/io/acpica/osl.c
+++ b/usr/src/uts/intel/io/acpica/osl.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
/*
@@ -98,7 +98,7 @@ static int scanning_d2a_map = 0;
static int d2a_done = 0;
/* features supported by ACPICA and ACPI device configuration. */
-uint64_t acpica_core_features = 0;
+uint64_t acpica_core_features = ACPI_FEATURE_OSI_MODULE;
static uint64_t acpica_devcfg_features = 0;
/* set by acpi_poweroff() in PSMs and appm_ioctl() in acpippm for S3 */
@@ -112,7 +112,7 @@ struct cpu_map_item {
ACPI_HANDLE obj;
};
-static kmutex_t cpu_map_lock;
+kmutex_t cpu_map_lock;
static struct cpu_map_item **cpu_map = NULL;
static int cpu_map_count_max = 0;
static int cpu_map_count = 0;
@@ -1552,7 +1552,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh)
}
}
if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) {
- *rh = cpu_map[cpu_id]->obj;
+ *rh = cpu_map[i]->obj;
mutex_exit(&cpu_map_lock);
return (AE_OK);
}
@@ -1567,7 +1567,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh)
}
}
if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) {
- *rh = cpu_map[cpu_id]->obj;
+ *rh = cpu_map[i]->obj;
mutex_exit(&cpu_map_lock);
return (AE_OK);
}
@@ -2209,6 +2209,82 @@ acpica_get_cpu_object_by_apicid(UINT32 apicid, ACPI_HANDLE *hdlp)
return (rc);
}
+ACPI_STATUS
+acpica_get_cpu_id_by_object(ACPI_HANDLE hdl, processorid_t *cpuidp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(cpuidp != NULL);
+ if (hdl == NULL || cpuidp == NULL) {
+ return (rc);
+ }
+
+ *cpuidp = -1;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl && cpu_map[i]->cpu_id != -1) {
+ *cpuidp = cpu_map[i]->cpu_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpica_get_apicid_by_object(ACPI_HANDLE hdl, UINT32 *rp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(rp != NULL);
+ if (hdl == NULL || rp == NULL) {
+ return (rc);
+ }
+
+ *rp = UINT32_MAX;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl &&
+ cpu_map[i]->apic_id != UINT32_MAX) {
+ *rp = cpu_map[i]->apic_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpica_get_procid_by_object(ACPI_HANDLE hdl, UINT32 *rp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(rp != NULL);
+ if (hdl == NULL || rp == NULL) {
+ return (rc);
+ }
+
+ *rp = UINT32_MAX;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl) {
+ *rp = cpu_map[i]->proc_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
void
acpica_set_core_feature(uint64_t features)
{
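The three new accessors invert the existing cpu_map lookups: given an ACPI processor object (for example, the handle delivered with a hot-plug notify), they recover the Solaris CPU id, the local APIC id, or the ACPI processor id. A sketch of the expected use from an event handler:

    /*
     * Sketch: AE_NOT_EXIST means the object was never mapped into
     * cpu_map; a valid cpuid can be handed to the DR state machine.
     */
    static void
    cpu_hotplug_notify(ACPI_HANDLE hdl)
    {
            processorid_t cpuid;

            if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(hdl, &cpuid))) {
                    /* cpuid is live; proceed with attach/online */
            }
    }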
diff --git a/usr/src/uts/intel/sys/acpica.h b/usr/src/uts/intel/sys/acpica.h
index 5f74655418..ff55dcb302 100644
--- a/usr/src/uts/intel/sys/acpica.h
+++ b/usr/src/uts/intel/sys/acpica.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -133,6 +133,7 @@ typedef struct iflag {
#define ACPI_DEVCFG_CPU 0x1
#define ACPI_DEVCFG_MEMORY 0x2
#define ACPI_DEVCFG_CONTAINER 0x4
+#define ACPI_DEVCFG_PCI 0x8
/*
* master_ops.c
@@ -187,6 +188,9 @@ extern ACPI_STATUS acpica_unmap_cpu(processorid_t);
extern ACPI_STATUS acpica_get_cpu_object_by_cpuid(processorid_t, ACPI_HANDLE *);
extern ACPI_STATUS acpica_get_cpu_object_by_procid(UINT32, ACPI_HANDLE *);
extern ACPI_STATUS acpica_get_cpu_object_by_apicid(UINT32, ACPI_HANDLE *);
+extern ACPI_STATUS acpica_get_cpu_id_by_object(ACPI_HANDLE, processorid_t *);
+extern ACPI_STATUS acpica_get_apicid_by_object(ACPI_HANDLE, UINT32 *);
+extern ACPI_STATUS acpica_get_procid_by_object(ACPI_HANDLE, UINT32 *);
extern uint64_t acpica_get_core_feature(uint64_t);
extern void acpica_set_core_feature(uint64_t);
diff --git a/usr/src/uts/intel/sys/bootconf.h b/usr/src/uts/intel/sys/bootconf.h
index f626fbbb79..4f6c48c395 100644
--- a/usr/src/uts/intel/sys/bootconf.h
+++ b/usr/src/uts/intel/sys/bootconf.h
@@ -50,6 +50,7 @@ extern "C" {
#define BP_CPU_APICID_ARRAY "cpu_apicid_array"
#define BP_LGRP_SLIT_ENABLE "lgrp_slit_enable"
#define BP_LGRP_SRAT_ENABLE "lgrp_srat_enable"
+#define BP_LGRP_MSCT_ENABLE "lgrp_msct_enable"
#define BP_LGRP_TOPO_LEVELS "lgrp_topo_levels"
/*
@@ -213,18 +214,6 @@ extern int octet_to_hexascii(const void *, uint_t, char *, uint_t *);
extern int dhcpinit(void);
-/*
- * XXX The memlist stuff belongs in a header of its own
- */
-extern int check_boot_version(int);
-extern void size_physavail(struct memlist *, pgcnt_t *, int *, pfn_t);
-extern int copy_physavail(struct memlist *, struct memlist **,
- uint_t, uint_t);
-extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *, int *);
-extern int check_memexp(struct memlist *, uint_t);
-extern void copy_memlist_filter(struct memlist *, struct memlist **,
- void (*filter)(uint64_t *, uint64_t *));
-
extern struct bootops *bootops;
extern int netboot;
extern int swaploaded;
diff --git a/usr/src/uts/intel/sys/cpu_module.h b/usr/src/uts/intel/sys/cpu_module.h
index 9b64b8527a..8d801b07f5 100644
--- a/usr/src/uts/intel/sys/cpu_module.h
+++ b/usr/src/uts/intel/sys/cpu_module.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -143,7 +143,6 @@ extern cmi_errno_t cmi_hdl_rdmsr(cmi_hdl_t, uint_t, uint64_t *);
extern cmi_errno_t cmi_hdl_wrmsr(cmi_hdl_t, uint_t, uint64_t);
extern void cmi_hdl_enable_mce(cmi_hdl_t);
-
extern uint_t cmi_hdl_vendor(cmi_hdl_t);
extern const char *cmi_hdl_vendorstr(cmi_hdl_t);
extern uint_t cmi_hdl_family(cmi_hdl_t);
@@ -172,6 +171,7 @@ extern uint_t cmi_ntv_hwchipid(cpu_t *);
extern uint_t cmi_ntv_hwprocnodeid(cpu_t *);
extern uint_t cmi_ntv_hwcoreid(cpu_t *);
extern uint_t cmi_ntv_hwstrandid(cpu_t *);
+extern void cmi_ntv_hwdisable_mce(cmi_hdl_t);
#endif /* __xpv */
typedef struct cmi_mca_regs {
@@ -210,6 +210,7 @@ extern void cmi_mca_trap(struct regs *);
extern boolean_t cmi_panic_on_ue(void);
extern void cmi_mc_register(cmi_hdl_t, const struct cmi_mc_ops *, void *);
+extern cmi_errno_t cmi_mc_register_global(const struct cmi_mc_ops *, void *);
extern void cmi_mc_sw_memscrub_disable(void);
extern cmi_errno_t cmi_mc_patounum(uint64_t, uint8_t, uint8_t, uint32_t, int,
mc_unum_t *);
diff --git a/usr/src/uts/intel/sys/memlist_plat.h b/usr/src/uts/intel/sys/memlist_plat.h
new file mode 100644
index 0000000000..065f8ed37a
--- /dev/null
+++ b/usr/src/uts/intel/sys/memlist_plat.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_MEMLIST_PLAT_H
+#define _SYS_MEMLIST_PLAT_H
+
+/*
+ * Boot time configuration information objects
+ */
+
+#include <sys/types.h>
+#include <sys/memlist.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int check_boot_version(int);
+extern void copy_memlist_filter(struct memlist *, struct memlist **,
+ void (*filter)(uint64_t *, uint64_t *));
+extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *);
+extern void installed_top_size_ex(struct memlist *, pfn_t *, pgcnt_t *, int *);
+extern void phys_install_has_changed(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MEMLIST_PLAT_H */
diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c
index d01eabae19..f3efc433c5 100644
--- a/usr/src/uts/sun4/os/ddi_impl.c
+++ b/usr/src/uts/sun4/os/ddi_impl.c
@@ -48,12 +48,8 @@
#include <sys/systeminfo.h>
#include <sys/fpu/fpusystm.h>
#include <sys/vm.h>
-#include <sys/fs/dv_node.h>
-#include <sys/fs/snode.h>
#include <sys/ddi_isa.h>
-#include <sys/modhash.h>
#include <sys/modctl.h>
-#include <sys/sunldi_impl.h>
dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *,
ddi_intr_handle_impl_t *);
@@ -1954,937 +1950,3 @@ peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
return (err);
}
-
-/*
- * Platform independent DR routines
- */
-
-static int
-ndi2errno(int n)
-{
- int err = 0;
-
- switch (n) {
- case NDI_NOMEM:
- err = ENOMEM;
- break;
- case NDI_BUSY:
- err = EBUSY;
- break;
- case NDI_FAULT:
- err = EFAULT;
- break;
- case NDI_FAILURE:
- err = EIO;
- break;
- case NDI_SUCCESS:
- break;
- case NDI_BADHANDLE:
- default:
- err = EINVAL;
- break;
- }
- return (err);
-}
-
-/*
- * Prom tree node list
- */
-struct ptnode {
- pnode_t nodeid;
- struct ptnode *next;
-};
-
-/*
- * Prom tree walk arg
- */
-struct pta {
- dev_info_t *pdip;
- devi_branch_t *bp;
- uint_t flags;
- dev_info_t *fdip;
- struct ptnode *head;
-};
-
-static void
-visit_node(pnode_t nodeid, struct pta *ap)
-{
- struct ptnode **nextp;
- int (*select)(pnode_t, void *, uint_t);
-
- ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
-
- select = ap->bp->create.prom_branch_select;
-
- ASSERT(select);
-
- if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
-
- for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
- ;
-
- *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
-
- (*nextp)->nodeid = nodeid;
- }
-
- if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
- return;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-}
-
-/*
- * NOTE: The caller of this function must check for device contracts
- * or LDI callbacks against this dip before setting the dip offline.
- */
-static int
-set_infant_dip_offline(dev_info_t *dip, void *arg)
-{
- char *path = (char *)arg;
-
- ASSERT(dip);
- ASSERT(arg);
-
- if (i_ddi_node_state(dip) >= DS_ATTACHED) {
- (void) ddi_pathname(dip, path);
- cmn_err(CE_WARN, "Attempt to set offline flag on attached "
- "node: %s", path);
- return (DDI_FAILURE);
- }
-
- mutex_enter(&(DEVI(dip)->devi_lock));
- if (!DEVI_IS_DEVICE_OFFLINE(dip))
- DEVI_SET_DEVICE_OFFLINE(dip);
- mutex_exit(&(DEVI(dip)->devi_lock));
-
- return (DDI_SUCCESS);
-}
-
-typedef struct result {
- char *path;
- int result;
-} result_t;
-
-static int
-dip_set_offline(dev_info_t *dip, void *arg)
-{
- int end;
- result_t *resp = (result_t *)arg;
-
- ASSERT(dip);
- ASSERT(resp);
-
- /*
- * We stop the walk if e_ddi_offline_notify() returns
- * failure, because this implies that one or more consumers
- * (either LDI or contract based) has blocked the offline.
- * So there is no point in conitnuing the walk
- */
- if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
- resp->result = DDI_FAILURE;
- return (DDI_WALK_TERMINATE);
- }
-
- /*
- * If set_infant_dip_offline() returns failure, it implies
- * that we failed to set a particular dip offline. This
- * does not imply that the offline as a whole should fail.
- * We want to do the best we can, so we continue the walk.
- */
- if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
- end = DDI_SUCCESS;
- else
- end = DDI_FAILURE;
-
- e_ddi_offline_finalize(dip, end);
-
- return (DDI_WALK_CONTINUE);
-}
-
-/*
- * The call to e_ddi_offline_notify() exists for the
- * unlikely error case that a branch we are trying to
- * create already exists and has device contracts or LDI
- * event callbacks against it.
- *
- * We allow create to succeed for such branches only if
- * no constraints block the offline.
- */
-static int
-branch_set_offline(dev_info_t *dip, char *path)
-{
- int circ;
- int end;
- result_t res;
-
-
- if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
- return (DDI_FAILURE);
- }
-
- if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
- end = DDI_SUCCESS;
- else
- end = DDI_FAILURE;
-
- e_ddi_offline_finalize(dip, end);
-
- if (end == DDI_FAILURE)
- return (DDI_FAILURE);
-
- res.result = DDI_SUCCESS;
- res.path = path;
-
- ndi_devi_enter(dip, &circ);
- ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
- ndi_devi_exit(dip, circ);
-
- return (res.result);
-}
-
-/*ARGSUSED*/
-static int
-create_prom_branch(void *arg, int has_changed)
-{
- int circ;
- int exists, rv;
- pnode_t nodeid;
- struct ptnode *tnp;
- dev_info_t *dip;
- struct pta *ap = arg;
- devi_branch_t *bp;
- char *path;
-
- ASSERT(ap);
- ASSERT(ap->fdip == NULL);
- ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
-
- bp = ap->bp;
-
- nodeid = ddi_get_nodeid(ap->pdip);
- if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
- cmn_err(CE_WARN, "create_prom_branch: invalid "
- "nodeid: 0x%x", nodeid);
- return (EINVAL);
- }
-
- ap->head = NULL;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-
- if (ap->head == NULL)
- return (ENODEV);
-
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- rv = 0;
- while ((tnp = ap->head) != NULL) {
- ap->head = tnp->next;
-
- ndi_devi_enter(ap->pdip, &circ);
-
- /*
- * Check if the branch already exists.
- */
- exists = 0;
- dip = e_ddi_nodeid_to_dip(tnp->nodeid);
- if (dip != NULL) {
- exists = 1;
-
- /* Parent is held busy, so release hold */
- ndi_rele_devi(dip);
-#ifdef DEBUG
- cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
- " for nodeid 0x%x", (void *)dip, tnp->nodeid);
-#endif
- } else {
- dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
- }
-
- kmem_free(tnp, sizeof (struct ptnode));
-
- /*
- * Hold the branch if it is not already held
- */
- if (dip && !exists) {
- e_ddi_branch_hold(dip);
- }
-
- ASSERT(dip == NULL || e_ddi_branch_held(dip));
-
- /*
- * Set all dips in the newly created branch offline so that
- * only a "configure" operation can attach
- * the branch
- */
- if (dip == NULL || branch_set_offline(dip, path)
- == DDI_FAILURE) {
- ndi_devi_exit(ap->pdip, circ);
- rv = EIO;
- continue;
- }
-
- ASSERT(ddi_get_parent(dip) == ap->pdip);
-
- ndi_devi_exit(ap->pdip, circ);
-
- if (ap->flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * Invoke devi_branch_callback() (if it exists) only for
- * newly created branches
- */
- if (bp->devi_branch_callback && !exists)
- bp->devi_branch_callback(dip, bp->arg, 0);
- }
-
- kmem_free(path, MAXPATHLEN);
-
- return (rv);
-}
-
-static int
-sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
-{
- int rv, circ, len;
- int i, flags, ret;
- dev_info_t *dip;
- char *nbuf;
- char *path;
- static const char *noname = "<none>";
-
- ASSERT(pdip);
- ASSERT(DEVI_BUSY_OWNED(pdip));
-
- flags = 0;
-
- /*
- * Creating the root of a branch ?
- */
- if (rdipp) {
- *rdipp = NULL;
- flags = DEVI_BRANCH_ROOT;
- }
-
- ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
- rv = bp->create.sid_branch_create(dip, bp->arg, flags);
-
- nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
-
- if (rv == DDI_WALK_ERROR) {
- cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
- " properties on devinfo node %p", (void *)dip);
- goto fail;
- }
-
- len = OBP_MAXDRVNAME;
- if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
- DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
- != DDI_PROP_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
- "no name property", (void *)dip);
- goto fail;
- }
-
- ASSERT(i_ddi_node_state(dip) == DS_PROTO);
- if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
- " for devinfo node %p", nbuf, (void *)dip);
- goto fail;
- }
-
- kmem_free(nbuf, OBP_MAXDRVNAME);
-
- /*
- * Ignore bind failures just like boot does
- */
- (void) ndi_devi_bind_driver(dip, 0);
-
- switch (rv) {
- case DDI_WALK_CONTINUE:
- case DDI_WALK_PRUNESIB:
- ndi_devi_enter(dip, &circ);
-
- i = DDI_WALK_CONTINUE;
- for (; i == DDI_WALK_CONTINUE; ) {
- i = sid_node_create(dip, bp, NULL);
- }
-
- ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
- if (i == DDI_WALK_ERROR)
- rv = i;
- /*
- * If PRUNESIB stop creating siblings
- * of dip's child. Subsequent walk behavior
- * is determined by rv returned by dip.
- */
-
- ndi_devi_exit(dip, circ);
- break;
- case DDI_WALK_TERMINATE:
- /*
- * Don't create children and ask our parent
- * to not create siblings either.
- */
- rv = DDI_WALK_PRUNESIB;
- break;
- case DDI_WALK_PRUNECHILD:
- /*
- * Don't create children, but ask parent to continue
- * with siblings.
- */
- rv = DDI_WALK_CONTINUE;
- break;
- default:
- ASSERT(0);
- break;
- }
-
- if (rdipp)
- *rdipp = dip;
-
- /*
- * Set device offline - only the "configure" op should cause an attach.
- * Note that it is safe to set the dip offline without checking
- * for either device contract or layered driver (LDI) based constraints
- * since there cannot be any contracts or LDI opens of this device.
- * This is because this node is a newly created dip with the parent busy
- * held, so no other thread can come in and attach this dip. A dip that
- * has never been attached cannot have contracts since by definition
- * a device contract (an agreement between a process and a device minor
- * node) can only be created against a device that has minor nodes
- * i.e is attached. Similarly an LDI open will only succeed if the
- * dip is attached. We assert below that the dip is not attached.
- */
- ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ret = set_infant_dip_offline(dip, path);
- ASSERT(ret == DDI_SUCCESS);
- kmem_free(path, MAXPATHLEN);
-
- return (rv);
-fail:
- (void) ndi_devi_free(dip);
- kmem_free(nbuf, OBP_MAXDRVNAME);
- return (DDI_WALK_ERROR);
-}
-
-static int
-create_sid_branch(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int rv = 0, state = DDI_WALK_CONTINUE;
- dev_info_t *rdip;
-
- while (state == DDI_WALK_CONTINUE) {
- int circ;
-
- ndi_devi_enter(pdip, &circ);
-
- state = sid_node_create(pdip, bp, &rdip);
- if (rdip == NULL) {
- ndi_devi_exit(pdip, circ);
- ASSERT(state == DDI_WALK_ERROR);
- break;
- }
-
- e_ddi_branch_hold(rdip);
-
- ndi_devi_exit(pdip, circ);
-
- if (flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(rdip, dipp, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * devi_branch_callback() is optional
- */
- if (bp->devi_branch_callback)
- bp->devi_branch_callback(rdip, bp->arg, 0);
- }
-
- ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
-
- return (state == DDI_WALK_ERROR ? EIO : rv);
-}
-
-int
-e_ddi_branch_create(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int prom_devi, sid_devi, error;
-
- if (pdip == NULL || bp == NULL || bp->type == 0)
- return (EINVAL);
-
- prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
- sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
-
- if (prom_devi && bp->create.prom_branch_select == NULL)
- return (EINVAL);
- else if (sid_devi && bp->create.sid_branch_create == NULL)
- return (EINVAL);
- else if (!prom_devi && !sid_devi)
- return (EINVAL);
-
- if (flags & DEVI_BRANCH_EVENT)
- return (EINVAL);
-
- if (prom_devi) {
- struct pta pta = {0};
-
- pta.pdip = pdip;
- pta.bp = bp;
- pta.flags = flags;
-
- error = prom_tree_access(create_prom_branch, &pta, NULL);
-
- if (dipp)
- *dipp = pta.fdip;
- else if (pta.fdip)
- ndi_rele_devi(pta.fdip);
- } else {
- error = create_sid_branch(pdip, bp, dipp, flags);
- }
-
- return (error);
-}
-
-int
-e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
-{
- int rv;
- char *devnm;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ndi_hold_devi(pdip);
-
- if (!e_ddi_branch_held(rdip)) {
- ndi_rele_devi(pdip);
- cmn_err(CE_WARN, "e_ddi_branch_configure: "
- "dip(%p) not held", (void *)rdip);
- return (EINVAL);
- }
-
- if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
- /*
- * First attempt to bind a driver. If we fail, return
- * success (On some platforms, dips for some device
- * types (CPUs) may not have a driver)
- */
- if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
- ndi_rele_devi(pdip);
- return (0);
- }
-
- if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
- rv = NDI_FAILURE;
- goto out;
- }
- }
-
- ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- (void) ddi_deviname(rdip, devnm);
-
- if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
- NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
- /* release hold from ndi_devi_config_one() */
- ndi_rele_devi(rdip);
- }
-
- kmem_free(devnm, MAXNAMELEN + 1);
-out:
- if (rv != NDI_SUCCESS && dipp && rdip) {
- ndi_hold_devi(rdip);
- *dipp = rdip;
- }
- ndi_rele_devi(pdip);
- return (ndi2errno(rv));
-}
-
-void
-e_ddi_branch_hold(dev_info_t *rdip)
-{
- if (e_ddi_branch_held(rdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
- return;
- }
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
- DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref++;
- }
- ASSERT(DEVI(rdip)->devi_ref > 0);
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_held(dev_info_t *rdip)
-{
- int rv = 0;
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
- DEVI(rdip)->devi_ref > 0) {
- rv = 1;
- }
- mutex_exit(&DEVI(rdip)->devi_lock);
-
- return (rv);
-}
-void
-e_ddi_branch_rele(dev_info_t *rdip)
-{
- mutex_enter(&DEVI(rdip)->devi_lock);
- DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref--;
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_unconfigure(
- dev_info_t *rdip,
- dev_info_t **dipp,
- uint_t flags)
-{
- int circ, rv;
- int destroy;
- char *devnm;
- uint_t nflags;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL)
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_clean()
- */
- if (DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
- " devinfo node(%p) is busy held", (void *)pdip);
- return (EINVAL);
- }
-
- destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_deviname(rdip, devnm);
- ndi_devi_exit(pdip, circ);
-
- /*
- * ddi_deviname() returns a component name with / prepended.
- */
- (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
-
- ndi_devi_enter(pdip, &circ);
-
- /*
- * Recreate device name as it may have changed state (init/uninit)
- * when parent busy lock was dropped for devfs_clean()
- */
- (void) ddi_deviname(rdip, devnm);
-
- if (!e_ddi_branch_held(rdip)) {
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
- destroy ? "destroy" : "unconfigure", (void *)rdip);
- return (EINVAL);
- }
-
- /*
- * Release hold on the branch. This is ok since we are holding the
- * parent busy. If rdip is not removed, we must do a hold on the
- * branch before returning.
- */
- e_ddi_branch_rele(rdip);
-
- nflags = NDI_DEVI_OFFLINE;
- if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
- nflags |= NDI_DEVI_REMOVE;
- destroy = 1;
- } else {
- nflags |= NDI_UNCONFIG; /* uninit but don't remove */
- }
-
- if (flags & DEVI_BRANCH_EVENT)
- nflags |= NDI_POST_EVENT;
-
- if (i_ddi_devi_attached(pdip) &&
- (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
- rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
- } else {
- rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
- if (rv == NDI_SUCCESS) {
- ASSERT(!destroy || ddi_get_child(rdip) == NULL);
- rv = ndi_devi_offline(rdip, nflags);
- }
- }
-
- if (!destroy || rv != NDI_SUCCESS) {
- /* The dip still exists, so do a hold */
- e_ddi_branch_hold(rdip);
- }
-out:
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- return (ndi2errno(rv));
-}
-
-int
-e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
-{
- return (e_ddi_branch_unconfigure(rdip, dipp,
- flag|DEVI_BRANCH_DESTROY));
-}
-
-/*
- * Number of chains for hash table
- */
-#define NUMCHAINS 17
-
-/*
- * Devinfo busy arg
- */
-struct devi_busy {
- int dv_total;
- int s_total;
- mod_hash_t *dv_hash;
- mod_hash_t *s_hash;
- int (*callback)(dev_info_t *, void *, uint_t);
- void *arg;
-};
-
-static int
-visit_dip(dev_info_t *dip, void *arg)
-{
- uintptr_t sbusy, dvbusy, ref;
- struct devi_busy *bsp = arg;
-
- ASSERT(bsp->callback);
-
- /*
- * A dip cannot be busy if its reference count is 0
- */
- if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
- return (bsp->callback(dip, bsp->arg, 0));
- }
-
- if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
- dvbusy = 0;
-
- /*
- * To catch device opens currently maintained on specfs common snodes.
- */
- if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = 0;
-
-#ifdef DEBUG
- if (ref < sbusy || ref < dvbusy) {
- cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
- "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
- }
-#endif
-
- dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
-
- return (bsp->callback(dip, bsp->arg, dvbusy));
-}
-
-static int
-visit_snode(struct snode *sp, void *arg)
-{
- uintptr_t sbusy;
- dev_info_t *dip;
- int count;
- struct devi_busy *bsp = arg;
-
- ASSERT(sp);
-
- /*
- * The stable lock is held. This prevents
- * the snode and its associated dip from
- * going away.
- */
- dip = NULL;
- count = spec_devi_open_count(sp, &dip);
-
- if (count <= 0)
- return (DDI_WALK_CONTINUE);
-
- ASSERT(dip);
-
- if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = count;
- else
- sbusy += count;
-
- if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
- cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
- "sbusy = %lu", "e_ddi_branch_referenced",
- (void *)dip, sbusy);
- }
-
- bsp->s_total += count;
-
- return (DDI_WALK_CONTINUE);
-}
-
-static void
-visit_dvnode(struct dv_node *dv, void *arg)
-{
- uintptr_t dvbusy;
- uint_t count;
- struct vnode *vp;
- struct devi_busy *bsp = arg;
-
- ASSERT(dv && dv->dv_devi);
-
- vp = DVTOV(dv);
-
- mutex_enter(&vp->v_lock);
- count = vp->v_count;
- mutex_exit(&vp->v_lock);
-
- if (!count)
- return;
-
- if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t *)&dvbusy))
- dvbusy = count;
- else
- dvbusy += count;
-
- if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t)dvbusy)) {
- cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
- "dvbusy=%lu", "e_ddi_branch_referenced",
- (void *)dv->dv_devi, dvbusy);
- }
-
- bsp->dv_total += count;
-}
-
-/*
- * Returns reference count on success or -1 on failure.
- */
-int
-e_ddi_branch_referenced(
- dev_info_t *rdip,
- int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
- void *arg)
-{
- int circ;
- char *path;
- dev_info_t *pdip;
- struct devi_busy bsa = {0};
-
- ASSERT(rdip);
-
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
- ndi_hold_devi(rdip);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_walk()
- */
- if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
- "devinfo branch(%p) not held or parent busy held",
- (void *)rdip);
- ndi_rele_devi(rdip);
- kmem_free(path, MAXPATHLEN);
- return (-1);
- }
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_pathname(rdip, path);
- ndi_devi_exit(pdip, circ);
-
- bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct dev_info));
-
- bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct snode));
-
- if (devfs_walk(path, visit_dvnode, &bsa)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: "
- "devfs walk failed for: %s", path);
- kmem_free(path, MAXPATHLEN);
- bsa.s_total = bsa.dv_total = -1;
- goto out;
- }
-
- kmem_free(path, MAXPATHLEN);
-
- /*
- * Walk the snode table to detect device opens, which are currently
- * maintained on specfs common snodes.
- */
- spec_snode_walk(visit_snode, &bsa);
-
- if (callback == NULL)
- goto out;
-
- bsa.callback = callback;
- bsa.arg = arg;
-
- if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
- ndi_devi_enter(rdip, &circ);
- ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
- ndi_devi_exit(rdip, circ);
- }
-
-out:
- ndi_rele_devi(rdip);
- mod_hash_destroy_ptrhash(bsa.s_hash);
- mod_hash_destroy_ptrhash(bsa.dv_hash);
- return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
-}
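The platform-independent e_ddi_branch_*() routines deleted here (and the matching declarations removed from both machsystm.h copies below) move into common code elsewhere in this changeset so the x86 DR framework can share them; their semantics are unchanged. A sketch of the API as a DR driver consumes it (callback and argument names are hypothetical):

    devi_branch_t db;

    bzero(&db, sizeof (db));
    db.arg = my_arg;                               /* hypothetical */
    db.type = DEVI_BRANCH_SID;
    db.create.sid_branch_create = my_node_create;  /* hypothetical */

    if (e_ddi_branch_create(pdip, &db, NULL, DEVI_BRANCH_CONFIGURE) != 0) {
            /* branch creation or configuration failed */
    }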
diff --git a/usr/src/uts/sun4u/sys/machsystm.h b/usr/src/uts/sun4u/sys/machsystm.h
index 76e96a56d6..ccacd922b4 100644
--- a/usr/src/uts/sun4u/sys/machsystm.h
+++ b/usr/src/uts/sun4u/sys/machsystm.h
@@ -26,8 +26,6 @@
#ifndef _SYS_MACHSYSTM_H
#define _SYS_MACHSYSTM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Numerous platform-dependent interfaces that don't seem to belong
* in any other header file.
@@ -357,46 +355,6 @@ extern uint64_t cpu_pa[];
extern void ptl1_init_cpu(struct cpu *);
/*
- * Defines for DR interfaces
- */
-#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
-#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
-#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
-#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
-#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
-#define DEVI_BRANCH_SID 0x20 /* SID node branches */
-#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
-
-typedef struct devi_branch {
- void *arg;
- void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
- int type;
- union {
- int (*prom_branch_select)(pnode_t, void *, uint_t);
- int (*sid_branch_create)(dev_info_t *, void *, uint_t);
- } create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
- dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
- int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
* Constants which define the "hole" in the 64-bit sfmmu address space.
* These are set to specific values by the CPU module code.
*/
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index 832a634d0f..d9c795ecf9 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -341,46 +341,6 @@ extern uint64_t cpu_pa[];
extern void ptl1_init_cpu(struct cpu *);
/*
- * Defines for DR interfaces
- */
-#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
-#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
-#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
-#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
-#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
-#define DEVI_BRANCH_SID 0x20 /* SID node branches */
-#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
-
-typedef struct devi_branch {
- void *arg;
- void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
- int type;
- union {
- int (*prom_branch_select)(pnode_t, void *, uint_t);
- int (*sid_branch_create)(dev_info_t *, void *, uint_t);
- } create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
- dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
- int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
* Constants which define the "hole" in the 64-bit sfmmu address space.
* These are set to specific values by the CPU module code.
*/