author	Gerry Liu <jiang.liu@intel.com>	2010-03-25 15:36:50 -0700
committer	Gerry Liu <jiang.liu@intel.com>	2010-03-25 15:36:50 -0700
commit	a31148363f598def767ac48c5d82e1572e44b935 (patch)
tree	756cfb0c3c9a79f98087875bb4731e0366f895b7
parent	19843f01b1bef3453f717c23c8f89fb9313f6749 (diff)
download	illumos-gate-a31148363f598def767ac48c5d82e1572e44b935.tar.gz
PSARC/2009/104 Hot-Plug Support for ACPI-based Systems
PSARC/2009/550 PSMI Extensions for CPU Hotplug
PSARC/2009/551 acpihpd ACPI Hotplug Daemon
PSARC/2009/591 Attachment Points for Hotpluggable x86 Systems
6862510 provide support for cpu hot add on x86
6883891 cmi interface needs to support dynamic reconfiguration
6884154 x2APIC and kmdb may not function properly during CPU hotplug event.
6904971 low priority acpi nexus code review feedback
6877301 lgrp should support memory hotplug flag in SRAT table
-rw-r--r--  usr/src/Makefile.lint  2
-rw-r--r--  usr/src/cmd/Makefile  1
-rw-r--r--  usr/src/cmd/acpihpd/Makefile  62
-rw-r--r--  usr/src/cmd/acpihpd/Makefile.com  46
-rw-r--r--  usr/src/cmd/acpihpd/Makefile.targ  62
-rw-r--r--  usr/src/cmd/acpihpd/acpihpd.c  325
-rw-r--r--  usr/src/cmd/acpihpd/acpihpd.xml  93
-rw-r--r--  usr/src/cmd/acpihpd/i386/Makefile  37
-rw-r--r--  usr/src/cmd/acpihpd/notify.c  353
-rw-r--r--  usr/src/cmd/acpihpd/svc-acpihpd  39
-rw-r--r--  usr/src/lib/cfgadm_plugins/Makefile  4
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/Makefile.com  5
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile  40
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap.c  21
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c  13
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c  106
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c  29
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c  52
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/common/cfga.c  13
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/i386/Makefile  39
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile  9
-rw-r--r--  usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile  9
-rw-r--r--  usr/src/pkg/license_files/cr_Intel.hotplug  1
-rw-r--r--  usr/src/pkg/manifests/system-header.mf  1
-rw-r--r--  usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf  83
-rw-r--r--  usr/src/uts/common/os/cpu_event.c  65
-rw-r--r--  usr/src/uts/common/os/cpu_pm.c  10
-rw-r--r--  usr/src/uts/common/os/mem_config.c  27
-rw-r--r--  usr/src/uts/common/os/mem_config_stubs.c  6
-rw-r--r--  usr/src/uts/common/os/sunddi.c  939
-rw-r--r--  usr/src/uts/common/sys/esunddi.h  43
-rw-r--r--  usr/src/uts/common/sys/pci_impl.h  3
-rw-r--r--  usr/src/uts/common/sys/processor.h  3
-rw-r--r--  usr/src/uts/common/syscall/p_online.c  24
-rw-r--r--  usr/src/uts/common/vm/page.h  4
-rw-r--r--  usr/src/uts/common/vm/seg_kpm.h  12
-rw-r--r--  usr/src/uts/i86pc/Makefile.files  11
-rw-r--r--  usr/src/uts/i86pc/Makefile.i86pc.shared  5
-rw-r--r--  usr/src/uts/i86pc/Makefile.rules  38
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h  4
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c  48
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c  53
-rw-r--r--  usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c  28
-rw-r--r--  usr/src/uts/i86pc/dr/Makefile  104
-rw-r--r--  usr/src/uts/i86pc/drmach_acpi/Makefile  101
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c  53
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c  224
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c  40
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c  2743
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c  205
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c  44
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c  172
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c  106
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c  39
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c  163
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c  74
-rw-r--r--  usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c  712
-rw-r--r--  usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c  2927
-rw-r--r--  usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h  199
-rw-r--r--  usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c  18
-rw-r--r--  usr/src/uts/i86pc/io/cbe.c  10
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr.c  3329
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr.conf  27
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_cpu.c  885
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_io.c  389
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_mem_acpi.c  639
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_quiesce.c  1050
-rw-r--r--  usr/src/uts/i86pc/io/dr/dr_util.c  457
-rw-r--r--  usr/src/uts/i86pc/io/dr/sbdgenerr.pl  90
-rw-r--r--  usr/src/uts/i86pc/io/mp_platform_common.c  180
-rw-r--r--  usr/src/uts/i86pc/io/pcplusmp/apic.c  495
-rw-r--r--  usr/src/uts/i86pc/io/psm/uppc.c  7
-rw-r--r--  usr/src/uts/i86pc/ml/mpcore.s  156
-rw-r--r--  usr/src/uts/i86pc/ml/offsets.in  2
-rw-r--r--  usr/src/uts/i86pc/os/acpi_fw.h  26
-rw-r--r--  usr/src/uts/i86pc/os/cmi.c  64
-rw-r--r--  usr/src/uts/i86pc/os/cmi_hw.c  66
-rw-r--r--  usr/src/uts/i86pc/os/cms.c  14
-rw-r--r--  usr/src/uts/i86pc/os/cpr_impl.c  19
-rw-r--r--  usr/src/uts/i86pc/os/cpuid.c  22
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpu_acpi.c  3
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpu_idle.c  52
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpupm_intel.c  4
-rw-r--r--  usr/src/uts/i86pc/os/cpupm/cpupm_mach.c  32
-rw-r--r--  usr/src/uts/i86pc/os/fakebop.c  162
-rw-r--r--  usr/src/uts/i86pc/os/lgrpplat.c  1034
-rw-r--r--  usr/src/uts/i86pc/os/machdep.c  44
-rw-r--r--  usr/src/uts/i86pc/os/mem_config_arch.c  56
-rw-r--r--  usr/src/uts/i86pc/os/memnode.c  4
-rw-r--r--  usr/src/uts/i86pc/os/microcode.c  10
-rw-r--r--  usr/src/uts/i86pc/os/mlsetup.c  66
-rw-r--r--  usr/src/uts/i86pc/os/mp_implfuncs.c  2
-rw-r--r--  usr/src/uts/i86pc/os/mp_machdep.c  98
-rw-r--r--  usr/src/uts/i86pc/os/mp_pc.c  513
-rw-r--r--  usr/src/uts/i86pc/os/mp_startup.c  578
-rw-r--r--  usr/src/uts/i86pc/os/startup.c  93
-rw-r--r--  usr/src/uts/i86pc/os/x_call.c  112
-rw-r--r--  usr/src/uts/i86pc/sys/Makefile  1
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev.h  141
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev_dr.h  252
-rw-r--r--  usr/src/uts/i86pc/sys/acpidev_impl.h  64
-rw-r--r--  usr/src/uts/i86pc/sys/acpinex.h  6
-rw-r--r--  usr/src/uts/i86pc/sys/apic.h  7
-rw-r--r--  usr/src/uts/i86pc/sys/dr.h  500
-rw-r--r--  usr/src/uts/i86pc/sys/dr_util.h  99
-rw-r--r--  usr/src/uts/i86pc/sys/drmach.h  177
-rw-r--r--  usr/src/uts/i86pc/sys/fastboot_msg.h  1
-rw-r--r--  usr/src/uts/i86pc/sys/machcpuvar.h  5
-rw-r--r--  usr/src/uts/i86pc/sys/machsystm.h  75
-rw-r--r--  usr/src/uts/i86pc/sys/memnode.h  5
-rw-r--r--  usr/src/uts/i86pc/sys/psm_types.h  54
-rw-r--r--  usr/src/uts/i86pc/sys/rm_platter.h  14
-rw-r--r--  usr/src/uts/i86pc/sys/sbd_ioctl.h  655
-rw-r--r--  usr/src/uts/i86pc/sys/x_call.h  2
-rw-r--r--  usr/src/uts/i86pc/vm/hat_i86.c  105
-rw-r--r--  usr/src/uts/i86pc/vm/vm_dep.h  35
-rw-r--r--  usr/src/uts/i86pc/vm/vm_machdep.c  288
-rw-r--r--  usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c  6
-rw-r--r--  usr/src/uts/i86xpv/io/psm/xpv_psm.c  5
-rw-r--r--  usr/src/uts/i86xpv/io/psm/xpv_uppc.c  5
-rw-r--r--  usr/src/uts/intel/ia32/ml/modstubs.s  11
-rw-r--r--  usr/src/uts/intel/ia32/os/sundep.c  18
-rw-r--r--  usr/src/uts/intel/io/acpica/acpica.c  4
-rw-r--r--  usr/src/uts/intel/io/acpica/osl.c  86
-rw-r--r--  usr/src/uts/intel/sys/acpica.h  6
-rw-r--r--  usr/src/uts/intel/sys/bootconf.h  13
-rw-r--r--  usr/src/uts/intel/sys/cpu_module.h  5
-rw-r--r--  usr/src/uts/intel/sys/memlist_plat.h  55
-rw-r--r--  usr/src/uts/sun4/os/ddi_impl.c  938
-rw-r--r--  usr/src/uts/sun4u/sys/machsystm.h  42
-rw-r--r--  usr/src/uts/sun4v/sys/machsystm.h  40
131 files changed, 22855 insertions, 2347 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 90b76afe47..2182a61ff9 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -477,11 +477,13 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \
$(CLOSED)/lib/libc_i18n
i386_SUBDIRS= \
+ cmd/acpihpd \
cmd/biosdev \
cmd/rtc \
cmd/ucodeadm \
lib/brand/lx \
lib/cfgadm_plugins/sata \
+ lib/cfgadm_plugins/sbd \
lib/libfdisk
sparc_SUBDIRS= \
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 2d0ce5e6d5..b493fa6ef1 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -476,6 +476,7 @@ $(CLOSED)/cmd/tail \
$(CLOSED)/cmd/tr_xpg4
i386_SUBDIRS= \
+ acpihpd \
addbadsec \
biosdev \
diskscan \
diff --git a/usr/src/cmd/acpihpd/Makefile b/usr/src/cmd/acpihpd/Makefile
new file mode 100644
index 0000000000..33a26fb2be
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile
+#
+
+PLATFORM = i86pc
+MANIFEST = acpihpd.xml
+SVCMETHOD = svc-acpihpd
+
+include ../Makefile.cmd
+
+$(64ONLY)SUBDIRS= $(MACH)
+
+ROOTMANIFESTDIR = $(ROOTSVCPLATFORM)/${PLATFORM}
+
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all: $(SUBDIRS)
+
+install: $(SUBDIRS) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+
+clean clobber lint: $(SUBDIRS)
+
+check: $(CHKMANIFEST)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/Makefile.com b/usr/src/cmd/acpihpd/Makefile.com
new file mode 100644
index 0000000000..287a48d896
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.com
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.com
+#
+
+LIBS = acpihpd
+PLATFORM = i86pc
+
+include ../../Makefile.cmd
+include ../../../Makefile.psm
+
+OBJS = acpihpd.o notify.o
+SRCS = $(OBJS:%.o=../%.c)
+LINTFILES = $(SRCS:%.c=%.ln)
+CLOBBERFILES = $(LIBS)
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(USR_PSM_INCL_DIR)
+LDLIBS += -lsysevent -lnvpair -lcfgadm
+
+FILEMODE = 0755
diff --git a/usr/src/cmd/acpihpd/Makefile.targ b/usr/src/cmd/acpihpd/Makefile.targ
new file mode 100644
index 0000000000..89da583847
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.targ
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.targ
+#
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+clean:
+ $(RM) $(OBJS)
+
+lint:
+ $(LINT.c) $(SRCS) $(LDLIBS)
+
+$(LIBS): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+$(USR_PLAT_DIR):
+ -$(INS.dir)
+
+$(USR_PSM_DIR): $(USR_PLAT_DIR)
+ -$(INS.dir)
+
+$(USR_PSM_LIB_DIR): $(USR_PSM_DIR)
+ -$(INS.dir)
+
+$(USR_PSM_LIB_DIR)/%: % $(USR_PSM_LIB_DIR)
+ $(INS.file)
+
+include ../../Makefile.targ
+
diff --git a/usr/src/cmd/acpihpd/acpihpd.c b/usr/src/cmd/acpihpd/acpihpd.c
new file mode 100644
index 0000000000..6beee85f54
--- /dev/null
+++ b/usr/src/cmd/acpihpd/acpihpd.c
@@ -0,0 +1,325 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dr.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <syslog.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <config_admin.h>
+#include <libscf.h>
+#include <libsysevent.h>
+#include <stdarg.h>
+
+/* Signal handler type */
+typedef void (sig_handler_t)(int);
+
+#define ACPIHPD_PID_FILE "/var/run/acpihpd.pid" /* lock file path */
+
+/* Program Name */
+char *g_prog_name;
+int g_debuglevel = 0;
+
+static int s_pid_fd;
+static sysevent_handle_t *s_acpihpd_hdl;
+
+static int daemon_init(void);
+static void daemon_quit(int);
+static int set_sig_handler(int, sig_handler_t *);
+static int acpihpd_init(void);
+static void acpihpd_fini(void);
+static void acpihpd_event(sysevent_t *);
+extern void notify_hotplug(sysevent_t *ev);
+void debug_print(int, const char *, ...);
+
+int
+main(int argc, char *argv[])
+{
+ int c;
+
+ /* Get Program Name */
+ if ((g_prog_name = strrchr(argv[0], '/')) == NULL) {
+ g_prog_name = argv[0];
+ } else {
+ g_prog_name++;
+ }
+
+ while ((c = getopt(argc, argv, ":d:")) != -1) {
+ switch (c) {
+ case 'd':
+ g_debuglevel = atoi(optarg);
+ if ((g_debuglevel < 0) || (g_debuglevel > 2)) {
+ g_debuglevel = 0;
+ }
+ break;
+
+ case ':':
+ syslog(LOG_ERR,
+ "missed argument for option %c.", optopt);
+ break;
+
+ case '?':
+ syslog(LOG_ERR, "unrecognized option %c.", optopt);
+ break;
+ }
+ }
+
+ s_acpihpd_hdl = NULL;
+
+ /* Check the daemon running lock and initialize the signal */
+ if (daemon_init() != 0) {
+ debug_print(0, "%s could not startup!", g_prog_name);
+ exit(SMF_EXIT_ERR_FATAL);
+ }
+
+ /* Subscribe to the hotplug event */
+ if (acpihpd_init() != 0) {
+ debug_print(0, "%s could not startup!", g_prog_name);
+ daemon_quit(SMF_EXIT_ERR_FATAL);
+ }
+
+ debug_print(2, "daemon is running.");
+ /*CONSTCOND*/
+ while (1) {
+ (void) pause();
+ }
+
+ return (SMF_EXIT_OK);
+}
+
+static int
+daemon_init(void)
+{
+ int i, ret;
+ pid_t pid;
+ char pid_str[32];
+
+ if (geteuid() != 0) {
+ debug_print(0, "must be root to execute %s", g_prog_name);
+ return (1);
+ }
+
+ if ((pid = fork()) < 0) {
+ return (1);
+ }
+
+ if (pid > 0) {
+ /* Parent to exit. */
+ exit(SMF_EXIT_OK);
+ }
+
+ (void) setsid();
+ (void) chdir("/");
+ (void) umask(0);
+ (void) closefrom(0);
+ (void) open("/dev/null", O_RDONLY);
+ (void) open("/dev/null", O_WRONLY);
+ (void) dup(1);
+ (void) openlog(g_prog_name, LOG_PID, LOG_DAEMON);
+
+ /*
+ * Create the lock file for singleton
+ */
+ if ((s_pid_fd = open(ACPIHPD_PID_FILE, O_RDWR | O_CREAT, 0644)) < 0) {
+ debug_print(0, "could not create pid file: %s",
+ strerror(errno));
+ return (1);
+ }
+
+ if (lockf(s_pid_fd, F_TLOCK, 0L) < 0) {
+ if (errno == EACCES || errno == EAGAIN) {
+ debug_print(0, "another acpihpd is already running");
+ } else {
+ debug_print(0, "could not lock pid file");
+ }
+
+ return (1);
+ }
+
+ (void) ftruncate(s_pid_fd, 0);
+ i = sprintf(pid_str, "%ld", (long)getpid());
+ while ((ret = write(s_pid_fd, pid_str, i)) != i) {
+ if (errno == EINTR) {
+ continue;
+ }
+ if (ret < 0) {
+ debug_print(0, "pid file write failed: %s",
+ strerror(errno));
+ return (1);
+ }
+ }
+
+ if (set_sig_handler(SIGTERM, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGTERM)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGQUIT, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGQUIT)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGINT, (sig_handler_t *)daemon_quit) != 0) {
+ debug_print(2, "could not set signal handler(SIGINT)");
+ return (1);
+ }
+
+ if (set_sig_handler(SIGCHLD, SIG_IGN) != 0) {
+ debug_print(2, "could not set signal handler(SIGCHLD)");
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+daemon_quit(int signo)
+{
+ int status = 0;
+ id_t pgid;
+
+ debug_print(1, "daemon quit [signal#:%d].", signo);
+
+ acpihpd_fini();
+ (void) set_sig_handler(SIGTERM, SIG_IGN);
+ pgid = getpgrp();
+ (void) kill(-pgid, SIGTERM);
+ (void) close(s_pid_fd);
+ (void) unlink(ACPIHPD_PID_FILE);
+
+ if (signo < 0) {
+ status = signo;
+ }
+ _exit(status);
+}
+
+static int
+set_sig_handler(int sig, sig_handler_t *handler)
+{
+ struct sigaction act;
+
+ act.sa_handler = handler;
+ act.sa_flags = 0;
+ if (sig == SIGCHLD && handler == SIG_IGN) {
+ act.sa_flags |= SA_NOCLDWAIT;
+ }
+
+ (void) sigemptyset(&act.sa_mask);
+ if (sigaction(sig, &act, NULL) < 0) {
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+acpihpd_init(void)
+{
+ const char *subclass = ESC_DR_REQ;
+
+ debug_print(2, "acpihpd_init");
+
+ if ((s_acpihpd_hdl = sysevent_bind_handle(acpihpd_event)) == NULL) {
+ debug_print(2, "could not bind to sysevent.");
+ return (-1);
+ }
+
+ if (sysevent_subscribe_event(s_acpihpd_hdl, EC_DR, &subclass, 1) != 0) {
+ debug_print(2, "could not subscribe an event.");
+ sysevent_unbind_handle(s_acpihpd_hdl);
+ s_acpihpd_hdl = NULL;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+acpihpd_fini(void)
+{
+ debug_print(2, "acpihpd_fini");
+
+ if (s_acpihpd_hdl != NULL) {
+ sysevent_unsubscribe_event(s_acpihpd_hdl, EC_DR);
+ sysevent_unbind_handle(s_acpihpd_hdl);
+ }
+}
+
+static void
+acpihpd_event(sysevent_t *ev)
+{
+ debug_print(2, "*** got an event ***");
+
+ /* Inform cfgadm of the hot-plug event. */
+ notify_hotplug(ev);
+}
+
+void
+debug_print(int level, const char *fmt, ...)
+{
+ va_list ap;
+ int pri, pr_out = 0;
+
+ if (level <= g_debuglevel) {
+ switch (level) {
+ case 0:
+ pri = LOG_ERR;
+ pr_out = 1;
+ break;
+
+ case 1:
+ pri = LOG_NOTICE;
+ pr_out = 1;
+ break;
+
+ case 2:
+ pri = LOG_DEBUG;
+ pr_out = 1;
+ break;
+ }
+
+ if (pr_out) {
+ va_start(ap, fmt);
+ vsyslog(pri, fmt, ap);
+ va_end(ap);
+ }
+ }
+}
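
The core of acpihpd.c above is the libsysevent subscription pattern. For reference, here is a minimal sketch of just that pattern, using only the calls already seen above (sysevent_bind_handle, sysevent_subscribe_event) and linking with -lsysevent; daemonization, the pid-file lock, signal handling, and logging are deliberately omitted:

/*
 * Minimal sketch of the subscription pattern used by acpihpd above:
 * bind a delivery callback, subscribe to the EC_DR event class with
 * the ESC_DR_REQ subclass, then wait for events.
 * Build (assumption): cc listener.c -lsysevent
 */
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dr.h>
#include <libsysevent.h>
#include <stdio.h>
#include <unistd.h>

static void
dr_handler(sysevent_t *ev)
{
	(void) printf("DR event: class=%s subclass=%s\n",
	    sysevent_get_class_name(ev),
	    sysevent_get_subclass_name(ev));
}

int
main(void)
{
	sysevent_handle_t *hdl;
	const char *subclass = ESC_DR_REQ;

	if ((hdl = sysevent_bind_handle(dr_handler)) == NULL)
		return (1);
	if (sysevent_subscribe_event(hdl, EC_DR, &subclass, 1) != 0) {
		sysevent_unbind_handle(hdl);
		return (1);
	}
	for (;;)
		(void) pause();
	/* NOTREACHED */
	return (0);
}
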
diff --git a/usr/src/cmd/acpihpd/acpihpd.xml b/usr/src/cmd/acpihpd/acpihpd.xml
new file mode 100644
index 0000000000..e8e799de08
--- /dev/null
+++ b/usr/src/cmd/acpihpd/acpihpd.xml
@@ -0,0 +1,93 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+
+ Copyright (c) 2010, Intel Corporation.
+ All rights reserved.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
 Note:
 This service manifest is not editable; its contents will
+ be overwritten by package or patch operations, including
+ operating system upgrade. Make customizations in a different
+ file.
+-->
+
+<service_bundle type='manifest' name='SUNWacpihpd:acpihpd'>
+
+<service
+ name='platform/i86pc/acpihpd'
+ type='service'
+ version='1'>
+
+ <create_default_instance enabled='true' />
+
+ <single_instance/>
+
+ <!--
+ acpihpd should be started after syseventd is running.
+ -->
+ <dependency
+ name='startup_req'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/system/sysevent' />
+ </dependency>
+
+ <exec_method
+ type='method'
+ name='start'
+ exec='/lib/svc/method/svc-acpihpd'
+ timeout_seconds='60' >
+ <method_context>
+ <method_credential user='root' group='root'
+ privileges='basic,!proc_info,sys_config,sys_mount,sys_devices' />
+ </method_context>
+ </exec_method>
+
+ <exec_method
+ type='method'
+ name='stop'
+ exec=':kill'
+ timeout_seconds='60' >
+ <method_context>
+ <method_credential user='root' group='root' />
+ </method_context>
+ </exec_method>
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang='C'>
+ Intel ACPI hot-plug daemon
+ </loctext>
+ </common_name>
+ <documentation>
+ <manpage title='acpihpd' section='1M'
+ manpath='/usr/share/man' />
+ </documentation>
+ </template>
+</service>
+
+</service_bundle>
+
diff --git a/usr/src/cmd/acpihpd/i386/Makefile b/usr/src/cmd/acpihpd/i386/Makefile
new file mode 100644
index 0000000000..4356b38b82
--- /dev/null
+++ b/usr/src/cmd/acpihpd/i386/Makefile
@@ -0,0 +1,37 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/i386/Makefile
+#
+
+include ../Makefile.com
+
+.KEEP_STATE:
+
+install: all $(USR_PSM_LIBS)
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/notify.c b/usr/src/cmd/acpihpd/notify.c
new file mode 100644
index 0000000000..138b1a233d
--- /dev/null
+++ b/usr/src/cmd/acpihpd/notify.c
@@ -0,0 +1,353 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <config_admin.h>
+#include <strings.h>
+#include <syslog.h>
+#include <libsysevent.h>
+#include <libdevinfo.h>
+#include <libnvpair.h>
+#include <assert.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sbd_ioctl.h>
+#include <sys/acpidev.h>
+
+#define PMCONFIG_PATH "/usr/sbin/pmconfig"
+
+#define CFGADM_CMD_ASSIGN "assign"
+#define CFGADM_CMD_POWERON "poweron"
+#define CFGADM_CMD_PASSTHRU "passthru"
+
+#define STATUS_INPROGRESS 0
+#define STATUS_SUCCESS 1
+#define STATUS_FAILURE 2
+#define STATUS_NOOP 3
+
+static char *s_status_array[] = {
+ ACPIDEV_CMD_OST_INPROGRESS,
+ ACPIDEV_CMD_OST_SUCCESS,
+ ACPIDEV_CMD_OST_FAILURE,
+ ACPIDEV_CMD_OST_NOOP
+};
+
+extern void debug_print(int, const char *, ...);
+
+/*ARGSUSED*/
+static int
+confirm_no(void *appdata_ptr, const char *message)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+message_output(void *appdata_ptr, const char *message)
+{
+ debug_print(2, "cfgadm message: %s", message);
+ return (CFGA_OK);
+}
+
+static char *
+plat_opt_str_alloc(int cmd, char *acpi_event_type, int status)
+{
+ char *opt;
+ size_t len;
+
+ if (cmd == SBD_CMD_PASSTHRU) {
+ len = strlen(s_status_array[status]) +
+ strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) +
+ strlen(acpi_event_type) + 10;
+ if ((opt = malloc(len)) != NULL) {
+ (void) snprintf(opt, len, "%s %s=%s",
+ s_status_array[status],
+ ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type);
+ debug_print(2, "plat_opt_str_alloc = '%s'", opt);
+ }
+ } else {
+ len = strlen("platform=''") +
+ strlen(s_status_array[status]) +
+ strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) +
+ strlen(acpi_event_type) + 10;
+ if ((opt = malloc(len)) != NULL) {
+ (void) snprintf(opt, len, "platform='%s %s=%s'",
+ s_status_array[status],
+ ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type);
+ debug_print(2, "plat_opt_str_alloc = '%s'", opt);
+ }
+ }
+
+ return (opt);
+}
+
+static int
+cfgadm_cmd_wrapper(int cmd, int apid_num, char **apids,
+ char *acpi_event_type, int status,
+ struct cfga_confirm *confirm, struct cfga_msg *message)
+{
+ cfga_err_t ret;
+ char *plat_opts;
+ char *estrp = NULL;
+
+ assert(apid_num == 1);
+ assert(apids != NULL);
+
+ plat_opts = plat_opt_str_alloc(cmd, acpi_event_type, status);
+ if (plat_opts == NULL) {
+ debug_print(0,
+ "failed to generate platform option string for cfgadm");
+ return (-1);
+ }
+
+ switch (cmd) {
+ case SBD_CMD_CONNECT:
+ ret = config_change_state(CFGA_CMD_CONNECT, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('connect', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_CONFIGURE:
+ ret = config_change_state(CFGA_CMD_CONFIGURE, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('configure', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_ASSIGN:
+ ret = config_private_func(CFGADM_CMD_ASSIGN, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('assign', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_POWERON:
+ ret = config_private_func(CFGADM_CMD_POWERON, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('poweron', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ case SBD_CMD_PASSTHRU:
+ ret = config_private_func(CFGADM_CMD_PASSTHRU, apid_num, apids,
+ plat_opts, confirm, message, &estrp, 0);
+ if (ret != CFGA_OK) {
+ debug_print(0, "cfgadm('passthru', '%s') failed, "
+ "ret = %d, errstr = '%s'", apids[0], ret, estrp);
+ }
+ break;
+
+ default:
+ debug_print(2, "unknown command (%d) to cfgadm_cmd_wrapper()");
+ ret = CFGA_ERROR;
+ break;
+ }
+
+ if (plat_opts != NULL)
+ free(plat_opts);
+
+ return (ret == CFGA_OK ? 0 : -1);
+}
+
+static int
+event_process(char *ap_id, char *req, char *acpi_event_type)
+{
+ char *apids[1];
+ struct cfga_msg message;
+ struct cfga_confirm confirm;
+
+ if (strcmp(req, DR_REQ_INCOMING_RES) != 0) {
+ debug_print(2,
+ "Event is not supported (ap_id = %s, req = %s)",
+ ap_id, req);
+ return (-1);
+ }
+
+ apids[0] = ap_id;
+ (void) memset(&confirm, 0, sizeof (confirm));
+ confirm.confirm = confirm_no;
+ (void) memset(&message, 0, sizeof (message));
+ message.message_routine = message_output;
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_ASSIGN, 1, apids,
+ acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) {
+ goto L_ERR;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been assigned successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_POWERON, 1, apids,
+ acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been powered on successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_CONNECT, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been connected successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message);
+
+ if (cfgadm_cmd_wrapper(SBD_CMD_CONFIGURE, 1, apids,
+ acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) {
+ goto L_ERR_SIG;
+ }
+ syslog(LOG_NOTICE,
+ "board '%s' has been configured successfully", ap_id);
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_SUCCESS, &confirm, &message);
+
+ (void) system(PMCONFIG_PATH);
+ syslog(LOG_NOTICE,
+ "board '%s' has been added into system successfully", ap_id);
+
+ return (0);
+
+L_ERR_SIG:
+ (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids,
+ acpi_event_type, STATUS_FAILURE, &confirm, &message);
+L_ERR:
+ syslog(LOG_ERR, "failed to add board '%s' into system", ap_id);
+
+ return (-1);
+}
+
+void
+notify_hotplug(sysevent_t *ev)
+{
+ char *vendor = NULL;
+ nvlist_t *attr_list = NULL;
+ char *class, *subclass;
+ char *ap_id, *req, *acpi_event_type;
+
+ vendor = sysevent_get_vendor_name(ev);
+ debug_print(2, "message_vendor = '%s'", vendor ? vendor : "unknown");
+ if (vendor == NULL || strcmp(vendor, SUNW_VENDOR) != 0) {
+ debug_print(2,
+ "vendor id of message is not '%s'", SUNW_VENDOR);
+ goto L_EXIT;
+ }
+
+ class = sysevent_get_class_name(ev);
+ debug_print(2, "message_class = '%s'", class ? class : "unknown");
+ if (class == NULL || strcmp(class, EC_DR) != 0) {
+ debug_print(2, "class of message is not '%s'", EC_DR);
+ goto L_EXIT;
+ }
+
+ subclass = sysevent_get_subclass_name(ev);
+ debug_print(2,
+ "message_subclass = '%s'", subclass ? subclass : "unknown");
+ if (subclass == NULL || strcmp(subclass, ESC_DR_REQ) != 0) {
+ debug_print(2,
+ "subclass of message is not '%s'", ESC_DR_REQ);
+ goto L_EXIT;
+ }
+
+ if (sysevent_get_attr_list(ev, &attr_list) != 0) {
+ debug_print(2,
+ "can't retrieve attribute list from DR message");
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, DR_AP_ID, &ap_id) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ DR_AP_ID);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", DR_AP_ID, ap_id ? ap_id : "<null>");
+ if ((ap_id == NULL) || (strlen(ap_id) == 0)) {
+ debug_print(2, "'%s' property in message is NULL", DR_AP_ID);
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, DR_REQ_TYPE, &req) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ DR_REQ_TYPE);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", DR_REQ_TYPE, req ? req : "<null>");
+ if ((req == NULL) || (strlen(req) == 0)) {
+ debug_print(2, "'%s' property in message is NULL", DR_REQ_TYPE);
+ goto L_EXIT;
+ }
+
+ if (nvlist_lookup_string(attr_list, ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ &acpi_event_type) != 0) {
+ debug_print(2,
+ "can't retrieve '%s' property from attribute list",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ goto L_EXIT;
+ }
+ debug_print(2, "%s = '%s'", ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ acpi_event_type ? acpi_event_type : "<null>");
+ if ((acpi_event_type == NULL) || (strlen(acpi_event_type) == 0)) {
+ debug_print(2, "'%s' property in message is NULL",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ goto L_EXIT;
+ }
+
+ (void) event_process(ap_id, req, acpi_event_type);
+
+L_EXIT:
+ if (vendor != NULL) {
+ free(vendor);
+ }
+
+ if (attr_list != NULL) {
+ nvlist_free(attr_list);
+ }
+
+ /* No need to free class & subclass. */
+}
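
As a worked example of what plat_opt_str_alloc() above produces: for SBD_CMD_PASSTHRU the string is handed straight to the sbd plugin's private function, while every other command gets it wrapped as a cfgadm platform option. The literal status, attribute, and event-type values in this sketch are illustrative assumptions; the real strings come from the ACPIDEV_CMD_OST_* and ACPIDEV_EVENT_TYPE_ATTR_NAME definitions in <sys/acpidev.h>:

#include <stdio.h>

/*
 * Illustration only: the two option-string shapes built by
 * plat_opt_str_alloc() in notify.c.  The values of "status", "attr",
 * and "type" below are assumed stand-ins for the real
 * ACPIDEV_CMD_OST_* / ACPIDEV_EVENT_TYPE_ATTR_NAME definitions.
 */
int
main(void)
{
	const char *status = "acpi-update-status=inprogress";	/* assumed */
	const char *attr = "acpi-event-type";			/* assumed */
	const char *type = "bus_check";				/* assumed */

	/* SBD_CMD_PASSTHRU: passed directly to config_private_func() */
	(void) printf("%s %s=%s\n", status, attr, type);

	/* All other commands: wrapped as a platform-specific option */
	(void) printf("platform='%s %s=%s'\n", status, attr, type);
	return (0);
}
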
diff --git a/usr/src/cmd/acpihpd/svc-acpihpd b/usr/src/cmd/acpihpd/svc-acpihpd
new file mode 100644
index 0000000000..f45f05ed06
--- /dev/null
+++ b/usr/src/cmd/acpihpd/svc-acpihpd
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+. /lib/svc/share/smf_include.sh
+
+PLATFORM="i86pc"
+ACPIHPD=/usr/platform/${PLATFORM}/lib/acpihpd
+
+/usr/sbin/prtconf -D /devices/fw/acpidr@0 > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ svcadm disable -t $SMF_FMRI
+ sleep 5&
+ exit $SMF_EXIT_OK
+fi
+
+$ACPIHPD && exit $SMF_EXIT_OK || exit $SMF_EXIT_ERR_FATAL
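
The start method's probe above keys off the presence of the ACPI DR nexus node. A hedged C equivalent using libdevinfo is sketched below; the device path is taken from the script, and treating di_init() failure as "node absent" is an assumption of this sketch rather than documented behavior:

#include <libdevinfo.h>
#include <stdio.h>

/*
 * Sketch of the same platform probe the start method performs with
 * `prtconf -D /devices/fw/acpidr@0`: take a devinfo snapshot rooted
 * at the acpidr node and treat failure as "no ACPI DR support".
 * Build (assumption): cc probe.c -ldevinfo
 */
int
main(void)
{
	di_node_t node;

	if ((node = di_init("/fw/acpidr@0", DINFOSUBTREE)) == DI_NODE_NIL) {
		(void) printf("no acpidr node; ACPI DR not supported\n");
		return (1);
	}
	di_fini(node);
	(void) printf("acpidr node present\n");
	return (0);
}
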
diff --git a/usr/src/lib/cfgadm_plugins/Makefile b/usr/src/lib/cfgadm_plugins/Makefile
index bb37297b11..9fc029e2ec 100644
--- a/usr/src/lib/cfgadm_plugins/Makefile
+++ b/usr/src/lib/cfgadm_plugins/Makefile
@@ -27,8 +27,8 @@
include $(SRC)/Makefile.master
-COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp
-sparc_SUBDIRS= sbd ac sysctrl
+COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp sbd
+sparc_SUBDIRS= ac sysctrl
i386_SUBDIRS= sata
diff --git a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
index fb3046ba42..e28db75c53 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
+++ b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
@@ -23,7 +23,6 @@
# Use is subject to license terms.
#
-PLATFORM= sun4u
LIBRARY= sbd.a
VERS= .1
@@ -33,7 +32,7 @@ OBJECTS= ap.o ap_msg.o ap_rcm.o ap_sbd.o ap_seq.o cfga.o ap_err.o
include ../../../Makefile.lib
USR_PLAT_DIR = $(ROOT)/usr/platform
-USR_PSM_DIR = $(USR_PLAT_DIR)/sun4u
+USR_PSM_DIR = $(USR_PLAT_DIR)/$(PLATFORM)
USR_PSM_LIB_DIR = $(USR_PSM_DIR)/lib
USR_PSM_LIB_CFG_DIR = $(USR_PSM_LIB_DIR)/cfgadm
USR_PSM_LIB_CFG_DIR_64 = $(USR_PSM_LIB_CFG_DIR)/$(MACH64)
@@ -54,7 +53,7 @@ CPPFLAGS += -I$(ROOT)/usr/platform/$(PLATFORM)/include -DSBD_DEBUG
#
GENERRDIR= $(SRC)/lib/cfgadm_plugins/sbd
GENERR= $(GENERRDIR)/sbdgenerr
-ERRSRC= $(ROOT)/usr/platform/sun4u/include/sys/sbd_ioctl.h
+ERRSRC= $(ROOT)/usr/platform/$(PLATFORM)/include/sys/sbd_ioctl.h
.KEEP_STATE:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
new file mode 100644
index 0000000000..9fd69cde68
--- /dev/null
+++ b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+PLATFORM = i86pc
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+CFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LDLIBS += -lscf
+
+.KEEP_STATE:
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
index c7c3e96f69..502d192014 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
@@ -40,6 +37,7 @@
#include <config_admin.h>
#include "ap.h"
+/*ARGSUSED0*/
int
ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
{
@@ -78,13 +76,13 @@ ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
cp--; /* Get the '/' */
if (strcmp(cp, apid) == 0) {
- snprintf(symid, bufsize, "%s", dp->d_name);
+ (void) snprintf(symid, bufsize, "%s", dp->d_name);
rc = 0;
break;
}
}
- closedir(dirp);
+ (void) closedir(dirp);
return (rc);
}
@@ -293,7 +291,7 @@ done:
DBG("path=<%s> ", a->path ? a->path : "");
DBG("drv=<%s> inst=%d minor=<%s> ",
- a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
+ a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
DBG("target=<%s>\n", a->target ? a->target : "");
DBG("cid=<%s> ", a->cid ? a->cid : "");
DBG("cname=<%s> ", a->cname ? a->cname : "");
@@ -612,7 +610,7 @@ ap_opt_parse(apd_t *a, ap_cmd_t *acp, const char *options)
* during sequencing.
*/
for (p = strtok(value, ":"); p != NULL;
- p = strtok(NULL, ":")) {
+ p = strtok(NULL, ":")) {
if ((i = ap_cmd(p)) == CMD_NONE) {
ap_err(a, ERR_CMD_INVAL, p);
return (-1);
@@ -711,8 +709,7 @@ ap_cmd_parse(apd_t *a, const char *f, const char *options, int *cmd)
target = mask(tgt);
DBG("cmd=%s(%d) tmask=0x%x cmask=0x%x omask=0x%x\n",
- ap_cmd_name(c), c, target,
- acp->cmask, acp->omask[tgt]);
+ ap_cmd_name(c), c, target, acp->cmask, acp->omask[tgt]);
if ((acp->cmask & target) == 0)
ap_err(a, ERR_CMD_NOTSUPP, c);
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
index d2049bdc1d..1005dab325 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -63,8 +61,7 @@ debugging(void)
if ((debug_fp = fopen(ep, "a")) == NULL)
return (0);
}
- (void) fprintf(debug_fp,
- "\nDebug started, pid=%d\n", (int)getpid());
+ (void) fprintf(debug_fp, "\nDebug started, pid=%d\n", (int)getpid());
return (1);
}
@@ -110,7 +107,7 @@ ap_err_fmts[] = {
NULL
};
-#define ap_err_fmt(i) ap_err_fmts[min((i), ERR_NONE)]
+#define ap_err_fmt(i) ap_err_fmts[min((uint_t)(i), ERR_NONE)]
static char *
ap_msg_fmts[] = {
@@ -125,7 +122,7 @@ ap_msg_fmts[] = {
NULL
};
-#define ap_msg_fmt(i) ap_msg_fmts[min((i), MSG_NONE)]
+#define ap_msg_fmt(i) ap_msg_fmts[min((uint_t)(i), MSG_NONE)]
#define STR_BD "board"
#define STR_SEP ": "
@@ -366,7 +363,7 @@ ap_err(apd_t *a, ...)
switch (err) {
case ERR_CMD_FAIL:
(void) snprintf(p, len, fmt, cmd, target,
- syserr, rsep, sysrsrc);
+ syserr, rsep, sysrsrc);
break;
case ERR_CMD_ABORT:
case ERR_CMD_NACK:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
index f917bb5ae4..770784f16f 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -145,6 +143,8 @@ ap_rcm_ops[] = {
#ifdef __sparcv9
#define RCMLIB "/lib/sparcv9/librcm.so";
+#elif defined(__amd64)
+#define RCMLIB "/lib/amd64/librcm.so";
#else
#define RCMLIB "/lib/librcm.so";
#endif
@@ -323,22 +323,21 @@ ap_rcm_init(apd_t *a)
switch (i) {
case ALLOC_HANDLE:
rcm->alloc_handle = (int(*)
- (char *, uint_t, void *, rcm_handle_t **))sym;
+ (char *, uint_t, void *, rcm_handle_t **))sym;
break;
case FREE_HANDLE:
rcm->free_handle = (void (*)(rcm_handle_t *))sym;
break;
case GET_INFO:
rcm->get_info = (int (*)
- (rcm_handle_t *, char *, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t, rcm_info_t **))sym;
break;
case FREE_INFO:
rcm->free_info = (void (*)(rcm_info_t *))sym;
break;
case INFO_TUPLE_NEXT:
rcm->info_next = (rcm_info_tuple_t *(*)
- (rcm_info_t *, rcm_info_tuple_t *))sym;
+ (rcm_info_t *, rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_STATE:
rcm->info_state = (int (*)(rcm_info_tuple_t *))sym;
@@ -348,50 +347,50 @@ ap_rcm_init(apd_t *a)
break;
case INFO_TUPLE_ERROR:
rcm->info_error = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_INFO:
rcm->info_info = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case INFO_TUPLE_RSRC:
rcm->info_rsrc = (const char *(*)
- (rcm_info_tuple_t *))sym;
+ (rcm_info_tuple_t *))sym;
break;
case REQUEST_OFFLINE:
rcm->request_offline_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case NOTIFY_ONLINE:
rcm->notify_online_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case REQUEST_SUSPEND:
rcm->request_suspend = (int (*)
- (rcm_handle_t *, char *, uint_t,
- timespec_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ timespec_t *, rcm_info_t **))sym;
break;
case NOTIFY_RESUME:
rcm->notify_resume = (int (*)
- (rcm_handle_t *, char *, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ rcm_info_t **))sym;
break;
case NOTIFY_REMOVE:
rcm->notify_remove_list = (int (*)
- (rcm_handle_t *, char **, uint_t,
- rcm_info_t **))sym;
+ (rcm_handle_t *, char **, uint_t,
+ rcm_info_t **))sym;
break;
case REQUEST_CAP_CHANGE:
rcm->request_capacity_change = (int (*)
- (rcm_handle_t *, char *, uint_t,
- nvlist_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ nvlist_t *, rcm_info_t **))sym;
break;
case NOTIFY_CAP_CHANGE:
rcm->notify_capacity_change = (int (*)
- (rcm_handle_t *, char *, uint_t,
- nvlist_t *, rcm_info_t **))sym;
+ (rcm_handle_t *, char *, uint_t,
+ nvlist_t *, rcm_info_t **))sym;
break;
default:
break;
@@ -400,7 +399,7 @@ ap_rcm_init(apd_t *a)
if (rcm->alloc_handle == NULL ||
(*rcm->alloc_handle)(NULL, RCM_NOPID, NULL, &rcm->hd)
- != RCM_SUCCESS) {
+ != RCM_SUCCESS) {
ap_err(a, ERR_RCM_HANDLE);
return (CFGA_LIB_ERROR);
}
@@ -441,7 +440,7 @@ ap_rcm_init(apd_t *a)
rcm->ncpus = ncpu;
if ((rcm->cpuids = (cpuid_t *)calloc(ncpu, sizeof (cpuid_t)))
- == NULL) {
+ == NULL) {
ap_err(a, ERR_NOMEM);
return (CFGA_LIB_ERROR);
}
@@ -781,20 +780,21 @@ ap_rcm_cap_cpu(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
nvlist_add_int32(nvl, "old_total", oldncpuids) != 0 ||
nvlist_add_int32(nvl, "new_total", newncpuids) != 0 ||
nvlist_add_int32_array(nvl, "old_cpu_list", oldcpuids,
- oldncpuids) != 0 ||
+ oldncpuids) != 0 ||
nvlist_add_int32_array(nvl, "new_cpu_list", newcpuids,
- newncpuids) != 0)
+ newncpuids) != 0)
goto done;
- snprintf(buf, sizeof (buf), fmt, change);
+ (void) snprintf(buf, sizeof (buf), fmt, change);
ap_msg(a, MSG_ISSUE, cmd, buf);
- if (cmd == CMD_RCM_CAP_DEL)
+ if (cmd == CMD_RCM_CAP_DEL) {
rv = (*rcm->request_capacity_change)(hd, "SUNW_cpu",
- flags, nvl, rinfo);
- else
+ flags, nvl, rinfo);
+ } else {
rv = (*rcm->notify_capacity_change)(hd, "SUNW_cpu",
- flags & ~RCM_FORCE, nvl, rinfo);
+ flags & ~RCM_FORCE, nvl, rinfo);
+ }
done:
if (nvl)
@@ -865,12 +865,13 @@ ap_rcm_cap_mem(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
(void) snprintf(buf, sizeof (buf), "(%ld pages)", change);
ap_msg(a, MSG_ISSUE, cmd, buf);
- if (cmd == CMD_RCM_CAP_DEL)
+ if (cmd == CMD_RCM_CAP_DEL) {
rv = (*rcm->request_capacity_change)(hd, "SUNW_memory",
- flags, nvl, rinfo);
- else
+ flags, nvl, rinfo);
+ } else {
rv = (*rcm->notify_capacity_change)(hd, "SUNW_memory",
- flags & ~RCM_FORCE, nvl, rinfo);
+ flags & ~RCM_FORCE, nvl, rinfo);
+ }
nvlist_free(nvl);
@@ -923,11 +924,13 @@ ap_rcm_request_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
}
if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS))
+ CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS))
+ CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
return (CFGA_OK);
}
@@ -945,8 +948,8 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
DBG("ap_rcm_add_cap(%p)\n", (void *)a);
/* Get the new capacity info to figure out what has changed */
- if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
- != CFGA_OK)
+ if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+ CFGA_OK)
return (rc);
if (capinfo == NULL) {
@@ -981,7 +984,7 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
type = ap_cm_type(a, cm);
DBG("cm=%d valid=%d type=%d, prevos=%d os=%d\n",
- cm, prevvalidity, type, prevos, os);
+ cm, prevvalidity, type, prevos, os);
/*
* We are interested only in those components
@@ -1008,11 +1011,13 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
free(capinfo);
if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS))
+ CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
- CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS))
+ CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS)) {
return (CFGA_LIB_ERROR);
+ }
return (CFGA_OK);
}
@@ -1045,8 +1050,8 @@ ap_rcm_notify_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
DBG("ap_rcm_notify_cap(%p)\n", (void *)a);
/* Get the new capacity info to figure out what has changed */
- if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
- != CFGA_OK)
+ if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+ CFGA_OK)
return (rc);
if (capinfo == NULL) {
@@ -1183,8 +1188,7 @@ ap_rcm_ctl(apd_t *a, int cmd)
if (rcm->capinfo == NULL)
noop++;
else
- rc = ap_rcm_request_cap(a, rcm, hd, &rv,
- flags, &rinfo);
+ rc = ap_rcm_request_cap(a, rcm, hd, &rv, flags, &rinfo);
break;
case CMD_RCM_CAP_ADD:
rc = ap_rcm_add_cap(a, rcm, hd, &rv, flags, &rinfo);
@@ -1266,13 +1270,13 @@ ap_rcm_ctl(apd_t *a, int cmd)
ap_msg(a, MSG_ISSUE, cmd, rlist[nrsrc]);
if (cmd == CMD_RCM_OFFLINE)
rv = (*rcm->request_offline_list)(hd, rlist, flags,
- &rinfo);
+ &rinfo);
else if (cmd == CMD_RCM_ONLINE)
rv = (*rcm->notify_online_list)(hd, rlist,
- flags & ~RCM_FORCE, &rinfo);
+ flags & ~RCM_FORCE, &rinfo);
else
rv = (*rcm->notify_remove_list)(hd, rlist,
- flags & ~RCM_FORCE, &rinfo);
+ flags & ~RCM_FORCE, &rinfo);
break;
}
case CMD_RCM_SUSPEND: {
@@ -1440,7 +1444,7 @@ ap_rcm_info(apd_t *a, char **msg)
if ((infostr = (*rcm->info_info)(tuple)) != NULL) {
(void) strcat(*msg, "\n");
(void) sprintf(&((*msg)[strlen(*msg)]), format,
- (*rcm->info_rsrc)(tuple), infostr);
+ (*rcm->info_rsrc)(tuple), infostr);
}
}
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
index 70c3fe7dee..7c3b34b1f0 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -175,11 +172,11 @@ ap_stat(apd_t *a, int all)
*/
ctl->ic_type = SBD_COMP_UNKNOWN;
ctl->ic_unit = a->cnum;
- strcpy(ctl->ic_name, a->cname);
+ (void) strcpy(ctl->ic_name, a->cname);
}
DBG("ioctl(%d SBD_CMD_STATUS, sc=0x%p sz=%d flags=%d",
- fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
+ fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
if (select)
DBG(" cname=<%s> cnum=%d", a->cname, a->cnum);
DBG(")\n");
@@ -282,7 +279,7 @@ apd_init(apd_t *a, int all)
for (dst = st->s_stat, i = 0; i < st->s_nstat; i++, dst++) {
DBG("ds_name,ds_unit,ds_type=<%s,%d,%d> ",
- dst->ds_name, dst->ds_unit, dst->ds_type);
+ dst->ds_name, dst->ds_unit, dst->ds_type);
if (dst->ds_unit != a->cnum)
continue;
@@ -331,7 +328,7 @@ apd_free(apd_t *a)
ap_rcm_fini(a);
if (a->fd != -1)
- close(a->fd);
+ (void) close(a->fd);
s_free(a->options);
s_free(a->path);
@@ -405,7 +402,7 @@ ap_init(apd_t *a, cfga_list_data_t *ap)
st = (sbd_stat_t *)a->stat;
DBG("ap_init bd=%d rs=%d os=%d type=<%s>\n",
- a->bnum, st->s_rstate, st->s_ostate, st->s_type);
+ a->bnum, st->s_rstate, st->s_ostate, st->s_type);
parsable_strncpy(ap->ap_type, st->s_type, sizeof (ap->ap_type));
ap->ap_r_state = (cfga_stat_t)st->s_rstate;
@@ -555,7 +552,7 @@ ap_ioctl(apd_t *a, int cmd)
else {
ctl->ic_type = SBD_COMP_UNKNOWN;
ctl->ic_unit = a->cnum;
- strcpy(ctl->ic_name, a->cname);
+ (void) strcpy(ctl->ic_name, a->cname);
}
if (!(ioc = ap_ioc(cmd))) {
@@ -638,7 +635,7 @@ mod_estr(int code)
if (i == sbd_etab_len) {
char buf[32];
- snprintf(buf, sizeof (buf), "error %d", code);
+ (void) snprintf(buf, sizeof (buf), "error %d", code);
s = strdup(buf);
}
@@ -760,7 +757,7 @@ ap_help(struct cfga_msg *msgp, const char *options, cfga_flags_t flags)
continue;
if ((q = (char *)calloc(len + 1, 1)) == NULL)
continue;
- strcpy(q, *p);
+ (void) strcpy(q, *p);
(*msgp->message_routine)(msgp->appdata_ptr, q);
free(q);
}
@@ -823,7 +820,7 @@ ap_cm_devpath(apd_t *a, int seq)
* Assume an io component so that we can get
* the path if the component is indeed of type io.
*/
- if (seq == -1)
+ if (seq == CM_DFLT)
dst = (sbd_io_stat_t *)a->cmstat;
else {
sbd_stat_t *st;
@@ -1002,7 +999,7 @@ ap_cm_init(apd_t *a, cfga_list_data_t *ap, int seq)
a->cmstat = (void *)dst;
DBG("ap_cm_init bd=%d rs=%d os=%d type=<%s> seq=%d\n",
- a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
+ a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
(void) strncpy(ap->ap_type, type, sizeof (ap->ap_type));
ap->ap_r_state = (cfga_stat_t)st->s_rstate;
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
index 929dc0251c..0536b838f4 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
@@ -40,6 +37,12 @@
#include <config_admin.h>
#include "ap.h"
+#ifdef __x86
+#include <libscf_priv.h>
+
+static int fastreboot_disabled;
+#endif /* __x86 */
+
static cfga_err_t
ap_suspend_check(apd_t *a, int cmd, int first, int last, int *suspend)
{
@@ -202,7 +205,7 @@ ap_seq_get(apd_t *a, int cmd, int *first, int *last)
*last = l;
DBG("ap_seq(%d, %d, %d, %p, %p) = (%d, %d)\n",
- rs, os, cmd, (void *)first, (void *)last, f, l);
+ rs, os, cmd, (void *)first, (void *)last, f, l);
return (AP_SEQ_OK);
}
@@ -268,7 +271,7 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
* interpose on the suspend operation.
*/
rc = ap_suspend_check(a, cmd,
- first + 1, last, &suspend);
+ first + 1, last, &suspend);
break;
case CMD_RCM_SUSPEND:
if (suspend && ((rc = ap_rcm_ctl(a, c)) == CFGA_OK)) {
@@ -297,6 +300,41 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
case CMD_RCM_CAP_NOTIFY:
(void) ap_rcm_ctl(a, c);
break;
+
+#ifdef __x86
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't tolerate such inconsistency between stale ACPI tables
+ * and real hardware configuration yet.
+ *
+ * A temporary solution is introduced to disable fast reboot if a
+ * CPU/MEM/IOH hotplug event happens. This solution should be
+ * revised when fast reboot is enhanced to support CPU/MEM/IOH
+ * DR operations.
+ */
+ case CMD_ASSIGN:
+ case CMD_POWERON:
+ case CMD_POWEROFF:
+ case CMD_UNASSIGN:
+ if (!fastreboot_disabled &&
+ scf_fastreboot_default_set_transient(B_FALSE) ==
+ SCF_SUCCESS) {
+ fastreboot_disabled = 1;
+ }
+ /* FALLTHROUGH */
+#endif /* __x86 */
+
default:
rc = ap_ioctl(a, c);
break;
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c b/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
index a891746634..c03350e19b 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/cfga.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
@@ -243,14 +240,14 @@ cfga_list_ext(
ap_cm_id(a, i, dyn, sizeof (dyn));
(void) snprintf(ap->ap_log_id, szl, "%s::%s",
- a->target, dyn);
+ a->target, dyn);
(void) snprintf(ap->ap_phys_id, szp, "%s::%s",
- a->path, dyn);
+ a->path, dyn);
ap_cm_init(a, ap, i);
DBG("ap_phys_id=%s ap_log_id=%s\n",
- ap->ap_phys_id, ap->ap_log_id);
+ ap->ap_phys_id, ap->ap_log_id);
}
} else
diff --git a/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile
new file mode 100644
index 0000000000..7a382d291f
--- /dev/null
+++ b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+PLATFORM = i86pc
+
+include ../Makefile.com
+
+CFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LDLIBS += -lscf
+
+.KEEP_STATE:
+
+install: all $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
index dcaf7b8438..39bd47a725 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
+++ b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,10 +22,10 @@
# Copyright (c) 2000 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# lib/sbd/sparc/Makefile
+PLATFORM = sun4u
+
include ../Makefile.com
.KEEP_STATE:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
index 4ac3f6310c..1d9c67c4e1 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
+++ b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -23,10 +22,10 @@
# Copyright (c) 2000 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# lib/sbd/sparcv9/Makefile
+PLATFORM = sun4u
+
include ../Makefile.com
include ../../../Makefile.lib.64
diff --git a/usr/src/pkg/license_files/cr_Intel.hotplug b/usr/src/pkg/license_files/cr_Intel.hotplug
new file mode 100644
index 0000000000..bf0648aabc
--- /dev/null
+++ b/usr/src/pkg/license_files/cr_Intel.hotplug
@@ -0,0 +1 @@
+ * Copyright(c) 2009 - 2010 Intel Corporation. All rights reserved.
diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf
index 113817866b..b95dd46026 100644
--- a/usr/src/pkg/manifests/system-header.mf
+++ b/usr/src/pkg/manifests/system-header.mf
@@ -1660,6 +1660,7 @@ $(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_defs.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_modctl.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_types.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/rm_platter.h
+$(i386_ONLY)file path=usr/platform/i86pc/include/sys/sbd_ioctl.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/smp_impldefs.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/vm_machparam.h
$(i386_ONLY)file path=usr/platform/i86pc/include/sys/x_call.h
diff --git a/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf
new file mode 100644
index 0000000000..28ddc833bb
--- /dev/null
+++ b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include hollow_zone_pkg>
+set name=pkg.fmri \
+ value=pkg:/system/kernel/dynamic-reconfiguration/i86pc@$(PKGVERS)
+set name=pkg.description \
+ value="Dynamic Reconfiguration Modules for i86pc required to support \
+    hotplug on i86pc-based systems. Includes the ACPI Hotplug Daemon, SMF \
+ manifests/methods and drivers for hotplug operations."
+set name=pkg.summary value="Dynamic Reconfiguration Modules for i86pc"
+set name=info.classification \
+ value="org.opensolaris.category.2008:System/Administration and Configuration" \
+ value="org.opensolaris.category.2008:System/Enterprise Management"
+set name=variant.arch value=i386
+set name=variant.opensolaris.zone value=global value=nonglobal
+dir path=platform group=sys
+dir path=platform/i86pc group=sys
+dir path=platform/i86pc/kernel group=sys
+dir path=platform/i86pc/kernel/drv group=sys
+dir path=platform/i86pc/kernel/drv/$(ARCH64) group=sys
+dir path=platform/i86pc/kernel/misc group=sys
+dir path=platform/i86pc/kernel/misc/$(ARCH64) group=sys
+file path=platform/i86pc/kernel/misc/$(ARCH64)/drmach_acpi group=sys \
+ mode=0755
+dir path=usr group=sys
+dir path=usr/platform group=sys
+dir path=usr/platform/i86pc group=sys
+dir path=usr/platform/i86pc/lib group=sys
+dir path=var group=sys
+dir path=var/svc group=sys
+dir path=var/svc/manifest group=sys
+dir path=var/svc/manifest/platform group=sys
+dir path=var/svc/manifest/platform/i86pc group=sys
+file path=platform/i86pc/kernel/drv/$(ARCH64)/dr group=sys
+driver name=dr alias=acpidr_sbd
+file path=platform/i86pc/kernel/drv/dr.conf group=sys mode=0644
+file path=usr/platform/i86pc/lib/acpihpd group=sys mode=0755
+file path=var/svc/manifest/platform/i86pc/acpihpd.xml group=sys mode=0444
+dir path=lib group=sys
+dir path=lib/svc group=sys
+dir path=lib/svc/method group=sys
+file path=lib/svc/method/svc-acpihpd group=sys mode=0555
+dir path=usr/platform/i86pc/lib/cfgadm
+dir path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)
+file path=usr/platform/i86pc/lib/cfgadm/sbd.so.1
+link path=usr/platform/i86pc/lib/cfgadm/sbd.so \
+ target=./sbd.so.1
+file path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so.1
+link path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so \
+ target=./sbd.so.1
+license cr_Intel.hotplug license=cr_Intel
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
+depend fmri=pkg:/system/kernel/power type=require
diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c
index 11162ccfc9..b3ed323f92 100644
--- a/usr/src/uts/common/os/cpu_event.c
+++ b/usr/src/uts/common/os/cpu_event.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -152,8 +152,13 @@ typedef struct cpu_idle_cb_item {
/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
typedef union cpu_idle_cb_state {
struct {
+ /* Index of already invoked callbacks. */
int index;
+ /* Invoke registered callbacks if true. */
+ boolean_t enabled;
+ /* Property values are valid if true. */
boolean_t ready;
+ /* Pointers to per-CPU properties. */
cpu_idle_prop_value_t *idle_state;
cpu_idle_prop_value_t *enter_ts;
cpu_idle_prop_value_t *exit_ts;
@@ -182,6 +187,17 @@ static int cpu_idle_cb_max = 0;
static cpu_idle_cb_state_t *cpu_idle_cb_state;
+#ifdef __x86
+/*
+ * Cpuset used to intercept CPUs before powering them off.
+ * The control CPU sets the bit corresponding to the target CPU and waits
+ * until the bit is cleared.
+ * The target CPU disables interrupts before clearing the corresponding
+ * bit and then loops forever.
+ */
+static cpuset_t cpu_idle_intercept_set;
+#endif
+
static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
cpu_idle_prop_value_t *valp);
@@ -326,17 +342,26 @@ cpu_event_init(void)
#endif
}
+/*
+ * This function is called to initialize per-CPU state when starting CPUs.
+ */
void
cpu_event_init_cpu(cpu_t *cp)
{
ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.index = 0;
cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
+ cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_TRUE;
}
+/*
+ * This function is called to clean up per-CPU state when stopping CPUs.
+ */
void
cpu_event_fini_cpu(cpu_t *cp)
{
ASSERT(cp->cpu_seqid < max_ncpus);
+ cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_FALSE;
cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
}
@@ -618,6 +643,21 @@ cpu_idle_enter(int state, int flag,
ASSERT(CPU->cpu_seqid < max_ncpus);
sp = &cpu_idle_cb_state[CPU->cpu_seqid];
ASSERT(sp->v.index == 0);
+ if (sp->v.enabled == B_FALSE) {
+#if defined(__x86)
+		/* Intercept the CPU at a safe point before powering it off. */
+ if (CPU_IN_SET(cpu_idle_intercept_set, CPU->cpu_id)) {
+ iflags = intr_clear();
+ CPUSET_ATOMIC_DEL(cpu_idle_intercept_set, CPU->cpu_id);
+ /*CONSTCOND*/
+ while (1) {
+ SMT_PAUSE();
+ }
+ }
+#endif
+
+ return (0);
+ }
/*
* On x86, cpu_idle_enter can be called from idle thread with either
@@ -1091,3 +1131,26 @@ cpu_idle_get_cpu_state(cpu_t *cp)
cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
CPU_IDLE_GET_CTX(cp)));
}
+
+#if defined(__x86)
+/*
+ * Intercept CPU at a safe point in idle() before powering it off.
+ */
+void
+cpu_idle_intercept_cpu(cpu_t *cp)
+{
+ ASSERT(cp->cpu_seqid < max_ncpus);
+ ASSERT(cpu_idle_cb_state[cp->cpu_seqid].v.enabled == B_FALSE);
+
+ /* Set flag to intercept CPU. */
+ CPUSET_ATOMIC_ADD(cpu_idle_intercept_set, cp->cpu_id);
+ /* Wake up CPU from possible sleep state. */
+ poke_cpu(cp->cpu_id);
+ while (CPU_IN_SET(cpu_idle_intercept_set, cp->cpu_id)) {
+ DELAY(1);
+ }
+ /*
+	 * Now the target CPU is spinning in a pause loop with
+	 * interrupts disabled.
+ */
+}
+#endif
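
A sketch of how a power-off path might pair the two interfaces added above; the caller name and the surrounding sequencing are assumptions for illustration, not part of this patch.

    /* Hypothetical caller: park a CPU before powering it off. */
    static void
    plat_prepare_cpu_poweroff(cpu_t *cp)
    {
    	/* Stop idle callbacks on the target (sets v.enabled = B_FALSE). */
    	cpu_event_fini_cpu(cp);

    	/*
    	 * Park the target: it clears its bit in cpu_idle_intercept_set
    	 * with interrupts disabled and then spins in SMT_PAUSE(), so it
    	 * can no longer enter C-state or callback code.
    	 */
    	cpu_idle_intercept_cpu(cp);

    	/* The target CPU may now be powered off safely. */
    }
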
diff --git a/usr/src/uts/common/os/cpu_pm.c b/usr/src/uts/common/os/cpu_pm.c
index 324e4168d7..b885e421b8 100644
--- a/usr/src/uts/common/os/cpu_pm.c
+++ b/usr/src/uts/common/os/cpu_pm.c
@@ -677,9 +677,13 @@ cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
cpupm_state_t *new_state = NULL;
did = cpupm_domain_id(cp, type);
- mutex_enter(&cpu_lock);
- dom = cpupm_domain_find(did, type);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ dom = cpupm_domain_find(did, type);
+ } else {
+ mutex_enter(&cpu_lock);
+ dom = cpupm_domain_find(did, type);
+ mutex_exit(&cpu_lock);
+ }
/*
* Can use a lock to avoid changing the power state of the cpu when
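
The hunk above lets callers that already own cpu_lock (for example, hotplug paths) reuse the function without self-deadlocking. The same idiom in generic form, as a sketch with placeholder names:

    static void
    do_locked_work(void (*work)(void))
    {
    	boolean_t held = MUTEX_HELD(&cpu_lock);	/* owned by curthread? */

    	if (!held)
    		mutex_enter(&cpu_lock);
    	work();			/* runs under cpu_lock either way */
    	if (!held)
    		mutex_exit(&cpu_lock);
    }
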
diff --git a/usr/src/uts/common/os/mem_config.c b/usr/src/uts/common/os/mem_config.c
index be67866592..20a89b0611 100644
--- a/usr/src/uts/common/os/mem_config.c
+++ b/usr/src/uts/common/os/mem_config.c
@@ -59,9 +59,6 @@
extern struct memlist *phys_avail;
-extern void mem_node_add(pfn_t, pfn_t);
-extern void mem_node_del(pfn_t, pfn_t);
-
extern uint_t page_ctrs_adjust(int);
void page_ctrs_cleanup(void);
static void kphysm_setup_post_add(pgcnt_t);
@@ -107,6 +104,13 @@ static void memseg_cpu_vm_flush(void);
int meta_alloc_enable;
+#ifdef DEBUG
+static int memseg_debug;
+#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
+#else
+#define MEMSEG_DEBUG(...)
+#endif
+
/*
* Add a chunk of memory to the system.
* base: starting PAGESIZE page of new memory.
@@ -117,13 +121,6 @@ int meta_alloc_enable;
* dynamically most likely means more hash misses, since the tables will
* be smaller than they otherwise would be.
*/
-#ifdef DEBUG
-static int memseg_debug;
-#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
-#else
-#define MEMSEG_DEBUG(...)
-#endif
-
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
@@ -341,6 +338,9 @@ mapalloc:
* this may change with COD or in larger SSM systems with
* nested latency groups, so we must not assume that the
* node does not yet exist.
+ *
+ * Note that there may be multiple memory nodes associated with
+ * a single lgrp node on x86 systems.
*/
pnum = pt_base + tpgs - 1;
mem_node_add_range(pt_base, pnum);
@@ -569,8 +569,8 @@ mapalloc:
(uint64_t)(tpgs) << PAGESHIFT);
delspan_unreserve(pt_base, tpgs);
- return (KPHYSM_OK); /* Successfully added system memory */
+ return (KPHYSM_OK); /* Successfully added system memory */
}
/*
@@ -2566,7 +2566,7 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
int phase;
- ASSERT(IS_P2ALIGNED((uint64_t)va, PAGESIZE));
+ ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));
/*
* We may start remapping at a non-zero page offset
@@ -2574,7 +2574,8 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs)
* of the outgoing pp's could be shared by other
* memsegs (see memseg_remap_meta).
*/
- phase = btop((uint64_t)va) % pp_dummy_npages;
+ phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
+ /*CONSTCOND*/
ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);
while (metapgs != 0) {
diff --git a/usr/src/uts/common/os/mem_config_stubs.c b/usr/src/uts/common/os/mem_config_stubs.c
index d3226ff633..1a265173f4 100644
--- a/usr/src/uts/common/os/mem_config_stubs.c
+++ b/usr/src/uts/common/os/mem_config_stubs.c
@@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
@@ -44,6 +42,7 @@ pgcnt_t kcage_desfree;
pgcnt_t kcage_needfree;
pgcnt_t kcage_lotsfree = 1;
+#if !defined(__x86) || defined(__xpv)
/*ARGSUSED*/
int
kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
@@ -56,6 +55,7 @@ void
kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
{
}
+#endif
/*ARGSUSED*/
int
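
With real x86 implementations now built from the i86pc mem_config.c, the registration stubs above are compiled only where memory DR is absent. For context, a sketch of a client of this interface; the callback names and bodies are illustrative, and the vector layout is as declared in <sys/mem_config.h>.

    #include <sys/mem_config.h>

    static void
    my_post_add(void *arg, pgcnt_t delta)
    {
    	/* Memory was added: grow caches, recompute tunables, etc. */
    }

    static int
    my_pre_del(void *arg, pgcnt_t delta)
    {
    	return (0);		/* zero allows the delete to proceed */
    }

    static void
    my_post_del(void *arg, pgcnt_t deleted, int cancelled)
    {
    	/* The delete completed, or was cancelled if cancelled != 0. */
    }

    static kphysm_setup_vector_t my_vec = {
    	KPHYSM_SETUP_VECTOR_VERSION,
    	my_post_add,
    	my_pre_del,
    	my_post_del
    };

    /* Registration: (void) kphysm_setup_func_register(&my_vec, NULL); */
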
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index 91e2b72dc3..33cc970aea 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -85,6 +85,10 @@
#include <sys/zone.h>
#include <sys/clock_impl.h>
#include <sys/ddi.h>
+#include <sys/modhash.h>
+#include <sys/sunldi_impl.h>
+#include <sys/fs/dv_node.h>
+#include <sys/fs/snode.h>
extern pri_t minclsyspri;
@@ -9369,3 +9373,938 @@ ddi_cb_unregister(ddi_cb_handle_t hdl)
return (DDI_SUCCESS);
}
+
+/*
+ * Platform independent DR routines
+ */
+
+static int
+ndi2errno(int n)
+{
+ int err = 0;
+
+ switch (n) {
+ case NDI_NOMEM:
+ err = ENOMEM;
+ break;
+ case NDI_BUSY:
+ err = EBUSY;
+ break;
+ case NDI_FAULT:
+ err = EFAULT;
+ break;
+ case NDI_FAILURE:
+ err = EIO;
+ break;
+ case NDI_SUCCESS:
+ break;
+ case NDI_BADHANDLE:
+ default:
+ err = EINVAL;
+ break;
+ }
+ return (err);
+}
+
+/*
+ * Prom tree node list
+ */
+struct ptnode {
+ pnode_t nodeid;
+ struct ptnode *next;
+};
+
+/*
+ * Prom tree walk arg
+ */
+struct pta {
+ dev_info_t *pdip;
+ devi_branch_t *bp;
+ uint_t flags;
+ dev_info_t *fdip;
+ struct ptnode *head;
+};
+
+static void
+visit_node(pnode_t nodeid, struct pta *ap)
+{
+ struct ptnode **nextp;
+ int (*select)(pnode_t, void *, uint_t);
+
+ ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
+
+ select = ap->bp->create.prom_branch_select;
+
+ ASSERT(select);
+
+ if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
+
+ for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
+ ;
+
+ *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
+
+ (*nextp)->nodeid = nodeid;
+ }
+
+ if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
+ return;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+}
+
+/*
+ * NOTE: The caller of this function must check for device contracts
+ * or LDI callbacks against this dip before setting the dip offline.
+ */
+static int
+set_infant_dip_offline(dev_info_t *dip, void *arg)
+{
+ char *path = (char *)arg;
+
+ ASSERT(dip);
+ ASSERT(arg);
+
+ if (i_ddi_node_state(dip) >= DS_ATTACHED) {
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "Attempt to set offline flag on attached "
+ "node: %s", path);
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!DEVI_IS_DEVICE_OFFLINE(dip))
+ DEVI_SET_DEVICE_OFFLINE(dip);
+ mutex_exit(&(DEVI(dip)->devi_lock));
+
+ return (DDI_SUCCESS);
+}
+
+typedef struct result {
+ char *path;
+ int result;
+} result_t;
+
+static int
+dip_set_offline(dev_info_t *dip, void *arg)
+{
+ int end;
+ result_t *resp = (result_t *)arg;
+
+ ASSERT(dip);
+ ASSERT(resp);
+
+ /*
+ * We stop the walk if e_ddi_offline_notify() returns
+	 * failure, because this implies that one or more consumers
+	 * (either LDI or contract based) have blocked the offline,
+	 * so there is no point in continuing the walk.
+ */
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ resp->result = DDI_FAILURE;
+ return (DDI_WALK_TERMINATE);
+ }
+
+ /*
+ * If set_infant_dip_offline() returns failure, it implies
+ * that we failed to set a particular dip offline. This
+ * does not imply that the offline as a whole should fail.
+ * We want to do the best we can, so we continue the walk.
+ */
+ if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * The call to e_ddi_offline_notify() exists for the
+ * unlikely error case that a branch we are trying to
+ * create already exists and has device contracts or LDI
+ * event callbacks against it.
+ *
+ * We allow create to succeed for such branches only if
+ * no constraints block the offline.
+ */
+static int
+branch_set_offline(dev_info_t *dip, char *path)
+{
+ int circ;
+ int end;
+ result_t res;
+
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ return (DDI_FAILURE);
+ }
+
+ if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
+ if (end == DDI_FAILURE)
+ return (DDI_FAILURE);
+
+ res.result = DDI_SUCCESS;
+ res.path = path;
+
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
+ ndi_devi_exit(dip, circ);
+
+ return (res.result);
+}
+
+/*ARGSUSED*/
+static int
+create_prom_branch(void *arg, int has_changed)
+{
+ int circ;
+ int exists, rv;
+ pnode_t nodeid;
+ struct ptnode *tnp;
+ dev_info_t *dip;
+ struct pta *ap = arg;
+ devi_branch_t *bp;
+ char *path;
+
+ ASSERT(ap);
+ ASSERT(ap->fdip == NULL);
+ ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
+
+ bp = ap->bp;
+
+ nodeid = ddi_get_nodeid(ap->pdip);
+ if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
+ cmn_err(CE_WARN, "create_prom_branch: invalid "
+ "nodeid: 0x%x", nodeid);
+ return (EINVAL);
+ }
+
+ ap->head = NULL;
+
+ nodeid = prom_childnode(nodeid);
+ while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
+ visit_node(nodeid, ap);
+ nodeid = prom_nextnode(nodeid);
+ }
+
+ if (ap->head == NULL)
+ return (ENODEV);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ rv = 0;
+ while ((tnp = ap->head) != NULL) {
+ ap->head = tnp->next;
+
+ ndi_devi_enter(ap->pdip, &circ);
+
+ /*
+ * Check if the branch already exists.
+ */
+ exists = 0;
+ dip = e_ddi_nodeid_to_dip(tnp->nodeid);
+ if (dip != NULL) {
+ exists = 1;
+
+ /* Parent is held busy, so release hold */
+ ndi_rele_devi(dip);
+#ifdef DEBUG
+ cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
+ " for nodeid 0x%x", (void *)dip, tnp->nodeid);
+#endif
+ } else {
+ dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
+ }
+
+ kmem_free(tnp, sizeof (struct ptnode));
+
+ /*
+ * Hold the branch if it is not already held
+ */
+ if (dip && !exists) {
+ e_ddi_branch_hold(dip);
+ }
+
+ ASSERT(dip == NULL || e_ddi_branch_held(dip));
+
+ /*
+ * Set all dips in the newly created branch offline so that
+ * only a "configure" operation can attach
+ * the branch
+ */
+ if (dip == NULL || branch_set_offline(dip, path)
+ == DDI_FAILURE) {
+ ndi_devi_exit(ap->pdip, circ);
+ rv = EIO;
+ continue;
+ }
+
+ ASSERT(ddi_get_parent(dip) == ap->pdip);
+
+ ndi_devi_exit(ap->pdip, circ);
+
+ if (ap->flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * Invoke devi_branch_callback() (if it exists) only for
+ * newly created branches
+ */
+ if (bp->devi_branch_callback && !exists)
+ bp->devi_branch_callback(dip, bp->arg, 0);
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ return (rv);
+}
+
+static int
+sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
+{
+ int rv, circ, len;
+ int i, flags, ret;
+ dev_info_t *dip;
+ char *nbuf;
+ char *path;
+ static const char *noname = "<none>";
+
+ ASSERT(pdip);
+ ASSERT(DEVI_BUSY_OWNED(pdip));
+
+ flags = 0;
+
+ /*
+	 * Creating the root of a branch?
+ */
+ if (rdipp) {
+ *rdipp = NULL;
+ flags = DEVI_BRANCH_ROOT;
+ }
+
+ ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
+ rv = bp->create.sid_branch_create(dip, bp->arg, flags);
+
+ nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
+
+ if (rv == DDI_WALK_ERROR) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
+ " properties on devinfo node %p", (void *)dip);
+ goto fail;
+ }
+
+ len = OBP_MAXDRVNAME;
+ if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
+ != DDI_PROP_SUCCESS) {
+		cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
+		    " no name property", (void *)dip);
+ goto fail;
+ }
+
+ ASSERT(i_ddi_node_state(dip) == DS_PROTO);
+ if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
+ cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
+ " for devinfo node %p", nbuf, (void *)dip);
+ goto fail;
+ }
+
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+
+ /*
+ * Ignore bind failures just like boot does
+ */
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ switch (rv) {
+ case DDI_WALK_CONTINUE:
+ case DDI_WALK_PRUNESIB:
+ ndi_devi_enter(dip, &circ);
+
+ i = DDI_WALK_CONTINUE;
+ for (; i == DDI_WALK_CONTINUE; ) {
+ i = sid_node_create(dip, bp, NULL);
+ }
+
+ ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
+ if (i == DDI_WALK_ERROR)
+ rv = i;
+ /*
+		 * If PRUNESIB, stop creating siblings
+		 * of dip's child. Subsequent walk behavior
+		 * is determined by the rv returned by dip.
+ */
+
+ ndi_devi_exit(dip, circ);
+ break;
+ case DDI_WALK_TERMINATE:
+ /*
+ * Don't create children and ask our parent
+ * to not create siblings either.
+ */
+ rv = DDI_WALK_PRUNESIB;
+ break;
+ case DDI_WALK_PRUNECHILD:
+ /*
+ * Don't create children, but ask parent to continue
+ * with siblings.
+ */
+ rv = DDI_WALK_CONTINUE;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ if (rdipp)
+ *rdipp = dip;
+
+ /*
+ * Set device offline - only the "configure" op should cause an attach.
+ * Note that it is safe to set the dip offline without checking
+ * for either device contract or layered driver (LDI) based constraints
+ * since there cannot be any contracts or LDI opens of this device.
+ * This is because this node is a newly created dip with the parent busy
+ * held, so no other thread can come in and attach this dip. A dip that
+ * has never been attached cannot have contracts since by definition
+ * a device contract (an agreement between a process and a device minor
+ * node) can only be created against a device that has minor nodes
+ * i.e is attached. Similarly an LDI open will only succeed if the
+ * dip is attached. We assert below that the dip is not attached.
+ */
+ ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ ret = set_infant_dip_offline(dip, path);
+ ASSERT(ret == DDI_SUCCESS);
+ kmem_free(path, MAXPATHLEN);
+
+ return (rv);
+fail:
+ (void) ndi_devi_free(dip);
+ kmem_free(nbuf, OBP_MAXDRVNAME);
+ return (DDI_WALK_ERROR);
+}
+
+static int
+create_sid_branch(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int rv = 0, state = DDI_WALK_CONTINUE;
+ dev_info_t *rdip;
+
+ while (state == DDI_WALK_CONTINUE) {
+ int circ;
+
+ ndi_devi_enter(pdip, &circ);
+
+ state = sid_node_create(pdip, bp, &rdip);
+ if (rdip == NULL) {
+ ndi_devi_exit(pdip, circ);
+ ASSERT(state == DDI_WALK_ERROR);
+ break;
+ }
+
+ e_ddi_branch_hold(rdip);
+
+ ndi_devi_exit(pdip, circ);
+
+ if (flags & DEVI_BRANCH_CONFIGURE) {
+ int error = e_ddi_branch_configure(rdip, dipp, 0);
+ if (error && rv == 0)
+ rv = error;
+ }
+
+ /*
+ * devi_branch_callback() is optional
+ */
+ if (bp->devi_branch_callback)
+ bp->devi_branch_callback(rdip, bp->arg, 0);
+ }
+
+ ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
+
+ return (state == DDI_WALK_ERROR ? EIO : rv);
+}
+
+int
+e_ddi_branch_create(
+ dev_info_t *pdip,
+ devi_branch_t *bp,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int prom_devi, sid_devi, error;
+
+ if (pdip == NULL || bp == NULL || bp->type == 0)
+ return (EINVAL);
+
+ prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
+ sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
+
+ if (prom_devi && bp->create.prom_branch_select == NULL)
+ return (EINVAL);
+ else if (sid_devi && bp->create.sid_branch_create == NULL)
+ return (EINVAL);
+ else if (!prom_devi && !sid_devi)
+ return (EINVAL);
+
+ if (flags & DEVI_BRANCH_EVENT)
+ return (EINVAL);
+
+ if (prom_devi) {
+ struct pta pta = {0};
+
+ pta.pdip = pdip;
+ pta.bp = bp;
+ pta.flags = flags;
+
+ error = prom_tree_access(create_prom_branch, &pta, NULL);
+
+ if (dipp)
+ *dipp = pta.fdip;
+ else if (pta.fdip)
+ ndi_rele_devi(pta.fdip);
+ } else {
+ error = create_sid_branch(pdip, bp, dipp, flags);
+ }
+
+ return (error);
+}
+
+int
+e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
+{
+ int rv;
+ char *devnm;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ndi_hold_devi(pdip);
+
+ if (!e_ddi_branch_held(rdip)) {
+ ndi_rele_devi(pdip);
+ cmn_err(CE_WARN, "e_ddi_branch_configure: "
+ "dip(%p) not held", (void *)rdip);
+ return (EINVAL);
+ }
+
+ if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
+ /*
+		 * First attempt to bind a driver. If we fail, return
+		 * success (on some platforms, dips for some device
+		 * types, such as CPUs, may not have a driver).
+ */
+ if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
+ ndi_rele_devi(pdip);
+ return (0);
+ }
+
+ if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
+ rv = NDI_FAILURE;
+ goto out;
+ }
+ }
+
+ ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ (void) ddi_deviname(rdip, devnm);
+
+ if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
+ NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
+ /* release hold from ndi_devi_config_one() */
+ ndi_rele_devi(rdip);
+ }
+
+ kmem_free(devnm, MAXNAMELEN + 1);
+out:
+ if (rv != NDI_SUCCESS && dipp && rdip) {
+ ndi_hold_devi(rdip);
+ *dipp = rdip;
+ }
+ ndi_rele_devi(pdip);
+ return (ndi2errno(rv));
+}
+
+void
+e_ddi_branch_hold(dev_info_t *rdip)
+{
+ if (e_ddi_branch_held(rdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
+ return;
+ }
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
+ DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref++;
+ }
+ ASSERT(DEVI(rdip)->devi_ref > 0);
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_held(dev_info_t *rdip)
+{
+ int rv = 0;
+
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
+ DEVI(rdip)->devi_ref > 0) {
+ rv = 1;
+ }
+ mutex_exit(&DEVI(rdip)->devi_lock);
+
+ return (rv);
+}
+
+void
+e_ddi_branch_rele(dev_info_t *rdip)
+{
+ mutex_enter(&DEVI(rdip)->devi_lock);
+ DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
+ DEVI(rdip)->devi_ref--;
+ mutex_exit(&DEVI(rdip)->devi_lock);
+}
+
+int
+e_ddi_branch_unconfigure(
+ dev_info_t *rdip,
+ dev_info_t **dipp,
+ uint_t flags)
+{
+ int circ, rv;
+ int destroy;
+ char *devnm;
+ uint_t nflags;
+ dev_info_t *pdip;
+
+ if (dipp)
+ *dipp = NULL;
+
+ if (rdip == NULL)
+ return (EINVAL);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_clean()
+ */
+ if (DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
+ " devinfo node(%p) is busy held", (void *)pdip);
+ return (EINVAL);
+ }
+
+ destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
+
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_deviname(rdip, devnm);
+ ndi_devi_exit(pdip, circ);
+
+ /*
+ * ddi_deviname() returns a component name with / prepended.
+ */
+ (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+
+ ndi_devi_enter(pdip, &circ);
+
+ /*
+ * Recreate device name as it may have changed state (init/uninit)
+ * when parent busy lock was dropped for devfs_clean()
+ */
+ (void) ddi_deviname(rdip, devnm);
+
+ if (!e_ddi_branch_held(rdip)) {
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
+ destroy ? "destroy" : "unconfigure", (void *)rdip);
+ return (EINVAL);
+ }
+
+ /*
+ * Release hold on the branch. This is ok since we are holding the
+ * parent busy. If rdip is not removed, we must do a hold on the
+ * branch before returning.
+ */
+ e_ddi_branch_rele(rdip);
+
+ nflags = NDI_DEVI_OFFLINE;
+ if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
+ nflags |= NDI_DEVI_REMOVE;
+ destroy = 1;
+ } else {
+ nflags |= NDI_UNCONFIG; /* uninit but don't remove */
+ }
+
+ if (flags & DEVI_BRANCH_EVENT)
+ nflags |= NDI_POST_EVENT;
+
+ if (i_ddi_devi_attached(pdip) &&
+ (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
+ rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
+ } else {
+ rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
+ if (rv == NDI_SUCCESS) {
+ ASSERT(!destroy || ddi_get_child(rdip) == NULL);
+ rv = ndi_devi_offline(rdip, nflags);
+ }
+ }
+
+ if (!destroy || rv != NDI_SUCCESS) {
+ /* The dip still exists, so do a hold */
+ e_ddi_branch_hold(rdip);
+ }
+out:
+ kmem_free(devnm, MAXNAMELEN + 1);
+ ndi_devi_exit(pdip, circ);
+ return (ndi2errno(rv));
+}
+
+int
+e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
+{
+ return (e_ddi_branch_unconfigure(rdip, dipp,
+ flag|DEVI_BRANCH_DESTROY));
+}
+
+/*
+ * Number of chains for hash table
+ */
+#define NUMCHAINS 17
+
+/*
+ * Devinfo busy arg
+ */
+struct devi_busy {
+ int dv_total;
+ int s_total;
+ mod_hash_t *dv_hash;
+ mod_hash_t *s_hash;
+ int (*callback)(dev_info_t *, void *, uint_t);
+ void *arg;
+};
+
+static int
+visit_dip(dev_info_t *dip, void *arg)
+{
+ uintptr_t sbusy, dvbusy, ref;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(bsp->callback);
+
+ /*
+ * A dip cannot be busy if its reference count is 0
+ */
+ if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
+ return (bsp->callback(dip, bsp->arg, 0));
+ }
+
+ if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
+ dvbusy = 0;
+
+ /*
+ * To catch device opens currently maintained on specfs common snodes.
+ */
+ if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = 0;
+
+#ifdef DEBUG
+ if (ref < sbusy || ref < dvbusy) {
+ cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
+ "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
+ }
+#endif
+
+ dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
+
+ return (bsp->callback(dip, bsp->arg, dvbusy));
+}
+
+static int
+visit_snode(struct snode *sp, void *arg)
+{
+ uintptr_t sbusy;
+ dev_info_t *dip;
+ int count;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(sp);
+
+ /*
+ * The stable lock is held. This prevents
+ * the snode and its associated dip from
+ * going away.
+ */
+ dip = NULL;
+ count = spec_devi_open_count(sp, &dip);
+
+ if (count <= 0)
+ return (DDI_WALK_CONTINUE);
+
+ ASSERT(dip);
+
+ if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
+ sbusy = count;
+ else
+ sbusy += count;
+
+ if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
+ cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
+ "sbusy = %lu", "e_ddi_branch_referenced",
+ (void *)dip, sbusy);
+ }
+
+ bsp->s_total += count;
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+visit_dvnode(struct dv_node *dv, void *arg)
+{
+ uintptr_t dvbusy;
+ uint_t count;
+ struct vnode *vp;
+ struct devi_busy *bsp = arg;
+
+ ASSERT(dv && dv->dv_devi);
+
+ vp = DVTOV(dv);
+
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+
+ if (!count)
+ return;
+
+ if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t *)&dvbusy))
+ dvbusy = count;
+ else
+ dvbusy += count;
+
+ if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
+ (mod_hash_val_t)dvbusy)) {
+ cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
+ "dvbusy=%lu", "e_ddi_branch_referenced",
+ (void *)dv->dv_devi, dvbusy);
+ }
+
+ bsp->dv_total += count;
+}
+
+/*
+ * Returns reference count on success or -1 on failure.
+ */
+int
+e_ddi_branch_referenced(
+ dev_info_t *rdip,
+ int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
+ void *arg)
+{
+ int circ;
+ char *path;
+ dev_info_t *pdip;
+ struct devi_busy bsa = {0};
+
+ ASSERT(rdip);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ ndi_hold_devi(rdip);
+
+ pdip = ddi_get_parent(rdip);
+
+ ASSERT(pdip);
+
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks during
+ * devfs_walk()
+ */
+ if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
+ "devinfo branch(%p) not held or parent busy held",
+ (void *)rdip);
+ ndi_rele_devi(rdip);
+ kmem_free(path, MAXPATHLEN);
+ return (-1);
+ }
+
+ ndi_devi_enter(pdip, &circ);
+ (void) ddi_pathname(rdip, path);
+ ndi_devi_exit(pdip, circ);
+
+ bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct dev_info));
+
+ bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
+ mod_hash_null_valdtor, sizeof (struct snode));
+
+ if (devfs_walk(path, visit_dvnode, &bsa)) {
+ cmn_err(CE_WARN, "e_ddi_branch_referenced: "
+ "devfs walk failed for: %s", path);
+ kmem_free(path, MAXPATHLEN);
+ bsa.s_total = bsa.dv_total = -1;
+ goto out;
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ /*
+ * Walk the snode table to detect device opens, which are currently
+ * maintained on specfs common snodes.
+ */
+ spec_snode_walk(visit_snode, &bsa);
+
+ if (callback == NULL)
+ goto out;
+
+ bsa.callback = callback;
+ bsa.arg = arg;
+
+ if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
+ ndi_devi_enter(rdip, &circ);
+ ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
+ ndi_devi_exit(rdip, circ);
+ }
+
+out:
+ ndi_rele_devi(rdip);
+ mod_hash_destroy_ptrhash(bsa.s_hash);
+ mod_hash_destroy_ptrhash(bsa.dv_hash);
+ return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
+}
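
To make the contract of e_ddi_branch_referenced() concrete, a sketch of a hypothetical caller. Per the code above, the branch must be held and the parent must not be busy-held; the callback receives the per-dip open/reference count and returns a DDI_WALK_* code. The callback name and the EBUSY policy are illustrative assumptions.

    static int
    note_busy_dip(dev_info_t *dip, void *arg, uint_t ref)
    {
    	if (ref != 0)
    		cmn_err(CE_NOTE, "dip %p still referenced: %u",
    		    (void *)dip, ref);
    	return (DDI_WALK_CONTINUE);
    }

    /* ... in a hypothetical unconfigure path ... */
    if (e_ddi_branch_referenced(rdip, note_busy_dip, NULL) > 0)
    	return (EBUSY);		/* branch still open somewhere */
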
diff --git a/usr/src/uts/common/sys/esunddi.h b/usr/src/uts/common/sys/esunddi.h
index 3cde0b2b14..e2f860d520 100644
--- a/usr/src/uts/common/sys/esunddi.h
+++ b/usr/src/uts/common/sys/esunddi.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -26,9 +25,6 @@
#ifndef _SYS_ESUNDDI_H
#define _SYS_ESUNDDI_H
-
-#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 */
-
#include <sys/sunddi.h>
#include <sys/proc.h>
#include <sys/autoconf.h>
@@ -214,6 +210,41 @@ dev_info_t *
e_ddi_nodeid_to_dip(pnode_t nodeid);
/*
+ * Defines for DR interfaces
+ */
+#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
+#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
+#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
+#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
+#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
+#define DEVI_BRANCH_SID 0x20 /* SID node branches */
+#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
+
+typedef struct devi_branch {
+ void *arg;
+ void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
+ int type;
+ union {
+ int (*prom_branch_select)(pnode_t, void *, uint_t);
+ int (*sid_branch_create)(dev_info_t *, void *, uint_t);
+ } create;
+} devi_branch_t;
+
+extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
+ dev_info_t **dipp, uint_t flags);
+extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
+ uint_t flags);
+extern void e_ddi_branch_hold(dev_info_t *rdip);
+extern void e_ddi_branch_rele(dev_info_t *rdip);
+extern int e_ddi_branch_held(dev_info_t *rdip);
+extern int e_ddi_branch_referenced(dev_info_t *rdip,
+ int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
+
+/*
* Obsolete interfaces, no longer used, to be removed.
* Retained only for driver compatibility.
*/
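
A sketch of how a DR driver might drive these interfaces to build a one-node SID branch. Per sid_node_create() in the sunddi.c changes above, the create callback must establish at least a "name" property and steers creation through its DDI_WALK_* return value; the callback and wrapper below are assumptions for illustration, not code from this change.

    static int
    my_sid_create(dev_info_t *dip, void *arg, uint_t flags)
    {
    	/* Each new node needs at least a "name" property. */
    	if (ndi_prop_update_string(DDI_DEV_T_NONE, dip, "name",
    	    "cpu") != DDI_PROP_SUCCESS)
    		return (DDI_WALK_ERROR);
    	/* Single-node branch: no children, no siblings. */
    	return (DDI_WALK_TERMINATE);
    }

    static int
    create_cpu_branch(dev_info_t *pdip, dev_info_t **fdipp)
    {
    	devi_branch_t br;

    	bzero(&br, sizeof (br));
    	br.type = DEVI_BRANCH_SID;
    	br.create.sid_branch_create = my_sid_create;
    	/* Creates the node, holds the branch, then configures it. */
    	return (e_ddi_branch_create(pdip, &br, fdipp,
    	    DEVI_BRANCH_CONFIGURE));
    }
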
diff --git a/usr/src/uts/common/sys/pci_impl.h b/usr/src/uts/common/sys/pci_impl.h
index d485800b96..429d6860bd 100644
--- a/usr/src/uts/common/sys/pci_impl.h
+++ b/usr/src/uts/common/sys/pci_impl.h
@@ -116,6 +116,9 @@ extern struct pci_bus_resource *pci_bus_res;
/*
* For now, x86-only to avoid conflicts with <sys/memlist_impl.h>
*/
+#define memlist_find memlist_find_pci
+#define memlist_insert memlist_insert_pci
+
extern struct memlist *memlist_alloc(void);
extern void memlist_free(struct memlist *);
extern void memlist_free_all(struct memlist **);
diff --git a/usr/src/uts/common/sys/processor.h b/usr/src/uts/common/sys/processor.h
index 3a76c8c9b4..c0fe6e21b8 100644
--- a/usr/src/uts/common/sys/processor.h
+++ b/usr/src/uts/common/sys/processor.h
@@ -32,8 +32,6 @@
#ifndef _SYS_PROCESSOR_H
#define _SYS_PROCESSOR_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/procset.h>
@@ -140,6 +138,7 @@ extern lgrpid_t gethomelgroup();
* Internal interface prototypes
*/
extern int p_online_internal(processorid_t, int, int *);
+extern int p_online_internal_locked(processorid_t, int, int *);
#endif /* !_KERNEL */
diff --git a/usr/src/uts/common/syscall/p_online.c b/usr/src/uts/common/syscall/p_online.c
index 004627569c..88a0340bf7 100644
--- a/usr/src/uts/common/syscall/p_online.c
+++ b/usr/src/uts/common/syscall/p_online.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/var.h>
@@ -49,7 +46,7 @@
* P_FAULTED
*/
int
-p_online_internal(processorid_t cpun, int new_status, int *old_status)
+p_online_internal_locked(processorid_t cpun, int new_status, int *old_status)
{
cpu_t *cp;
int status;
@@ -59,7 +56,7 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status)
/*
* Try to get a pointer to the requested CPU structure.
*/
- mutex_enter(&cpu_lock); /* protects CPU states */
+ ASSERT(MUTEX_HELD(&cpu_lock));
if ((cp = cpu_get(cpun)) == NULL) {
error = EINVAL;
goto out;
@@ -219,10 +216,21 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status)
break;
}
out:
- mutex_exit(&cpu_lock);
return (error);
}
+int
+p_online_internal(processorid_t cpun, int new_status, int *old_status)
+{
+ int rc;
+
+ mutex_enter(&cpu_lock); /* protects CPU states */
+ rc = p_online_internal_locked(cpun, new_status, old_status);
+ mutex_exit(&cpu_lock);
+
+ return (rc);
+}
+
/*
* p_online(2) - get/change processor operational status.
*
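
The split above exists so that DR code which already holds cpu_lock across a longer sequence can reuse the same state machine. A hypothetical caller, as a sketch:

    static int
    dr_offline_cpu(processorid_t cpun)
    {
    	int old_status, err;

    	ASSERT(MUTEX_HELD(&cpu_lock));	/* e.g. held across a DR request */
    	err = p_online_internal_locked(cpun, P_OFFLINE, &old_status);
    	return (err);
    }
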
diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h
index 8739e8a366..298b8d4415 100644
--- a/usr/src/uts/common/vm/page.h
+++ b/usr/src/uts/common/vm/page.h
@@ -1085,8 +1085,8 @@ struct memseg {
page_t *pages, *epages; /* [from, to] in page array */
pfn_t pages_base, pages_end; /* [from, to] in page numbers */
struct memseg *next; /* next segment in list */
-#if defined(__sparc)
struct memseg *lnext; /* next segment in deleted list */
+#if defined(__sparc)
uint64_t pagespa, epagespa; /* [from, to] page array physical */
uint64_t nextpa; /* physical next pointer */
pfn_t kpm_pbase; /* start of kpm range */
@@ -1096,8 +1096,8 @@ struct memseg {
kpm_spage_t *kpm_spgs; /* ptr to kpm_spage array */
} mseg_un;
uint64_t kpm_pagespa; /* physical ptr to kpm (s)pages array */
- uint_t msegflags; /* memseg flags */
#endif /* __sparc */
+ uint_t msegflags; /* memseg flags */
};
/* memseg union aliases */
diff --git a/usr/src/uts/common/vm/seg_kpm.h b/usr/src/uts/common/vm/seg_kpm.h
index 0b766bbaf4..b61980df01 100644
--- a/usr/src/uts/common/vm/seg_kpm.h
+++ b/usr/src/uts/common/vm/seg_kpm.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -27,8 +26,6 @@
#ifndef _VM_SEG_KPM_H
#define _VM_SEG_KPM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -85,8 +82,13 @@ extern int segmap_kpm;
#define IS_KPM_ADDR(addr) \
((addr) >= segkpm->s_base && (addr) < (segkpm->s_base + segkpm->s_size))
+#ifdef __x86
+/* x86 systems use neither kpm_page_t nor kpm_spage_t when supporting kpm. */
+#define KPMPAGE_T_SZ (0)
+#else /* __x86 */
#define KPMPAGE_T_SZ \
((kpm_smallpages == 0) ? sizeof (kpm_page_t) : sizeof (kpm_spage_t))
+#endif /* __x86 */
#else /* SEGKPM_SUPPORT */
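
KPMPAGE_T_SZ sizes the per-page kpm metadata that memory add must allocate for a new segment; the x86 definition above makes that term vanish. A rough sketch of the kind of computation involved (the helper name is illustrative, not the exact mem_config.c code):

    /* Bytes of kpm metadata for a segment spanning nkpmpgs kpm pages. */
    static size_t
    kpm_meta_bytes(pgcnt_t nkpmpgs)
    {
    	return (nkpmpgs * KPMPAGE_T_SZ);	/* always 0 on x86 */
    }
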
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 8a1cf4b2b7..eeaaa379ab 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -80,7 +80,10 @@ CORE_OBJS += \
mach_kdi.o \
mach_sysconfig.o \
machdep.o \
+ mem_config.o \
mem_config_stubs.o \
+ mem_config_arch.o \
+ memlist_new.o \
memnode.o \
microcode.o \
microfind.o \
@@ -187,7 +190,7 @@ PCPLUSMP_OBJS += apic.o apic_regops.o psm_common.o apic_introp.o \
mp_platform_common.o hpet_acpi.o
ACPI_DRV_OBJS += acpi_drv.o acpi_video.o
-ACPINEX_OBJS += acpinex_drv.o
+ACPINEX_OBJS += acpinex_drv.o acpinex_event.o
CPUDRV_OBJS += \
cpudrv.o \
@@ -200,10 +203,16 @@ ACPIDEV_OBJS += acpidev_drv.o \
acpidev_scope.o acpidev_device.o \
acpidev_container.o \
acpidev_cpu.o \
+ acpidev_dr.o \
acpidev_memory.o \
+ acpidev_pci.o \
acpidev_resource.o \
acpidev_util.o
+DRMACH_ACPI_OBJS += drmach_acpi.o dr_util.o drmach_err.o
+
+DR_OBJS += dr.o dr_cpu.o dr_err.o dr_io.o dr_mem_acpi.o dr_quiesce.o dr_util.o
+
ROOTNEX_OBJS += rootnex.o immu.o immu_dmar.o immu_dvma.o \
immu_intrmap.o immu_qinv.o immu_regs.o
diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared
index 89b47413f3..a8fbb83fdf 100644
--- a/usr/src/uts/i86pc/Makefile.i86pc.shared
+++ b/usr/src/uts/i86pc/Makefile.i86pc.shared
@@ -61,6 +61,9 @@ LINTS_DIR = $(OBJS_DIR)
LINT_LIB_DIR = $(UTSBASE)/$(PLATFORM)/lint-libs/$(OBJS_DIR)
GEN_LINT_LIB_DIR = $(UTSBASE)/intel/lint-libs/$(OBJS_DIR)
+LINT32_DIRS = $(LINT32_BUILDS:%=$(UTSBASE)/$(PLATFORM)/lint-libs/%)
+LINT32_FILES = $(LINT32_DIRS:%=%/llib-l$(MODULE).ln)
+
DTRACESTUBS_O = $(OBJS_DIR)/dtracestubs.o
DTRACESTUBS = $(OBJS_DIR)/libdtracestubs.so
@@ -256,6 +259,7 @@ DRV_KMODS += tzmon
DRV_KMODS += acpi_drv
DRV_KMODS += acpinex
DRV_KMODS += amd_iommu
+DRV_KMODS += dr
DRV_KMODS += ioat
DRV_KMODS += fipe
@@ -311,6 +315,7 @@ SYS_KMODS +=
#
MISC_KMODS += gfx_private pcie
MISC_KMODS += acpidev
+MISC_KMODS += drmach_acpi
#
# 'Dacf' modules (/kernel/dacf)
diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules
index ab82a3537f..592250db83 100644
--- a/usr/src/uts/i86pc/Makefile.rules
+++ b/usr/src/uts/i86pc/Makefile.rules
@@ -79,10 +79,42 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/acpinex/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+SBD_IOCTL = $(UTSBASE)/i86pc/sys/sbd_ioctl.h
+DRMACH_IO = $(UTSBASE)/i86pc/io/acpi/drmach_acpi
+DRMACH_GENERR = $(DRMACH_IO)/sbdgenerr
+DR_IO = $(UTSBASE)/i86pc/io/dr
+DR_GENERR = $(DR_IO)/sbdgenerr
+
+$(DRMACH_GENERR): $(DR_IO)/sbdgenerr.pl
+ $(RM) $@
+ $(CAT) $(DR_IO)/sbdgenerr.pl > $@
+ $(CHMOD) +x $@
+
+$(DRMACH_IO)/drmach_err.c: $(DRMACH_GENERR) $(SBD_IOCTL)
+ $(RM) $@
+ $(DRMACH_GENERR) EX86 < $(SBD_IOCTL) > $(DRMACH_IO)/drmach_err.c
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/amd_iommu/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(DR_GENERR): $(DR_IO)/sbdgenerr.pl
+ $(RM) $@
+ $(CAT) $(DR_IO)/sbdgenerr.pl > $@
+ $(CHMOD) +x $@
+
+$(DR_IO)/dr_err.c: $(DR_GENERR) $(SBD_IOCTL)
+ $(RM) $@
+ $(DR_GENERR) ESBD < $(SBD_IOCTL) > $(DR_IO)/dr_err.c
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/dr/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -307,9 +339,15 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/acpidev/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/acpinex/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/amd_iommu/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/dr/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
index f5d852b01a..c520e06be7 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h
@@ -189,6 +189,7 @@ struct gcpu_chipshared {
kmutex_t gcpus_cfglock; /* serial MCA config from chip cores */
kmutex_t gcpus_poll_lock; /* serialize pollers on the same chip */
uint32_t gcpus_actv_banks; /* MCA bank numbers active on chip */
+ volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */
};
struct gcpu_data {
@@ -205,11 +206,13 @@ struct regs;
* CMI implementation
*/
extern int gcpu_init(cmi_hdl_t, void **);
+extern void gcpu_fini(cmi_hdl_t);
extern void gcpu_post_startup(cmi_hdl_t);
extern void gcpu_post_mpstartup(cmi_hdl_t);
extern void gcpu_faulted_enter(cmi_hdl_t);
extern void gcpu_faulted_exit(cmi_hdl_t);
extern void gcpu_mca_init(cmi_hdl_t);
+extern void gcpu_mca_fini(cmi_hdl_t hdl);
extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int);
#ifndef __xpv
extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *);
@@ -228,6 +231,7 @@ extern int cmi_enable_cmci;
* Local functions
*/
extern void gcpu_mca_poll_init(cmi_hdl_t);
+extern void gcpu_mca_poll_fini(cmi_hdl_t);
extern void gcpu_mca_poll_start(cmi_hdl_t);
extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *);
extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t);
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
index db4a695b7d..761d942f27 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c
@@ -23,6 +23,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* Generic x86 CPU Module
@@ -78,22 +82,52 @@ gcpu_init(cmi_hdl_t hdl, void **datap)
*/
if ((sp = gcpu_shared[chipid]) == NULL) {
sp = kmem_zalloc(sizeof (struct gcpu_chipshared), KM_SLEEP);
+ mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER, NULL);
osp = atomic_cas_ptr(&gcpu_shared[chipid], NULL, sp);
- if (osp == NULL) {
- mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER,
- NULL);
- mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER,
- NULL);
- } else {
+ if (osp != NULL) {
+ mutex_destroy(&sp->gcpus_cfglock);
+ mutex_destroy(&sp->gcpus_poll_lock);
kmem_free(sp, sizeof (struct gcpu_chipshared));
sp = osp;
}
}
+
+ atomic_inc_32(&sp->gcpus_actv_cnt);
gcpu->gcpu_shared = sp;
return (0);
}
+/*
+ * Undo the configuration performed by gcpu_init().
+ */
+void
+gcpu_fini(cmi_hdl_t hdl)
+{
+ uint_t chipid = cmi_hdl_chipid(hdl);
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ struct gcpu_chipshared *sp;
+
+ if (gcpu_disable || chipid >= GCPU_MAX_CHIPID)
+ return;
+
+ gcpu_mca_fini(hdl);
+
+ /*
+ * Keep shared data in cache for reuse.
+ */
+ sp = gcpu_shared[chipid];
+ ASSERT(sp != NULL);
+ atomic_dec_32(&sp->gcpus_actv_cnt);
+
+ if (gcpu != NULL)
+ kmem_free(gcpu, sizeof (gcpu_data_t));
+
+ /* Release reference count held in gcpu_init(). */
+ cmi_hdl_rele(hdl);
+}
+
void
gcpu_post_startup(cmi_hdl_t hdl)
{
@@ -150,7 +184,7 @@ const cmi_ops_t _cmi_ops = {
GCPU_OP(gcpu_cmci_trap, NULL), /* cmi_cmci_trap */
gcpu_msrinject, /* cmi_msrinject */
GCPU_OP(gcpu_hdl_poke, NULL), /* cmi_hdl_poke */
- NULL, /* cmi_fini */
+ gcpu_fini, /* cmi_fini */
GCPU_OP(NULL, gcpu_xpv_panic_callback), /* cmi_panic_callback */
};
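
The reworked gcpu_init() now fully constructs the shared structure, including its mutexes, before publishing it with atomic_cas_ptr(), so a racing loser can safely destroy its private copy. The same pattern in generic form, as a sketch with placeholder names (my_obj_t, obj_init, obj_fini, global_ptr):

    my_obj_t *obj, *winner;

    obj = kmem_zalloc(sizeof (*obj), KM_SLEEP);
    obj_init(obj);				/* initialize fully first ... */
    winner = atomic_cas_ptr(&global_ptr, NULL, obj);
    if (winner != NULL) {
    	/* ... lost the publish race: tear down the private copy. */
    	obj_fini(obj);
    	kmem_free(obj, sizeof (*obj));
    	obj = winner;
    }
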
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
index 911a7c6af7..730411ebf1 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/mca_x86.h>
#include <sys/cpu_module_impl.h>
@@ -1064,7 +1068,7 @@ gcpu_mca_init(cmi_hdl_t hdl)
/*
* CPU startup code only calls cmi_mca_init if x86_feature indicates
* both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
- * processors, which have their own * more primitive way of doing
+ * processors, which have their own more primitive way of doing
* machine checks, will not have cmi_mca_init called since their
* CPUID information will not indicate both MCA and MCE features.
*/
@@ -2002,3 +2006,50 @@ gcpu_msrinject(cmi_hdl_t hdl, cmi_mca_regs_t *regs, uint_t nregs,
return (errs == 0 ? CMI_SUCCESS : CMIERR_UNKNOWN);
}
+
+/* Undo the configuration performed by gcpu_mca_init(). */
+void
+gcpu_mca_fini(cmi_hdl_t hdl)
+{
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ gcpu_mca_t *mca = &gcpu->gcpu_mca;
+ int i;
+
+ /*
+ * CPU startup code only calls cmi_mca_init if x86_feature indicates
+ * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier
+ * processors, which have their own more primitive way of doing
+ * machine checks, will not have cmi_mca_init called since their
+ * CPUID information will not indicate both MCA and MCE features.
+ */
+ if ((x86_feature & X86_MCA) == 0)
+ return;
+#ifndef __xpv
+ /*
+ * disable machine check in CR4
+ */
+ cmi_ntv_hwdisable_mce(hdl);
+#endif
+ mutex_enter(&gcpu->gcpu_shared->gcpus_cfglock);
+ gcpu_mca_poll_fini(hdl);
+ mutex_exit(&gcpu->gcpu_shared->gcpus_cfglock);
+
+ /*
+ * free resources allocated during init
+ */
+ if (mca->gcpu_bank_cmci != NULL) {
+ kmem_free(mca->gcpu_bank_cmci, sizeof (gcpu_mca_cmci_t) *
+ mca->gcpu_mca_nbanks);
+ }
+
+ for (i = 0; i < GCPU_MCA_LOGOUT_NUM; i++) {
+ if (mca->gcpu_mca_logout[i] != NULL) {
+ kmem_free(mca->gcpu_mca_logout[i], mca->gcpu_mca_lgsz);
+ }
+ }
+
+ if (mca->gcpu_mca_bioscfg.bios_bankcfg != NULL) {
+ kmem_free(mca->gcpu_mca_bioscfg.bios_bankcfg,
+ sizeof (struct gcpu_bios_bankcfg) * mca->gcpu_mca_nbanks);
+ }
+}
diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
index f68f96259c..361d147b56 100644
--- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
+++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c
@@ -23,6 +23,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
 * Native MCA polling. We establish an omnipresent cyclic to fire on all
@@ -31,6 +35,7 @@
*/
#include <sys/types.h>
+#include <sys/atomic.h>
#include <sys/cyclic.h>
#include <sys/x86_archext.h>
#include <sys/mca_x86.h>
@@ -39,7 +44,9 @@
hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL; /* tuneable */
static cyclic_id_t gcpu_mca_poll_cycid;
-static int gcpu_mca_poll_inits;
+static volatile uint_t gcpu_mca_poll_inits;
+extern int gcpu_poll_trace_always;
+extern uint_t gcpu_poll_trace_nent;
/*
 * Return nonzero if the given handle should poll the MCH. We stick with
@@ -280,7 +287,24 @@ gcpu_mca_poll_init(cmi_hdl_t hdl)
gcpu_poll_trace_init(ptc);
- gcpu_mca_poll_inits++;
+ atomic_inc_uint(&gcpu_mca_poll_inits);
+}
+
+/* Undo the configuration performed by gcpu_mca_poll_init(). */
+void
+gcpu_mca_poll_fini(cmi_hdl_t hdl)
+{
+ gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl);
+ gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace;
+
+ ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE);
+
+ if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) {
+ kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) *
+ gcpu_poll_trace_nent);
+ }
+
+ atomic_dec_uint(&gcpu_mca_poll_inits);
}
void
diff --git a/usr/src/uts/i86pc/dr/Makefile b/usr/src/uts/i86pc/dr/Makefile
new file mode 100644
index 0000000000..03aa5f615e
--- /dev/null
+++ b/usr/src/uts/i86pc/dr/Makefile
@@ -0,0 +1,104 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# This makefile drives the production of the dr driver module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+PLATFORM = i86pc
+
+#
+# Define the module and object file sets.
+#
+MODULE = dr
+OBJECTS = $(DR_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DR_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/i86pc/io/dr
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Overrides
+#
+DEF_BUILDS = $(DEF_BUILDS64)
+ALL_BUILDS = $(ALL_BUILDS64)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/drmach_acpi
+
+CLEANFILES += $(DR_GENERR)
+CLEANFILES += $(DR_IO)/dr_err.c
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS) lint32
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS) $(CONF_INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
+
diff --git a/usr/src/uts/i86pc/drmach_acpi/Makefile b/usr/src/uts/i86pc/drmach_acpi/Makefile
new file mode 100644
index 0000000000..24f9197b32
--- /dev/null
+++ b/usr/src/uts/i86pc/drmach_acpi/Makefile
@@ -0,0 +1,101 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# This makefile drives the production of the drmach_acpi loadable module.
+#
+# i86pc architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = drmach_acpi
+OBJECTS = $(DRMACH_ACPI_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DRMACH_ACPI_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/Makefile.i86pc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides
+#
+DEF_BUILDS = $(DEF_BUILDS64)
+ALL_BUILDS = $(ALL_BUILDS64)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# module dependencies
+#
+LDFLAGS += -dy -Nmisc/acpica -Nmisc/acpidev
+
+CLEANFILES += $(DRMACH_GENERR)
+CLEANFILES += $(DRMACH_IO)/drmach_err.c
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS) lint32
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/Makefile.targ
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
index 33de316493..5c58266634 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -54,11 +54,13 @@
#include <sys/types.h>
#include <sys/atomic.h>
+#include <sys/note.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
static ACPI_STATUS acpidev_container_probe(acpidev_walk_info_t *infop);
@@ -124,7 +126,7 @@ static acpidev_filter_rule_t acpidev_container_filters[] = {
static ACPI_STATUS
acpidev_container_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -137,19 +139,35 @@ acpidev_container_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ acpidev_dr_check(infop);
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in "
"acpidev_container_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -162,11 +180,12 @@ acpidev_container_probe(acpidev_walk_info_t *infop)
return (rc);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_container_search_dev(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
void **retval)
{
+ _NOTE(ARGUNUSED(hdl, retval));
+
int *fp = (int *)ctx;
*fp = lvl;
@@ -221,11 +240,11 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
}
if (cpu_lvl == 1) {
/* CPU as child, most likely a physical CPU. */
- (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU,
devnamelen);
} else if (cpu_lvl == 2 && module_lvl == 1) {
/* CPU as grandchild, most likely a system board. */
- (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD,
devnamelen);
} else if (ACPI_FAILURE(AcpiGetName(infop->awi_hdl,
ACPI_SINGLE_NAME, &buf))) {
@@ -233,7 +252,7 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
* Failed to get ACPI object name; use ACPI object name
* as the default name.
*/
- (void) strncpy(devname, ACPIDEV_NODE_NAME_CONTAINER,
+ (void) strlcpy(devname, ACPIDEV_NODE_NAME_CONTAINER,
devnamelen);
}
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
index 0576591147..fa93871fb4 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -43,6 +43,7 @@
#include <sys/bootconf.h>
#include <sys/cpuvar.h>
#include <sys/machsystm.h>
+#include <sys/note.h>
#include <sys/psm_types.h>
#include <sys/x86_archext.h>
#include <sys/sunddi.h>
@@ -70,11 +71,14 @@ static ACPI_STATUS acpidev_cpu_probe(acpidev_walk_info_t *infop);
static acpidev_filter_result_t acpidev_cpu_filter(acpidev_walk_info_t *infop,
char *devname, int maxlen);
static ACPI_STATUS acpidev_cpu_init(acpidev_walk_info_t *infop);
+static void acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ acpidev_class_t *clsp);
static acpidev_filter_result_t acpidev_cpu_filter_func(
acpidev_walk_info_t *infop, ACPI_HANDLE hdl, acpidev_filter_rule_t *afrp,
char *devname, int len);
-static int acpidev_cpu_query_dip(cpu_t *, dev_info_t **);
+static int acpidev_cpu_create_dip(cpu_t *, dev_info_t **);
+static int acpidev_cpu_get_dip(cpu_t *, dev_info_t **);
/*
* Default class driver for ACPI processor/CPU objects.
@@ -91,7 +95,7 @@ acpidev_class_t acpidev_class_cpu = {
acpidev_cpu_probe, /* adc_probe */
acpidev_cpu_filter, /* adc_filter */
acpidev_cpu_init, /* adc_init */
- NULL, /* adc_fini */
+ acpidev_cpu_fini, /* adc_fini */
};
/*
@@ -133,12 +137,14 @@ static char *acpidev_cpu_uid_formats[] = {
"SCK%x-CPU%x",
};
-static ACPI_HANDLE acpidev_cpu_map_hdl = NULL;
-static uint32_t acpidev_cpu_map_count = 0;
-static struct acpidev_cpu_map_item *acpidev_cpu_map = NULL;
+static ACPI_HANDLE acpidev_cpu_map_hdl;
+static uint32_t acpidev_cpu_map_count;
+static struct acpidev_cpu_map_item *acpidev_cpu_map;
extern int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **);
-static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **) = NULL;
+static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **);
+extern int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **);
+static int (*psm_cpu_get_devinfo_old)(cpu_t *, dev_info_t **);
/* Count how many enabled CPUs are in the MADT table. */
static ACPI_STATUS
@@ -200,7 +206,7 @@ acpidev_cpu_parse_MADT(ACPI_SUBTABLE_HEADER *ap, void *context)
/* See comment at beginning about 255 limitation. */
if (mpx2a->LocalApicId < 255) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: encountered CPU with X2APIC Id < 255.");
+ "!acpidev: encountered CPU with X2APIC Id < 255.");
} else if (mpx2a->LapicFlags & ACPI_MADT_ENABLED) {
ASSERT(*cntp < acpidev_cpu_map_count);
acpidev_cpu_map[*cntp].proc_id = mpx2a->Uid;
@@ -270,7 +276,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context)
}
return (AE_CTRL_TERMINATE);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: encountered CPU "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: encountered CPU "
"with X2APIC Id < 255 in _MAT.");
}
break;
@@ -292,7 +298,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context)
* x2APIC and x2APIC NMI Structure.
*/
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown APIC entry type %u in _MAT.", ap->Type);
+ "!acpidev: unknown APIC entry type %u in _MAT.", ap->Type);
break;
}
@@ -313,7 +319,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
if (infop->awi_info->Type != ACPI_TYPE_PROCESSOR &&
infop->awi_info->Type != ACPI_TYPE_DEVICE) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: object %s is not PROCESSOR or DEVICE.",
+ "!acpidev: object %s is not PROCESSOR or DEVICE.",
infop->awi_name);
return (AE_BAD_PARAMETER);
}
@@ -345,7 +351,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
return (AE_OK);
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to evaluate ACPI object %s.",
+ "!acpidev: failed to evaluate ACPI object %s.",
infop->awi_name);
}
}
@@ -365,6 +371,59 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp)
}
static ACPI_STATUS
+acpidev_cpu_get_proximity_id(ACPI_HANDLE hdl, uint32_t apicid, uint32_t *pxmidp)
+{
+ int len, off;
+ ACPI_SUBTABLE_HEADER *sp;
+ ACPI_SRAT_CPU_AFFINITY *xp;
+ ACPI_SRAT_X2APIC_CPU_AFFINITY *x2p;
+
+ ASSERT(hdl != NULL);
+ ASSERT(pxmidp != NULL);
+ *pxmidp = UINT32_MAX;
+
+ if (ACPI_SUCCESS(acpidev_eval_pxm(hdl, pxmidp))) {
+ return (AE_OK);
+ }
+ if (acpidev_srat_tbl_ptr == NULL) {
+ return (AE_NOT_FOUND);
+ }
+
+ /* Search the static ACPI SRAT table for proximity domain id. */
+ sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1);
+ len = acpidev_srat_tbl_ptr->Header.Length;
+ off = sizeof (*acpidev_srat_tbl_ptr);
+ while (off < len) {
+ switch (sp->Type) {
+ case ACPI_SRAT_TYPE_CPU_AFFINITY:
+ xp = (ACPI_SRAT_CPU_AFFINITY *)sp;
+ if ((xp->Flags & ACPI_SRAT_CPU_ENABLED) &&
+ xp->ApicId == apicid) {
+ *pxmidp = xp->ProximityDomainLo;
+ *pxmidp |= xp->ProximityDomainHi[0] << 8;
+ *pxmidp |= xp->ProximityDomainHi[1] << 16;
+ *pxmidp |= xp->ProximityDomainHi[2] << 24;
+ return (AE_OK);
+ }
+ break;
+
+ case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
+ x2p = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)sp;
+ if ((x2p->Flags & ACPI_SRAT_CPU_ENABLED) &&
+ x2p->ApicId == apicid) {
+ *pxmidp = x2p->ProximityDomain;
+ return (AE_OK);
+ }
+ break;
+ }
+ off += sp->Length;
+ sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length);
+ }
+
+ return (AE_NOT_FOUND);
+}
+
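A note on the SRAT decoding above: the legacy APIC affinity entry splits
the 32-bit proximity domain across ProximityDomainLo and three
ProximityDomainHi bytes, while the x2APIC entry carries it whole. The
byte reassembly reduces to this self-contained sketch (the struct is a
trimmed stand-in for ACPI_SRAT_CPU_AFFINITY, not the real ACPICA layout):

#include <stdint.h>
#include <stdio.h>

struct srat_cpu_affinity {
	uint8_t proximity_domain_lo;	/* bits 0..7 */
	uint8_t proximity_domain_hi[3];	/* bits 8..31 */
};

static uint32_t
srat_proximity_id(const struct srat_cpu_affinity *xp)
{
	uint32_t pxm = xp->proximity_domain_lo;

	pxm |= (uint32_t)xp->proximity_domain_hi[0] << 8;
	pxm |= (uint32_t)xp->proximity_domain_hi[1] << 16;
	pxm |= (uint32_t)xp->proximity_domain_hi[2] << 24;
	return (pxm);
}

int
main(void)
{
	struct srat_cpu_affinity x = { 0x04, { 0x03, 0x02, 0x01 } };

	printf("pxm = 0x%08x\n", srat_proximity_id(&x));	/* 0x01020304 */
	return (0);
}
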
+static ACPI_STATUS
acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
{
uint32_t count = 0;
@@ -373,6 +432,7 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
ASSERT(infop != NULL);
if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE &&
acpidev_cpu_map_hdl == NULL) {
+		/* Parse CPU-related information in the ACPI MADT table. */
(void) acpidev_walk_apic(NULL, NULL, NULL,
acpidev_cpu_count_MADT, &acpidev_cpu_map_count);
acpidev_cpu_map = kmem_zalloc(sizeof (acpidev_cpu_map[0])
@@ -381,6 +441,12 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop)
acpidev_cpu_parse_MADT, &count);
ASSERT(count == acpidev_cpu_map_count);
acpidev_cpu_map_hdl = infop->awi_hdl;
+
+ /* Cache pointer to the ACPI SRAT table. */
+ if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SRAT, 1,
+ (ACPI_TABLE_HEADER **)&acpidev_srat_tbl_ptr))) {
+ acpidev_srat_tbl_ptr = NULL;
+ }
}
return (AE_OK);
@@ -404,7 +470,9 @@ acpidev_cpu_post_probe(acpidev_walk_info_t *infop)
/* replace psm_cpu_create_devinfo with local implementation. */
psm_cpu_create_devinfo_old = psm_cpu_create_devinfo;
- psm_cpu_create_devinfo = acpidev_cpu_query_dip;
+ psm_cpu_create_devinfo = acpidev_cpu_create_dip;
+ psm_cpu_get_devinfo_old = psm_cpu_get_devinfo;
+ psm_cpu_get_devinfo = acpidev_cpu_get_dip;
}
return (AE_OK);
@@ -427,27 +495,40 @@ acpidev_cpu_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- /*
- * Mark device as offline. It will be changed to online state
- * when the corresponding CPU starts up.
- */
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN |
- ACPIDEV_PROCESS_FLAG_CREATE |
- ACPIDEV_PROCESS_FLAG_OFFLINE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN |
- ACPIDEV_PROCESS_FLAG_CREATE |
- ACPIDEV_PROCESS_FLAG_OFFLINE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ /*
+	 * Mark the device as offline. It will be brought online when the
+	 * corresponding CPU starts up.
+ */
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_OFFLINE;
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_OFFLINE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in "
"acpidev_cpu_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -474,12 +555,12 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
if (acpidev_cpu_get_procid(infop, &procid) != 0) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to query processor id for %s.",
+ "!acpidev: failed to query processor id for %s.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
} else if (acpidev_cpu_get_apicid(procid, &apicid) != 0) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to query apic id for %s.",
+ "!acpidev: failed to query apic id for %s.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
}
@@ -499,7 +580,7 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
return (ACPIDEV_FILTER_SKIP);
} else if (!mat.enabled) {
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: CPU %s has been disabled.",
+ "!acpidev: CPU %s has been disabled.",
infop->awi_name);
return (ACPIDEV_FILTER_SKIP);
}
@@ -536,6 +617,7 @@ static ACPI_STATUS
acpidev_cpu_init(acpidev_walk_info_t *infop)
{
int count;
+ uint32_t pxmid;
dev_info_t *dip;
ACPI_HANDLE hdl;
char unitaddr[64];
@@ -550,7 +632,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
dip = infop->awi_dip;
hdl = infop->awi_hdl;
- /* Create "apic_id" and "processor_id" properties. */
+ /* Create "apic_id", "processor_id" and "proximity_id" properties. */
if (ndi_prop_update_int(DDI_DEV_T_NONE, dip,
ACPIDEV_PROP_NAME_PROCESSOR_ID, infop->awi_scratchpad[0]) !=
NDI_SUCCESS) {
@@ -567,6 +649,15 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
infop->awi_name);
return (AE_ERROR);
}
+ if (ACPI_SUCCESS(acpidev_cpu_get_proximity_id(infop->awi_hdl,
+ infop->awi_scratchpad[1], &pxmid))) {
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ ACPIDEV_PROP_NAME_PROXIMITY_ID, pxmid) != NDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpidev: failed to set proximity id "
+ "property for %s.", infop->awi_name);
+ return (AE_ERROR);
+ }
+ }
/* Set "compatible" property for CPU dip */
count = sizeof (compatible) / sizeof (compatible[0]);
@@ -616,7 +707,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
}
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown operation type %u in acpidev_cpu_init.",
+ "!acpidev: unknown operation type %u in acpidev_cpu_init.",
infop->awi_op_type);
return (AE_BAD_PARAMETER);
}
@@ -624,18 +715,38 @@ acpidev_cpu_init(acpidev_walk_info_t *infop)
return (AE_OK);
}
+static void
+acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ acpidev_class_t *clsp)
+{
+ _NOTE(ARGUNUSED(clsp, dhdl));
+
+ int rc;
+ uint32_t procid;
+
+ rc = acpica_get_procid_by_object(hdl, &procid);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_SUCCESS(rc)) {
+ rc = acpica_remove_processor_from_map(procid);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "!acpidev: failed to remove "
+ "processor from ACPICA.");
+ }
+ }
+}
+
+/*
+ * Look up the dip for a CPU if ACPI CPU autoconfig is enabled.
+ */
static int
-acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp)
+acpidev_cpu_lookup_dip(cpu_t *cp, dev_info_t **dipp)
{
uint32_t apicid;
ACPI_HANDLE hdl;
dev_info_t *dip = NULL;
*dipp = NULL;
- /*
- * Try to get the dip associated with the CPU if ACPI_DEVCFG_CPU is
- * enabled.
- */
if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) {
apicid = cpuid_get_apicid(cp);
if (acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl) == 0 ||
@@ -644,18 +755,41 @@ acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp)
ASSERT(hdl != NULL);
if (ACPI_SUCCESS(acpica_get_devinfo(hdl, &dip))) {
ASSERT(dip != NULL);
- ndi_hold_devi(dip);
*dipp = dip;
return (PSM_SUCCESS);
}
}
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to lookup dip for cpu %d(%p).",
+ cp->cpu_id, (void *)cp);
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get dip for cpu %d(%p).",
- cp->cpu_id, (void *)cp);
+ return (PSM_FAILURE);
+}
+
+static int
+acpidev_cpu_create_dip(cpu_t *cp, dev_info_t **dipp)
+{
+ if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) {
+ ndi_hold_devi(*dipp);
+ return (PSM_SUCCESS);
+ }
if (psm_cpu_create_devinfo_old != NULL) {
return (psm_cpu_create_devinfo_old(cp, dipp));
} else {
return (PSM_FAILURE);
}
}
+
+static int
+acpidev_cpu_get_dip(cpu_t *cp, dev_info_t **dipp)
+{
+ if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) {
+ return (PSM_SUCCESS);
+ }
+ if (psm_cpu_get_devinfo_old != NULL) {
+ return (psm_cpu_get_devinfo_old(cp, dipp));
+ } else {
+ return (PSM_FAILURE);
+ }
+}
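
The two wrappers above follow the usual hook-interposition idiom: save
the previous function pointer, install your own, and chain to the saved
pointer when the ACPI lookup misses. A minimal sketch of that idiom
(get_devinfo_hook, acpi_lookup() and the plain int CPU id are
illustrative stand-ins, not kernel interfaces):

#include <stddef.h>
#include <stdio.h>

#define	PSM_SUCCESS	0
#define	PSM_FAILURE	1

/* Global hook, analogous to psm_cpu_get_devinfo. */
static int (*get_devinfo_hook)(int, const char **);
/* Saved previous hook, analogous to psm_cpu_get_devinfo_old. */
static int (*get_devinfo_old)(int, const char **);

static int
acpi_lookup(int cpu, const char **dipp)
{
	if (cpu == 0) {			/* pretend only CPU 0 is known */
		*dipp = "acpi-cpu0";
		return (PSM_SUCCESS);
	}
	return (PSM_FAILURE);
}

static int
acpi_get_dip(int cpu, const char **dipp)
{
	if (acpi_lookup(cpu, dipp) == PSM_SUCCESS)
		return (PSM_SUCCESS);
	if (get_devinfo_old != NULL)	/* fall back to the saved hook */
		return (get_devinfo_old(cpu, dipp));
	return (PSM_FAILURE);
}

int
main(void)
{
	const char *dip;

	get_devinfo_old = get_devinfo_hook;	/* save, then replace */
	get_devinfo_hook = acpi_get_dip;
	if (get_devinfo_hook(0, &dip) == PSM_SUCCESS)
		printf("dip: %s\n", dip);
	return (0);
}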
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
index fbf6e944b4..cf65cf4084 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -103,7 +103,7 @@ static acpidev_filter_rule_t acpidev_device_filters[] = {
static ACPI_STATUS
acpidev_device_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -114,20 +114,31 @@ acpidev_device_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ break;
+
+ default:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown operation type %u in "
+ "!acpidev: unknown operation type %u in "
"acpi_device_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -153,7 +164,7 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
ACPIDEV_ARRAY_PARAM(acpidev_device_filters),
devname, maxlen);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_device_filter().", infop->awi_op_type);
res = ACPIDEV_FILTER_FAILED;
}
@@ -161,7 +172,6 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_device_init(acpidev_walk_info_t *infop)
{
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c
new file mode 100644
index 0000000000..e346c65cfb
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c
@@ -0,0 +1,2743 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/memlist.h>
+#include <sys/memlist_impl.h>
+#include <sys/note.h>
+#include <sys/obpdefs.h>
+#include <sys/synch.h>
+#include <sys/sysmacros.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/x86_archext.h>
+#include <sys/machsystm.h>
+#include <sys/memnode.h> /* for lgrp_plat_node_cnt */
+#include <sys/psm_types.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpidev_impl.h>
+
+struct acpidev_dr_set_prop_arg {
+ uint32_t level;
+ uint32_t bdnum;
+ uint32_t cpu_id;
+ uint32_t mem_id;
+ uint32_t io_id;
+ uint32_t mod_id;
+};
+
+struct acpidev_dr_device_remove_arg {
+ uint32_t level;
+};
+
+extern int acpidev_options;
+
+/* User configurable option to enable/disable ACPI based DR operations. */
+int acpidev_dr_enable = 1;
+int acpidev_dr_hierarchy_name = 1;
+uint32_t acpidev_dr_max_segs_per_mem_device = ACPIDEV_DR_SEGS_PER_MEM_DEV;
+uint32_t acpidev_dr_max_memlists_per_seg = ACPIDEV_DR_MEMLISTS_PER_SEG;
+
+ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr;
+ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr;
+
+/* ACPI based DR operations are unsupported if zero. */
+static int acpidev_dr_supported = -1;
+
+/* Failed to initialize support of DR operations if non-zero. */
+static int acpidev_dr_failed;
+
+static volatile uint32_t acpidev_dr_boards;
+static volatile uint32_t acpidev_dr_board_index;
+static uint32_t acpidev_dr_max_cmp_per_board;
+static uint32_t acpidev_dr_max_memory_per_board;
+static uint32_t acpidev_dr_max_io_per_board;
+static uint32_t acpidev_dr_memory_device_cnt;
+
+static ACPI_HANDLE *acpidev_dr_board_handles[ACPIDEV_DR_MAX_BOARDS];
+
+/* Lock to protect/block DR operations at runtime. */
+static kmutex_t acpidev_dr_lock;
+
+static acpidev_dr_capacity_t acpidev_dr_capacities[] = {
+ { /* Nehalem-EX */
+ X86_VENDOR_Intel, 0x6, 0x2e, 0x2e, 0, UINT_MAX,
+ B_TRUE, /* Hotplug capable */
+ 1ULL << 30, /* Align on 1GB boundary */
+ },
+ { /* the last item is used to mark end of the table */
+ UINT_MAX, UINT_MAX, UINT_MAX, 0, UINT_MAX, 0,
+ B_FALSE,
+ 0,
+ },
+};
+
+static ACPI_STATUS acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg,
+ void **retval);
+
+static acpidev_dr_capacity_t *
+acpidev_dr_get_capacity(void)
+{
+ acpidev_dr_capacity_t *cp, *cp1;
+ uint_t vendor, family, model, step;
+ static acpidev_dr_capacity_t *acpidev_dr_capacity_curr = NULL;
+
+ if (acpidev_dr_capacity_curr != NULL) {
+ return (acpidev_dr_capacity_curr);
+ }
+
+ kpreempt_disable();
+ vendor = cpuid_getvendor(CPU);
+ family = cpuid_getfamily(CPU);
+ model = cpuid_getmodel(CPU);
+ step = cpuid_getstep(CPU);
+ kpreempt_enable();
+
+ for (cp = acpidev_dr_capacities; ; cp++) {
+ ASSERT(cp < acpidev_dr_capacities +
+ sizeof (acpidev_dr_capacities) / sizeof (*cp));
+
+		/* Check whether this is the last item of the table. */
+ if (cp->cpu_vendor == UINT_MAX && cp->cpu_family == UINT_MAX &&
+ cp->cpu_model_min == UINT_MAX && cp->cpu_model_max == 0 &&
+ cp->cpu_step_min == UINT_MAX && cp->cpu_step_max == 0) {
+ break;
+ }
+ if (cp->cpu_vendor == vendor && cp->cpu_family == family &&
+ model >= cp->cpu_model_min && model <= cp->cpu_model_max &&
+ step >= cp->cpu_step_min && step <= cp->cpu_step_max) {
+ break;
+ }
+ }
+
+ /* Assume all CPUs in system are homogeneous. */
+ cp1 = atomic_cas_ptr(&acpidev_dr_capacity_curr, NULL, cp);
+ ASSERT(cp1 == NULL || cp1 == cp);
+ if (cp1 != NULL && cp1 != cp) {
+ return (NULL);
+ }
+
+ return (cp);
+}
+
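acpidev_dr_get_capacity() above walks a table terminated by an
all-UINT_MAX sentinel, matching on CPUID vendor/family/model/stepping,
and caches the first match with an atomic compare-and-swap. The
table-scan half reduces to this sketch (the entries and the field subset
are made up for illustration):

#include <limits.h>
#include <stdio.h>

struct capacity {
	unsigned vendor, family, model_min, model_max;
	int hotplug;
};

static struct capacity caps[] = {
	{ 0, 0x6, 0x2e, 0x2e, 1 },		/* one known CPU model */
	{ UINT_MAX, UINT_MAX, UINT_MAX, 0, 0 },	/* sentinel: end of table */
};

static struct capacity *
find_capacity(unsigned vendor, unsigned family, unsigned model)
{
	struct capacity *cp;

	for (cp = caps; ; cp++) {
		/* The sentinel doubles as the "no match" entry. */
		if (cp->vendor == UINT_MAX && cp->family == UINT_MAX)
			return (cp);
		if (cp->vendor == vendor && cp->family == family &&
		    model >= cp->model_min && model <= cp->model_max)
			return (cp);
	}
}

int
main(void)
{
	struct capacity *cp = find_capacity(0, 0x6, 0x2e);

	printf("hotplug capable: %d\n", cp->hotplug);
	return (0);
}
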
+int
+acpidev_dr_capable(void)
+{
+ uint64_t flags1, flags2;
+ acpidev_dr_capacity_t *cp;
+
+ /*
+ * Disable support of DR operations if:
+ * 1) acpidev fails to initialize DR interfaces.
+ * 2) ACPI based DR has been disabled by user.
+ * 3) No DR capable devices have been detected.
+ * 4) The system doesn't support DR operations.
+ * 5) Some acpidev features have been disabled by user.
+ */
+ if (acpidev_dr_failed != 0 || acpidev_dr_enable == 0 ||
+ acpidev_dr_supported == 0) {
+ return (0);
+ }
+
+ flags1 = ACPI_FEATURE_DEVCFG | ACPI_FEATURE_OSI_MODULE;
+ flags2 = ACPI_DEVCFG_CPU | ACPI_DEVCFG_MEMORY |
+ ACPI_DEVCFG_CONTAINER | ACPI_DEVCFG_PCI;
+ if (acpica_get_core_feature(flags1) != flags1 ||
+ acpica_get_devcfg_feature(flags2) != flags2) {
+ cmn_err(CE_CONT,
+ "?acpidev: disable support of ACPI based DR because "
+ "some acpidev features have been disabled by user.\n");
+ acpidev_dr_supported = 0;
+ return (0);
+ }
+
+ cp = acpidev_dr_get_capacity();
+ if (cp == NULL || cp->hotplug_supported == B_FALSE) {
+ return (0);
+ }
+
+ return (1);
+}
+
+uint32_t
+acpidev_dr_max_boards(void)
+{
+ return (acpidev_dr_boards);
+}
+
+uint32_t
+acpidev_dr_max_io_units_per_board(void)
+{
+ return (acpidev_dr_max_io_per_board);
+}
+
+uint32_t
+acpidev_dr_max_mem_units_per_board(void)
+{
+ return (acpidev_dr_max_memory_per_board);
+}
+
+uint32_t
+acpidev_dr_max_cmp_units_per_board(void)
+{
+ return (acpidev_dr_max_cmp_per_board);
+}
+
+uint32_t
+acpidev_dr_max_cpu_units_per_cmp(void)
+{
+ static int max_cnt;
+
+ if (max_cnt == 0) {
+ kpreempt_disable();
+ max_cnt = cpuid_get_ncpu_per_chip(CPU);
+ kpreempt_enable();
+ }
+
+ return (max_cnt);
+}
+
+uint32_t
+acpidev_dr_max_segments_per_mem_device(void)
+{
+ if (acpidev_dr_max_segs_per_mem_device < 1) {
+ return (ACPIDEV_DR_SEGS_PER_MEM_DEV);
+ } else {
+ return (acpidev_dr_max_segs_per_mem_device);
+ }
+}
+
+uint32_t
+acpidev_dr_max_memlists_per_segment(void)
+{
+ if (acpidev_dr_max_memlists_per_seg < ACPIDEV_DR_MEMLISTS_PER_SEG) {
+ return (ACPIDEV_DR_MEMLISTS_PER_SEG);
+ } else {
+ return (acpidev_dr_max_memlists_per_seg);
+ }
+}
+
+void
+acpidev_dr_init(void)
+{
+ mutex_init(&acpidev_dr_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+static void
+acpidev_dr_check_board_type(acpidev_data_handle_t dhdl,
+ struct acpidev_dr_set_prop_arg *ap, char *objname)
+{
+ if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_MEMORY) {
+ /* Memory board should have only one memory device. */
+ ASSERT(ap->cpu_id == 0);
+ ASSERT(ap->mem_id == 1);
+ ASSERT(ap->io_id == 0);
+ ASSERT(ap->mod_id == 0);
+ dhdl->aod_bdtype = ACPIDEV_MEMORY_BOARD;
+ } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI ||
+ dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) {
+ /* IO board should have only one IO device. */
+ ASSERT(ap->cpu_id == 0);
+ ASSERT(ap->mem_id == 0);
+ ASSERT(ap->io_id == 1);
+ ASSERT(ap->mod_id == 0);
+ dhdl->aod_bdtype = ACPIDEV_IO_BOARD;
+ } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_CONTAINER) {
+ if (ap->mod_id == 1 && ap->mem_id == 0) {
+ dhdl->aod_bdtype = ACPIDEV_CPU_BOARD;
+ } else {
+ dhdl->aod_bdtype = ACPIDEV_SYSTEM_BOARD;
+ }
+ } else {
+ cmn_err(CE_WARN,
+ "!acpidev: unknown type of hotplug capable board %s.",
+ objname);
+ ASSERT(0);
+ }
+}
+
+/*
+ * Check for hotplug capable boards and create the environment to support
+ * ACPI based DR operations. There's no need to acquire locks here; it's
+ * called from single-threaded context during boot.
+ */
+void
+acpidev_dr_check(acpidev_walk_info_t *infop)
+{
+ uint_t cmp;
+ boolean_t found = B_FALSE;
+ ACPI_HANDLE phdl;
+ acpidev_data_handle_t dhdl, pdhdl;
+ struct acpidev_dr_set_prop_arg arg;
+
+ if (infop == NULL ||
+ infop->awi_op_type != ACPIDEV_OP_BOOT_PROBE) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: invalid parameter to acpidev_dr_check().");
+ return;
+ }
+
+ if (acpidev_dr_capable() == 0) {
+ return;
+ }
+
+ dhdl = infop->awi_data;
+ ASSERT(dhdl != NULL);
+
+ /* This device has already been handled before. */
+ if (ACPIDEV_DR_IS_PROCESSED(dhdl)) {
+ return;
+ }
+
+ /*
+	 * The device is hotplug capable if the ACPI _EJ0 method is
+	 * available.
+ */
+ if (!ACPIDEV_DR_IS_BOARD(dhdl) &&
+ acpidev_dr_device_hotplug_capable(infop->awi_hdl)) {
+ ACPIDEV_DR_SET_BOARD(dhdl);
+ }
+
+	/* Nothing more to do if the device isn't hotplug capable. */
+ if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ return;
+ }
+
+ /* Check whether hardware topology is supported or not. */
+ if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, NULL,
+ NULL))) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: hardware topology under %s "
+ "is unsupported for DR operations.", infop->awi_name);
+ return;
+ }
+
+ /* Generate board/index/port number for the hotplug capable board. */
+ dhdl->aod_bdnum = atomic_inc_32_nv(&acpidev_dr_boards) - 1;
+ dhdl->aod_portid = 0;
+ phdl = infop->awi_hdl;
+ while (ACPI_SUCCESS(AcpiGetParent(phdl, &phdl)) &&
+ phdl != ACPI_ROOT_OBJECT) {
+ pdhdl = acpidev_data_get_handle(phdl);
+ if (pdhdl != NULL && ACPIDEV_DR_IS_BOARD(pdhdl)) {
+ dhdl->aod_bdidx = atomic_inc_32_nv(&pdhdl->aod_chidx);
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE) {
+ dhdl->aod_bdidx = atomic_inc_32_nv(&acpidev_dr_board_index);
+ }
+ dhdl->aod_bdidx -= 1;
+
+ /* Found too many hotplug capable boards. */
+ if (dhdl->aod_bdnum >= ACPIDEV_DR_MAX_BOARDS) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ cmn_err(CE_WARN, "!acpidev: too many hotplug capable boards, "
+ "max %d, found %d.",
+ ACPIDEV_DR_MAX_BOARDS, dhdl->aod_bdnum + 1);
+ return;
+ }
+
+ /* Scan all descendant devices to prepare info for DR operations. */
+ bzero(&arg, sizeof (arg));
+ arg.bdnum = dhdl->aod_bdnum;
+ arg.level = infop->awi_level;
+ if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, &arg,
+ NULL))) {
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to set DR properties "
+ "for descendants of %s.", infop->awi_name);
+ return;
+ }
+
+ /* Get type of the hotplug capable board. */
+ acpidev_dr_check_board_type(dhdl, &arg, infop->awi_name);
+
+ /*
+	 * Save the ACPI handle of the hotplug capable board to speed up
+	 * board handle lookups if caching is enabled.
+ */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) {
+ acpidev_dr_board_handles[dhdl->aod_bdnum] = infop->awi_hdl;
+ }
+
+ /* Update system maximum DR capabilities. */
+ cmp = (arg.cpu_id + acpidev_dr_max_cpu_units_per_cmp() - 1);
+ cmp /= acpidev_dr_max_cpu_units_per_cmp();
+ if (cmp > acpidev_dr_max_cmp_per_board) {
+ acpidev_dr_max_cmp_per_board = cmp;
+ }
+ if (arg.mem_id > acpidev_dr_max_memory_per_board) {
+ acpidev_dr_max_memory_per_board = arg.mem_id;
+ }
+ if (arg.io_id > acpidev_dr_max_io_per_board) {
+ acpidev_dr_max_io_per_board = arg.io_id;
+ }
+}
+
+static void
+acpidev_dr_initialize_memory_hotplug(void)
+{
+ caddr_t buf;
+ uint32_t cnt;
+ acpidev_dr_capacity_t *cp;
+
+ /*
+ * We have already checked that the platform supports DR operations.
+ */
+ cp = acpidev_dr_get_capacity();
+ ASSERT(cp != NULL && cp->hotplug_supported);
+ ASSERT(ISP2(cp->memory_alignment));
+ ASSERT(cp->memory_alignment > MMU_PAGESIZE);
+ mem_node_physalign = cp->memory_alignment;
+
+ /* Pre-populate memlist cache. */
+ cnt = acpidev_dr_memory_device_cnt;
+ cnt *= acpidev_dr_max_segments_per_mem_device();
+ cnt *= acpidev_dr_max_memlists_per_segment();
+ if (cnt > ACPIDEV_DR_MAX_MEMLIST_ENTRIES) {
+ cmn_err(CE_WARN, "!acpidev: attempted to reserve too many "
+ "memlist entries (%u), max %u. Falling back to %u and "
+ "some memory hot add operations may fail.",
+ cnt, ACPIDEV_DR_MAX_MEMLIST_ENTRIES,
+ ACPIDEV_DR_MAX_MEMLIST_ENTRIES);
+ cnt = ACPIDEV_DR_MAX_MEMLIST_ENTRIES;
+ }
+ cnt *= sizeof (struct memlist);
+ buf = kmem_zalloc(cnt, KM_SLEEP);
+ memlist_free_block(buf, cnt);
+}
+
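The pre-population above is simple worst-case arithmetic: the number of
memlist entries to reserve is the product of three tunables, clamped to
a hard maximum before a single allocation is carved into the free-block
cache. As a sketch (the cap and counts here are illustrative defaults,
not the driver's values):

#include <stdio.h>

#define	MAX_MEMLIST_ENTRIES	1024	/* illustrative hard cap */

int
main(void)
{
	unsigned devices = 8;	/* memory devices found at boot */
	unsigned segs = 16;	/* max segments per memory device */
	unsigned lists = 16;	/* max memlists per segment */
	unsigned cnt = devices * segs * lists;	/* worst case: 2048 */

	if (cnt > MAX_MEMLIST_ENTRIES)
		cnt = MAX_MEMLIST_ENTRIES;	/* clamp, as the driver does */
	printf("reserve %u memlist entries\n", cnt);
	return (0);
}
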
+/*
+ * Create a pseudo DR control device node if the system is hotplug
+ * capable. There's no need to acquire locks; it's called from
+ * single-threaded context during boot. pdip has been held by the caller.
+ */
+static ACPI_STATUS
+acpidev_dr_create_node(dev_info_t *pdip)
+{
+ dev_info_t *dip;
+ char unit[32];
+ char *path;
+ char *comps[] = {
+ "acpidr_sbd",
+ };
+
+ /*
+ * Disable support of DR operations if no hotplug capable board has
+ * been detected.
+ */
+ if (acpidev_dr_boards == 0) {
+ acpidev_dr_supported = 0;
+ } else {
+ acpidev_dr_supported = 1;
+ }
+
+ /*
+ * Don't create control device node if the system isn't hotplug capable.
+ */
+ if (acpidev_dr_capable() == 0) {
+ return (AE_SUPPORT);
+ }
+
+ /* Cache pointer to the ACPI SLIT table. */
+ if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SLIT, 1,
+ (ACPI_TABLE_HEADER **)&acpidev_slit_tbl_ptr))) {
+ acpidev_slit_tbl_ptr = NULL;
+ }
+ if (acpidev_srat_tbl_ptr == NULL || acpidev_slit_tbl_ptr == NULL) {
+ if (lgrp_plat_node_cnt != 1) {
+ /*
+ * Disable support of CPU/memory DR operations if lgrp
+ * is enabled but failed to cache SRAT/SLIT table
+ * pointers.
+ */
+ cmn_err(CE_WARN,
+ "!acpidev: failed to get ACPI SRAT/SLIT table.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ }
+ }
+
+ ndi_devi_alloc_sleep(pdip, ACPIDEV_NODE_NAME_ACPIDR,
+ (pnode_t)DEVI_PSEUDO_NODEID, &dip);
+
+ /* Set "unit-address" device property. */
+ (void) snprintf(unit, sizeof (unit), "%u", 0);
+ if (ndi_prop_update_string(DDI_DEV_T_NONE, dip,
+ ACPIDEV_PROP_NAME_UNIT_ADDR, unit) != NDI_SUCCESS) {
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ cmn_err(CE_CONT,
+ "?acpidev: failed to set unit-address property for %s.\n",
+ ddi_pathname(dip, path));
+ kmem_free(path, MAXPATHLEN);
+ (void) ddi_remove_child(dip, 0);
+ acpidev_dr_failed = 1;
+ return (AE_ERROR);
+ }
+
+ /* Set "compatible" device property. */
+ if (ndi_prop_update_string_array(DDI_DEV_T_NONE, dip, OBP_COMPATIBLE,
+ comps, sizeof (comps) / sizeof (comps[0])) != NDI_SUCCESS) {
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ cmn_err(CE_CONT, "?acpidev: failed to set compatible "
+ "property for %s.\n", ddi_pathname(dip, path));
+ kmem_free(path, MAXPATHLEN);
+ (void) ddi_remove_child(dip, 0);
+ acpidev_dr_failed = 1;
+ return (AE_ERROR);
+ }
+
+ (void) ndi_devi_bind_driver(dip, 0);
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_initialize(dev_info_t *pdip)
+{
+ ACPI_STATUS rc;
+
+ rc = acpidev_dr_create_node(pdip);
+ if (ACPI_FAILURE(rc)) {
+ return (rc);
+ }
+
+ /* Initialize support of memory DR operations. */
+ if (plat_dr_support_memory()) {
+ acpidev_dr_initialize_memory_hotplug();
+ }
+
+	/* Mark the DR subsystem as ready for use. */
+ plat_dr_enable();
+
+ return (AE_OK);
+}
+
+static ACPI_STATUS
+acpidev_dr_find_board(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ /* No data handle available, not ready for DR operations. */
+ return (AE_CTRL_DEPTH);
+ } else if (ACPIDEV_DR_IS_BOARD(dhdl) && ACPIDEV_DR_IS_WORKING(dhdl) &&
+ dhdl->aod_bdnum == (intptr_t)ctx) {
+ ASSERT(retval != NULL);
+ *(ACPI_HANDLE *)retval = hdl;
+ return (AE_CTRL_TERMINATE);
+ }
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_handle(uint_t board, ACPI_HANDLE *hdlp)
+{
+ ACPI_STATUS rc = AE_OK;
+ ACPI_HANDLE hdl;
+
+ ASSERT(hdlp != NULL);
+ if (hdlp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_handle().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ if (board >= acpidev_dr_boards) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board number %d is out of range, max %d.",
+ board, acpidev_dr_boards);
+ return (AE_NOT_FOUND);
+ }
+
+ /* Use cached handles if caching is enabled. */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) {
+ if (acpidev_dr_board_handles[board] != NULL) {
+ hdl = acpidev_dr_board_handles[board];
+ if (ACPI_FAILURE(acpidev_dr_find_board(hdl, 1,
+ (void *)(intptr_t)board, (void **)hdlp)) &&
+ *hdlp != NULL) {
+ return (AE_OK);
+ }
+ }
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board %d doesn't exist.", board);
+ *hdlp = NULL;
+ return (AE_NOT_FOUND);
+ }
+
+ /* All hotplug capable boards should exist under \_SB_. */
+ if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT,
+ ACPIDEV_OBJECT_NAME_SB, &hdl))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get handle of %s.",
+ ACPIDEV_OBJECT_NAME_SB);
+ return (AE_ERROR);
+ }
+
+ *hdlp = NULL;
+ if (ACPI_FAILURE(AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ ACPIDEV_MAX_ENUM_LEVELS - 1, acpidev_dr_find_board, NULL,
+ (void *)(intptr_t)board, (void **)hdlp))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to find ACPI handle "
+ "for board %d.", board);
+ rc = AE_NOT_FOUND;
+ } else if (*hdlp == NULL) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: board %d doesn't exist.", board);
+ rc = AE_NOT_FOUND;
+ }
+
+ return (rc);
+}
+
+acpidev_board_type_t
+acpidev_dr_get_board_type(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+ acpidev_board_type_t type = ACPIDEV_INVALID_BOARD;
+
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_type().");
+ return (type);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ } else {
+ type = dhdl->aod_bdtype;
+ }
+
+ return (type);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_number(ACPI_HANDLE hdl, uint32_t *bnump)
+{
+ acpidev_data_handle_t dhdl;
+
+ if (hdl == NULL || bnump == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_number().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ return (AE_ERROR);
+ }
+ *bnump = dhdl->aod_bdnum;
+
+ return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_get_board_name(ACPI_HANDLE hdl, char *buf, size_t len)
+{
+ char *fmt;
+ int count = 0;
+ size_t rlen = 0;
+ ACPI_HANDLE thdl;
+ acpidev_data_handle_t dhdl;
+ acpidev_data_handle_t dhdls[ACPIDEV_MAX_ENUM_LEVELS];
+
+ if (hdl == NULL || buf == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_board_name().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Find ancestors of the device which are hotplug capable. */
+ for (thdl = hdl; thdl != NULL; ) {
+ dhdl = acpidev_data_get_handle(thdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with %p.", thdl);
+ return (AE_ERROR);
+ }
+
+ if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ /* The board itself should be hotplug capable. */
+ if (count == 0) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is "
+ "not hotplug capable.", thdl);
+ return (AE_ERROR);
+ }
+ } else {
+ if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is "
+ "in the FAILED state.", thdl);
+ }
+
+ if (count >= ACPIDEV_MAX_ENUM_LEVELS) {
+ ACPIDEV_DEBUG(CE_WARN,
+				    "!acpidev: hotplug capable boards are "
+				    "nested too deeply.");
+ return (AE_ERROR);
+ }
+
+ dhdls[count] = dhdl;
+ count++;
+ }
+
+ if (acpidev_dr_hierarchy_name == 0) {
+ thdl = NULL;
+ } else if (ACPI_FAILURE(AcpiGetParent(thdl, &thdl))) {
+ thdl = NULL;
+ }
+ }
+
+	/* Generate the hierarchical name for the board. */
+ ASSERT(count > 0);
+ for (count--; count >= 0 && rlen < len; count--) {
+ dhdl = dhdls[count];
+ switch (dhdl->aod_bdtype) {
+ case ACPIDEV_CPU_BOARD:
+ fmt = ACPIDEV_DR_CPU_BD_FMT;
+ break;
+ case ACPIDEV_MEMORY_BOARD:
+ fmt = ACPIDEV_DR_MEMORY_BD_FMT;
+ break;
+ case ACPIDEV_IO_BOARD:
+ fmt = ACPIDEV_DR_IO_BD_FMT;
+ break;
+ case ACPIDEV_SYSTEM_BOARD:
+ fmt = ACPIDEV_DR_SYSTEM_BD_FMT;
+ break;
+ case ACPIDEV_INVALID_BOARD:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid board type.");
+ return (AE_ERROR);
+ default:
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: unknown board type %u.",
+ dhdl->aod_bdtype);
+ return (AE_ERROR);
+ }
+
+		/* Add "." before each component name except the first. */
+ if (rlen != 0) {
+ rlen += snprintf(buf + rlen, len - rlen, ".");
+ }
+ if (rlen < len) {
+ rlen += snprintf(buf + rlen, len - rlen, fmt,
+ dhdl->aod_bdidx);
+ }
+ }
+
+ /* Check whether the buffer is sufficient. */
+ if (rlen >= len) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer length to "
+ "acpidev_dr_get_board_name() is too small.");
+ return (AE_NO_MEMORY);
+ }
+
+ return (AE_OK);
+}
+
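Name generation collects the hotplug-capable ancestors bottom-up, then
emits them top-down, adding "." before every component except the first,
so a CPU board nested under a system board comes out as, say, SB0.CPU2.
A standalone sketch of the emit loop ("SB%u" and "CPU%u" are placeholders
for the ACPIDEV_DR_*_BD_FMT macros):

#include <stdio.h>

int
main(void)
{
	/* ancestors collected bottom-up: CPU board 2 under system board 0 */
	const char *fmts[] = { "CPU%u", "SB%u" };
	unsigned idx[] = { 2, 0 };
	char buf[64];
	size_t rlen = 0, len = sizeof (buf);
	int i;

	/* emit top-down, separating components with '.' */
	for (i = 1; i >= 0 && rlen < len; i--) {
		if (rlen != 0)
			rlen += snprintf(buf + rlen, len - rlen, ".");
		if (rlen < len)
			rlen += snprintf(buf + rlen, len - rlen,
			    fmts[i], idx[i]);
	}
	if (rlen >= len)
		return (1);		/* buffer too small */
	printf("%s\n", buf);		/* prints SB0.CPU2 */
	return (0);
}
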
+ACPI_STATUS
+acpidev_dr_get_attachment_point(ACPI_HANDLE hdl, char *buf, size_t len)
+{
+ size_t rlen;
+
+ if (hdl == NULL || buf == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_attachment_point().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ rlen = snprintf(buf, len, "/devices/%s/%s@%u:",
+ ACPIDEV_NODE_NAME_ROOT, ACPIDEV_NODE_NAME_ACPIDR, 0);
+ if (rlen >= len) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer to "
+ "acpidev_dr_get_attachment_point() is too small.");
+ return (AE_NO_MEMORY);
+ }
+
+ return (acpidev_dr_get_board_name(hdl, buf + rlen, len - rlen));
+}
+
+/*
+ * The existence of an ACPI _EJ0 method implies the device is hotplug capable.
+ */
+int
+acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl)
+{
+ ACPI_HANDLE ej0;
+
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EJ0, &ej0))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+int
+acpidev_dr_device_has_edl(ACPI_HANDLE hdl)
+{
+ ACPI_HANDLE edl;
+
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EDL, &edl))) {
+ return (0);
+ }
+
+ return (1);
+}
+
+int
+acpidev_dr_device_is_present(ACPI_HANDLE hdl)
+{
+ int status;
+
+ ASSERT(hdl != NULL);
+
+ status = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_present(status)) {
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+acpidev_dr_device_is_powered(ACPI_HANDLE hdl)
+{
+ int status;
+
+ ASSERT(hdl != NULL);
+
+ /*
+	 * Check the device status returned by the ACPI _STA method.
+	 * The device is powered if the status is both PRESENT and
+	 * ENABLED.
+ */
+ status = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_enabled(status)) {
+ return (1);
+ }
+
+ return (0);
+}
+
+ACPI_STATUS
+acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap)
+{
+ acpidev_dr_capacity_t *cp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ap != NULL);
+ if (ap == NULL || hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_get_mem_alignment().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ cp = acpidev_dr_get_capacity();
+ if (cp == NULL || cp->hotplug_supported == B_FALSE) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get memory alignment.");
+ return (AE_SUPPORT);
+ }
+ *ap = cp->memory_alignment;
+
+ return (AE_OK);
+}
+
+/*
+ * Query the "reg" or "assigned-addresses" property of the device and
+ * return it through regpp, with the number of entries in cntp.
+ * On success the caller is responsible for releasing the property data
+ * with acpidev_dr_device_free_regspec(). On any errors, *regpp is set
+ * to NULL and *cntp to 0.
+ */
+ACPI_STATUS
+acpidev_dr_device_get_regspec(ACPI_HANDLE hdl, boolean_t assigned,
+ acpidev_regspec_t **regpp, uint_t *cntp)
+{
+ int *valp;
+ uint_t count;
+ char *propname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ ASSERT(regpp != NULL && cntp != NULL);
+ if (hdl == NULL || regpp == NULL || cntp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_device_get_regspec().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Set default return value. */
+ *regpp = NULL;
+ *cntp = 0;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %p.", hdl);
+ return (AE_ERROR);
+ } else if ((dip = acpidev_data_get_devinfo(dhdl)) == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get dip associated with %p.", hdl);
+ return (AE_NOT_FOUND);
+ }
+
+ propname = assigned ? "assigned-addresses" : "reg";
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ propname, &valp, &count) != DDI_PROP_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to lookup device property %s.", propname);
+ return (AE_NOT_FOUND);
+ }
+
+ if (count % (sizeof (**regpp) / sizeof (int)) != 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: device property %s is invalid.", propname);
+ ddi_prop_free(valp);
+ return (AE_ERROR);
+ }
+
+ *regpp = (acpidev_regspec_t *)valp;
+ *cntp = count / (sizeof (**regpp) / sizeof (int));
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_device_free_regspec(acpidev_regspec_t *regp, uint_t count)
+{
+ _NOTE(ARGUNUSED(count));
+
+ if (regp != NULL) {
+ ddi_prop_free(regp);
+ }
+}
+
+/*
+ * Return values:
+ * . negative values on error
+ * . size of data copied to buf if the buffer is big enough
+ * . size of buffer needed if the buffer is too small
+ */
+int
+acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name, caddr_t buf, size_t len)
+{
+ int rlen = -1;
+ acpidev_data_handle_t dhdl;
+
+ if (hdl == NULL) {
+ return (-1);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (-1);
+ } else if (!ACPIDEV_DR_IS_WORKING(dhdl)) {
+ return (-1);
+ }
+
+ if (strcmp(name, ACPIDEV_DR_PROP_PORTID) == 0) {
+ if (len >= sizeof (uint32_t)) {
+ *(uint32_t *)(void *)buf = dhdl->aod_portid;
+ }
+ rlen = sizeof (uint32_t);
+ } else if (strcmp(name, ACPIDEV_DR_PROP_BOARDNUM) == 0) {
+ if (len >= sizeof (uint32_t)) {
+ *(uint32_t *)(void *)buf = dhdl->aod_bdnum;
+ }
+ rlen = sizeof (uint32_t);
+ } else if (strcmp(name, ACPIDEV_DR_PROP_DEVNAME) == 0) {
+ switch (dhdl->aod_class_id) {
+ case ACPIDEV_CLASS_ID_CPU:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_CPU)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_CPU, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_CPU);
+ break;
+
+ case ACPIDEV_CLASS_ID_MEMORY:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_MEMORY)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_MEMORY, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_MEMORY);
+ break;
+
+ case ACPIDEV_CLASS_ID_PCI:
+ case ACPIDEV_CLASS_ID_PCIEX:
+ if (len >= sizeof (ACPIDEV_NODE_NAME_PCI)) {
+ (void) strlcpy((char *)buf,
+ ACPIDEV_NODE_NAME_PCI, len);
+ }
+ rlen = sizeof (ACPIDEV_NODE_NAME_PCI);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return (rlen);
+}
+
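acpidev_dr_device_getprop() above implements the familiar size-query
contract: the property size is always returned, and data is copied only
when the caller's buffer is large enough, so callers may probe with a
zero-length buffer and retry. A toy stand-in showing both sides of the
contract (the "boardnum" property and its value are invented):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int
getprop(const char *name, char *buf, size_t len)
{
	if (strcmp(name, "boardnum") == 0) {
		if (len >= sizeof (uint32_t))	/* copy only if it fits */
			*(uint32_t *)(void *)buf = 3;
		return (sizeof (uint32_t));	/* always report the size */
	}
	return (-1);				/* unknown property */
}

int
main(void)
{
	uint32_t bdnum;
	int need = getprop("boardnum", NULL, 0);	/* probe the size */

	if (need == (int)sizeof (bdnum) &&
	    getprop("boardnum", (char *)&bdnum, sizeof (bdnum)) == need)
		printf("board %u\n", bdnum);
	return (0);
}
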
+/*
+ * Figure out the device class of the device.
+ * Only device classes which may be involved in DR operations are supported.
+ */
+acpidev_class_id_t
+acpidev_dr_device_get_class(ACPI_HANDLE hdl)
+{
+ ACPI_OBJECT_TYPE type;
+ ACPI_DEVICE_INFO *infop;
+ acpidev_class_id_t id = ACPIDEV_CLASS_ID_INVALID;
+
+ static char *acpidev_id_cpu[] = {
+ ACPIDEV_HID_CPU,
+ };
+ static char *acpidev_id_mem[] = {
+ ACPIDEV_HID_MEMORY,
+ };
+ static char *acpidev_id_mod[] = {
+ ACPIDEV_HID_MODULE,
+ };
+ static char *acpidev_id_pci[] = {
+ ACPIDEV_HID_PCI_HOSTBRIDGE,
+ };
+ static char *acpidev_id_pciex[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+ };
+
+ /* Figure out device type by checking ACPI object type. */
+ if (ACPI_FAILURE(AcpiGetType(hdl, &type))) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ } else if (type == ACPI_TYPE_PROCESSOR) {
+ return (ACPIDEV_CLASS_ID_CPU);
+ } else if (type != ACPI_TYPE_DEVICE) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ }
+
+ if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &infop))) {
+ return (ACPIDEV_CLASS_ID_INVALID);
+ }
+
+ /* Figure out device type by checking _HID and _CID. */
+ if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_cpu))) {
+ id = ACPIDEV_CLASS_ID_CPU;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_mem))) {
+ id = ACPIDEV_CLASS_ID_MEMORY;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_mod))) {
+ id = ACPIDEV_CLASS_ID_CONTAINER;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_pciex))) {
+ id = ACPIDEV_CLASS_ID_PCIEX;
+ } else if (acpidev_match_device_id(infop,
+ ACPIDEV_ARRAY_PARAM(acpidev_id_pci))) {
+ id = ACPIDEV_CLASS_ID_PCI;
+ }
+
+ AcpiOsFree(infop);
+
+ return (id);
+}
+
+ACPI_STATUS
+acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl, uint32_t *idxp)
+{
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(idxp != NULL);
+ ASSERT(hdl != NULL);
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle for %p.", hdl);
+ return (AE_ERROR);
+ } else if (dhdl->aod_class_id != ACPIDEV_CLASS_ID_MEMORY) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not a memory device.", hdl);
+ return (AE_ERROR);
+ } else {
+ *idxp = dhdl->aod_memidx;
+ }
+
+ return (AE_OK);
+}
+
+int
+acpidev_dr_device_is_board(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+		    "acpidev_dr_device_is_board().");
+ return (0);
+ }
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (0);
+ } else if (!ACPIDEV_DR_IS_BOARD(dhdl)) {
+ return (0);
+ }
+
+ return (1);
+}
+
+ACPI_STATUS
+acpidev_dr_device_walk_edl(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ int i;
+ char *objname;
+ ACPI_OBJECT *obj;
+ ACPI_BUFFER buf;
+ char *method = ACPIDEV_METHOD_NAME_EDL;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_edl().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf,
+ ACPI_TYPE_PACKAGE);
+ if (rc == AE_NOT_FOUND) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to evaluate method %s under %s.",
+ method, objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate the package structure. */
+ obj = buf.Pointer;
+ for (i = 0; i < obj->Package.Count; i++) {
+ if (obj->Package.Elements[i].Type !=
+ ACPI_TYPE_LOCAL_REFERENCE) {
+ cmn_err(CE_WARN, "!acpidev: element %d in package "
+			    "returned by %s of %s is not a local reference.",
+ i, method, objname);
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ } else if (obj->Package.Elements[i].Reference.ActualType !=
+ ACPI_TYPE_DEVICE) {
+ cmn_err(CE_WARN, "!acpidev: element %d in package "
+			    "returned by %s of %s doesn't refer to a device.",
+ i, method, objname);
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+ }
+
+ for (i = 0; i < obj->Package.Count; i++) {
+ if (obj->Package.Elements[i].Reference.Handle == NULL) {
+ cmn_err(CE_WARN, "!acpidev: handle of element %d in "
+ "package returned by %s of %s is NULL.",
+ i, method, objname);
+ continue;
+ }
+ rc = (*cb)(obj->Package.Elements[i].Reference.Handle,
+ UINT32_MAX, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ }
+ if (ACPI_FAILURE(rc)) {
+ break;
+ }
+ }
+
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+ ACPI_OBJECT *obj;
+ ACPI_BUFFER buf;
+ ACPI_HANDLE chdl;
+ char *method = ACPIDEV_METHOD_NAME_EJD;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_ejd().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf,
+ ACPI_TYPE_STRING);
+ if (rc == AE_NOT_FOUND) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to evaluate method %s under %s.",
+ method, objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ obj = buf.Pointer;
+ ASSERT(obj->String.Pointer);
+ if (ACPI_FAILURE(AcpiGetHandle(NULL, obj->String.Pointer, &chdl))) {
+ cmn_err(CE_WARN, "!acpidev: failed to get handle for %s.",
+ obj->String.Pointer);
+ rc = AE_ERROR;
+ } else {
+ rc = (*cb)(chdl, UINT32_MAX, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ }
+ }
+
+ AcpiOsFree(buf.Pointer);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/*
+ * Walk all child devices and special devices in the eject device list.
+ */
+static ACPI_STATUS
+acpidev_dr_device_walk_child(ACPI_HANDLE hdl, boolean_t init, uint_t max_lvl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_device_walk_child().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /*
+ * Walk the eject device list first when destroying.
+ * According to the ACPI spec, devices in the _EDL list must be handled
+ * first when ejecting the device.
+ */
+ if (init == B_FALSE) {
+ rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk eject device list in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /* Walk all child ACPI DEVICE objects. */
+ if (ACPI_SUCCESS(rc)) {
+ rc = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ max_lvl, cb, NULL, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk DEVICE objects in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /* Walk all child ACPI PROCESSOR objects. */
+ if (ACPI_SUCCESS(rc)) {
+ rc = AcpiWalkNamespace(ACPI_TYPE_PROCESSOR, hdl,
+ max_lvl, cb, NULL, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk PROCESSOR objects in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ /*
+ * Walk the eject device list last when initializing.
+ */
+ if (init == B_TRUE && ACPI_SUCCESS(rc)) {
+ rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to walk eject device list in "
+ "acpidev_dr_device_walk_child().");
+ }
+ }
+
+ return (rc);
+}
+
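+/*
+ * Walk the device itself first, then its child devices and the devices in
+ * its eject device list.
+ */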
+ACPI_STATUS
+acpidev_dr_device_walk_device(ACPI_HANDLE hdl, uint_t max_lvl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval)
+{
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+
+ ASSERT(hdl != NULL);
+ ASSERT(cb != NULL);
+ if (hdl == NULL || cb == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to "
+ "acpidev_dr_walk_device().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Walk the top object itself first. */
+ rc = (*cb)(hdl, 0, arg, retval);
+ if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) {
+ rc = AE_OK;
+ } else if (ACPI_FAILURE(rc)) {
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to handle top node %s "
+ "in acpidev_dr_walk_device().", objname);
+ acpidev_free_object_name(objname);
+ } else {
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, max_lvl,
+ cb, arg, retval);
+ if (ACPI_FAILURE(rc)) {
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to handle descendant nodes of %s "
+ "in acpidev_dr_walk_device().", objname);
+ acpidev_free_object_name(objname);
+ }
+ }
+
+ return (rc);
+}
+
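+/*
+ * Walk callback which rejects every device it visits. It is used to verify
+ * that a device has no child or dependent devices.
+ */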
+static ACPI_STATUS
+acpidev_dr_no_support(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval)
+{
+ _NOTE(ARGUNUSED(arg, retval));
+
+ char *objname;
+
+ ASSERT(hdl != NULL);
+
+ objname = acpidev_get_object_name(hdl);
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: device %s at level 0x%x is unsupported.",
+ objname, lvl);
+ acpidev_free_object_name(objname);
+
+ return (AE_SUPPORT);
+}
+
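+/*
+ * Create a data handle for the object if it doesn't exist yet and record
+ * the DR related properties (class id, board number, port id) on it, then
+ * mark the object as ready for DR operations.
+ */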
+static ACPI_STATUS
+acpidev_dr_set_prop(ACPI_HANDLE hdl, char *objname,
+ struct acpidev_dr_set_prop_arg *ap, uint32_t lvl,
+ acpidev_class_id_t clsid, uint_t *devid)
+{
+ acpidev_data_handle_t dhdl;
+
+ /* Create data handle first if it doesn't exist yet. */
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ uint32_t rlvl;
+ ACPI_HANDLE phdl;
+
+ /*
+ * Compute the level by walking the ACPI namespace if it's a
+ * device from the eject device list.
+ */
+ if (lvl == UINT32_MAX) {
+ /*
+ * AcpiGetParent() fails when it tries to get
+ * the parent of the ACPI namespace root node.
+ */
+ for (rlvl = 0, phdl = hdl;
+ ACPI_SUCCESS(AcpiGetParent(phdl, &phdl));
+ rlvl++) {
+ if (phdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+ if (rlvl == 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get level of %s.",
+ objname);
+ return (AE_BAD_PARAMETER);
+ }
+ } else {
+ rlvl = ap->level;
+ }
+ if (rlvl >= ACPIDEV_MAX_ENUM_LEVELS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: recursive level of %s is too deep.",
+ objname);
+ return (AE_SUPPORT);
+ }
+
+ dhdl = acpidev_data_create_handle(hdl);
+ if (dhdl != NULL) {
+ dhdl->aod_hdl = hdl;
+ dhdl->aod_level = rlvl;
+ }
+ }
+
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create data handle "
+ "for device %s.", objname);
+ return (AE_NO_MEMORY);
+ }
+
+ if (ACPIDEV_DR_IS_READY(dhdl)) {
+ /*
+ * The same device may be enumerated at most twice: once as a
+ * child device, and once from the eject device list.
+ */
+ if (dhdl->aod_bdnum == ap->bdnum) {
+ return (AE_OK);
+ } else {
+ /*
+ * A device has been enumerated more than once from
+ * different paths. It's dangerous to support such
+ * a topology. Disable support of DR operations.
+ */
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: device %s has been "
+ "enumerated more than once for DR.", objname);
+ acpidev_dr_failed = 1;
+ return (AE_SUPPORT);
+ }
+ }
+
+ /* Set properties for DR operations. */
+ dhdl->aod_class_id = clsid;
+ dhdl->aod_bdnum = ap->bdnum;
+ dhdl->aod_portid = atomic_inc_32_nv(devid) - 1;
+ if (clsid == ACPIDEV_CLASS_ID_MEMORY) {
+ dhdl->aod_memidx = acpidev_dr_memory_device_cnt;
+ ASSERT(dhdl->aod_memidx < ACPI_MEMNODE_DEVID_BOOT);
+ }
+ ACPIDEV_DR_SET_READY(dhdl);
+
+ return (AE_OK);
+}
+
+/*
+ * Verify whether the hardware topology is supported by the DR driver.
+ * The ACPI specification is so flexible that, for safety reasons, only
+ * a few well-defined topologies are supported.
+ * Possible values of parameter lvl:
+ * 0: the device is the board itself.
+ * UINT32_MAX: the device is from the _EDL list of the board.
+ * other: the device is a descendant of the board.
+ * Return values:
+ * AE_OK: the topology is supported
+ * AE_SUPPORT: the topology is unsupported
+ * AE_ERROR: other errors
+ */
+static ACPI_STATUS
+acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ ACPI_STATUS rc = AE_OK;
+ char *objname;
+ acpidev_class_id_t cid;
+ struct acpidev_dr_set_prop_arg *ap = arg;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == 0 || lvl == 1 || lvl == UINT32_MAX);
+ objname = acpidev_get_object_name(hdl);
+
+ /*
+ * Validate descendants of the hotplug capable board.
+ * lvl is zero if it's the hotplug capable board itself, otherwise
+ * non-zero for descendants.
+ */
+ if (lvl != 0) {
+ /*
+ * Skip subtree if the device is hotplug capable.
+ * It will be treated as another hotplug capable board.
+ */
+ if (acpidev_dr_device_hotplug_capable(hdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_CTRL_DEPTH);
+ }
+
+ /*
+ * Don't support the _EDL list of a non-hotplug-capable device.
+ */
+ if (acpidev_dr_device_has_edl(hdl)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: non-hotplug-capable "
+ "object %s has _EDL method.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ }
+
+ cid = acpidev_dr_device_get_class(hdl);
+ switch (cid) {
+ case ACPIDEV_CLASS_ID_CPU:
+ /* Don't support logical CPUs in the _EDL list. */
+ if (lvl == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: logical CPU %s in "
+ "_EDL is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ /* Don't support logical CPUs with children. */
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_no_support, arg, NULL);
+ ap->level--;
+ if (rc == AE_SUPPORT) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: logical CPU %s has "
+ "child or dependent devices.", objname);
+ break;
+ } else if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to scan "
+ "children of logical CPU %s.", objname);
+ rc = AE_ERROR;
+ break;
+ } else if (ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_CPU, &ap->cpu_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_MEMORY:
+ /* Don't support memory devices with children. */
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_no_support, arg, NULL);
+ ap->level--;
+ if (rc == AE_SUPPORT) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: memory device %s has child or "
+ "dependent devices.", objname);
+ } else if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to scan children of "
+ "memory device %s.", objname);
+ rc = AE_ERROR;
+ } else if (ap != NULL) {
+ acpidev_dr_memory_device_cnt++;
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_MEMORY, &ap->mem_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_PCI:
+ case ACPIDEV_CLASS_ID_PCIEX:
+ /* Don't scan child/descendant devices of PCI/PCIex devices. */
+ if (ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ cid, &ap->io_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_CONTAINER:
+ /* Don't support module devices in the _EDL list. */
+ if (lvl == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: module device %s in "
+ "_EDL is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ /* Don't support nested module devices. */
+ if (lvl > 0) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: recursion level of "
+ "module device %s is too deep.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ ap->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1,
+ acpidev_dr_scan_topo, arg, NULL);
+ ap->level--;
+ if (ACPI_SUCCESS(rc) && ap != NULL) {
+ rc = acpidev_dr_set_prop(hdl, objname, ap, lvl,
+ ACPIDEV_CLASS_ID_CONTAINER, &ap->mod_id);
+ }
+ break;
+
+ case ACPIDEV_CLASS_ID_INVALID:
+ /*FALLTHROUGH*/
+ default:
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: device %s is unsupported.", objname);
+ rc = AE_SUPPORT;
+ break;
+ }
+
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/* Create walk information structures. */
+static ACPI_STATUS
+acpidev_dr_create_walk_info(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+ char *objname, acpidev_walk_info_t **infopp, acpidev_walk_info_t **cinfopp)
+{
+ ACPI_HANDLE phdl = NULL;
+ dev_info_t *pdip = NULL;
+ acpidev_data_handle_t pdhdl, tdhdl;
+ acpidev_walk_info_t *infop = NULL, *cinfop = NULL;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(dhdl->aod_class_list != NULL);
+ ASSERT(objname != NULL);
+ ASSERT(infopp != NULL);
+ ASSERT(cinfopp != NULL);
+
+ if (ACPI_FAILURE(AcpiGetParent(hdl, &phdl))) {
+ cmn_err(CE_WARN,
+ "!acpidev: failed to get parent object of %s.", objname);
+ return (AE_ERROR);
+ }
+
+ pdhdl = acpidev_data_get_handle(phdl);
+ if (pdhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with parent of %s.", objname);
+ return (AE_ERROR);
+ }
+ if (pdhdl->aod_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: recursion level (%d) of %s is too deep.",
+ pdhdl->aod_level, objname);
+ return (AE_ERROR);
+ }
+ ASSERT(pdhdl->aod_class_list != NULL);
+ if (pdhdl->aod_class_list == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: class list for parent of %s is NULL.", objname);
+ return (AE_ERROR);
+ }
+
+ /* Allocate a walk info structure for its parent. */
+ infop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+ pdhdl->aod_level, phdl, dhdl->aod_class_list, NULL);
+ if (infop == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+ "structure for parent of %s.", objname);
+ return (AE_ERROR);
+ }
+
+ /* Get the parent dip if it's not ready yet. */
+ while (infop->awi_dip == NULL) {
+ if (ACPI_FAILURE(AcpiGetParent(phdl, &phdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get parent of object %p.",
+ phdl);
+ break;
+ }
+ tdhdl = acpidev_data_get_handle(phdl);
+ if (tdhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+ "associated with object %p.", phdl);
+ break;
+ }
+ pdip = acpidev_data_get_devinfo(tdhdl);
+ if (pdip != NULL) {
+ infop->awi_dip = pdip;
+ break;
+ }
+ /* Give up if we reach the ACPI namespace root node. */
+ if (phdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+ if (infop->awi_dip == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get parent dip of %s.", objname);
+ acpidev_free_walk_info(infop);
+ return (AE_ERROR);
+ }
+
+ /* Allocate a walk info for the child. */
+ cinfop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+ infop->awi_level + 1, hdl, NULL, infop);
+ if (cinfop == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+ "structure for %s.", objname);
+ acpidev_free_walk_info(infop);
+ return (AE_ERROR);
+ }
+
+ *infopp = infop;
+ *cinfopp = cinfop;
+
+ return (AE_OK);
+}
+
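+/*
+ * Create device nodes for the object by invoking the pre-probe, probe and
+ * post-probe callbacks of the registered class drivers with the parent
+ * dip locked.
+ */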
+static ACPI_STATUS
+acpidev_dr_probe_object(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int circ;
+ char *objname;
+ dev_info_t *pdip;
+ ACPI_STATUS res;
+ ACPI_OBJECT_TYPE type;
+ acpidev_class_list_t *it;
+ acpidev_walk_info_t *infop, *cinfop;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ if (hdl == NULL || dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: hdl or dhdl is NULL in "
+ "acpidev_dr_probe_object().");
+ return (AE_BAD_PARAMETER);
+ }
+ objname = acpidev_get_object_name(hdl);
+
+ /* Check whether the device is of interest. */
+ if (ACPI_FAILURE(AcpiGetType(hdl, &type)) ||
+ type > ACPI_TYPE_NS_NODE_MAX ||
+ BT_TEST(acpidev_object_type_mask, type) == 0) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: ACPI object %s is unsupported.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ if (dhdl->aod_class_list == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: class list is NULL in data associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ pdip = NULL;
+ infop = NULL;
+ cinfop = NULL;
+ rc = acpidev_dr_create_walk_info(hdl, dhdl, objname, &infop, &cinfop);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to create walk info structures for %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (rc);
+ }
+ ASSERT(infop != NULL);
+ ASSERT(infop->awi_dip != NULL);
+ ASSERT(infop->awi_class_list != NULL);
+ ASSERT(cinfop != NULL);
+ ASSERT(cinfop->awi_data == dhdl);
+
+ /* Lock the parent dip before touching children. */
+ pdip = infop->awi_dip;
+ ndi_devi_enter(pdip, &circ);
+ rw_enter(&acpidev_class_lock, RW_READER);
+
+ /* Call pre-probe callback functions to prepare for probing. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_pre_probe == NULL) {
+ continue;
+ }
+ infop->awi_class_curr = it->acl_class;
+ if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to pre-probe "
+ "device of type %s under %s.",
+ it->acl_class->adc_class_name, infop->awi_name);
+ }
+ }
+
+ /* Call registered probe callback functions to probe devices. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_probe == NULL) {
+ continue;
+ }
+ cinfop->awi_class_curr = it->acl_class;
+ res = it->acl_class->adc_probe(cinfop);
+ if (ACPI_FAILURE(res)) {
+ rc = res;
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: failed to process object %s under %s.",
+ objname, infop->awi_name);
+ }
+ }
+
+ /* Call post-probe callback functions to clean up. */
+ for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+ if (it->acl_class->adc_post_probe == NULL) {
+ continue;
+ }
+ infop->awi_class_curr = it->acl_class;
+ if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to post-probe "
+ "device of type %s under %s.",
+ it->acl_class->adc_class_name, infop->awi_name);
+ }
+ }
+
+ rw_exit(&acpidev_class_lock);
+ ndi_devi_exit(pdip, circ);
+
+ acpidev_free_walk_info(cinfop);
+ acpidev_free_walk_info(infop);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
+/*
+ * Some PCI/PCIex buses embedded in physical processors may be presented in
+ * the eject device list instead of being presented as child devices.
+ * This function identifies such devices and creates device nodes for them.
+ */
+static ACPI_STATUS
+acpidev_dr_probe_dependent(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ ACPI_STATUS rc = AE_OK;
+ int status;
+ char *objname;
+ ACPI_HANDLE phdl, thdl;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ phdl = ctx;
+ objname = acpidev_get_object_name(hdl);
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /*
+ * The device should be treated as another board if it is hotplug capable.
+ */
+ if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ } else if (!ACPIDEV_DR_IS_WORKING(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: %s is unusable for DR operations.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /*
+ * Skip hdl if it's a descendant of phdl because it should have
+ * already been handled when handling phdl itself.
+ */
+ for (thdl = hdl; ACPI_SUCCESS(AcpiGetParent(thdl, &thdl)); ) {
+ /* Return when we reach phdl. */
+ if (thdl == phdl) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+ /* Break out when we reach the ACPI namespace root node. */
+ if (thdl == ACPI_ROOT_OBJECT) {
+ break;
+ }
+ }
+
+ /*
+ * Enumerating PCI/PCIex Host Bridge devices is not supported yet.
+ * It will be enabled when PCI/PCIex Host Bridge hotplug is ready.
+ */
+ if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI ||
+ dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: PCI/PCIEX host bridge %s is "
+ "unsupported, skip it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ /* Check whether the device exists and has been enabled. */
+ status = acpidev_query_device_status(hdl);
+ if (!acpidev_check_device_enabled(status)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s is disabled/absent "
+ "when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ /* Probe the device and its children. */
+ rc = acpidev_dr_probe_object(hdl, dhdl);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to probe object %s in eject device list.",
+ objname);
+ return (rc);
+ }
+
+ return (AE_OK);
+}
+
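+/*
+ * Create device nodes for a hot-added board. The board object itself is
+ * probed first, then the devices in its eject device list.
+ */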
+ACPI_STATUS
+acpidev_dr_device_insert(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int status, circ;
+ char *objname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+
+ ASSERT(acpidev_root_node() != NULL);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_insert_insert() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the object is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+ "state, unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device exists and has been enabled. */
+ status = acpidev_query_device_status(hdl);
+ if (!acpidev_check_device_enabled(status)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is disabled/absent "
+ "when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_NOT_EXIST);
+ }
+
+ /* Check that there's no device node created for object yet. */
+ dip = acpidev_data_get_devinfo(dhdl);
+ if (dip != NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: device node for object %s "
+ "already exists when trying to connect it.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ALREADY_EXISTS);
+ }
+
+ /*
+ * Solaris has a limitation that all device nodes for PCI/PCIex host
+ * bridges must exist directly under /devices.
+ * Special care is needed here when hot-adding PCI/PCIex host
+ * bridges to avoid deadlock caused by ndi_devi_enter().
+ * The lock on ddi_root_node() is taken first, which breaks
+ * the deadlock loop.
+ */
+ ndi_devi_enter(ddi_root_node(), &circ);
+
+ rc = acpidev_dr_probe_object(hdl, dhdl);
+ if (ACPI_SUCCESS(rc)) {
+ rc = acpidev_dr_device_walk_edl(hdl,
+ &acpidev_dr_probe_dependent, hdl, NULL);
+ }
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device "
+ "nodes for children of %s.", objname);
+ cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+ "due to failure when creating device nodes for it.",
+ objname);
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ }
+
+ ndi_devi_exit(ddi_root_node(), circ);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
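+/*
+ * Walk callback to destroy the device node of an object and, recursively,
+ * the device nodes of its descendants. The object is marked as FAILED and
+ * becomes unusable for DR if a device node can't be destroyed.
+ */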
+static ACPI_STATUS
+acpidev_dr_device_remove_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ ACPI_STATUS rc = AE_OK;
+ int status;
+ char *objname;
+ dev_info_t *dip;
+ acpidev_data_handle_t dhdl;
+ struct acpidev_dr_device_remove_arg *argp;
+
+ ASSERT(hdl != NULL && ctx != NULL);
+ if (hdl == NULL || ctx == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter to "
+ "acpidev_dr_device_remove_cb() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ argp = (struct acpidev_dr_device_remove_arg *)ctx;
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the object is hotplug capable. */
+ /* It's the hotplug capable board itself if level is zero. */
+ if (argp->level == 0) {
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ } else {
+ /* It's a device under the hotplug capable board. */
+ /*
+ * Skip it if device itself is hotplug capable.
+ * It will be treated as another hotplug capable board.
+ */
+ if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ if (!ACPIDEV_DR_IS_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+ }
+
+ /* Skip the device if it hasn't been enabled at all. */
+ status = acpidev_data_get_status(dhdl);
+ if (!acpidev_check_device_enabled(status)) {
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+ }
+
+ dip = acpidev_data_get_devinfo(dhdl);
+ if (dip == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get dev_info associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /* For safety, only handle supported device types when unconfiguring. */
+ switch (dhdl->aod_class_id) {
+ case ACPIDEV_CLASS_ID_CONTAINER:
+ /*FALLTHROUGH*/
+ case ACPIDEV_CLASS_ID_CPU:
+ /*FALLTHROUGH*/
+ case ACPIDEV_CLASS_ID_MEMORY:
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s (type %d) doesn't "
+ "support unconfiguration.", objname, dhdl->aod_class_id);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ }
+
+ /* Destroy descendants first. */
+ argp->level++;
+ rc = acpidev_dr_device_walk_child(hdl, B_FALSE, 1,
+ acpidev_dr_device_remove_cb, ctx, retval);
+ argp->level--;
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to destroy descendants of %s.", objname);
+ acpidev_free_object_name(objname);
+ return (rc);
+ }
+
+ /* Untag dip and ACPI object before destroying the dip. */
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) &&
+ ACPI_FAILURE(acpica_untag_devinfo(dip, hdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to untag object %s.", objname);
+ /* Mark the node as unusable. */
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Destroy the node itself. */
+ if (e_ddi_branch_destroy(dip, NULL, 0) != 0) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) &&
+ ACPI_FAILURE(acpica_tag_devinfo(dip, hdl))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to retag object %s.", objname);
+ }
+
+ /* Mark the node as unusable. */
+ ACPIDEV_DR_SET_FAILED(dhdl);
+
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN,
+ "acpidev: failed to remove node %s (%s).", path, objname);
+ kmem_free(path, MAXPATHLEN);
+ acpidev_free_object_name(objname);
+
+ return (AE_ERROR);
+ }
+
+ /* Update status and information associated with the device. */
+ dhdl->aod_dip = NULL;
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_CREATED;
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_TAGGED;
+ if (dhdl->aod_class != NULL) {
+ if (dhdl->aod_class->adc_fini != NULL) {
+ (*(dhdl->aod_class->adc_fini))(hdl, dhdl,
+ dhdl->aod_class);
+ }
+ atomic_dec_32(&(dhdl->aod_class->adc_refcnt));
+ dhdl->aod_class = NULL;
+ }
+ dhdl->aod_iflag &= ~ACPIDEV_ODF_STATUS_VALID;
+ dhdl->aod_status = 0;
+
+ acpidev_free_object_name(objname);
+
+ return (AE_OK);
+}
+
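+/*
+ * Destroy all device nodes created for a hotplug capable board and its
+ * descendants.
+ */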
+ACPI_STATUS
+acpidev_dr_device_remove(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc = AE_OK;
+ int circ;
+ char *objname;
+ acpidev_data_handle_t dhdl;
+ struct acpidev_dr_device_remove_arg arg;
+
+ ASSERT(acpidev_root_node() != NULL);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_device_remove() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /* Validate that the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %s is not hotplug capable.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+ "state, unusable for DR.", objname);
+ acpidev_free_object_name(objname);
+ return (AE_ERROR);
+ }
+
+ /*
+ * Recursively destroy descendants under the top node.
+ * There is no need to undo what has been done if an error happens;
+ * that will be handled by the DR driver.
+ *
+ * Lock ddi_root_node() to avoid deadlock.
+ */
+ ndi_devi_enter(ddi_root_node(), &circ);
+
+ arg.level = 0;
+ rc = acpidev_dr_device_remove_cb(hdl, 0, &arg, NULL);
+ ASSERT(arg.level == 0);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to destroy device "
+ "nodes for children of %s.", objname);
+ cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+ "due to failure when destroying device nodes for it.",
+ objname);
+ ACPIDEV_DR_SET_FAILED(dhdl);
+ }
+
+ ndi_devi_exit(ddi_root_node(), circ);
+ acpidev_free_object_name(objname);
+
+ return (rc);
+}
+
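+/*
+ * Currently this only validates that the board is in a state usable for
+ * DR operations; no ACPI method is evaluated to power on the board.
+ */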
+ACPI_STATUS
+acpidev_dr_device_poweron(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ return (AE_OK);
+}
+
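+/*
+ * Power off the board by evaluating its _EJ0 method after validating that
+ * it is in a state usable for DR operations.
+ */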
+ACPI_STATUS
+acpidev_dr_device_poweroff(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc;
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ rc = acpidev_eval_ej0(hdl);
+ if (ACPI_FAILURE(rc)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _EJ0 for object %p.", hdl);
+ }
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_check_status(ACPI_HANDLE hdl)
+{
+ acpidev_data_handle_t dhdl;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data handle associated with %p.",
+ hdl);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the device is hotplug capable. */
+ if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: object %p is not hotplug capable.", hdl);
+ return (AE_SUPPORT);
+ } else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+ "state, unusable for DR.", hdl);
+ return (AE_ERROR);
+ }
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_lock_all(void)
+{
+ mutex_enter(&acpidev_dr_lock);
+}
+
+void
+acpidev_dr_unlock_all(void)
+{
+ mutex_exit(&acpidev_dr_lock);
+}
+
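+/*
+ * Allocate a CPU id for a hot-added CPU device. The local APIC id and the
+ * ACPI processor id are read from properties on the device node and passed
+ * to the PSM layer through mach_cpu_add().
+ */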
+ACPI_STATUS
+acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl, processorid_t *idp)
+{
+ int rv;
+ processorid_t cpuid;
+ uint32_t procid, apicid;
+ mach_cpu_add_arg_t arg;
+ acpidev_data_handle_t dhdl;
+ dev_info_t *dip = NULL;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_allocate_cpuid() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Validate that the device is ready for hotplug. */
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get devinfo for object %p.", hdl);
+ return (AE_ERROR);
+ }
+ ASSERT(dip != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get data associated with object %p",
+ hdl);
+ return (AE_SUPPORT);
+ }
+ if (!ACPIDEV_DR_IS_READY(dhdl)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p is not hotplug ready.", (void *)dip);
+ return (AE_SUPPORT);
+ }
+ if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+ ACPIDEV_DEBUG(CE_NOTE,
+ "!acpidev: dip %p is in the FAILED state.", (void *)dip);
+ return (AE_SUPPORT);
+ }
+
+ /* Query CPU-related information. */
+ apicid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX);
+ procid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_PROCESSOR_ID, UINT32_MAX);
+ if (procid == UINT32_MAX || apicid == UINT32_MAX || apicid == 255) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: dip %p is malformed, "
+ "procid(0x%x) or apicid(0x%x) is invalid.",
+ (void *)dip, procid, apicid);
+ return (AE_ERROR);
+ }
+
+ /* Check whether the CPU device is in offline state. */
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!DEVI_IS_DEVICE_OFFLINE(dip)) {
+ mutex_exit(&DEVI(dip)->devi_lock);
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p isn't in offline state.", (void *)dip);
+ return (AE_ERROR);
+ }
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ /* Check whether the CPU already exists. */
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(hdl, &cpuid))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: dip %p already has CPU id(%d) assigned.",
+ (void *)dip, cpuid);
+ return (AE_ALREADY_EXISTS);
+ }
+
+ /* Allocate cpuid for the CPU */
+ arg.arg.apic.apic_id = apicid;
+ arg.arg.apic.proc_id = procid;
+ if (apicid >= 255) {
+ arg.type = MACH_CPU_ARG_LOCAL_X2APIC;
+ } else {
+ arg.type = MACH_CPU_ARG_LOCAL_APIC;
+ }
+ rv = mach_cpu_add(&arg, &cpuid);
+ if (rv != PSM_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to allocate cpu id for dip %p.",
+ (void *)dip);
+ return (AE_NOT_EXIST);
+ }
+
+ ASSERT(cpuid >= 0 && cpuid < NCPU && cpuid < max_ncpus);
+ if (idp != NULL) {
+ *idp = cpuid;
+ }
+
+ return (AE_OK);
+}
+
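+/*
+ * Release the CPU id assigned to a hot-added CPU device by calling
+ * mach_cpu_remove().
+ */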
+ACPI_STATUS
+acpidev_dr_free_cpuid(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rv = AE_OK;
+ processorid_t cpuid;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ ASSERT(hdl != NULL);
+ if (hdl == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+ "acpidev_dr_free_cpuid() is NULL.");
+ return (AE_BAD_PARAMETER);
+ }
+
+ if (ACPI_FAILURE(acpica_get_cpu_id_by_object(hdl, &cpuid))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get cpuid for object %p.", hdl);
+ rv = AE_NOT_EXIST;
+ } else if (cpuid < 0 || cpuid >= max_ncpus) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: cpuid(%d) of object %p is invalid.",
+ cpuid, hdl);
+ rv = AE_ERROR;
+ } else if (mach_cpu_remove(cpuid) != PSM_SUCCESS) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to free cpuid(%d) for object %p.",
+ cpuid, hdl);
+ rv = AE_ERROR;
+ }
+
+ return (rv);
+}
+
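+/*
+ * Get latency information for the given proximity domain, preferring the
+ * _SLI method under the object and falling back to the static ACPI SLIT
+ * table. On success the returned buffer must be freed by the caller, e.g.
+ * through acpidev_dr_free_cpu_numa_info().
+ */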
+static ACPI_STATUS
+acpidev_dr_get_latency(ACPI_HANDLE hdl, void **hdlpp,
+ uint32_t pxmid, uint32_t *slicntp, uchar_t **slipp)
+{
+ ACPI_STATUS rc;
+ ACPI_BUFFER buf;
+ uint32_t i, pxmcnt;
+ uchar_t *valp, *sp, *ep;
+
+ /* Evaluate the ACPI _SLI method under the object. */
+ buf.Length = ACPI_ALLOCATE_BUFFER;
+ rc = AcpiEvaluateObjectTyped(hdl, ACPIDEV_METHOD_NAME_SLI, NULL, &buf,
+ ACPI_TYPE_BUFFER);
+ if (ACPI_SUCCESS(rc)) {
+ valp = (uchar_t *)buf.Pointer;
+ if (acpidev_slit_tbl_ptr->LocalityCount > pxmid) {
+ pxmcnt = acpidev_slit_tbl_ptr->LocalityCount;
+ } else {
+ pxmcnt = pxmid + 1;
+ }
+
+ /*
+ * Validate data returned by the ACPI _SLI method.
+ * Please refer to section 6.2.14, "_SLI (System Locality Information)",
+ * in the ACPI 4.0 specification for the data format returned by _SLI.
+ */
+ if (buf.Length != pxmcnt * 2 * sizeof (uchar_t)) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: buffer length returned by _SLI method "
+ "under %p is invalid.", hdl);
+ AcpiOsFree(buf.Pointer);
+ } else if (valp[pxmid] != ACPI_SLIT_SELF_LATENCY ||
+ valp[pxmid + pxmcnt] != ACPI_SLIT_SELF_LATENCY) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: local latency returned by _SLI method "
+ "under %p is not %u.", hdl, ACPI_SLIT_SELF_LATENCY);
+ AcpiOsFree(buf.Pointer);
+ } else {
+ *slicntp = pxmcnt;
+ *slipp = (uchar_t *)buf.Pointer;
+ *hdlpp = buf.Pointer;
+ return (AE_OK);
+ }
+ } else if (rc != AE_NOT_FOUND) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to evaluate "
+ "_SLI method under object %p.", hdl);
+ }
+
+ /* Return data from the ACPI SLIT table. */
+ ASSERT(acpidev_slit_tbl_ptr != NULL);
+ pxmcnt = acpidev_slit_tbl_ptr->LocalityCount;
+ if (pxmid >= pxmcnt) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: proximity domain id "
+ "(%u) is too big, max %u.", pxmid, pxmcnt - 1);
+ *slicntp = 0;
+ *slipp = NULL;
+ return (AE_ERROR);
+ } else {
+ sp = AcpiOsAllocate(pxmcnt * 2 * sizeof (uchar_t));
+ ep = acpidev_slit_tbl_ptr->Entry;
+ for (i = 0; i < pxmcnt; i++) {
+ sp[i] = ep[pxmcnt * pxmid + i];
+ sp[i + pxmcnt] = ep[pxmcnt * i + pxmid];
+ }
+ *slicntp = pxmcnt;
+ *slipp = sp;
+ *hdlpp = sp;
+ return (AE_OK);
+ }
+}
+
+/*
+ * Query NUMA information for the CPU device.
+ * It returns the APIC id, proximity id and latency information of the CPU.
+ */
+int
+acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp, uint32_t *apicidp,
+ uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp)
+{
+ dev_info_t *dip = NULL;
+ ACPI_HANDLE hdl = NULL;
+
+ ASSERT(cp != NULL);
+ ASSERT(hdlpp != NULL);
+ ASSERT(apicidp != NULL);
+ ASSERT(pxmidp != NULL);
+ if (cp == NULL || hdlpp == NULL || apicidp == NULL || pxmidp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_get_cpu_numa_info().");
+ return (-1);
+ }
+
+ *hdlpp = NULL;
+ *apicidp = UINT32_MAX;
+ *pxmidp = UINT32_MAX;
+ if (lgrp_plat_node_cnt == 1) {
+ return (-1);
+ }
+ ASSERT(acpidev_slit_tbl_ptr != NULL);
+
+ /* Query APIC id and Proximity id from device properties. */
+ if (ACPI_FAILURE(acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get ACPI object "
+ "for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get device node "
+ "for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+ *apicidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX);
+ *pxmidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ ACPIDEV_PROP_NAME_PROXIMITY_ID, UINT32_MAX);
+ if (*apicidp == UINT32_MAX || *pxmidp == UINT32_MAX) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get local APIC id "
+ "or proximity id for CPU(%d).", cp->cpu_id);
+ return (-1);
+ }
+
+ ASSERT((slicntp && slipp) || (!slicntp && !slipp));
+ if (slicntp != NULL && slipp != NULL) {
+ if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp,
+ slicntp, slipp))) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+void
+acpidev_dr_free_cpu_numa_info(void *hdlp)
+{
+ if (hdlp != NULL) {
+ AcpiOsFree(hdlp);
+ }
+}
+
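+/*
+ * Search the static ACPI SRAT table for the proximity domain of the first
+ * memory region in the list and verify that all regions in the list fall
+ * within that domain.
+ */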
+static ACPI_STATUS
+acpidev_dr_mem_search_srat(struct memlist *ml, uint32_t *pxmidp)
+{
+ int len, off;
+ uint64_t start, end;
+ boolean_t found = B_FALSE;
+ ACPI_SUBTABLE_HEADER *sp;
+ ACPI_SRAT_MEM_AFFINITY *mp;
+
+ ASSERT(ml != NULL);
+ ASSERT(pxmidp != NULL);
+ ASSERT(acpidev_srat_tbl_ptr != NULL);
+
+ /* Search the static ACPI SRAT table for proximity domain. */
+ sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1);
+ len = acpidev_srat_tbl_ptr->Header.Length;
+ off = sizeof (*acpidev_srat_tbl_ptr);
+ while (off < len) {
+ if (sp->Type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) {
+ mp = (ACPI_SRAT_MEM_AFFINITY *)sp;
+ if ((mp->Flags & ACPI_SRAT_MEM_ENABLED) &&
+ (mp->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
+ ml->ml_address >= mp->BaseAddress &&
+ ml->ml_address <= mp->BaseAddress + mp->Length) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ off += sp->Length;
+ sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length);
+ }
+ if (!found)
+ return (AE_NOT_FOUND);
+
+ /*
+ * Verify that all memory regions in the list belong to the same domain.
+ */
+ start = mp->BaseAddress;
+ end = mp->BaseAddress + mp->Length;
+ while (ml) {
+ if (ml->ml_address < start ||
+ ml->ml_address + ml->ml_size > end) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: memory for hot-adding doesn't belong "
+ "to the same proximity domain.");
+ return (AE_ERROR);
+ }
+ ml = ml->ml_next;
+ }
+ /* Return the proximity domain id of the matching SRAT entry. */
+ *pxmidp = mp->ProximityDomain;
+
+ return (AE_OK);
+}
+
+/*
+ * Query lgrp information for a memory device.
+ * It returns the proximity domain id and latency information of the device.
+ */
+ACPI_STATUS
+acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl, struct memlist *ml,
+ void **hdlpp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp)
+{
+ ASSERT(ml != NULL);
+ ASSERT(hdlpp != NULL);
+ ASSERT(pxmidp != NULL);
+ if (ml == NULL || hdlpp == NULL || pxmidp == NULL) {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to "
+ "acpidev_dr_get_mem_numa_info().");
+ return (AE_BAD_PARAMETER);
+ }
+
+ *pxmidp = UINT32_MAX;
+ if (lgrp_plat_node_cnt == 1) {
+ return (AE_SUPPORT);
+ }
+
+ if (ACPI_FAILURE(acpidev_eval_pxm(hdl, pxmidp))) {
+ /*
+ * If evaluating the ACPI _PXM method for the memory device fails,
+ * try to get the proximity domain id from the SRAT table instead.
+ */
+ if (ACPI_FAILURE(acpidev_dr_mem_search_srat(ml, pxmidp))) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to get proximity domain id for "
+ "memory device %p.", hdl);
+ return (AE_ERROR);
+ }
+ }
+
+ ASSERT((slicntp && slipp) || (!slicntp && !slipp));
+ if (slicntp != NULL && slipp != NULL) {
+ if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp,
+ slicntp, slipp))) {
+ return (AE_ERROR);
+ }
+ }
+
+ return (AE_OK);
+}
+
+void
+acpidev_dr_free_mem_numa_info(void *hdlp)
+{
+ if (hdlp != NULL) {
+ AcpiOsFree(hdlp);
+ }
+}
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
index eee5d84715..2c0dab936b 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -60,19 +60,23 @@
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/mutex.h>
+#include <sys/note.h>
#include <sys/obpdefs.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
/* Patchable through /etc/system */
int acpidev_options = 0;
int acpidev_debug = 0;
+krwlock_t acpidev_class_lock;
acpidev_class_list_t *acpidev_class_list_root = NULL;
+ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
/* ACPI device autoconfig global status */
typedef enum acpidev_status {
@@ -86,9 +90,7 @@ typedef enum acpidev_status {
static acpidev_status_t acpidev_status = ACPIDEV_STATUS_UNKNOWN;
static kmutex_t acpidev_drv_lock;
-static krwlock_t acpidev_class_lock;
static dev_info_t *acpidev_root_dip = NULL;
-static ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
/* Boot time ACPI device enumerator. */
static void acpidev_boot_probe(int type);
@@ -117,6 +119,7 @@ _init(void)
sizeof (acpidev_object_type_mask));
mutex_init(&acpidev_drv_lock, NULL, MUTEX_DRIVER, NULL);
rw_init(&acpidev_class_lock, NULL, RW_DEFAULT, NULL);
+ acpidev_dr_init();
impl_bus_add_probe(acpidev_boot_probe);
} else {
cmn_err(CE_WARN, "!acpidev: failed to install driver.");
@@ -157,6 +160,13 @@ acpidev_class_list_fini(void)
{
acpidev_unload_plat_modules();
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_scope,
+ &acpidev_class_pci);
+ (void) acpidev_unregister_class(&acpidev_class_list_device,
+ &acpidev_class_pci);
+ }
+
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
(void) acpidev_unregister_class(&acpidev_class_list_device,
&acpidev_class_memory);
@@ -252,7 +262,7 @@ acpidev_class_list_init(uint64_t *fp)
*fp |= ACPI_DEVCFG_CPU;
}
- /* Check support for ACPI memory device. */
+ /* Check support of ACPI memory devices. */
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
/*
* Register the ACPI memory class driver onto the
@@ -267,10 +277,36 @@ acpidev_class_list_init(uint64_t *fp)
*fp |= ACPI_DEVCFG_MEMORY;
}
+ /* Check support of PCI/PCIex Host Bridge devices. */
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ /*
+ * Register pci/pciex class drivers onto
+ * the acpidev_class_list_device class list because ACPI
+ * module class driver uses that list.
+ */
+ if (ACPI_FAILURE(acpidev_register_class(
+ &acpidev_class_list_device, &acpidev_class_pci,
+ B_FALSE))) {
+ goto error_device_pci;
+ }
+
+ /*
+ * Register pci/pciex class drivers onto the
+ * acpidev_class_list_scope class list.
+ */
+ if (ACPI_FAILURE(acpidev_register_class(
+ &acpidev_class_list_scope, &acpidev_class_pci,
+ B_FALSE))) {
+ goto error_scope_pci;
+ }
+
+ *fp |= ACPI_DEVCFG_PCI;
+ }
+
/* Check blacklist and load platform specific modules. */
rc = acpidev_load_plat_modules();
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to check blacklist "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to check blacklist "
"or load pratform modules.");
goto error_plat;
}
@@ -278,6 +314,16 @@ acpidev_class_list_init(uint64_t *fp)
return (AE_OK);
error_plat:
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_scope,
+ &acpidev_class_pci);
+ }
+error_scope_pci:
+ if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+ (void) acpidev_unregister_class(&acpidev_class_list_device,
+ &acpidev_class_pci);
+ }
+error_device_pci:
if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
(void) acpidev_unregister_class(&acpidev_class_list_device,
&acpidev_class_memory);
@@ -313,7 +359,7 @@ error_root_device:
&acpidev_class_scope);
error_out:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to register built-in class drivers.");
+ "!acpidev: failed to register built-in class drivers.");
*fp = 0;
return (AE_ERROR);
@@ -353,7 +399,7 @@ acpidev_create_root_node(void)
(pnode_t)DEVI_SID_NODEID, &dip);
if (rv != NDI_SUCCESS) {
ndi_devi_exit(ddi_root_node(), circ);
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create device node "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device node "
"for ACPI root with errcode %d.", rv);
return (AE_ERROR);
}
@@ -362,7 +408,7 @@ acpidev_create_root_node(void)
if (ACPI_FAILURE(acpica_tag_devinfo(dip, ACPI_ROOT_OBJECT))) {
(void) ddi_remove_child(dip, 0);
ndi_devi_exit(ddi_root_node(), circ);
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to tag object %s.",
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to tag object %s.",
ACPIDEV_OBJECT_NAME_SB);
return (AE_ERROR);
}
@@ -376,7 +422,7 @@ acpidev_create_root_node(void)
}
if (rv != DDI_SUCCESS) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to set device property for /devices/%s.",
+ "!acpidev: failed to set device property for /devices/%s.",
ACPIDEV_NODE_NAME_ROOT);
goto error_out;
}
@@ -384,7 +430,7 @@ acpidev_create_root_node(void)
/* Manually create an object handle for the root node */
objhdl = acpidev_data_create_handle(ACPI_ROOT_OBJECT);
if (objhdl == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object "
"handle for the root node.");
goto error_out;
}
@@ -425,7 +471,7 @@ acpidev_initialize(void)
return;
} else if (acpidev_status != ACPIDEV_STATUS_UNKNOWN) {
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: initialization called more than once.");
+ "!acpidev: initialization called more than once.");
return;
}
@@ -476,6 +522,18 @@ acpidev_initialize(void)
acpidev_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(),
DDI_PROP_DONTPASS, "acpidev-options", acpidev_options);
+ /* Check whether ACPI-based DR has been disabled by the user. */
+ rc = ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_DONTPASS, "acpidev-dr", &str);
+ if (rc == DDI_SUCCESS) {
+ if (strcasecmp(str, "off") == 0 || strcasecmp(str, "no") == 0) {
+ cmn_err(CE_CONT, "?acpidev: ACPI based DR has been "
+ "disabled by user.\n");
+ acpidev_dr_enable = 0;
+ }
+ ddi_prop_free(str);
+ }
+
/* Register all device class drivers. */
if (ACPI_FAILURE(acpidev_class_list_init(&features))) {
cmn_err(CE_WARN,
@@ -518,7 +576,7 @@ acpidev_boot_probe_device(acpidev_op_type_t op_type)
infop = acpidev_alloc_walk_info(op_type, 0, ACPI_ROOT_OBJECT,
&acpidev_class_list_root, NULL);
if (infop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate walk info "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
"object in acpi_boot_probe_device().");
return (AE_ERROR);
}
@@ -558,9 +616,18 @@ acpidev_boot_probe(int type)
if (type == 0 && acpidev_status == ACPIDEV_STATUS_INITIALIZED) {
rc = acpidev_boot_probe_device(ACPIDEV_OP_BOOT_PROBE);
if (ACPI_SUCCESS(rc)) {
+ /*
+ * Support for DR operations will be disabled
+ * if the DR subsystem fails to initialize.
+ */
+ rc = acpidev_dr_initialize(acpidev_root_dip);
+ if (ACPI_FAILURE(rc) && rc != AE_SUPPORT) {
+ cmn_err(CE_CONT, "?acpidev: failed to "
+ "initialize DR subsystem.");
+ }
acpidev_status = ACPIDEV_STATUS_FIRST_PASS;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to probe ACPI "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to probe ACPI "
"devices during boot.");
acpidev_status = ACPIDEV_STATUS_FAILED;
}
@@ -569,7 +636,7 @@ acpidev_boot_probe(int type)
if (ACPI_SUCCESS(rc)) {
acpidev_status = ACPIDEV_STATUS_READY;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to reprobe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to reprobe "
"ACPI devices during boot.");
acpidev_status = ACPIDEV_STATUS_FAILED;
}
@@ -598,12 +665,12 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
ASSERT(infop != NULL);
if (infop == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop is NULL in acpidev_probe_child().");
+ "!acpidev: infop is NULL in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
ASSERT(infop->awi_level < ACPIDEV_MAX_ENUM_LEVELS - 1);
if (infop->awi_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: recursive level is too deep "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: recursive level is too deep "
"in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
@@ -615,14 +682,14 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
if (infop->awi_class_list == NULL || infop->awi_hdl == NULL ||
infop->awi_info == NULL || infop->awi_name == NULL ||
infop->awi_data == NULL) {
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop has NULL fields in acpidev_probe_child().");
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL fields in "
+ "acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
pdip = acpidev_walk_info_get_pdip(infop);
if (pdip == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: pdip is NULL in acpidev_probe_child().");
+ "!acpidev: pdip is NULL in acpidev_probe_child().");
return (AE_BAD_PARAMETER);
}
@@ -636,7 +703,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
}
infop->awi_class_curr = it->acl_class;
if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to pre-probe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to pre-probe "
"device of type %s under %s.",
it->acl_class->adc_class_name, infop->awi_name);
}
@@ -653,11 +720,17 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
continue;
}
+ /* It's another hotplug-capable board; skip it. */
+ if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE &&
+ acpidev_dr_device_is_board(child)) {
+ continue;
+ }
+
/* Allocate the walk info structure. */
cinfop = acpidev_alloc_walk_info(infop->awi_op_type,
infop->awi_level + 1, child, NULL, infop);
if (cinfop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate "
"walk info child object of %s.",
infop->awi_name);
/* Mark error and continue to handle next child. */
@@ -673,12 +746,6 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
datap = cinfop->awi_data;
if (cinfop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
datap->aod_class_list = infop->awi_class_list;
- } else if (datap->aod_class_list != infop->awi_class_list) {
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: class list for %s has been changed",
- infop->awi_name);
- acpidev_free_walk_info(cinfop);
- continue;
}
/* Call registered process callbacks. */
@@ -691,7 +758,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
res = it->acl_class->adc_probe(cinfop);
if (ACPI_FAILURE(res)) {
rc = res;
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"process object of type %s under %s.",
it->acl_class->adc_class_name,
infop->awi_name);
@@ -709,7 +776,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop)
}
infop->awi_class_curr = it->acl_class;
if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to post-probe "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to post-probe "
"device of type %s under %s.",
it->acl_class->adc_class_name, infop->awi_name);
}
@@ -737,7 +804,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
ASSERT(infop != NULL);
if (infop == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: infop is NULL in acpidev_process_object().");
+ "!acpidev: infop is NULL in acpidev_process_object().");
return (AE_BAD_PARAMETER);
}
ASSERT(infop->awi_hdl != NULL);
@@ -751,13 +818,13 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
clsp = infop->awi_class_curr;
if (hdl == NULL || datap == NULL || adip == NULL || clsp == NULL ||
clsp->adc_filter == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: infop has NULL pointer in "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL pointer in "
"acpidev_process_object().");
return (AE_BAD_PARAMETER);
}
pdip = acpidev_walk_info_get_pdip(infop);
if (pdip == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get pdip for %s "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get pdip for %s "
"in acpidev_process_object().", infop->awi_name);
return (AE_BAD_PARAMETER);
}
@@ -782,7 +849,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
(infop->awi_flags & ACPIDEV_WI_DEVICE_CREATED)) {
ASSERT(infop->awi_dip != NULL);
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: device has already been created for object %s.",
+ "!acpidev: device has already been created for object %s.",
infop->awi_name);
return (AE_ALREADY_EXISTS);
}
@@ -793,9 +860,9 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
* 6.3.1 and 6.5.1.
* present functioning enabled Action
* 0 0 x Do nothing
- * 1 x 0 Create node in OFFLINE and scan child
+ * 1 x 0 Do nothing
* 1 x 1 Create node and scan child
- * x 1 0 Create node in OFFLINE and scan child
+ * x 1 0 Do nothing
* x 1 1 Create node and scan child
*/
if ((datap->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0 ||
@@ -807,31 +874,34 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
}
datap->aod_iflag |= ACPIDEV_ODF_STATUS_VALID;
}
- if (!acpidev_check_device_present(datap->aod_status)) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: object %s doesn't exist.",
+ if (!acpidev_check_device_enabled(datap->aod_status)) {
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s doesn't exist.",
infop->awi_name);
- return (AE_NOT_EXIST);
+ /*
+ * Need to scan for hotplug-capable boards even if object
+ * doesn't exist or has been disabled during the first pass.
+ * So just disable creating device node and keep on scanning.
+ */
+ if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
+ flags &= ~ACPIDEV_PROCESS_FLAG_CREATE;
+ } else {
+ return (AE_NOT_EXIST);
+ }
}
ASSERT(infop->awi_data != NULL);
ASSERT(infop->awi_parent != NULL);
ASSERT(infop->awi_parent->awi_data != NULL);
- /* Put device into offline state if parent is in offline state. */
- if (infop->awi_parent->awi_data->aod_iflag &
- ACPIDEV_ODF_DEVINFO_OFFLINE) {
- flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
- /* Put device into offline state if it's disabled. */
- } else if (!acpidev_check_device_enabled(datap->aod_status)) {
- flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
- }
- /*
- * Mark current node status as OFFLINE even if a device node will not
- * be created for it. This is needed to handle the case when the current
- * node is SKIPPED (no device node will be created for it), so that all
- * descedants of current nodes could be correctly marked as OFFLINE.
- */
- if (flags & ACPIDEV_PROCESS_FLAG_OFFLINE) {
- infop->awi_data->aod_iflag |= ACPIDEV_ODF_DEVINFO_OFFLINE;
+ if (flags & ACPIDEV_PROCESS_FLAG_CREATE) {
+ mutex_enter(&(DEVI(pdip)->devi_lock));
+ /*
+ * Put the device into offline state if its parent is in
+ * offline state.
+ */
+ if (DEVI_IS_DEVICE_OFFLINE(pdip)) {
+ flags |= ACPIDEV_PROCESS_FLAG_OFFLINE;
+ }
+ mutex_exit(&(DEVI(pdip)->devi_lock));
}
/* Evaluate filtering rules and generate device name. */
@@ -862,7 +932,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
OBP_DEVICETYPE, clsp->adc_dev_type);
if (ret != NDI_SUCCESS) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to set device property for %s.",
+ "!acpidev: failed to set device property for %s.",
infop->awi_name);
(void) ddi_remove_child(dip, 0);
infop->awi_dip = NULL;
@@ -886,7 +956,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
if (clsp->adc_init != NULL &&
ACPI_FAILURE(clsp->adc_init(infop))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to initialize device %s.",
+ "!acpidev: failed to initialize device %s.",
infop->awi_name);
if ((flags & ACPIDEV_PROCESS_FLAG_NOTAG) == 0) {
(void) acpica_untag_devinfo(dip, hdl);
@@ -923,6 +993,11 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
} else {
(void) ndi_devi_bind_driver(dip, 0);
}
+
+ /* Hold reference on branch when hot-adding devices. */
+ if (flags & ACPIDEV_PROCESS_FLAG_HOLDBRANCH) {
+ e_ddi_branch_hold(dip);
+ }
}
/* Free resources */
@@ -942,7 +1017,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
rc = acpidev_probe_child(infop);
if (ACPI_FAILURE(rc)) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to probe subtree of %s.",
+ "!acpidev: failed to probe subtree of %s.",
infop->awi_name);
rc = AE_ERROR;
}
@@ -960,7 +1035,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
case ACPIDEV_FILTER_FAILED:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to probe device for %s.",
+ "!acpidev: failed to probe device for %s.",
infop->awi_name);
rc = AE_ERROR;
break;
@@ -975,11 +1050,12 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags)
return (rc);
}
-/*ARGSUSED*/
acpidev_filter_result_t
acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
acpidev_filter_rule_t *afrp, char *devname, int len)
{
+ _NOTE(ARGUNUSED(hdl));
+
ASSERT(afrp != NULL);
ASSERT(devname == NULL || len >= ACPIDEV_MAX_NAMELEN);
if (infop->awi_level < afrp->adf_minlvl ||
@@ -992,8 +1068,7 @@ acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl,
return (ACPIDEV_FILTER_CONTINUE);
}
if (afrp->adf_replace != NULL && devname != NULL) {
- (void) strncpy(devname, afrp->adf_replace, len - 1);
- devname[len - 1] = 0;
+ (void) strlcpy(devname, afrp->adf_replace, len);
}
return (afrp->adf_retcode);
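The strncpy()-to-strlcpy() conversions here and below are more than style: strlcpy() always NUL-terminates and returns the length the source required, so truncation is detectable from the return value and the manual devname[len - 1] = 0 step disappears. A minimal sketch of the idiom (generic C, not part of this patch):

    char devname[ACPIDEV_MAX_NAMELEN];

    /* strlcpy() bounds the copy, terminates it, and reports truncation. */
    if (strlcpy(devname, afrp->adf_replace, sizeof (devname)) >=
        sizeof (devname)) {
            /* source was longer than the buffer; handle truncation */
    }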
@@ -1042,7 +1117,7 @@ acpidev_register_class(acpidev_class_list_t **listpp, acpidev_class_t *clsp,
ASSERT(listpp != NULL);
if (listpp == NULL || clsp == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameter in acpidev_register_class().");
+ "!acpidev: invalid parameter in acpidev_register_class().");
return (AE_BAD_PARAMETER);
} else if (clsp->adc_version != ACPIDEV_CLASS_REV) {
cmn_err(CE_WARN,
@@ -1092,7 +1167,7 @@ acpidev_unregister_class(acpidev_class_list_t **listpp,
ASSERT(clsp != NULL);
ASSERT(listpp != NULL);
if (listpp == NULL || clsp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter "
"in acpidev_unregister_class().");
return (AE_BAD_PARAMETER);
}
@@ -1106,12 +1181,12 @@ acpidev_unregister_class(acpidev_class_list_t **listpp,
}
}
if (temp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) doesn't exist "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) doesn't exist "
"in acpidev_unregister_class().",
(void *)clsp, clsp->adc_class_name);
rc = AE_NOT_FOUND;
} else if (temp->acl_class->adc_refcnt != 0) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) is still in use "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) is still in use "
"in acpidev_unregister_class()..",
(void *)clsp, clsp->adc_class_name);
rc = AE_ERROR;
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
index e4dcbcdd67..4185817dd9 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -31,6 +31,7 @@
#include <sys/acpica.h>
#include <sys/acpidev.h>
#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
#include <sys/acpidev_impl.h>
static ACPI_STATUS acpidev_memory_probe(acpidev_walk_info_t *infop);
@@ -97,7 +98,7 @@ static acpidev_filter_rule_t acpidev_memory_filters[] = {
static ACPI_STATUS
acpidev_memory_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -109,19 +110,35 @@ acpidev_memory_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ acpidev_dr_check(infop);
+ }
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) {
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE |
+ ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ }
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_memory_probe.", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -153,7 +170,6 @@ acpidev_memory_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_memory_init(acpidev_walk_info_t *infop)
{
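The probe refactoring above replaces three near-identical if/else arms with a switch that only computes flags, funneling every operation type through a single acpidev_process_object() call site. For a hot-plug probe the combined flags end up looking like this (a sketch assembled from the flags in this patch, not a new API):

    /* Hot-plug probe: create nodes, sync status, and hold the branch. */
    flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE |
        ACPIDEV_PROCESS_FLAG_SYNCSTATUS | ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
    rc = acpidev_process_object(infop, flags);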
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c
new file mode 100644
index 0000000000..5f0c6c6c13
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c
@@ -0,0 +1,172 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpidev_impl.h>
+
+static ACPI_STATUS acpidev_pci_probe(acpidev_walk_info_t *infop);
+
+/*
+ * Default class driver for PCI/PCIEX Host Bridge devices.
+ */
+acpidev_class_t acpidev_class_pci = {
+ 0, /* adc_refcnt */
+ ACPIDEV_CLASS_REV1, /* adc_version */
+ ACPIDEV_CLASS_ID_PCI, /* adc_class_id */
+ "PCI/PCIex Host Bridge", /* adc_class_name */
+ ACPIDEV_TYPE_PCI, /* adc_dev_type */
+ NULL, /* adc_private */
+ NULL, /* adc_pre_probe */
+ NULL, /* adc_post_probe */
+ acpidev_pci_probe, /* adc_probe */
+ NULL, /* adc_filter */
+ NULL, /* adc_init */
+ NULL, /* adc_fini */
+};
+
+static char *acpidev_pci_device_ids[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+ ACPIDEV_HID_PCI_HOSTBRIDGE,
+};
+
+static char *acpidev_pciex_device_ids[] = {
+ ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+};
+
+static void
+acpidev_pci_update_status(acpidev_walk_info_t *infop)
+{
+ int status;
+ dev_info_t *dip = NULL;
+ acpidev_data_handle_t dhdl;
+
+ dhdl = infop->awi_data;
+ ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_CREATED) == 0);
+ ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) == 0);
+ if ((dhdl->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0) {
+ status = acpidev_query_device_status(infop->awi_hdl);
+ dhdl->aod_status = status;
+ dhdl->aod_iflag |= ACPIDEV_ODF_STATUS_VALID;
+ } else {
+ status = dhdl->aod_status;
+ }
+ dhdl->aod_level = infop->awi_level;
+ dhdl->aod_hdl = infop->awi_hdl;
+ dhdl->aod_class = NULL;
+ dhdl->aod_class_list = NULL;
+ if (acpidev_match_device_id(infop->awi_info,
+ ACPIDEV_ARRAY_PARAM(acpidev_pciex_device_ids))) {
+ dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCIEX;
+ } else {
+ dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCI;
+ }
+
+ if (ACPI_FAILURE(acpica_get_devinfo(infop->awi_hdl, &dip))) {
+ dip = NULL;
+ } else {
+ ASSERT(dip != NULL);
+ }
+ if (acpidev_check_device_enabled(status)) {
+ /*
+	 * Mark the device as DISABLED if no device node has been
+	 * created; the BIOS may hide some special PCI/PCIEX buses
+	 * from the OS.
+ */
+ if (dip == NULL) {
+ dhdl->aod_dip = NULL;
+ dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+ } else {
+ dhdl->aod_dip = dip;
+ dhdl->aod_iflag |= ACPIDEV_ODF_DEVINFO_CREATED;
+ }
+ } else {
+ ASSERT(dip == NULL);
+ dhdl->aod_dip = NULL;
+ dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+ }
+}
+
+static ACPI_STATUS
+acpidev_pci_probe(acpidev_walk_info_t *infop)
+{
+ ACPI_STATUS rc = AE_OK;
+
+ ASSERT(infop != NULL);
+ ASSERT(infop->awi_hdl != NULL);
+ ASSERT(infop->awi_info != NULL);
+ if (infop->awi_info->Type != ACPI_TYPE_DEVICE ||
+ acpidev_match_device_id(infop->awi_info,
+ ACPIDEV_ARRAY_PARAM(acpidev_pci_device_ids)) == B_FALSE) {
+ return (AE_OK);
+ }
+
+ if (acpica_get_devcfg_feature(ACPI_DEVCFG_PCI) == 0) {
+ return (AE_OK);
+ }
+
+ if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
+ /*
+ * Check hotplug capability on the first pass.
+ */
+ acpidev_dr_check(infop);
+ } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
+ /*
+ * Check whether the PCI device enumerator has created device
+ * nodes for PCI/PCIEX host bridges.
+ */
+ acpidev_pci_update_status(infop);
+ } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
+ /*
+	 * No support for PCI/PCIEX host bridge hotplug yet;
+	 * it will come in a later phase.
+ */
+ cmn_err(CE_WARN,
+ "!acpidev: no support of PCI/PCIEX host bridge hotplug.");
+ /*
+ * Don't block the hot-adding process, just skip it.
+ */
+ rc = AE_OK;
+ } else {
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
+ "in acpidev_pci_probe().", infop->awi_op_type);
+ rc = AE_BAD_PARAMETER;
+ }
+ if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
+ cmn_err(CE_CONT, "?acpidev: failed to process PCI/PCIEX host "
+ "bridge object %s.\n", infop->awi_name);
+ } else {
+ rc = AE_OK;
+ }
+
+ return (rc);
+}
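acpidev_class_pci follows the same class-driver template as the memory and scope classes: a static acpidev_class_t with probe/filter/init callbacks that gets registered on a class list. A sketch of how such a class would be hooked up via the acpidev_register_class() routine patched earlier in this commit (the list name acpidev_class_list_device and the final argument are assumptions for illustration, not taken from this patch):

    /* Hypothetical registration of the PCI class driver. */
    if (ACPI_FAILURE(acpidev_register_class(&acpidev_class_list_device,
        &acpidev_class_pci, B_FALSE))) {
            cmn_err(CE_WARN, "!acpidev: failed to register PCI class.");
    }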
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
index 17dd9c774b..e9664c7614 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -331,7 +331,7 @@ acpidev_resource_insert_dma(acpidev_resource_handle_t rhdl, int dma)
ASSERT(rhdl != NULL);
if (rhdl->acpidev_dma_count >= ACPIDEV_RES_DMA_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: too many DMA resources, max %u.",
+ "!acpidev: too many DMA resources, max %u.",
ACPIDEV_RES_DMA_MAX);
return (AE_LIMIT);
}
@@ -380,7 +380,7 @@ acpidev_resource_insert_irq(acpidev_resource_handle_t rhdl, int irq)
ASSERT(rhdl != NULL);
if (rhdl->acpidev_irq_count >= ACPIDEV_RES_IRQ_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: too many IRQ resources, max %u.",
+ "!acpidev: too many IRQ resources, max %u.",
ACPIDEV_RES_IRQ_MAX);
return (AE_LIMIT);
}
@@ -456,7 +456,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
high |= ACPIDEV_REG_MEM_COHERENT_PF;
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown memory caching type %u.",
+ "!acpidev: unknown memory caching type %u.",
addrp->Info.Mem.Caching);
rc = AE_ERROR;
break;
@@ -477,7 +477,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert regspec into resource handle.");
}
/* Generate 'ranges' for producer. */
@@ -502,7 +502,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
range.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -524,7 +524,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
high |= ACPIDEV_REG_IO_RANGE_FULL;
} else {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown IO range type %u.",
+ "!acpidev: unknown IO range type %u.",
addrp->Info.Io.RangeType);
rc = AE_ERROR;
break;
@@ -545,7 +545,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert regspec into resource handle.");
}
/* Generate 'ranges' for producer. */
@@ -570,7 +570,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
range.size_low = addrp->AddressLength & 0xFFFFFFFF;
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -585,7 +585,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
end = addrp->Minimum + addrp->AddressLength;
if (end < addrp->Minimum || end > UINT_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: bus range "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: bus range "
"in ADDRESS64 is invalid.");
rc = AE_ERROR;
break;
@@ -595,7 +595,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
ASSERT(bus.bus_start <= bus.bus_end);
rc = acpidev_resource_insert_bus(rhdl, &bus);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert bus range into resource handle.");
}
}
@@ -603,7 +603,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
default:
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: unknown resource type %u in ADDRESS64.",
+ "!acpidev: unknown resource type %u in ADDRESS64.",
addrp->ResourceType);
rc = AE_BAD_PARAMETER;
}
@@ -632,7 +632,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
case ACPI_RESOURCE_TYPE_VENDOR:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unsupported producer resource type %u, ignored.",
+ "!acpidev: unsupported producer resource type %u, ignored.",
rscp->Type);
break;
@@ -651,13 +651,13 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
range.size_hi = 0;
range.size_low = rscp->Data.Io.AddressLength;
if ((uint64_t)range.child_low + range.size_low > UINT16_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO record, "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO record, "
"IO max is out of range.");
rc = AE_ERROR;
} else if (range.size_low != 0) {
rc = acpidev_resource_insert_range(rhdl, &range);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert range into resource handle.");
}
}
@@ -671,16 +671,16 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.Address.ProducerConsumer != ACPI_PRODUCER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"a CONSUMER resource, ignored.");
} else if (ACPI_FAILURE(AcpiResourceToAddress64(rscp,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to convert resource to ADDR64.");
+ "!acpidev: failed to convert resource to ADDR64.");
} else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle ADDRESS resource.");
+ "!acpidev: failed to handle ADDRESS resource.");
}
break;
}
@@ -690,7 +690,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_PRODUCER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"a CONSUMER resource, ignored.");
break;
}
@@ -705,14 +705,14 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle EXTADDRESS resource.");
+ "!acpidev: failed to handle EXTADDRESS resource.");
}
break;
}
case ACPI_RESOURCE_TYPE_START_DEPENDENT:
case ACPI_RESOURCE_TYPE_END_DEPENDENT:
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered "
"START_DEPENDENT or END_DEPENDENT tag, ignored.");
break;
@@ -723,7 +723,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp)
default:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown ACPI resource type %u, ignored.",
+ "!acpidev: unknown ACPI resource type %u, ignored.",
rscp->Type);
break;
}
@@ -746,7 +746,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
case ACPI_RESOURCE_TYPE_GENERIC_REGISTER:
case ACPI_RESOURCE_TYPE_VENDOR:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unsupported consumer resource type %u, ignored.",
+ "!acpidev: unsupported consumer resource type %u, ignored.",
rscp->Type);
break;
@@ -755,7 +755,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
int i;
if (rscp->Data.ExtendedIrq.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
break;
}
@@ -764,7 +764,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.ExtendedIrq.Interrupts[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"Extended IRQ into resource handle.");
rc = AE_ERROR;
break;
@@ -781,7 +781,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.Irq.Interrupts[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"IRQ into resource handle.");
rc = AE_ERROR;
break;
@@ -798,7 +798,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
rscp->Data.Dma.Channels[i]))) {
continue;
}
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert"
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert"
"dma into resource handle.");
rc = AE_ERROR;
break;
@@ -827,13 +827,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
reg.phys_mid = 0;
reg.size_hi = 0;
if ((uint64_t)reg.phys_low + reg.size_low > UINT16_MAX) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO/FIXEDIO "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO/FIXEDIO "
"record, IO max is out of range.");
rc = AE_ERROR;
} else if (reg.size_low != 0) {
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert reg into resource handle.");
}
}
@@ -866,13 +866,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
reg.size_hi = 0;
if ((uint64_t)reg.phys_low + reg.size_low > UINT32_MAX) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid MEMORY32/FIXEDMEMORY32 record, "
+ "!acpidev: invalid MEMORY32/FIXEDMEMORY32 record, "
"memory max is out of range.");
rc = AE_ERROR;
} else if (reg.size_low != 0) {
rc = acpidev_resource_insert_reg(rhdl, &reg);
if (ACPI_FAILURE(rc)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
"insert reg into resource handle.");
}
}
@@ -886,16 +886,16 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.Address.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
} else if (ACPI_FAILURE(AcpiResourceToAddress64(rscp,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to convert resource to ADDR64.");
+ "!acpidev: failed to convert resource to ADDR64.");
} else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle ADDRESS resource.");
+ "!acpidev: failed to handle ADDRESS resource.");
}
break;
}
@@ -905,7 +905,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
ACPI_RESOURCE_ADDRESS64 addr64;
if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_CONSUMER) {
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"a PRODUCER resource, ignored.");
break;
}
@@ -920,14 +920,14 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl,
&addr64))) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to handle EXTADDRESS resource.");
+ "!acpidev: failed to handle EXTADDRESS resource.");
}
break;
}
case ACPI_RESOURCE_TYPE_START_DEPENDENT:
case ACPI_RESOURCE_TYPE_END_DEPENDENT:
- ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered "
+ ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered "
"START_DEPENDENT or END_DEPENDENT tag, ignored.");
break;
@@ -938,7 +938,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp)
default:
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: unknown ACPI resource type %u, ignored.",
+ "!acpidev: unknown ACPI resource type %u, ignored.",
rscp->Type);
break;
}
@@ -959,14 +959,14 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
ASSERT(rhdlp != NULL);
if (hdl == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: hdl is NULL in acpidev_resource_walk().");
+ "!acpidev: hdl is NULL in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
} else if (method == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: method is NULL in acpidev_resource_walk().");
+ "!acpidev: method is NULL in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
} else if (rhdlp == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: resource handle ptr is NULL "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: resource handle ptr is NULL "
"in acpidev_resource_walk().");
return (AE_BAD_PARAMETER);
}
@@ -975,7 +975,7 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
if (ACPI_FAILURE(AcpiGetHandle(hdl, method, &mhdl))) {
char *objname = acpidev_get_object_name(hdl);
ACPIDEV_DEBUG(CE_NOTE,
- "acpidev: method %s doesn't exist under %s",
+ "!acpidev: method %s doesn't exist under %s",
method, objname);
acpidev_free_object_name(objname);
return (AE_NOT_FOUND);
@@ -997,11 +997,9 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method,
}
if (ACPI_FAILURE(rc)) {
char *objname = acpidev_get_object_name(hdl);
- ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to walk resource from method %s under %s.",
- method, objname);
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to walk resource from "
+ "method %s under %s.", method, objname);
acpidev_free_object_name(objname);
-
}
return (rc);
@@ -1016,7 +1014,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ASSERT(infop != NULL);
if (infop == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter "
"in acpidev_resource_process().");
return (AE_BAD_PARAMETER);
}
@@ -1027,7 +1025,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
consumer, &rhdl);
if (ACPI_FAILURE(rc)) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: failed to walk ACPI resources of %s(%s).",
+ "!acpidev: failed to walk ACPI resources of %s(%s).",
path, infop->awi_name);
return (rc);
}
@@ -1041,7 +1039,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"reg", (int *)rhdl->acpidev_regp,
rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'reg' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1051,7 +1049,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"assigned-addresses", (int *)rhdl->acpidev_regp,
rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'assigned-addresses' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1062,7 +1060,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip,
"interrupts", (int *)rhdl->acpidev_irqp,
rhdl->acpidev_irq_count) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'interrupts' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1073,7 +1071,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip,
"dma-channels", (int *)rhdl->acpidev_dmap,
rhdl->acpidev_dma_count) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'dma-channels' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1088,7 +1086,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"ranges", (int *)rhdl->acpidev_rangep,
rhdl->acpidev_range_count * sizeof (acpidev_ranges_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'ranges' property for %s.", path);
rc = AE_ERROR;
goto out;
@@ -1100,7 +1098,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer)
"bus-range", (int *)rhdl->acpidev_busp,
rhdl->acpidev_bus_count * sizeof (acpidev_bus_range_t) /
sizeof (int)) != NDI_SUCCESS) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set "
"'bus-range' property for %s.", path);
rc = AE_ERROR;
goto out;
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
index 144cec2dbc..e19a6e92e4 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -102,7 +102,7 @@ static acpidev_filter_rule_t acpidev_scope_filters[] = {
static ACPI_STATUS
acpidev_scope_probe(acpidev_walk_info_t *infop)
{
- ACPI_STATUS rc;
+ ACPI_STATUS rc = AE_OK;
int flags;
ASSERT(infop != NULL);
@@ -112,19 +112,29 @@ acpidev_scope_probe(acpidev_walk_info_t *infop)
return (AE_OK);
}
- if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
- flags = ACPIDEV_PROCESS_FLAG_SCAN;
- rc = acpidev_process_object(infop, flags);
- } else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ flags = ACPIDEV_PROCESS_FLAG_SCAN;
+ switch (infop->awi_op_type) {
+ case ACPIDEV_OP_BOOT_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_CREATE;
+ break;
+
+ case ACPIDEV_OP_BOOT_REPROBE:
+ break;
+
+ case ACPIDEV_OP_HOTPLUG_PROBE:
+ flags |= ACPIDEV_PROCESS_FLAG_SYNCSTATUS |
+ ACPIDEV_PROCESS_FLAG_HOLDBRANCH;
+ break;
+
+ default:
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_scope_probe().", infop->awi_op_type);
rc = AE_BAD_PARAMETER;
+ break;
+ }
+
+ if (rc == AE_OK) {
+ rc = acpidev_process_object(infop, flags);
}
if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
cmn_err(CE_WARN,
@@ -150,7 +160,7 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
ACPIDEV_ARRAY_PARAM(acpidev_scope_filters),
devname, maxlen);
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
"in acpidev_scope_filter().", infop->awi_op_type);
res = ACPIDEV_FILTER_FAILED;
}
@@ -158,7 +168,6 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen)
return (res);
}
-/*ARGSUSED*/
static ACPI_STATUS
acpidev_scope_init(acpidev_walk_info_t *infop)
{
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
index 869a935f0d..1b1a0cf3ad 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c
@@ -24,12 +24,13 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
#include <sys/types.h>
#include <sys/cmn_err.h>
+#include <sys/note.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
@@ -55,7 +56,7 @@ acpidev_query_device_status(ACPI_HANDLE hdl)
ASSERT(hdl != NULL);
if (hdl == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: hdl is NULL in acpidev_query_device_status().");
+ "!acpidev: hdl is NULL in acpidev_query_device_status().");
return (0);
}
@@ -114,7 +115,7 @@ acpidev_match_device_id(ACPI_DEVICE_INFO *infop, char **ids, int count)
if (count == 0) {
return (B_TRUE);
} else if (infop == NULL || ids == NULL) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters in "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters in "
"acpidev_match_device_id().");
return (B_FALSE);
}
@@ -206,7 +207,7 @@ acpidev_get_device_by_id(ACPI_HANDLE hdl, char **ids, int count,
ASSERT(userfunc != NULL);
if (hdl == NULL || userfunc == NULL || (ids == NULL && count != 0)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters "
"in acpidev_get_device_by_id().");
return (AE_BAD_PARAMETER);
}
@@ -237,7 +238,7 @@ acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl, char *method,
ASSERT(func != NULL);
if (func == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameters for acpidev_walk_apic().");
+ "!acpidev: invalid parameters for acpidev_walk_apic().");
return (AE_BAD_PARAMETER);
}
@@ -361,7 +362,7 @@ acpidev_alloc_walk_info(acpidev_op_type_t op_type, int lvl, ACPI_HANDLE hdl,
datap->aod_level = lvl;
datap->aod_hdl = hdl;
} else {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object "
"handle for %s in acpidev_alloc_walk_info().",
infop->awi_name);
AcpiOsFree(infop->awi_info);
@@ -416,12 +417,17 @@ acpidev_walk_info_get_pdip(acpidev_walk_info_t *infop)
* Called to release resources when the corresponding object is going
* to be destroyed.
*/
-/*ARGSUSED*/
static void
-acpidev_get_object_handler(ACPI_HANDLE hdl, void *data)
+acpidev_free_object_handler(ACPI_HANDLE hdl, void *data)
{
+ _NOTE(ARGUNUSED(hdl));
+
acpidev_data_handle_t objhdl = data;
+ if (objhdl->aod_class != NULL) {
+ atomic_dec_32(&objhdl->aod_class->adc_refcnt);
+ objhdl->aod_class = NULL;
+ }
kmem_free(objhdl, sizeof (acpidev_data_handle_t));
}
@@ -431,7 +437,7 @@ acpidev_data_get_handle(ACPI_HANDLE hdl)
void *ptr;
acpidev_data_handle_t objhdl = NULL;
- if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr))) {
+ if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr))) {
objhdl = (acpidev_data_handle_t)ptr;
}
@@ -444,7 +450,12 @@ acpidev_data_create_handle(ACPI_HANDLE hdl)
acpidev_data_handle_t objhdl;
objhdl = kmem_zalloc(sizeof (*objhdl), KM_SLEEP);
- if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_get_object_handler,
+ objhdl->aod_bdtype = ACPIDEV_INVALID_BOARD;
+ objhdl->aod_bdnum = UINT32_MAX;
+ objhdl->aod_portid = UINT32_MAX;
+ objhdl->aod_class_id = ACPIDEV_CLASS_ID_INVALID;
+
+ if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_free_object_handler,
(void *)objhdl))) {
cmn_err(CE_WARN,
"!acpidev: failed to attach handle data to object.");
@@ -459,9 +470,15 @@ void
acpidev_data_destroy_handle(ACPI_HANDLE hdl)
{
void *ptr;
+ acpidev_data_handle_t objhdl = NULL;
- if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr)) &&
- ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_get_object_handler))) {
+ if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr)) &&
+ ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_free_object_handler))) {
+ objhdl = ptr;
+ if (objhdl->aod_class != NULL) {
+ atomic_dec_32(&objhdl->aod_class->adc_refcnt);
+ objhdl->aod_class = NULL;
+ }
kmem_free(ptr, sizeof (acpidev_data_handle_t));
}
}
@@ -502,14 +519,14 @@ void
acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag)
{
ASSERT(hdl != NULL);
- hdl->aod_eflag |= flag;
+ atomic_or_32(&hdl->aod_eflag, flag);
}
void
acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag)
{
ASSERT(hdl != NULL);
- hdl->aod_eflag &= ~flag;
+ atomic_and_32(&hdl->aod_eflag, ~flag);
}
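Switching the aod_eflag updates from plain |= and &= to atomic_or_32()/atomic_and_32() matters because the data handle can now be touched from concurrent ACPI notify handlers; an unlocked read-modify-write could silently drop a concurrent flag change. An illustration (generic, not from this patch):

    hdl->aod_eflag |= flag;              /* load + OR + store: racy    */
    atomic_or_32(&hdl->aod_eflag, flag); /* one atomic RMW: race-free  */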
uint32_t
@@ -519,6 +536,27 @@ acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag)
return (hdl->aod_eflag & flag);
}
+boolean_t
+acpidev_data_dr_capable(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE);
+}
+
+boolean_t
+acpidev_data_dr_ready(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY);
+}
+
+boolean_t
+acpidev_data_dr_failed(acpidev_data_handle_t hdl)
+{
+ ASSERT(hdl != NULL);
+ return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED);
+}
+
static char *
acpidev_generate_pseudo_unitaddr(char *uid, acpidev_class_id_t cid,
char *buf, size_t len)
@@ -591,7 +629,7 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len)
}
if (cnt != 1 && cnt != 2) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid uid format string '%s'.", fmt);
+ "!acpidev: invalid uid format string '%s'.", fmt);
return (NULL);
}
@@ -605,11 +643,11 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len)
*/
if (cnt == 2 && snprintf(buf, len, "%u,%u", id2, id1) >= len) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: generated unitaddr is too long.");
+ "!acpidev: generated unitaddr is too long.");
return (NULL);
} else if (cnt == 1 && snprintf(buf, len, "%u", id1) >= len) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: generated unitaddr is too long.");
+ "!acpidev: generated unitaddr is too long.");
return (NULL);
}
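acpidev_gen_unitaddr() leans on the C99 snprintf() contract: the return value is the length the fully formatted string would have needed, so a result greater than or equal to the buffer size means the output was truncated. A standalone sketch of the check:

    char buf[8];

    /* snprintf() reports the untruncated length it wanted to write. */
    if (snprintf(buf, sizeof (buf), "%u,%u", id2, id1) >=
        sizeof (buf)) {
            /* unit address didn't fit; treat as an error */
    }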
@@ -681,7 +719,7 @@ acpidev_set_unitaddr(acpidev_walk_info_t *infop, char **fmts, size_t nfmt,
if (infop == NULL || infop->awi_dip == NULL ||
infop->awi_info == NULL) {
ACPIDEV_DEBUG(CE_WARN,
- "acpidev: invalid parameters in acpidev_set_unitaddr().");
+ "!acpidev: invalid parameters in acpidev_set_unitaddr().");
return (AE_BAD_PARAMETER);
}
@@ -758,7 +796,7 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
ASSERT(compat != NULL || acount == 0);
if (infop == NULL || infop->awi_dip == NULL ||
infop->awi_info == NULL || (compat == NULL && acount != 0)) {
- ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters "
+ ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters "
"in acpidev_set_compatible().");
return (AE_BAD_PARAMETER);
}
@@ -802,3 +840,90 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
return (AE_OK);
}
+
+/* Evaluate the _OST method under an object to report hotplug event status. */
+ACPI_STATUS
+acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code, uint32_t status,
+ char *bufp, size_t len)
+{
+ ACPI_STATUS rc;
+ ACPI_OBJECT args[3];
+ ACPI_OBJECT_LIST arglist;
+
+ args[0].Type = ACPI_TYPE_INTEGER;
+ args[0].Integer.Value = code;
+ args[1].Type = ACPI_TYPE_INTEGER;
+ args[1].Integer.Value = status;
+ args[2].Type = ACPI_TYPE_BUFFER;
+ args[2].Buffer.Pointer = (UINT8 *)bufp;
+ args[2].Buffer.Length = (UINT32)len;
+ if (bufp == NULL || len == 0) {
+ arglist.Count = 2;
+ } else {
+ arglist.Count = 3;
+ }
+ arglist.Pointer = args;
+ rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_OST, &arglist, NULL);
+ if (rc != AE_OK && rc != AE_NOT_FOUND) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _OST method, code 0x%x.", rc);
+ }
+
+ return (rc);
+}
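A typical caller passes the notification value being handled plus a status code, with no buffer argument; for example, a notify handler declining an eject might report back to firmware like this (a sketch using names from this patch, handler context assumed):

    /* Tell the firmware the eject request was seen but isn't supported. */
    (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
        ACPI_OST_STA_EJECT_NOT_SUPPORT, NULL, 0);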
+
+ACPI_STATUS
+acpidev_eval_ej0(ACPI_HANDLE hdl)
+{
+ ACPI_STATUS rc;
+ ACPI_OBJECT args[1];
+ ACPI_OBJECT_LIST arglist;
+
+ /*
+ * Quotation from ACPI spec 4.0 section 6.3.3.
+ * Arg0 An Integer containing a device ejection control
+ * 0 Cancel a mark for ejection request (EJ0 will never be called
+ * with this value)
+ * 1 Hot eject or mark for ejection
+ */
+ args[0].Type = ACPI_TYPE_INTEGER;
+ args[0].Integer.Value = 1;
+ arglist.Count = 1;
+ arglist.Pointer = args;
+ rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_EJ0, &arglist, NULL);
+ if (rc != AE_OK) {
+ ACPIDEV_DEBUG(CE_WARN,
+ "!acpidev: failed to evaluate _EJ0 method, code 0x%x.", rc);
+ }
+
+ return (rc);
+}
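Evaluating _EJ0 with Arg0 = 1 asks the firmware to actually power off and isolate the device, so it is normally the last step after the OS has torn down its own state. One plausible sequence, sketched here rather than quoted from the hotplug code:

    /* OS-side offline/teardown happens first, then: */
    if (ACPI_SUCCESS(acpidev_eval_ej0(hdl))) {
            int sta = acpidev_query_device_status(hdl);
            if (!acpidev_check_device_enabled(sta)) {
                    /* _STA confirms the device is gone */
            }
    }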
+
+ACPI_STATUS
+acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp)
+{
+ int pxmid;
+
+ ASSERT(idp != NULL);
+
+ /*
+	 * Try to evaluate the ACPI _PXM method to get the proximity domain id.
+	 * Quotation from ACPI 4.0:
+ * If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a
+ * dynamically added processor is not present in the System Resource
+ * Affinity Table (SRAT), a _PXM object must exist for the processor's
+ * device or one of its ancestors in the ACPI Namespace.
+ */
+ while (hdl != NULL) {
+ if (ACPI_SUCCESS(acpica_eval_int(hdl,
+ ACPIDEV_METHOD_NAME_PXM, &pxmid))) {
+ *idp = (uint32_t)pxmid;
+ return (AE_OK);
+ }
+ if (ACPI_FAILURE(AcpiGetParent(hdl, &hdl))) {
+ break;
+ }
+ }
+
+ return (AE_NOT_FOUND);
+}
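Callers use this walk-up-the-namespace fallback to place hot-added resources on the correct NUMA node when SRAT has no entry for them. A sketch (cpu_hdl is a hypothetical handle for a hot-added processor object):

    uint32_t pxmid;

    if (ACPI_SUCCESS(acpidev_eval_pxm(cpu_hdl, &pxmid))) {
            /* map pxmid to an lgroup/memory node for the new CPU */
    }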
diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
index d9908aec59..35fb4b6af5 100644
--- a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
+++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
/*
@@ -40,6 +40,7 @@
#include <sys/ddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddifm.h>
+#include <sys/note.h>
#include <sys/ndifm.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
@@ -175,6 +176,9 @@ _init(void)
return (error);
}
+ /* Initialize event subsystem. */
+ acpinex_event_init();
+
/* Install the module. */
if ((error = mod_install(&modlinkage)) != 0) {
cmn_err(CE_WARN, "acpinex: failed to install module.");
@@ -197,6 +201,9 @@ _fini(void)
return (error);
}
+ /* Shut down event subsystem. */
+ acpinex_event_fini();
+
/* Free the soft state info. */
ddi_soft_state_fini(&acpinex_softstates);
@@ -211,10 +218,11 @@ _info(struct modinfo *modinfop)
return (mod_info(&modlinkage, modinfop));
}
-/* ARGSUSED */
static int
acpinex_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
+ _NOTE(ARGUNUSED(dip));
+
dev_t dev;
int instance;
@@ -268,16 +276,24 @@ acpinex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
(void) ddi_pathname(devi, softsp->ans_path);
if (ACPI_FAILURE(acpica_get_handle(devi, &softsp->ans_hdl))) {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: failed to get ACPI handle for %s.",
+ "!acpinex: failed to get ACPI handle for %s.",
softsp->ans_path);
ddi_soft_state_free(acpinex_softstates, instance);
return (DDI_FAILURE);
}
mutex_init(&softsp->ans_lock, NULL, MUTEX_DRIVER, NULL);
+ /* Install event handler for child/descendant objects. */
+ if (acpinex_event_scan(softsp, B_TRUE) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpinex: failed to install event handler "
+ "for children of %s.", softsp->ans_path);
+ }
+
/* nothing to suspend/resume here */
(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
"pm-hardware-state", "no-suspend-resume");
+ (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi,
+ DDI_NO_AUTODETACH, 1);
acpinex_fm_init(softsp);
ddi_report_dev(devi);
@@ -301,17 +317,24 @@ acpinex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_detach()", instance);
return (DDI_FAILURE);
}
switch (cmd) {
case DDI_DETACH:
+ if (acpinex_event_scan(softsp, B_FALSE) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!acpinex: failed to uninstall event "
+ "handler for children of %s.", softsp->ans_path);
+ return (DDI_FAILURE);
+ }
ddi_remove_minor_node(devi, NULL);
acpinex_fm_fini(softsp);
mutex_destroy(&softsp->ans_lock);
ddi_soft_state_free(acpinex_softstates, instance);
+ (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi,
+ DDI_NO_AUTODETACH, 0);
return (DDI_SUCCESS);
case DDI_SUSPEND:
@@ -332,13 +355,11 @@ name_child(dev_info_t *child, char *name, int namelen)
name[0] = '\0';
if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
ACPIDEV_PROP_NAME_UNIT_ADDR, &unitaddr) == DDI_SUCCESS) {
- (void) strncpy(name, unitaddr, namelen - 1);
- name[namelen - 1] = '\0';
+ (void) strlcpy(name, unitaddr, namelen);
ddi_prop_free(unitaddr);
} else {
- ACPINEX_DEBUG(CE_NOTE,
- "acpinex: failed to lookup child unit-address prop for %p.",
- (void *)child);
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to lookup child "
+ "unit-address prop for %p.", (void *)child);
}
return (DDI_SUCCESS);
@@ -406,21 +427,22 @@ acpinex_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
off_t offset, off_t len, caddr_t *vaddrp)
{
ACPINEX_DEBUG(CE_WARN,
- "acpinex: acpinex_bus_map called and it's unimplemented.");
+ "!acpinex: acpinex_bus_map called and it's unimplemented.");
return (DDI_ME_UNIMPLEMENTED);
}
-/* ARGSUSED */
static int
acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
{
+ _NOTE(ARGUNUSED(flags, otyp, credp));
+
minor_t minor, instance;
acpinex_softstate_t *softsp;
minor = getminor(*devi);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of "
"range in acpinex_open, max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
@@ -428,7 +450,7 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_open().", instance);
return (EINVAL);
}
@@ -437,23 +459,24 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp)
return (0);
} else {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_open().",
+ "!acpinex: invalid minor number %d in acpinex_open().",
minor);
return (EINVAL);
}
}
-/* ARGSUSED */
static int
acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
{
+ _NOTE(ARGUNUSED(flags, otyp, credp));
+
minor_t minor, instance;
acpinex_softstate_t *softsp;
minor = getminor(dev);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of "
"range in acpinex_close(), max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
@@ -461,7 +484,7 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_close().", instance);
return (EINVAL);
}
@@ -470,17 +493,18 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp)
return (0);
} else {
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_close().",
+ "!acpinex: invalid minor number %d in acpinex_close().",
minor);
return (EINVAL);
}
}
-/* ARGSUSED */
static int
acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
int *rvalp)
{
+ _NOTE(ARGUNUSED(cmd, arg, mode, credp, rvalp));
+
int rv = 0;
minor_t minor, instance;
acpinex_softstate_t *softsp;
@@ -488,21 +512,21 @@ acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
minor = getminor(dev);
instance = ACPINEX_GET_INSTANCE(minor);
if (instance >= ACPINEX_INSTANCE_MAX) {
- ACPINEX_DEBUG(CE_NOTE, "acpinex: instance number %d out of "
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: instance number %d out of "
"range in acpinex_ioctl(), max %d.",
instance, ACPINEX_INSTANCE_MAX - 1);
return (EINVAL);
}
softsp = ddi_get_soft_state(acpinex_softstates, instance);
if (softsp == NULL) {
- ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state "
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state "
"object for instance %d in acpinex_ioctl().", instance);
return (EINVAL);
}
rv = ENOTSUP;
ACPINEX_DEBUG(CE_WARN,
- "acpinex: invalid minor number %d in acpinex_ioctl().", minor);
+ "!acpinex: invalid minor number %d in acpinex_ioctl().", minor);
return (rv);
}
@@ -512,11 +536,12 @@ acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
* Register error handling callback with our parent. We will just call
* our children's error callbacks and return their status.
*/
-/*ARGSUSED*/
static int
acpinex_err_callback(dev_info_t *dip, ddi_fm_error_t *derr,
const void *impl_data)
{
+ _NOTE(ARGUNUSED(impl_data));
+
/* Call our childrens error handlers */
return (ndi_fm_handler_dispatch(dip, NULL, derr));
}
@@ -560,11 +585,12 @@ acpinex_fm_fini(acpinex_softstate_t *softsp)
* Initialize FMA resources for child devices.
* Called when child calls ddi_fm_init().
*/
-/*ARGSUSED*/
static int
acpinex_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
ddi_iblock_cookie_t *ibc)
{
+ _NOTE(ARGUNUSED(tdip, cap));
+
acpinex_softstate_t *softsp = ddi_get_soft_state(acpinex_softstates,
ddi_get_instance(dip));
diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c
new file mode 100644
index 0000000000..633c99b442
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c
@@ -0,0 +1,712 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/bitmap.h>
+#include <sys/cmn_err.h>
+#include <sys/note.h>
+#include <sys/sunndi.h>
+#include <sys/fastboot_impl.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpinex.h>
+
+int acpinex_event_support_remove = 0;
+
+static volatile uint_t acpinex_dr_event_cnt = 0;
+static ulong_t acpinex_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
+
+/*
+ * Generate a DR_REQ event and deliver it to syseventd.
+ * Refer to sys/sysevent/dr.h for the message definition.
+ */
+static int
+acpinex_event_generate_event(dev_info_t *dip, ACPI_HANDLE hdl, int req,
+ int event, char *objname)
+{
+ int rv = 0;
+ sysevent_id_t eid;
+ sysevent_value_t evnt_val;
+ sysevent_attr_list_t *evnt_attr_list = NULL;
+ char *attach_pnt;
+ char event_type[32];
+
+ /* Add "attachment point" attribute. */
+ attach_pnt = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ if (ACPI_FAILURE(acpidev_dr_get_attachment_point(hdl,
+ attach_pnt, MAXPATHLEN))) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to generate AP name for %s.", objname);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (-1);
+ }
+ ASSERT(attach_pnt[0] != '\0');
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = attach_pnt;
+ rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val, KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ DR_AP_ID, EC_DR);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ /* Add "request type" attribute. */
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = SE_REQ2STR(req);
+ rv = sysevent_add_attr(&evnt_attr_list, DR_REQ_TYPE, &evnt_val,
+ KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ DR_REQ_TYPE, EC_DR);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ /* Add "acpi-event-type" attribute. */
+ switch (event) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_BUS_CHECK);
+ break;
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_DEVICE_CHECK);
+ break;
+ case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT);
+ break;
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ (void) snprintf(event_type, sizeof (event_type),
+ ACPIDEV_EVENT_TYPE_EJECT_REQUEST);
+ break;
+ default:
+ cmn_err(CE_WARN,
+ "!acpinex: unknown ACPI event type %d.", event);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (-1);
+ }
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = event_type;
+ rv = sysevent_add_attr(&evnt_attr_list, ACPIDEV_EVENT_TYPE_ATTR_NAME,
+ &evnt_val, KM_SLEEP);
+ if (rv != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to add attr [%s] for %s event.",
+ ACPIDEV_EVENT_TYPE_ATTR_NAME, EC_DR);
+ sysevent_free_attr(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+ return (rv);
+ }
+
+ rv = ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DR, ESC_DR_REQ,
+ evnt_attr_list, &eid, KM_SLEEP);
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to log DR_REQ event for %s.", objname);
+ rv = -1;
+ }
+
+ nvlist_free(evnt_attr_list);
+ kmem_free(attach_pnt, MAXPATHLEN);
+
+ return (rv);
+}
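On the receiving side, a userland consumer (the acpihpd daemon added by this commit) subscribes to EC_DR/ESC_DR_REQ and pulls these attributes back out of the event's nvlist. A minimal sketch with libsysevent; the handler and setup are illustrative, not the daemon's actual code:

    #include <libsysevent.h>
    #include <libnvpair.h>
    #include <sys/sysevent/dr.h>
    #include <sys/sysevent/eventdefs.h>

    static void
    dr_req_handler(sysevent_t *ev)
    {
            nvlist_t *nvl = NULL;
            char *apid, *reqtype;

            if (sysevent_get_attr_list(ev, &nvl) != 0)
                    return;
            if (nvlist_lookup_string(nvl, DR_AP_ID, &apid) == 0 &&
                nvlist_lookup_string(nvl, DR_REQ_TYPE, &reqtype) == 0) {
                    /* hand the attachment point to cfgadm-style logic */
            }
            nvlist_free(nvl);
    }

    /* Subscription setup, e.g. in main(): */
    const char *subclasses[] = { ESC_DR_REQ };
    sysevent_handle_t *shp = sysevent_bind_handle(dr_req_handler);
    if (shp != NULL)
            (void) sysevent_subscribe_event(shp, EC_DR, subclasses, 1);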
+
+/*
+ * Event handler for ACPI EJECT_REQUEST notifications.
+ * EJECT_REQUEST notifications should be generated on the device to be ejected,
+ * so no need to scan subtree of it.
+ * It also invokes ACPI _OST method to update event status if call_ost is true.
+ */
+static void
+acpinex_event_handle_eject_request(ACPI_HANDLE hdl, acpinex_softstate_t *sp,
+ boolean_t call_ost)
+{
+ int code;
+ char *objname;
+
+ ASSERT(hdl != NULL);
+ objname = acpidev_get_object_name(hdl);
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL);
+ if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) {
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
+ ACPI_OST_STA_FAILURE, NULL, 0);
+ }
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: softstate data structure is invalid.");
+ cmn_err(CE_WARN,
+ "!acpinex: failed to handle EJECT_REQUEST event from %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ return;
+ }
+
+ if (acpinex_event_support_remove == 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: hot-removing of device %s is unsupported.",
+ objname);
+ code = ACPI_OST_STA_EJECT_NOT_SUPPORT;
+ } else if (acpinex_event_generate_event(sp->ans_dip, hdl,
+ SE_OUTGOING_RES, ACPI_NOTIFY_EJECT_REQUEST, objname) != 0) {
+ cmn_err(CE_WARN, "!acpinex: failed to generate ESC_DR_REQ "
+ "event for device eject request from %s.", objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else {
+ cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event for "
+ "device eject request from %s.", objname);
+ code = ACPI_OST_STA_EJECT_IN_PROGRESS;
+ }
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
+ code, NULL, 0);
+ }
+
+ acpidev_free_object_name(objname);
+}
+
+struct acpinex_event_check_arg {
+ acpinex_softstate_t *softstatep;
+ int event_type;
+ uint32_t device_insert;
+ uint32_t device_remove;
+ uint32_t device_fail;
+};
+
+static ACPI_STATUS
+acpinex_event_handle_check_one(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(lvl, retval));
+
+ char *objname;
+ int status, psta, csta;
+ acpidev_data_handle_t dhdl;
+ struct acpinex_event_check_arg *argp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ argp = (struct acpinex_event_check_arg *)ctx;
+
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ /* Skip subtree if failed to get the data handle. */
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: failed to get data associated with %p.", hdl);
+ return (AE_CTRL_DEPTH);
+ } else if (!acpidev_data_dr_capable(dhdl)) {
+ return (AE_OK);
+ }
+
+ objname = acpidev_get_object_name(hdl);
+
+ status = 0;
+ /* Query previous device status. */
+ psta = acpidev_data_get_status(dhdl);
+ if (acpidev_check_device_enabled(psta)) {
+ status |= 0x1;
+ }
+ /* Query current device status. */
+ csta = acpidev_query_device_status(hdl);
+ if (acpidev_check_device_enabled(csta)) {
+ status |= 0x2;
+ }
+
+ switch (status) {
+ case 0x0:
+ /*FALLTHROUGH*/
+ case 0x3:
+ /* No status changes, keep on walking. */
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+
+ case 0x1:
+	/* Surprise removal. */
+ cmn_err(CE_WARN,
+ "!acpinex: device %s has been surprisingly removed.",
+ objname);
+ if (argp->event_type == ACPI_NOTIFY_BUS_CHECK) {
+ /*
+ * According to ACPI spec, BUS_CHECK notification
+ * should be triggered for hot-adding events only.
+ */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: device %s has been surprisingly removed "
+ "when handling BUS_CHECK event.", objname);
+ }
+ acpidev_free_object_name(objname);
+ argp->device_remove++;
+ return (AE_CTRL_DEPTH);
+
+ case 0x2:
+ /* Hot-adding. */
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: device %s has been inserted.", objname);
+ argp->device_insert++;
+ if (acpinex_event_generate_event(argp->softstatep->ans_dip, hdl,
+ SE_INCOMING_RES, argp->event_type, objname) != 0) {
+ cmn_err(CE_WARN,
+ "!acpinex: failed to generate ESC_DR_REQ event for "
+ "device insert request from %s.", objname);
+ argp->device_fail++;
+ } else {
+ cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event "
+ "for device insert request from %s.", objname);
+ }
+ acpidev_free_object_name(objname);
+ return (AE_OK);
+
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ return (AE_ERROR);
+}
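For reference, the two status bits combine the previous and current _STA-derived enabled state, and the four values map to the switch arms above (a summarizing comment, not code from the patch):

    /*
     * bit0 = enabled before, bit1 = enabled now
     *
     * 0x0: absent before and after  -> no change, keep walking
     * 0x1: enabled -> gone          -> surprise removal, prune subtree
     * 0x2: absent -> enabled        -> hot-add, emit ESC_DR_REQ
     * 0x3: enabled before and after -> no change, keep walking
     */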
+
+/*
+ * Event handler for BUS_CHECK/DEVICE_CHECK/DEVICE_CHECK_LIGHT notifications.
+ * These events may be signaled on a parent/ancestor of the devices to be
+ * hot-added, so the ACPI namespace needs to be scanned to figure out the
+ * devices in question. It also invokes the ACPI _OST method to update
+ * event status if call_ost is true.
+ */
+static void
+acpinex_event_handle_check_request(int event, ACPI_HANDLE hdl,
+ acpinex_softstate_t *sp, boolean_t call_ost)
+{
+ ACPI_STATUS rv;
+ int code;
+ char *objname;
+ struct acpinex_event_check_arg arg;
+
+ ASSERT(hdl != NULL);
+ objname = acpidev_get_object_name(hdl);
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL);
+ if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) {
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, event,
+ ACPI_OST_STA_FAILURE, NULL, 0);
+ }
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: softstate data structure is invalid.");
+ cmn_err(CE_WARN, "!acpinex: failed to handle "
+ "BUS/DEVICE_CHECK event from %s.", objname);
+ acpidev_free_object_name(objname);
+ return;
+ }
+
+ bzero(&arg, sizeof (arg));
+ arg.event_type = event;
+ arg.softstatep = sp;
+ rv = acpinex_event_handle_check_one(hdl, 0, &arg, NULL);
+ if (ACPI_SUCCESS(rv)) {
+ rv = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl,
+ ACPIDEV_MAX_ENUM_LEVELS,
+ &acpinex_event_handle_check_one, NULL, &arg, NULL);
+ }
+
+ if (ACPI_FAILURE(rv)) {
+ /* Failed to scan the ACPI namespace. */
+ cmn_err(CE_WARN, "!acpinex: failed to handle event %d from %s.",
+ event, objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else if (arg.device_remove != 0) {
+		/* A surprise removal happened. */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: some devices have been surprisingly removed.");
+ code = ACPI_OST_STA_NOT_SUPPORT;
+ } else if (arg.device_fail != 0) {
+ /* Failed to handle some devices. */
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: failed to check status of some devices.");
+ code = ACPI_OST_STA_FAILURE;
+ } else if (arg.device_insert == 0) {
+ /* No hot-added devices found. */
+ cmn_err(CE_WARN,
+ "!acpinex: no hot-added devices under %s found.", objname);
+ code = ACPI_OST_STA_FAILURE;
+ } else {
+ code = ACPI_OST_STA_INSERT_IN_PROGRESS;
+ }
+ if (call_ost) {
+ (void) acpidev_eval_ost(hdl, event, code, NULL, 0);
+ }
+
+ acpidev_free_object_name(objname);
+}
+
+static void
+acpinex_event_system_handler(ACPI_HANDLE hdl, UINT32 type, void *arg)
+{
+ acpinex_softstate_t *sp;
+
+ ASSERT(hdl != NULL);
+ ASSERT(arg != NULL);
+ sp = (acpinex_softstate_t *)arg;
+
+ acpidev_dr_lock_all();
+ mutex_enter(&sp->ans_lock);
+
+ switch (type) {
+ case ACPI_NOTIFY_BUS_CHECK:
+ /*
+ * Bus Check. This notification is performed on a device object
+ * to indicate to OSPM that it needs to perform the Plug and
+ * Play re-enumeration operation on the device tree starting
+ * from the point where it has been notified. OSPM will only
+ * perform this operation at boot, and when notified. It is
+ * the responsibility of the ACPI AML code to notify OSPM at
+ * any other times that this operation is required. The more
+ * accurately and closer to the actual device tree change the
+ * notification can be done, the more efficient the operating
+ * system response will be; however, it can also be an issue
+ * when a device change cannot be confirmed. For example, if
+ * the hardware cannot notice a device change for a particular
+ * location during a system sleeping state, it issues a Bus
+ * Check notification on wake to inform OSPM that it needs to
+ * check the configuration for a device change.
+ */
+ /*FALLTHROUGH*/
+ case ACPI_NOTIFY_DEVICE_CHECK:
+ /*
+ * Device Check. Used to notify OSPM that the device either
+ * appeared or disappeared. If the device has appeared, OSPM
+ * will re-enumerate from the parent. If the device has
+ * disappeared, OSPM will invalidate the state of the device.
+ * OSPM may optimize out re-enumeration. If _DCK is present,
+ * then Notify(object,1) is assumed to indicate an undock
+ * request.
+ */
+ /*FALLTHROUGH*/
+ case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+ /*
+ * Device Check Light. Used to notify OSPM that the device
+ * either appeared or disappeared. If the device has appeared,
+ * OSPM will re-enumerate from the device itself, not the
+ * parent. If the device has disappeared, OSPM will invalidate
+ * the state of the device.
+ */
+ atomic_inc_uint(&acpinex_dr_event_cnt);
+ acpinex_event_handle_check_request(type, hdl, sp, B_TRUE);
+ break;
+
+ case ACPI_NOTIFY_EJECT_REQUEST:
+ /*
+ * Eject Request. Used to notify OSPM that the device should
+ * be ejected, and that OSPM needs to perform the Plug and Play
+ * ejection operation. OSPM will run the _EJx method.
+ */
+ atomic_inc_uint(&acpinex_dr_event_cnt);
+ acpinex_event_handle_eject_request(hdl, sp, B_TRUE);
+ break;
+
+ default:
+ ACPINEX_DEBUG(CE_NOTE,
+ "!acpinex: unhandled event(%d) on hdl %p under %s.",
+ type, hdl, sp->ans_path);
+ (void) acpidev_eval_ost(hdl, type, ACPI_OST_STA_NOT_SUPPORT,
+ NULL, 0);
+ break;
+ }
+
+ if (acpinex_dr_event_cnt != 0) {
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't tolerate such inconsistency between stale ACPI tables
+ * and the real hardware configuration yet.
+ *
+ * A temporary solution is introduced to disable fast reboot if
+ * CPU/MEM/IOH hotplug event happens. This solution should be
+ * revised when fast reboot is enhanced to support CPU/MEM/IOH
+ * DR operations.
+ */
+ fastreboot_disable(FBNS_HOTPLUG);
+ }
+
+ mutex_exit(&sp->ans_lock);
+ acpidev_dr_unlock_all();
+}
+
+/*
+ * Install event handler for ACPI system events.
+ * The acpinex driver handles ACPI system events for its children;
+ * device-specific events are handled by device drivers.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_install_handler(ACPI_HANDLE hdl, void *arg,
+ ACPI_DEVICE_INFO *infop, acpidev_data_handle_t dhdl)
+{
+ int rc = DDI_SUCCESS;
+
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(infop != NULL);
+
+ /*
+ * Check whether the event handler has already been installed on the
+ * device object. With the introduction of ACPI Alias objects, which are
+ * similar to symlinks in file systems, there may be multiple name
+ * objects in the ACPI namespace pointing to the same underlying device
+ * object. Those Alias objects need to be filtered out; otherwise we
+ * would attempt to install the event handler multiple times on the
+ * same device object, which would fail.
+ */
+ if (acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) {
+ return (DDI_SUCCESS);
+ }
+ if (ACPI_SUCCESS(AcpiInstallNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY,
+ acpinex_event_system_handler, arg))) {
+ acpidev_data_set_flag(dhdl, ACPIDEV_DATA_HANDLER_READY);
+ } else {
+ char *objname;
+
+ objname = acpidev_get_object_name(hdl);
+ cmn_err(CE_WARN,
+ "!acpinex: failed to install system event handler for %s.",
+ objname);
+ acpidev_free_object_name(objname);
+ rc = DDI_FAILURE;
+ }
+
+ return (rc);
+}
+
+/*
+ * Uninstall event handler for ACPI system events.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_uninstall_handler(ACPI_HANDLE hdl, ACPI_DEVICE_INFO *infop,
+ acpidev_data_handle_t dhdl)
+{
+ ASSERT(hdl != NULL);
+ ASSERT(dhdl != NULL);
+ ASSERT(infop != NULL);
+
+ if (!acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) {
+ return (DDI_SUCCESS);
+ }
+ if (ACPI_SUCCESS(AcpiRemoveNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY,
+ acpinex_event_system_handler))) {
+ acpidev_data_clear_flag(dhdl, ACPIDEV_DATA_HANDLER_READY);
+ } else {
+ char *objname;
+
+ objname = acpidev_get_object_name(hdl);
+ cmn_err(CE_WARN, "!acpinex: failed to uninstall system event "
+ "handler for %s.", objname);
+ acpidev_free_object_name(objname);
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Install/uninstall ACPI system event handler for child objects of hdl.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
+ */
+static int
+acpinex_event_walk(boolean_t init, acpinex_softstate_t *sp, ACPI_HANDLE hdl)
+{
+ int rc;
+ int retval = DDI_SUCCESS;
+ dev_info_t *dip;
+ ACPI_HANDLE child = NULL;
+ ACPI_OBJECT_TYPE type;
+ ACPI_DEVICE_INFO *infop;
+ acpidev_data_handle_t dhdl;
+
+ /* Walk all child objects. */
+ ASSERT(hdl != NULL);
+ while (ACPI_SUCCESS(AcpiGetNextObject(ACPI_TYPE_ANY, hdl, child,
+ &child))) {
+ /* Skip unwanted object types. */
+ if (ACPI_FAILURE(AcpiGetType(child, &type)) ||
+ type > ACPI_TYPE_NS_NODE_MAX ||
+ BT_TEST(acpinex_object_type_mask, type) == 0) {
+ continue;
+ }
+
+ /* Get data associated with the object; skip it if this fails. */
+ dhdl = acpidev_data_get_handle(child);
+ if (dhdl == NULL) {
+ ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to get data "
+ "associated with %p, skip.", child);
+ continue;
+ }
+
+ /* Query ACPI object info for the object. */
+ if (ACPI_FAILURE(AcpiGetObjectInfo(child, &infop))) {
+ cmn_err(CE_WARN,
+ "!acpidnex: failed to get object info for %p.",
+ child);
+ continue;
+ }
+
+ if (init) {
+ rc = acpinex_event_install_handler(child, sp, infop,
+ dhdl);
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "install handler for child %p of %s.",
+ child, sp->ans_path);
+ retval = DDI_FAILURE;
+ /*
+ * Try to handle descendants if both of the
+ * following two conditions are true:
+ * 1) Device corresponding to the current object is
+ * enabled. If the device is absent/disabled,
+ * no notification should be generated from
+ * descendant objects of it.
+ * 2) No Solaris device node has been created for the
+ * current object yet. If the device node has been
+ * created for the current object, notification
+ * events from child objects should be handled by
+ * the corresponding driver.
+ */
+ } else if (acpidev_check_device_enabled(
+ acpidev_data_get_status(dhdl)) &&
+ ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+ rc = acpinex_event_walk(B_TRUE, sp, child);
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: failed to install "
+ "handler for descendants of %s.",
+ sp->ans_path);
+ retval = DDI_FAILURE;
+ }
+ }
+ } else {
+ rc = DDI_SUCCESS;
+ /* Uninstall handler for descendants if needed. */
+ if (ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+ rc = acpinex_event_walk(B_FALSE, sp, child);
+ }
+ if (rc == DDI_SUCCESS) {
+ rc = acpinex_event_uninstall_handler(child,
+ infop, dhdl);
+ }
+ /* Undo will be done by caller in case of failure. */
+ if (rc != DDI_SUCCESS) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "uninstall handler for descendants of %s.",
+ sp->ans_path);
+ AcpiOsFree(infop);
+ retval = DDI_FAILURE;
+ break;
+ }
+ }
+
+ /* Release cached resources. */
+ AcpiOsFree(infop);
+ }
+
+ return (retval);
+}
+
+int
+acpinex_event_scan(acpinex_softstate_t *sp, boolean_t init)
+{
+ int rc;
+
+ ASSERT(sp != NULL);
+ ASSERT(sp->ans_hdl != NULL);
+ ASSERT(sp->ans_dip != NULL);
+ if (sp == NULL || sp->ans_hdl == NULL || sp->ans_dip == NULL) {
+ ACPINEX_DEBUG(CE_WARN,
+ "!acpinex: invalid parameter to acpinex_event_scan().");
+ return (DDI_FAILURE);
+ }
+
+ /* Lock the current device node and walk all of its child device nodes. */
+ mutex_enter(&sp->ans_lock);
+
+ rc = acpinex_event_walk(init, sp, sp->ans_hdl);
+ if (rc != DDI_SUCCESS) {
+ if (init) {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "configure child objects of %s.", sp->ans_path);
+ rc = DDI_FAILURE;
+ } else {
+ ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+ "unconfigure child objects of %s.", sp->ans_path);
+ /* Undo in case of errors */
+ (void) acpinex_event_walk(B_TRUE, sp, sp->ans_hdl);
+ rc = DDI_FAILURE;
+ }
+ }
+
+ mutex_exit(&sp->ans_lock);
+
+ return (rc);
+}
+
+void
+acpinex_event_init(void)
+{
+ /*
+ * According to ACPI specifications, notification is only supported on
+ * Device, Processor and ThermalZone. Currently we only need to handle
+ * Device and Processor objects.
+ */
+ BT_SET(acpinex_object_type_mask, ACPI_TYPE_PROCESSOR);
+ BT_SET(acpinex_object_type_mask, ACPI_TYPE_DEVICE);
+}
+
+void
+acpinex_event_fini(void)
+{
+ bzero(acpinex_object_type_mask, sizeof (acpinex_object_type_mask));
+}
diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c
new file mode 100644
index 0000000000..35ad4353aa
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c
@@ -0,0 +1,2927 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/note.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/varargs.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cpuvar.h>
+#include <sys/cpupart.h>
+#include <sys/mem_config.h>
+#include <sys/mem_cage.h>
+#include <sys/memnode.h>
+#include <sys/callb.h>
+#include <sys/ontrap.h>
+#include <sys/obpdefs.h>
+#include <sys/promif.h>
+#include <sys/synch.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/x_call.h>
+#include <sys/x86_archext.h>
+#include <sys/fastboot_impl.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/dr.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+#include "drmach_acpi.h"
+
+/* utility */
+#define MBYTE (1048576ull)
+#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
+#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
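+
+/*
+ * For example (assuming the common x86 PAGESHIFT of 12, i.e. 4K pages):
+ * _ptob64(256) == MBYTE and _b64top(MBYTE) == 256.
+ */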
+
+static int drmach_init(void);
+static void drmach_fini(void);
+static int drmach_name2type_idx(char *);
+static sbd_error_t *drmach_mem_update_lgrp(drmachid_t);
+
+#ifdef DEBUG
+int drmach_debug = 1; /* set to non-zero to enable debug messages */
+#endif /* DEBUG */
+
+drmach_domain_info_t drmach_domain;
+
+static char *drmach_ie_fmt = "drmach_acpi.c %d";
+static drmach_array_t *drmach_boards;
+
+/* rwlock to protect drmach_boards. */
+static krwlock_t drmach_boards_rwlock;
+
+/* rwlock to block out CPR thread. */
+static krwlock_t drmach_cpr_rwlock;
+
+/* CPR callb id. */
+static callb_id_t drmach_cpr_cid;
+
+static struct {
+ const char *name;
+ const char *type;
+ sbd_error_t *(*new)(drmach_device_t *, drmachid_t *);
+} drmach_name2type[] = {
+ { ACPIDEV_NODE_NAME_CPU, DRMACH_DEVTYPE_CPU, drmach_cpu_new },
+ { ACPIDEV_NODE_NAME_MEMORY, DRMACH_DEVTYPE_MEM, drmach_mem_new },
+ { ACPIDEV_NODE_NAME_PCI, DRMACH_DEVTYPE_PCI, drmach_io_new },
+};
+
+/*
+ * drmach autoconfiguration data structures and interfaces
+ */
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "ACPI based DR v1.0"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = drmach_init()) != 0) {
+ return (err);
+ }
+
+ if ((err = mod_install(&modlinkage)) != 0) {
+ drmach_fini();
+ }
+
+ return (err);
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) == 0) {
+ drmach_fini();
+ }
+
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Internal support functions.
+ */
+static DRMACH_HANDLE
+drmach_node_acpi_get_dnode(drmach_node_t *np)
+{
+ return ((DRMACH_HANDLE)(uintptr_t)np->here);
+}
+
+static dev_info_t *
+drmach_node_acpi_get_dip(drmach_node_t *np)
+{
+ dev_info_t *dip = NULL;
+
+ if (ACPI_FAILURE(acpica_get_devinfo((DRMACH_HANDLE)(np->here), &dip))) {
+ return (NULL);
+ }
+
+ return (dip);
+}
+
+static int
+drmach_node_acpi_get_prop(drmach_node_t *np, char *name, void *buf, int len)
+{
+ int rv = 0;
+ DRMACH_HANDLE hdl;
+
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_get_prop: NULL handle");
+ rv = -1;
+ } else {
+ rv = acpidev_dr_device_getprop(hdl, name, buf, len);
+ if (rv >= 0) {
+ ASSERT(rv <= len);
+ rv = 0;
+ }
+ }
+
+ return (rv);
+}
+
+static int
+drmach_node_acpi_get_proplen(drmach_node_t *np, char *name, int *len)
+{
+ int rv = 0;
+ DRMACH_HANDLE hdl;
+
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_get_proplen: NULL handle");
+ rv = -1;
+ } else {
+ rv = acpidev_dr_device_getprop(hdl, name, NULL, 0);
+ if (rv >= 0) {
+ *len = rv;
+ return (0);
+ }
+ }
+
+ return (-1);
+}
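+
+/*
+ * The two property accessors above follow the usual two-step pattern:
+ * query the length first, then fetch the value. A minimal sketch of a
+ * hypothetical caller (buffer handling for illustration only):
+ *
+ *     int len;
+ *     char *buf;
+ *
+ *     if (np->getproplen(np, ACPIDEV_DR_PROP_DEVNAME, &len) == 0) {
+ *         buf = kmem_zalloc(len, KM_SLEEP);
+ *         if (np->getprop(np, ACPIDEV_DR_PROP_DEVNAME, buf, len) != 0)
+ *             cmn_err(CE_WARN, "!failed to get property.");
+ *         kmem_free(buf, len);
+ *     }
+ */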
+
+static ACPI_STATUS
+drmach_node_acpi_callback(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval)
+{
+ _NOTE(ARGUNUSED(lvl));
+
+ int rv;
+ dev_info_t *dip;
+ drmach_node_walk_args_t *argp = ctx;
+ int (*cb)(drmach_node_walk_args_t *args);
+ acpidev_class_id_t clsid;
+
+ ASSERT(hdl != NULL);
+ ASSERT(ctx != NULL);
+ ASSERT(retval != NULL);
+
+ /* Skip subtree if the device is not powered. */
+ if (!acpidev_dr_device_is_powered(hdl)) {
+ return (AE_CTRL_DEPTH);
+ }
+
+ /*
+ * Keep scanning the subtree if the device node lookup fails.
+ * There may be ACPI objects for which no device nodes have been created.
+ */
+ if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+ return (AE_OK);
+ }
+
+ argp->node->here = hdl;
+ cb = (int (*)(drmach_node_walk_args_t *args))argp->func;
+ rv = (*cb)(argp);
+ argp->node->here = NULL;
+ if (rv) {
+ *(int *)retval = rv;
+ return (AE_CTRL_TERMINATE);
+ }
+
+ /*
+ * Skip descendants of PCI/PCIex host bridges.
+ * PCI/PCIex devices will be handled by pcihp.
+ */
+ clsid = acpidev_dr_device_get_class(hdl);
+ if (clsid == ACPIDEV_CLASS_ID_PCI || clsid == ACPIDEV_CLASS_ID_PCIEX) {
+ return (AE_CTRL_DEPTH);
+ }
+
+ return (AE_OK);
+}
+
+static int
+drmach_node_acpi_walk(drmach_node_t *np, void *data,
+ int (*cb)(drmach_node_walk_args_t *args))
+{
+ DRMACH_HANDLE hdl;
+ int rv = 0;
+ drmach_node_walk_args_t args;
+
+ /* initialize the args structure for callback */
+ args.node = np;
+ args.data = data;
+ args.func = (void *)cb;
+
+ /* save the handle; it will be modified while walking the tree. */
+ hdl = np->get_dnode(np);
+ if (hdl == NULL) {
+ DRMACH_PR("!drmach_node_acpi_walk: failed to get device node.");
+ return (EX86_INAPPROP);
+ }
+
+ if (ACPI_FAILURE(acpidev_dr_device_walk_device(hdl,
+ ACPIDEV_MAX_ENUM_LEVELS, drmach_node_acpi_callback,
+ &args, (void *)&rv))) {
+ /*
+ * If acpidev_dr_device_walk_device() itself fails, rv won't
+ * be set to a suitable error code. Set it here.
+ */
+ if (rv == 0) {
+ cmn_err(CE_WARN, "!drmach_node_acpi_walk: failed to "
+ "walk ACPI namespace.");
+ rv = EX86_ACPIWALK;
+ }
+ }
+
+ /* restore the handle to its original value after walking the tree. */
+ np->here = (void *)hdl;
+
+ return ((int)rv);
+}
+
+static drmach_node_t *
+drmach_node_new(void)
+{
+ drmach_node_t *np;
+
+ np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP);
+
+ np->get_dnode = drmach_node_acpi_get_dnode;
+ np->getdip = drmach_node_acpi_get_dip;
+ np->getproplen = drmach_node_acpi_get_proplen;
+ np->getprop = drmach_node_acpi_get_prop;
+ np->walk = drmach_node_acpi_walk;
+
+ return (np);
+}
+
+static drmachid_t
+drmach_node_dup(drmach_node_t *np)
+{
+ drmach_node_t *dup;
+
+ dup = drmach_node_new();
+ dup->here = np->here;
+ dup->get_dnode = np->get_dnode;
+ dup->getdip = np->getdip;
+ dup->getproplen = np->getproplen;
+ dup->getprop = np->getprop;
+ dup->walk = np->walk;
+
+ return (dup);
+}
+
+static void
+drmach_node_dispose(drmach_node_t *np)
+{
+ kmem_free(np, sizeof (*np));
+}
+
+static int
+drmach_node_walk(drmach_node_t *np, void *param,
+ int (*cb)(drmach_node_walk_args_t *args))
+{
+ return (np->walk(np, param, cb));
+}
+
+static DRMACH_HANDLE
+drmach_node_get_dnode(drmach_node_t *np)
+{
+ return (np->get_dnode(np));
+}
+
+/*
+ * drmach_array provides convenient array construction, access,
+ * bounds checking and array destruction logic.
+ */
+static drmach_array_t *
+drmach_array_new(uint_t min_index, uint_t max_index)
+{
+ drmach_array_t *arr;
+
+ arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP);
+
+ arr->arr_sz = (max_index - min_index + 1) * sizeof (void *);
+ if (arr->arr_sz > 0) {
+ arr->min_index = min_index;
+ arr->max_index = max_index;
+
+ arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP);
+ return (arr);
+ } else {
+ kmem_free(arr, sizeof (*arr));
+ return (0);
+ }
+}
+
+static int
+drmach_array_set(drmach_array_t *arr, uint_t idx, drmachid_t val)
+{
+ if (idx < arr->min_index || idx > arr->max_index)
+ return (-1);
+ arr->arr[idx - arr->min_index] = val;
+ return (0);
+}
+
+/*
+ * Get the item with index idx.
+ * Return 0 with the value stored in *val on success; otherwise return -1.
+ */
+static int
+drmach_array_get(drmach_array_t *arr, uint_t idx, drmachid_t *val)
+{
+ if (idx < arr->min_index || idx > arr->max_index)
+ return (-1);
+ *val = arr->arr[idx - arr->min_index];
+ return (0);
+}
+
+static int
+drmach_array_first(drmach_array_t *arr, uint_t *idx, drmachid_t *val)
+{
+ int rv;
+
+ *idx = arr->min_index;
+ while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
+ *idx += 1;
+
+ return (rv);
+}
+
+static int
+drmach_array_next(drmach_array_t *arr, uint_t *idx, drmachid_t *val)
+{
+ int rv;
+
+ *idx += 1;
+ while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL)
+ *idx += 1;
+
+ return (rv);
+}
+
+static void
+drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t))
+{
+ drmachid_t val;
+ uint_t idx;
+ int rv;
+
+ rv = drmach_array_first(arr, &idx, &val);
+ while (rv == 0) {
+ (*disposer)(val);
+ rv = drmach_array_next(arr, &idx, &val);
+ }
+
+ kmem_free(arr->arr, arr->arr_sz);
+ kmem_free(arr, sizeof (*arr));
+}
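+
+/*
+ * A minimal usage sketch of the drmach_array interfaces above (the
+ * index range, some_id and some_disposer are hypothetical, for
+ * illustration only):
+ *
+ *     drmach_array_t *arr = drmach_array_new(0, 3);
+ *     drmachid_t val;
+ *     uint_t idx;
+ *     int rv;
+ *
+ *     (void) drmach_array_set(arr, 2, some_id);
+ *     for (rv = drmach_array_first(arr, &idx, &val); rv == 0;
+ *         rv = drmach_array_next(arr, &idx, &val)) {
+ *             (process val)
+ *     }
+ *     drmach_array_dispose(arr, some_disposer);
+ *
+ * Both iterators skip NULL slots, so the loop body only sees entries
+ * actually populated with drmach_array_set().
+ */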
+
+static drmach_board_t *
+drmach_get_board_by_bnum(uint_t bnum)
+{
+ drmachid_t id;
+
+ if (drmach_array_get(drmach_boards, bnum, &id) == 0)
+ return ((drmach_board_t *)id);
+ else
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_device_new(drmach_node_t *node,
+ drmach_board_t *bp, int portid, drmachid_t *idp)
+{
+ int i;
+ int rv;
+ drmach_device_t proto;
+ sbd_error_t *err;
+ char name[OBP_MAXDRVNAME];
+
+ rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME, name, OBP_MAXDRVNAME);
+ if (rv) {
+ /* every node is expected to have a name */
+ err = drerr_new(1, EX86_GETPROP, "device node %s: property %s",
+ ddi_node_name(node->getdip(node)),
+ ACPIDEV_DR_PROP_DEVNAME);
+ return (err);
+ }
+
+ /*
+ * If the node currently being examined is not listed in the
+ * drmach_name2type[] array, it is of no interest to drmach. In that
+ * case both the device id and the returned error are set to yield
+ * nothing (no device or error structure).
+ */
+ i = drmach_name2type_idx(name);
+ if (i < 0) {
+ *idp = (drmachid_t)0;
+ return (NULL);
+ }
+
+ /* device specific new function will set unum */
+ bzero(&proto, sizeof (proto));
+ proto.type = drmach_name2type[i].type;
+ proto.bp = bp;
+ proto.node = node;
+ proto.portid = portid;
+
+ return (drmach_name2type[i].new(&proto, idp));
+}
+
+static void
+drmach_device_dispose(drmachid_t id)
+{
+ drmach_device_t *self = id;
+
+ self->cm.dispose(id);
+}
+
+static sbd_error_t *
+drmach_device_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_common_t *cp;
+
+ if (!DRMACH_IS_ID(id))
+ return (drerr_new(0, EX86_NOTID, NULL));
+ cp = id;
+
+ return (cp->status(id, stat));
+}
+
+drmach_board_t *
+drmach_board_new(uint_t bnum, int boot_board)
+{
+ static void drmach_board_dispose(drmachid_t id);
+ static sbd_error_t *drmach_board_release(drmachid_t);
+ static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *);
+
+ sbd_error_t *err;
+ drmach_board_t *bp;
+ dev_info_t *dip = NULL;
+
+ bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP);
+ bp->cm.isa = (void *)drmach_board_new;
+ bp->cm.release = drmach_board_release;
+ bp->cm.status = drmach_board_status;
+
+ bp->bnum = bnum;
+ bp->devices = NULL;
+ bp->tree = drmach_node_new();
+
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &bp->tree->here))) {
+ acpidev_dr_unlock_all();
+ drmach_board_dispose(bp);
+ return (NULL);
+ }
+ acpidev_dr_unlock_all();
+ ASSERT(bp->tree->here != NULL);
+
+ err = drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name));
+ if (err != NULL) {
+ sbd_err_clear(&err);
+ drmach_board_dispose(bp);
+ return (NULL);
+ }
+
+ if (acpidev_dr_device_is_powered(bp->tree->here)) {
+ bp->boot_board = boot_board;
+ bp->powered = 1;
+ } else {
+ bp->boot_board = 0;
+ bp->powered = 0;
+ }
+ bp->assigned = boot_board;
+ if (ACPI_SUCCESS(acpica_get_devinfo(bp->tree->here, &dip))) {
+ bp->connected = 1;
+ } else {
+ bp->connected = 0;
+ }
+
+ (void) drmach_array_set(drmach_boards, bnum, bp);
+
+ return (bp);
+}
+
+static void
+drmach_board_dispose(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ ASSERT(DRMACH_IS_BOARD_ID(id));
+ bp = id;
+
+ if (bp->tree)
+ drmach_node_dispose(bp->tree);
+
+ if (bp->devices)
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+
+ kmem_free(bp, sizeof (drmach_board_t));
+}
+
+static sbd_error_t *
+drmach_board_release(drmachid_t id)
+{
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static int
+drmach_board_check_power(drmach_board_t *bp)
+{
+ DRMACH_HANDLE hdl;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+
+ return (acpidev_dr_device_is_powered(hdl));
+}
+
+struct drmach_board_list_dep_arg {
+ int count;
+ size_t len;
+ ssize_t off;
+ char *buf;
+ char temp[MAXPATHLEN];
+};
+
+static ACPI_STATUS
+drmach_board_generate_name(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ _NOTE(ARGUNUSED(retval));
+
+ struct drmach_board_list_dep_arg *argp = ctx;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(ctx != NULL);
+
+ /* Skip non-board devices. */
+ if (!acpidev_dr_device_is_board(hdl)) {
+ return (AE_OK);
+ }
+
+ if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, argp->temp,
+ sizeof (argp->temp)))) {
+ DRMACH_PR("!drmach_board_generate_name: failed to "
+ "generate board name for handle %p.", hdl);
+ /* Keep on walking. */
+ return (AE_OK);
+ }
+ argp->count++;
+ argp->off += snprintf(argp->buf + argp->off, argp->len - argp->off,
+ " %s", argp->temp);
+ if (argp->off >= argp->len) {
+ return (AE_CTRL_TERMINATE);
+ }
+
+ return (AE_OK);
+}
+
+static ssize_t
+drmach_board_list_dependency(ACPI_HANDLE hdl, boolean_t edl, char *prefix,
+ char *buf, size_t len)
+{
+ ACPI_STATUS rc;
+ ssize_t off;
+ struct drmach_board_list_dep_arg *ap;
+
+ ASSERT(buf != NULL && len != 0);
+ if (buf == NULL || len == 0) {
+ return (-1);
+ }
+
+ ap = kmem_zalloc(sizeof (*ap), KM_SLEEP);
+ ap->buf = buf;
+ ap->len = len;
+ ap->off = snprintf(buf, len, "%s", prefix);
+ if (ap->off >= len) {
+ *buf = '\0';
+ kmem_free(ap, sizeof (*ap));
+ return (-1);
+ }
+
+ /* Generate the device dependency list. */
+ if (edl) {
+ rc = acpidev_dr_device_walk_edl(hdl,
+ drmach_board_generate_name, ap, NULL);
+ } else {
+ rc = acpidev_dr_device_walk_ejd(hdl,
+ drmach_board_generate_name, ap, NULL);
+ }
+ if (ACPI_FAILURE(rc)) {
+ *buf = '\0';
+ ap->off = -1;
+ } else if (ap->count == 0) {
+ /* No device has a dependency on this board. */
+ *buf = '\0';
+ ap->off = 0;
+ }
+
+ off = ap->off;
+ kmem_free(ap, sizeof (*ap));
+
+ return (off);
+}
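+
+/*
+ * With the helper above, drmach_board_status() below ends up with
+ * stat->info of the form (board names are illustrative only):
+ *
+ *     "EDL: <name> <name>, EJD: <name>"
+ *
+ * i.e. the eject device list followed by the eject dependency list,
+ * with each name produced by acpidev_dr_get_board_name().
+ */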
+
+static sbd_error_t *
+drmach_board_status(drmachid_t id, drmach_status_t *stat)
+{
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+ size_t off;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ if (bp->tree == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ stat->busy = 0; /* assume not busy */
+ stat->configured = 0; /* assume not configured */
+ stat->assigned = bp->assigned;
+ stat->powered = bp->powered = acpidev_dr_device_is_powered(hdl);
+ stat->empty = !acpidev_dr_device_is_present(hdl);
+ if (ACPI_SUCCESS(acpidev_dr_device_check_status(hdl))) {
+ stat->cond = bp->cond = SBD_COND_OK;
+ } else {
+ stat->cond = bp->cond = SBD_COND_FAILED;
+ }
+ stat->info[0] = '\0';
+
+ /* Generate the eject device list. */
+ if (drmach_board_list_dependency(hdl, B_TRUE, "EDL:",
+ stat->info, sizeof (stat->info)) < 0) {
+ DRMACH_PR("!drmach_board_status: failed to generate "
+ "eject device list for board %d.", bp->bnum);
+ stat->info[0] = '\0';
+ }
+ off = strlen(stat->info);
+ if (off < sizeof (stat->info)) {
+ if (drmach_board_list_dependency(hdl, B_FALSE,
+ off ? ", EJD:" : "EJD:",
+ stat->info + off, sizeof (stat->info) - off) < 0) {
+ DRMACH_PR("!drmach_board_status: failed to generate "
+ "eject dependent device for board %d.", bp->bnum);
+ stat->info[off] = '\0';
+ }
+ }
+
+ switch (acpidev_dr_get_board_type(bp->tree->get_dnode(bp->tree))) {
+ case ACPIDEV_CPU_BOARD:
+ (void) strlcpy(stat->type, "CPU Board", sizeof (stat->type));
+ break;
+ case ACPIDEV_MEMORY_BOARD:
+ (void) strlcpy(stat->type, "MemoryBoard", sizeof (stat->type));
+ break;
+ case ACPIDEV_IO_BOARD:
+ (void) strlcpy(stat->type, "IO Board", sizeof (stat->type));
+ break;
+ case ACPIDEV_SYSTEM_BOARD:
+ /*FALLTHROUGH*/
+ default:
+ (void) strlcpy(stat->type, "SystemBoard", sizeof (stat->type));
+ break;
+ }
+
+ if (bp->devices) {
+ int rv;
+ uint_t d_idx;
+ drmachid_t d_id;
+
+ rv = drmach_array_first(bp->devices, &d_idx, &d_id);
+ while (rv == 0) {
+ drmach_status_t d_stat;
+
+ err = drmach_device_status(d_id, &d_stat);
+ if (err)
+ break;
+
+ stat->busy |= d_stat.busy;
+ stat->configured |= d_stat.configured;
+
+ rv = drmach_array_next(bp->devices, &d_idx, &d_id);
+ }
+ }
+
+ return (err);
+}
+
+/*
+ * When DR is initialized, we walk the device tree and acquire a hold on
+ * all the nodes that are interesting to DR. This is so that the corresponding
+ * branches cannot be deleted.
+ */
+static int
+drmach_hold_rele_devtree(dev_info_t *rdip, void *arg)
+{
+ int *holdp = (int *)arg;
+ ACPI_HANDLE hdl = NULL;
+ acpidev_data_handle_t dhdl;
+
+ /* Skip nodes and subtrees which are not created by acpidev. */
+ if (ACPI_FAILURE(acpica_get_handle(rdip, &hdl))) {
+ return (DDI_WALK_PRUNECHILD);
+ }
+ ASSERT(hdl != NULL);
+ dhdl = acpidev_data_get_handle(hdl);
+ if (dhdl == NULL) {
+ return (DDI_WALK_PRUNECHILD);
+ }
+
+ /* Hold/release devices which are interesting to DR operations. */
+ if (acpidev_data_dr_ready(dhdl)) {
+ if (*holdp) {
+ ASSERT(!e_ddi_branch_held(rdip));
+ e_ddi_branch_hold(rdip);
+ } else {
+ ASSERT(e_ddi_branch_held(rdip));
+ e_ddi_branch_rele(rdip);
+ }
+ }
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+drmach_hold_devtree(void)
+{
+ dev_info_t *dip;
+ int circ;
+ int hold = 1;
+
+ dip = ddi_root_node();
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold);
+ ndi_devi_exit(dip, circ);
+}
+
+static void
+drmach_release_devtree(void)
+{
+ dev_info_t *dip;
+ int circ;
+ int hold = 0;
+
+ dip = ddi_root_node();
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold);
+ ndi_devi_exit(dip, circ);
+}
+
+static boolean_t
+drmach_cpr_callb(void *arg, int code)
+{
+ _NOTE(ARGUNUSED(arg));
+
+ if (code == CB_CODE_CPR_CHKPT) {
+ /*
+ * Temporarily block CPR operations if there are DR operations
+ * ongoing.
+ */
+ rw_enter(&drmach_cpr_rwlock, RW_WRITER);
+ } else {
+ rw_exit(&drmach_cpr_rwlock);
+ }
+
+ return (B_TRUE);
+}
+
+static int
+drmach_init(void)
+{
+ DRMACH_HANDLE hdl;
+ drmachid_t id;
+ uint_t bnum;
+
+ if (MAX_BOARDS > SHRT_MAX) {
+ cmn_err(CE_WARN, "!drmach_init: system has too many (%d) "
+ "hotplug capable boards.", MAX_BOARDS);
+ return (ENXIO);
+ } else if (MAX_CMP_UNITS_PER_BOARD > 1) {
+ cmn_err(CE_WARN, "!drmach_init: DR doesn't support multiple "
+ "(%d) physical processors on one board.",
+ MAX_CMP_UNITS_PER_BOARD);
+ return (ENXIO);
+ } else if (MAX_CORES_PER_CMP & (MAX_CORES_PER_CMP - 1)) {
+ cmn_err(CE_WARN, "!drmach_init: number of logical CPUs (%d) in "
+ "physical processor is not power of 2.",
+ MAX_CORES_PER_CMP);
+ return (ENXIO);
+ } else if (MAX_CPU_UNITS_PER_BOARD > DEVSET_CPU_NUMBER ||
+ MAX_MEM_UNITS_PER_BOARD > DEVSET_MEM_NUMBER ||
+ MAX_IO_UNITS_PER_BOARD > DEVSET_IO_NUMBER) {
+ cmn_err(CE_WARN, "!drmach_init: system has more CPU/memory/IO "
+ "units than the DR driver can handle.");
+ return (ENXIO);
+ }
+
+ rw_init(&drmach_cpr_rwlock, NULL, RW_DEFAULT, NULL);
+ drmach_cpr_cid = callb_add(drmach_cpr_callb, NULL,
+ CB_CL_CPR_PM, "drmach");
+
+ rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL);
+ drmach_boards = drmach_array_new(0, MAX_BOARDS - 1);
+ drmach_domain.allow_dr = acpidev_dr_capable();
+
+ for (bnum = 0; bnum < MAX_BOARDS; bnum++) {
+ hdl = NULL;
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl)) ||
+ hdl == NULL) {
+ cmn_err(CE_WARN, "!drmach_init: failed to lookup ACPI "
+ "handle for board %d.", bnum);
+ continue;
+ }
+ if (drmach_array_get(drmach_boards, bnum, &id) == -1) {
+ DRMACH_PR("!drmach_init: failed to get handle "
+ "for board %d.", bnum);
+ ASSERT(0);
+ goto error;
+ } else if (id == NULL) {
+ (void) drmach_board_new(bnum, 1);
+ }
+ }
+
+ /*
+ * Walk descendants of the devinfo root node and hold
+ * all devinfo branches of interest.
+ */
+ drmach_hold_devtree();
+
+ return (0);
+
+error:
+ drmach_array_dispose(drmach_boards, drmach_board_dispose);
+ rw_destroy(&drmach_boards_rwlock);
+ rw_destroy(&drmach_cpr_rwlock);
+ return (ENXIO);
+}
+
+static void
+drmach_fini(void)
+{
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ if (drmach_boards != NULL) {
+ drmach_array_dispose(drmach_boards, drmach_board_dispose);
+ drmach_boards = NULL;
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ /*
+ * Walk descendants of the root devinfo node and release the
+ * holds acquired on branches in drmach_init().
+ */
+ drmach_release_devtree();
+
+ (void) callb_delete(drmach_cpr_cid);
+ rw_destroy(&drmach_cpr_rwlock);
+ rw_destroy(&drmach_boards_rwlock);
+}
+
+sbd_error_t *
+drmach_io_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_io_dispose(drmachid_t);
+ static sbd_error_t *drmach_io_release(drmachid_t);
+ static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *);
+
+ drmach_io_t *ip;
+ int portid;
+
+ portid = proto->portid;
+ ASSERT(portid != -1);
+ proto->unum = portid;
+
+ ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP);
+ bcopy(proto, &ip->dev, sizeof (ip->dev));
+ ip->dev.node = drmach_node_dup(proto->node);
+ ip->dev.cm.isa = (void *)drmach_io_new;
+ ip->dev.cm.dispose = drmach_io_dispose;
+ ip->dev.cm.release = drmach_io_release;
+ ip->dev.cm.status = drmach_io_status;
+ (void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d",
+ ip->dev.type, ip->dev.unum);
+
+ *idp = (drmachid_t)ip;
+
+ return (NULL);
+}
+
+static void
+drmach_io_dispose(drmachid_t id)
+{
+ drmach_io_t *self;
+
+ ASSERT(DRMACH_IS_IO_ID(id));
+
+ self = id;
+ if (self->dev.node)
+ drmach_node_dispose(self->dev.node);
+
+ kmem_free(self, sizeof (*self));
+}
+
+static sbd_error_t *
+drmach_io_release(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_io_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_device_t *dp;
+ sbd_error_t *err;
+ int configured;
+
+ ASSERT(DRMACH_IS_IO_ID(id));
+ dp = id;
+
+ err = drmach_io_is_attached(id, &configured);
+ if (err)
+ return (err);
+
+ stat->assigned = dp->bp->assigned;
+ stat->powered = dp->bp->powered;
+ stat->configured = (configured != 0);
+ stat->busy = dp->busy;
+ (void) strlcpy(stat->type, dp->type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_cpu_dispose(drmachid_t);
+ static sbd_error_t *drmach_cpu_release(drmachid_t);
+ static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *);
+
+ int portid;
+ processorid_t cpuid;
+ drmach_cpu_t *cp = NULL;
+
+ /* the portid is the APIC ID of the node */
+ portid = proto->portid;
+ ASSERT(portid != -1);
+
+ /*
+ * Assume all CPUs are homogeneous and have the same number of
+ * cores/threads.
+ */
+ proto->unum = portid % MAX_CPU_UNITS_PER_BOARD;
+
+ cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP);
+ bcopy(proto, &cp->dev, sizeof (cp->dev));
+ cp->dev.node = drmach_node_dup(proto->node);
+ cp->dev.cm.isa = (void *)drmach_cpu_new;
+ cp->dev.cm.dispose = drmach_cpu_dispose;
+ cp->dev.cm.release = drmach_cpu_release;
+ cp->dev.cm.status = drmach_cpu_status;
+ (void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d",
+ cp->dev.type, cp->dev.unum);
+
+ cp->apicid = portid;
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(
+ drmach_node_get_dnode(proto->node), &cpuid))) {
+ cp->cpuid = cpuid;
+ } else {
+ cp->cpuid = -1;
+ }
+
+ /* Mark CPU0 as busy; many other components depend on it. */
+ if (cp->cpuid == 0) {
+ cp->dev.busy = 1;
+ }
+
+ *idp = (drmachid_t)cp;
+
+ return (NULL);
+}
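+
+/*
+ * Note on the unum computation above: since CPUs are assumed to be
+ * homogeneous, the unit number is simply the APIC ID modulo the
+ * per-board CPU count. For example (hypothetical numbers), with
+ * MAX_CPU_UNITS_PER_BOARD == 32, a CPU with APIC ID 34 becomes unit 2
+ * of its board.
+ */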
+
+static void
+drmach_cpu_dispose(drmachid_t id)
+{
+ drmach_cpu_t *self;
+
+ ASSERT(DRMACH_IS_CPU_ID(id));
+
+ self = id;
+ if (self->dev.node)
+ drmach_node_dispose(self->dev.node);
+
+ kmem_free(self, sizeof (*self));
+}
+
+static sbd_error_t *
+drmach_cpu_release(drmachid_t id)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_cpu_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_cpu_t *cp;
+ drmach_device_t *dp;
+
+ ASSERT(DRMACH_IS_CPU_ID(id));
+ cp = (drmach_cpu_t *)id;
+ dp = &cp->dev;
+
+ stat->assigned = dp->bp->assigned;
+ stat->powered = dp->bp->powered;
+ mutex_enter(&cpu_lock);
+ stat->configured = (cpu_get(cp->cpuid) != NULL);
+ mutex_exit(&cpu_lock);
+ stat->busy = dp->busy;
+ (void) strlcpy(stat->type, dp->type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
+static int
+drmach_setup_mc_info(DRMACH_HANDLE hdl, drmach_mem_t *mp)
+{
+ uint_t i, j, count;
+ struct memlist *ml = NULL, *ml2 = NULL;
+ acpidev_regspec_t *regp;
+ uint64_t align, addr_min, addr_max, total_size, skipped_size;
+
+ if (hdl == NULL) {
+ return (-1);
+ } else if (ACPI_FAILURE(acpidev_dr_get_mem_alignment(hdl, &align))) {
+ return (-1);
+ } else {
+ ASSERT((align & (align - 1)) == 0);
+ mp->mem_alignment = align;
+ }
+
+ addr_min = UINT64_MAX;
+ addr_max = 0;
+ total_size = 0;
+ skipped_size = 0;
+ /*
+ * There's a memory hole just below 4G on x86, which needs special
+ * handling. All other addresses assigned to a specific memory device
+ * should be contiguous.
+ */
+ if (ACPI_FAILURE(acpidev_dr_device_get_regspec(hdl, TRUE, &regp,
+ &count))) {
+ return (-1);
+ }
+ for (i = 0, j = 0; i < count; i++) {
+ uint64_t addr, size;
+
+ addr = (uint64_t)regp[i].phys_mid << 32;
+ addr |= (uint64_t)regp[i].phys_low;
+ size = (uint64_t)regp[i].size_hi << 32;
+ size |= (uint64_t)regp[i].size_low;
+ if (size == 0)
+ continue;
+ else
+ j++;
+
+ total_size += size;
+ if (addr < addr_min)
+ addr_min = addr;
+ if (addr + size > addr_max)
+ addr_max = addr + size;
+ if (mp->dev.bp->boot_board ||
+ j <= acpidev_dr_max_segments_per_mem_device()) {
+ ml = memlist_add_span(ml, addr, size);
+ } else {
+ skipped_size += size;
+ }
+ }
+ acpidev_dr_device_free_regspec(regp, count);
+
+ if (skipped_size != 0) {
+ cmn_err(CE_WARN, "!drmach: too many (%d) segments on memory "
+ "device, max (%d) segments supported, 0x%" PRIx64 " bytes "
+ "of memory skipped.",
+ j, acpidev_dr_max_segments_per_mem_device(), skipped_size);
+ }
+
+ mp->slice_base = addr_min;
+ mp->slice_top = addr_max;
+ mp->slice_size = total_size;
+
+ if (mp->dev.bp->boot_board) {
+ uint64_t endpa = _ptob64(physmax + 1);
+
+ /*
+ * We intersect with phys_install to get base_pa.
+ * This only works at boot time.
+ */
+ memlist_read_lock();
+ ml2 = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+ ml2 = memlist_del_span(ml2, 0ull, mp->slice_base);
+ if (ml2 && endpa > addr_max) {
+ ml2 = memlist_del_span(ml2, addr_max, endpa - addr_max);
+ }
+ }
+
+ /*
+ * Create a memlist for the memory board.
+ * The created memlist contains only the configured memory if there's
+ * configured memory on the board; otherwise it contains all memory
+ * on the board.
+ */
+ if (ml2) {
+ uint64_t nbytes = 0;
+ struct memlist *p;
+
+ for (p = ml2; p; p = p->ml_next) {
+ nbytes += p->ml_size;
+ }
+ if (nbytes == 0) {
+ memlist_delete(ml2);
+ ml2 = NULL;
+ } else {
+ /* Node has configured memory at boot time. */
+ mp->base_pa = ml2->ml_address;
+ mp->nbytes = nbytes;
+ mp->memlist = ml2;
+ if (ml)
+ memlist_delete(ml);
+ }
+ }
+ if (ml2 == NULL) {
+ /* Not configured at boot time. */
+ mp->base_pa = UINT64_MAX;
+ mp->nbytes = 0;
+ mp->memlist = ml;
+ }
+
+ return (0);
+}
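+
+/*
+ * A worked example of the slice computation above (addresses are
+ * hypothetical): a memory device with two segments, [1GB, 3GB) and
+ * [4GB, 6GB), straddling the hole below 4GB, yields slice_base == 1GB,
+ * slice_top == 6GB and slice_size == 4GB; i.e. slice_size counts only
+ * bytes actually backed by the device, not the hole between segments.
+ */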
+
+sbd_error_t *
+drmach_mem_new(drmach_device_t *proto, drmachid_t *idp)
+{
+ static void drmach_mem_dispose(drmachid_t);
+ static sbd_error_t *drmach_mem_release(drmachid_t);
+ static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *);
+
+ DRMACH_HANDLE hdl;
+ drmach_mem_t *mp;
+ int portid;
+
+ mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP);
+ portid = proto->portid;
+ ASSERT(portid != -1);
+ proto->unum = portid;
+
+ bcopy(proto, &mp->dev, sizeof (mp->dev));
+ mp->dev.node = drmach_node_dup(proto->node);
+ mp->dev.cm.isa = (void *)drmach_mem_new;
+ mp->dev.cm.dispose = drmach_mem_dispose;
+ mp->dev.cm.release = drmach_mem_release;
+ mp->dev.cm.status = drmach_mem_status;
+
+ (void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s%d",
+ mp->dev.type, proto->unum);
+ hdl = mp->dev.node->get_dnode(mp->dev.node);
+ ASSERT(hdl != NULL);
+ if (drmach_setup_mc_info(hdl, mp) != 0) {
+ kmem_free(mp, sizeof (drmach_mem_t));
+ *idp = (drmachid_t)NULL;
+ return (drerr_new(1, EX86_MC_SETUP, NULL));
+ }
+
+ /* make sure we do not create memoryless nodes */
+ if (mp->nbytes == 0 && mp->slice_size == 0) {
+ kmem_free(mp, sizeof (drmach_mem_t));
+ *idp = (drmachid_t)NULL;
+ } else
+ *idp = (drmachid_t)mp;
+
+ return (NULL);
+}
+
+static void
+drmach_mem_dispose(drmachid_t id)
+{
+ drmach_mem_t *mp;
+
+ ASSERT(DRMACH_IS_MEM_ID(id));
+
+ mp = id;
+
+ if (mp->dev.node)
+ drmach_node_dispose(mp->dev.node);
+
+ if (mp->memlist) {
+ memlist_delete(mp->memlist);
+ mp->memlist = NULL;
+ }
+
+ kmem_free(mp, sizeof (*mp));
+}
+
+static sbd_error_t *
+drmach_mem_release(drmachid_t id)
+{
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+static sbd_error_t *
+drmach_mem_status(drmachid_t id, drmach_status_t *stat)
+{
+ uint64_t pa;
+ drmach_mem_t *dp;
+ struct memlist *ml = NULL;
+
+ ASSERT(DRMACH_IS_MEM_ID(id));
+ dp = id;
+
+ /* get starting physical address of target memory */
+ pa = dp->base_pa;
+ /* round down to slice boundary */
+ pa &= ~(dp->mem_alignment - 1);
+
+ /* stop at first span that is in slice */
+ memlist_read_lock();
+ for (ml = phys_install; ml; ml = ml->ml_next)
+ if (ml->ml_address >= pa && ml->ml_address < dp->slice_top)
+ break;
+ memlist_read_unlock();
+
+ stat->assigned = dp->dev.bp->assigned;
+ stat->powered = dp->dev.bp->powered;
+ stat->configured = (ml != NULL);
+ stat->busy = dp->dev.busy;
+ (void) strlcpy(stat->type, dp->dev.type, sizeof (stat->type));
+ stat->info[0] = '\0';
+
+ return (NULL);
+}
+
+/*
+ * Public interfaces exported to support the platform-independent dr driver.
+ */
+uint_t
+drmach_max_boards(void)
+{
+ return (acpidev_dr_max_boards());
+}
+
+uint_t
+drmach_max_io_units_per_board(void)
+{
+ return (acpidev_dr_max_io_units_per_board());
+}
+
+uint_t
+drmach_max_cmp_units_per_board(void)
+{
+ return (acpidev_dr_max_cmp_units_per_board());
+}
+
+uint_t
+drmach_max_mem_units_per_board(void)
+{
+ return (acpidev_dr_max_mem_units_per_board());
+}
+
+uint_t
+drmach_max_core_per_cmp(void)
+{
+ return (acpidev_dr_max_cpu_units_per_cmp());
+}
+
+sbd_error_t *
+drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts, void *argp)
+{
+ drmach_board_t *bp = (drmach_board_t *)id;
+ sbd_error_t *err = NULL;
+
+ /* allow status and ncm operations to always succeed */
+ if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) {
+ return (NULL);
+ }
+
+ switch (cmd) {
+ case SBD_CMD_POWERON:
+ case SBD_CMD_POWEROFF:
+ /*
+ * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+ * Note: this is a temporary solution and will be revised when
+ * fast reboot can support CPU/MEM/IOH DR operations in the
+ * future.
+ *
+ * ACPI BIOS generates some static ACPI tables, such as MADT,
+ * SRAT and SLIT, to describe the system hardware configuration
+ * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+ * static tables won't be updated and will become stale.
+ *
+ * If we reset the system by fast reboot, BIOS will have no
+ * chance to regenerate those stale static tables. Fast reboot
+ * can't tolerate such inconsistency between stale ACPI tables
+ * and the real hardware configuration yet.
+ *
+ * A temporary solution is introduced to disable fast reboot if
+ * CPU/MEM/IOH hotplug event happens. This solution should be
+ * revised when fast reboot is enhanced to support CPU/MEM/IOH
+ * DR operations.
+ */
+ fastreboot_disable(FBNS_HOTPLUG);
+ /*FALLTHROUGH*/
+
+ default:
+ /* Block out the CPR thread. */
+ rw_enter(&drmach_cpr_rwlock, RW_READER);
+ break;
+ }
+
+ /* check all other commands for the required option string */
+ if ((opts->size > 0) && (opts->copts != NULL)) {
+ if (strstr(opts->copts, ACPIDEV_CMD_OST_PREFIX) == NULL) {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+ } else {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+
+ if (!err && id && DRMACH_IS_BOARD_ID(id)) {
+ switch (cmd) {
+ case SBD_CMD_TEST:
+ break;
+ case SBD_CMD_CONNECT:
+ if (bp->connected)
+ err = drerr_new(0, ESBD_STATE, NULL);
+ else if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+ case SBD_CMD_DISCONNECT:
+ if (!bp->connected)
+ err = drerr_new(0, ESBD_STATE, NULL);
+ else if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+ default:
+ if (!drmach_domain.allow_dr)
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ break;
+
+ }
+ }
+
+ /*
+ * CPU/memory/IO DR operations will be supported in stages on x86.
+ * In early versions, some operations must be blocked here.
+ * This temporary hook will be removed when all CPU/memory/IO DR
+ * operations are supported on x86 systems.
+ *
+ * We only need to filter unsupported device types for the
+ * SBD_CMD_CONFIGURE/SBD_CMD_UNCONFIGURE commands; all other
+ * commands are supported by all device types.
+ */
+ if (!err && (cmd == SBD_CMD_CONFIGURE || cmd == SBD_CMD_UNCONFIGURE)) {
+ int i;
+ dr_devset_t *devsetp = (dr_devset_t *)argp;
+ dr_devset_t devset = *devsetp;
+
+ switch (cmd) {
+ case SBD_CMD_CONFIGURE:
+ if (!plat_dr_support_cpu()) {
+ DEVSET_DEL(devset, SBD_COMP_CPU,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_CPU_UNITS_PER_BOARD;
+ i < DEVSET_CPU_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, i);
+ }
+ }
+
+ if (!plat_dr_support_memory()) {
+ DEVSET_DEL(devset, SBD_COMP_MEM,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_MEM_UNITS_PER_BOARD;
+ i < DEVSET_MEM_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_MEM, i);
+ }
+ }
+
+ /* No support of configuring IOH devices yet. */
+ DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT);
+ break;
+
+ case SBD_CMD_UNCONFIGURE:
+ if (!plat_dr_support_cpu()) {
+ DEVSET_DEL(devset, SBD_COMP_CPU,
+ DEVSET_ANYUNIT);
+ } else {
+ for (i = MAX_CPU_UNITS_PER_BOARD;
+ i < DEVSET_CPU_NUMBER; i++) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, i);
+ }
+ }
+
+ /* No support of unconfiguring MEM/IOH devices yet. */
+ DEVSET_DEL(devset, SBD_COMP_MEM, DEVSET_ANYUNIT);
+ DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT);
+ break;
+ }
+
+ *devsetp = devset;
+ if (DEVSET_IS_NULL(devset)) {
+ err = drerr_new(1, EX86_SUPPORT, NULL);
+ }
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts, int rv)
+{
+ _NOTE(ARGUNUSED(id, opts, rv));
+
+ switch (cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ break;
+
+ default:
+ rw_exit(&drmach_cpr_rwlock);
+ break;
+ }
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_configure(drmachid_t id, int flags)
+{
+ _NOTE(ARGUNUSED(flags));
+
+ drmach_device_t *dp;
+ sbd_error_t *err = NULL;
+ dev_info_t *rdip;
+ dev_info_t *fdip = NULL;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ rdip = dp->node->getdip(dp->node);
+ ASSERT(rdip);
+ ASSERT(e_ddi_branch_held(rdip));
+
+ /* allocate cpu id for the CPU device. */
+ if (DRMACH_IS_CPU_ID(id)) {
+ DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node);
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(acpidev_dr_allocate_cpuid(hdl, NULL))) {
+ err = drerr_new(1, EX86_ALLOC_CPUID, NULL);
+ }
+ return (err);
+ }
+
+ if (DRMACH_IS_MEM_ID(id)) {
+ err = drmach_mem_update_lgrp(id);
+ if (err)
+ return (err);
+ }
+
+ if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ dev_info_t *dip = (fdip != NULL) ? fdip : rdip;
+
+ (void) ddi_pathname(dip, path);
+ err = drerr_new(1, EX86_DRVFAIL, path);
+ kmem_free(path, MAXPATHLEN);
+
+ /* If non-NULL, fdip is returned held and must be released */
+ if (fdip != NULL)
+ ddi_release_devi(fdip);
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_unconfigure(drmachid_t id, int flags)
+{
+ _NOTE(ARGUNUSED(flags));
+
+ drmach_device_t *dp;
+ sbd_error_t *err = NULL;
+ dev_info_t *rdip, *fdip = NULL;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ rdip = dp->node->getdip(dp->node);
+ ASSERT(rdip);
+ ASSERT(e_ddi_branch_held(rdip));
+
+ if (DRMACH_IS_CPU_ID(id)) {
+ DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node);
+ ASSERT(hdl != NULL);
+ if (ACPI_FAILURE(acpidev_dr_free_cpuid(hdl))) {
+ err = drerr_new(1, EX86_FREE_CPUID, NULL);
+ }
+ return (err);
+ }
+
+ /*
+ * Note: FORCE flag is no longer necessary under devfs
+ */
+ if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * If non-NULL, fdip is returned held and must be released.
+ */
+ if (fdip != NULL) {
+ (void) ddi_pathname(fdip, path);
+ ndi_rele_devi(fdip);
+ } else {
+ (void) ddi_pathname(rdip, path);
+ }
+
+ err = drerr_new(1, EX86_DRVFAIL, path);
+
+ kmem_free(path, MAXPATHLEN);
+ }
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_get_dip(drmachid_t id, dev_info_t **dip)
+{
+ drmach_device_t *dp;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ *dip = dp->node->getdip(dp->node);
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_release(drmachid_t id)
+{
+ drmach_common_t *cp;
+
+ if (!DRMACH_IS_DEVICE_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ cp = id;
+
+ return (cp->release(id));
+}
+
+sbd_error_t *
+drmach_status(drmachid_t id, drmach_status_t *stat)
+{
+ drmach_common_t *cp;
+ sbd_error_t *err;
+
+ rw_enter(&drmach_boards_rwlock, RW_READER);
+ if (!DRMACH_IS_ID(id)) {
+ rw_exit(&drmach_boards_rwlock);
+ return (drerr_new(0, EX86_NOTID, NULL));
+ }
+ cp = (drmach_common_t *)id;
+ err = cp->status(id, stat);
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+static sbd_error_t *
+drmach_update_acpi_status(drmachid_t id, drmach_opts_t *opts)
+{
+ char *copts;
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+ int event, code;
+ boolean_t inprogress = B_FALSE;
+
+ if (DRMACH_NULL_ID(id) || !DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+ hdl = drmach_node_get_dnode(bp->tree);
+ ASSERT(hdl != NULL);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Get the status code. */
+ copts = opts->copts;
+ if (strncmp(copts, ACPIDEV_CMD_OST_INPROGRESS,
+ strlen(ACPIDEV_CMD_OST_INPROGRESS)) == 0) {
+ inprogress = B_TRUE;
+ code = ACPI_OST_STA_INSERT_IN_PROGRESS;
+ copts += strlen(ACPIDEV_CMD_OST_INPROGRESS);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_SUCCESS,
+ strlen(ACPIDEV_CMD_OST_SUCCESS)) == 0) {
+ code = ACPI_OST_STA_SUCCESS;
+ copts += strlen(ACPIDEV_CMD_OST_SUCCESS);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_FAILURE,
+ strlen(ACPIDEV_CMD_OST_FAILURE)) == 0) {
+ code = ACPI_OST_STA_FAILURE;
+ copts += strlen(ACPIDEV_CMD_OST_FAILURE);
+ } else if (strncmp(copts, ACPIDEV_CMD_OST_NOOP,
+ strlen(ACPIDEV_CMD_OST_NOOP)) == 0) {
+ return (NULL);
+ } else {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+
+ /* Get the event type. */
+ copts = strstr(copts, ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ if (copts == NULL) {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+ copts += strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME);
+ if (copts[0] != '=') {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+ copts += strlen("=");
+ if (strncmp(copts, ACPIDEV_EVENT_TYPE_BUS_CHECK,
+ strlen(ACPIDEV_EVENT_TYPE_BUS_CHECK)) == 0) {
+ event = ACPI_NOTIFY_BUS_CHECK;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK,
+ strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK)) == 0) {
+ event = ACPI_NOTIFY_DEVICE_CHECK;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT,
+ strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT)) == 0) {
+ event = ACPI_NOTIFY_DEVICE_CHECK_LIGHT;
+ } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_EJECT_REQUEST,
+ strlen(ACPIDEV_EVENT_TYPE_EJECT_REQUEST)) == 0) {
+ event = ACPI_NOTIFY_EJECT_REQUEST;
+ if (inprogress) {
+ code = ACPI_OST_STA_EJECT_IN_PROGRESS;
+ }
+ } else {
+ return (drerr_new(0, EX86_UNKPTCMD, opts->copts));
+ }
+
+ (void) acpidev_eval_ost(hdl, event, code, NULL, 0);
+
+ return (NULL);
+}
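+
+/*
+ * For reference, the option string parsed above consists of a status
+ * token (one of the ACPIDEV_CMD_OST_* strings), followed by
+ * ACPIDEV_EVENT_TYPE_ATTR_NAME, an '=' and one of the
+ * ACPIDEV_EVENT_TYPE_* names; the concrete string values are defined
+ * in the acpidev headers and are not repeated here.
+ */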
+
+static struct {
+ const char *name;
+ sbd_error_t *(*handler)(drmachid_t id, drmach_opts_t *opts);
+} drmach_pt_arr[] = {
+ { ACPIDEV_CMD_OST_PREFIX, &drmach_update_acpi_status },
+ /* the following line must always be last */
+ { NULL, NULL }
+};
+
+sbd_error_t *
+drmach_passthru(drmachid_t id, drmach_opts_t *opts)
+{
+ int i;
+ sbd_error_t *err;
+
+ i = 0;
+ while (drmach_pt_arr[i].name != NULL) {
+ int len = strlen(drmach_pt_arr[i].name);
+
+ if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0)
+ break;
+
+ i += 1;
+ }
+
+ if (drmach_pt_arr[i].name == NULL)
+ err = drerr_new(0, EX86_UNKPTCMD, opts->copts);
+ else
+ err = (*drmach_pt_arr[i].handler)(id, opts);
+
+ return (err);
+}
+
+/*
+ * Board-specific interfaces to support the dr driver
+ */
+static int
+drmach_get_portid(drmach_node_t *np)
+{
+ uint32_t portid;
+
+ if (np->getprop(np, ACPIDEV_DR_PROP_PORTID,
+ &portid, sizeof (portid)) == 0) {
+ /*
+ * acpidev returns portid as a uint32_t; validate that it fits in an int.
+ */
+ if (portid > INT_MAX) {
+ return (-1);
+ } else {
+ return (portid);
+ }
+ }
+
+ return (-1);
+}
+
+/*
+ * This is a helper function to determine whether a given node should be
+ * considered for a dr operation, based on the predefined dr node types
+ * and the node's name.
+ * Formal Parameter: the name of a device node.
+ * Return Value: -1 if the name does not map to a valid dr type; an index
+ * greater than or equal to 0 if the name is a valid dr type.
+ */
+static int
+drmach_name2type_idx(char *name)
+{
+ int index, ntypes;
+
+ if (name == NULL)
+ return (-1);
+
+ /*
+ * Determine how many possible types are currently supported
+ * for dr.
+ */
+ ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]);
+
+ /* Determine if the node's name corresponds to a predefined type. */
+ for (index = 0; index < ntypes; index++) {
+ if (strcmp(drmach_name2type[index].name, name) == 0)
+ /* The node is an allowed type for dr. */
+ return (index);
+ }
+
+ /*
+ * If the name of the node does not map to any of the
+ * types in the array drmach_name2type then the node is not of
+ * interest to dr.
+ */
+ return (-1);
+}
+
+static int
+drmach_board_find_devices_cb(drmach_node_walk_args_t *args)
+{
+ drmach_node_t *node = args->node;
+ drmach_board_cb_data_t *data = args->data;
+ drmach_board_t *obj = data->obj;
+
+ int rv, portid;
+ uint32_t bnum;
+ drmachid_t id;
+ drmach_device_t *device;
+ char name[OBP_MAXDRVNAME];
+
+ portid = drmach_get_portid(node);
+ rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME,
+ name, OBP_MAXDRVNAME);
+ if (rv)
+ return (0);
+
+ rv = node->getprop(node, ACPIDEV_DR_PROP_BOARDNUM,
+ &bnum, sizeof (bnum));
+ if (rv) {
+ return (0);
+ }
+ if (bnum > INT_MAX) {
+ return (0);
+ }
+
+ if (bnum != obj->bnum)
+ return (0);
+
+ if (drmach_name2type_idx(name) < 0) {
+ return (0);
+ }
+
+ /*
+ * Create a device data structure from this node data.
+ * The call may yield nothing if the node is not of interest
+ * to drmach.
+ */
+ data->err = drmach_device_new(node, obj, portid, &id);
+ if (data->err)
+ return (-1);
+ else if (!id) {
+ /*
+ * drmach_device_new examined the node we passed in
+ * and determined that it is not of interest to
+ * drmach, so it is skipped.
+ */
+ return (0);
+ }
+
+ rv = drmach_array_set(obj->devices, data->ndevs++, id);
+ if (rv) {
+ data->err = DRMACH_INTERNAL_ERROR();
+ return (-1);
+ }
+ device = id;
+
+ data->err = (*data->found)(data->a, device->type, device->unum, id);
+
+ return (data->err == NULL ? 0 : -1);
+}
+
+sbd_error_t *
+drmach_board_find_devices(drmachid_t id, void *a,
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t))
+{
+ drmach_board_t *bp = (drmach_board_t *)id;
+ sbd_error_t *err;
+ int max_devices;
+ int rv;
+ drmach_board_cb_data_t data;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ max_devices = MAX_CPU_UNITS_PER_BOARD;
+ max_devices += MAX_MEM_UNITS_PER_BOARD;
+ max_devices += MAX_IO_UNITS_PER_BOARD;
+
+ if (bp->devices == NULL)
+ bp->devices = drmach_array_new(0, max_devices);
+ ASSERT(bp->tree != NULL);
+
+ data.obj = bp;
+ data.ndevs = 0;
+ data.found = found;
+ data.a = a;
+ data.err = NULL;
+
+ acpidev_dr_lock_all();
+ rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb);
+ acpidev_dr_unlock_all();
+ if (rv == 0) {
+ err = NULL;
+ } else {
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+ bp->devices = NULL;
+
+ if (data.err)
+ err = data.err;
+ else
+ err = DRMACH_INTERNAL_ERROR();
+ }
+
+ return (err);
+}
+
+int
+drmach_board_lookup(int bnum, drmachid_t *id)
+{
+ int rv = 0;
+
+ if (bnum < 0) {
+ *id = 0;
+ return (-1);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_READER);
+ if (drmach_array_get(drmach_boards, (uint_t)bnum, id)) {
+ *id = 0;
+ rv = -1;
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (rv);
+}
+
+sbd_error_t *
+drmach_board_name(int bnum, char *buf, int buflen)
+{
+ ACPI_HANDLE hdl;
+ sbd_error_t *err = NULL;
+
+ if (bnum < 0) {
+ return (drerr_new(1, EX86_BNUM, "%d", bnum));
+ }
+
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl))) {
+ DRMACH_PR("!drmach_board_name: failed to lookup ACPI handle "
+ "for board %d.", bnum);
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, buf, buflen))) {
+ DRMACH_PR("!drmach_board_name: failed to generate board name "
+ "for board %d.", bnum);
+ err = drerr_new(0, EX86_INVALID_ARG,
+ ": buffer is too small for board name.");
+ }
+ acpidev_dr_unlock_all();
+
+ return (err);
+}
+
+int
+drmach_board_is_floating(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (0);
+
+ bp = (drmach_board_t *)id;
+
+ return ((drmach_domain.floating & (1ULL << bp->bnum)) ? 1 : 0);
+}
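
drmach_domain.floating is a 64-bit mask with one bit per board number,
so the test above is a single shift and AND. A small sketch:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t floating = (1ULL << 2) | (1ULL << 5);	/* boards 2 and 5 */
	int bnum;

	for (bnum = 0; bnum < 8; bnum++)
		if ((floating & (1ULL << bnum)) != 0)
			printf("board %d is floating\n", bnum);
	return (0);	/* prints boards 2 and 5 */
}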
+
+static ACPI_STATUS
+drmach_board_check_dependent_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+ void **retval)
+{
+ uint32_t bdnum;
+ drmach_board_t *bp;
+ ACPI_STATUS rc = AE_OK;
+ int cmd = (int)(intptr_t)ctx;
+
+ ASSERT(hdl != NULL);
+ ASSERT(lvl == UINT32_MAX);
+ ASSERT(retval != NULL);
+
+ /* Skip non-board devices. */
+ if (!acpidev_dr_device_is_board(hdl)) {
+ return (AE_OK);
+ } else if (ACPI_FAILURE(acpidev_dr_get_board_number(hdl, &bdnum))) {
+ DRMACH_PR("!drmach_board_check_dependent_cb: failed to get "
+ "board number for object %p.\n", hdl);
+ return (AE_ERROR);
+ } else if (bdnum > MAX_BOARDS) {
+ DRMACH_PR("!drmach_board_check_dependent_cb: board number %u "
+ "is too big, max %u.", bdnum, MAX_BOARDS);
+ return (AE_ERROR);
+ }
+
+ bp = drmach_get_board_by_bnum(bdnum);
+ switch (cmd) {
+ case SBD_CMD_CONNECT:
+ /*
+ * Its parent board should be present, assigned, powered and
+ * connected when connecting the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (!bp->connected || !bp->powered || !bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_POWERON:
+ /*
+ * Its parent board should be present, assigned and powered when
+ * powering on the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (!bp->powered || !bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_ASSIGN:
+ /*
+ * Its parent board should be present and assigned when
+ * assigning the child board.
+ */
+ if (bp == NULL) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ } else if (!bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ break;
+
+ case SBD_CMD_DISCONNECT:
+ /*
+ * The child board should be disconnected if present when
+ * disconnecting its parent board.
+ */
+ if (bp != NULL && bp->connected) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ break;
+
+ case SBD_CMD_POWEROFF:
+ /*
+ * The child board should be disconnected and powered off if
+ * present when powering off its parent board.
+ */
+ if (bp != NULL) {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (bp->connected || bp->powered) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ /*
+ * The child board should be disconnected, powered off and
+ * unassigned if present when unassigning its parent board.
+ */
+ if (bp != NULL) {
+ bp->powered = acpidev_dr_device_is_powered(hdl);
+ if (bp->connected || bp->powered || bp->assigned) {
+ *retval = hdl;
+ rc = AE_ERROR;
+ }
+ }
+ break;
+
+ default:
+ /* Return success for all other commands. */
+ break;
+ }
+
+ return (rc);
+}
+
+sbd_error_t *
+drmach_board_check_dependent(int cmd, drmach_board_t *bp)
+{
+ int reverse;
+ char *name;
+ sbd_error_t *err = NULL;
+ DRMACH_HANDLE hdl;
+ DRMACH_HANDLE dp = NULL;
+
+ ASSERT(bp != NULL);
+ ASSERT(DRMACH_IS_BOARD_ID(bp));
+ ASSERT(RW_LOCK_HELD(&drmach_boards_rwlock));
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ switch (cmd) {
+ case SBD_CMD_ASSIGN:
+ case SBD_CMD_POWERON:
+ case SBD_CMD_CONNECT:
+ if (ACPI_SUCCESS(acpidev_dr_device_walk_ejd(hdl,
+ &drmach_board_check_dependent_cb,
+ (void *)(intptr_t)cmd, &dp))) {
+ return (NULL);
+ }
+ reverse = 0;
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ case SBD_CMD_POWEROFF:
+ case SBD_CMD_DISCONNECT:
+ if (ACPI_SUCCESS(acpidev_dr_device_walk_edl(hdl,
+ &drmach_board_check_dependent_cb,
+ (void *)(intptr_t)cmd, &dp))) {
+ return (NULL);
+ }
+ reverse = 1;
+ break;
+
+ default:
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ }
+
+ if (dp == NULL) {
+ return (drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name));
+ }
+ name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ if (ACPI_FAILURE(acpidev_dr_get_board_name(dp, name, MAXPATHLEN))) {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name);
+ } else if (reverse == 0) {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY,
+ "%s, depends on board %s", bp->cm.name, name);
+ } else {
+ err = drerr_new(1, EX86_WALK_DEPENDENCY,
+ "board %s depends on %s", name, bp->cm.name);
+ }
+ kmem_free(name, MAXPATHLEN);
+
+ return (err);
+}
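
Condensed, the rules enforced by drmach_board_check_dependent_cb() are
symmetric: forward commands (assign, poweron, connect) walk the
ejection-dependency (_EJD) chain and require each ancestor board to be
at least as far brought up, while reverse commands (unassign, poweroff,
disconnect) walk the ejection-dependents list and require each
descendant to be at least as far torn down. A userland model of the
per-command predicate; the enum names are local stand-ins for the
SBD_CMD_* values:

#include <stdio.h>
#include <stddef.h>

typedef struct {
	int assigned, powered, connected;
} bstate_t;

enum cmd { ASSIGN, POWERON, CONNECT, UNASSIGN, POWEROFF, DISCONNECT };

/* Returns 0 if the related board's state permits 'cmd', -1 otherwise. */
static int
dep_ok(enum cmd cmd, const bstate_t *bp)
{
	switch (cmd) {
	case CONNECT:	/* ancestor must be assigned, powered, connected */
		return (bp != NULL && bp->assigned && bp->powered &&
		    bp->connected ? 0 : -1);
	case POWERON:	/* ancestor must be assigned and powered */
		return (bp != NULL && bp->assigned && bp->powered ? 0 : -1);
	case ASSIGN:	/* ancestor must at least be assigned */
		return (bp != NULL && bp->assigned ? 0 : -1);
	case DISCONNECT: /* descendant must already be disconnected */
		return (bp == NULL || !bp->connected ? 0 : -1);
	case POWEROFF:	/* descendant must be disconnected and off */
		return (bp == NULL ||
		    (!bp->connected && !bp->powered) ? 0 : -1);
	case UNASSIGN:	/* descendant must be fully torn down */
		return (bp == NULL || (!bp->connected && !bp->powered &&
		    !bp->assigned) ? 0 : -1);
	}
	return (0);
}

int
main(void)
{
	bstate_t parent = { 1, 1, 0 };	/* assigned, powered, not connected */

	printf("connect: %d, poweron: %d\n",
	    dep_ok(CONNECT, &parent), dep_ok(POWERON, &parent));
	return (0);	/* prints: connect: -1, poweron: 0 */
}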
+
+sbd_error_t *
+drmach_board_assign(int bnum, drmachid_t *id)
+{
+ sbd_error_t *err = NULL;
+
+ if (bnum < 0) {
+ return (drerr_new(1, EX86_BNUM, "%d", bnum));
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+
+ if (drmach_array_get(drmach_boards, bnum, id) == -1) {
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else {
+ drmach_board_t *bp;
+
+ /*
+ * Board has already been created, downgrade to reader.
+ */
+ if (*id)
+ rw_downgrade(&drmach_boards_rwlock);
+
+ bp = *id;
+ if (!(*id))
+ bp = *id =
+ (drmachid_t)drmach_board_new(bnum, 0);
+
+ if (bp == NULL) {
+ DRMACH_PR("!drmach_board_assign: failed to create "
+ "object for board %d.", bnum);
+ err = drerr_new(1, EX86_BNUM, "%d", bnum);
+ } else {
+ err = drmach_board_check_dependent(SBD_CMD_ASSIGN, bp);
+ if (err == NULL)
+ bp->assigned = 1;
+ }
+ }
+
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_unassign(drmachid_t id)
+{
+ drmach_board_t *bp;
+ sbd_error_t *err;
+ drmach_status_t stat;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id)) {
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ }
+ bp = id;
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+
+ err = drmach_board_status(id, &stat);
+ if (err) {
+ rw_exit(&drmach_boards_rwlock);
+ return (err);
+ }
+
+ if (stat.configured || stat.busy) {
+ err = drerr_new(0, EX86_CONFIGBUSY, bp->cm.name);
+ } else if (bp->connected) {
+ err = drerr_new(0, EX86_CONNECTBUSY, bp->cm.name);
+ } else if (stat.powered) {
+ err = drerr_new(0, EX86_POWERBUSY, bp->cm.name);
+ } else {
+ err = drmach_board_check_dependent(SBD_CMD_UNASSIGN, bp);
+ if (err == NULL) {
+ if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0)
+ err = DRMACH_INTERNAL_ERROR();
+ else
+ drmach_board_dispose(bp);
+ }
+ }
+
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_poweron(drmachid_t id)
+{
+ drmach_board_t *bp;
+ sbd_error_t *err = NULL;
+ DRMACH_HANDLE hdl;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ bp->powered = drmach_board_check_power(bp);
+ if (bp->powered) {
+ return (NULL);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_POWERON, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_poweron(hdl)))
+ err = drerr_new(0, EX86_POWERON, NULL);
+ acpidev_dr_unlock_all();
+
+ /* Check whether the board is powered on. */
+ bp->powered = drmach_board_check_power(bp);
+ if (err == NULL && bp->powered == 0)
+ err = drerr_new(0, EX86_POWERON, NULL);
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_poweroff(drmachid_t id)
+{
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp;
+ drmach_status_t stat;
+ DRMACH_HANDLE hdl;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Check whether the board is busy, configured or connected. */
+ err = drmach_board_status(id, &stat);
+ if (err != NULL)
+ return (err);
+ if (stat.configured || stat.busy) {
+ return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name));
+ } else if (bp->connected) {
+ return (drerr_new(0, EX86_CONNECTBUSY, bp->cm.name));
+ }
+
+ bp->powered = drmach_board_check_power(bp);
+ if (bp->powered == 0) {
+ return (NULL);
+ }
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_POWEROFF, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_poweroff(hdl)))
+ err = drerr_new(0, EX86_POWEROFF, NULL);
+ acpidev_dr_unlock_all();
+
+ bp->powered = drmach_board_check_power(bp);
+ if (err == NULL && bp->powered != 0)
+ err = drerr_new(0, EX86_POWEROFF, NULL);
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force)
+{
+ _NOTE(ARGUNUSED(opts, force));
+
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ if (ACPI_FAILURE(acpidev_dr_device_check_status(hdl)))
+ return (drerr_new(0, EX86_IN_FAILURE, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_board_connect(drmachid_t id, drmach_opts_t *opts)
+{
+ _NOTE(ARGUNUSED(opts));
+
+ sbd_error_t *err = NULL;
+ drmach_board_t *bp;
+ DRMACH_HANDLE hdl;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_CONNECT, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_FAILURE(acpidev_dr_device_insert(hdl))) {
+ (void) acpidev_dr_device_remove(hdl);
+ err = drerr_new(1, EX86_PROBE, NULL);
+ } else {
+ bp->connected = 1;
+ }
+ acpidev_dr_unlock_all();
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts)
+{
+ _NOTE(ARGUNUSED(opts));
+
+ DRMACH_HANDLE hdl;
+ drmach_board_t *bp;
+ drmach_status_t stat;
+ sbd_error_t *err = NULL;
+
+ if (DRMACH_NULL_ID(id))
+ return (NULL);
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = (drmach_board_t *)id;
+
+ hdl = drmach_node_get_dnode(bp->tree);
+ if (hdl == NULL)
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Check whether the board is busy or configured. */
+ err = drmach_board_status(id, &stat);
+ if (err != NULL)
+ return (err);
+ if (stat.configured || stat.busy)
+ return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name));
+
+ rw_enter(&drmach_boards_rwlock, RW_WRITER);
+ err = drmach_board_check_dependent(SBD_CMD_DISCONNECT, bp);
+ if (err == NULL) {
+ acpidev_dr_lock_all();
+ if (ACPI_SUCCESS(acpidev_dr_device_remove(hdl))) {
+ bp->connected = 0;
+ } else {
+ err = drerr_new(1, EX86_DEPROBE, bp->cm.name);
+ }
+ acpidev_dr_unlock_all();
+ }
+ rw_exit(&drmach_boards_rwlock);
+
+ return (err);
+}
+
+sbd_error_t *
+drmach_board_deprobe(drmachid_t id)
+{
+ drmach_board_t *bp;
+
+ if (!DRMACH_IS_BOARD_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ bp = id;
+
+ cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum);
+
+ if (bp->devices) {
+ drmach_array_dispose(bp->devices, drmach_device_dispose);
+ bp->devices = NULL;
+ }
+
+ bp->boot_board = 0;
+
+ return (NULL);
+}
+
+/*
+ * CPU specific interfaces to support dr driver
+ */
+sbd_error_t *
+drmach_cpu_disconnect(drmachid_t id)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid)
+{
+ drmach_cpu_t *cpu;
+
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ cpu = (drmach_cpu_t *)id;
+
+ if (cpu->cpuid == -1) {
+ if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(
+ drmach_node_get_dnode(cpu->dev.node), cpuid))) {
+ cpu->cpuid = *cpuid;
+ } else {
+ *cpuid = -1;
+ }
+ } else {
+ *cpuid = cpu->cpuid;
+ }
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_cpu_get_impl(drmachid_t id, int *ip)
+{
+ if (!DRMACH_IS_CPU_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ /* Assume all CPUs in system are homogeneous. */
+ *ip = X86_CPU_IMPL_UNKNOWN;
+
+ kpreempt_disable();
+ if (cpuid_getvendor(CPU) == X86_VENDOR_Intel) {
+ /* NHM-EX CPU */
+ if (cpuid_getfamily(CPU) == 0x6 &&
+ cpuid_getmodel(CPU) == 0x2e) {
+ *ip = X86_CPU_IMPL_NEHALEM_EX;
+ }
+ }
+ kpreempt_enable();
+
+ return (NULL);
+}
+
+/*
+ * Memory specific interfaces to support dr driver
+ */
+
+/*
+ * When drmach_mem_new() is called, the mp->base_pa field is set to the base
+ * address of configured memory if there is configured memory on the board,
+ * and to UINT64_MAX otherwise. A hot-added memory board has no configured
+ * memory when drmach_mem_new() is called, so mp->base_pa starts out as
+ * UINT64_MAX and must be given a correct value after the memory hot-add
+ * operation.
+ * A hot-added memory board may contain multiple memory segments, and
+ * drmach_mem_add_span() is called once for each segment, so we can't
+ * rely on the basepa argument. It's also possible that only part of a
+ * memory segment is added to the OS, so we need to intersect with the
+ * phys_install list to get the real base address of configured memory
+ * on the board.
+ */
+sbd_error_t *
+drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size)
+{
+ _NOTE(ARGUNUSED(basepa));
+
+ uint64_t nbytes = 0;
+ uint64_t endpa;
+ drmach_mem_t *mp;
+ struct memlist *ml2;
+ struct memlist *p;
+
+ ASSERT(size != 0);
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ /* Compute basepa and size of installed memory. */
+ endpa = _ptob64(physmax + 1);
+ memlist_read_lock();
+ ml2 = memlist_dup(phys_install);
+ memlist_read_unlock();
+ ml2 = memlist_del_span(ml2, 0ull, mp->slice_base);
+ if (ml2 && endpa > mp->slice_top) {
+ ml2 = memlist_del_span(ml2, mp->slice_top,
+ endpa - mp->slice_top);
+ }
+
+ ASSERT(ml2);
+ if (ml2) {
+ for (p = ml2; p; p = p->ml_next) {
+ nbytes += p->ml_size;
+ if (mp->base_pa > p->ml_address)
+ mp->base_pa = p->ml_address;
+ }
+ ASSERT(nbytes > 0);
+ mp->nbytes += nbytes;
+ memlist_delete(ml2);
+ }
+
+ return (NULL);
+}
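
A worked userland model of the computation above: clip a copy of the
installed-memory list to [slice_base, slice_top), then take the lowest
surviving address as base_pa and the summed sizes as nbytes. The spans
below are made-up example data, not real phys_install contents.

#include <stdio.h>
#include <stdint.h>

typedef struct { uint64_t addr, size; } span_t;

int
main(void)
{
	span_t installed[] = {			/* simplified phys_install */
		{ 0x000000000ULL, 0x80000000ULL },	/* 2 GB at 0 */
		{ 0x100000000ULL, 0x40000000ULL },	/* 1 GB at 4 GB */
	};
	uint64_t slice_base = 0x100000000ULL, slice_top = 0x200000000ULL;
	uint64_t base_pa = UINT64_MAX, nbytes = 0;
	int i;

	for (i = 0; i < 2; i++) {
		uint64_t lo = installed[i].addr;
		uint64_t hi = lo + installed[i].size;

		if (lo < slice_base)
			lo = slice_base;	/* clip below the slice */
		if (hi > slice_top)
			hi = slice_top;		/* clip above the slice */
		if (lo >= hi)
			continue;		/* span misses the slice */
		if (lo < base_pa)
			base_pa = lo;
		nbytes += hi - lo;
	}
	printf("base_pa=0x%llx nbytes=0x%llx\n",
	    (unsigned long long)base_pa, (unsigned long long)nbytes);
	return (0);	/* base_pa=0x100000000 nbytes=0x40000000 */
}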
+
+static sbd_error_t *
+drmach_mem_update_lgrp(drmachid_t id)
+{
+ ACPI_STATUS rc;
+ DRMACH_HANDLE hdl;
+ void *hdlp;
+ drmach_mem_t *mp;
+ update_membounds_t umb;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+ /* No need to update lgrp if memory is already installed. */
+ if (mp->nbytes != 0)
+ return (NULL);
+ /* No need to update lgrp if lgrp is disabled. */
+ if (max_mem_nodes == 1)
+ return (NULL);
+
+ /* Add memory to lgroup */
+ hdl = mp->dev.node->get_dnode(mp->dev.node);
+ rc = acpidev_dr_device_get_memory_index(hdl, &umb.u_device_id);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "drmach: failed to get device id of memory, "
+ "can't update lgrp information.");
+ return (drerr_new(0, EX86_INTERNAL, NULL));
+ }
+ rc = acpidev_dr_get_mem_numa_info(hdl, mp->memlist, &hdlp,
+ &umb.u_domain, &umb.u_sli_cnt, &umb.u_sli_ptr);
+ ASSERT(ACPI_SUCCESS(rc));
+ if (ACPI_FAILURE(rc)) {
+ cmn_err(CE_WARN, "drmach: failed to get lgrp info of memory, "
+ "can't update lgrp information.");
+ return (drerr_new(0, EX86_INTERNAL, NULL));
+ }
+ umb.u_base = (uint64_t)mp->slice_base;
+ umb.u_length = (uint64_t)(mp->slice_top - mp->slice_base);
+ lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
+ acpidev_dr_free_mem_numa_info(hdlp);
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_enable(drmachid_t id)
+{
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ else
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem)
+{
+ drmach_mem_t *mp;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ /*
+ * This is only used by dr to round up/down the memory
+ * for copying.
+ */
+ mem->mi_alignment_mask = mp->mem_alignment - 1;
+ mem->mi_basepa = mp->base_pa;
+ mem->mi_size = mp->nbytes;
+ mem->mi_slice_base = mp->slice_base;
+ mem->mi_slice_top = mp->slice_top;
+ mem->mi_slice_size = mp->slice_size;
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_slice_info(drmachid_t id,
+ uint64_t *bp, uint64_t *ep, uint64_t *sp)
+{
+ drmach_mem_t *mp;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mp = (drmach_mem_t *)id;
+
+ if (bp)
+ *bp = mp->slice_base;
+ if (ep)
+ *ep = mp->slice_top;
+ if (sp)
+ *sp = mp->slice_size;
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_mem_get_memlist(drmachid_t id, struct memlist **ml)
+{
+#ifdef DEBUG
+ int rv;
+#endif
+ drmach_mem_t *mem;
+ struct memlist *mlist;
+
+ if (!DRMACH_IS_MEM_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ mem = (drmach_mem_t *)id;
+
+ mlist = memlist_dup(mem->memlist);
+ *ml = mlist;
+
+#ifdef DEBUG
+ /*
+ * Make sure the incoming memlist doesn't already
+ * intersect with what's present in the system (phys_install).
+ */
+ memlist_read_lock();
+ rv = memlist_intersect(phys_install, mlist);
+ memlist_read_unlock();
+ if (rv) {
+ DRMACH_PR("Derived memlist intersects with phys_install\n");
+ memlist_dump(mlist);
+
+ DRMACH_PR("phys_install memlist:\n");
+ memlist_dump(phys_install);
+
+ memlist_delete(mlist);
+ return (DRMACH_INTERNAL_ERROR());
+ }
+
+ DRMACH_PR("Derived memlist:");
+ memlist_dump(mlist);
+#endif
+
+ return (NULL);
+}
+
+processorid_t
+drmach_mem_cpu_affinity(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (CPU_CURRENT);
+}
+
+int
+drmach_copy_rename_need_suspend(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (0);
+}
+
+/*
+ * IO specific interfaces to support dr driver
+ */
+sbd_error_t *
+drmach_io_pre_release(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_unrelease(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_post_release(drmachid_t id)
+{
+ _NOTE(ARGUNUSED(id));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_post_attach(drmachid_t id)
+{
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+
+ return (NULL);
+}
+
+sbd_error_t *
+drmach_io_is_attached(drmachid_t id, int *yes)
+{
+ drmach_device_t *dp;
+ dev_info_t *dip;
+ int state;
+
+ if (!DRMACH_IS_IO_ID(id))
+ return (drerr_new(0, EX86_INAPPROP, NULL));
+ dp = id;
+
+ dip = dp->node->getdip(dp->node);
+ if (dip == NULL) {
+ *yes = 0;
+ return (NULL);
+ }
+
+ state = ddi_get_devstate(dip);
+ *yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) ||
+ (state == DDI_DEVSTATE_UP));
+
+ return (NULL);
+}
+
+/*
+ * Miscellaneous interfaces to support dr driver
+ */
+int
+drmach_verify_sr(dev_info_t *dip, int sflag)
+{
+ _NOTE(ARGUNUSED(dip, sflag));
+
+ return (0);
+}
+
+void
+drmach_suspend_last(void)
+{
+}
+
+void
+drmach_resume_first(void)
+{
+}
+
+/*
+ * Log a DR sysevent.
+ * Return value: 0 success, non-zero failure.
+ */
+int
+drmach_log_sysevent(int board, char *hint, int flag, int verbose)
+{
+ sysevent_t *ev = NULL;
+ sysevent_id_t eid;
+ int rv, km_flag;
+ sysevent_value_t evnt_val;
+ sysevent_attr_list_t *evnt_attr_list = NULL;
+ sbd_error_t *err;
+ char attach_pnt[MAXNAMELEN];
+
+ km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
+ attach_pnt[0] = '\0';
+ err = drmach_board_name(board, attach_pnt, MAXNAMELEN);
+ if (err != NULL) {
+ sbd_err_clear(&err);
+ rv = -1;
+ goto logexit;
+ }
+ if (verbose) {
+ DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n",
+ attach_pnt, hint, flag, verbose);
+ }
+
+ if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE,
+ SUNW_KERN_PUB"dr", km_flag)) == NULL) {
+ rv = -2;
+ goto logexit;
+ }
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = attach_pnt;
+ if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val,
+ km_flag)) != 0)
+ goto logexit;
+
+ evnt_val.value_type = SE_DATA_TYPE_STRING;
+ evnt_val.value.sv_string = hint;
+ if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val,
+ km_flag)) != 0) {
+ sysevent_free_attr(evnt_attr_list);
+ goto logexit;
+ }
+
+ (void) sysevent_attach_attributes(ev, evnt_attr_list);
+
+ /*
+ * Log the event but do not sleep waiting for its
+ * delivery. This provides insulation from syseventd.
+ */
+ rv = log_sysevent(ev, SE_NOSLEEP, &eid);
+
+logexit:
+ if (ev)
+ sysevent_free(ev);
+ if ((rv != 0) && verbose)
+ cmn_err(CE_WARN, "!drmach_log_sysevent failed (rv %d) for %s "
+ " %s\n", rv, attach_pnt, hint);
+
+ return (rv);
+}
diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h
new file mode 100644
index 0000000000..abbc264ef0
--- /dev/null
+++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h
@@ -0,0 +1,199 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DRMACH_ACPI_H
+#define _SYS_DRMACH_ACPI_H
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/param.h>
+#include <sys/sunddi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/drmach.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Use ACPI handle as DRMACH handle on x86 systems. */
+#define DRMACH_HANDLE ACPI_HANDLE
+
+/* Macros to deal with object type. */
+#define DRMACH_OBJ(id) ((drmach_common_t *)id)
+
+#define DRMACH_NULL_ID(id) ((id) == 0)
+
+#define DRMACH_IS_BOARD_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_board_new))
+
+#define DRMACH_IS_CPU_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new))
+
+#define DRMACH_IS_MEM_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_mem_new))
+
+#define DRMACH_IS_IO_ID(id) \
+ ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
+
+#define DRMACH_IS_DEVICE_ID(id) \
+ ((id != 0) && \
+ (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
+
+#define DRMACH_IS_ID(id) \
+ ((id != 0) && \
+ (DRMACH_OBJ(id)->isa == (void *)drmach_board_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \
+ DRMACH_OBJ(id)->isa == (void *)drmach_io_new))
+
+#define DRMACH_INTERNAL_ERROR() \
+ drerr_new(1, EX86_INTERNAL, drmach_ie_fmt, __LINE__)
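
The DRMACH_IS_*_ID macros rely on each object's first field (isa in
drmach_common_t below) being stamped with the address of its
constructor, so a type test reduces to a pointer comparison. A
self-contained sketch of the scheme:

#include <stdio.h>

typedef struct { void *isa; } common_t;
typedef struct { common_t cm; int bnum; } board_t;

static board_t *board_new(int bnum);	/* constructor doubles as tag */

#define	IS_BOARD_ID(id) \
	((id) != NULL && ((common_t *)(id))->isa == (void *)board_new)

static board_t boards[4];

static board_t *
board_new(int bnum)
{
	board_t *bp = &boards[bnum];

	bp->cm.isa = (void *)board_new;	/* stamp the type tag */
	bp->bnum = bnum;
	return (bp);
}

int
main(void)
{
	board_t *bp = board_new(1);
	common_t other = { NULL };	/* untagged object */

	printf("%d %d\n", IS_BOARD_ID(bp), IS_BOARD_ID(&other));
	return (0);	/* prints: 1 0 */
}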
+
+#ifdef DEBUG
+extern int drmach_debug;
+
+#define DRMACH_PR if (drmach_debug) printf
+#else
+#define DRMACH_PR _NOTE(CONSTANTCONDITION) if (0) printf
+#endif /* DEBUG */
+
+typedef struct {
+ struct drmach_node *node;
+ void *data;
+ void *func;
+} drmach_node_walk_args_t;
+
+typedef struct drmach_node {
+ void *here;
+
+ DRMACH_HANDLE (*get_dnode)(struct drmach_node *node);
+ dev_info_t *(*getdip)(struct drmach_node *node);
+ int (*getproplen)(struct drmach_node *node, char *name,
+ int *len);
+ int (*getprop)(struct drmach_node *node, char *name,
+ void *buf, int len);
+ int (*walk)(struct drmach_node *node, void *data,
+ int (*cb)(drmach_node_walk_args_t *args));
+} drmach_node_t;
+
+typedef struct {
+ int min_index;
+ int max_index;
+ int arr_sz;
+ drmachid_t *arr;
+} drmach_array_t;
+
+typedef struct {
+ void *isa;
+
+ void (*dispose)(drmachid_t);
+ sbd_error_t *(*release)(drmachid_t);
+ sbd_error_t *(*status)(drmachid_t, drmach_status_t *);
+
+ char name[MAXNAMELEN];
+} drmach_common_t;
+
+typedef struct {
+ drmach_common_t cm;
+ uint_t bnum;
+ int assigned;
+ int powered;
+ int connected;
+ int cond;
+ drmach_node_t *tree;
+ drmach_array_t *devices;
+ int boot_board; /* if board exists on bootup */
+} drmach_board_t;
+
+typedef struct {
+ drmach_common_t cm;
+ drmach_board_t *bp;
+ int unum;
+ uint_t portid;
+ int busy;
+ int powered;
+ const char *type;
+ drmach_node_t *node;
+} drmach_device_t;
+
+typedef struct drmach_cpu {
+ drmach_device_t dev;
+ processorid_t cpuid;
+ uint32_t apicid;
+} drmach_cpu_t;
+
+typedef struct drmach_mem {
+ drmach_device_t dev;
+ uint64_t mem_alignment;
+ uint64_t slice_base;
+ uint64_t slice_top;
+ uint64_t slice_size;
+ uint64_t base_pa; /* lowest installed memory base */
+ uint64_t nbytes; /* size of installed memory */
+ struct memlist *memlist;
+} drmach_mem_t;
+
+typedef struct drmach_io {
+ drmach_device_t dev;
+} drmach_io_t;
+
+typedef struct drmach_domain_info {
+ uint64_t floating;
+ int allow_dr;
+} drmach_domain_info_t;
+
+typedef struct {
+ drmach_board_t *obj;
+ int ndevs;
+ void *a;
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t);
+ sbd_error_t *err;
+} drmach_board_cb_data_t;
+
+extern drmach_domain_info_t drmach_domain;
+
+extern drmach_board_t *drmach_board_new(uint_t, int);
+extern sbd_error_t *drmach_device_new(drmach_node_t *,
+ drmach_board_t *, int, drmachid_t *);
+extern sbd_error_t *drmach_cpu_new(drmach_device_t *, drmachid_t *);
+extern sbd_error_t *drmach_mem_new(drmach_device_t *, drmachid_t *);
+extern sbd_error_t *drmach_io_new(drmach_device_t *, drmachid_t *);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DRMACH_ACPI_H */
diff --git a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
index 3a237509ba..cece3f77f2 100644
--- a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
+++ b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c
@@ -919,7 +919,7 @@ acpi_drv_obj_copy(ACPI_OBJECT *op, char *bp, struct obj_desc *dp)
ep->Type == dp->type) {
ACPI_DRV_DBG(CE_NOTE, NULL, "\t%s: \"%s\"",
dp->name, ep->String.Pointer);
- (void) strncpy(fp, ep->String.Pointer, dp->size);
+ (void) strlcpy(fp, ep->String.Pointer, dp->size);
} else if (dp->type == ACPI_TYPE_STRING &&
ep->Type == ACPI_TYPE_BUFFER) {
#ifdef DEBUG
@@ -1559,9 +1559,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p)
if ((info->Valid & ACPI_VALID_HID) == 0) {
ACPI_DRV_DBG(CE_WARN, NULL,
"AcpiGetObjectInfo(): _HID not available");
- (void) strncpy(p->hid, "\0", ID_LEN);
+ p->hid[0] = 0;
} else {
- (void) strncpy(p->hid, info->HardwareId.String, ID_LEN);
+ (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN);
}
/*
@@ -1572,9 +1572,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p)
ACPI_DRV_DBG(CE_WARN, NULL,
"AcpiGetObjectInfo(): _UID not available");
/* Use 0 as the default _UID */
- (void) strncpy(p->uid, "\0", ID_LEN);
+ p->uid[0] = 0;
} else {
- (void) strncpy(p->uid, info->UniqueId.String, ID_LEN);
+ (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN);
}
AcpiOsFree(info);
@@ -2064,9 +2064,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p)
if ((info->Valid & ACPI_VALID_HID) == 0) {
ACPI_DRV_DBG(CE_WARN, NULL,
"!AcpiGetObjectInfo(): _HID not available");
- (void) strncpy(p->hid, "\0", ID_LEN);
+ p->hid[0] = 0;
} else {
- (void) strncpy(p->hid, info->HardwareId.String, ID_LEN);
+ (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN);
}
/*
@@ -2077,9 +2077,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p)
ACPI_DRV_DBG(CE_WARN, NULL,
"!AcpiGetObjectInfo(): _UID not available");
/* Use 0 as the default _UID */
- (void) strncpy(p->uid, "\0", ID_LEN);
+ p->uid[0] = 0;
} else {
- (void) strncpy(p->uid, info->UniqueId.String, ID_LEN);
+ (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN);
}
if (info->Valid & ACPI_VALID_ADR) {
diff --git a/usr/src/uts/i86pc/io/cbe.c b/usr/src/uts/i86pc/io/cbe.c
index d5e08c63bc..87ec248717 100644
--- a/usr/src/uts/i86pc/io/cbe.c
+++ b/usr/src/uts/i86pc/io/cbe.c
@@ -40,6 +40,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/ddi_intr.h>
#include <sys/avintr.h>
+#include <sys/note.h>
static int cbe_vector;
static int cbe_ticks = 0;
@@ -207,6 +208,13 @@ cbe_configure(cpu_t *cpu)
return (cpu);
}
+void
+cbe_unconfigure(void *arg)
+{
+ _NOTE(ARGUNUSED(arg));
+ ASSERT(!CPU_IN_SET(cbe_enabled, ((cpu_t *)arg)->cpu_id));
+}
+
#ifndef __xpv
/*
* declarations needed for time adjustment
@@ -318,7 +326,7 @@ cbe_init(void)
{
cyc_backend_t cbe = {
cbe_configure, /* cyb_configure */
- NULL, /* cyb_unconfigure */
+ cbe_unconfigure, /* cyb_unconfigure */
cbe_enable, /* cyb_enable */
cbe_disable, /* cyb_disable */
cbe_reprogram, /* cyb_reprogram */
diff --git a/usr/src/uts/i86pc/io/dr/dr.c b/usr/src/uts/i86pc/io/dr/dr.c
new file mode 100644
index 0000000000..90f3a80adf
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr.c
@@ -0,0 +1,3329 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * PIM-DR layer of DR driver. Provides interface between user
+ * level applications and the PSM-DR layer.
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/dditypes.h>
+#include <sys/devops.h>
+#include <sys/modctl.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/processor.h>
+#include <sys/cpuvar.h>
+#include <sys/mem_config.h>
+
+#include <sys/autoconf.h>
+#include <sys/cmn_err.h>
+
+#include <sys/ddi_impldefs.h>
+#include <sys/promif.h>
+#include <sys/machsystm.h>
+
+#include <sys/dr.h>
+#include <sys/drmach.h>
+#include <sys/dr_util.h>
+
+extern int nulldev();
+extern int nodev();
+extern struct memlist *phys_install;
+
+#ifdef DEBUG
+uint_t dr_debug = 0; /* dr.h for bit values */
+#endif /* DEBUG */
+
+/*
+ * NOTE: state_str, nt_str and SBD_CMD_STR are only used in a debug
+ * kernel. They are, however, referenced during both debug and non-debug
+ * compiles.
+ */
+
+static char *state_str[] = {
+ "EMPTY", "OCCUPIED", "CONNECTED", "UNCONFIGURED",
+ "PARTIAL", "CONFIGURED", "RELEASE", "UNREFERENCED",
+ "FATAL"
+};
+
+#define SBD_CMD_STR(c) \
+ (((c) == SBD_CMD_ASSIGN) ? "ASSIGN" : \
+ ((c) == SBD_CMD_UNASSIGN) ? "UNASSIGN" : \
+ ((c) == SBD_CMD_POWERON) ? "POWERON" : \
+ ((c) == SBD_CMD_POWEROFF) ? "POWEROFF" : \
+ ((c) == SBD_CMD_TEST) ? "TEST" : \
+ ((c) == SBD_CMD_CONNECT) ? "CONNECT" : \
+ ((c) == SBD_CMD_DISCONNECT) ? "DISCONNECT" : \
+ ((c) == SBD_CMD_CONFIGURE) ? "CONFIGURE" : \
+ ((c) == SBD_CMD_UNCONFIGURE) ? "UNCONFIGURE" : \
+ ((c) == SBD_CMD_GETNCM) ? "GETNCM" : \
+ ((c) == SBD_CMD_PASSTHRU) ? "PASSTHRU" : \
+ ((c) == SBD_CMD_STATUS) ? "STATUS" : "unknown")
+
+#define DR_GET_BOARD_DEVUNIT(sb, ut, un) (&((sb)->b_dev[DEVSET_NIX(ut)][un]))
+
+#define DR_MAKE_MINOR(i, b) (((i) << 16) | (b))
+#define DR_MINOR2INST(m) (((m) >> 16) & 0xffff)
+#define DR_MINOR2BNUM(m) ((m) & 0xffff)
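
The minor number packs the driver instance into the high 16 bits and
the board number into the low 16 bits, which is why dr_open() and
dr_ioctl() can recover both from getminor(). A round-trip sketch:

#include <stdio.h>

#define	DR_MAKE_MINOR(i, b)	(((i) << 16) | (b))
#define	DR_MINOR2INST(m)	(((m) >> 16) & 0xffff)
#define	DR_MINOR2BNUM(m)	((m) & 0xffff)

int
main(void)
{
	int m = DR_MAKE_MINOR(3, 7);	/* instance 3, board 7 */

	printf("minor=%d inst=%d bnum=%d\n",
	    m, DR_MINOR2INST(m), DR_MINOR2BNUM(m));
	return (0);	/* prints: minor=196615 inst=3 bnum=7 */
}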
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr.c %d";
+
+/* struct for drmach device name to sbd_comp_type_t mapping */
+typedef struct {
+ char *s_devtype;
+ sbd_comp_type_t s_nodetype;
+} dr_devname_t;
+
+/* struct to map drmach device attributes - name:sbd_comp_type_t */
+static dr_devname_t dr_devattr[] = {
+ { DRMACH_DEVTYPE_MEM, SBD_COMP_MEM },
+ { DRMACH_DEVTYPE_CPU, SBD_COMP_CPU },
+ { DRMACH_DEVTYPE_PCI, SBD_COMP_IO },
+#if defined(DRMACH_DEVTYPE_SBUS)
+ { DRMACH_DEVTYPE_SBUS, SBD_COMP_IO },
+#endif
+#if defined(DRMACH_DEVTYPE_WCI)
+ { DRMACH_DEVTYPE_WCI, SBD_COMP_IO },
+#endif
+ /* last s_devtype must be NULL, s_nodetype must be SBD_COMP_UNKNOWN */
+ { NULL, SBD_COMP_UNKNOWN }
+};
+
+/*
+ * Per instance soft-state structure.
+ */
+typedef struct dr_softstate {
+ dev_info_t *dip;
+ dr_board_t *boards;
+ kmutex_t i_lock;
+ int dr_initialized;
+} dr_softstate_t;
+
+/*
+ * dr Global data elements
+ */
+struct dr_global {
+ dr_softstate_t *softsp; /* soft-state anchor from ddi_soft_state_init() */
+ kmutex_t lock;
+} dr_g;
+
+dr_unsafe_devs_t dr_unsafe_devs;
+
+/*
+ * Table of known passthru commands.
+ */
+struct {
+ char *pt_name;
+ int (*pt_func)(dr_handle_t *);
+} pt_arr[] = {
+ "quiesce", dr_pt_test_suspend,
+};
+
+int dr_modunload_okay = 0; /* set to non-zero to allow unload */
+
+/*
+ * State transition table. Lists the valid transitions for the "board"
+ * state. Recall that a non-zero return value terminates the operation;
+ * however, the error value (x_err) is what really indicates an error,
+ * if any.
+ */
+static int
+_cmd2index(int c)
+{
+ /*
+ * Translate DR CMD to index into dr_state_transition.
+ */
+ switch (c) {
+ case SBD_CMD_CONNECT: return (0);
+ case SBD_CMD_DISCONNECT: return (1);
+ case SBD_CMD_CONFIGURE: return (2);
+ case SBD_CMD_UNCONFIGURE: return (3);
+ case SBD_CMD_ASSIGN: return (4);
+ case SBD_CMD_UNASSIGN: return (5);
+ case SBD_CMD_POWERON: return (6);
+ case SBD_CMD_POWEROFF: return (7);
+ case SBD_CMD_TEST: return (8);
+ default: return (-1);
+ }
+}
+
+#define CMD2INDEX(c) _cmd2index(c)
+
+static struct dr_state_trans {
+ int x_cmd;
+ struct {
+ int x_rv; /* return value of pre_op */
+ int x_err; /* error, if any */
+ } x_op[DR_STATE_MAX];
+} dr_state_transition[] = {
+ { SBD_CMD_CONNECT,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_DISCONNECT,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { 0, 0 }, /* occupied */
+ { 0, 0 }, /* connected */
+ { 0, 0 }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_CONFIGURE,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { -1, ESBD_STATE }, /* occupied */
+ { 0, 0 }, /* connected */
+ { 0, 0 }, /* unconfigured */
+ { 0, 0 }, /* partial */
+ { 0, 0 }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_UNCONFIGURE,
+ {
+ { -1, ESBD_STATE }, /* empty */
+ { -1, ESBD_STATE }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { 0, 0 }, /* partial */
+ { 0, 0 }, /* configured */
+ { 0, 0 }, /* release */
+ { 0, 0 }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_ASSIGN,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_UNASSIGN,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_POWERON,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_POWEROFF,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+ { SBD_CMD_TEST,
+ {
+ { 0, 0 }, /* empty */
+ { 0, 0 }, /* occupied */
+ { -1, ESBD_STATE }, /* connected */
+ { -1, ESBD_STATE }, /* unconfigured */
+ { -1, ESBD_STATE }, /* partial */
+ { -1, ESBD_STATE }, /* configured */
+ { -1, ESBD_STATE }, /* release */
+ { -1, ESBD_STATE }, /* unreferenced */
+ { -1, ESBD_FATAL_STATE }, /* fatal */
+ }
+ },
+};
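
A condensed model of how the table is consulted (dr_pre_op() below does
this against the real table): the command selects a row via CMD2INDEX()
and the board's current state selects the column; x_rv rejects the
operation and x_err says why. The reason code here is a stand-in for
ESBD_STATE:

#include <stdio.h>

enum { ST_EMPTY, ST_OCCUPIED, ST_CONNECTED, NSTATES };

static const struct {
	int x_rv;	/* -1 rejects the transition */
	int x_err;	/* reason code (0 = none) */
} connect_row[NSTATES] = {
	{ 0, 0 },	/* empty */
	{ 0, 0 },	/* occupied */
	{ -1, 1 },	/* connected: already past this transition */
};

int
main(void)
{
	int state;

	for (state = 0; state < NSTATES; state++)
		printf("connect from state %d: rv=%d err=%d\n", state,
		    connect_row[state].x_rv, connect_row[state].x_err);
	return (0);
}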
+
+/*
+ * Global R/W lock to synchronize access across
+ * multiple boards. Users wanting multi-board access
+ * must grab WRITE lock, others must grab READ lock.
+ */
+krwlock_t dr_grwlock;
+
+/*
+ * Head of the boardlist used as a reference point for
+ * locating board structs.
+ * TODO: eliminate dr_boardlist
+ */
+dr_board_t *dr_boardlist;
+
+/*
+ * DR support functions.
+ */
+static dr_devset_t dr_dev2devset(sbd_comp_id_t *cid);
+static int dr_check_transition(dr_board_t *bp,
+ dr_devset_t *devsetp,
+ struct dr_state_trans *transp,
+ int cmd);
+static int dr_check_unit_attached(dr_common_unit_t *dp);
+static sbd_error_t *dr_init_devlists(dr_board_t *bp);
+static void dr_board_discovery(dr_board_t *bp);
+static int dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd);
+static void dr_board_destroy(dr_board_t *bp);
+static void dr_board_transition(dr_board_t *bp, dr_state_t st);
+
+/*
+ * DR driver (DDI) entry points.
+ */
+static int dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
+ void *arg, void **result);
+static int dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int dr_probe(dev_info_t *dip);
+static int dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *cred_p, int *rval_p);
+static int dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p);
+static int dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p);
+
+/*
+ * DR command processing operations.
+ */
+static int dr_copyin_iocmd(dr_handle_t *hp);
+static int dr_copyout_iocmd(dr_handle_t *hp);
+static int dr_copyout_errs(dr_handle_t *hp);
+static int dr_pre_op(dr_handle_t *hp);
+static int dr_post_op(dr_handle_t *hp, int rv);
+static int dr_exec_op(dr_handle_t *hp);
+static void dr_assign_board(dr_handle_t *hp);
+static void dr_unassign_board(dr_handle_t *hp);
+static void dr_connect(dr_handle_t *hp);
+static int dr_disconnect(dr_handle_t *hp);
+static void dr_dev_configure(dr_handle_t *hp);
+static void dr_dev_release(dr_handle_t *hp);
+static int dr_dev_unconfigure(dr_handle_t *hp);
+static void dr_dev_cancel(dr_handle_t *hp);
+static int dr_dev_status(dr_handle_t *hp);
+static int dr_get_ncm(dr_handle_t *hp);
+static int dr_pt_ioctl(dr_handle_t *hp);
+static void dr_poweron_board(dr_handle_t *hp);
+static void dr_poweroff_board(dr_handle_t *hp);
+static void dr_test_board(dr_handle_t *hp);
+
+/*
+ * Autoconfiguration data structures
+ */
+struct cb_ops dr_cb_ops = {
+ dr_open, /* open */
+ dr_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ dr_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* struct streamtab */
+ D_NEW | D_MP | D_MTSAFE, /* compatibility flags */
+ CB_REV, /* Rev */
+ nodev, /* cb_aread */
+ nodev /* cb_awrite */
+};
+
+struct dev_ops dr_dev_ops = {
+ DEVO_REV, /* build version */
+ 0, /* dev ref count */
+ dr_getinfo, /* getinfo */
+ nulldev, /* identify */
+ dr_probe, /* probe */
+ dr_attach, /* attach */
+ dr_detach, /* detach */
+ nodev, /* reset */
+ &dr_cb_ops, /* cb_ops */
+ (struct bus_ops *)NULL, /* bus ops */
+ NULL, /* power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+extern struct mod_ops mod_driverops;
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ "Dynamic Reconfiguration",
+ &dr_dev_ops
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/*
+ * Driver entry points.
+ */
+int
+_init(void)
+{
+ int err;
+
+ /*
+ * If you need to support multiple nodes (instances), then the
+ * maximum number of supported nodes would need to be passed as the
+ * third parameter to ddi_soft_state_init(). An alternative would be
+ * to dynamically fini and re-init the soft state structure each time
+ * a node is attached.
+ */
+ err = ddi_soft_state_init((void **)&dr_g.softsp,
+ sizeof (dr_softstate_t), 1);
+ if (err)
+ return (err);
+
+ mutex_init(&dr_g.lock, NULL, MUTEX_DRIVER, NULL);
+ rw_init(&dr_grwlock, NULL, RW_DEFAULT, NULL);
+
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) != 0)
+ return (err);
+
+ mutex_destroy(&dr_g.lock);
+ rw_destroy(&dr_grwlock);
+
+ ddi_soft_state_fini((void **)&dr_g.softsp);
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*ARGSUSED1*/
+static int
+dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p)
+{
+ int instance;
+ dr_softstate_t *softsp;
+ dr_board_t *bp;
+
+ /*
+ * Don't open unless we've attached.
+ */
+ instance = DR_MINOR2INST(getminor(*dev));
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&softsp->i_lock);
+ if (!softsp->dr_initialized) {
+ int bd;
+ int rv = 0;
+
+ bp = softsp->boards;
+
+ /* initialize each array element */
+ for (bd = 0; bd < MAX_BOARDS; bd++, bp++) {
+ rv = dr_board_init(bp, softsp->dip, bd);
+ if (rv)
+ break;
+ }
+
+ if (rv == 0) {
+ softsp->dr_initialized = 1;
+ } else {
+ /* destroy elements initialized thus far */
+ while (--bp >= softsp->boards)
+ dr_board_destroy(bp);
+
+ /* TODO: should this be another errno val? */
+ mutex_exit(&softsp->i_lock);
+ return (ENXIO);
+ }
+ }
+ mutex_exit(&softsp->i_lock);
+
+ bp = &softsp->boards[DR_MINOR2BNUM(getminor(*dev))];
+
+ /*
+ * prevent opening of a dyn-ap for a board
+ * that does not exist
+ */
+ if (!bp->b_assigned) {
+ if (drmach_board_lookup(bp->b_num, &bp->b_id) != 0)
+ return (ENODEV);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+/*
+ * Enable/disable DR features.
+ */
+int dr_enable = 1;
+
+/*ARGSUSED3*/
+static int
+dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
+ cred_t *cred_p, int *rval_p)
+{
+ static int dr_dev_type_to_nt(char *);
+
+ int rv = 0;
+ int instance;
+ int bd;
+ dr_handle_t *hp;
+ dr_softstate_t *softsp;
+ static fn_t f = "dr_ioctl";
+
+ PR_ALL("%s...\n", f);
+
+ instance = DR_MINOR2INST(getminor(dev));
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL) {
+ cmn_err(CE_WARN, "dr%d: module not yet attached", instance);
+ return (ENXIO);
+ }
+
+ if (!dr_enable) {
+ switch (cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ case SBD_CMD_PASSTHRU:
+ break;
+ default:
+ return (ENOTSUP);
+ }
+ }
+
+ bd = DR_MINOR2BNUM(getminor(dev));
+ if (bd >= MAX_BOARDS)
+ return (ENXIO);
+
+ /* get and initialize storage for new handle */
+ hp = GETSTRUCT(dr_handle_t, 1);
+ hp->h_bd = &softsp->boards[bd];
+ hp->h_err = NULL;
+ hp->h_dev = getminor(dev);
+ hp->h_cmd = cmd;
+ hp->h_mode = mode;
+ hp->h_iap = (sbd_ioctl_arg_t *)arg;
+
+ /* copy sbd command into handle */
+ rv = dr_copyin_iocmd(hp);
+ if (rv) {
+ FREESTRUCT(hp, dr_handle_t, 1);
+ return (EINVAL);
+ }
+
+ /* translate canonical name to component type */
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_name[0] != '\0') {
+ hp->h_sbdcmd.cmd_cm.c_id.c_type =
+ dr_dev_type_to_nt(hp->h_sbdcmd.cmd_cm.c_id.c_name);
+
+ PR_ALL("%s: c_name = %s, c_type = %d\n",
+ f,
+ hp->h_sbdcmd.cmd_cm.c_id.c_name,
+ hp->h_sbdcmd.cmd_cm.c_id.c_type);
+ } else {
+ /*EMPTY*/
+ PR_ALL("%s: c_name is NULL\n", f);
+ }
+
+ /* determine scope of operation */
+ hp->h_devset = dr_dev2devset(&hp->h_sbdcmd.cmd_cm.c_id);
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ /* no locks needed for these commands */
+ break;
+
+ default:
+ rw_enter(&dr_grwlock, RW_WRITER);
+ mutex_enter(&hp->h_bd->b_lock);
+
+ /*
+ * If we're dealing with memory at all, then we have
+ * to keep the "exclusive" global lock held. This is
+ * necessary since we will probably need to look at
+ * multiple board structs. Otherwise, we only have
+ * to deal with the board in question and so can drop
+ * the global lock to "shared".
+ */
+ rv = DEVSET_IN_SET(hp->h_devset, SBD_COMP_MEM, DEVSET_ANYUNIT);
+ if (rv == 0)
+ rw_downgrade(&dr_grwlock);
+ break;
+ }
+
+ rv = dr_pre_op(hp);
+ if (rv == 0) {
+ rv = dr_exec_op(hp);
+ rv = dr_post_op(hp, rv);
+ }
+
+ if (rv == -1)
+ rv = EIO;
+
+ if (hp->h_err != NULL)
+ if (!(rv = dr_copyout_errs(hp)))
+ rv = EIO;
+
+ /* undo locking, if any, done before dr_pre_op */
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ case SBD_CMD_GETNCM:
+ break;
+
+ case SBD_CMD_ASSIGN:
+ case SBD_CMD_UNASSIGN:
+ case SBD_CMD_POWERON:
+ case SBD_CMD_POWEROFF:
+ case SBD_CMD_CONNECT:
+ case SBD_CMD_CONFIGURE:
+ case SBD_CMD_UNCONFIGURE:
+ case SBD_CMD_DISCONNECT:
+ /* Board changed state. Log a sysevent. */
+ if (rv == 0)
+ (void) drmach_log_sysevent(hp->h_bd->b_num, "",
+ SE_SLEEP, 0);
+ /* Fall through */
+
+ default:
+ mutex_exit(&hp->h_bd->b_lock);
+ rw_exit(&dr_grwlock);
+ }
+
+ if (hp->h_opts.size != 0)
+ FREESTRUCT(hp->h_opts.copts, char, hp->h_opts.size);
+
+ FREESTRUCT(hp, dr_handle_t, 1);
+
+ return (rv);
+}
+
+/*ARGSUSED*/
+static int
+dr_probe(dev_info_t *dip)
+{
+ return (DDI_PROBE_SUCCESS);
+}
+
+static int
+dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int rv, rv2;
+ int bd;
+ int instance;
+ sbd_error_t *err;
+ dr_softstate_t *softsp;
+
+ instance = ddi_get_instance(dip);
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ rw_enter(&dr_grwlock, RW_WRITER);
+
+ rv = ddi_soft_state_zalloc(dr_g.softsp, instance);
+ if (rv != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "dr%d: failed to alloc soft-state",
+ instance);
+ return (DDI_FAILURE);
+ }
+
+ /* initialize softstate structure */
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ softsp->dip = dip;
+
+ mutex_init(&softsp->i_lock, NULL, MUTEX_DRIVER, NULL);
+
+ /* allocate board array (aka boardlist) */
+ softsp->boards = GETSTRUCT(dr_board_t, MAX_BOARDS);
+
+ /* TODO: eliminate dr_boardlist */
+ dr_boardlist = softsp->boards;
+
+ /* initialize each array element */
+ rv = DDI_SUCCESS;
+ for (bd = 0; bd < MAX_BOARDS; bd++) {
+ dr_board_t *bp = &softsp->boards[bd];
+ char *p, *name;
+ int l, minor_num;
+
+ /*
+ * Initialize the board attachment point path
+ * (relative to pseudo) in a form immediately
+ * reusable as a cfgadm command argument.
+ * TODO: clean this up
+ */
+ p = bp->b_path;
+ l = sizeof (bp->b_path);
+ (void) snprintf(p, l, "dr@%d:", instance);
+ while (*p != '\0') {
+ l--;
+ p++;
+ }
+
+ name = p;
+ err = drmach_board_name(bd, p, l);
+ if (err) {
+ sbd_err_clear(&err);
+ rv = DDI_FAILURE;
+ break;
+ }
+
+ minor_num = DR_MAKE_MINOR(instance, bd);
+ rv = ddi_create_minor_node(dip, name, S_IFCHR,
+ minor_num, DDI_NT_SBD_ATTACHMENT_POINT, NULL);
+ if (rv != DDI_SUCCESS)
+ rv = DDI_FAILURE;
+ }
+
+ if (rv == DDI_SUCCESS) {
+ /*
+ * Announce the node's presence.
+ */
+ ddi_report_dev(dip);
+ } else {
+ ddi_remove_minor_node(dip, NULL);
+ }
+ /*
+ * Init registered unsafe devs.
+ */
+ dr_unsafe_devs.devnames = NULL;
+ rv2 = ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ "unsupported-io-drivers", &dr_unsafe_devs.devnames,
+ &dr_unsafe_devs.ndevs);
+
+ if (rv2 != DDI_PROP_SUCCESS)
+ dr_unsafe_devs.ndevs = 0;
+
+ rw_exit(&dr_grwlock);
+ return (rv);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ /*NOTREACHED*/
+}
+
+static int
+dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance;
+ dr_softstate_t *softsp;
+
+ switch (cmd) {
+ case DDI_DETACH:
+ if (!dr_modunload_okay)
+ return (DDI_FAILURE);
+
+ rw_enter(&dr_grwlock, RW_WRITER);
+
+ instance = ddi_get_instance(dip);
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+
+ /* TODO: eliminate dr_boardlist */
+ ASSERT(softsp->boards == dr_boardlist);
+
+ /* remove all minor nodes */
+ ddi_remove_minor_node(dip, NULL);
+
+ if (softsp->dr_initialized) {
+ int bd;
+
+ for (bd = 0; bd < MAX_BOARDS; bd++)
+ dr_board_destroy(&softsp->boards[bd]);
+ }
+
+ FREESTRUCT(softsp->boards, dr_board_t, MAX_BOARDS);
+ mutex_destroy(&softsp->i_lock);
+ ddi_soft_state_free(dr_g.softsp, instance);
+
+ rw_exit(&dr_grwlock);
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+ /*NOTREACHED*/
+}
+
+static int
+dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ _NOTE(ARGUNUSED(dip))
+
+ dev_t dev = (dev_t)arg;
+ int instance, error;
+ dr_softstate_t *softsp;
+
+ *result = NULL;
+ error = DDI_SUCCESS;
+ instance = DR_MINOR2INST(getminor(dev));
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ softsp = ddi_get_soft_state(dr_g.softsp, instance);
+ if (softsp == NULL)
+ return (DDI_FAILURE);
+ *result = (void *)softsp->dip;
+ break;
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)instance;
+ break;
+
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * DR operations.
+ */
+
+static int
+dr_copyin_iocmd(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyin_iocmd";
+ sbd_cmd_t *scp = &hp->h_sbdcmd;
+
+ if (hp->h_iap == NULL)
+ return (EINVAL);
+
+ bzero((caddr_t)scp, sizeof (sbd_cmd_t));
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_cmd32_t scmd32;
+
+ bzero((caddr_t)&scmd32, sizeof (sbd_cmd32_t));
+
+ if (ddi_copyin((void *)hp->h_iap, (void *)&scmd32,
+ sizeof (sbd_cmd32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyin "
+ "sbdcmd-struct", f);
+ return (EFAULT);
+ }
+ scp->cmd_cm.c_id.c_type = scmd32.cmd_cm.c_id.c_type;
+ scp->cmd_cm.c_id.c_unit = scmd32.cmd_cm.c_id.c_unit;
+ bcopy(&scmd32.cmd_cm.c_id.c_name[0],
+ &scp->cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME);
+ scp->cmd_cm.c_flags = scmd32.cmd_cm.c_flags;
+ scp->cmd_cm.c_len = scmd32.cmd_cm.c_len;
+ scp->cmd_cm.c_opts = (caddr_t)(uintptr_t)scmd32.cmd_cm.c_opts;
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_STATUS:
+ scp->cmd_stat.s_nbytes = scmd32.cmd_stat.s_nbytes;
+ scp->cmd_stat.s_statp =
+ (caddr_t)(uintptr_t)scmd32.cmd_stat.s_statp;
+ break;
+ default:
+ break;
+
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyin((void *)hp->h_iap, (void *)scp,
+ sizeof (sbd_cmd_t), hp->h_mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyin sbdcmd-struct", f);
+ return (EFAULT);
+ }
+
+ if ((hp->h_opts.size = scp->cmd_cm.c_len) != 0) {
+ hp->h_opts.copts = GETSTRUCT(char, scp->cmd_cm.c_len + 1);
+ ++hp->h_opts.size;
+ if (ddi_copyin((void *)scp->cmd_cm.c_opts,
+ (void *)hp->h_opts.copts,
+ scp->cmd_cm.c_len, hp->h_mode) != 0) {
+ cmn_err(CE_WARN, "%s: failed to copyin options", f);
+ return (EFAULT);
+ }
+ }
+
+ return (0);
+}
+
+static int
+dr_copyout_iocmd(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyout_iocmd";
+ sbd_cmd_t *scp = &hp->h_sbdcmd;
+
+ if (hp->h_iap == NULL)
+ return (EINVAL);
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_cmd32_t scmd32;
+
+ scmd32.cmd_cm.c_id.c_type = scp->cmd_cm.c_id.c_type;
+ scmd32.cmd_cm.c_id.c_unit = scp->cmd_cm.c_id.c_unit;
+ bcopy(&scp->cmd_cm.c_id.c_name[0],
+ &scmd32.cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME);
+
+ scmd32.cmd_cm.c_flags = scp->cmd_cm.c_flags;
+ scmd32.cmd_cm.c_len = scp->cmd_cm.c_len;
+ scmd32.cmd_cm.c_opts = (caddr32_t)(uintptr_t)scp->cmd_cm.c_opts;
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_GETNCM:
+ scmd32.cmd_getncm.g_ncm = scp->cmd_getncm.g_ncm;
+ break;
+ default:
+ break;
+ }
+
+ if (ddi_copyout((void *)&scmd32, (void *)hp->h_iap,
+ sizeof (sbd_cmd32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyout "
+ "sbdcmd-struct", f);
+ return (EFAULT);
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyout((void *)scp, (void *)hp->h_iap,
+ sizeof (sbd_cmd_t), hp->h_mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout sbdcmd-struct", f);
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+dr_copyout_errs(dr_handle_t *hp)
+{
+ static fn_t f = "dr_copyout_errs";
+
+ if (hp->h_err == NULL)
+ return (0);
+
+ if (hp->h_err->e_code) {
+ PR_ALL("%s: error %d %s",
+ f, hp->h_err->e_code, hp->h_err->e_rsc);
+ }
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) {
+ sbd_error32_t *serr32p;
+
+ serr32p = GETSTRUCT(sbd_error32_t, 1);
+
+ serr32p->e_code = hp->h_err->e_code;
+ bcopy(&hp->h_err->e_rsc[0], &serr32p->e_rsc[0],
+ MAXPATHLEN);
+ if (ddi_copyout((void *)serr32p,
+ (void *)&((sbd_ioctl_arg32_t *)hp->h_iap)->i_err,
+ sizeof (sbd_error32_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: (32bit) failed to copyout", f);
+ return (EFAULT);
+ }
+ FREESTRUCT(serr32p, sbd_error32_t, 1);
+ } else
+#endif /* _MULTI_DATAMODEL */
+ if (ddi_copyout((void *)hp->h_err,
+ (void *)&hp->h_iap->i_err,
+ sizeof (sbd_error_t), hp->h_mode)) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout", f);
+ return (EFAULT);
+ }
+
+ sbd_err_clear(&hp->h_err);
+
+ return (0);
+
+}
+
+/*
+ * The pre-op entry point must call sbd_err_set_c(), if needed.
+ * A non-zero return value indicates failure.
+ */
+static int
+dr_pre_op(dr_handle_t *hp)
+{
+ int rv = 0, t;
+ int cmd, serr = 0;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ dr_handle_t *shp = hp;
+ static fn_t f = "dr_pre_op";
+
+ cmd = hp->h_cmd;
+ devset = shp->h_devset;
+
+ PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd));
+
+ devset = DEVSET_AND(devset, DR_DEVS_PRESENT(bp));
+ hp->h_err = drmach_pre_op(cmd, bp->b_id, &hp->h_opts, &devset);
+ if (hp->h_err != NULL) {
+ PR_ALL("drmach_pre_op failed for cmd %s(%d)\n",
+ SBD_CMD_STR(cmd), cmd);
+ return (-1);
+ }
+
+ /*
+ * Check for valid state transitions.
+ */
+ if ((t = CMD2INDEX(cmd)) != -1) {
+ struct dr_state_trans *transp;
+ int state_err;
+
+ transp = &dr_state_transition[t];
+ ASSERT(transp->x_cmd == cmd);
+
+ state_err = dr_check_transition(bp, &devset, transp, cmd);
+
+ if (state_err < 0) {
+ /*
+ * Invalidate device.
+ */
+ dr_op_err(CE_IGNORE, hp, ESBD_INVAL, NULL);
+ serr = -1;
+ PR_ALL("%s: invalid devset (0x%x)\n",
+ f, (uint_t)devset);
+ } else if (state_err != 0) {
+ /*
+ * The requested state transition is not valid.
+ */
+ dr_op_err(CE_IGNORE, hp,
+ transp->x_op[state_err].x_err, NULL);
+
+ serr = transp->x_op[state_err].x_rv;
+
+ PR_ALL("%s: invalid state %s(%d) for cmd %s(%d)\n",
+ f, state_str[state_err], state_err,
+ SBD_CMD_STR(cmd), cmd);
+ } else {
+ shp->h_devset = devset;
+ }
+ }
+
+ if (serr) {
+ rv = -1;
+ }
+
+ return (rv);
+}
+
+static int
+dr_post_op(dr_handle_t *hp, int rv)
+{
+ int cmd;
+ sbd_error_t *err;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_post_op";
+
+ cmd = hp->h_cmd;
+
+ PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd));
+
+ err = drmach_post_op(cmd, bp->b_id, &hp->h_opts, rv);
+ if (err != NULL) {
+ PR_ALL("drmach_post_op failed for cmd %s(%d)\n",
+ SBD_CMD_STR(cmd), cmd);
+ if (rv == 0) {
+ ASSERT(hp->h_err == NULL);
+ hp->h_err = err;
+ rv = -1;
+ } else if (hp->h_err == NULL) {
+ hp->h_err = err;
+ } else {
+ sbd_err_clear(&err);
+ }
+ }
+
+ return (rv);
+}
+
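+/*
+ * Dispatch the validated command to its handler. Any error left in
+ * the handle forces a failure return.
+ */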
+static int
+dr_exec_op(dr_handle_t *hp)
+{
+ int rv = 0;
+ static fn_t f = "dr_exec_op";
+
+ /* errors should have been caught by now */
+ ASSERT(hp->h_err == NULL);
+
+ switch (hp->h_cmd) {
+ case SBD_CMD_ASSIGN:
+ dr_assign_board(hp);
+ break;
+
+ case SBD_CMD_UNASSIGN:
+ dr_unassign_board(hp);
+ break;
+
+ case SBD_CMD_POWEROFF:
+ dr_poweroff_board(hp);
+ break;
+
+ case SBD_CMD_POWERON:
+ dr_poweron_board(hp);
+ break;
+
+ case SBD_CMD_TEST:
+ dr_test_board(hp);
+ break;
+
+ case SBD_CMD_CONNECT:
+ dr_connect(hp);
+ break;
+
+ case SBD_CMD_CONFIGURE:
+ dr_dev_configure(hp);
+ break;
+
+ case SBD_CMD_UNCONFIGURE:
+ dr_dev_release(hp);
+ if (hp->h_err == NULL)
+ rv = dr_dev_unconfigure(hp);
+ else
+ dr_dev_cancel(hp);
+ break;
+
+ case SBD_CMD_DISCONNECT:
+ rv = dr_disconnect(hp);
+ break;
+
+ case SBD_CMD_STATUS:
+ rv = dr_dev_status(hp);
+ break;
+
+ case SBD_CMD_GETNCM:
+ hp->h_sbdcmd.cmd_getncm.g_ncm = dr_get_ncm(hp);
+ rv = dr_copyout_iocmd(hp);
+ break;
+
+ case SBD_CMD_PASSTHRU:
+ rv = dr_pt_ioctl(hp);
+ break;
+
+ default:
+ cmn_err(CE_WARN,
+ "%s: unknown command (%d)",
+ f, hp->h_cmd);
+ break;
+ }
+
+ if (hp->h_err != NULL) {
+ rv = -1;
+ }
+
+ return (rv);
+}
+
+static void
+dr_assign_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_assign(bp->b_num, &bp->b_id);
+ if (hp->h_err == NULL) {
+ bp->b_assigned = 1;
+ }
+}
+
+static void
+dr_unassign_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+ * Block out status during unassign.
+ * Not doing cv_wait_sig here as starfire SSP software
+ * ignores unassign failure and removes board from
+ * domain mask causing system panic.
+ * TODO: Change cv_wait to cv_wait_sig when SSP software
+ * handles unassign failure.
+ */
+ dr_lock_status(bp);
+
+ hp->h_err = drmach_board_unassign(bp->b_id);
+ if (hp->h_err == NULL) {
+ /*
+ * clear drmachid_t handle; not valid after board unassign
+ */
+ bp->b_id = 0;
+ bp->b_assigned = 0;
+ }
+
+ dr_unlock_status(bp);
+}
+
+static void
+dr_poweron_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_poweron(bp->b_id);
+}
+
+static void
+dr_poweroff_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ hp->h_err = drmach_board_poweroff(bp->b_id);
+}
+
+static void
+dr_test_board(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ hp->h_err = drmach_board_test(bp->b_id, &hp->h_opts,
+ dr_cmd_flags(hp) & SBD_FLAG_FORCE);
+}
+
+/*
+ * Create and populate the component nodes for a board. Assumes that the
+ * devlists for the board have been initialized.
+ */
+static void
+dr_make_comp_nodes(dr_board_t *bp)
+{
+ int i;
+
+ /*
+ * Make nodes for the individual components on the board.
+	 * First we need to initialize the memory unit data structures
+	 * of the board structure.
+ */
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+
+ mp = dr_get_mem_unit(bp, i);
+ dr_init_mem_unit(mp);
+ }
+
+ /*
+ * Initialize cpu unit data structures.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+
+ cp = dr_get_cpu_unit(bp, i);
+ dr_init_cpu_unit(cp);
+ }
+
+ /*
+ * Initialize io unit data structures.
+ */
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ ip = dr_get_io_unit(bp, i);
+ dr_init_io_unit(ip);
+ }
+
+ dr_board_transition(bp, DR_STATE_CONNECTED);
+
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_cond = SBD_COND_OK;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+}
+
+/*
+ * Only do work if called to operate on an entire board
+ * which doesn't already have components present.
+ */
+static void
+dr_connect(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_connect";
+
+ PR_ALL("%s...\n", f);
+
+ if (DR_DEVS_PRESENT(bp)) {
+ /*
+ * Board already has devices present.
+ */
+ PR_ALL("%s: devices already present (" DEVSET_FMT_STR ")\n",
+ f, DEVSET_FMT_ARG(DR_DEVS_PRESENT(bp)));
+ return;
+ }
+
+ hp->h_err = drmach_board_connect(bp->b_id, &hp->h_opts);
+ if (hp->h_err)
+ return;
+
+ hp->h_err = dr_init_devlists(bp);
+ if (hp->h_err)
+ return;
+ else if (bp->b_ndev == 0) {
+ dr_op_err(CE_WARN, hp, ESBD_EMPTY_BD, bp->b_path);
+ return;
+ } else {
+ dr_make_comp_nodes(bp);
+ return;
+ }
+ /*NOTREACHED*/
+}
+
+static int
+dr_disconnect(dr_handle_t *hp)
+{
+ int i;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_disconnect";
+
+ PR_ALL("%s...\n", f);
+
+ /*
+	 * Only devices which are present but
+ * unattached can be disconnected.
+ */
+ devset = hp->h_devset & DR_DEVS_PRESENT(bp) &
+ DR_DEVS_UNATTACHED(bp);
+
+ if ((devset == 0) && DR_DEVS_PRESENT(bp)) {
+ dr_op_err(CE_IGNORE, hp, ESBD_EMPTY_BD, bp->b_path);
+ return (0);
+ }
+
+ /*
+ * Block out status during disconnect.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ hp->h_err = drmach_board_disconnect(bp->b_id, &hp->h_opts);
+ if (hp->h_err && hp->h_err->e_code == EX86_WALK_DEPENDENCY) {
+ /*
+ * Other boards have dependency on this board. No device nodes
+ * have been destroyed so keep current board status.
+ */
+ goto disconnect_done;
+ }
+
+ DR_DEVS_DISCONNECT(bp, devset);
+
+ ASSERT((DR_DEVS_ATTACHED(bp) & devset) == 0);
+
+ /*
+ * Update per-device state transitions.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, i);
+ if (dr_disconnect_cpu(cp) == 0)
+ dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY);
+ else if (cp->sbc_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &cp->sbc_cm.sbdev_error);
+
+ ASSERT(cp->sbc_cm.sbdev_error == NULL);
+ }
+
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i))
+ continue;
+
+ mp = dr_get_mem_unit(bp, i);
+ if (dr_disconnect_mem(mp) == 0)
+ dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY);
+ else if (mp->sbm_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &mp->sbm_cm.sbdev_error);
+
+ ASSERT(mp->sbm_cm.sbdev_error == NULL);
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i))
+ continue;
+
+ ip = dr_get_io_unit(bp, i);
+ if (dr_disconnect_io(ip) == 0)
+ dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY);
+ else if (ip->sbi_cm.sbdev_error != NULL)
+ DRERR_SET_C(&hp->h_err, &ip->sbi_cm.sbdev_error);
+
+ ASSERT(ip->sbi_cm.sbdev_error == NULL);
+ }
+
+ if (hp->h_err) {
+ /*
+ * For certain errors, drmach_board_disconnect will mark
+ * the board as unusable; in these cases the devtree must
+ * be purged so that status calls will succeed.
+ * XXX
+ * This implementation checks for discrete error codes -
+ * someday, the i/f to drmach_board_disconnect should be
+ * changed to avoid the e_code testing.
+ */
+ if (hp->h_err->e_code == EX86_DEPROBE) {
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ if (drmach_board_deprobe(bp->b_id))
+ goto disconnect_done;
+ else
+ bp->b_ndev = 0;
+ }
+ }
+
+ /*
+	 * Once all the components on a board have been disconnected,
+	 * the board's state can transition to disconnected and
+	 * we can allow the deprobe to take place.
+ */
+ if (hp->h_err == NULL && DR_DEVS_PRESENT(bp) == 0) {
+ dr_board_transition(bp, DR_STATE_OCCUPIED);
+ bp->b_rstate = SBD_STAT_DISCONNECTED;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ hp->h_err = drmach_board_deprobe(bp->b_id);
+
+ if (hp->h_err == NULL) {
+ bp->b_ndev = 0;
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+ }
+
+disconnect_done:
+ dr_unlock_status(bp);
+
+ return (0);
+}
+
+/*
+ * Check if a particular device is a valid target of the current
+ * operation. Return 1 if it is a valid target, and 0 otherwise.
+ */
+static int
+dr_dev_is_target(dr_dev_unit_t *dp, int present_only, uint_t uset)
+{
+ dr_common_unit_t *cp;
+ int is_present;
+ int is_attached;
+
+ cp = &dp->du_common;
+
+ /* check if the user requested this device */
+ if ((uset & (1 << cp->sbdev_unum)) == 0) {
+ return (0);
+ }
+
+ is_present = DR_DEV_IS_PRESENT(cp) ? 1 : 0;
+ is_attached = DR_DEV_IS_ATTACHED(cp) ? 1 : 0;
+
+ /*
+ * If the present_only flag is set, a valid target
+ * must be present but not attached. Otherwise, it
+ * must be both present and attached.
+ */
+ if (is_present && (present_only ^ is_attached)) {
+ /* sanity check */
+ ASSERT(cp->sbdev_id != (drmachid_t)0);
+
+ return (1);
+ }
+
+ return (0);
+}
+
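+/*
+ * Build the list of common unit structures targeted by the current
+ * operation. The length of the allocation is stashed in the element
+ * just ahead of the returned list pointer so that dr_dev_clean_up()
+ * can free the storage without recomputing it.
+ */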
+static void
+dr_dev_make_list(dr_handle_t *hp, sbd_comp_type_t type, int present_only,
+ dr_common_unit_t ***devlist, int *devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int unum;
+ int nunits;
+ uint_t uset;
+ int len;
+ dr_common_unit_t **list, **wp;
+
+ switch (type) {
+ case SBD_COMP_CPU:
+ nunits = MAX_CPU_UNITS_PER_BOARD;
+ break;
+ case SBD_COMP_MEM:
+ nunits = MAX_MEM_UNITS_PER_BOARD;
+ break;
+ case SBD_COMP_IO:
+ nunits = MAX_IO_UNITS_PER_BOARD;
+ break;
+	default:
+		/* catch this in debug kernels */
+		ASSERT(0);
+		nunits = 0;
+		break;
+ }
+
+ /* allocate list storage. */
+ len = sizeof (dr_common_unit_t *) * (nunits + 1);
+ list = kmem_zalloc(len, KM_SLEEP);
+
+ /* record length of storage in first element */
+ *list++ = (dr_common_unit_t *)(uintptr_t)len;
+
+ /* get bit array signifying which units are to be involved */
+ uset = DEVSET_GET_UNITSET(hp->h_devset, type);
+
+ /*
+ * Adjust the loop count for CPU devices since all cores
+ * in a CMP will be examined in a single iteration.
+ */
+ if (type == SBD_COMP_CPU) {
+ nunits = MAX_CMP_UNITS_PER_BOARD;
+ }
+
+ /* populate list */
+ for (wp = list, unum = 0; unum < nunits; unum++) {
+ dr_dev_unit_t *dp;
+ int core;
+ int cunum;
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, type, unum);
+ if (dr_dev_is_target(dp, present_only, uset)) {
+ *wp++ = &dp->du_common;
+ }
+
+ /* further processing is only required for CPUs */
+ if (type != SBD_COMP_CPU) {
+ continue;
+ }
+
+ /*
+ * Add any additional cores from the current CPU
+ * device. This is to ensure that all the cores
+ * are grouped together in the device list, and
+ * consequently sequenced together during the actual
+ * operation.
+ */
+ for (core = 1; core < MAX_CORES_PER_CMP; core++) {
+ cunum = DR_CMP_CORE_UNUM(unum, core);
+ dp = DR_GET_BOARD_DEVUNIT(bp, type, cunum);
+
+ if (dr_dev_is_target(dp, present_only, uset)) {
+ *wp++ = &dp->du_common;
+ }
+ }
+ }
+
+ /* calculate number of units in list, return result and list pointer */
+ *devnum = wp - list;
+ *devlist = list;
+}
+
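+/*
+ * Move the first unit error encountered in the list to the handle if
+ * the handle has none, discard any remaining unit errors, and free
+ * the list storage allocated by dr_dev_make_list().
+ */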
+static void
+dr_dev_clean_up(dr_handle_t *hp, dr_common_unit_t **list, int devnum)
+{
+ int len;
+ int n = 0;
+ dr_common_unit_t *cp, **rp = list;
+
+ /*
+ * move first encountered unit error to handle if handle
+ * does not yet have a recorded error.
+ */
+ if (hp->h_err == NULL) {
+ while (n++ < devnum) {
+ cp = *rp++;
+ if (cp->sbdev_error != NULL) {
+ hp->h_err = cp->sbdev_error;
+ cp->sbdev_error = NULL;
+ break;
+ }
+ }
+ }
+
+ /* free remaining unit errors */
+ while (n++ < devnum) {
+ cp = *rp++;
+ if (cp->sbdev_error != NULL) {
+ sbd_err_clear(&cp->sbdev_error);
+ cp->sbdev_error = NULL;
+ }
+ }
+
+ /* free list */
+ list -= 1;
+ len = (int)(uintptr_t)list[0];
+ kmem_free(list, len);
+}
+
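+/*
+ * Common driver for the attach, release and detach operations: build
+ * the target device list, then run the pre-op, per-unit op, post-op
+ * and board-op callbacks over it. The per-unit, post-op and board-op
+ * callbacks only run when the list is non-empty and the pre-op
+ * callback succeeds.
+ */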
+static int
+dr_dev_walk(dr_handle_t *hp, sbd_comp_type_t type, int present_only,
+ int (*pre_op)(dr_handle_t *, dr_common_unit_t **, int),
+ void (*op)(dr_handle_t *, dr_common_unit_t *),
+ int (*post_op)(dr_handle_t *, dr_common_unit_t **, int),
+ void (*board_op)(dr_handle_t *, dr_common_unit_t **, int))
+{
+ int devnum, rv;
+ dr_common_unit_t **devlist;
+
+ dr_dev_make_list(hp, type, present_only, &devlist, &devnum);
+
+ rv = 0;
+ if (devnum > 0) {
+ rv = (*pre_op)(hp, devlist, devnum);
+ if (rv == 0) {
+ int n;
+
+ for (n = 0; n < devnum; n++)
+ (*op)(hp, devlist[n]);
+
+ rv = (*post_op)(hp, devlist, devnum);
+
+ (*board_op)(hp, devlist, devnum);
+ }
+ }
+
+ dr_dev_clean_up(hp, devlist, devnum);
+ return (rv);
+}
+
+/*ARGSUSED*/
+static int
+dr_dev_noop(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (0);
+}
+
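+/*
+ * Board-op callback for configure: mark each successfully attached
+ * unit CONFIGURED, then advance the board to the PARTIAL or
+ * CONFIGURED state depending on whether any devices remain
+ * unattached.
+ */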
+static void
+dr_attach_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int i;
+ dr_devset_t devs_unattached, devs_present;
+ static fn_t f = "dr_attach_update_state";
+
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+
+ if (dr_check_unit_attached(cp) == -1) {
+ PR_ALL("%s: ERROR %s not attached\n",
+ f, cp->sbdev_path);
+ continue;
+ }
+
+ DR_DEV_SET_ATTACHED(cp);
+
+ dr_device_transition(cp, DR_STATE_CONFIGURED);
+ cp->sbdev_cond = SBD_COND_OK;
+ }
+
+ devs_present = DR_DEVS_PRESENT(bp);
+ devs_unattached = DR_DEVS_UNATTACHED(bp);
+
+ switch (bp->b_state) {
+ case DR_STATE_CONNECTED:
+ case DR_STATE_UNCONFIGURED:
+ ASSERT(devs_present);
+
+ if (devs_unattached == 0) {
+ /*
+ * All devices finally attached.
+ */
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_cond = SBD_COND_OK;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ } else if (devs_present != devs_unattached) {
+ /*
+ * Only some devices are fully attached.
+ */
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+ break;
+
+ case DR_STATE_PARTIAL:
+ ASSERT(devs_present);
+ /*
+ * All devices finally attached.
+ */
+ if (devs_unattached == 0) {
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ hp->h_bd->b_rstate = SBD_STAT_CONNECTED;
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_cond = SBD_COND_OK;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
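+/*
+ * Attach CPUs first, then memory, then IO, stopping at the first
+ * device class whose walk fails.
+ */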
+static void
+dr_dev_configure(dr_handle_t *hp)
+{
+ int rv;
+
+ rv = dr_dev_walk(hp, SBD_COMP_CPU, 1,
+ dr_pre_attach_cpu,
+ dr_attach_cpu,
+ dr_post_attach_cpu,
+ dr_attach_update_state);
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_MEM, 1,
+ dr_pre_attach_mem,
+ dr_attach_mem,
+ dr_post_attach_mem,
+ dr_attach_update_state);
+ }
+
+ if (rv >= 0) {
+ (void) dr_dev_walk(hp, SBD_COMP_IO, 1,
+ dr_pre_attach_io,
+ dr_attach_io,
+ dr_post_attach_io,
+ dr_attach_update_state);
+ }
+}
+
+static void
+dr_release_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ _NOTE(ARGUNUSED(devlist))
+ _NOTE(ARGUNUSED(devnum))
+
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+ * If the entire board was released and all components
+ * unreferenced then transfer it to the UNREFERENCED state.
+ */
+ if ((bp->b_state != DR_STATE_RELEASE) &&
+ (DR_DEVS_RELEASED(bp) == DR_DEVS_ATTACHED(bp))) {
+ dr_board_transition(bp, DR_STATE_RELEASE);
+ hp->h_bd->b_busy = 1;
+ }
+}
+
+/* called by dr_release_done [below] and dr_release_mem_done [dr_mem.c] */
+int
+dr_release_dev_done(dr_common_unit_t *cp)
+{
+ if (cp->sbdev_state == DR_STATE_RELEASE) {
+ ASSERT(DR_DEV_IS_RELEASED(cp));
+
+ DR_DEV_SET_UNREFERENCED(cp);
+
+ dr_device_transition(cp, DR_STATE_UNREFERENCED);
+
+ return (0);
+ } else {
+ return (-1);
+ }
+}
+
+static void
+dr_release_done(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ _NOTE(ARGUNUSED(hp))
+
+ dr_board_t *bp;
+ static fn_t f = "dr_release_done";
+
+ PR_ALL("%s...\n", f);
+
+ /* get board pointer & sanity check */
+ bp = cp->sbdev_bp;
+ ASSERT(bp == hp->h_bd);
+
+ /*
+ * Transfer the device which just completed its release
+ * to the UNREFERENCED state.
+ */
+ switch (cp->sbdev_type) {
+ case SBD_COMP_MEM:
+ dr_release_mem_done(cp);
+ break;
+
+ default:
+ DR_DEV_SET_RELEASED(cp);
+
+ dr_device_transition(cp, DR_STATE_RELEASE);
+
+ (void) dr_release_dev_done(cp);
+ break;
+ }
+
+ /*
+	 * If the board is in the RELEASE state and all the devices
+	 * that have been released are now unreferenced, transfer the
+	 * board to the UNREFERENCED state.
+ */
+ if ((bp->b_state == DR_STATE_RELEASE) &&
+ (DR_DEVS_RELEASED(bp) == DR_DEVS_UNREFERENCED(bp))) {
+ dr_board_transition(bp, DR_STATE_UNREFERENCED);
+ bp->b_busy = 1;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+}
+
+static void
+dr_dev_release_mem(dr_handle_t *hp, dr_common_unit_t *dv)
+{
+ dr_release_mem(dv);
+ dr_release_done(hp, dv);
+}
+
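+/*
+ * Release CPUs, memory and IO in turn, marking the board busy for
+ * the duration of the operation.
+ */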
+static void
+dr_dev_release(dr_handle_t *hp)
+{
+ int rv;
+
+ hp->h_bd->b_busy = 1;
+
+ rv = dr_dev_walk(hp, SBD_COMP_CPU, 0,
+ dr_pre_release_cpu,
+ dr_release_done,
+ dr_dev_noop,
+ dr_release_update_state);
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_MEM, 0,
+ dr_pre_release_mem,
+ dr_dev_release_mem,
+ dr_dev_noop,
+ dr_release_update_state);
+ }
+
+ if (rv >= 0) {
+ rv = dr_dev_walk(hp, SBD_COMP_IO, 0,
+ dr_pre_release_io,
+ dr_release_done,
+ dr_dev_noop,
+ dr_release_update_state);
+	}
+
+ if (rv < 0)
+ hp->h_bd->b_busy = 0;
+ /* else, b_busy will be cleared in dr_detach_update_state() */
+}
+
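+/*
+ * Board-op callback for unconfigure: reset the state of each fully
+ * detached unit and move the board to the UNCONFIGURED or PARTIAL
+ * state as appropriate.
+ */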
+static void
+dr_detach_update_state(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ dr_board_t *bp = hp->h_bd;
+ int i;
+ dr_state_t bstate;
+ static fn_t f = "dr_detach_update_state";
+
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+
+ if (dr_check_unit_attached(cp) >= 0) {
+ /*
+ * Device is still attached probably due
+ * to an error. Need to keep track of it.
+ */
+ PR_ALL("%s: ERROR %s not detached\n",
+ f, cp->sbdev_path);
+
+ continue;
+ }
+
+ DR_DEV_CLR_ATTACHED(cp);
+ DR_DEV_CLR_RELEASED(cp);
+ DR_DEV_CLR_UNREFERENCED(cp);
+ dr_device_transition(cp, DR_STATE_UNCONFIGURED);
+ }
+
+ bstate = bp->b_state;
+ if (bstate != DR_STATE_UNCONFIGURED) {
+ if (DR_DEVS_PRESENT(bp) == DR_DEVS_UNATTACHED(bp)) {
+ /*
+ * All devices are finally detached.
+ */
+ dr_board_transition(bp, DR_STATE_UNCONFIGURED);
+ hp->h_bd->b_ostate = SBD_STAT_UNCONFIGURED;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ } else if ((bp->b_state != DR_STATE_PARTIAL) &&
+ (DR_DEVS_ATTACHED(bp) !=
+ DR_DEVS_PRESENT(bp))) {
+ /*
+ * Some devices remain attached.
+ */
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+
+ if ((hp->h_devset & DR_DEVS_UNATTACHED(bp)) == hp->h_devset)
+ hp->h_bd->b_busy = 0;
+ }
+}
+
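+/*
+ * Detach IO devices first, holding the status lock while their
+ * devinfo branches are destroyed, then detach CPUs and finally
+ * memory.
+ */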
+static int
+dr_dev_unconfigure(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+
+ /*
+ * Block out status during IO unconfig.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ (void) dr_dev_walk(hp, SBD_COMP_IO, 0,
+ dr_pre_detach_io,
+ dr_detach_io,
+ dr_post_detach_io,
+ dr_detach_update_state);
+
+ dr_unlock_status(bp);
+
+ (void) dr_dev_walk(hp, SBD_COMP_CPU, 0,
+ dr_pre_detach_cpu,
+ dr_detach_cpu,
+ dr_post_detach_cpu,
+ dr_detach_update_state);
+
+ (void) dr_dev_walk(hp, SBD_COMP_MEM, 0,
+ dr_pre_detach_mem,
+ dr_detach_mem,
+ dr_post_detach_mem,
+ dr_detach_update_state);
+
+ return (0);
+}
+
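+/*
+ * Undo a release that could not be completed: return the released
+ * units to the CONFIGURED state (or FATAL on failure) and restore
+ * the board state accordingly.
+ */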
+static void
+dr_dev_cancel(dr_handle_t *hp)
+{
+ int i;
+ dr_devset_t devset;
+ dr_board_t *bp = hp->h_bd;
+ static fn_t f = "dr_dev_cancel";
+
+ PR_ALL("%s...\n", f);
+
+ /*
+ * Only devices which have been "released" are
+ * subject to cancellation.
+ */
+ devset = hp->h_devset & DR_DEVS_RELEASED(bp);
+
+ /*
+ * Nothing to do for CPUs or IO other than change back
+ * their state.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dr_cpu_unit_t *cp;
+ dr_state_t nstate;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, i);
+ if (dr_cancel_cpu(cp) == 0)
+ nstate = DR_STATE_CONFIGURED;
+ else
+ nstate = DR_STATE_FATAL;
+
+ dr_device_transition(&cp->sbc_cm, nstate);
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dr_io_unit_t *ip;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i))
+ continue;
+ ip = dr_get_io_unit(bp, i);
+ dr_device_transition(&ip->sbi_cm, DR_STATE_CONFIGURED);
+ }
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dr_mem_unit_t *mp;
+ dr_state_t nstate;
+
+ if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i))
+ continue;
+
+ mp = dr_get_mem_unit(bp, i);
+ if (dr_cancel_mem(mp) == 0)
+ nstate = DR_STATE_CONFIGURED;
+ else
+ nstate = DR_STATE_FATAL;
+
+ dr_device_transition(&mp->sbm_cm, nstate);
+ }
+
+ PR_ALL("%s: unreleasing devset (0x%x)\n", f, (uint_t)devset);
+
+ DR_DEVS_CANCEL(bp, devset);
+
+ if (DR_DEVS_RELEASED(bp) == 0) {
+ dr_state_t new_state;
+ /*
+ * If the board no longer has any released devices
+		 * then transfer it back to the CONFIGURED/PARTIAL state.
+ */
+ if (DR_DEVS_ATTACHED(bp) == DR_DEVS_PRESENT(bp))
+ new_state = DR_STATE_CONFIGURED;
+ else
+ new_state = DR_STATE_PARTIAL;
+ if (bp->b_state != new_state) {
+ dr_board_transition(bp, new_state);
+ }
+ hp->h_bd->b_ostate = SBD_STAT_CONFIGURED;
+ hp->h_bd->b_busy = 0;
+ (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time);
+ }
+}
+
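+/*
+ * Gather board and component status into a scratch buffer and copy
+ * it out to the caller-supplied buffer, converting the structures
+ * for 32-bit callers when necessary.
+ */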
+static int
+dr_dev_status(dr_handle_t *hp)
+{
+ int nstat, mode, ncm, sz, pbsz, pnstat;
+ dr_handle_t *shp;
+ dr_devset_t devset = 0;
+ sbd_stat_t *dstatp = NULL;
+ sbd_dev_stat_t *devstatp;
+ dr_board_t *bp;
+ drmach_status_t pstat;
+ int rv = 0;
+
+#ifdef _MULTI_DATAMODEL
+ int sz32 = 0;
+#endif /* _MULTI_DATAMODEL */
+
+ static fn_t f = "dr_dev_status";
+
+ PR_ALL("%s...\n", f);
+
+ mode = hp->h_mode;
+ shp = hp;
+ devset = shp->h_devset;
+ bp = hp->h_bd;
+
+ /*
+ * Block out disconnect, unassign, IO unconfigure and
+ * devinfo branch creation during status.
+ */
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK) {
+ if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) {
+ mutex_exit(&bp->b_slock);
+ return (EINTR);
+ }
+ }
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+
+ ncm = 1;
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_type == SBD_COMP_NONE) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_ALLCMP) {
+ /*
+ * Calculate the maximum number of components possible
+ * for a board. This number will be used to size the
+ * status scratch buffer used by board and component
+ * status functions.
+ * This buffer may differ in size from what is provided
+ * by the plugin, since the known component set on the
+			 * board may change between the plugin's GETNCM call and
+ * the status call. Sizing will be adjusted to the plugin's
+ * receptacle buffer at copyout time.
+ */
+ ncm = MAX_CPU_UNITS_PER_BOARD +
+ MAX_MEM_UNITS_PER_BOARD +
+ MAX_IO_UNITS_PER_BOARD;
+
+ } else {
+ /*
+ * In the case of c_type == SBD_COMP_NONE, and
+ * SBD_FLAG_ALLCMP not specified, only the board
+ * info is to be returned, no components.
+ */
+ ncm = 0;
+ devset = 0;
+ }
+ }
+
+ sz = sizeof (sbd_stat_t);
+ if (ncm > 1)
+ sz += sizeof (sbd_dev_stat_t) * (ncm - 1);
+
+ pbsz = (int)hp->h_sbdcmd.cmd_stat.s_nbytes;
+ pnstat = (pbsz - sizeof (sbd_stat_t)) / sizeof (sbd_dev_stat_t);
+
+ /*
+ * s_nbytes describes the size of the preallocated user
+	 * buffer into which the application is expecting to
+ * receive the sbd_stat_t and sbd_dev_stat_t structures.
+ */
+
+#ifdef _MULTI_DATAMODEL
+
+ /*
+ * More buffer space is required for the 64bit to 32bit
+ * conversion of data structures.
+ */
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ sz32 = sizeof (sbd_stat32_t);
+ if (ncm > 1)
+ sz32 += sizeof (sbd_dev_stat32_t) * (ncm - 1);
+ pnstat = (pbsz - sizeof (sbd_stat32_t))/
+ sizeof (sbd_dev_stat32_t);
+ }
+
+ sz += sz32;
+#endif
+ /*
+ * Since one sbd_dev_stat_t is included in the sbd_stat_t,
+ * increment the plugin's nstat count.
+ */
+ ++pnstat;
+
+ if (bp->b_id == 0) {
+ bzero(&pstat, sizeof (pstat));
+ } else {
+ sbd_error_t *err;
+
+ err = drmach_status(bp->b_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&hp->h_err, &err);
+ rv = EIO;
+ goto status_done;
+ }
+ }
+
+ dstatp = (sbd_stat_t *)(void *)GETSTRUCT(char, sz);
+
+ devstatp = &dstatp->s_stat[0];
+
+ dstatp->s_board = bp->b_num;
+
+ /*
+ * Detect transitions between empty and disconnected.
+ */
+ if (!pstat.empty && (bp->b_rstate == SBD_STAT_EMPTY))
+ bp->b_rstate = SBD_STAT_DISCONNECTED;
+ else if (pstat.empty && (bp->b_rstate == SBD_STAT_DISCONNECTED))
+ bp->b_rstate = SBD_STAT_EMPTY;
+
+ dstatp->s_rstate = bp->b_rstate;
+ dstatp->s_ostate = bp->b_ostate;
+ dstatp->s_cond = bp->b_cond = pstat.cond;
+ dstatp->s_busy = bp->b_busy | pstat.busy;
+ dstatp->s_time = bp->b_time;
+ dstatp->s_power = pstat.powered;
+ dstatp->s_assigned = bp->b_assigned = pstat.assigned;
+ dstatp->s_nstat = nstat = 0;
+ bcopy(&pstat.type[0], &dstatp->s_type[0], SBD_TYPE_LEN);
+ bcopy(&pstat.info[0], &dstatp->s_info[0], SBD_MAX_INFO);
+
+ devset &= DR_DEVS_PRESENT(bp);
+ if (devset == 0) {
+ /*
+ * No device chosen.
+ */
+ PR_ALL("%s: no device present\n", f);
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT))
+ if ((nstat = dr_cpu_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT))
+ if ((nstat = dr_mem_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT))
+ if ((nstat = dr_io_status(hp, devset, devstatp)) > 0) {
+ dstatp->s_nstat += nstat;
+ devstatp += nstat;
+ }
+
+ /*
+ * Due to a possible change in number of components between
+ * the time of plugin's GETNCM call and now, there may be
+ * more or less components than the plugin's buffer can
+ * hold. Adjust s_nstat accordingly.
+ */
+
+ dstatp->s_nstat = dstatp->s_nstat > pnstat ? pnstat : dstatp->s_nstat;
+
+#ifdef _MULTI_DATAMODEL
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ int i, j;
+ sbd_stat32_t *dstat32p;
+
+ dstat32p = (sbd_stat32_t *)devstatp;
+
+ /* Alignment Paranoia */
+ if ((ulong_t)dstat32p & 0x1) {
+ PR_ALL("%s: alignment: sz=0x%lx dstat32p=0x%p\n",
+ f, sizeof (sbd_stat32_t), (void *)dstat32p);
+ DR_OP_INTERNAL_ERROR(hp);
+ rv = EINVAL;
+ goto status_done;
+ }
+
+ /* paranoia: detect buffer overrun */
+ if ((caddr_t)&dstat32p->s_stat[dstatp->s_nstat] >
+ ((caddr_t)dstatp) + sz) {
+ DR_OP_INTERNAL_ERROR(hp);
+ rv = EINVAL;
+ goto status_done;
+ }
+
+ /* copy sbd_stat_t structure members */
+#define _SBD_STAT(t, m) dstat32p->m = (t)dstatp->m
+ _SBD_STAT(int32_t, s_board);
+ _SBD_STAT(int32_t, s_rstate);
+ _SBD_STAT(int32_t, s_ostate);
+ _SBD_STAT(int32_t, s_cond);
+ _SBD_STAT(int32_t, s_busy);
+ _SBD_STAT(time32_t, s_time);
+ _SBD_STAT(uint32_t, s_power);
+ _SBD_STAT(uint32_t, s_assigned);
+ _SBD_STAT(int32_t, s_nstat);
+ bcopy(&dstatp->s_type[0], &dstat32p->s_type[0],
+ SBD_TYPE_LEN);
+ bcopy(&dstatp->s_info[0], &dstat32p->s_info[0],
+ SBD_MAX_INFO);
+#undef _SBD_STAT
+
+ for (i = 0; i < dstatp->s_nstat; i++) {
+ sbd_dev_stat_t *dsp = &dstatp->s_stat[i];
+ sbd_dev_stat32_t *ds32p = &dstat32p->s_stat[i];
+#define _SBD_DEV_STAT(t, m) ds32p->m = (t)dsp->m
+
+ /* copy sbd_cm_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, ds_type);
+ _SBD_DEV_STAT(int32_t, ds_unit);
+ _SBD_DEV_STAT(int32_t, ds_ostate);
+ _SBD_DEV_STAT(int32_t, ds_cond);
+ _SBD_DEV_STAT(int32_t, ds_busy);
+ _SBD_DEV_STAT(int32_t, ds_suspend);
+ _SBD_DEV_STAT(time32_t, ds_time);
+ bcopy(&dsp->ds_name[0], &ds32p->ds_name[0],
+ OBP_MAXPROPNAME);
+
+ switch (dsp->ds_type) {
+ case SBD_COMP_CPU:
+ /* copy sbd_cpu_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_isbootproc);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_cpuid);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_speed);
+ _SBD_DEV_STAT(int32_t, d_cpu.cs_ecache);
+ break;
+
+ case SBD_COMP_MEM:
+ /* copy sbd_mem_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_mem.ms_interleave);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_basepfn);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_totpages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_detpages);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_pageslost);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_managed_pages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_pages);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_first);
+ _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_last);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_cage_enabled);
+ _SBD_DEV_STAT(int32_t, d_mem.ms_peer_is_target);
+ bcopy(&dsp->d_mem.ms_peer_ap_id[0],
+ &ds32p->d_mem.ms_peer_ap_id[0],
+ sizeof (ds32p->d_mem.ms_peer_ap_id));
+ break;
+
+ case SBD_COMP_IO:
+ /* copy sbd_io_stat_t structure members */
+ _SBD_DEV_STAT(int32_t, d_io.is_referenced);
+ _SBD_DEV_STAT(int32_t, d_io.is_unsafe_count);
+
+ for (j = 0; j < SBD_MAX_UNSAFE; j++)
+ _SBD_DEV_STAT(int32_t,
+ d_io.is_unsafe_list[j]);
+
+ bcopy(&dsp->d_io.is_pathname[0],
+ &ds32p->d_io.is_pathname[0], MAXPATHLEN);
+ break;
+
+ case SBD_COMP_CMP:
+ /* copy sbd_cmp_stat_t structure members */
+ bcopy(&dsp->d_cmp.ps_cpuid[0],
+ &ds32p->d_cmp.ps_cpuid[0],
+ sizeof (ds32p->d_cmp.ps_cpuid));
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_ncores);
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_speed);
+ _SBD_DEV_STAT(int32_t, d_cmp.ps_ecache);
+ break;
+
+ default:
+ cmn_err(CE_WARN, "%s: unknown dev type (%d)",
+ f, (int)dsp->ds_type);
+ rv = EFAULT;
+ goto status_done;
+ }
+#undef _SBD_DEV_STAT
+ }
+
+ if (ddi_copyout((void *)dstat32p,
+ hp->h_sbdcmd.cmd_stat.s_statp, pbsz, mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout status "
+ "for board %d", f, bp->b_num);
+ rv = EFAULT;
+ goto status_done;
+ }
+ } else
+#endif /* _MULTI_DATAMODEL */
+
+ if (ddi_copyout((void *)dstatp, hp->h_sbdcmd.cmd_stat.s_statp,
+ pbsz, mode) != 0) {
+ cmn_err(CE_WARN,
+ "%s: failed to copyout status for board %d",
+ f, bp->b_num);
+ rv = EFAULT;
+ goto status_done;
+ }
+
+status_done:
+ if (dstatp != NULL)
+ FREESTRUCT(dstatp, char, sz);
+
+ dr_unlock_status(bp);
+
+ return (rv);
+}
+
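+/*
+ * Count the components present on the board, counting each CMP
+ * device once no matter how many of its cores are present.
+ */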
+static int
+dr_get_ncm(dr_handle_t *hp)
+{
+ int i;
+ int ncm = 0;
+ dr_devset_t devset;
+
+ devset = DR_DEVS_PRESENT(hp->h_bd);
+ if (hp->h_sbdcmd.cmd_cm.c_id.c_type != SBD_COMP_NONE)
+ devset &= DEVSET(hp->h_sbdcmd.cmd_cm.c_id.c_type,
+ DEVSET_ANYUNIT);
+
+ /*
+ * Handle CPUs first to deal with possible CMP
+ * devices. If the CPU is a CMP, we need to only
+ * increment ncm once even if there are multiple
+ * cores for that CMP present in the devset.
+ */
+ for (i = 0; i < MAX_CMP_UNITS_PER_BOARD; i++) {
+ if (devset & DEVSET(SBD_COMP_CMP, i)) {
+ ncm++;
+ }
+ }
+
+ /* eliminate the CPU information from the devset */
+ devset &= ~(DEVSET(SBD_COMP_CMP, DEVSET_ANYUNIT));
+
+ for (i = 0; i < (sizeof (dr_devset_t) * 8); i++) {
+ ncm += devset & 0x1;
+ devset >>= 1;
+ }
+
+ return (ncm);
+}
+
+/* used by dr_mem.c */
+/* TODO: eliminate dr_boardlist */
+dr_board_t *
+dr_lookup_board(int board_num)
+{
+ dr_board_t *bp;
+
+ ASSERT(board_num >= 0 && board_num < MAX_BOARDS);
+
+ bp = &dr_boardlist[board_num];
+ ASSERT(bp->b_num == board_num);
+
+ return (bp);
+}
+
+static dr_dev_unit_t *
+dr_get_dev_unit(dr_board_t *bp, sbd_comp_type_t nt, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, nt, unit_num);
+ ASSERT(dp->du_common.sbdev_bp == bp);
+ ASSERT(dp->du_common.sbdev_unum == unit_num);
+ ASSERT(dp->du_common.sbdev_type == nt);
+
+ return (dp);
+}
+
+dr_cpu_unit_t *
+dr_get_cpu_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_CPU_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_CPU, unit_num);
+ return (&dp->du_cpu);
+}
+
+dr_mem_unit_t *
+dr_get_mem_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_MEM_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_MEM, unit_num);
+ return (&dp->du_mem);
+}
+
+dr_io_unit_t *
+dr_get_io_unit(dr_board_t *bp, int unit_num)
+{
+ dr_dev_unit_t *dp;
+
+ ASSERT(unit_num >= 0 && unit_num < MAX_IO_UNITS_PER_BOARD);
+
+ dp = dr_get_dev_unit(bp, SBD_COMP_IO, unit_num);
+ return (&dp->du_io);
+}
+
+dr_common_unit_t *
+dr_get_common_unit(dr_board_t *bp, sbd_comp_type_t nt, int unum)
+{
+ dr_dev_unit_t *dp;
+
+ dp = dr_get_dev_unit(bp, nt, unum);
+ return (&dp->du_common);
+}
+
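+/*
+ * Translate the component id of an ioctl request into the
+ * corresponding devset bitmask, validating the unit number along
+ * the way.
+ */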
+static dr_devset_t
+dr_dev2devset(sbd_comp_id_t *cid)
+{
+ static fn_t f = "dr_dev2devset";
+
+ dr_devset_t devset;
+ int unit = cid->c_unit;
+
+ switch (cid->c_type) {
+ case SBD_COMP_NONE:
+ devset = DEVSET(SBD_COMP_CPU, DEVSET_ANYUNIT);
+ devset |= DEVSET(SBD_COMP_MEM, DEVSET_ANYUNIT);
+ devset |= DEVSET(SBD_COMP_IO, DEVSET_ANYUNIT);
+ PR_ALL("%s: COMP_NONE devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_CPU:
+		if ((unit >= MAX_CPU_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid cpu unit# = %d",
+ f, unit);
+ devset = 0;
+ } else {
+ /*
+ * Generate a devset that includes all the
+ * cores of a CMP device. If this is not a
+ * CMP, the extra cores will be eliminated
+ * later since they are not present. This is
+ * also true for CMP devices that do not have
+ * all cores active.
+ */
+ devset = DEVSET(SBD_COMP_CMP, unit);
+ }
+
+ PR_ALL("%s: CPU devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_MEM:
+ if (unit == SBD_NULL_UNIT) {
+ unit = 0;
+ cid->c_unit = 0;
+ }
+
+		if ((unit >= MAX_MEM_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid mem unit# = %d",
+ f, unit);
+ devset = 0;
+ } else
+ devset = DEVSET(cid->c_type, unit);
+
+ PR_ALL("%s: MEM devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ case SBD_COMP_IO:
+		if ((unit >= MAX_IO_UNITS_PER_BOARD) || (unit < 0)) {
+ cmn_err(CE_WARN,
+ "%s: invalid io unit# = %d",
+ f, unit);
+ devset = 0;
+ } else
+ devset = DEVSET(cid->c_type, unit);
+
+ PR_ALL("%s: IO devset = " DEVSET_FMT_STR "\n",
+ f, DEVSET_FMT_ARG(devset));
+ break;
+
+ default:
+ case SBD_COMP_UNKNOWN:
+ devset = 0;
+ break;
+ }
+
+ return (devset);
+}
+
+/*
+ * Converts a dynamic attachment point name to an SBD_COMP_* type.
+ * Returns SBD_COMP_UNKNOWN if the name is not recognized.
+ */
+static int
+dr_dev_type_to_nt(char *type)
+{
+ int i;
+
+ for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++)
+ if (strcmp(dr_devattr[i].s_devtype, type) == 0)
+ break;
+
+ return (dr_devattr[i].s_nodetype);
+}
+
+/*
+ * Converts an SBD_COMP_* type to a dynamic attachment point name.
+ * Returns NULL if the SBD_COMP_* type is not recognized.
+ */
+char *
+dr_nt_to_dev_type(int nt)
+{
+ int i;
+
+ for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++)
+ if (dr_devattr[i].s_nodetype == nt)
+ break;
+
+ return (dr_devattr[i].s_devtype);
+}
+
+/*
+ * State transition policy is that if there is some component for which
+ * the state transition is valid, then let it through. The exception is
+ * SBD_CMD_DISCONNECT. On disconnect, the state transition must be valid
+ * for ALL components.
+ * Returns the state that is in error, if any.
+ */
+static int
+dr_check_transition(dr_board_t *bp, dr_devset_t *devsetp,
+ struct dr_state_trans *transp, int cmd)
+{
+ int s, ut;
+ int state_err = 0;
+ dr_devset_t devset;
+ dr_common_unit_t *cp;
+ static fn_t f = "dr_check_transition";
+
+ devset = *devsetp;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_CPU, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_CPU, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_CPU, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_CPU, ut);
+ }
+ }
+ }
+ }
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_MEM, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_MEM, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_MEM, ut);
+ }
+ }
+ }
+ }
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT)) {
+ for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) {
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, ut) == 0)
+ continue;
+
+ cp = dr_get_common_unit(bp, SBD_COMP_IO, ut);
+ s = (int)cp->sbdev_state;
+ if (!DR_DEV_IS_PRESENT(cp)) {
+ DEVSET_DEL(devset, SBD_COMP_IO, ut);
+ } else {
+ if (transp->x_op[s].x_rv) {
+ if (!state_err)
+ state_err = s;
+ DEVSET_DEL(devset, SBD_COMP_IO, ut);
+ }
+ }
+ }
+ }
+
+ PR_ALL("%s: requested devset = 0x%x, final devset = 0x%x\n",
+ f, (uint_t)*devsetp, (uint_t)devset);
+
+ *devsetp = devset;
+ /*
+ * If there are some remaining components for which
+ * this state transition is valid, then allow them
+ * through, otherwise if none are left then return
+ * the state error. The exception is SBD_CMD_DISCONNECT.
+ * On disconnect, the state transition must be valid for ALL
+ * components.
+ */
+ if (cmd == SBD_CMD_DISCONNECT)
+ return (state_err);
+ return (devset ? 0 : state_err);
+}
+
+void
+dr_device_transition(dr_common_unit_t *cp, dr_state_t st)
+{
+ PR_STATE("%s STATE %s(%d) -> %s(%d)\n",
+ cp->sbdev_path,
+ state_str[cp->sbdev_state], cp->sbdev_state,
+ state_str[st], st);
+
+ cp->sbdev_state = st;
+ if (st == DR_STATE_CONFIGURED) {
+ cp->sbdev_ostate = SBD_STAT_CONFIGURED;
+ if (cp->sbdev_bp->b_ostate != SBD_STAT_CONFIGURED) {
+ cp->sbdev_bp->b_ostate = SBD_STAT_CONFIGURED;
+ (void) drv_getparm(TIME,
+ (void *) &cp->sbdev_bp->b_time);
+ }
+ } else
+ cp->sbdev_ostate = SBD_STAT_UNCONFIGURED;
+
+ (void) drv_getparm(TIME, (void *) &cp->sbdev_time);
+}
+
+static void
+dr_board_transition(dr_board_t *bp, dr_state_t st)
+{
+ PR_STATE("BOARD %d STATE: %s(%d) -> %s(%d)\n",
+ bp->b_num,
+ state_str[bp->b_state], bp->b_state,
+ state_str[st], st);
+
+ bp->b_state = st;
+}
+
+void
+dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...)
+{
+ sbd_error_t *err;
+ va_list args;
+
+ va_start(args, fmt);
+ err = drerr_new_v(code, fmt, args);
+ va_end(args);
+
+ if (ce != CE_IGNORE)
+ sbd_err_log(err, ce);
+
+ DRERR_SET_C(&hp->h_err, &err);
+}
+
+void
+dr_dev_err(int ce, dr_common_unit_t *cp, int code)
+{
+ sbd_error_t *err;
+
+ err = drerr_new(0, code, cp->sbdev_path, NULL);
+
+ if (ce != CE_IGNORE)
+ sbd_err_log(err, ce);
+
+ DRERR_SET_C(&cp->sbdev_error, &err);
+}
+
+/*
+ * A callback routine. Called from the drmach layer as a result of
+ * a call to drmach_board_find_devices() from dr_init_devlists().
+ */
+static sbd_error_t *
+dr_dev_found(void *data, const char *name, int unum, drmachid_t id)
+{
+ dr_board_t *bp = data;
+ dr_dev_unit_t *dp;
+ int nt;
+ static fn_t f = "dr_dev_found";
+
+ PR_ALL("%s (board = %d, name = %s, unum = %d, id = %p)...\n",
+ f, bp->b_num, name, unum, id);
+
+ nt = dr_dev_type_to_nt((char *)name);
+ if (nt == SBD_COMP_UNKNOWN) {
+ /*
+		 * This should not happen. When it does, it indicates
+		 * a mismatch between the devices supported by the drmach
+		 * layer and those supported by this layer.
+ */
+ return (DR_INTERNAL_ERROR());
+ }
+
+ dp = DR_GET_BOARD_DEVUNIT(bp, nt, unum);
+
+ /* sanity check */
+ ASSERT(dp->du_common.sbdev_bp == bp);
+ ASSERT(dp->du_common.sbdev_unum == unum);
+ ASSERT(dp->du_common.sbdev_type == nt);
+
+ /* render dynamic attachment point path of this unit */
+ (void) snprintf(dp->du_common.sbdev_path,
+ sizeof (dp->du_common.sbdev_path), "%s::%s%d",
+ bp->b_path, name, DR_UNUM2SBD_UNUM(unum, nt));
+
+ dp->du_common.sbdev_id = id;
+ DR_DEV_SET_PRESENT(&dp->du_common);
+
+ bp->b_ndev++;
+
+ return (NULL);
+}
+
+static sbd_error_t *
+dr_init_devlists(dr_board_t *bp)
+{
+ int i;
+ sbd_error_t *err;
+ dr_dev_unit_t *dp;
+ static fn_t f = "dr_init_devlists";
+
+ PR_ALL("%s (%s)...\n", f, bp->b_path);
+
+ /* sanity check */
+ ASSERT(bp->b_ndev == 0);
+
+ DR_DEVS_DISCONNECT(bp, (uint_t)-1);
+
+ /*
+ * This routine builds the board's devlist and initializes
+ * the common portion of the unit data structures.
+ * Note: because the common portion is considered
+ * uninitialized, the dr_get_*_unit() routines can not
+ * be used.
+ */
+
+ /*
+ * Clear out old entries, if any.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_CPU, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_CPU;
+ }
+
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_MEM, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_MEM;
+ }
+
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_IO, i);
+
+ bzero(dp, sizeof (*dp));
+ dp->du_common.sbdev_bp = bp;
+ dp->du_common.sbdev_unum = i;
+ dp->du_common.sbdev_type = SBD_COMP_IO;
+ }
+
+ err = NULL;
+ if (bp->b_id) {
+ /* find devices on this board */
+ err = drmach_board_find_devices(
+ bp->b_id, bp, dr_dev_found);
+ }
+
+ return (err);
+}
+
+/*
+ * Check whether the given unit is attached to the system.
+ * Returns 0 if it is attached, and -1 otherwise.
+ */
+static int
+dr_check_unit_attached(dr_common_unit_t *cp)
+{
+ int rv = 0;
+ processorid_t cpuid;
+ uint64_t basepa, endpa;
+ struct memlist *ml;
+ extern struct memlist *phys_install;
+ sbd_error_t *err;
+ int yes;
+ static fn_t f = "dr_check_unit_attached";
+
+ switch (cp->sbdev_type) {
+ case SBD_COMP_CPU:
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+ mutex_enter(&cpu_lock);
+ if (cpu_get(cpuid) == NULL)
+ rv = -1;
+ mutex_exit(&cpu_lock);
+ break;
+
+ case SBD_COMP_MEM:
+ err = drmach_mem_get_slice_info(cp->sbdev_id,
+ &basepa, &endpa, NULL);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+
+ /*
+ * Check if base address is in phys_install.
+ */
+ memlist_read_lock();
+ for (ml = phys_install; ml; ml = ml->ml_next)
+ if ((endpa <= ml->ml_address) ||
+ (basepa >= (ml->ml_address + ml->ml_size)))
+ continue;
+ else
+ break;
+ memlist_read_unlock();
+ if (ml == NULL)
+ rv = -1;
+ break;
+
+ case SBD_COMP_IO:
+ err = drmach_io_is_attached(cp->sbdev_id, &yes);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ rv = -1;
+ break;
+ } else if (!yes)
+ rv = -1;
+ break;
+
+ default:
+ PR_ALL("%s: unexpected nodetype(%d) for id 0x%p\n",
+ f, cp->sbdev_type, cp->sbdev_id);
+ rv = -1;
+ break;
+ }
+
+ return (rv);
+}
+
+/*
+ * See if drmach recognizes the passthru command. DRMACH expects the
+ * id to identify the thing to which the command is being applied. Using
+ * nonsense SBD terms, that information has been perversely encoded in the
+ * c_id member of the sbd_cmd_t structure. This logic reads those tea
+ * leaves, finds the associated drmach id, then calls drmach to process
+ * the passthru command.
+ */
+static int
+dr_pt_try_drmach(dr_handle_t *hp)
+{
+ dr_board_t *bp = hp->h_bd;
+ sbd_comp_id_t *comp_id = &hp->h_sbdcmd.cmd_cm.c_id;
+ drmachid_t id;
+
+ if (comp_id->c_type == SBD_COMP_NONE) {
+ id = bp->b_id;
+ } else {
+ sbd_comp_type_t nt;
+
+ nt = dr_dev_type_to_nt(comp_id->c_name);
+ if (nt == SBD_COMP_UNKNOWN) {
+ dr_op_err(CE_IGNORE, hp, ESBD_INVAL, comp_id->c_name);
+ id = 0;
+ } else {
+ /* pt command applied to dynamic attachment point */
+ dr_common_unit_t *cp;
+ cp = dr_get_common_unit(bp, nt, comp_id->c_unit);
+ id = cp->sbdev_id;
+ }
+ }
+
+ if (hp->h_err == NULL)
+ hp->h_err = drmach_passthru(id, &hp->h_opts);
+
+ return (hp->h_err == NULL ? 0 : -1);
+}
+
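+/*
+ * Handle a passthru command: try the locally implemented passthru
+ * table first, and hand anything unrecognized to the drmach layer.
+ */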
+static int
+dr_pt_ioctl(dr_handle_t *hp)
+{
+ int cmd, rv, len;
+ int32_t sz;
+ int found;
+ char *copts;
+ static fn_t f = "dr_pt_ioctl";
+
+ PR_ALL("%s...\n", f);
+
+ sz = hp->h_opts.size;
+ copts = hp->h_opts.copts;
+
+ if (sz == 0 || copts == (char *)NULL) {
+ cmn_err(CE_WARN, "%s: invalid passthru args", f);
+ return (EINVAL);
+ }
+
+ found = 0;
+ for (cmd = 0; cmd < (sizeof (pt_arr) / sizeof (pt_arr[0])); cmd++) {
+ len = strlen(pt_arr[cmd].pt_name);
+ found = (strncmp(pt_arr[cmd].pt_name, copts, len) == 0);
+ if (found)
+ break;
+ }
+
+ if (found)
+ rv = (*pt_arr[cmd].pt_func)(hp);
+ else
+ rv = dr_pt_try_drmach(hp);
+
+ return (rv);
+}
+
+/*
+ * Called at driver load time to determine the state and condition
+ * of an existing board in the system.
+ */
+static void
+dr_board_discovery(dr_board_t *bp)
+{
+ int i;
+ dr_devset_t devs_lost, devs_attached = 0;
+ dr_cpu_unit_t *cp;
+ dr_mem_unit_t *mp;
+ dr_io_unit_t *ip;
+ static fn_t f = "dr_board_discovery";
+
+ if (DR_DEVS_PRESENT(bp) == 0) {
+ PR_ALL("%s: board %d has no devices present\n",
+ f, bp->b_num);
+ return;
+ }
+
+ /*
+ * Check for existence of cpus.
+ */
+ for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
+ cp = dr_get_cpu_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&cp->sbc_cm))
+ continue;
+
+ if (dr_check_unit_attached(&cp->sbc_cm) >= 0) {
+ DR_DEV_SET_ATTACHED(&cp->sbc_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_CPU, i);
+ PR_ALL("%s: board %d, cpu-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_cpu_unit(cp);
+ }
+
+ /*
+ * Check for existence of memory.
+ */
+ for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
+ mp = dr_get_mem_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&mp->sbm_cm))
+ continue;
+
+ if (dr_check_unit_attached(&mp->sbm_cm) >= 0) {
+ DR_DEV_SET_ATTACHED(&mp->sbm_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_MEM, i);
+ PR_ALL("%s: board %d, mem-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_mem_unit(mp);
+ }
+
+ /*
+ * Check for i/o state.
+ */
+ for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ ip = dr_get_io_unit(bp, i);
+
+ if (!DR_DEV_IS_PRESENT(&ip->sbi_cm))
+ continue;
+
+ if (dr_check_unit_attached(&ip->sbi_cm) >= 0) {
+ /*
+ * Found it!
+ */
+ DR_DEV_SET_ATTACHED(&ip->sbi_cm);
+ DEVSET_ADD(devs_attached, SBD_COMP_IO, i);
+ PR_ALL("%s: board %d, io-unit %d - attached\n",
+ f, bp->b_num, i);
+ }
+ dr_init_io_unit(ip);
+ }
+
+ DR_DEVS_CONFIGURE(bp, devs_attached);
+ if (devs_attached && ((devs_lost = DR_DEVS_UNATTACHED(bp)) != 0)) {
+ int ut;
+
+ /*
+ * It is not legal on board discovery to have a
+ * board that is only partially attached. A board
+ * is either all attached or all connected. If a
+ * board has at least one attached device, then
+		 * the remaining devices, if any, must have
+ * been lost or disconnected. These devices can
+ * only be recovered by a full attach from scratch.
+ * Note that devices previously in the unreferenced
+ * state are subsequently lost until the next full
+ * attach. This is necessary since the driver unload
+ * that must have occurred would have wiped out the
+ * information necessary to re-configure the device
+ * back online, e.g. memlist.
+ */
+ PR_ALL("%s: some devices LOST (" DEVSET_FMT_STR ")...\n",
+ f, DEVSET_FMT_ARG(devs_lost));
+
+ for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_CPU, ut))
+ continue;
+
+ cp = dr_get_cpu_unit(bp, ut);
+ dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY);
+ }
+
+ for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_MEM, ut))
+ continue;
+
+ mp = dr_get_mem_unit(bp, ut);
+ dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY);
+ }
+
+ for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) {
+ if (!DEVSET_IN_SET(devs_lost, SBD_COMP_IO, ut))
+ continue;
+
+ ip = dr_get_io_unit(bp, ut);
+ dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY);
+ }
+
+ DR_DEVS_DISCONNECT(bp, devs_lost);
+ }
+}
+
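+/*
+ * Set up a board structure at driver load time: allocate the
+ * per-unit arrays, build the devlists and derive the initial board
+ * state from whatever devices are already present or attached.
+ */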
+static int
+dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd)
+{
+ sbd_error_t *err;
+
+ mutex_init(&bp->b_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&bp->b_slock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&bp->b_scv, NULL, CV_DRIVER, NULL);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ bp->b_ostate = SBD_STAT_UNCONFIGURED;
+ bp->b_cond = SBD_COND_UNKNOWN;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ (void) drmach_board_lookup(bd, &bp->b_id);
+ bp->b_num = bd;
+ bp->b_dip = dip;
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_CPU_UNITS_PER_BOARD);
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_MEM_UNITS_PER_BOARD);
+
+ bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = GETSTRUCT(dr_dev_unit_t,
+ MAX_IO_UNITS_PER_BOARD);
+
+ /*
+ * Initialize the devlists
+ */
+ err = dr_init_devlists(bp);
+ if (err) {
+ sbd_err_clear(&err);
+ dr_board_destroy(bp);
+ return (-1);
+ } else if (bp->b_ndev == 0) {
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ } else {
+ /*
+ * Couldn't have made it down here without
+ * having found at least one device.
+ */
+ ASSERT(DR_DEVS_PRESENT(bp) != 0);
+ /*
+ * Check the state of any possible devices on the
+ * board.
+ */
+ dr_board_discovery(bp);
+
+ bp->b_assigned = 1;
+
+ if (DR_DEVS_UNATTACHED(bp) == 0) {
+ /*
+ * The board has no unattached devices, therefore
+ * by reason of insanity it must be configured!
+ */
+ dr_board_transition(bp, DR_STATE_CONFIGURED);
+ bp->b_ostate = SBD_STAT_CONFIGURED;
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_cond = SBD_COND_OK;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ } else if (DR_DEVS_ATTACHED(bp)) {
+ dr_board_transition(bp, DR_STATE_PARTIAL);
+ bp->b_ostate = SBD_STAT_CONFIGURED;
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ bp->b_cond = SBD_COND_OK;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ } else {
+ dr_board_transition(bp, DR_STATE_CONNECTED);
+ bp->b_rstate = SBD_STAT_CONNECTED;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+ }
+ }
+
+ return (0);
+}
+
+static void
+dr_board_destroy(dr_board_t *bp)
+{
+ PR_ALL("dr_board_destroy: num %d, path %s\n",
+ bp->b_num, bp->b_path);
+
+ dr_board_transition(bp, DR_STATE_EMPTY);
+ bp->b_rstate = SBD_STAT_EMPTY;
+ (void) drv_getparm(TIME, (void *)&bp->b_time);
+
+ /*
+ * Free up MEM unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)],
+ dr_dev_unit_t, MAX_MEM_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = NULL;
+ /*
+ * Free up CPU unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)],
+ dr_dev_unit_t, MAX_CPU_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = NULL;
+ /*
+ * Free up IO unit structs.
+ */
+ FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_IO)],
+ dr_dev_unit_t, MAX_IO_UNITS_PER_BOARD);
+ bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = NULL;
+
+ mutex_destroy(&bp->b_lock);
+ mutex_destroy(&bp->b_slock);
+ cv_destroy(&bp->b_scv);
+
+ /*
+ * Reset the board structure to its initial state, otherwise it will
+ * cause trouble on the next call to dr_board_init() for the same board.
+ * dr_board_init() may be called multiple times for the same board
+	 * if the DR driver fails to initialize some boards.
+ */
+ bzero(bp, sizeof (*bp));
+}
+
+void
+dr_lock_status(dr_board_t *bp)
+{
+ mutex_enter(&bp->b_slock);
+ while (bp->b_sflags & DR_BSLOCK)
+ cv_wait(&bp->b_scv, &bp->b_slock);
+ bp->b_sflags |= DR_BSLOCK;
+ mutex_exit(&bp->b_slock);
+}
+
+void
+dr_unlock_status(dr_board_t *bp)
+{
+ mutex_enter(&bp->b_slock);
+ bp->b_sflags &= ~DR_BSLOCK;
+ cv_signal(&bp->b_scv);
+ mutex_exit(&bp->b_slock);
+}
+
+/*
+ * Extract flags passed via ioctl.
+ */
+int
+dr_cmd_flags(dr_handle_t *hp)
+{
+ return (hp->h_sbdcmd.cmd_cm.c_flags);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr.conf b/usr/src/uts/i86pc/io/dr/dr.conf
new file mode 100644
index 0000000000..c694f5573f
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, Intel Corporation.
+# All rights reserved.
+#
+
+# Force attach driver to enable DR at boot time.
+ddi-forceattach=1;
diff --git a/usr/src/uts/i86pc/io/dr/dr_cpu.c b/usr/src/uts/i86pc/io/dr/dr_cpu.c
new file mode 100644
index 0000000000..a893310977
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_cpu.c
@@ -0,0 +1,885 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * CPU support routines for DR
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/kmem.h>
+#include <sys/processor.h>
+#include <sys/cpuvar.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+#include <sys/cpu_module.h>
+#include <sys/cmn_err.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr_cpu.c %d";
+
+int
+dr_cpu_unit_is_sane(dr_board_t *bp, dr_cpu_unit_t *cp)
+{
+#ifdef DEBUG
+ ASSERT(cp->sbc_cm.sbdev_bp == bp);
+ ASSERT(cp->sbc_cm.sbdev_type == SBD_COMP_CPU);
+#else
+ _NOTE(ARGUNUSED(bp))
+ _NOTE(ARGUNUSED(cp))
+#endif
+
+ return (1);
+}
+
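+/*
+ * Map a generic errno onto the closest equivalent ESBD_* error code.
+ */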
+static int
+dr_errno2ecode(int error)
+{
+ int rv;
+
+ switch (error) {
+ case EBUSY:
+ rv = ESBD_BUSY;
+ break;
+ case EINVAL:
+ rv = ESBD_INVAL;
+ break;
+ case EALREADY:
+ rv = ESBD_ALREADY;
+ break;
+ case ENODEV:
+ rv = ESBD_NODEV;
+ break;
+ case ENOMEM:
+ rv = ESBD_NOMEM;
+ break;
+ default:
+ rv = ESBD_INVAL;
+ }
+
+ return (rv);
+}
+
+/*
+ * On x86, the "clock-frequency" and cache size device properties may be
+ * unavailable before the CPU starts. If they are unavailable, just set
+ * them to zero.
+ */
+static void
+dr_cpu_set_prop(dr_cpu_unit_t *cp)
+{
+ sbd_error_t *err;
+ dev_info_t *dip;
+ uint64_t clock_freq;
+ int ecache_size = 0;
+ char *cache_str = NULL;
+
+ err = drmach_get_dip(cp->sbc_cm.sbdev_id, &dip);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ return;
+ }
+
+ if (dip == NULL) {
+ DR_DEV_INTERNAL_ERROR(&cp->sbc_cm);
+ return;
+ }
+
+ /* read in the CPU speed */
+ clock_freq = (unsigned int)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "clock-frequency", 0);
+
+ /*
+ * The ecache property string is not the same
+ * for all CPU implementations.
+ */
+ switch (cp->sbc_cpu_impl) {
+ case X86_CPU_IMPL_NEHALEM_EX:
+ cache_str = "l3-cache-size";
+ break;
+ default:
+ cmn_err(CE_WARN, "Unknown cpu implementation=0x%x",
+ cp->sbc_cpu_impl);
+ break;
+ }
+
+ if (cache_str != NULL) {
+ /* read in the ecache size */
+ /*
+ * If the property is not found in the CPU node,
+ * it has to be kept in the core or cmp node so
+ * we just keep looking.
+ */
+
+ ecache_size = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
+ cache_str, 0);
+ }
+
+ /* convert to the proper units */
+ cp->sbc_speed = (clock_freq + 500000) / 1000000;
+ cp->sbc_ecache = ecache_size / (1024 * 1024);
+}
+
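+/*
+ * Initialize the software state of a CPU unit from its current
+ * hardware state: cache its cpuid, implementation and properties,
+ * then move it to the appropriate CONFIGURED, CONNECTED, EMPTY or
+ * FATAL state.
+ */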
+void
+dr_init_cpu_unit(dr_cpu_unit_t *cp)
+{
+ sbd_error_t *err;
+ dr_state_t new_state;
+ int cpuid;
+ int impl;
+
+ if (DR_DEV_IS_ATTACHED(&cp->sbc_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ cp->sbc_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ cp->sbc_cm.sbdev_cond = SBD_COND_OK;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ cp->sbc_cm.sbdev_cond = SBD_COND_UNKNOWN;
+ }
+
+ if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) {
+ err = drmach_cpu_get_id(cp->sbc_cm.sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ new_state = DR_STATE_FATAL;
+ goto done;
+ }
+
+ err = drmach_cpu_get_impl(cp->sbc_cm.sbdev_id, &impl);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ new_state = DR_STATE_FATAL;
+ goto done;
+ }
+ } else {
+ cp->sbc_cpu_id = -1;
+ cp->sbc_cpu_impl = -1;
+ goto done;
+ }
+
+ cp->sbc_cpu_id = cpuid;
+ cp->sbc_cpu_impl = impl;
+
+ /* if true at init time, it must always be true */
+ ASSERT(dr_cpu_unit_is_sane(cp->sbc_cm.sbdev_bp, cp));
+
+ mutex_enter(&cpu_lock);
+ if ((cpuid >= 0) && cpu[cpuid])
+ cp->sbc_cpu_flags = cpu[cpuid]->cpu_flags;
+ else
+ cp->sbc_cpu_flags = P_OFFLINE | P_POWEROFF;
+ mutex_exit(&cpu_lock);
+
+ dr_cpu_set_prop(cp);
+
+done:
+ /* delay transition until fully initialized */
+ dr_device_transition(&cp->sbc_cm, new_state);
+}
+
+int
+dr_pre_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int i;
+ static fn_t f = "dr_pre_attach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ /*
+ * Print a console message for each attachment
+ * point. For CMP devices, this means that only
+ * one message should be printed, no matter how
+ * many cores are actually present.
+ */
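+		/*
+		 * For example, with a hypothetical MAX_CORES_PER_CMP
+		 * of 4, only units 0, 4, 8, ... print a message.
+		 */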
+ if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) {
+ cmn_err(CE_CONT, "OS configure %s",
+ up->sbc_cm.sbdev_path);
+ }
+ }
+
+ /*
+ * Block out status threads while creating
+ * devinfo tree branches
+ */
+ dr_lock_status(hp->h_bd);
+ ndi_devi_enter(ddi_root_node(), (int *)(&hp->h_ndi));
+ mutex_enter(&cpu_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+ processorid_t cpuid;
+ int rv;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ err = drmach_configure(cp->sbdev_id, 0);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ return;
+ }
+
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err)
+ sbd_err_clear(&err);
+ } else if ((rv = cpu_configure(cpuid)) != 0) {
+ dr_dev_err(CE_WARN, cp, dr_errno2ecode(rv));
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err)
+ sbd_err_clear(&err);
+ } else {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp;
+ up->sbc_cpu_id = cpuid;
+ }
+}
+
+/*
+ * dr_post_attach_cpu
+ *
+ * sbd error policy: Does not stop on error. Processes all units in list.
+ */
+int
+dr_post_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int i;
+ int errflag = 0;
+ static fn_t f = "dr_post_attach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ /* Startup and online newly-attached CPUs */
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+ struct cpu *cp;
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ cp = cpu_get(up->sbc_cpu_id);
+ if (cp == NULL) {
+ cmn_err(CE_WARN, "%s: cpu_get failed for cpu %d",
+ f, up->sbc_cpu_id);
+ continue;
+ }
+
+ if (cpu_is_poweredoff(cp)) {
+ if (cpu_poweron(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTART);
+ errflag = 1;
+ }
+ PR_CPU("%s: cpu %d powered ON\n", f, up->sbc_cpu_id);
+ }
+
+ if (cpu_is_offline(cp)) {
+ PR_CPU("%s: onlining cpu %d...\n", f, up->sbc_cpu_id);
+
+ if (cpu_online(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_ONLINE);
+ errflag = 1;
+ }
+ }
+ }
+
+ mutex_exit(&cpu_lock);
+ ndi_devi_exit(ddi_root_node(), hp->h_ndi);
+ dr_unlock_status(hp->h_bd);
+
+ if (errflag)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * dr_pre_release_cpu
+ *
+ * sbd error policy: Stops on first error.
+ */
+int
+dr_pre_release_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int c, cix, i, lastoffline = -1, rv = 0;
+ processorid_t cpuid;
+ struct cpu *cp;
+ dr_cpu_unit_t *up;
+ dr_devset_t devset;
+ sbd_dev_stat_t *ds;
+ static fn_t f = "dr_pre_release_cpu";
+ int cpu_flags = 0;
+
+ devset = DR_DEVS_PRESENT(hp->h_bd);
+
+ /* allocate status struct storage. */
+ ds = (sbd_dev_stat_t *) kmem_zalloc(sizeof (sbd_dev_stat_t) *
+ MAX_CPU_UNITS_PER_BOARD, KM_SLEEP);
+
+ cix = dr_cpu_status(hp, devset, ds);
+
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < devnum; i++) {
+ up = (dr_cpu_unit_t *)devlist[i];
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ continue;
+ }
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ /*
+ * On x86 systems, some CPUs can't be unconfigured.
+ * For example, CPU0 can't be unconfigured because many other
+ * components have a dependency on it.
+ * This check determines if a CPU is currently in use and
+ * returns a "Device busy" error if so.
+ */
+ for (c = 0; c < cix; c++) {
+ if (ds[c].d_cpu.cs_unit == up->sbc_cm.sbdev_unum) {
+ if (ds[c].d_cpu.cs_busy) {
+ dr_dev_err(CE_WARN, &up->sbc_cm,
+ ESBD_BUSY);
+ rv = -1;
+ break;
+ }
+ }
+ }
+ if (c < cix)
+ break;
+
+ cpuid = up->sbc_cpu_id;
+ if ((cp = cpu_get(cpuid)) == NULL) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ rv = -1;
+ break;
+ }
+
+ /* used by dr_cancel_cpu during error flow */
+ up->sbc_cpu_flags = cp->cpu_flags;
+
+ if (CPU_ACTIVE(cp)) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
+ cpu_flags = CPU_FORCED;
+
+ PR_CPU("%s: offlining cpu %d\n", f, cpuid);
+ if (cpu_offline(cp, cpu_flags)) {
+ PR_CPU("%s: failed to offline cpu %d\n", f,
+ cpuid);
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ if (disp_bound_threads(cp, 0)) {
+ cmn_err(CE_WARN, "%s: thread(s) bound "
+ "to cpu %d", f, cp->cpu_id);
+ }
+ rv = -1;
+ break;
+ } else
+ lastoffline = i;
+ }
+
+ if (!rv) {
+ sbd_error_t *err;
+
+ err = drmach_release(up->sbc_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&up->sbc_cm.sbdev_error, &err);
+ rv = -1;
+ break;
+ }
+ }
+ }
+
+ mutex_exit(&cpu_lock);
+
+ if (rv) {
+ /*
+ * Need to unwind others since at this level (pre-release)
+ * the device state has not yet transitioned and failures
+ * will prevent us from reaching the "post" release
+ * function where states are normally transitioned.
+ */
+ for (i = lastoffline; i >= 0; i--) {
+ up = (dr_cpu_unit_t *)devlist[i];
+ (void) dr_cancel_cpu(up);
+ }
+ }
+
+ kmem_free(ds, sizeof (sbd_dev_stat_t) * MAX_CPU_UNITS_PER_BOARD);
+ return (rv);
+}
+
+/*
+ * dr_pre_detach_cpu
+ *
+ * sbd error policy: Stops on first error.
+ */
+int
+dr_pre_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int i;
+ int cpu_flags = 0;
+ static fn_t f = "dr_pre_detach_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ /*
+ * Block out status threads while destroying devinfo tree
+ * branches
+ */
+ dr_lock_status(hp->h_bd);
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < devnum; i++) {
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i];
+ struct cpu *cp;
+
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ continue;
+ }
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up));
+
+ cp = cpu_get(up->sbc_cpu_id);
+ if (cp == NULL)
+ continue;
+
+ /*
+ * Print a console message for each attachment
+ * point. For CMP devices, this means that only
+ * one message should be printed, no matter how
+ * many cores are actually present.
+ */
+ if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) {
+ cmn_err(CE_CONT, "OS unconfigure %s\n",
+ up->sbc_cm.sbdev_path);
+ }
+
+ /*
+ * CPUs were offlined during Release.
+ */
+ if (cpu_is_poweredoff(cp)) {
+ PR_CPU("%s: cpu %d already powered OFF\n",
+ f, up->sbc_cpu_id);
+ continue;
+ }
+
+ if (!cpu_is_offline(cp)) {
+ if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
+ cpu_flags = CPU_FORCED;
+ /* cpu was onlined after release. Offline it again */
+ PR_CPU("%s: offlining cpu %d\n", f, up->sbc_cpu_id);
+ if (cpu_offline(cp, cpu_flags)) {
+ PR_CPU("%s: failed to offline cpu %d\n",
+ f, up->sbc_cpu_id);
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE);
+ if (disp_bound_threads(cp, 0)) {
+ cmn_err(CE_WARN, "%s: thread(s) bound "
+ "to cpu %d", f, cp->cpu_id);
+ }
+ goto err;
+ }
+ }
+ if (cpu_poweroff(cp) != 0) {
+ dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTOP);
+ goto err;
+ } else {
+ PR_CPU("%s: cpu %d powered OFF\n", f, up->sbc_cpu_id);
+ }
+ }
+
+ return (0);
+
+err:
+ mutex_exit(&cpu_lock);
+ dr_unlock_status(hp->h_bd);
+ return (-1);
+}
+
+/*ARGSUSED*/
+void
+dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+ processorid_t cpuid;
+ int rv;
+ dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) {
+ return;
+ }
+
+ err = drmach_cpu_get_id(cp->sbdev_id, &cpuid);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ } else if ((rv = cpu_unconfigure(cpuid)) != 0) {
+ dr_dev_err(CE_IGNORE, cp, dr_errno2ecode(rv));
+ } else {
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ } else {
+ up->sbc_cpu_id = -1;
+ }
+ }
+}
+
+/*ARGSUSED1*/
+int
+dr_post_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ static fn_t f = "dr_post_detach_cpu";
+
+ PR_CPU("%s...\n", f);
+ hp->h_ndi = 0;
+
+ mutex_exit(&cpu_lock);
+ dr_unlock_status(hp->h_bd);
+
+ return (0);
+}
+
+static void
+dr_fill_cpu_stat(dr_cpu_unit_t *cp, drmach_status_t *pstat, sbd_cpu_stat_t *csp)
+{
+ ASSERT(cp && pstat && csp);
+
+ /* Fill in the common status information */
+ bzero((caddr_t)csp, sizeof (*csp));
+ csp->cs_type = cp->sbc_cm.sbdev_type;
+ csp->cs_unit = cp->sbc_cm.sbdev_unum;
+ (void) strlcpy(csp->cs_name, pstat->type, sizeof (csp->cs_name));
+ csp->cs_cond = cp->sbc_cm.sbdev_cond;
+ csp->cs_busy = cp->sbc_cm.sbdev_busy | pstat->busy;
+ csp->cs_time = cp->sbc_cm.sbdev_time;
+ csp->cs_ostate = cp->sbc_cm.sbdev_ostate;
+ csp->cs_suspend = 0;
+
+ /* CPU specific status data */
+ csp->cs_cpuid = cp->sbc_cpu_id;
+
+ /*
+ * If the speed and ecache properties have not been
+ * cached yet, read them in from the device tree.
+ */
+ if ((cp->sbc_speed == 0) || (cp->sbc_ecache == 0))
+ dr_cpu_set_prop(cp);
+
+ /* use the cached speed and ecache values */
+ csp->cs_speed = cp->sbc_speed;
+ csp->cs_ecache = cp->sbc_ecache;
+
+ mutex_enter(&cpu_lock);
+ if (!cpu_get(csp->cs_cpuid)) {
+ /* ostate must be UNCONFIGURED */
+ csp->cs_cm.c_ostate = SBD_STAT_UNCONFIGURED;
+ }
+ mutex_exit(&cpu_lock);
+}
+
+/*ARGSUSED2*/
+static void
+dr_fill_cmp_stat(sbd_cpu_stat_t *csp, int ncores, int impl, sbd_cmp_stat_t *psp)
+{
+ int core;
+
+ ASSERT(csp && psp && (ncores >= 1));
+
+ bzero((caddr_t)psp, sizeof (*psp));
+
+ /*
+ * Fill in the common status information based
+ * on the data for the first core.
+ */
+ psp->ps_type = SBD_COMP_CMP;
+ psp->ps_unit = DR_UNUM2SBD_UNUM(csp->cs_unit, SBD_COMP_CMP);
+ (void) strlcpy(psp->ps_name, csp->cs_name, sizeof (psp->ps_name));
+ psp->ps_cond = csp->cs_cond;
+ psp->ps_busy = csp->cs_busy;
+ psp->ps_time = csp->cs_time;
+ psp->ps_ostate = csp->cs_ostate;
+ psp->ps_suspend = csp->cs_suspend;
+
+ /* CMP specific status data */
+	psp->ps_cpuid[0] = csp->cs_cpuid;
+ psp->ps_ncores = 1;
+ psp->ps_speed = csp->cs_speed;
+ psp->ps_ecache = csp->cs_ecache;
+
+ /*
+ * Walk through the data for the remaining cores.
+ * Make any adjustments to the common status data,
+ * or the shared CMP specific data if necessary.
+ */
+ for (core = 1; core < ncores; core++) {
+ /*
+ * The following properties should be the same
+ * for all the cores of the CMP.
+ */
+ ASSERT(psp->ps_unit == DR_UNUM2SBD_UNUM(csp[core].cs_unit,
+ SBD_COMP_CMP));
+
+ if (csp[core].cs_speed > psp->ps_speed)
+ psp->ps_speed = csp[core].cs_speed;
+ if (csp[core].cs_ecache > psp->ps_ecache)
+ psp->ps_ecache = csp[core].cs_ecache;
+
+ psp->ps_cpuid[core] = csp[core].cs_cpuid;
+ psp->ps_ncores++;
+
+ /* adjust time if necessary */
+ if (csp[core].cs_time > psp->ps_time) {
+ psp->ps_time = csp[core].cs_time;
+ }
+
+ psp->ps_busy |= csp[core].cs_busy;
+
+ /*
+ * If any of the cores are configured, the
+ * entire CMP is marked as configured.
+ */
+ if (csp[core].cs_ostate == SBD_STAT_CONFIGURED) {
+ psp->ps_ostate = csp[core].cs_ostate;
+ }
+ }
+}
+
+int
+dr_cpu_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int cmp;
+ int core;
+ int ncpu;
+ dr_board_t *bp;
+ sbd_cpu_stat_t *cstat;
+ int impl;
+
+ bp = hp->h_bd;
+ ncpu = 0;
+
+ devset &= DR_DEVS_PRESENT(bp);
+ cstat = kmem_zalloc(sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP,
+ KM_SLEEP);
+
+ /*
+ * Treat every CPU as a CMP. In the case where the
+ * device is not a CMP, treat it as a CMP with only
+ * one core.
+ */
+ for (cmp = 0; cmp < MAX_CMP_UNITS_PER_BOARD; cmp++) {
+ int ncores;
+ dr_cpu_unit_t *cp;
+ drmach_status_t pstat;
+ sbd_error_t *err;
+ sbd_cmp_stat_t *psp;
+
+ if ((devset & DEVSET(SBD_COMP_CMP, cmp)) == 0) {
+ continue;
+ }
+
+ ncores = 0;
+
+ for (core = 0; core < MAX_CORES_PER_CMP; core++) {
+
+ cp = dr_get_cpu_unit(bp, DR_CMP_CORE_UNUM(cmp, core));
+
+ if (cp->sbc_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ ASSERT(dr_cpu_unit_is_sane(hp->h_bd, cp));
+
+ /* skip if not present */
+ if (cp->sbc_cm.sbdev_id == (drmachid_t)0) {
+ continue;
+ }
+
+ /* fetch platform status */
+ err = drmach_status(cp->sbc_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err);
+ continue;
+ }
+
+ dr_fill_cpu_stat(cp, &pstat, &cstat[ncores++]);
+ /*
+ * We should set impl here because the last core
+ * found might be EMPTY or not present.
+ */
+ impl = cp->sbc_cpu_impl;
+ }
+
+ if (ncores == 0) {
+ continue;
+ }
+
+ /*
+ * Store the data to the outgoing array. If the
+ * device is a CMP, combine all the data for the
+ * cores into a single stat structure.
+ *
+ * The check for a CMP device uses the last core
+ * found, assuming that all cores will have the
+ * same implementation.
+ */
+ if (CPU_IMPL_IS_CMP(impl)) {
+ psp = (sbd_cmp_stat_t *)dsp;
+ dr_fill_cmp_stat(cstat, ncores, impl, psp);
+ } else {
+ ASSERT(ncores == 1);
+ bcopy(cstat, dsp, sizeof (sbd_cpu_stat_t));
+ }
+
+ dsp++;
+ ncpu++;
+ }
+
+ kmem_free(cstat, sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP);
+
+ return (ncpu);
+}
+
+/*
+ * Cancel previous release operation for cpu.
+ * For cpus this means simply bringing cpus that
+ * were offline back online. Note that they had
+ * to have been online at the time they were
+ * released.
+ */
+int
+dr_cancel_cpu(dr_cpu_unit_t *up)
+{
+ int rv = 0;
+ static fn_t f = "dr_cancel_cpu";
+
+ ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up));
+
+ if (cpu_flagged_active(up->sbc_cpu_flags)) {
+ struct cpu *cp;
+
+ /*
+ * CPU had been online, go ahead
+ * bring it back online.
+ */
+ PR_CPU("%s: bringing cpu %d back ONLINE\n", f, up->sbc_cpu_id);
+
+ mutex_enter(&cpu_lock);
+ cp = cpu[up->sbc_cpu_id];
+
+ if (cpu_is_poweredoff(cp)) {
+ if (cpu_poweron(cp)) {
+ cmn_err(CE_WARN, "%s: failed to power-on "
+ "cpu %d", f, up->sbc_cpu_id);
+ rv = -1;
+ }
+ }
+
+ if (rv == 0 && cpu_is_offline(cp)) {
+ if (cpu_online(cp)) {
+ cmn_err(CE_WARN, "%s: failed to online cpu %d",
+ f, up->sbc_cpu_id);
+ rv = -1;
+ }
+ }
+
+ if (rv == 0 && cpu_is_online(cp)) {
+ if (cpu_flagged_nointr(up->sbc_cpu_flags)) {
+ if (cpu_intr_disable(cp) != 0) {
+ cmn_err(CE_WARN, "%s: failed to "
+ "disable interrupts on cpu %d", f,
+ up->sbc_cpu_id);
+ }
+ }
+ }
+
+ mutex_exit(&cpu_lock);
+ }
+
+ return (rv);
+}
+
+int
+dr_disconnect_cpu(dr_cpu_unit_t *up)
+{
+ sbd_error_t *err;
+ static fn_t f = "dr_disconnect_cpu";
+
+ PR_CPU("%s...\n", f);
+
+ ASSERT((up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) ||
+ (up->sbc_cm.sbdev_state == DR_STATE_UNCONFIGURED));
+
+ ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up));
+
+ if (up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) {
+ /*
+ * Cpus were never brought in and so are still
+ * effectively disconnected, so nothing to do here.
+ */
+ PR_CPU("%s: cpu %d never brought in\n", f, up->sbc_cpu_id);
+ return (0);
+ }
+
+ err = drmach_cpu_disconnect(up->sbc_cm.sbdev_id);
+ if (err == NULL)
+ return (0);
+ else {
+ DRERR_SET_C(&up->sbc_cm.sbdev_error, &err);
+ return (-1);
+ }
+ /*NOTREACHED*/
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_io.c b/usr/src/uts/i86pc/io/dr/dr_io.c
new file mode 100644
index 0000000000..96e0e458cb
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_io.c
@@ -0,0 +1,389 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * I/O support routines for DR
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/kmem.h>
+#include <sys/promif.h>
+#include <sys/sysmacros.h>
+#include <sys/archsystm.h>
+#include <sys/machsystm.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+void
+dr_init_io_unit(dr_io_unit_t *ip)
+{
+ dr_state_t new_state;
+
+ if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ }
+ dr_device_transition(&ip->sbi_cm, new_state);
+}
+
+/*ARGSUSED*/
+void
+dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_configure(cp->sbdev_id, 0);
+ dr_unlock_status(hp->h_bd);
+
+ if (!err)
+ err = drmach_io_post_attach(cp->sbdev_id);
+
+ if (err)
+ DRERR_SET_C(&cp->sbdev_error, &err);
+}
+
+/*
+ * remove device nodes for the branch indicated by cp
+ */
+/*ARGSUSED*/
+void
+dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ sbd_error_t *err;
+
+ err = drmach_unconfigure(cp->sbdev_id, 0);
+
+ if (!err)
+ err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY);
+
+ if (!err)
+ err = drmach_io_post_release(cp->sbdev_id);
+
+ if (err) {
+ dr_device_transition(cp, DR_STATE_CONFIGURED);
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ }
+}
+
+/*ARGSUSED*/
+int
+dr_disconnect_io(dr_io_unit_t *ip)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+
+ for (d = 0; d < devnum; d++) {
+ dr_common_unit_t *cp = devlist[d];
+
+ cmn_err(CE_CONT, "OS configure %s", cp->sbdev_path);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ return (0);
+}
+
+static int
+dr_check_io_refs(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ register int i, reftotal = 0;
+ static fn_t f = "dr_check_io_refs";
+
+ for (i = 0; i < devnum; i++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[i];
+ dev_info_t *dip;
+ int ref;
+ int refcount_non_gldv3;
+ sbd_error_t *err;
+
+ err = drmach_get_dip(ip->sbi_cm.sbdev_id, &dip);
+ if (err)
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ else if (dip != NULL) {
+ ref = 0;
+ refcount_non_gldv3 = 0;
+ ASSERT(e_ddi_branch_held(dip));
+ dr_check_devices(dip, &ref, hp, NULL, NULL,
+ 0, &refcount_non_gldv3);
+ ASSERT(refcount_non_gldv3 >= 0);
+ ASSERT(ref >= refcount_non_gldv3);
+ /*
+ * Ignore reference counts of non-gldv3 network devices
+ * as Crossbow creates reference counts for non-active
+			 * (unplumbed) instances. The reference count check
+			 * in detach() is known to prevent such devices from
+			 * detaching when they should.
+ */
+ ref -= refcount_non_gldv3;
+ hp->h_err = NULL;
+ if (ref) {
+ dr_dev_err(CE_WARN, &ip->sbi_cm, ESBD_BUSY);
+ }
+ PR_IO("%s: dip(%s) ref = %d\n",
+ f, ddi_get_name(dip), ref);
+ reftotal += ref;
+ } else {
+ PR_IO("%s: NO dip for id (0x%x)\n",
+ f, (uint_t)(uintptr_t)ip->sbi_cm.sbdev_id);
+ }
+ }
+
+ return (reftotal);
+}
+
+int
+dr_pre_release_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ static fn_t f = "dr_pre_release_io";
+ int d;
+
+ ASSERT(devnum > 0);
+
+ /* fail if any I/O device pre-release fails */
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+
+ if ((hp->h_err = drmach_io_pre_release(
+ ip->sbi_cm.sbdev_id)) != 0) {
+ return (-1);
+ }
+ }
+
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+ sbd_error_t *err;
+
+ err = drmach_release(ip->sbi_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error,
+ &err);
+ return (-1);
+ }
+ }
+
+ /* fail if any I/O devices are still referenced */
+ if (dr_check_io_refs(hp, devlist, devnum) > 0) {
+ PR_IO("%s: failed - I/O devices ref'd\n", f);
+
+ /* recover before return error */
+ for (d = 0; d < devnum; d++) {
+ dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d];
+ sbd_error_t *err;
+ err = drmach_io_unrelease(ip->sbi_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ return (-1);
+ }
+ }
+ return (-1);
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+
+ ASSERT(devnum > 0);
+
+ for (d = 0; d < devnum; d++) {
+ dr_common_unit_t *cp = devlist[d];
+
+ cmn_err(CE_CONT, "OS unconfigure %s", cp->sbdev_path);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_detach_io(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ register int i;
+ int rv = 0;
+ static fn_t f = "dr_post_detach_io";
+
+ ASSERT(devnum > 0);
+ for (i = 0; i < devnum; i++) {
+ dr_common_unit_t *cp = devlist[i];
+ if (cp->sbdev_error != NULL) {
+ PR_IO("%s: Failed\n", f);
+ rv = -1;
+ break;
+ }
+ }
+ return (rv);
+}
+
+static void
+dr_get_comp_cond(dr_io_unit_t *ip, dev_info_t *dip)
+{
+ if (dip == NULL) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_UNKNOWN;
+ return;
+ }
+
+ if (DEVI(dip)->devi_flags & DEVI_RETIRED) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_FAILED;
+ return;
+ }
+
+ if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ }
+}
+
+int
+dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int i, ix;
+ dr_board_t *bp;
+ sbd_io_stat_t *isp;
+ dr_io_unit_t *ip;
+
+ bp = hp->h_bd;
+
+ /*
+ * Only look for requested devices that are actually present.
+ */
+ devset &= DR_DEVS_PRESENT(bp);
+
+ for (i = ix = 0; i < MAX_IO_UNITS_PER_BOARD; i++) {
+ drmachid_t id;
+ dev_info_t *dip;
+ sbd_error_t *err;
+ drmach_status_t pstat;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_IO, i) == 0)
+ continue;
+
+ ip = dr_get_io_unit(bp, i);
+
+ if (ip->sbi_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ id = ip->sbi_cm.sbdev_id;
+ if (id == (drmachid_t)0)
+ continue;
+
+ err = drmach_status(ip->sbi_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err);
+ return (-1);
+ }
+
+ dip = NULL;
+ err = drmach_get_dip(id, &dip);
+ if (err) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+
+ sbd_err_clear(&err);
+ continue;
+ }
+
+ isp = &dsp->d_io;
+ bzero((caddr_t)isp, sizeof (*isp));
+
+ isp->is_cm.c_id.c_type = ip->sbi_cm.sbdev_type;
+ isp->is_cm.c_id.c_unit = ip->sbi_cm.sbdev_unum;
+ (void) strlcpy(isp->is_cm.c_id.c_name, pstat.type,
+ sizeof (isp->is_cm.c_id.c_name));
+
+ dr_get_comp_cond(ip, dip);
+ isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond;
+ isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy | pstat.busy;
+ isp->is_cm.c_time = ip->sbi_cm.sbdev_time;
+ isp->is_cm.c_ostate = ip->sbi_cm.sbdev_ostate;
+ isp->is_cm.c_sflags = 0;
+
+ if (dip == NULL) {
+ isp->is_pathname[0] = '\0';
+ isp->is_referenced = 0;
+ isp->is_unsafe_count = 0;
+ } else {
+ int refcount = 0, idx = 0;
+ uint64_t unsafe_devs[SBD_MAX_UNSAFE];
+
+ ASSERT(e_ddi_branch_held(dip));
+ (void) ddi_pathname(dip, isp->is_pathname);
+
+ /* check reference and unsafe counts on devices */
+ isp->is_unsafe_count = 0;
+ dr_check_devices(dip, &refcount, hp, unsafe_devs,
+ &idx, SBD_MAX_UNSAFE, NULL);
+ while (idx > 0) {
+ isp->is_unsafe_list[idx-1] = unsafe_devs[idx-1];
+ --idx;
+ }
+
+ isp->is_referenced = (refcount == 0) ? 0 : 1;
+
+ hp->h_err = NULL;
+ }
+ ix++;
+ dsp++;
+ }
+
+ return (ix);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c
new file mode 100644
index 0000000000..90ceb84044
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c
@@ -0,0 +1,639 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+/*
+ * DR memory support routines.
+ */
+
+#include <sys/note.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/kobj.h>
+#include <sys/conf.h>
+#include <sys/dditypes.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/sysmacros.h>
+#include <sys/machsystm.h>
+#include <sys/promif.h>
+#include <sys/lgrp.h>
+#include <sys/mem_config.h>
+#include <vm/seg_kmem.h>
+#include <vm/page.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+extern struct memlist *phys_install;
+
+/* TODO: push this reference below drmach line */
+extern int kcage_on;
+
+/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
+static char *dr_ie_fmt = "dr_mem_acpi.c %d";
+
+static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
+
+/*
+ * dr_mem_unit_t.sbm_flags
+ */
+#define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
+#define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
+#define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
+#define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
+#define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
+
+/* helper macros */
+#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
+#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
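+/*
+ * E.g., assuming 4 KB pages (PAGESHIFT == 12), _ptob64(0x10) yields
+ * 0x10000 and _b64top(0x10000) yields 0x10.
+ */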
+
+static struct memlist *
+dr_get_memlist(dr_mem_unit_t *mp)
+{
+ struct memlist *mlist = NULL;
+ sbd_error_t *err;
+ static fn_t f = "dr_get_memlist";
+
+ PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
+
+ /*
+ * Return cached memlist, if present.
+ * This memlist will be present following an
+ * unconfigure (a.k.a: detach) of this memunit.
+	 * It should only be used in the case where a configure
+ * is bringing this memunit back in without going
+ * through the disconnect and connect states.
+ */
+ if (mp->sbm_mlist) {
+ PR_MEM("%s: found cached memlist\n", f);
+
+ mlist = memlist_dup(mp->sbm_mlist);
+ } else {
+ uint64_t basepa = _ptob64(mp->sbm_basepfn);
+
+ /* attempt to construct a memlist using phys_install */
+
+ /* round down to slice base address */
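+		/*
+		 * E.g., for a hypothetical 4 GB slice the alignment
+		 * mask is 0xFFFFFFFF, so a basepa of 0x140000000
+		 * rounds down to 0x100000000.
+		 */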
+ basepa &= ~mp->sbm_alignment_mask;
+
+ /* get a copy of phys_install to edit */
+ memlist_read_lock();
+ mlist = memlist_dup(phys_install);
+ memlist_read_unlock();
+
+ /* trim lower irrelevant span */
+ if (mlist)
+ mlist = memlist_del_span(mlist, 0ull, basepa);
+
+ /* trim upper irrelevant span */
+ if (mlist) {
+ uint64_t endpa, toppa;
+
+ toppa = mp->sbm_slice_top;
+ endpa = _ptob64(physmax + 1);
+ if (endpa > toppa)
+ mlist = memlist_del_span(
+ mlist, toppa,
+ endpa - toppa);
+ }
+
+ if (mlist) {
+ /* successfully built a memlist */
+ PR_MEM("%s: derived memlist from phys_install\n", f);
+ }
+
+ /* if no mlist yet, try platform layer */
+ if (!mlist) {
+ err = drmach_mem_get_memlist(
+ mp->sbm_cm.sbdev_id, &mlist);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ mlist = NULL; /* paranoia */
+ }
+ }
+ }
+
+ PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mlist);
+
+ return (mlist);
+}
+
+/*ARGSUSED*/
+void
+dr_release_mem(dr_common_unit_t *cp)
+{
+}
+
+void
+dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
+ struct memlist *ml, *mc;
+ sbd_error_t *err;
+ static fn_t f = "dr_attach_mem";
+ uint64_t dr_physmax;
+
+ PR_MEM("%s...\n", f);
+
+ dr_lock_status(hp->h_bd);
+ err = drmach_configure(cp->sbdev_id, 0);
+ dr_unlock_status(hp->h_bd);
+ if (err) {
+ DRERR_SET_C(&cp->sbdev_error, &err);
+ return;
+ }
+
+ ml = dr_get_memlist(mp);
+
+ /* Skip memory with address above plat_dr_physmax or kpm_size */
+ dr_physmax = plat_dr_physmax ? ptob(plat_dr_physmax) : UINT64_MAX;
+ if (kpm_size < dr_physmax)
+ dr_physmax = kpm_size;
+ ml = memlist_del_span(ml, dr_physmax, UINT64_MAX - dr_physmax);
+
+ for (mc = ml; mc; mc = mc->ml_next) {
+ int rv;
+ sbd_error_t *err;
+
+ rv = kphysm_add_memory_dynamic(
+ (pfn_t)btop(mc->ml_address),
+ (pgcnt_t)btop(mc->ml_size));
+ if (rv != KPHYSM_OK) {
+ /*
+ * translate kphysm error and
+ * store in devlist error
+ */
+ switch (rv) {
+ case KPHYSM_ERESOURCE:
+ rv = ESBD_NOMEM;
+ break;
+
+ case KPHYSM_EFAULT:
+ rv = ESBD_FAULT;
+ break;
+
+ default:
+ rv = ESBD_INTERNAL;
+ break;
+ }
+
+ if (rv == ESBD_INTERNAL) {
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+ } else
+ dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
+ break;
+ }
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ break;
+ }
+ }
+
+ memlist_delete(ml);
+ dr_init_mem_unit_data(mp);
+
+ /* back out if configure failed */
+ if (mp->sbm_cm.sbdev_error != NULL) {
+ dr_lock_status(hp->h_bd);
+ err = drmach_unconfigure(cp->sbdev_id, 0);
+ if (err)
+ sbd_err_clear(&err);
+ dr_unlock_status(hp->h_bd);
+ }
+}
+
+/*ARGSUSED*/
+void
+dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
+{
+}
+
+/*
+ * This routine is intended as a wrapper for kphysm_del_span_query in order
+ * to support potential memory holes in a board's physical address space:
+ * a full implementation would call kphysm_del_span_query for each node in
+ * a memlist and accumulate the results in *mp. On x86 it is currently a
+ * stub that reports no managed or non-relocatable pages.
+ */
+static int
+dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
+{
+ int rv = 0;
+
+ if (mlist == NULL)
+ cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
+
+ mp->phys_pages = 0;
+ mp->managed = 0;
+ mp->nonrelocatable = 0;
+ mp->first_nonrelocatable = 0;
+ mp->last_nonrelocatable = 0;
+
+ return (rv);
+}
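+
+/*
+ * A minimal sketch (not part of this commit) of how a non-stub
+ * implementation might accumulate per-span results, assuming the
+ * kphysm_del_span_query(pfn, pgcnt, memquery_t *) interface:
+ *
+ *	struct memlist *ml;
+ *	memquery_t mq;
+ *
+ *	for (ml = mlist; ml != NULL; ml = ml->ml_next) {
+ *		rv = kphysm_del_span_query(_b64top(ml->ml_address),
+ *		    _b64top(ml->ml_size), &mq);
+ *		if (rv != KPHYSM_OK)
+ *			break;
+ *		mp->phys_pages += mq.phys_pages;
+ *		mp->managed += mq.managed;
+ *		mp->nonrelocatable += mq.nonrelocatable;
+ *		if (mq.nonrelocatable != 0) {
+ *			if (mp->nonrelocatable == mq.nonrelocatable)
+ *				mp->first_nonrelocatable =
+ *				    mq.first_nonrelocatable;
+ *			mp->last_nonrelocatable =
+ *			    mq.last_nonrelocatable;
+ *		}
+ *	}
+ */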
+
+/*
+ * NOTE: This routine is only partially smart about multiple
+ * mem-units. Need to make mem-status structure smart
+ * about them also.
+ */
+int
+dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
+{
+ int m, mix;
+ memquery_t mq;
+ dr_board_t *bp;
+ dr_mem_unit_t *mp;
+ sbd_mem_stat_t *msp;
+ static fn_t f = "dr_mem_status";
+
+ bp = hp->h_bd;
+ devset &= DR_DEVS_PRESENT(bp);
+
+ for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
+ int rv;
+ sbd_error_t *err;
+ drmach_status_t pstat;
+ dr_mem_unit_t *p_mp;
+
+ if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
+ continue;
+
+ mp = dr_get_mem_unit(bp, m);
+
+ if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
+ /* present, but not fully initialized */
+ continue;
+ }
+
+ if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
+ continue;
+
+ /* fetch platform status */
+ err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ continue;
+ }
+
+ msp = &dsp->d_mem;
+ bzero((caddr_t)msp, sizeof (*msp));
+
+ (void) strlcpy(msp->ms_cm.c_id.c_name, pstat.type,
+ sizeof (msp->ms_cm.c_id.c_name));
+ msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
+ msp->ms_cm.c_id.c_unit = mp->sbm_cm.sbdev_unum;
+ msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
+ msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
+ msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
+ msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
+
+ msp->ms_totpages = mp->sbm_npages;
+ msp->ms_basepfn = mp->sbm_basepfn;
+ msp->ms_pageslost = mp->sbm_pageslost;
+ msp->ms_cage_enabled = kcage_on;
+
+ if (mp->sbm_flags & DR_MFLAG_RESERVED)
+ p_mp = mp->sbm_peer;
+ else
+ p_mp = NULL;
+
+ if (p_mp == NULL) {
+ msp->ms_peer_is_target = 0;
+ msp->ms_peer_ap_id[0] = '\0';
+ } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ char *minor;
+
+ /*
+ * b_dip doesn't have to be held for ddi_pathname()
+ * because the board struct (dr_board_t) will be
+ * destroyed before b_dip detaches.
+ */
+ (void) ddi_pathname(bp->b_dip, path);
+ minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
+
+ (void) snprintf(msp->ms_peer_ap_id,
+ sizeof (msp->ms_peer_ap_id), "%s%s",
+ path, (minor == NULL) ? "" : minor);
+
+ kmem_free(path, MAXPATHLEN);
+
+ if (p_mp->sbm_flags & DR_MFLAG_TARGET)
+ msp->ms_peer_is_target = 1;
+ }
+
+ /*
+ * kphysm_del_span_query can report non-reloc pages = total
+ * pages for memory that is not yet configured
+ */
+ if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
+ struct memlist *ml;
+
+ ml = dr_get_memlist(mp);
+ rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
+ memlist_delete(ml);
+
+ if (rv == KPHYSM_OK) {
+ msp->ms_managed_pages = mq.managed;
+ msp->ms_noreloc_pages = mq.nonrelocatable;
+ msp->ms_noreloc_first =
+ mq.first_nonrelocatable;
+ msp->ms_noreloc_last =
+ mq.last_nonrelocatable;
+ msp->ms_cm.c_sflags = 0;
+ if (mq.nonrelocatable &&
+ drmach_copy_rename_need_suspend(
+ mp->sbm_cm.sbdev_id)) {
+ SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
+ msp->ms_cm.c_sflags);
+ }
+ } else {
+ PR_MEM("%s: kphysm_del_span_query() = %d\n",
+ f, rv);
+ }
+ }
+
+ /*
+ * Check source unit state during copy-rename
+ */
+ if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
+ (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
+ mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
+ msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
+
+ mix++;
+ dsp++;
+ }
+
+ return (mix);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int err_flag = 0;
+ int d;
+ sbd_error_t *err;
+ static fn_t f = "dr_pre_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ dr_state_t state;
+
+ cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
+
+ state = mp->sbm_cm.sbdev_state;
+ switch (state) {
+ case DR_STATE_UNCONFIGURED:
+ PR_MEM("%s: recovering from UNCONFIG for %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ /* use memlist cached by dr_post_detach_mem_unit */
+ ASSERT(mp->sbm_mlist != NULL);
+ PR_MEM("%s: re-configuring cached memlist for %s:\n",
+ f, mp->sbm_cm.sbdev_path);
+ PR_MEMLIST_DUMP(mp->sbm_mlist);
+
+			/* kphysm del handle should have been freed */
+ ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
+
+ /*FALLTHROUGH*/
+
+ case DR_STATE_CONNECTED:
+ PR_MEM("%s: reprogramming mem hardware on %s\n",
+ f, mp->sbm_cm.sbdev_bp->b_path);
+
+ PR_MEM("%s: enabling %s\n",
+ f, mp->sbm_cm.sbdev_path);
+
+ err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
+ if (err) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ err_flag = 1;
+ }
+ break;
+
+ default:
+ dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
+ err_flag = 1;
+ break;
+ }
+
+ /* exit for loop if error encountered */
+ if (err_flag)
+ break;
+ }
+
+ return (err_flag ? -1 : 0);
+}
+
+/*ARGSUSED*/
+int
+dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ int d;
+ static fn_t f = "dr_post_attach_mem";
+
+ PR_MEM("%s...\n", f);
+
+ for (d = 0; d < devnum; d++) {
+ dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
+ struct memlist *mlist, *ml;
+
+ mlist = dr_get_memlist(mp);
+
+ /*
+ * Verify the memory really did successfully attach
+ * by checking for its existence in phys_install.
+ */
+ memlist_read_lock();
+ if (memlist_intersect(phys_install, mlist) == 0) {
+ memlist_read_unlock();
+
+ DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
+
+ PR_MEM("%s: %s memlist not in phys_install",
+ f, mp->sbm_cm.sbdev_path);
+
+ memlist_delete(mlist);
+ continue;
+ }
+ memlist_read_unlock();
+
+ for (ml = mlist; ml != NULL; ml = ml->ml_next) {
+ sbd_error_t *err;
+
+ err = drmach_mem_add_span(
+ mp->sbm_cm.sbdev_id,
+ ml->ml_address,
+ ml->ml_size);
+ if (err)
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ }
+
+ memlist_delete(mlist);
+
+ /*
+ * Destroy cached memlist, if any.
+ * There will be a cached memlist in sbm_mlist if
+ * this board is being configured directly after
+ * an unconfigure.
+ * To support this transition, dr_post_detach_mem
+ * left a copy of the last known memlist in sbm_mlist.
+ * This memlist could differ from any derived from
+ * hardware if while this memunit was last configured
+ * the system detected and deleted bad pages from
+ * phys_install. The location of those bad pages
+ * will be reflected in the cached memlist.
+ */
+ if (mp->sbm_mlist) {
+ memlist_delete(mp->sbm_mlist);
+ mp->sbm_mlist = NULL;
+ }
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+int
+dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
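+
+/*
+ * Memory unconfiguration is not supported by this x86 implementation:
+ * the surrounding pre/post detach, pre-release, disconnect, and cancel
+ * entry points simply return failure (-1).
+ */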
+
+/*
+ * Successful return from this function will have the memory
+ * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
+ * and waiting. This routine's job is to select the memory that
+ * actually has to be released (detached), which may not necessarily
+ * be the same memory node that came in via devlist[],
+ * i.e. a copy-rename is needed.
+ */
+/*ARGSUSED*/
+int
+dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+void
+dr_release_mem_done(dr_common_unit_t *cp)
+{
+}
+
+/*ARGSUSED*/
+int
+dr_disconnect_mem(dr_mem_unit_t *mp)
+{
+ return (-1);
+}
+
+/*ARGSUSED*/
+int
+dr_cancel_mem(dr_mem_unit_t *s_mp)
+{
+ return (-1);
+}
+
+void
+dr_init_mem_unit(dr_mem_unit_t *mp)
+{
+ dr_state_t new_state;
+
+ if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONFIGURED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
+ new_state = DR_STATE_CONNECTED;
+ mp->sbm_cm.sbdev_cond = SBD_COND_OK;
+ } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
+ new_state = DR_STATE_OCCUPIED;
+ } else {
+ new_state = DR_STATE_EMPTY;
+ }
+
+ if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
+ dr_init_mem_unit_data(mp);
+
+ /* delay transition until fully initialized */
+ dr_device_transition(&mp->sbm_cm, new_state);
+}
+
+static void
+dr_init_mem_unit_data(dr_mem_unit_t *mp)
+{
+ drmachid_t id = mp->sbm_cm.sbdev_id;
+ drmach_mem_info_t minfo;
+ sbd_error_t *err;
+ static fn_t f = "dr_init_mem_unit_data";
+
+ PR_MEM("%s...\n", f);
+
+ /* a little sanity checking */
+ ASSERT(mp->sbm_peer == NULL);
+ ASSERT(mp->sbm_flags == 0);
+
+ if (err = drmach_mem_get_info(id, &minfo)) {
+ DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
+ return;
+ }
+ mp->sbm_basepfn = _b64top(minfo.mi_basepa);
+ mp->sbm_npages = _b64top(minfo.mi_size);
+ mp->sbm_alignment_mask = minfo.mi_alignment_mask;
+ mp->sbm_slice_base = minfo.mi_slice_base;
+ mp->sbm_slice_top = minfo.mi_slice_top;
+ mp->sbm_slice_size = minfo.mi_slice_size;
+
+ PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
+ f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
+}
diff --git a/usr/src/uts/i86pc/io/dr/dr_quiesce.c b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
new file mode 100644
index 0000000000..f3467f6eb1
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_quiesce.c
@@ -0,0 +1,1050 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * A CPR derivative specifically for starfire/starcat.
+ * X86 doesn't make use of the quiesce interfaces; the code is kept for
+ * simplicity.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/machparam.h>
+#include <sys/machsystm.h>
+#include <sys/ddi.h>
+#define SUNDDI_IMPL
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/devctl.h>
+#include <sys/time.h>
+#include <sys/kmem.h>
+#include <nfs/lm.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ndi_impldefs.h>
+#include <sys/obpdefs.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/callb.h>
+#include <sys/clock.h>
+#include <sys/x_call.h>
+#include <sys/cpuvar.h>
+#include <sys/epm.h>
+#include <sys/vfs.h>
+#include <sys/promif.h>
+#include <sys/conf.h>
+#include <sys/cyclic.h>
+
+#include <sys/dr.h>
+#include <sys/dr_util.h>
+
+extern void e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt);
+extern void e_ddi_exit_driver_list(struct devnames *dnp, int listcnt);
+extern int is_pseudo_device(dev_info_t *dip);
+
+extern kmutex_t cpu_lock;
+extern dr_unsafe_devs_t dr_unsafe_devs;
+
+static int dr_is_real_device(dev_info_t *dip);
+static int dr_is_unsafe_major(major_t major);
+static int dr_bypass_device(char *dname);
+static int dr_check_dip(dev_info_t *dip, void *arg, uint_t ref);
+static int dr_resolve_devname(dev_info_t *dip, char *buffer,
+ char *alias);
+static sbd_error_t *drerr_int(int e_code, uint64_t *arr, int idx,
+ int majors);
+static int dr_add_int(uint64_t *arr, int idx, int len,
+ uint64_t val);
+
+int dr_pt_test_suspend(dr_handle_t *hp);
+
+/*
+ * dr_quiesce.c interface
+ * NOTE: states used internally by dr_suspend and dr_resume
+ */
+typedef enum dr_suspend_state {
+ DR_SRSTATE_BEGIN = 0,
+ DR_SRSTATE_USER,
+ DR_SRSTATE_DRIVER,
+ DR_SRSTATE_FULL
+} suspend_state_t;
+
+struct dr_sr_handle {
+ dr_handle_t *sr_dr_handlep;
+ dev_info_t *sr_failed_dip;
+ suspend_state_t sr_suspend_state;
+ uint_t sr_flags;
+ uint64_t sr_err_ints[DR_MAX_ERR_INT];
+ int sr_err_idx;
+};
+
+#define SR_FLAG_WATCHDOG 0x1
+
+/*
+ * XXX
+ * This hack will go away before RTI. Just for testing.
+ * List of drivers to bypass when performing a suspend.
+ */
+static char *dr_bypass_list[] = {
+ ""
+};
+
+
+#define SKIP_SYNC /* bypass sync ops in dr_suspend */
+
+/*
+ * dr_skip_user_threads is used to control if user threads should
+ * be suspended. If dr_skip_user_threads is true, the rest of the
+ * flags are not used; if it is false, dr_check_user_stop_result
+ * will be used to control whether or not we need to check suspend
+ * result, and dr_allow_blocked_threads will be used to control
+ * whether or not we allow suspend to continue if there are blocked
+ * threads. We allow all combinations of dr_check_user_stop_result
+ * and dr_allow_blocked_threads, even though it might not make much
+ * sense to disallow blocked threads when we don't even check the stop
+ * result.
+ */
+static int dr_skip_user_threads = 0; /* default to FALSE */
+static int dr_check_user_stop_result = 1; /* default to TRUE */
+static int dr_allow_blocked_threads = 1; /* default to TRUE */
+
+#define DR_CPU_LOOP_MSEC 1000
+
+static void
+dr_stop_intr(void)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ kpreempt_disable();
+ cyclic_suspend();
+}
+
+static void
+dr_enable_intr(void)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ cyclic_resume();
+ kpreempt_enable();
+}
+
+dr_sr_handle_t *
+dr_get_sr_handle(dr_handle_t *hp)
+{
+ dr_sr_handle_t *srh;
+
+ srh = GETSTRUCT(dr_sr_handle_t, 1);
+ srh->sr_dr_handlep = hp;
+
+ return (srh);
+}
+
+void
+dr_release_sr_handle(dr_sr_handle_t *srh)
+{
+ ASSERT(srh->sr_failed_dip == NULL);
+ FREESTRUCT(srh, dr_sr_handle_t, 1);
+}
+
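+/*
+ * A node counts as a "real" device if a driver is bound to it and
+ * either its PM flags require suspend/resume handling or it has a
+ * "reg" property describing actual hardware; nodes flagged PMC_NO_SR
+ * are never treated as real.
+ */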
+static int
+dr_is_real_device(dev_info_t *dip)
+{
+ struct regspec *regbuf = NULL;
+ int length = 0;
+ int rc;
+
+ if (ddi_get_driver(dip) == NULL)
+ return (0);
+
+ if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
+ return (1);
+ if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
+ return (0);
+
+ /*
+ * now the general case
+ */
+ rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
+ (caddr_t)&regbuf, &length);
+ ASSERT(rc != DDI_PROP_NO_MEMORY);
+ if (rc != DDI_PROP_SUCCESS) {
+ return (0);
+ } else {
+ if ((length > 0) && (regbuf != NULL))
+ kmem_free(regbuf, length);
+ return (1);
+ }
+}
+
+static int
+dr_is_unsafe_major(major_t major)
+{
+ char *dname, **cpp;
+ int i, ndevs;
+
+ if ((dname = ddi_major_to_name(major)) == NULL) {
+ PR_QR("dr_is_unsafe_major: invalid major # %d\n", major);
+ return (0);
+ }
+
+ ndevs = dr_unsafe_devs.ndevs;
+ for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) {
+ if (strcmp(dname, *cpp++) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+static int
+dr_bypass_device(char *dname)
+{
+ int i;
+ char **lname;
+ /* check the bypass list */
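+	/*
+	 * The list is terminated by an empty string; lname and i
+	 * advance in lockstep over the same entries.
+	 */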
+ for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) {
+ if (strcmp(dname, dr_bypass_list[i++]) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+static int
+dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias)
+{
+ major_t devmajor;
+ char *aka, *name;
+
+ *buffer = *alias = 0;
+
+ if (dip == NULL)
+ return (-1);
+
+ if ((name = ddi_get_name(dip)) == NULL)
+ name = "<null name>";
+
+ aka = name;
+
+ if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE)
+ aka = ddi_major_to_name(devmajor);
+
+ (void) strcpy(buffer, name);
+
+ if (strcmp(name, aka))
+ (void) strcpy(alias, aka);
+ else
+ *alias = 0;
+
+ return (0);
+}
+
+struct dr_ref {
+ int *refcount;
+ int *refcount_non_gldv3;
+ uint64_t *arr;
+ int *idx;
+ int len;
+};
+
+/* ARGSUSED */
+static int
+dr_check_dip(dev_info_t *dip, void *arg, uint_t ref)
+{
+ major_t major;
+ char *dname;
+ struct dr_ref *rp = (struct dr_ref *)arg;
+
+ if (dip == NULL)
+ return (DDI_WALK_CONTINUE);
+
+ if (!dr_is_real_device(dip))
+ return (DDI_WALK_CONTINUE);
+
+ dname = ddi_binding_name(dip);
+
+ if (dr_bypass_device(dname))
+ return (DDI_WALK_CONTINUE);
+
+ if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) {
+ if (ref && rp->refcount) {
+ *rp->refcount += ref;
+ PR_QR("\n %s (major# %d) is referenced(%u)\n", dname,
+ major, ref);
+ }
+ if (ref && rp->refcount_non_gldv3) {
+ if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major))
+ *rp->refcount_non_gldv3 += ref;
+ }
+ if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) {
+ PR_QR("\n %s (major# %d) not hotpluggable\n", dname,
+ major);
+ if (rp->arr != NULL && rp->idx != NULL)
+ *rp->idx = dr_add_int(rp->arr, *rp->idx,
+ rp->len, (uint64_t)major);
+ }
+ }
+ return (DDI_WALK_CONTINUE);
+}
+
+static int
+dr_check_unsafe_major(dev_info_t *dip, void *arg)
+{
+ return (dr_check_dip(dip, arg, 0));
+}
+
+
+/*ARGSUSED*/
+void
+dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle,
+ uint64_t *arr, int *idx, int len, int *refcount_non_gldv3)
+{
+ struct dr_ref bref = {0};
+
+ if (dip == NULL)
+ return;
+
+ bref.refcount = refcount;
+ bref.refcount_non_gldv3 = refcount_non_gldv3;
+ bref.arr = arr;
+ bref.idx = idx;
+ bref.len = len;
+
+ ASSERT(e_ddi_branch_held(dip));
+ (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref);
+}
+
+/*
+ * The "dip" argument's parent (if it exists) must be held busy.
+ */
+static int
+dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ major_t major;
+ char *dname;
+ int circ;
+
+ /*
+ * If dip is the root node, it has no siblings and it is
+ * always held. If dip is not the root node, dr_suspend_devices()
+ * will be invoked with the parent held busy.
+ */
+ for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
+ char d_name[40], d_alias[40], *d_info;
+
+ ndi_devi_enter(dip, &circ);
+ if (dr_suspend_devices(ddi_get_child(dip), srh)) {
+ ndi_devi_exit(dip, circ);
+ return (ENXIO);
+ }
+ ndi_devi_exit(dip, circ);
+
+ if (!dr_is_real_device(dip))
+ continue;
+
+ major = (major_t)-1;
+ if ((dname = ddi_binding_name(dip)) != NULL)
+ major = ddi_name_to_major(dname);
+
+ if (dr_bypass_device(dname)) {
+ PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
+ major);
+ continue;
+ }
+
+ if (drmach_verify_sr(dip, 1)) {
+ PR_QR(" bypassed suspend of %s (major# %d)\n", dname,
+ major);
+ continue;
+ }
+
+ if ((d_info = ddi_get_name_addr(dip)) == NULL)
+ d_info = "<null>";
+
+ d_name[0] = 0;
+ if (dr_resolve_devname(dip, d_name, d_alias) == 0) {
+ if (d_alias[0] != 0) {
+ prom_printf("\tsuspending %s@%s (aka %s)\n",
+ d_name, d_info, d_alias);
+ } else {
+ prom_printf("\tsuspending %s@%s\n", d_name,
+ d_info);
+ }
+ } else {
+ prom_printf("\tsuspending %s@%s\n", dname, d_info);
+ }
+
+ if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
+ prom_printf("\tFAILED to suspend %s@%s\n",
+ d_name[0] ? d_name : dname, d_info);
+
+ srh->sr_err_idx = dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major);
+
+ ndi_hold_devi(dip);
+ srh->sr_failed_dip = dip;
+
+ handle = srh->sr_dr_handlep;
+ dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s",
+ d_name[0] ? d_name : dname, d_info);
+
+ return (DDI_FAILURE);
+ }
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static void
+dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ dev_info_t *dip, *next, *last = NULL;
+ major_t major;
+ char *bn;
+ int circ;
+
+ major = (major_t)-1;
+
+ /* attach in reverse device tree order */
+ while (last != start) {
+ dip = start;
+ next = ddi_get_next_sibling(dip);
+ while (next != last && dip != srh->sr_failed_dip) {
+ dip = next;
+ next = ddi_get_next_sibling(dip);
+ }
+ if (dip == srh->sr_failed_dip) {
+ /* release hold acquired in dr_suspend_devices() */
+ srh->sr_failed_dip = NULL;
+ ndi_rele_devi(dip);
+ } else if (dr_is_real_device(dip) &&
+ srh->sr_failed_dip == NULL) {
+
+ if ((bn = ddi_binding_name(dip)) != NULL) {
+ major = ddi_name_to_major(bn);
+ } else {
+ bn = "<null>";
+ }
+ if (!dr_bypass_device(bn) &&
+ !drmach_verify_sr(dip, 0)) {
+ char d_name[40], d_alias[40], *d_info;
+
+ d_name[0] = 0;
+ d_info = ddi_get_name_addr(dip);
+ if (d_info == NULL)
+ d_info = "<null>";
+
+ if (!dr_resolve_devname(dip, d_name, d_alias)) {
+ if (d_alias[0] != 0) {
+ prom_printf("\tresuming "
+ "%s@%s (aka %s)\n", d_name,
+ d_info, d_alias);
+ } else {
+ prom_printf("\tresuming "
+ "%s@%s\n", d_name, d_info);
+ }
+ } else {
+ prom_printf("\tresuming %s@%s\n", bn,
+ d_info);
+ }
+
+ if (devi_attach(dip, DDI_RESUME) !=
+ DDI_SUCCESS) {
+ /*
+ * Print a console warning,
+ * set an e_code of ESBD_RESUME,
+ * and save the driver major
+ * number in the e_rsc.
+ */
+ prom_printf("\tFAILED to resume %s@%s",
+ d_name[0] ? d_name : bn, d_info);
+
+ srh->sr_err_idx =
+ dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT,
+ (uint64_t)major);
+
+ handle = srh->sr_dr_handlep;
+
+ dr_op_err(CE_IGNORE, handle,
+ ESBD_RESUME, "%s@%s",
+ d_name[0] ? d_name : bn, d_info);
+ }
+ }
+ }
+
+ /* Hold parent busy while walking its children */
+ ndi_devi_enter(dip, &circ);
+ dr_resume_devices(ddi_get_child(dip), srh);
+ ndi_devi_exit(dip, circ);
+ last = dip;
+ }
+}
+
+/*
+ * True if thread is virtually stopped. Similar to CPR_VSTOPPED
+ * but from DR point of view. These user threads are waiting in
+ * the kernel. Once they complete in the kernel, they will process
+ * the stop signal and stop.
+ */
+#define DR_VSTOPPED(t) \
+ ((t)->t_state == TS_SLEEP && \
+ (t)->t_wchan != NULL && \
+ (t)->t_astflag && \
+ ((t)->t_proc_flag & TP_CHKPT))
+
+/* ARGSUSED */
+static int
+dr_stop_user_threads(dr_sr_handle_t *srh)
+{
+ int count;
+ int bailout;
+ dr_handle_t *handle = srh->sr_dr_handlep;
+ static fn_t f = "dr_stop_user_threads";
+ kthread_id_t tp;
+
+ extern void add_one_utstop();
+ extern void utstop_timedwait(clock_t);
+ extern void utstop_init(void);
+
+#define DR_UTSTOP_RETRY 4
+#define DR_UTSTOP_WAIT hz
+
+ if (dr_skip_user_threads)
+ return (DDI_SUCCESS);
+
+ utstop_init();
+
+ /* we need to try a few times to get past fork, etc. */
+ srh->sr_err_idx = 0;
+ for (count = 0; count < DR_UTSTOP_RETRY; count++) {
+ /* walk the entire threadlist */
+ mutex_enter(&pidlock);
+ for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* handle kernel threads separately */
+ if (p->p_as == &kas || p->p_stat == SZOMB)
+ continue;
+
+ mutex_enter(&p->p_lock);
+ thread_lock(tp);
+
+ if (tp->t_state == TS_STOPPED) {
+ /* add another reason to stop this thread */
+ tp->t_schedflag &= ~TS_RESUME;
+ } else {
+ tp->t_proc_flag |= TP_CHKPT;
+
+ thread_unlock(tp);
+ mutex_exit(&p->p_lock);
+ add_one_utstop();
+ mutex_enter(&p->p_lock);
+ thread_lock(tp);
+
+ aston(tp);
+
+ if (ISWAKEABLE(tp) || ISWAITING(tp)) {
+ setrun_locked(tp);
+ }
+
+ }
+
+ /* grab thread if needed */
+ if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
+ poke_cpu(tp->t_cpu->cpu_id);
+
+ thread_unlock(tp);
+ mutex_exit(&p->p_lock);
+ }
+ mutex_exit(&pidlock);
+
+ /* let everything catch up */
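+		/*
+		 * The wait grows quadratically with the retry count:
+		 * 0, hz, 4*hz, then 9*hz ticks across the four passes.
+		 */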
+ utstop_timedwait(count * count * DR_UTSTOP_WAIT);
+
+ /* now, walk the threadlist again to see if we are done */
+ mutex_enter(&pidlock);
+ for (tp = curthread->t_next, bailout = 0;
+ tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* handle kernel threads separately */
+ if (p->p_as == &kas || p->p_stat == SZOMB)
+ continue;
+
+ /*
+ * If this thread didn't stop, and we don't allow
+ * unstopped blocked threads, bail.
+ */
+ thread_lock(tp);
+ if (!CPR_ISTOPPED(tp) &&
+ !(dr_allow_blocked_threads &&
+ DR_VSTOPPED(tp))) {
+ bailout = 1;
+ if (count == DR_UTSTOP_RETRY - 1) {
+ /*
+ * save the pid for later reporting
+ */
+ srh->sr_err_idx =
+ dr_add_int(srh->sr_err_ints,
+ srh->sr_err_idx, DR_MAX_ERR_INT,
+ (uint64_t)p->p_pid);
+
+ cmn_err(CE_WARN, "%s: "
+ "failed to stop thread: "
+ "process=%s, pid=%d",
+ f, p->p_user.u_psargs, p->p_pid);
+
+ PR_QR("%s: failed to stop thread: "
+ "process=%s, pid=%d, t_id=0x%p, "
+ "t_state=0x%x, t_proc_flag=0x%x, "
+ "t_schedflag=0x%x\n",
+ f, p->p_user.u_psargs, p->p_pid,
+ (void *)tp, tp->t_state,
+ tp->t_proc_flag, tp->t_schedflag);
+ }
+
+ }
+ thread_unlock(tp);
+ }
+ mutex_exit(&pidlock);
+
+ /* were all the threads stopped? */
+ if (!bailout)
+ break;
+ }
+
+ /* were we unable to stop all threads after a few tries? */
+ if (bailout) {
+ handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints,
+ srh->sr_err_idx, 0);
+ return (ESRCH);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static void
+dr_start_user_threads(void)
+{
+ kthread_id_t tp;
+
+ mutex_enter(&pidlock);
+
+ /* walk all threads and release them */
+ for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
+ proc_t *p = ttoproc(tp);
+
+ /* skip kernel threads */
+ if (ttoproc(tp)->p_as == &kas)
+ continue;
+
+ mutex_enter(&p->p_lock);
+ tp->t_proc_flag &= ~TP_CHKPT;
+ mutex_exit(&p->p_lock);
+
+ thread_lock(tp);
+ if (CPR_ISTOPPED(tp)) {
+ /* back on the runq */
+ tp->t_schedflag |= TS_RESUME;
+ setrun_locked(tp);
+ }
+ thread_unlock(tp);
+ }
+
+ mutex_exit(&pidlock);
+}
+
+static void
+dr_signal_user(int sig)
+{
+ struct proc *p;
+
+ mutex_enter(&pidlock);
+
+ for (p = practive; p != NULL; p = p->p_next) {
+ /* only user threads */
+ if (p->p_exec == NULL || p->p_stat == SZOMB ||
+ p == proc_init || p == ttoproc(curthread))
+ continue;
+
+ mutex_enter(&p->p_lock);
+ sigtoproc(p, NULL, sig);
+ mutex_exit(&p->p_lock);
+ }
+
+ mutex_exit(&pidlock);
+
+ /* add a bit of delay */
+ delay(hz);
+}
+
+void
+dr_resume(dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+
+ handle = srh->sr_dr_handlep;
+
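+	/*
+	 * The cases below intentionally fall through: resuming from
+	 * DR_SRSTATE_FULL unwinds every earlier stage (drivers, then
+	 * user threads), mirroring the order in which dr_suspend()
+	 * advanced sr_suspend_state.
+	 */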
+ switch (srh->sr_suspend_state) {
+ case DR_SRSTATE_FULL:
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /*
+ * Prevent false alarm in tod_validate() due to tod
+ * value change between suspend and resume
+ */
+ mutex_enter(&tod_lock);
+ tod_status_set(TOD_DR_RESUME_DONE);
+ mutex_exit(&tod_lock);
+
+ dr_enable_intr(); /* enable intr & clock */
+
+ start_cpus();
+ mutex_exit(&cpu_lock);
+
+ /*
+		 * drmach_resume_first() should only be called if
+		 * drmach_suspend_last() was called and the state transitioned
+		 * to DR_SRSTATE_FULL; this prevents resume attempts on device
+		 * instances that were not previously suspended.
+ */
+ drmach_resume_first();
+
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_DRIVER:
+ /*
+ * resume drivers
+ */
+ srh->sr_err_idx = 0;
+
+ /* no parent dip to hold busy */
+ dr_resume_devices(ddi_root_node(), srh);
+
+ if (srh->sr_err_idx && srh->sr_dr_handlep) {
+ (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME,
+ srh->sr_err_ints, srh->sr_err_idx, 1);
+ }
+
+ /*
+ * resume the lock manager
+ */
+ lm_cprresume();
+
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_USER:
+ /*
+ * finally, resume user threads
+ */
+ if (!dr_skip_user_threads) {
+ prom_printf("DR: resuming user threads...\n");
+ dr_start_user_threads();
+ }
+ /* FALLTHROUGH */
+
+ case DR_SRSTATE_BEGIN:
+ default:
+ /*
+ * let those who care know that we've just resumed
+ */
+ PR_QR("sending SIGTHAW...\n");
+ dr_signal_user(SIGTHAW);
+ break;
+ }
+
+ i_ndi_allow_device_tree_changes(handle->h_ndi);
+
+ prom_printf("DR: resume COMPLETED\n");
+}
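
dr_resume() leans on C fall-through so that each entry state replays exactly the stages that completed during suspend, in reverse order of suspension. A summary of the chain, read off the cases above:

	/*
	 * sr_suspend_state     stages executed by dr_resume()
	 * -------------------  --------------------------------------------
	 * DR_SRSTATE_FULL      enable intr/clock, start_cpus(),
	 *                      drmach_resume_first(), then everything below
	 * DR_SRSTATE_DRIVER    dr_resume_devices(), lm_cprresume(), then below
	 * DR_SRSTATE_USER      dr_start_user_threads(), then below
	 * DR_SRSTATE_BEGIN     SIGTHAW delivery only
	 */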
+
+int
+dr_suspend(dr_sr_handle_t *srh)
+{
+ dr_handle_t *handle;
+ int force;
+ int dev_errs_idx;
+ uint64_t dev_errs[DR_MAX_ERR_INT];
+ int rc = DDI_SUCCESS;
+
+ handle = srh->sr_dr_handlep;
+
+ force = dr_cmd_flags(handle) & SBD_FLAG_FORCE;
+
+ i_ndi_block_device_tree_changes(&handle->h_ndi);
+
+ prom_printf("\nDR: suspending user threads...\n");
+ srh->sr_suspend_state = DR_SRSTATE_USER;
+ if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) &&
+ dr_check_user_stop_result) {
+ dr_resume(srh);
+ return (rc);
+ }
+
+ if (!force) {
+ struct dr_ref drc = {0};
+
+ prom_printf("\nDR: checking devices...\n");
+ dev_errs_idx = 0;
+
+ drc.arr = dev_errs;
+ drc.idx = &dev_errs_idx;
+ drc.len = DR_MAX_ERR_INT;
+
+ /*
+ * Since the root node can never go away, it
+ * doesn't have to be held.
+ */
+ ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc);
+ if (dev_errs_idx) {
+ handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs,
+ dev_errs_idx, 1);
+ dr_resume(srh);
+ return (DDI_FAILURE);
+ }
+ PR_QR("done\n");
+ } else {
+ prom_printf("\nDR: dr_suspend invoked with force flag\n");
+ }
+
+#ifndef SKIP_SYNC
+ /*
+	 * This sync swaps out all user pages.
+ */
+ vfs_sync(SYNC_ALL);
+#endif
+
+ /*
+ * special treatment for lock manager
+ */
+ lm_cprsuspend();
+
+#ifndef SKIP_SYNC
+ /*
+ * sync the file system in case we never make it back
+ */
+ sync();
+#endif
+
+ /*
+ * now suspend drivers
+ */
+ prom_printf("DR: suspending drivers...\n");
+ srh->sr_suspend_state = DR_SRSTATE_DRIVER;
+ srh->sr_err_idx = 0;
+ /* No parent to hold busy */
+ if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) {
+ if (srh->sr_err_idx && srh->sr_dr_handlep) {
+ (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND,
+ srh->sr_err_ints, srh->sr_err_idx, 1);
+ }
+ dr_resume(srh);
+ return (rc);
+ }
+
+ drmach_suspend_last();
+
+ /*
+ * finally, grab all cpus
+ */
+ srh->sr_suspend_state = DR_SRSTATE_FULL;
+
+ mutex_enter(&cpu_lock);
+ pause_cpus(NULL);
+ dr_stop_intr();
+
+ return (rc);
+}
+
+int
+dr_pt_test_suspend(dr_handle_t *hp)
+{
+ dr_sr_handle_t *srh;
+ int err;
+ uint_t psmerr;
+ static fn_t f = "dr_pt_test_suspend";
+
+ PR_QR("%s...\n", f);
+
+ srh = dr_get_sr_handle(hp);
+ if ((err = dr_suspend(srh)) == DDI_SUCCESS) {
+ dr_resume(srh);
+ if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) {
+			PR_QR("%s: error on dr_resume()\n", f);
+ switch (psmerr) {
+ case ESBD_RESUME:
+ PR_QR("Couldn't resume devices: %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_KTHREAD:
+ PR_ALL("psmerr is ESBD_KTHREAD\n");
+ break;
+ default:
+ PR_ALL("Resume error unknown = %d\n", psmerr);
+ break;
+ }
+ }
+ } else {
+ PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err);
+ psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR;
+ switch (psmerr) {
+ case ESBD_UNSAFE:
+ PR_ALL("Unsafe devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_RTTHREAD:
+ PR_ALL("RT threads (PIDs): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_UTHREAD:
+ PR_ALL("User threads (PIDs): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_SUSPEND:
+ PR_ALL("Non-suspendable devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_RESUME:
+ PR_ALL("Could not resume devices (major #): %s\n",
+ DR_GET_E_RSC(hp->h_err));
+ break;
+
+ case ESBD_KTHREAD:
+ PR_ALL("psmerr is ESBD_KTHREAD\n");
+ break;
+
+ case ESBD_NOERROR:
+ PR_ALL("sbd_error_t error code not set\n");
+ break;
+
+ default:
+ PR_ALL("Unknown error psmerr = %d\n", psmerr);
+ break;
+ }
+ }
+ dr_release_sr_handle(srh);
+
+ return (0);
+}
+
+/*
+ * Add a new integer value to the end of an array. Don't allow duplicates to
+ * appear in the array, and don't allow the array to overflow. Return the new
+ * total number of entries in the array.
+ */
+static int
+dr_add_int(uint64_t *arr, int idx, int len, uint64_t val)
+{
+ int i;
+
+ if (arr == NULL)
+ return (0);
+
+ if (idx >= len)
+ return (idx);
+
+ for (i = 0; i < idx; i++) {
+ if (arr[i] == val)
+ return (idx);
+ }
+
+ arr[idx++] = val;
+
+ return (idx);
+}
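
A short usage sketch of the contract above: duplicates leave the count unchanged and the array never grows past len. The PID values are illustrative:

	uint64_t errs[DR_MAX_ERR_INT];
	int idx = 0;

	idx = dr_add_int(errs, idx, DR_MAX_ERR_INT, 1234);	/* idx == 1 */
	idx = dr_add_int(errs, idx, DR_MAX_ERR_INT, 1234);	/* dup, idx == 1 */
	idx = dr_add_int(errs, idx, DR_MAX_ERR_INT, 5678);	/* idx == 2 */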
+
+/*
+ * Construct an sbd_error_t featuring a string representation of an array of
+ * integers as its e_rsc.
+ */
+static sbd_error_t *
+drerr_int(int e_code, uint64_t *arr, int idx, int majors)
+{
+ int i, n, buf_len, buf_idx, buf_avail;
+ char *dname;
+ char *buf;
+ sbd_error_t *new_sbd_err;
+ static char s_ellipsis[] = "...";
+
+ if (arr == NULL || idx <= 0)
+ return (NULL);
+
+ /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */
+ buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * This is the total working area of the buffer. It must be computed
+ * as the size of 'buf', minus reserved space for the null terminator
+ * and the ellipsis string.
+ */
+ buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1);
+
+ /* Construct a string representation of the array values */
+ for (buf_idx = 0, i = 0; i < idx; i++) {
+ buf_avail = buf_len - buf_idx;
+ if (majors) {
+ dname = ddi_major_to_name(arr[i]);
+ if (dname) {
+ n = snprintf(&buf[buf_idx], buf_avail, "%s, ",
+ dname);
+ } else {
+ n = snprintf(&buf[buf_idx], buf_avail,
+ "major %" PRIu64 ", ", arr[i]);
+ }
+ } else {
+ n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ",
+ arr[i]);
+ }
+
+ /* An ellipsis gets appended when no more values fit */
+ if (n >= buf_avail) {
+ (void) strcpy(&buf[buf_idx], s_ellipsis);
+ break;
+ }
+
+ buf_idx += n;
+ }
+
+	/* If all the contents fit, remove the trailing comma and space */
+ if (n < buf_avail) {
+ buf[--buf_idx] = '\0';
+ buf[--buf_idx] = '\0';
+ }
+
+ /* Return an sbd_error_t with the buffer and e_code */
+ new_sbd_err = drerr_new(1, e_code, buf);
+ kmem_free(buf, MAXPATHLEN);
+ return (new_sbd_err);
+}
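
For example (a hedged sketch; the major numbers and driver names are illustrative), passing majors == 1 resolves each value through ddi_major_to_name():

	uint64_t majs[] = { 32, 33 };	/* assumed to map to "sd" and "st" */
	sbd_error_t *ep = drerr_int(ESBD_UNSAFE, majs, 2, 1);
	/*
	 * ep->e_rsc now reads "sd, st"; with majors == 0 it would read
	 * "32, 33".  A list too long for MAXPATHLEN ends in "...".
	 */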
diff --git a/usr/src/uts/i86pc/io/dr/dr_util.c b/usr/src/uts/i86pc/io/dr/dr_util.c
new file mode 100644
index 0000000000..d60dc7f7b8
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/dr_util.c
@@ -0,0 +1,457 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/param.h> /* for NULL */
+#include <sys/sbd_ioctl.h>
+#include <sys/dr_util.h>
+#include <sys/varargs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+
+/* sbd_etab[] and sbd_etab_len provided by sbdgenerr.pl */
+extern sbd_etab_t sbd_etab[];
+extern int sbd_etab_len;
+
+sbd_error_t *
+sbd_err_new(int e_code, char *fmt, va_list args)
+{
+ sbd_error_t *new;
+
+ new = GETSTRUCT(sbd_error_t, 1);
+ new->e_code = e_code;
+
+ if (fmt)
+ (void) vsnprintf(new->e_rsc, sizeof (new->e_rsc), fmt, args);
+
+ return (new);
+}
+
+void
+sbd_err_log(sbd_error_t *ep, int ce)
+{
+ char buf[32];
+ char *fmt;
+ char *txt;
+ int i;
+ sbd_etab_t *tp;
+
+ if (!ep)
+ return;
+
+ if (ep->e_rsc[0] == '\0')
+ fmt = "%s";
+ else
+ fmt = "%s: %s";
+
+ for (tp = sbd_etab, i = 0; i < sbd_etab_len; i++, tp++)
+ if (ep->e_code >= tp->t_base && ep->e_code <= tp->t_bnd)
+ break;
+
+ if (i < sbd_etab_len)
+ txt = tp->t_text[ep->e_code - tp->t_base];
+ else {
+ (void) snprintf(buf, sizeof (buf), "error %d", ep->e_code);
+ txt = buf;
+ }
+
+ cmn_err(ce, fmt, txt, ep->e_rsc);
+}
+
+void
+sbd_err_clear(sbd_error_t **ep)
+{
+ FREESTRUCT(*ep, sbd_error_t, 1);
+ *ep = NULL;
+}
+
+void
+sbd_err_set_c(sbd_error_t **ep, int ce, int e_code, char *fmt, ...)
+{
+ sbd_error_t *tmp;
+ va_list args;
+
+ va_start(args, fmt);
+
+ tmp = sbd_err_new(e_code, fmt, args);
+
+ sbd_err_log(tmp, ce);
+
+ if (*ep == NULL)
+ *ep = tmp;
+ else
+ sbd_err_clear(&tmp);
+
+ va_end(args);
+}
+
+void
+sbd_err_set(sbd_error_t **ep, int ce, int e_code, char *fmt, ...)
+{
+ sbd_error_t *tmp;
+ va_list args;
+
+ va_start(args, fmt);
+
+ tmp = sbd_err_new(e_code, fmt, args);
+
+ sbd_err_log(tmp, ce);
+
+ *ep = tmp;
+
+ va_end(args);
+}
+
+sbd_error_t *
+drerr_new_v(int e_code, char *fmt, va_list args)
+{
+ return (sbd_err_new(e_code, fmt, args));
+}
+
+sbd_error_t *
+drerr_new(int log, int e_code, char *fmt, ...)
+{
+ sbd_error_t *ep;
+ va_list args;
+
+ va_start(args, fmt);
+ ep = sbd_err_new(e_code, fmt, args);
+ va_end(args);
+
+ if (log)
+ sbd_err_log(ep, CE_WARN);
+
+ return (ep);
+}
+
+void
+drerr_set_c(int log, sbd_error_t **ep, int e_code, char *fmt, ...)
+{
+ sbd_error_t *err;
+ va_list args;
+
+ va_start(args, fmt);
+ err = sbd_err_new(e_code, fmt, args);
+ va_end(args);
+
+ if (log)
+ sbd_err_log(err, CE_WARN);
+
+ if (*ep == NULL)
+ *ep = err;
+ else
+ sbd_err_clear(&err);
+}
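
Both sbd_err_set_c() and drerr_set_c() implement first-error-wins: when *ep already holds an error, a later one is still logged (if requested) but then freed. A usage sketch with illustrative format arguments:

	sbd_error_t *ep = NULL;

	drerr_set_c(1, &ep, ESBD_SUSPEND, "%s", "node-a");	/* latched */
	drerr_set_c(1, &ep, ESBD_RESUME, "%s", "node-b");	/* logged, freed */
	/* ep still carries ESBD_SUSPEND, "node-a" */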
+
+
+/*
+ * Memlist support.
+ */
+void
+dr_memlist_delete(struct memlist *mlist)
+{
+ register struct memlist *ml;
+
+ for (ml = mlist; ml; ml = mlist) {
+ mlist = ml->ml_next;
+ FREESTRUCT(ml, struct memlist, 1);
+ }
+}
+
+int
+dr_memlist_intersect(struct memlist *al, struct memlist *bl)
+{
+ uint64_t astart, aend, bstart, bend;
+
+ if ((al == NULL) || (bl == NULL))
+ return (0);
+
+ aend = al->ml_address + al->ml_size;
+ bstart = bl->ml_address;
+ bend = bl->ml_address + bl->ml_size;
+
+ while (al && bl) {
+ while (al && (aend <= bstart))
+ if ((al = al->ml_next) != NULL)
+ aend = al->ml_address + al->ml_size;
+ if (al == NULL)
+ return (0);
+
+ if ((astart = al->ml_address) <= bstart)
+ return (1);
+
+ while (bl && (bend <= astart))
+ if ((bl = bl->ml_next) != NULL)
+ bend = bl->ml_address + bl->ml_size;
+ if (bl == NULL)
+ return (0);
+
+ if ((bstart = bl->ml_address) <= astart)
+ return (1);
+ }
+
+ return (0);
+}
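
A worked example of the two-cursor walk, with illustrative addresses:

	/*
	 * al = { [0x1000, 0x2000) }, bl = { [0x1800, 0x3000) }:
	 * aend (0x2000) > bstart (0x1800), and astart (0x1000) <= bstart,
	 * so the ranges overlap and the function returns 1.  Fully
	 * disjoint lists run one cursor off the end and return 0.
	 */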
+
+void
+dr_memlist_coalesce(struct memlist *mlist)
+{
+ uint64_t end, nend;
+
+ if ((mlist == NULL) || (mlist->ml_next == NULL))
+ return;
+
+ while (mlist->ml_next) {
+ end = mlist->ml_address + mlist->ml_size;
+ if (mlist->ml_next->ml_address <= end) {
+ struct memlist *nl;
+
+ nend = mlist->ml_next->ml_address +
+ mlist->ml_next->ml_size;
+ if (nend > end)
+ mlist->ml_size += (nend - end);
+ nl = mlist->ml_next;
+ mlist->ml_next = mlist->ml_next->ml_next;
+ if (nl) {
+ FREESTRUCT(nl, struct memlist, 1);
+ }
+ if (mlist->ml_next)
+ mlist->ml_next->ml_prev = mlist;
+ } else {
+ mlist = mlist->ml_next;
+ }
+ }
+}
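
A worked example, with illustrative addresses. Note the `<=` comparison: entries that merely touch are merged as well as entries that overlap:

	/*
	 * { [0x1000, 0x2000), [0x1800, 0x2800) } coalesces to
	 * { [0x1000, 0x2800) }: the first entry absorbs the overlap and
	 * the second is freed.  { [0x1000, 0x2000), [0x2000, 0x3000) }
	 * merges the same way because 0x2000 <= end.
	 */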
+
+#ifdef DEBUG
+void
+memlist_dump(struct memlist *mlist)
+{
+ register struct memlist *ml;
+
+ if (mlist == NULL)
+ printf("memlist> EMPTY\n");
+ else for (ml = mlist; ml; ml = ml->ml_next)
+ printf("memlist> 0x%" PRIx64 ", 0x%" PRIx64 "\n",
+ ml->ml_address, ml->ml_size);
+}
+#endif
+
+struct memlist *
+dr_memlist_dup(struct memlist *mlist)
+{
+ struct memlist *hl = NULL, *tl, **mlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ mlp = &hl;
+ tl = *mlp;
+ for (; mlist; mlist = mlist->ml_next) {
+ *mlp = GETSTRUCT(struct memlist, 1);
+ (*mlp)->ml_address = mlist->ml_address;
+ (*mlp)->ml_size = mlist->ml_size;
+ (*mlp)->ml_prev = tl;
+ tl = *mlp;
+ mlp = &((*mlp)->ml_next);
+ }
+ *mlp = NULL;
+
+ return (hl);
+}
+
+struct memlist *
+dr_memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ struct memlist *ml, *tl, *nl;
+
+ if (len == 0ull)
+ return (NULL);
+
+ if (mlist == NULL) {
+ mlist = GETSTRUCT(struct memlist, 1);
+ mlist->ml_address = base;
+ mlist->ml_size = len;
+ mlist->ml_next = mlist->ml_prev = NULL;
+
+ return (mlist);
+ }
+
+ for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
+ if (base < ml->ml_address) {
+ if ((base + len) < ml->ml_address) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = ml;
+ if ((nl->ml_prev = ml->ml_prev) != NULL)
+ nl->ml_prev->ml_next = nl;
+ ml->ml_prev = nl;
+ if (mlist == ml)
+ mlist = nl;
+ } else {
+ ml->ml_size = MAX((base + len),
+ (ml->ml_address + ml->ml_size)) - base;
+ ml->ml_address = base;
+ }
+ break;
+
+ } else if (base <= (ml->ml_address + ml->ml_size)) {
+ ml->ml_size = MAX((base + len),
+ (ml->ml_address + ml->ml_size)) -
+ MIN(ml->ml_address, base);
+ ml->ml_address = MIN(ml->ml_address, base);
+ break;
+ }
+ }
+ if (ml == NULL) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = NULL;
+ nl->ml_prev = tl;
+ tl->ml_next = nl;
+ }
+
+ dr_memlist_coalesce(mlist);
+
+ return (mlist);
+}
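
The three placements the loop distinguishes, shown with illustrative spans starting from { [0x2000, 0x3000) }:

	/*
	 * add [0x0000, 0x1000): disjoint and below, new head entry ->
	 *     { [0x0000, 0x1000), [0x2000, 0x3000) }
	 * add [0x2800, 0x3800): overlaps, entry stretches ->
	 *     { ..., [0x2000, 0x3800) }
	 * add [0x5000, 0x6000): past the tail, appended after the loop
	 *
	 * The final dr_memlist_coalesce() then merges any entries the
	 * insertion left adjacent.
	 */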
+
+struct memlist *
+dr_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ uint64_t end;
+ struct memlist *ml, *tl, *nlp;
+
+ if (mlist == NULL)
+ return (NULL);
+
+ end = base + len;
+ if ((end <= mlist->ml_address) || (base == end))
+ return (mlist);
+
+ for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
+ uint64_t mend;
+
+ nlp = ml->ml_next;
+
+ if (end <= ml->ml_address)
+ break;
+
+ mend = ml->ml_address + ml->ml_size;
+ if (base < mend) {
+ if (base <= ml->ml_address) {
+ ml->ml_address = end;
+ if (end >= mend)
+ ml->ml_size = 0ull;
+ else
+ ml->ml_size = mend - ml->ml_address;
+ } else {
+ ml->ml_size = base - ml->ml_address;
+ if (end < mend) {
+ struct memlist *nl;
+ /*
+				 * splitting a memlist entry.
+ */
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = end;
+ nl->ml_size = mend - nl->ml_address;
+ if ((nl->ml_next = nlp) != NULL)
+ nlp->ml_prev = nl;
+ nl->ml_prev = ml;
+ ml->ml_next = nl;
+ nlp = nl;
+ }
+ }
+ if (ml->ml_size == 0ull) {
+ if (ml == mlist) {
+ if ((mlist = nlp) != NULL)
+ nlp->ml_prev = NULL;
+ FREESTRUCT(ml, struct memlist, 1);
+ if (mlist == NULL)
+ break;
+ ml = nlp;
+ } else {
+ if ((tl->ml_next = nlp) != NULL)
+ nlp->ml_prev = tl;
+ FREESTRUCT(ml, struct memlist, 1);
+ ml = tl;
+ }
+ }
+ }
+ }
+
+ return (mlist);
+}
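
A worked example of the splitting path, with illustrative addresses:

	/*
	 * Deleting [0x1400, 0x1800) from { [0x1000, 0x2000) } trims the
	 * entry to [0x1000, 0x1400) and inserts a new entry
	 * [0x1800, 0x2000) after it.  A span that covers an entire entry
	 * instead drives its size to 0 and the entry is freed.
	 */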
+
+/*
+ * add span without merging
+ */
+struct memlist *
+dr_memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len)
+{
+ struct memlist *ml, *tl, *nl;
+
+ if (len == 0ull)
+ return (NULL);
+
+ if (mlist == NULL) {
+ mlist = GETSTRUCT(struct memlist, 1);
+ mlist->ml_address = base;
+ mlist->ml_size = len;
+ mlist->ml_next = mlist->ml_prev = NULL;
+
+ return (mlist);
+ }
+
+ for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) {
+ if (base < ml->ml_address) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = ml;
+ if ((nl->ml_prev = ml->ml_prev) != NULL)
+ nl->ml_prev->ml_next = nl;
+ ml->ml_prev = nl;
+ if (mlist == ml)
+ mlist = nl;
+ break;
+ }
+ }
+
+ if (ml == NULL) {
+ nl = GETSTRUCT(struct memlist, 1);
+ nl->ml_address = base;
+ nl->ml_size = len;
+ nl->ml_next = NULL;
+ nl->ml_prev = tl;
+ tl->ml_next = nl;
+ }
+
+ return (mlist);
+}
diff --git a/usr/src/uts/i86pc/io/dr/sbdgenerr.pl b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl
new file mode 100644
index 0000000000..9980c96459
--- /dev/null
+++ b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl
@@ -0,0 +1,90 @@
+#!/usr/bin/perl
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2000 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+sub trim {
+ my ($line) = @_;
+ $line =~ s#/\*|\*/##g;
+ $line =~ s#^\s+|\s+$##g;
+ return $line;
+}
+
+my $filter = 0;
+my %prefix;
+while ($#ARGV >= 0) {
+ $prefix{$ARGV[0]} = 0;
+ shift @ARGV;
+ $filter = 1;
+}
+
+my $base;
+my $bnd;
+my @text;
+my @sets;
+while (<STDIN>) {
+ my $n = m@^#define\s(E\w\w\w)\w+\s+(\d+)(.*)@;
+ next unless ($n > 0);
+ next unless ($filter == 0 || defined $prefix{$1});
+ my $txt = trim($3);
+ if (length($txt) == 0) {
+ my $l = <STDIN>;
+ $txt = trim($l);
+ }
+
+ $base = $2 if (!defined $base);
+ if (defined $bnd && $2 != $bnd + 1) {
+ push(@sets, { base => $base, bnd => $bnd });
+ $base = $2;
+ }
+ $bnd = $2;
+ push(@text, $txt);
+}
+
+push(@sets, { base => $base, bnd => $bnd });
+
+printf "#include <sys/sbd_ioctl.h>\n";
+
+my $i = 0;
+my $s = 0;
+do {
+ my $set = $sets[$s];
+
+ printf "static char *sbd_t%d[] = {\n", $set->{base};
+ my $n = $set->{bnd} - $set->{base} + 1;
+ while ($n--) {
+ printf "\t\"%s\",\n", $text[$i++];
+ }
+ printf "};\n";
+} while (++$s <= $#sets);
+
+printf "sbd_etab_t sbd_etab[] = {\n";
+$s = 0;
+do {
+ my $set = $sets[$s];
+ printf "\t{ %d, %d, sbd_t%d },\n",
+ $set->{base}, $set->{bnd}, $set->{base};
+} while (++$s <= $#sets);
+printf "};\n";
+printf "int sbd_etab_len = %d;\n", $s;
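
The script reads a header on stdin, keeps the `#define E...` lines whose four-character prefix matches a command-line argument (or all of them when no arguments are given), and turns the trailing comments into C string tables, one per contiguous run of code values. A hedged example; the error codes below are illustrative, not quoted from sbd_ioctl.h. Given this input:

	#define	ESBD_NOERROR	0	/* no error */
	#define	ESBD_INTERNAL	1	/* Internal error */

an invocation such as `perl sbdgenerr.pl ESBD < sbd_ioctl.h` would emit roughly:

	#include <sys/sbd_ioctl.h>
	static char *sbd_t0[] = {
		"no error",
		"Internal error",
	};
	sbd_etab_t sbd_etab[] = {
		{ 0, 1, sbd_t0 },
	};
	int sbd_etab_len = 1;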
diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c
index 319193bcc0..a3afe84d07 100644
--- a/usr/src/uts/i86pc/io/mp_platform_common.c
+++ b/usr/src/uts/i86pc/io/mp_platform_common.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* PSMI 1.1 extensions are supported only in 2.6 and later versions.
@@ -29,8 +33,9 @@
* PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
* PSMI 1.5 extensions are supported in Solaris Nevada.
* PSMI 1.6 extensions are supported in Solaris Nevada.
+ * PSMI 1.7 extensions are supported in Solaris Nevada.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/processor.h>
#include <sys/time.h>
@@ -93,6 +98,7 @@ static int apic_setup_irq_table(dev_info_t *dip, int irqno,
struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
int type);
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
+static void apic_free_apic_cpus(void);
static void apic_try_deferred_reprogram(int ipl, int vect);
static void delete_defer_repro_ent(int which_irq);
static void apic_ioapic_wait_pending_clear(int ioapicindex,
@@ -194,6 +200,11 @@ int apic_redist_cpu_skip = 0;
int apic_num_imbalance = 0;
int apic_num_rebind = 0;
+/*
+ * Maximum number of APIC CPUs in the system; -1 indicates that dynamic
+ * allocation of CPU ids is disabled.
+ */
+int apic_max_nproc = -1;
int apic_nproc = 0;
size_t apic_cpus_size = 0;
int apic_defconf = 0;
@@ -597,6 +608,16 @@ apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
}
}
+static void
+apic_free_apic_cpus(void)
+{
+ if (apic_cpus != NULL) {
+ kmem_free(apic_cpus, apic_cpus_size);
+ apic_cpus = NULL;
+ apic_cpus_size = 0;
+ }
+}
+
static int
acpi_probe(char *modname)
{
@@ -639,7 +660,6 @@ acpi_probe(char *modname)
id = apic_reg_ops->apic_read(APIC_LID_REG);
local_ids[0] = (uchar_t)(id >> 24);
apic_nproc = index = 1;
- CPUSET_ONLY(apic_cpumask, 0);
apic_io_max = 0;
ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1);
@@ -652,16 +672,12 @@ acpi_probe(char *modname)
mpa = (ACPI_MADT_LOCAL_APIC *) ap;
if (mpa->LapicFlags & ACPI_MADT_ENABLED) {
if (mpa->Id == local_ids[0]) {
+ ASSERT(index == 1);
proc_ids[0] = mpa->ProcessorId;
- (void) acpica_map_cpu(0,
- mpa->ProcessorId);
} else if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
local_ids[index] = mpa->Id;
proc_ids[index] = mpa->ProcessorId;
- CPUSET_ADD(apic_cpumask, index);
- (void) acpica_map_cpu(index,
- mpa->ProcessorId);
index++;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
@@ -770,9 +786,7 @@ acpi_probe(char *modname)
if (apic_nproc < NCPU && use_mp &&
apic_nproc < boot_ncpus) {
local_ids[index] = mpx2a->LocalApicId;
- CPUSET_ADD(apic_cpumask, index);
- (void) acpica_map_cpu(index,
- mpx2a->Uid);
+				proc_ids[index] = mpx2a->Uid;
index++;
apic_nproc++;
} else if (apic_nproc == NCPU && !warned) {
@@ -813,7 +827,14 @@ acpi_probe(char *modname)
ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length);
}
- apic_cpus_size = apic_nproc * sizeof (*apic_cpus);
+ /*
+	 * Allocate enough space for possible hot-adding of CPUs.
+	 * max_ncpus may be less than apic_nproc if it's set by the user.
+ */
+ if (plat_dr_support_cpu()) {
+ apic_max_nproc = max_ncpus;
+ }
+ apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus);
if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL)
goto cleanup;
@@ -825,6 +846,47 @@ acpi_probe(char *modname)
for (i = 0; i < apic_nproc; i++) {
apic_cpus[i].aci_local_id = local_ids[i];
apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
+ apic_cpus[i].aci_processor_id = proc_ids[i];
+ /* Only build mapping info for CPUs present at boot. */
+ if (i < boot_ncpus)
+ (void) acpica_map_cpu(i, proc_ids[i]);
+ }
+
+ /*
+ * To support CPU dynamic reconfiguration, the apic CPU info structure
+ * for each possible CPU will be pre-allocated at boot time.
+ * The state for each apic CPU info structure will be assigned according
+ * to the following rules:
+ * Rule 1:
+ * Slot index range: [0, min(apic_nproc, boot_ncpus))
+ * State flags: 0
+ * Note: cpu exists and will be configured/enabled at boot time
+ * Rule 2:
+ * Slot index range: [boot_ncpus, apic_nproc)
+ * State flags: APIC_CPU_FREE | APIC_CPU_DIRTY
+ * Note: cpu exists but won't be configured/enabled at boot time
+ * Rule 3:
+ * Slot index range: [apic_nproc, boot_ncpus)
+ * State flags: APIC_CPU_FREE
+ * Note: cpu doesn't exist at boot time
+ * Rule 4:
+ * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus)
+ * State flags: APIC_CPU_FREE
+ * Note: cpu doesn't exist at boot time
+ */
+ CPUSET_ZERO(apic_cpumask);
+ for (i = 0; i < min(boot_ncpus, apic_nproc); i++) {
+ CPUSET_ADD(apic_cpumask, i);
+ apic_cpus[i].aci_status = 0;
+ }
+ for (i = boot_ncpus; i < apic_nproc; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
+ }
+ for (i = apic_nproc; i < boot_ncpus; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE;
+ }
+ for (i = max(boot_ncpus, apic_nproc); i < max_ncpus; i++) {
+ apic_cpus[i].aci_status = APIC_CPU_FREE;
}
for (i = 0; i < apic_io_max; i++) {
@@ -926,10 +988,12 @@ acpi_probe(char *modname)
/* if setting APIC mode failed above, we fall through to cleanup */
cleanup:
+ apic_free_apic_cpus();
if (apicadr != NULL) {
mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
apicadr = NULL;
}
+ apic_max_nproc = -1;
apic_nproc = 0;
for (i = 0; i < apic_io_max; i++) {
mapout_ioapic((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
@@ -954,6 +1018,11 @@ apic_handle_defconf()
{
uint_t lid;
+ /* Failed to probe ACPI MADT tables, disable CPU DR. */
+ apic_max_nproc = -1;
+ apic_free_apic_cpus();
+ plat_dr_disable_cpu();
+
/*LINTED: pointer cast may result in improper alignment */
apicioadr[0] = mapin_ioapic(APIC_IO_ADDR,
APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
@@ -998,8 +1067,7 @@ apic_handle_defconf()
return (PSM_SUCCESS);
apic_handle_defconf_fail:
- if (apic_cpus)
- kmem_free(apic_cpus, apic_cpus_size);
+ apic_free_apic_cpus();
if (apicadr)
mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
if (apicioadr[0])
@@ -1187,7 +1255,51 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
boolean_t
apic_cpu_in_range(int cpu)
{
- return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ cpu &= ~IRQ_USER_BOUND;
+ /* Check whether cpu id is in valid range. */
+ if (cpu < 0 || cpu >= apic_nproc) {
+ return (B_FALSE);
+ } else if (apic_max_nproc != -1 && cpu >= apic_max_nproc) {
+ /*
+ * Check whether cpuid is in valid range if CPU DR is enabled.
+ */
+ return (B_FALSE);
+ } else if (!CPU_IN_SET(apic_cpumask, cpu)) {
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Must be called with interrupts disabled and the apic_ioapic_lock held.
+ */
+processorid_t
+apic_find_next_cpu_intr(void)
+{
+ int i, count;
+ processorid_t cpuid = 0;
+
+ ASSERT(LOCK_HELD(&apic_ioapic_lock));
+
+ /*
+ * Find next CPU with INTR_ENABLE flag set.
+ * Assume that there is at least one CPU with interrupts enabled.
+ */
+ for (count = 0; count < apic_nproc; count++) {
+ if (apic_next_bind_cpu >= apic_nproc) {
+ apic_next_bind_cpu = 0;
+ }
+ i = apic_next_bind_cpu++;
+ if (apic_cpu_in_range(i) &&
+ (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
+ cpuid = i;
+ break;
+ }
+ }
+ ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_INTR_ENABLE) != 0);
+
+ return (cpuid);
}
uint16_t
@@ -1770,7 +1882,7 @@ apic_delspl_common(int irqno, int ipl, int min_ipl, int max_ipl)
bind_cpu = irqptr->airq_temp_cpu;
if (((uint32_t)bind_cpu != IRQ_UNBOUND) &&
((uint32_t)bind_cpu != IRQ_UNINIT)) {
- ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ ASSERT(apic_cpu_in_range(bind_cpu));
if (bind_cpu & IRQ_USER_BOUND) {
/* If hardbound, temp_cpu == cpu */
bind_cpu &= ~IRQ_USER_BOUND;
@@ -2392,6 +2504,7 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
major_t major;
char *name, *drv_name, *prop_val, *cptr;
char prop_name[32];
+ ulong_t iflag;
if (apic_intr_policy == INTR_LOWEST_PRIORITY)
@@ -2475,26 +2588,28 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
i++;
bind_cpu = stoi(&cptr);
kmem_free(prop_val, prop_len);
- /* if specific cpu is bogus, then default to cpu 0 */
- if (bind_cpu >= apic_nproc) {
+ /* if specific CPU is bogus, then default to next cpu */
+ if (!apic_cpu_in_range(bind_cpu)) {
cmn_err(CE_WARN, "%s: %s=%s: CPU %d not present",
psm_name, prop_name, prop_val, bind_cpu);
- bind_cpu = 0;
+ rc = DDI_PROP_NOT_FOUND;
} else {
/* indicate that we are bound at user request */
bind_cpu |= IRQ_USER_BOUND;
}
/*
- * no need to check apic_cpus[].aci_status, if specific cpu is
+ * no need to check apic_cpus[].aci_status, if specific CPU is
* not up, then post_cpu_start will handle it.
*/
- } else {
- bind_cpu = apic_next_bind_cpu++;
- if (bind_cpu >= apic_nproc) {
- apic_next_bind_cpu = 1;
- bind_cpu = 0;
- }
}
+ if (rc != DDI_PROP_SUCCESS) {
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+ bind_cpu = apic_find_next_cpu_intr();
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ }
+
if (drv_name != NULL)
cmn_err(CE_CONT, "!%s: %s (%s) instance %d irq 0x%x "
"vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
@@ -2925,7 +3040,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
/* Mask off high bit so it can be used as array index */
airq_temp_cpu &= ~IRQ_USER_BOUND;
- ASSERT(airq_temp_cpu < apic_nproc);
+ ASSERT(apic_cpu_in_range(airq_temp_cpu));
}
/*
@@ -2999,7 +3114,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
} else {
cpu_infop->aci_temp_bound++;
}
- ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
+ ASSERT(apic_cpu_in_range(bind_cpu));
if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
apic_cpus[airq_temp_cpu].aci_temp_bound--;
@@ -3477,7 +3592,8 @@ apic_intr_redistribute()
* we are consistent.
*/
for (i = 0; i < apic_nproc; i++) {
- if (!(apic_redist_cpu_skip & (1 << i)) &&
+ if (apic_cpu_in_range(i) &&
+ !(apic_redist_cpu_skip & (1 << i)) &&
(apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {
cpu_infop = &apic_cpus[i];
@@ -3639,7 +3755,9 @@ apic_intr_redistribute()
apic_redist_cpu_skip = 0;
}
for (i = 0; i < apic_nproc; i++) {
- apic_cpus[i].aci_busy = 0;
+ if (apic_cpu_in_range(i)) {
+ apic_cpus[i].aci_busy = 0;
+ }
}
}
@@ -3650,7 +3768,9 @@ apic_cleanup_busy()
apic_irq_t *irq_ptr;
for (i = 0; i < apic_nproc; i++) {
- apic_cpus[i].aci_busy = 0;
+ if (apic_cpu_in_range(i)) {
+ apic_cpus[i].aci_busy = 0;
+ }
}
for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c
index 939ac73dab..13ceaef674 100644
--- a/usr/src/uts/i86pc/io/pcplusmp/apic.c
+++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* PSMI 1.1 extensions are supported only in 2.6 and later versions.
@@ -30,8 +34,9 @@
* PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
* PSMI 1.5 extensions are supported in Solaris Nevada.
* PSMI 1.6 extensions are supported in Solaris Nevada.
+ * PSMI 1.7 extensions are supported in Solaris Nevada.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/processor.h>
#include <sys/time.h>
@@ -88,7 +93,11 @@ static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
-static int apic_cpu_start(processorid_t, caddr_t);
+static int apic_cpu_start(processorid_t cpuid, caddr_t ctx);
+static int apic_cpu_stop(processorid_t cpuid, caddr_t ctx);
+static int apic_cpu_add(psm_cpu_request_t *reqp);
+static int apic_cpu_remove(psm_cpu_request_t *reqp);
+static int apic_cpu_ops(psm_cpu_request_t *reqp);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_idlecpu(processorid_t cpun);
@@ -96,6 +105,7 @@ static void apic_unset_idlecpu(processorid_t cpun);
static int apic_intr_enter(int ipl, int *vect);
static void apic_setspl(int ipl);
static void x2apic_setspl(int ipl);
+static void apic_switch_ipi_callback(boolean_t enter);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
@@ -137,6 +147,9 @@ int apic_error_display_delay = 100;
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;
+/* maximum loop count when sending Start IPIs. */
+int apic_sipi_max_loop_count = 0x1000;
+
/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
@@ -145,6 +158,10 @@ extern void cmi_cmci_trap(void);
static kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */
static int cmci_cpu_setup_registered;
+/* number of CPUs in power-on transition state */
+static int apic_poweron_cnt = 0;
+static lock_t apic_mode_switch_lock;
+
/*
* The following vector assignments influence the value of ipltopri and
* vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
@@ -209,6 +226,7 @@ int apic_verbose = 0;
/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
+
/*
* Local static data
*/
@@ -252,11 +270,12 @@ static struct psm_ops apic_ops = {
apic_preshutdown,
apic_intr_ops, /* Advanced DDI Interrupt framework */
apic_state, /* save, restore apic state for S3 */
+ apic_cpu_ops, /* CPU control interface. */
};
static struct psm_info apic_psm_info = {
- PSM_INFO_VER01_6, /* version */
+ PSM_INFO_VER01_7, /* version */
PSM_OWN_EXCLUSIVE, /* ownership */
(struct psm_ops *)&apic_ops, /* operation */
APIC_PCPLUSMP_NAME, /* machine name */
@@ -786,6 +805,7 @@ apic_picinit(void)
LOCK_INIT_CLEAR(&apic_gethrtime_lock);
LOCK_INIT_CLEAR(&apic_ioapic_lock);
LOCK_INIT_CLEAR(&apic_error_lock);
+ LOCK_INIT_CLEAR(&apic_mode_switch_lock);
picsetup(); /* initialise the 8259 */
@@ -814,29 +834,27 @@ apic_picinit(void)
ioapic_init_intr(IOAPIC_MASK);
}
-
-/*ARGSUSED1*/
-static int
-apic_cpu_start(processorid_t cpun, caddr_t arg)
+static void
+apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
int loop_count;
uint32_t vector;
- uint_t cpu_id;
+ uint_t apicid;
ulong_t iflag;
- cpu_id = apic_cpus[cpun].aci_local_id;
-
- apic_cmos_ssb_set = 1;
+ apicid = apic_cpus[cpun].aci_local_id;
/*
- * Interrupts on BSP cpu will be disabled during these startup
+ * Interrupts on current CPU will be disabled during the
* steps in order to avoid unwanted side effects from
* executing interrupt handlers on a problematic BIOS.
*/
-
iflag = intr_clear();
- outb(CMOS_ADDR, SSB);
- outb(CMOS_DATA, BIOS_SHUTDOWN);
+
+ if (start) {
+ outb(CMOS_ADDR, SSB);
+ outb(CMOS_DATA, BIOS_SHUTDOWN);
+ }
/*
* According to X2APIC specification in section '2.3.5.1' of
@@ -860,10 +878,10 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
/* for integrated - make sure there is one INIT IPI in buffer */
/* for external - it will wake up the cpu */
- apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
+ apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
/* If only 1 CPU is installed, PENDING bit will not go low */
- for (loop_count = 0x1000; loop_count; loop_count--) {
+ for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
if (apic_mode == LOCAL_APIC &&
apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
apic_ret();
@@ -871,8 +889,7 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
break;
}
- apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
-
+ apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
drv_usecwait(20000); /* 20 milli sec */
if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
@@ -882,18 +899,111 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
(APIC_VECTOR_MASK | APIC_IPL_MASK);
/* to offset the INIT IPI queue up in the buffer */
- apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
+ apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
drv_usecwait(200); /* 20 micro sec */
- apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
- drv_usecwait(200); /* 20 micro sec */
+ /*
+		 * Send the second SIPI (Startup IPI) as recommended by the
+		 * Intel software developer's manual.
+ */
+ apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
+ drv_usecwait(200); /* 20 micro sec */
}
+
intr_restore(iflag);
+}
+
+/*ARGSUSED1*/
+static int
+apic_cpu_start(processorid_t cpun, caddr_t arg)
+{
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!apic_cpu_in_range(cpun)) {
+ return (EINVAL);
+ }
+
+	 * Switch to apic_common_send_ipi for safety while starting other CPUs.
+ * Switch to apic_common_send_ipi for safety during starting other CPUs.
+ */
+ if (apic_mode == LOCAL_X2APIC) {
+ apic_switch_ipi_callback(B_TRUE);
+ }
+
+ apic_cmos_ssb_set = 1;
+ apic_cpu_send_SIPI(cpun, B_TRUE);
+
return (0);
}
+/*
+ * Put CPU into halted state with interrupts disabled.
+ */
+/*ARGSUSED1*/
+static int
+apic_cpu_stop(processorid_t cpun, caddr_t arg)
+{
+ int rc;
+ cpu_t *cp;
+ extern cpuset_t cpu_ready_set;
+ extern void cpu_idle_intercept_cpu(cpu_t *cp);
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ if (!apic_cpu_in_range(cpun)) {
+ return (EINVAL);
+ }
+ if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
+ return (ENOTSUP);
+ }
+
+ cp = cpu_get(cpun);
+ ASSERT(cp != NULL);
+ ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
+ ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
+ ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
+
+ /* Clear CPU_READY flag to disable cross calls. */
+ cp->cpu_flags &= ~CPU_READY;
+ CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
+ rc = xc_flush_cpu(cp);
+ if (rc != 0) {
+ CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
+ cp->cpu_flags |= CPU_READY;
+ return (rc);
+ }
+
+ /* Intercept target CPU at a safe point before powering it off. */
+ cpu_idle_intercept_cpu(cp);
+
+ apic_cpu_send_SIPI(cpun, B_FALSE);
+ cp->cpu_flags &= ~CPU_RUNNING;
+
+ return (0);
+}
+
+static int
+apic_cpu_ops(psm_cpu_request_t *reqp)
+{
+ if (reqp == NULL) {
+ return (EINVAL);
+ }
+
+ switch (reqp->pcr_cmd) {
+ case PSM_CPU_ADD:
+ return (apic_cpu_add(reqp));
+
+ case PSM_CPU_REMOVE:
+ return (apic_cpu_remove(reqp));
+
+ case PSM_CPU_STOP:
+ return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
+ reqp->req.cpu_stop.ctx));
+
+ default:
+ return (ENOTSUP);
+ }
+}
#ifdef DEBUG
int apic_break_on_cpu = 9;
@@ -1377,7 +1487,6 @@ apic_post_cpu_start()
{
int cpun;
static int cpus_started = 1;
- struct psm_ops *pops = &apic_ops;
/* We know this CPU + BSP started successfully. */
cpus_started++;
@@ -1395,23 +1504,11 @@ apic_post_cpu_start()
}
/*
- * We change psm_send_ipi and send_dirintf only if Solaris
- * is booted in kmdb & the current CPU is the last CPU being
- * brought up. We don't need to do anything if Solaris is running
- * in MMIO mode (xAPIC).
+ * Switch back to x2apic IPI sending method for performance when target
+ * CPU has entered x2apic mode.
*/
- if ((boothowto & RB_DEBUG) &&
- (cpus_started == boot_ncpus || cpus_started == apic_nproc) &&
- apic_mode == LOCAL_X2APIC) {
- /*
- * We no longer need help from apic_common_send_ipi()
- * since we will not start any more CPUs.
- *
- * We will need to revisit this if we start supporting
- * hot-plugging of CPUs.
- */
- pops->psm_send_ipi = x2apic_send_ipi;
- send_dirintf = pops->psm_send_ipi;
+ if (apic_mode == LOCAL_X2APIC) {
+ apic_switch_ipi_callback(B_FALSE);
}
splx(ipltospl(LOCK_LEVEL));
@@ -1452,13 +1549,230 @@ apic_get_next_processorid(processorid_t cpu_id)
return ((processorid_t)0);
for (i = cpu_id + 1; i < NCPU; i++) {
- if (CPU_IN_SET(apic_cpumask, i))
+ if (apic_cpu_in_range(i))
return (i);
}
return ((processorid_t)-1);
}
+static int
+apic_cpu_add(psm_cpu_request_t *reqp)
+{
+ int i, rv = 0;
+ ulong_t iflag;
+ boolean_t first = B_TRUE;
+ uchar_t localver;
+ uint32_t localid, procid;
+ processorid_t cpuid = (processorid_t)-1;
+ mach_cpu_add_arg_t *ap;
+
+ ASSERT(reqp != NULL);
+ reqp->req.cpu_add.cpuid = (processorid_t)-1;
+
+ /* Check whether CPU hotplug is supported. */
+ if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
+ return (ENOTSUP);
+ }
+
+ ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
+ switch (ap->type) {
+ case MACH_CPU_ARG_LOCAL_APIC:
+ localid = ap->arg.apic.apic_id;
+ procid = ap->arg.apic.proc_id;
+ if (localid >= 255 || procid > 255) {
+ cmn_err(CE_WARN,
+ "!apic: apicid(%u) or procid(%u) is invalid.",
+ localid, procid);
+ return (EINVAL);
+ }
+ break;
+
+ case MACH_CPU_ARG_LOCAL_X2APIC:
+ localid = ap->arg.apic.apic_id;
+ procid = ap->arg.apic.proc_id;
+ if (localid >= UINT32_MAX) {
+ cmn_err(CE_WARN,
+ "!apic: x2apicid(%u) is invalid.", localid);
+ return (EINVAL);
+ } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
+ cmn_err(CE_WARN, "!apic: system is in APIC mode, "
+ "can't support x2APIC processor.");
+ return (ENOTSUP);
+ }
+ break;
+
+ default:
+ cmn_err(CE_WARN,
+ "!apic: unknown argument type %d to apic_cpu_add().",
+ ap->type);
+ return (EINVAL);
+ }
+
+ /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+
+ /* Check whether local APIC id already exists. */
+ for (i = 0; i < apic_nproc; i++) {
+ if (!CPU_IN_SET(apic_cpumask, i))
+ continue;
+ if (apic_cpus[i].aci_local_id == localid) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: local apic id %u already exists.",
+ localid);
+ return (EEXIST);
+ } else if (apic_cpus[i].aci_processor_id == procid) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: processor id %u already exists.",
+ (int)procid);
+ return (EEXIST);
+ }
+
+ /*
+		 * There's no local APIC version number available in the MADT
+		 * table, so assume that all CPUs are homogeneous and use the
+		 * local APIC version number of the first existing CPU.
+ */
+ if (first) {
+ first = B_FALSE;
+ localver = apic_cpus[i].aci_local_ver;
+ }
+ }
+ ASSERT(first == B_FALSE);
+
+ /*
+	 * Try to assign the same cpuid if the APIC id exists in the dirty cache.
+ */
+ for (i = 0; i < apic_max_nproc; i++) {
+ if (CPU_IN_SET(apic_cpumask, i)) {
+ ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
+ continue;
+ }
+ ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
+ if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
+ apic_cpus[i].aci_local_id == localid &&
+ apic_cpus[i].aci_processor_id == procid) {
+ cpuid = i;
+ break;
+ }
+ }
+
+	/* Avoid the dirty cache and allocate a fresh slot if possible. */
+ if (cpuid == (processorid_t)-1) {
+ for (i = 0; i < apic_max_nproc; i++) {
+ if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
+ (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
+ cpuid = i;
+ break;
+ }
+ }
+ }
+
+ /* Try to find any free slot as last resort. */
+ if (cpuid == (processorid_t)-1) {
+ for (i = 0; i < apic_max_nproc; i++) {
+ if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
+ cpuid = i;
+ break;
+ }
+ }
+ }
+
+ if (cpuid == (processorid_t)-1) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_NOTE,
+ "!apic: failed to allocate cpu id for processor %u.",
+ procid);
+ rv = EAGAIN;
+ } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_NOTE,
+ "!apic: failed to build mapping for processor %u.",
+ procid);
+ rv = EBUSY;
+ } else {
+ ASSERT(cpuid >= 0 && cpuid < NCPU);
+ ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
+ bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
+ apic_cpus[cpuid].aci_processor_id = procid;
+ apic_cpus[cpuid].aci_local_id = localid;
+ apic_cpus[cpuid].aci_local_ver = localver;
+ CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
+ if (cpuid >= apic_nproc) {
+ apic_nproc = cpuid + 1;
+ }
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ reqp->req.cpu_add.cpuid = cpuid;
+ }
+
+ return (rv);
+}
+
+static int
+apic_cpu_remove(psm_cpu_request_t *reqp)
+{
+ int i;
+ ulong_t iflag;
+ processorid_t cpuid;
+
+ /* Check whether CPU hotplug is supported. */
+ if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
+ return (ENOTSUP);
+ }
+
+ cpuid = reqp->req.cpu_remove.cpuid;
+
+ /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */
+ iflag = intr_clear();
+ lock_set(&apic_ioapic_lock);
+
+ if (!apic_cpu_in_range(cpuid)) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ cmn_err(CE_WARN,
+ "!apic: cpuid %d doesn't exist in apic_cpus array.",
+ cpuid);
+ return (ENODEV);
+ }
+ ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
+
+ if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+ return (ENOENT);
+ }
+
+ if (cpuid == apic_nproc - 1) {
+ /*
+ * We are removing the highest numbered cpuid so we need to
+ * find the next highest cpuid as the new value for apic_nproc.
+ */
+ for (i = apic_nproc; i > 0; i--) {
+ if (CPU_IN_SET(apic_cpumask, i - 1)) {
+ apic_nproc = i;
+ break;
+ }
+ }
+ /* at least one CPU left */
+ ASSERT(i > 0);
+ }
+ CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
+ /* mark slot as free and keep it in the dirty cache */
+ apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
+
+ lock_clear(&apic_ioapic_lock);
+ intr_restore(iflag);
+
+ return (0);
+}
/*
* type == -1 indicates it is an internal request. Do not change
@@ -1898,8 +2212,7 @@ apic_disable_intr(processorid_t cpun)
if ((irq_ptr = apic_irq_table[i]) != NULL) {
ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
(irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
- ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
- apic_nproc));
+ (apic_cpu_in_range(irq_ptr->airq_temp_cpu)));
if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
hardbound = 1;
@@ -1908,12 +2221,7 @@ apic_disable_intr(processorid_t cpun)
if (irq_ptr->airq_temp_cpu == cpun) {
do {
- bind_cpu = apic_next_bind_cpu++;
- if (bind_cpu >= apic_nproc) {
- apic_next_bind_cpu = 1;
- bind_cpu = 0;
-
- }
+ bind_cpu = apic_find_next_cpu_intr();
} while (apic_rebind_all(irq_ptr, bind_cpu));
}
}
@@ -2141,6 +2449,8 @@ apic_redistribute_compute(void)
}
max_busy = 0;
for (i = 0; i < apic_nproc; i++) {
+ if (!apic_cpu_in_range(i))
+ continue;
/*
* Check if curipl is non zero & if ISR is in
@@ -2481,24 +2791,6 @@ ioapic_write_eoi(int ioapic_ix, uint32_t value)
ioapic[APIC_IO_EOI] = value;
}
-static processorid_t
-apic_find_cpu(int flag)
-{
- processorid_t acid = 0;
- int i;
-
- /* Find the first CPU with the passed-in flag set */
- for (i = 0; i < apic_nproc; i++) {
- if (apic_cpus[i].aci_status & flag) {
- acid = i;
- break;
- }
- }
-
- ASSERT((apic_cpus[acid].aci_status & flag) != 0);
- return (acid);
-}
-
/*
* Call rebind to do the actual programming.
* Must be called with interrupts disabled and apic_ioapic_lock held
@@ -2508,8 +2800,8 @@ apic_find_cpu(int flag)
* p is of the type 'apic_irq_t *'.
*
* apic_ioapic_lock must be held across this call, as it protects apic_rebind
- * and it protects apic_find_cpu() from a race in which a CPU can be taken
- * offline after a cpu is selected, but before apic_rebind is called to
+ * and it protects apic_find_next_cpu_intr() from a race in which a CPU can be
+ * taken offline after a cpu is selected, but before apic_rebind is called to
* bind interrupts to it.
*/
int
@@ -2534,8 +2826,7 @@ apic_setup_io_intr(void *p, int irq, boolean_t deferred)
* CPU is not up or interrupts are disabled. Fall back to
* the first available CPU
*/
- rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
- drep);
+ rv = apic_rebind(irqptr, apic_find_next_cpu_intr(), drep);
}
return (rv);
@@ -2555,6 +2846,45 @@ apic_get_apic_type()
return (apic_psm_info.p_mach_idstring);
}
+/*
+ * Switch between the safe and the x2APIC IPI sending methods.
+ * A CPU may power on in xAPIC mode or in x2APIC mode. If it needs to send
+ * IPIs to other CPUs before entering x2APIC mode, it must still use the
+ * xAPIC method. Before a Startup IPI is sent to the target CPU,
+ * psm_send_ipi is switched to apic_common_send_ipi, which detects the
+ * current local APIC mode and uses the right method to send the IPI. If
+ * some CPUs fail to start up, apic_poweron_cnt won't return to zero, so
+ * apic_common_send_ipi remains in use. psm_send_ipi can't simply be
+ * switched back to x2apic_send_ipi, because the failed CPUs may recover
+ * by themselves at an unpredictable time.
+ */
+static void
+apic_switch_ipi_callback(boolean_t enter)
+{
+ ulong_t iflag;
+ struct psm_ops *pops = &apic_ops;
+
+ iflag = intr_clear();
+ lock_set(&apic_mode_switch_lock);
+ if (enter) {
+ ASSERT(apic_poweron_cnt >= 0);
+ if (apic_poweron_cnt == 0) {
+ pops->psm_send_ipi = apic_common_send_ipi;
+ send_dirintf = pops->psm_send_ipi;
+ }
+ apic_poweron_cnt++;
+ } else {
+ ASSERT(apic_poweron_cnt > 0);
+ apic_poweron_cnt--;
+ if (apic_poweron_cnt == 0) {
+ pops->psm_send_ipi = x2apic_send_ipi;
+ send_dirintf = pops->psm_send_ipi;
+ }
+ }
+ lock_clear(&apic_mode_switch_lock);
+ intr_restore(iflag);
+}
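
The callback is reference counted, so overlapping CPU startups share one switch to the safe method. A usage sketch mirroring apic_cpu_start() and apic_post_cpu_start() above:

	apic_switch_ipi_callback(B_TRUE);   /* 0 -> 1: apic_common_send_ipi */
	/* ... send Startup IPIs; target CPU enters x2APIC mode ... */
	apic_switch_ipi_callback(B_FALSE);  /* 1 -> 0: back to x2apic_send_ipi */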
+
void
x2apic_update_psm()
{
@@ -2562,28 +2892,9 @@ x2apic_update_psm()
ASSERT(pops != NULL);
- /*
- * We don't need to do any magic if one of the following
- * conditions is true :
- * - Not being run under kernel debugger.
- * - MP is not set.
- * - Booted with one CPU only.
- * - One CPU configured.
- *
- * We set apic_common_send_ipi() since kernel debuggers
- * attempt to send IPIs to other slave CPUs during
- * entry (exit) from (to) debugger.
- */
- if (!(boothowto & RB_DEBUG) || use_mp == 0 ||
- apic_nproc == 1 || boot_ncpus == 1) {
- pops->psm_send_ipi = x2apic_send_ipi;
- } else {
- pops->psm_send_ipi = apic_common_send_ipi;
- }
-
pops->psm_intr_exit = x2apic_intr_exit;
pops->psm_setspl = x2apic_setspl;
-
+ pops->psm_send_ipi = x2apic_send_ipi;
send_dirintf = pops->psm_send_ipi;
apic_mode = LOCAL_X2APIC;
diff --git a/usr/src/uts/i86pc/io/psm/uppc.c b/usr/src/uts/i86pc/io/psm/uppc.c
index 0b6faee267..ad4d378f76 100644
--- a/usr/src/uts/i86pc/io/psm/uppc.c
+++ b/usr/src/uts/i86pc/io/psm/uppc.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -173,12 +173,13 @@ static struct psm_ops uppc_ops = {
(int (*)(dev_info_t *, ddi_intr_handle_impl_t *,
psm_intr_op_t, int *))NULL, /* psm_intr_ops */
- uppc_state /* psm_state */
+ uppc_state, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info uppc_info = {
- PSM_INFO_VER01_6, /* version */
+ PSM_INFO_VER01_7, /* version */
PSM_OWN_SYS_DEFAULT, /* ownership */
(struct psm_ops *)&uppc_ops, /* operation */
"uppc", /* machine name */
diff --git a/usr/src/uts/i86pc/ml/mpcore.s b/usr/src/uts/i86pc/ml/mpcore.s
index 721761f01d..25a943870e 100644
--- a/usr/src/uts/i86pc/ml/mpcore.s
+++ b/usr/src/uts/i86pc/ml/mpcore.s
@@ -22,9 +22,11 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
@@ -44,6 +46,7 @@
* - The GDT, IDT, ktss and page directory has been built for us
*
* Our actions:
+ * Start CPU:
* - We start using our GDT by loading correct values in the
* selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL,
* gs=KGS_SEL).
@@ -51,20 +54,30 @@
* - We load the default LDT into the hardware LDT register.
* - We load the default TSS into the hardware task register.
* - call mp_startup(void) indirectly through the T_PC
+ * Stop CPU:
+ * - Put CPU into halted state with interrupts disabled
*
*/
#if defined(__lint)
void
-real_mode_start(void)
+real_mode_start_cpu(void)
+{}
+
+void
+real_mode_stop_cpu_stage1(void)
+{}
+
+void
+real_mode_stop_cpu_stage2(void)
{}
#else /* __lint */
#if defined(__amd64)
- ENTRY_NP(real_mode_start)
+ ENTRY_NP(real_mode_start_cpu)
#if !defined(__GNUC_AS__)
@@ -229,8 +242,8 @@ long_mode_64:
pushq $KCS_SEL
pushq $kernel_cs_code
lretq
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
kernel_cs_code:
@@ -484,8 +497,8 @@ long_mode_64:
pushq $KCS_SEL
pushq $kernel_cs_code
lretq
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
kernel_cs_code:
@@ -575,11 +588,11 @@ kernel_cs_code:
int $20 /* whoops, returned somehow! */
#endif /* !__GNUC_AS__ */
- SET_SIZE(real_mode_start)
+ SET_SIZE(real_mode_start_cpu)
#elif defined(__i386)
- ENTRY_NP(real_mode_start)
+ ENTRY_NP(real_mode_start_cpu)
#if !defined(__GNUC_AS__)
@@ -621,8 +634,8 @@ pestart:
D16 pushl $KCS_SEL
D16 pushl $kernel_cs_code
D16 lret
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
.globl kernel_cs_code
@@ -714,8 +727,8 @@ pestart:
D16 pushl $KCS_SEL
D16 pushl $kernel_cs_code
D16 lret
- .globl real_mode_end
-real_mode_end:
+ .globl real_mode_start_cpu_end
+real_mode_start_cpu_end:
nop
.globl kernel_cs_code
kernel_cs_code:
@@ -767,7 +780,120 @@ kernel_cs_code:
int $20 /* whoops, returned somehow! */
#endif
- SET_SIZE(real_mode_start)
+ SET_SIZE(real_mode_start_cpu)
#endif /* __amd64 */
+
+#if defined(__amd64)
+
+ ENTRY_NP(real_mode_stop_cpu_stage1)
+
+#if !defined(__GNUC_AS__)
+
+ /*
+ * For vulcan as we need to do a .code32 and mentally invert the
+ * meaning of the addr16 and data16 prefixes to get 32-bit access when
+ * generating code to be executed in 16-bit mode (sigh...)
+ */
+ .code32
+ cli
+ movw %cs, %ax
+ movw %ax, %ds /* load cs into ds */
+ movw %ax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ .byte 0xff, 0xe0 /* jmp *%ax */
+
+#else /* __GNUC_AS__ */
+
+ /*
+ * NOTE: The GNU assembler automatically does the right thing to
+ * generate data size operand prefixes based on the code size
+ * generation mode (e.g. .code16, .code32, .code64) and as such
+ * prefixes need not be used on instructions EXCEPT in the case
+ * of address prefixes for code for which the reference is not
+ * automatically of the default operand size.
+ */
+ .code16
+ cli
+ movw %cs, %ax
+ movw %ax, %ds /* load cs into ds */
+ movw %ax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ jmp *%ax
+
+#endif /* !__GNUC_AS__ */
+
+ .globl real_mode_stop_cpu_stage1_end
+real_mode_stop_cpu_stage1_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage1)
+
+#elif defined(__i386)
+
+ ENTRY_NP(real_mode_stop_cpu_stage1)
+
+#if !defined(__GNUC_AS__)
+
+ cli
+ D16 movw %cs, %eax
+ movw %eax, %ds /* load cs into ds */
+ movw %eax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ .byte 0xff, 0xe0 /* jmp *%ax */
+
+#else /* __GNUC_AS__ */
+
+ cli
+ mov %cs, %ax
+ mov %eax, %ds /* load cs into ds */
+ mov %eax, %ss /* and into ss */
+
+ /*
+ * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code
+ */
+ movw $CPUHALTCODEOFF, %ax
+ jmp *%ax
+
+#endif /* !__GNUC_AS__ */
+
+ .globl real_mode_stop_cpu_stage1_end
+real_mode_stop_cpu_stage1_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage1)
+
+#endif /* __amd64 */
+
+ ENTRY_NP(real_mode_stop_cpu_stage2)
+
+ movw $0xdead, %ax
+ movw %ax, CPUHALTEDOFF
+
+real_mode_stop_cpu_loop:
+ /*
+ * Put CPU into halted state.
+	 * Only INIT, SMI and NMI can break the loop.
+ */
+ hlt
+ jmp real_mode_stop_cpu_loop
+
+ .globl real_mode_stop_cpu_stage2_end
+real_mode_stop_cpu_stage2_end:
+ nop
+
+ SET_SIZE(real_mode_stop_cpu_stage2)
+
#endif /* __lint */
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index f857617beb..029193b35e 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -295,6 +295,8 @@ rm_platter
rm_cpu CPUNOFF
rm_cr4 CR4OFF
rm_x86feature X86FEATURE
+ rm_cpu_halt_code CPUHALTCODEOFF
+ rm_cpu_halted CPUHALTEDOFF
ddi_acc_impl
ahi_acc_attr ACC_ATTR
diff --git a/usr/src/uts/i86pc/os/acpi_fw.h b/usr/src/uts/i86pc/os/acpi_fw.h
index 032e231f3f..54a34ef591 100644
--- a/usr/src/uts/i86pc/os/acpi_fw.h
+++ b/usr/src/uts/i86pc/os/acpi_fw.h
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _ACPI_FW_H
#define _ACPI_FW_H
@@ -161,6 +165,28 @@ struct slit {
*/
extern struct slit *slit_ptr;
+struct msct_proximity_domain {
+ uint8_t revision;
+ uint8_t length;
+ uint32_t domain_min;
+ uint32_t domain_max;
+ uint32_t processor_max;
+ uint64_t memory_max;
+};
+
+struct msct {
+ struct table_header hdr;
+ uint32_t proximity_domain_offset;
+ uint32_t maximum_proximity_domains;
+ uint32_t maximum_power_domains;
+ uint64_t maximum_physical_address;
+};
+
+/*
+ * Pointer to Maximum System Capability Table (MSCT)
+ */
+extern struct msct *msct_ptr;
+
struct cfg_base_addr_alloc {
uint64_t base_addr;
uint16_t segment;
diff --git a/usr/src/uts/i86pc/os/cmi.c b/usr/src/uts/i86pc/os/cmi.c
index f9d722f031..4d27eb61d3 100644
--- a/usr/src/uts/i86pc/os/cmi.c
+++ b/usr/src/uts/i86pc/os/cmi.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -100,6 +100,8 @@ typedef struct cmi {
} cmi_t;
static cmi_t *cmi_list;
+static const cmi_mc_ops_t *cmi_mc_global_ops;
+static void *cmi_mc_global_data;
static kmutex_t cmi_load_lock;
/*
@@ -107,6 +109,7 @@ static kmutex_t cmi_load_lock;
* interface.
*/
extern cmi_hdl_t cmi_hdl_create(enum cmi_hdl_class, uint_t, uint_t, uint_t);
+extern void cmi_hdl_destroy(cmi_hdl_t ophdl);
extern void cmi_hdl_setcmi(cmi_hdl_t, void *, void *);
extern void *cmi_hdl_getcmi(cmi_hdl_t);
extern void cmi_hdl_setmc(cmi_hdl_t, const struct cmi_mc_ops *, void *);
@@ -484,9 +487,8 @@ cmi_init(enum cmi_hdl_class class, uint_t chipid, uint_t coreid,
}
/*
- * cmi_fini is not called at the moment. It is intended to be called
- * on DR deconfigure of a cpu resource. It should not be called at
- * simple offline of a cpu.
+ * cmi_fini is called on DR deconfigure of a cpu resource.
+ * It should not be called at simple offline of a cpu.
*/
void
cmi_fini(cmi_hdl_t hdl)
@@ -499,7 +501,7 @@ cmi_fini(cmi_hdl_t hdl)
if (CMI_OP_PRESENT(cmi, cmi_fini))
CMI_OPS(cmi)->cmi_fini(hdl);
- cmi_hdl_rele(hdl); /* release hold obtained in cmi_hdl_create */
+ cmi_hdl_destroy(hdl);
}
/*
@@ -806,6 +808,21 @@ cmi_mc_register(cmi_hdl_t hdl, const cmi_mc_ops_t *mcops, void *mcdata)
cmi_hdl_setmc(hdl, mcops, mcdata);
}
+cmi_errno_t
+cmi_mc_register_global(const cmi_mc_ops_t *mcops, void *mcdata)
+{
+ if (!cmi_no_mca_init) {
+ if (cmi_mc_global_ops != NULL || cmi_mc_global_data != NULL ||
+ mcops == NULL || mcops->cmi_mc_patounum == NULL ||
+ mcops->cmi_mc_unumtopa == NULL) {
+ return (CMIERR_UNKNOWN);
+ }
+ cmi_mc_global_data = mcdata;
+ cmi_mc_global_ops = mcops;
+ }
+ return (CMI_SUCCESS);
+}
+
void
cmi_mc_sw_memscrub_disable(void)
{
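
cmi_mc_register_global() lets a single memory-controller driver claim address
translation for the whole machine; cmi_mc_patounum()/cmi_mc_unumtopa() then
consult the global ops before any per-handle ops. A standalone sketch of that
dispatch precedence, with simplified stand-in types rather than the real
cmi_mc_ops_t:

    #include <stddef.h>
    #include <stdio.h>

    typedef struct mc_ops {            /* simplified stand-in */
        int (*patounum)(void *data, unsigned long pa);
    } mc_ops_t;

    static const mc_ops_t *global_ops;
    static void *global_data;

    static int
    register_global(const mc_ops_t *ops, void *data)
    {
        /* exactly one global registration, and it must translate */
        if (global_ops != NULL || ops == NULL || ops->patounum == NULL)
            return (-1);
        global_data = data;
        global_ops = ops;
        return (0);
    }

    static int
    patounum(unsigned long pa)
    {
        if (global_ops != NULL)        /* global ops take precedence */
            return (global_ops->patounum(global_data, pa));
        return (-1);                   /* else fall back to a per-CPU handle */
    }

    static int
    demo_patounum(void *data, unsigned long pa)
    {
        return (printf("%s: pa 0x%lx\n", (const char *)data, pa) < 0);
    }

    int
    main(void)
    {
        static const mc_ops_t ops = { demo_patounum };

        (void) register_global(&ops, "demo-mc");
        return (patounum(0x1000));
    }
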
@@ -820,8 +837,17 @@ cmi_mc_patounum(uint64_t pa, uint8_t valid_hi, uint8_t valid_lo, uint32_t synd,
cmi_hdl_t hdl;
cmi_errno_t rv;
- if (cmi_no_mca_init ||
- (hdl = cmi_hdl_any()) == NULL) /* short-term hold */
+ if (cmi_no_mca_init)
+ return (CMIERR_MC_ABSENT);
+
+ if (cmi_mc_global_ops != NULL) {
+ if (cmi_mc_global_ops->cmi_mc_patounum == NULL)
+ return (CMIERR_MC_NOTSUP);
+ return (cmi_mc_global_ops->cmi_mc_patounum(cmi_mc_global_data,
+ pa, valid_hi, valid_lo, synd, syndtype, up));
+ }
+
+ if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */
return (CMIERR_MC_ABSENT);
if ((mcops = cmi_hdl_getmcops(hdl)) == NULL ||
@@ -849,8 +875,17 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap)
if (up != NULL && nvl != NULL)
return (CMIERR_API); /* convert from just one form */
- if (cmi_no_mca_init ||
- (hdl = cmi_hdl_any()) == NULL) /* short-term hold */
+ if (cmi_no_mca_init)
+ return (CMIERR_MC_ABSENT);
+
+ if (cmi_mc_global_ops != NULL) {
+ if (cmi_mc_global_ops->cmi_mc_unumtopa == NULL)
+ return (CMIERR_MC_NOTSUP);
+ return (cmi_mc_global_ops->cmi_mc_unumtopa(cmi_mc_global_data,
+ up, nvl, pap));
+ }
+
+ if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */
return (CMIERR_MC_ABSENT);
if ((mcops = cmi_hdl_getmcops(hdl)) == NULL ||
@@ -865,7 +900,7 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap)
pap) == 0)) {
return (CMIERR_MC_PARTIALUNUMTOPA);
} else {
- return (mcops && mcops->cmi_mc_unumtopa ?
+ return (mcops && mcops->cmi_mc_unumtopa == NULL ?
CMIERR_MC_NOTSUP : CMIERR_MC_ABSENT);
}
}
@@ -882,10 +917,15 @@ cmi_mc_logout(cmi_hdl_t hdl, boolean_t ismc, boolean_t sync)
{
const struct cmi_mc_ops *mcops;
- if (cmi_no_mca_init || (mcops = cmi_hdl_getmcops(hdl)) == NULL)
+ if (cmi_no_mca_init)
return;
- if (mcops->cmi_mc_logout != NULL)
+ if (cmi_mc_global_ops != NULL)
+ mcops = cmi_mc_global_ops;
+ else
+ mcops = cmi_hdl_getmcops(hdl);
+
+ if (mcops != NULL && mcops->cmi_mc_logout != NULL)
mcops->cmi_mc_logout(hdl, ismc, sync);
}
diff --git a/usr/src/uts/i86pc/os/cmi_hw.c b/usr/src/uts/i86pc/os/cmi_hw.c
index 1c2dbeaad6..8e3ae0f0af 100644
--- a/usr/src/uts/i86pc/os/cmi_hw.c
+++ b/usr/src/uts/i86pc/os/cmi_hw.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* CPU Module Interface - hardware abstraction.
@@ -94,6 +98,7 @@ typedef struct cmi_hdl_impl {
#define HDLOPS(hdl) ((hdl)->cmih_ops)
#define CMIH_F_INJACTV 0x1ULL
+#define CMIH_F_DEAD 0x2ULL
/*
* Ops structure for handle operations.
@@ -847,9 +852,18 @@ ntv_int(cmi_hdl_impl_t *hdl, int int_no)
static int
ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status)
{
+ int rc;
processorid_t cpuid = HDLPRIV(hdl)->cpu_id;
- return (p_online_internal(cpuid, new_status, old_status));
+ while (mutex_tryenter(&cpu_lock) == 0) {
+ if (hdl->cmih_flags & CMIH_F_DEAD)
+ return (EBUSY);
+ delay(1);
+ }
+ rc = p_online_internal_locked(cpuid, new_status, old_status);
+ mutex_exit(&cpu_lock);
+
+ return (rc);
}
#else /* __xpv */
@@ -1387,15 +1401,32 @@ void
cmi_hdl_rele(cmi_hdl_t ophdl)
{
cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
- cmi_hdl_ent_t *ent;
ASSERT(*hdl->cmih_refcntp > 0);
+ (void) atomic_dec_32_nv(hdl->cmih_refcntp);
+}
- if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0)
- return;
+void
+cmi_hdl_destroy(cmi_hdl_t ophdl)
+{
+ cmi_hdl_impl_t *hdl = IMPLHDL(ophdl);
+ cmi_hdl_ent_t *ent;
+
+ /* Release the reference count held by cmi_hdl_create(). */
+ ASSERT(*hdl->cmih_refcntp > 0);
+ (void) atomic_dec_32_nv(hdl->cmih_refcntp);
+ hdl->cmih_flags |= CMIH_F_DEAD;
ent = cmi_hdl_ent_lookup(hdl->cmih_chipid, hdl->cmih_coreid,
hdl->cmih_strandid);
+ /*
+	 * Use busy polling instead of a condition variable here because
+	 * cmi_hdl_rele() may be called from the #MC handler.
+ */
+ while (cmi_hdl_canref(ent)) {
+ cmi_hdl_rele(ophdl);
+ delay(1);
+ }
ent->cmae_hdlp = NULL;
kmem_free(hdl, sizeof (*hdl));
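
With this split, cmi_hdl_rele() only drops a reference, while
cmi_hdl_destroy() drops the creation reference, marks the handle dead, and
busy-waits for transient holders before freeing, since it cannot block on a
condition variable when holders may run in trap context. A compressed C11
analogue of the teardown (hypothetical names, not the kernel types):

    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct hdl {
        atomic_uint refcnt;        /* create() leaves this at 1 */
        int dead;                  /* fails new lookups once set */
    } hdl_t;

    static void
    hdl_rele(hdl_t *h)             /* never frees, unlike before */
    {
        (void) atomic_fetch_sub(&h->refcnt, 1);
    }

    static void
    hdl_destroy(hdl_t *h)
    {
        (void) atomic_fetch_sub(&h->refcnt, 1); /* create()'s reference */
        h->dead = 1;
        /*
         * Busy-poll rather than cv_wait(): transient holders may be in
         * a context (e.g. the #MC handler) where blocking is illegal.
         */
        while (atomic_load(&h->refcnt) != 0)
            continue;              /* delay(1) in the kernel version */
        (void) printf("no holders left; safe to free the handle\n");
    }

    int
    main(void)
    {
        hdl_t h = { 2, 0 };        /* create() + one transient hold */

        hdl_rele(&h);              /* transient holder drops out */
        hdl_destroy(&h);
        return (0);
    }
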
@@ -1628,6 +1659,33 @@ cmi_ntv_hwstrandid(cpu_t *cp)
return (cpuid_get_clogid(cp) % strands_per_core);
}
+
+static void
+cmi_ntv_hwdisable_mce_xc(void)
+{
+ ulong_t cr4;
+
+ cr4 = getcr4();
+ cr4 = cr4 & (~CR4_MCE);
+ setcr4(cr4);
+}
+
+void
+cmi_ntv_hwdisable_mce(cmi_hdl_t hdl)
+{
+ cpuset_t set;
+ cmi_hdl_impl_t *thdl = IMPLHDL(hdl);
+ cpu_t *cp = HDLPRIV(thdl);
+
+ if (CPU->cpu_id == cp->cpu_id) {
+ cmi_ntv_hwdisable_mce_xc();
+ } else {
+ CPUSET_ONLY(set, cp->cpu_id);
+ xc_call(NULL, NULL, NULL, CPUSET2BV(set),
+ (xc_func_t)cmi_ntv_hwdisable_mce_xc);
+ }
+}
+
#endif /* __xpv */
void
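
cmi_ntv_hwdisable_mce() clears CR4.MCE on the CPU being torn down, running the
clear directly when already executing on that CPU and bouncing through
xc_call() otherwise. A trivial sketch of that run-here-or-cross-call shape
(illustrative names only):

    #include <stdio.h>

    static int curcpu = 0;             /* pretend we run on CPU 0 */

    static void
    disable_mce_here(void)
    {
        /* kernel version: setcr4(getcr4() & ~CR4_MCE) on this CPU */
        (void) printf("CR4.MCE cleared on cpu %d\n", curcpu);
    }

    static void
    disable_mce(int target)
    {
        if (curcpu == target) {
            disable_mce_here();        /* already on the target CPU */
        } else {
            /*
             * The kernel builds a cpuset holding only the target and
             * runs the worker there via xc_call().
             */
            (void) printf("cross-call to cpu %d\n", target);
        }
    }

    int
    main(void)
    {
        disable_mce(0);
        disable_mce(3);
        return (0);
    }
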
diff --git a/usr/src/uts/i86pc/os/cms.c b/usr/src/uts/i86pc/os/cms.c
index 18d3b046f6..1fe03d3f30 100644
--- a/usr/src/uts/i86pc/os/cms.c
+++ b/usr/src/uts/i86pc/os/cms.c
@@ -23,6 +23,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/cpu_module_ms_impl.h>
@@ -421,9 +425,19 @@ void
cms_fini(cmi_hdl_t hdl)
{
cms_t *cms = HDL2CMS(hdl);
+ struct cms_ctl *cdp;
if (CMS_OP_PRESENT(cms, cms_fini))
CMS_OPS(cms)->cms_fini(hdl);
+
+ mutex_enter(&cms_load_lock);
+ cdp = (struct cms_ctl *)cmi_hdl_getspecific(hdl);
+ if (cdp != NULL) {
+ if (cdp->cs_cms != NULL)
+ cms_rele(cdp->cs_cms);
+ kmem_free(cdp, sizeof (*cdp));
+ }
+ mutex_exit(&cms_load_lock);
}
boolean_t
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 2599d3530a..103955a097 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -337,7 +337,7 @@ i_cpr_pre_resume_cpus()
CPUSET_ONLY(cpu_ready_set, boot_cpuid);
CPUSET_ONLY(procset, boot_cpuid);
- for (who = 0; who < ncpus; who++) {
+ for (who = 0; who < max_ncpus; who++) {
wc_cpu_t *cpup = wc_other_cpus + who;
wc_desctbr_t gdt;
@@ -363,7 +363,10 @@ i_cpr_pre_resume_cpus()
init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
- if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) {
+ mutex_enter(&cpu_lock);
+ err = mach_cpuid_start(who, rm_platter_va);
+ mutex_exit(&cpu_lock);
+ if (err != 0) {
cmn_err(CE_WARN, "cpu%d: failed to start during "
"suspend/resume error %d", who, err);
continue;
@@ -481,10 +484,13 @@ prt_other_cpus()
return;
}
- for (who = 0; who < ncpus; who++) {
+ for (who = 0; who < max_ncpus; who++) {
wc_cpu_t *cpup = wc_other_cpus + who;
+ if (!CPU_IN_SET(mp_cpus, who))
+ continue;
+
PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
"idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
AFMT ", sp=%lx\n", who,
@@ -1009,7 +1015,7 @@ i_cpr_alloc_cpus(void)
*/
if (wc_other_cpus == NULL) {
- wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t),
+ wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
KM_SLEEP);
}
@@ -1022,7 +1028,7 @@ i_cpr_free_cpus(void)
wc_cpu_t *wc_cpu;
if (wc_other_cpus != NULL) {
- for (index = 0; index < ncpus; index++) {
+ for (index = 0; index < max_ncpus; index++) {
wc_cpu = wc_other_cpus + index;
if (wc_cpu->wc_saved_stack != NULL) {
kmem_free(wc_cpu->wc_saved_stack,
@@ -1030,7 +1036,8 @@ i_cpr_free_cpus(void)
}
}
- kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t));
+ kmem_free((void *) wc_other_cpus,
+ max_ncpus * sizeof (wc_cpu_t));
wc_other_cpus = NULL;
}
}
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 8a14fccaa8..d47d680939 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -430,6 +430,9 @@ uint32_t cpuid_feature_ecx_exclude;
uint32_t cpuid_feature_edx_include;
uint32_t cpuid_feature_edx_exclude;
+/*
+ * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
+ */
void
cpuid_alloc_space(cpu_t *cpu)
{
@@ -439,6 +442,7 @@ cpuid_alloc_space(cpu_t *cpu)
* their cpuid_info struct allocated here.
*/
ASSERT(cpu->cpu_id != 0);
+ ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
cpu->cpu_m.mcpu_cpi =
kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
}
@@ -449,7 +453,8 @@ cpuid_free_space(cpu_t *cpu)
struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
int i;
- ASSERT(cpu->cpu_id != 0);
+ ASSERT(cpi != NULL);
+ ASSERT(cpi != &cpuid_info0);
/*
* Free up any function 4 related dynamic storage
@@ -460,7 +465,8 @@ cpuid_free_space(cpu_t *cpu)
kmem_free(cpi->cpi_std_4,
cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
- kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi));
+ kmem_free(cpi, sizeof (*cpi));
+ cpu->cpu_m.mcpu_cpi = NULL;
}
#if !defined(__xpv)
@@ -721,9 +727,9 @@ cpuid_pass1(cpu_t *cpu)
determine_platform();
#endif
/*
- * Space statically allocated for cpu0, ensure pointer is set
+ * Space statically allocated for BSP, ensure pointer is set
*/
- if (cpu->cpu_id == 0)
+ if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL)
cpu->cpu_m.mcpu_cpi = &cpuid_info0;
cpi = cpu->cpu_m.mcpu_cpi;
ASSERT(cpi != NULL);
@@ -3977,9 +3983,9 @@ cpuid_mwait_alloc(cpu_t *cpu)
uint32_t *ret;
size_t mwait_size;
- ASSERT(cpuid_checkpass(cpu, 2));
+ ASSERT(cpuid_checkpass(CPU, 2));
- mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
+ mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
if (mwait_size == 0)
return (NULL);
@@ -4016,7 +4022,9 @@ cpuid_mwait_alloc(cpu_t *cpu)
void
cpuid_mwait_free(cpu_t *cpu)
{
- ASSERT(cpuid_checkpass(cpu, 2));
+ if (cpu->cpu_m.mcpu_cpi == NULL) {
+ return;
+ }
if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
index a092046071..896b110e0e 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c
@@ -733,7 +733,8 @@ cpu_acpi_cache_cst(cpu_acpi_handle_t handle)
CPU_ACPI_CSTATES_COUNT(handle) = (uint32_t)cnt;
alloc_size = CPU_ACPI_CSTATES_SIZE(cnt);
- CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP);
+ if (CPU_ACPI_CSTATES(handle) == NULL)
+ CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP);
cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
p = cstate;
diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
index 3dd75f00a5..483e8865c7 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -47,6 +47,8 @@
#define CSTATE_USING_HPET 1
#define CSTATE_USING_LAT 2
+#define CPU_IDLE_STOP_TIMEOUT 1000
+
extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
@@ -677,7 +679,7 @@ cpu_idle_init(cpu_t *cp)
/*
* Allocate, initialize and install cstate kstat
*/
- cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
+ cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
name, "misc",
KSTAT_TYPE_NAMED,
sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
@@ -692,8 +694,8 @@ cpu_idle_init(cpu_t *cp)
cstate->cs_ksp->ks_data_size += MAXNAMELEN;
cstate->cs_ksp->ks_private = cstate;
kstat_install(cstate->cs_ksp);
- cstate++;
}
+ cstate++;
}
cpupm_alloc_domains(cp, CPUPM_C_STATES);
@@ -771,6 +773,19 @@ cpu_idle_fini(cpu_t *cp)
mutex_exit(&cpu_idle_callb_mutex);
}
+/*
+ * This function is introduced to solve a race condition between the
+ * master and the slave touching the c-state data structures. Once the
+ * slave calls this idle function and switches to the non-deep idle
+ * function, the master can safely go on to reclaim the resources.
+ */
+static void
+cpu_idle_stop_sync(void)
+{
+ /* switch to the non deep idle function */
+ CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
+}
+
static void
cpu_idle_stop(cpu_t *cp)
{
@@ -778,15 +793,28 @@ cpu_idle_stop(cpu_t *cp)
(cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
cpu_acpi_cstate_t *cstate;
- uint_t cpu_max_cstates, i;
+ uint_t cpu_max_cstates, i = 0;
- /*
- * place the CPUs in a safe place so that we can disable
- * deep c-state on them.
- */
- pause_cpus(NULL);
- cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
- start_cpus();
+ mutex_enter(&cpu_idle_callb_mutex);
+ if (idle_cpu == cpu_idle_adaptive) {
+ /*
+ * invoke the slave to call synchronous idle function.
+		 * invoke the slave to call the synchronous idle function.
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
+ poke_cpu(cp->cpu_id);
+
+ /*
+		 * wait until the slave switches to the non-deep idle function,
+		 * so that the master can safely go on to reclaim the resources.
+ */
+ while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
+ drv_usecwait(10);
+ if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
+				cmn_err(CE_NOTE, "!cpu_idle_stop: timed out"
+				    " waiting for the slave to stop");
+ }
+ }
+ mutex_exit(&cpu_idle_callb_mutex);
cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
if (cstate) {
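
The new stop path replaces pause_cpus()/start_cpus() with a pointer handshake:
the master installs cpu_idle_stop_sync() as the slave's idle function and
pokes it; the next time the slave idles it swaps itself over to
non_deep_idle_cpu, and the master spins (warning periodically) until it
observes that swap. A pthread sketch of the same handshake under assumed
names:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static void (*volatile idle_fn)(void);

    static void deep_idle(void) { (void) usleep(1000); }
    static void non_deep_idle(void) { (void) usleep(1000); }

    /* analogue of cpu_idle_stop_sync(): the slave switches itself over */
    static void stop_sync(void) { idle_fn = non_deep_idle; }

    static void *
    slave(void *arg)
    {
        (void) arg;
        for (int i = 0; i < 1000 && idle_fn != non_deep_idle; i++)
            idle_fn();         /* keeps calling whatever the master installed */
        return (NULL);
    }

    int
    main(void)
    {
        pthread_t t;

        idle_fn = deep_idle;
        (void) pthread_create(&t, NULL, slave, NULL);

        idle_fn = stop_sync;           /* master: redirect the slave */
        while (idle_fn != non_deep_idle)
            (void) usleep(10);         /* wait until the slave switched */

        (void) pthread_join(t, NULL);
        (void) printf("slave off deep idle; c-state data can be reclaimed\n");
        return (0);
    }
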
@@ -937,7 +965,6 @@ cpuidle_cstate_instance(cpu_t *cp)
cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
" object Instance: %d", cpu_id);
}
- mutex_enter(&cpu_lock);
mcpu = &(cp->cpu_m);
mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
if (mcpu->max_cstates > CPU_ACPI_C1) {
@@ -950,7 +977,6 @@ cpuidle_cstate_instance(cpu_t *cp)
cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
}
- mutex_exit(&cpu_lock);
CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
} while (result < 0);
diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
index 04113a9485..66b2acc7bf 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c
@@ -93,8 +93,8 @@ cpupm_intel_init(cpu_t *cp)
if (x86_vendor != X86_VENDOR_Intel)
return (B_FALSE);
- family = cpuid_getfamily(CPU);
- model = cpuid_getmodel(CPU);
+ family = cpuid_getfamily(cp);
+ model = cpuid_getmodel(cp);
cpupm_intel_pdccap = CPUPM_INTEL_PDC_MP;
diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
index 78ad028890..72302fcf07 100644
--- a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
+++ b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c
@@ -240,7 +240,7 @@ cpupm_init(cpu_t *cp)
if (mcpu->max_cstates > CPU_ACPI_C1) {
(void) cstate_timer_callback(
CST_EVENT_MULTIPLE_CSTATES);
- CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
+ cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
mcpu->mcpu_idle_type = CPU_ACPI_C1;
disp_enq_thread = cstate_wakeup;
} else {
@@ -439,9 +439,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_PSD(handle).sd_domain;
type = CPU_ACPI_PSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_chipid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_chipid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_chipid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_pstate_domains;
@@ -452,9 +456,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_TSD(handle).sd_domain;
type = CPU_ACPI_TSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_chipid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_chipid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_chipid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_tstate_domains;
@@ -465,9 +473,13 @@ cpupm_alloc_domains(cpu_t *cp, int state)
domain = CPU_ACPI_CSD(handle).sd_domain;
type = CPU_ACPI_CSD(handle).sd_type;
} else {
- mutex_enter(&cpu_lock);
- domain = cpuid_get_coreid(cp);
- mutex_exit(&cpu_lock);
+ if (MUTEX_HELD(&cpu_lock)) {
+ domain = cpuid_get_coreid(cp);
+ } else {
+ mutex_enter(&cpu_lock);
+ domain = cpuid_get_coreid(cp);
+ mutex_exit(&cpu_lock);
+ }
type = CPU_ACPI_HW_ALL;
}
dom_ptr = &cpupm_cstate_domains;
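
Each of the three hunks above guards cpuid_get_chipid()/cpuid_get_coreid()
with the same "enter cpu_lock only if the caller doesn't already hold it"
dance, since DR callers arrive with cpu_lock held while the boot path does
not. A hypothetical helper showing the shape of that pattern in a pthread
analogue (pthreads has no MUTEX_HELD(), so a flag stands in for it):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t cpu_lock = PTHREAD_MUTEX_INITIALIZER;

    static int
    with_cpu_lock(int already_held, int (*fn)(void))
    {
        int v;

        if (already_held)
            return (fn());             /* caller owns the lock */
        (void) pthread_mutex_lock(&cpu_lock);
        v = fn();
        (void) pthread_mutex_unlock(&cpu_lock);
        return (v);
    }

    static int get_chipid(void) { return (2); }    /* fake cpuid query */

    int
    main(void)
    {
        (void) printf("chipid %d\n", with_cpu_lock(0, get_chipid));
        return (0);
    }
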
diff --git a/usr/src/uts/i86pc/os/fakebop.c b/usr/src/uts/i86pc/os/fakebop.c
index 1c23b7643c..d38bcb046f 100644
--- a/usr/src/uts/i86pc/os/fakebop.c
+++ b/usr/src/uts/i86pc/os/fakebop.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* This file contains the functionality that mimics the boot operations
@@ -40,6 +44,7 @@
#include <sys/varargs.h>
#include <sys/param.h>
#include <sys/machparam.h>
+#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/boot_console.h>
#include <sys/cmn_err.h>
@@ -151,12 +156,13 @@ char fastreboot_onpanic_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
static const char fastreboot_onpanic_args[] = " -B fastreboot_onpanic=0";
/*
- * Pointers to where System Resource Affinity Table (SRAT) and
- * System Locality Information Table (SLIT) are mapped into virtual memory
+ * Pointers to where System Resource Affinity Table (SRAT), System Locality
+ * Information Table (SLIT) and Maximum System Capability Table (MSCT)
+ * are mapped into virtual memory
*/
struct srat *srat_ptr = NULL;
struct slit *slit_ptr = NULL;
-
+struct msct *msct_ptr = NULL;
/*
* Allocate aligned physical memory at boot time. This allocator allocates
@@ -2077,6 +2083,7 @@ process_madt(struct madt *tp)
{
struct madt_processor *cpu, *end;
uint32_t cpu_count = 0;
+ uint32_t cpu_possible_count = 0;
uint8_t cpu_apicid_array[UINT8_MAX + 1];
if (tp != NULL) {
@@ -2094,6 +2101,7 @@ process_madt(struct madt *tp)
cpu->apic_id;
cpu_count++;
}
+ cpu_possible_count++;
}
cpu = (struct madt_processor *)
@@ -2109,17 +2117,44 @@ process_madt(struct madt *tp)
}
/*
+	 * Check whether property plat-max-ncpus is already set.
+ */
+ if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
+ /*
+		 * Set plat-max-ncpus to the maximum possible number of CPUs
+		 * given in the MADT if it hasn't been set yet.
+		 * There's no formal way to detect the maximum number of CPUs
+		 * supported by a platform according to the ACPI 3.0b spec, so
+		 * the current CPU hotplug implementation expects all possible
+		 * CPUs to have an entry in the MADT and sets plat-max-ncpus
+		 * to the number of entries in the MADT.
+		 * With the introduction of ACPI 4.0, the Maximum System
+		 * Capability Table (MSCT) provides the maximum number of CPUs
+		 * supported by the platform. If the MSCT is unavailable, fall
+		 * back to the old way.
+ */
+ if (tp != NULL)
+ bsetpropsi(PLAT_MAX_NCPUS_NAME, cpu_possible_count);
+ }
+
+ /*
+ * Set boot property boot-max-ncpus to number of CPUs existing at
+ * boot time. boot-max-ncpus is mainly used for optimization.
+ */
+ if (tp != NULL)
+ bsetpropsi(BOOT_MAX_NCPUS_NAME, cpu_count);
+
+ /*
* User-set boot-ncpus overrides firmware count
*/
- if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+ if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
return;
/*
- * Set boot property for boot-ncpus to number of CPUs given in MADT
- * if user hasn't set the property already
+ * Set boot property boot-ncpus to number of active CPUs given in MADT
+ * if it hasn't been set yet.
*/
if (tp != NULL)
- bsetpropsi("boot-ncpus", cpu_count);
+ bsetpropsi(BOOT_NCPUS_NAME, cpu_count);
}
static void
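
process_madt() now derives three related properties: boot-ncpus (CPUs to
start, user-overridable), boot-max-ncpus (CPUs present at boot), and
plat-max-ncpus (all possible CPUs, including hot-addable ones). A small sketch
of that precedence, where -1 stands for "property not set":

    #include <stdio.h>

    static int
    resolve_ncpus(int user_boot_ncpus, int madt_present, int madt_possible,
        int msct_possible)
    {
        /* MSCT wins for plat-max-ncpus; otherwise count MADT entries */
        int plat_max = (msct_possible > 0) ? msct_possible : madt_possible;
        /* a user-set boot-ncpus overrides the firmware CPU count */
        int boot = (user_boot_ncpus > 0) ? user_boot_ncpus : madt_present;

        (void) printf("plat-max-ncpus=%d boot-max-ncpus=%d boot-ncpus=%d\n",
            plat_max, madt_present, boot);
        return (boot);
    }

    int
    main(void)
    {
        (void) resolve_ncpus(-1, 8, 32, 64);   /* firmware-driven */
        (void) resolve_ncpus(4, 8, 32, -1);    /* boot-ncpus wins */
        return (0);
    }
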
@@ -2146,6 +2181,7 @@ process_srat(struct srat *tp)
} memory;
#pragma pack()
char prop_name[30];
+ uint64_t maxmem = 0;
if (tp == NULL)
return;
@@ -2181,6 +2217,10 @@ process_srat(struct srat *tp)
mem_num);
bsetprop(prop_name, strlen(prop_name), &memory,
sizeof (memory));
+ if ((item->i.m.flags & SRAT_HOT_PLUG) &&
+ (memory.addr + memory.length > maxmem)) {
+ maxmem = memory.addr + memory.length;
+ }
mem_num++;
break;
case SRAT_X2APIC:
@@ -2199,6 +2239,14 @@ process_srat(struct srat *tp)
item = (struct srat_item *)
(item->len + (caddr_t)item);
}
+
+ /*
+ * The maximum physical address calculated from the SRAT table is more
+ * accurate than that calculated from the MSCT table.
+ */
+ if (maxmem != 0) {
+ plat_dr_physmax = btop(maxmem);
+ }
}
static void
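
The SRAT walk above records the highest end address of any hot-pluggable
memory range and converts it to a page count for plat_dr_physmax, preferring
this over the coarser MSCT-derived ceiling. A standalone sketch of that
computation (the table contents here are made up):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGESHIFT   12
    #define btop(x)     ((x) >> PAGESHIFT)     /* bytes to pages */

    struct mem_aff { uint64_t addr, length; int hotplug; };

    int
    main(void)
    {
        struct mem_aff srat[] = {
            { 0x0,         0x80000000,  0 },   /* boot memory */
            { 0x100000000, 0x100000000, 1 },   /* hot-pluggable range */
        };
        uint64_t maxmem = 0;

        for (size_t i = 0; i < sizeof (srat) / sizeof (srat[0]); i++) {
            uint64_t end = srat[i].addr + srat[i].length;

            if (srat[i].hotplug && end > maxmem)
                maxmem = end;
        }
        if (maxmem != 0)    /* overrides the MSCT-derived value */
            (void) printf("plat_dr_physmax = %llu pages\n",
                (unsigned long long)btop(maxmem));
        return (0);
    }
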
@@ -2224,6 +2272,99 @@ process_slit(struct slit *tp)
tp->number * tp->number);
}
+static struct msct *
+process_msct(struct msct *tp)
+{
+ int last_seen = 0;
+ int proc_num = 0;
+ struct msct_proximity_domain *item, *end;
+ extern uint64_t plat_dr_options;
+
+ ASSERT(tp != NULL);
+
+ end = (void *)(tp->hdr.len + (uintptr_t)tp);
+ for (item = (void *)((uintptr_t)tp + tp->proximity_domain_offset);
+ item < end;
+ item = (void *)(item->length + (uintptr_t)item)) {
+ /*
+ * Sanity check according to section 5.2.19.1 of ACPI 4.0.
+ * Revision 1
+ * Length 22
+ */
+ if (item->revision != 1 || item->length != 22) {
+ cmn_err(CE_CONT,
+ "?boot: unknown proximity domain structure in MSCT "
+ "with rev(%d), len(%d).\n",
+ (int)item->revision, (int)item->length);
+ return (NULL);
+ } else if (item->domain_min > item->domain_max) {
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT "
+ "with domain_min(%u), domain_max(%u).\n",
+ item->domain_min, item->domain_max);
+ return (NULL);
+ } else if (item->domain_min != last_seen) {
+ /*
+ * Items must be organized in ascending order of the
+ * proximity domain enumerations.
+ */
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT,"
+			    " items are not organized in ascending order.\n");
+ return (NULL);
+ }
+
+ /*
+ * If processor_max is 0 then there would be no CPUs in this
+ * domain.
+ */
+ if (item->processor_max != 0) {
+ proc_num += (item->domain_max - item->domain_min + 1) *
+ item->processor_max;
+ }
+
+ last_seen = item->domain_max - item->domain_min + 1;
+ /*
+ * Break out if all proximity domains have been processed.
+ * Some BIOSes may have unused items at the end of MSCT table.
+ */
+ if (last_seen > tp->maximum_proximity_domains) {
+ break;
+ }
+ }
+ if (last_seen != tp->maximum_proximity_domains + 1) {
+ cmn_err(CE_CONT,
+ "?boot: invalid proximity domain structure in MSCT, "
+ "proximity domain count doesn't match.\n");
+ return (NULL);
+ }
+
+ /*
+ * Set plat-max-ncpus property if it hasn't been set yet.
+ */
+ if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
+ if (proc_num != 0) {
+ bsetpropsi(PLAT_MAX_NCPUS_NAME, proc_num);
+ }
+ }
+
+ /*
+ * Use Maximum Physical Address from the MSCT table as upper limit for
+	 * memory hot-adding by default. It may be overridden by the value from
+ * the SRAT table or the "plat-dr-physmax" boot option.
+ */
+ plat_dr_physmax = btop(tp->maximum_physical_address + 1);
+
+ /*
+ * Existence of MSCT implies CPU/memory hotplug-capability for the
+ * platform.
+ */
+ plat_dr_options |= PLAT_DR_FEATURE_CPU;
+ plat_dr_options |= PLAT_DR_FEATURE_MEMORY;
+
+ return (tp);
+}
+
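
process_msct() insists that the proximity-domain entries be gapless and
ascending, then sums (domain_max - domain_min + 1) * processor_max to bound
the possible CPU count. A simplified standalone walk of the same idea (the
kernel's last_seen bookkeeping differs slightly in detail):

    #include <stdio.h>
    #include <stdint.h>

    struct dom {                /* mirrors the msct fields used */
        uint32_t min, max;      /* inclusive domain range */
        uint32_t proc_max;      /* max CPUs per domain in this range */
    };

    int
    main(void)
    {
        /* two entries covering domains 0-1 and 2-3 */
        struct dom doms[] = { { 0, 1, 16 }, { 2, 3, 16 } };
        uint32_t last_seen = 0, proc_num = 0;

        for (size_t i = 0; i < sizeof (doms) / sizeof (doms[0]); i++) {
            if (doms[i].min != last_seen) {    /* ascending, gapless */
                (void) fprintf(stderr, "MSCT entries out of order\n");
                return (1);
            }
            proc_num += (doms[i].max - doms[i].min + 1) * doms[i].proc_max;
            last_seen = doms[i].max + 1;
        }
        (void) printf("plat-max-ncpus candidate: %u\n", proc_num); /* 64 */
        return (0);
    }
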
#else /* __xpv */
static void
enumerate_xen_cpus()
@@ -2233,7 +2374,7 @@ enumerate_xen_cpus()
/*
* User-set boot-ncpus overrides enumeration
*/
- if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+ if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
return;
/*
@@ -2246,7 +2387,7 @@ enumerate_xen_cpus()
if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
max_id = id;
- bsetpropsi("boot-ncpus", max_id+1);
+ bsetpropsi(BOOT_NCPUS_NAME, max_id+1);
}
#endif /* __xpv */
@@ -2257,6 +2398,9 @@ build_firmware_properties(void)
struct table_header *tp = NULL;
#ifndef __xpv
+ if ((msct_ptr = (struct msct *)find_fw_table("MSCT")) != NULL)
+ msct_ptr = process_msct(msct_ptr);
+
if ((tp = find_fw_table("APIC")) != NULL)
process_madt((struct madt *)tp);
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index 1525b7859d..02596478c0 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* LOCALITY GROUP (LGROUP) PLATFORM SUPPORT FOR X86/AMD64 PLATFORMS
@@ -88,14 +92,18 @@
* - lgrp_plat_lat_stats.latencies[][] Table of latencies between same and
* different nodes indexed by node ID
*
- * - lgrp_plat_node_cnt Number of NUMA nodes in system
+ * - lgrp_plat_node_cnt Number of NUMA nodes in system for
+ * non-DR-capable systems,
+ * maximum possible number of NUMA nodes
+ * in system for DR capable systems.
*
* - lgrp_plat_node_domain[] Node ID to proximity domain ID mapping
* table indexed by node ID (only used
* for SRAT)
*
- * - lgrp_plat_node_memory[] Table with physical address range for
- * each node indexed by node ID
+ * - lgrp_plat_memnode_info[] Table with physical address range for
+ * each memory node indexed by memory node
+ * ID
*
* The code is implemented to make the following always be true:
*
@@ -113,26 +121,41 @@
* equivalent node ID since we want to keep the node IDs numbered from 0 to
* <number of nodes - 1> to minimize cost of searching and potentially space.
*
- * The code below really tries to do the above. However, the virtual memory
- * system expects the memnodes which describe the physical address range for
- * each NUMA node to be arranged in ascending order by physical address. (:-(
- * Otherwise, the kernel will panic in different semi-random places in the VM
- * system.
+ * With the introduction of support of memory DR operations on x86 platforms,
+ * things get a little complicated. The addresses of hot-added memory may not
+ * be contiguous with other memory connected to the same lgrp node. In other
+ * words, memory addresses may get interleaved among lgrp nodes after memory
+ * DR operations. To work around this limitation, we have extended the
+ * relationship between lgrp node and memory node from 1:1 map to 1:N map,
+ * which means there may be multiple memory nodes associated with an lgrp
+ * node after memory DR operations.
*
- * Consequently, this module has to try to sort the nodes in ascending order by
- * each node's starting physical address to try to meet this "constraint" in
- * the VM system (see lgrp_plat_node_sort()). Also, the lowest numbered
- * proximity domain ID in the system is deteremined and used to make the lowest
- * numbered proximity domain map to node 0 in hopes that the proximity domains
- * are sorted in ascending order by physical address already even if their IDs
- * don't start at 0 (see NODE_DOMAIN_HASH() and lgrp_plat_srat_domains()).
- * Finally, it is important to note that these workarounds may not be
- * sufficient if/when memory hotplugging is supported and the VM system may
- * ultimately need to be fixed to handle this....
+ * To minimize the code changes to support memory DR operations, the
+ * following policies have been adopted.
+ * 1) On non-DR-capable systems, the relationship among lgroup platform handle,
+ * node ID and memnode ID is still kept as:
+ * lgroup platform handle == node ID == memnode ID
+ * 2) For memory present at boot time on DR capable platforms, the relationship
+ * is still kept as is.
+ * lgroup platform handle == node ID == memnode ID
+ * 3) For hot-added memory, the relationship between lgrp ID and memnode ID has
+ * been changed from 1:1 map to 1:N map. Memnode IDs [0 - lgrp_plat_node_cnt)
+ * are reserved for memory present at boot time, and memnode IDs
+ * [lgrp_plat_node_cnt, max_mem_nodes) are used to dynamically allocate
+ * memnode ID for hot-added memory.
+ * 4) All boot code having the assumption "node ID == memnode ID" can live as
+ *    is, because node ID is always equal to memnode ID at boot time.
+ * 5) The lgrp_plat_memnode_info_update(), plat_pfn_to_mem_node() and
+ * lgrp_plat_mem_size() related logics have been enhanced to deal with
+ * the 1:N map relationship.
+ * 6) The latency probing logic, which assumes "node ID == memnode ID" and
+ *    may be called at run time, is disabled if memory DR operations are
+ *    enabled.
*/
#include <sys/archsystm.h> /* for {in,out}{b,w,l}() */
+#include <sys/atomic.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <sys/controlregs.h>
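
Under the 1:N policy described above, each memnode now carries the handle of
its owning lgrp, so plat_mem_node_to_lgrphand() becomes a table lookup rather
than an identity map. A standalone sketch with a fabricated table: memnodes 0
and 1 hold boot memory for lgrps 0 and 1, while memnode 2 is hot-added memory
that also belongs to lgrp 0.

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t pfn_t;

    struct memnode {
        pfn_t start, end;   /* page range covered by this memnode */
        int exists;
        int lgrphand;       /* owning lgrp; several memnodes may share one */
    };

    static struct memnode memnode_info[] = {
        { 0x00000, 0x3ffff, 1, 0 },
        { 0x40000, 0x7ffff, 1, 1 },
        { 0x80000, 0xbffff, 1, 0 },    /* hot-added, still lgrp 0 */
    };

    static int
    pfn_to_lgrp(pfn_t pfn)
    {
        size_t n = sizeof (memnode_info) / sizeof (memnode_info[0]);

        for (size_t i = 0; i < n; i++) {
            if (memnode_info[i].exists && pfn >= memnode_info[i].start &&
                pfn <= memnode_info[i].end)
                return (memnode_info[i].lgrphand);
        }
        return (-1);
    }

    int
    main(void)
    {
        (void) printf("pfn 0x90000 -> lgrp %d\n", pfn_to_lgrp(0x90000));
        return (0);
    }
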
@@ -143,6 +166,7 @@
#include <sys/memlist.h>
#include <sys/memnode.h>
#include <sys/mman.h>
+#include <sys/note.h>
#include <sys/pci_cfgspace.h>
#include <sys/pci_impl.h>
#include <sys/param.h>
@@ -158,7 +182,8 @@
#include <vm/seg_kmem.h>
#include <vm/vm_dep.h>
-#include "acpi_fw.h" /* for SRAT and SLIT */
+#include <sys/acpidev.h>
+#include "acpi_fw.h" /* for SRAT, SLIT and MSCT */
#define MAX_NODES 8
@@ -187,7 +212,6 @@
((lgrp_plat_prox_domain_min == UINT32_MAX) ? (domain) % node_cnt : \
((domain) - lgrp_plat_prox_domain_min) % node_cnt)
-
/*
* CPU to node ID mapping structure (only used with SRAT)
*/
@@ -239,14 +263,16 @@ typedef struct node_domain_map {
} node_domain_map_t;
/*
- * Node ID and starting and ending page for physical memory in node
+ * Node ID and starting and ending page for physical memory in memory node
*/
-typedef struct node_phys_addr_map {
+typedef struct memnode_phys_addr_map {
pfn_t start;
pfn_t end;
int exists;
uint32_t prox_domain;
-} node_phys_addr_map_t;
+ uint32_t device_id;
+ uint_t lgrphand;
+} memnode_phys_addr_map_t;
/*
* Number of CPUs for which we got APIC IDs
@@ -278,7 +304,7 @@ static node_domain_map_t lgrp_plat_node_domain[MAX_NODES];
/*
* Physical address range for memory in each node
*/
-static node_phys_addr_map_t lgrp_plat_node_memory[MAX_NODES];
+static memnode_phys_addr_map_t lgrp_plat_memnode_info[MAX_MEM_NODES];
/*
* Statistics gotten from probing
@@ -306,6 +332,17 @@ static int lgrp_plat_srat_error = 0;
static int lgrp_plat_slit_error = 0;
/*
+ * Whether lgrp topology has been flattened to 2 levels.
+ */
+static int lgrp_plat_topo_flatten = 0;
+
+
+/*
+ * Maximum memory node ID in use.
+ */
+static uint_t lgrp_plat_max_mem_node;
+
+/*
* Allocate lgroup array statically
*/
static lgrp_t lgrp_space[NLGRP];
@@ -318,7 +355,7 @@ static int nlgrps_alloc;
int lgrp_plat_domain_min_enable = 1;
/*
- * Number of nodes in system
+ * Maximum possible number of nodes in system
*/
uint_t lgrp_plat_node_cnt = 1;
@@ -343,11 +380,12 @@ int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES;
int lgrp_plat_probe_nreads = LGRP_PLAT_PROBE_NREADS;
/*
- * Enable use of ACPI System Resource Affinity Table (SRAT) and System
- * Locality Information Table (SLIT)
+ * Enable use of ACPI System Resource Affinity Table (SRAT), System
+ * Locality Information Table (SLIT) and Maximum System Capability Table (MSCT)
*/
int lgrp_plat_srat_enable = 1;
int lgrp_plat_slit_enable = 1;
+int lgrp_plat_msct_enable = 1;
/*
* mnode_xwa: set to non-zero value to initiate workaround if large pages are
@@ -370,12 +408,10 @@ struct lgrp_stats lgrp_stats[NLGRP];
*/
void plat_build_mem_nodes(struct memlist *list);
-int plat_lgrphand_to_mem_node(lgrp_handle_t hand);
+int plat_mnode_xcheck(pfn_t pfncnt);
lgrp_handle_t plat_mem_node_to_lgrphand(int mnode);
-int plat_mnode_xcheck(pfn_t pfncnt);
-
int plat_pfn_to_mem_node(pfn_t pfn);
/*
@@ -420,11 +456,11 @@ static int lgrp_plat_domain_to_node(node_domain_map_t *node_domain,
static void lgrp_plat_get_numa_config(void);
-static void lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
+static void lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats,
lgrp_plat_probe_stats_t *probe_stats);
-static int lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
+static int lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats);
static void lgrp_plat_main_init(void);
@@ -434,13 +470,13 @@ static pgcnt_t lgrp_plat_mem_size_default(lgrp_handle_t, lgrp_mem_query_t);
static int lgrp_plat_node_domain_update(node_domain_map_t *node_domain,
int node_cnt, uint32_t domain);
-static int lgrp_plat_node_memory_update(node_domain_map_t *node_domain,
- int node_cnt, node_phys_addr_map_t *node_memory, uint64_t start,
- uint64_t end, uint32_t domain);
+static int lgrp_plat_memnode_info_update(node_domain_map_t *node_domain,
+ int node_cnt, memnode_phys_addr_map_t *memnode_info, int memnode_cnt,
+ uint64_t start, uint64_t end, uint32_t domain, uint32_t device_id);
static void lgrp_plat_node_sort(node_domain_map_t *node_domain,
int node_cnt, cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
int cpu_node_nentries, lgrp_plat_probe_mem_config_t *probe_mem_config,
@@ -448,24 +484,32 @@ static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node,
static int lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node);
-static int lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
- node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats);
+static int lgrp_plat_process_slit(struct slit *tp,
+ node_domain_map_t *node_domain, uint_t node_cnt,
+ memnode_phys_addr_map_t *memnode_info,
+ lgrp_plat_latency_stats_t *lat_stats);
+
+static int lgrp_plat_process_sli(uint32_t domain, uchar_t *sli_info,
+ uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt,
+ lgrp_plat_latency_stats_t *lat_stats);
-static int lgrp_plat_process_srat(struct srat *tp,
+static int lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
uint32_t *prox_domain_min, node_domain_map_t *node_domain,
cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static void lgrp_plat_release_bootstrap(void);
static int lgrp_plat_srat_domains(struct srat *tp,
uint32_t *prox_domain_min);
-static void lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
- lgrp_plat_latency_stats_t *lat_stats);
+static int lgrp_plat_msct_domains(struct msct *tp,
+ uint32_t *prox_domain_min);
+
+static void lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats);
static void opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
- node_phys_addr_map_t *node_memory);
+ memnode_phys_addr_map_t *memnode_info);
static hrtime_t opt_probe_vendor(int dest_node, int nreads);
@@ -529,10 +573,10 @@ plat_build_mem_nodes(struct memlist *list)
* This shouldn't happen and rest of code can't deal
* with this if it does.
*/
- if (node < 0 || node >= lgrp_plat_node_cnt ||
- !lgrp_plat_node_memory[node].exists ||
- cur_start < lgrp_plat_node_memory[node].start ||
- cur_start > lgrp_plat_node_memory[node].end) {
+ if (node < 0 || node >= lgrp_plat_max_mem_node ||
+ !lgrp_plat_memnode_info[node].exists ||
+ cur_start < lgrp_plat_memnode_info[node].start ||
+ cur_start > lgrp_plat_memnode_info[node].end) {
cmn_err(CE_PANIC, "Don't know which memnode "
"to add installed memory address 0x%lx\n",
cur_start);
@@ -543,9 +587,9 @@ plat_build_mem_nodes(struct memlist *list)
*/
cur_end = end;
endcnt = 0;
- if (lgrp_plat_node_memory[node].exists &&
- cur_end > lgrp_plat_node_memory[node].end) {
- cur_end = lgrp_plat_node_memory[node].end;
+ if (lgrp_plat_memnode_info[node].exists &&
+ cur_end > lgrp_plat_memnode_info[node].end) {
+ cur_end = lgrp_plat_memnode_info[node].end;
if (mnode_xwa > 1) {
/*
* sacrifice the last page in each
@@ -572,16 +616,6 @@ plat_build_mem_nodes(struct memlist *list)
}
-int
-plat_lgrphand_to_mem_node(lgrp_handle_t hand)
-{
- if (max_mem_nodes == 1)
- return (0);
-
- return ((int)hand);
-}
-
-
/*
* plat_mnode_xcheck: checks the node memory ranges to see if there is a pfncnt
* range of pages aligned on pfncnt that crosses an node boundary. Returns 1 if
@@ -593,9 +627,9 @@ plat_mnode_xcheck(pfn_t pfncnt)
int node, prevnode = -1, basenode;
pfn_t ea, sa;
- for (node = 0; node < lgrp_plat_node_cnt; node++) {
+ for (node = 0; node < lgrp_plat_max_mem_node; node++) {
- if (lgrp_plat_node_memory[node].exists == 0)
+ if (lgrp_plat_memnode_info[node].exists == 0)
continue;
if (prevnode == -1) {
@@ -605,23 +639,23 @@ plat_mnode_xcheck(pfn_t pfncnt)
}
/* assume x86 node pfn ranges are in increasing order */
- ASSERT(lgrp_plat_node_memory[node].start >
- lgrp_plat_node_memory[prevnode].end);
+ ASSERT(lgrp_plat_memnode_info[node].start >
+ lgrp_plat_memnode_info[prevnode].end);
/*
* continue if the starting address of node is not contiguous
* with the previous node.
*/
- if (lgrp_plat_node_memory[node].start !=
- (lgrp_plat_node_memory[prevnode].end + 1)) {
+ if (lgrp_plat_memnode_info[node].start !=
+ (lgrp_plat_memnode_info[prevnode].end + 1)) {
basenode = node;
prevnode = node;
continue;
}
/* check if the starting address of node is pfncnt aligned */
- if ((lgrp_plat_node_memory[node].start & (pfncnt - 1)) != 0) {
+ if ((lgrp_plat_memnode_info[node].start & (pfncnt - 1)) != 0) {
/*
* at this point, node starts at an unaligned boundary
@@ -630,14 +664,14 @@ plat_mnode_xcheck(pfn_t pfncnt)
* range of length pfncnt that crosses this boundary.
*/
- sa = P2ALIGN(lgrp_plat_node_memory[prevnode].end,
+ sa = P2ALIGN(lgrp_plat_memnode_info[prevnode].end,
pfncnt);
- ea = P2ROUNDUP((lgrp_plat_node_memory[node].start),
+ ea = P2ROUNDUP((lgrp_plat_memnode_info[node].start),
pfncnt);
ASSERT((ea - sa) == pfncnt);
- if (sa >= lgrp_plat_node_memory[basenode].start &&
- ea <= (lgrp_plat_node_memory[node].end + 1)) {
+ if (sa >= lgrp_plat_memnode_info[basenode].start &&
+ ea <= (lgrp_plat_memnode_info[node].end + 1)) {
/*
* large page found to cross mnode boundary.
* Return Failure if workaround not enabled.
@@ -659,9 +693,10 @@ plat_mem_node_to_lgrphand(int mnode)
if (max_mem_nodes == 1)
return (LGRP_DEFAULT_HANDLE);
- return ((lgrp_handle_t)mnode);
-}
+ ASSERT(0 <= mnode && mnode < lgrp_plat_max_mem_node);
+ return ((lgrp_handle_t)(lgrp_plat_memnode_info[mnode].lgrphand));
+}
int
plat_pfn_to_mem_node(pfn_t pfn)
@@ -671,22 +706,23 @@ plat_pfn_to_mem_node(pfn_t pfn)
if (max_mem_nodes == 1)
return (0);
- for (node = 0; node < lgrp_plat_node_cnt; node++) {
+ for (node = 0; node < lgrp_plat_max_mem_node; node++) {
/*
* Skip nodes with no memory
*/
- if (!lgrp_plat_node_memory[node].exists)
+ if (!lgrp_plat_memnode_info[node].exists)
continue;
- if (pfn >= lgrp_plat_node_memory[node].start &&
- pfn <= lgrp_plat_node_memory[node].end)
+ membar_consumer();
+ if (pfn >= lgrp_plat_memnode_info[node].start &&
+ pfn <= lgrp_plat_memnode_info[node].end)
return (node);
}
/*
* Didn't find memnode where this PFN lives which should never happen
*/
- ASSERT(node < lgrp_plat_node_cnt);
+ ASSERT(node < lgrp_plat_max_mem_node);
return (-1);
}
@@ -698,7 +734,6 @@ plat_pfn_to_mem_node(pfn_t pfn)
/*
* Allocate additional space for an lgroup.
*/
-/* ARGSUSED */
lgrp_t *
lgrp_plat_alloc(lgrp_id_t lgrpid)
{
@@ -713,23 +748,198 @@ lgrp_plat_alloc(lgrp_id_t lgrpid)
/*
* Platform handling for (re)configuration changes
+ *
+ * Mechanism to protect lgrp_plat_cpu_node[] at CPU hotplug:
+ * 1) Use cpu_lock to synchronize between lgrp_plat_config() and
+ * lgrp_plat_cpu_to_hand().
+ * 2) Disable latency probing logic by making sure that the flag
+ * LGRP_PLAT_PROBE_ENABLE is cleared.
+ *
+ * Mechanism to protect lgrp_plat_memnode_info[] at memory hotplug:
+ * 1) Only inserts into lgrp_plat_memnode_info at memory hotplug, no removal.
+ * 2) Only expansion to existing entries, no shrinking.
+ * 3) On writing side, DR framework ensures that lgrp_plat_config() is called
+ * in single-threaded context. And membar_producer() is used to ensure that
+ * all changes are visible to other CPUs before setting the "exists" flag.
+ * 4) On reading side, membar_consumer() after checking the "exists" flag
+ *    ensures that the right values are retrieved.
+ *
+ * Mechanism to protect lgrp_plat_node_domain[] at hotplug:
+ * 1) Only insertion into lgrp_plat_node_domain at hotplug, no removal.
+ * 2) On writing side, it's single-threaded and membar_producer() is used to
+ * ensure all changes are visible to other CPUs before setting the "exists"
+ * flag.
+ * 3) On reading side, membar_consumer() after checking the "exists" flag
+ *    ensures that the right values are retrieved.
*/
-/* ARGSUSED */
void
lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg)
{
+#ifdef __xpv
+ _NOTE(ARGUNUSED(flag, arg));
+#else
+ int rc, node;
+ cpu_t *cp;
+ void *hdl = NULL;
+ uchar_t *sliptr = NULL;
+ uint32_t domain, apicid, slicnt = 0;
+ update_membounds_t *mp;
+
+ extern int acpidev_dr_get_cpu_numa_info(cpu_t *, void **, uint32_t *,
+ uint32_t *, uint32_t *, uchar_t **);
+ extern void acpidev_dr_free_cpu_numa_info(void *);
+
+ /*
+ * This interface is used to support CPU/memory DR operations.
+ * Don't bother here if it's still during boot or only one lgrp node
+ * is supported.
+ */
+ if (!lgrp_topo_initialized || lgrp_plat_node_cnt == 1)
+ return;
+
+ switch (flag) {
+ case LGRP_CONFIG_CPU_ADD:
+ cp = (cpu_t *)arg;
+ ASSERT(cp != NULL);
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* Check whether CPU already exists. */
+ ASSERT(!lgrp_plat_cpu_node[cp->cpu_id].exists);
+ if (lgrp_plat_cpu_node[cp->cpu_id].exists) {
+ cmn_err(CE_WARN,
+ "!lgrp: CPU(%d) already exists in cpu_node map.",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Query CPU lgrp information. */
+ rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain,
+ &slicnt, &sliptr);
+ ASSERT(rc == 0);
+ if (rc != 0) {
+ cmn_err(CE_WARN,
+ "!lgrp: failed to query lgrp info for CPU(%d).",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Update node to proximity domain mapping */
+ node = lgrp_plat_domain_to_node(lgrp_plat_node_domain,
+ lgrp_plat_node_cnt, domain);
+ if (node == -1) {
+ node = lgrp_plat_node_domain_update(
+ lgrp_plat_node_domain, lgrp_plat_node_cnt, domain);
+ ASSERT(node != -1);
+ if (node == -1) {
+ acpidev_dr_free_cpu_numa_info(hdl);
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "node_domain map for domain(%u).", domain);
+ break;
+ }
+ }
+
+ /* Update latency information among lgrps. */
+ if (slicnt != 0 && sliptr != NULL) {
+ if (lgrp_plat_process_sli(domain, sliptr, slicnt,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt,
+ &lgrp_plat_lat_stats) != 0) {
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "latency information for domain (%u).",
+ domain);
+ }
+ }
+
+ /* Update CPU to node mapping. */
+ lgrp_plat_cpu_node[cp->cpu_id].prox_domain = domain;
+ lgrp_plat_cpu_node[cp->cpu_id].node = node;
+ lgrp_plat_cpu_node[cp->cpu_id].apicid = apicid;
+ lgrp_plat_cpu_node[cp->cpu_id].exists = 1;
+ lgrp_plat_apic_ncpus++;
+
+ acpidev_dr_free_cpu_numa_info(hdl);
+ break;
+
+ case LGRP_CONFIG_CPU_DEL:
+ cp = (cpu_t *)arg;
+ ASSERT(cp != NULL);
+ ASSERT(MUTEX_HELD(&cpu_lock));
+
+ /* Check whether CPU exists. */
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].exists);
+ if (!lgrp_plat_cpu_node[cp->cpu_id].exists) {
+ cmn_err(CE_WARN,
+ "!lgrp: CPU(%d) doesn't exist in cpu_node map.",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Query CPU lgrp information. */
+ rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain,
+ NULL, NULL);
+ ASSERT(rc == 0);
+ if (rc != 0) {
+ cmn_err(CE_WARN,
+ "!lgrp: failed to query lgrp info for CPU(%d).",
+ cp->cpu_id);
+ break;
+ }
+
+ /* Update map. */
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].apicid == apicid);
+ ASSERT(lgrp_plat_cpu_node[cp->cpu_id].prox_domain == domain);
+ lgrp_plat_cpu_node[cp->cpu_id].exists = 0;
+ lgrp_plat_cpu_node[cp->cpu_id].apicid = UINT32_MAX;
+ lgrp_plat_cpu_node[cp->cpu_id].prox_domain = UINT32_MAX;
+ lgrp_plat_cpu_node[cp->cpu_id].node = UINT_MAX;
+ lgrp_plat_apic_ncpus--;
+
+ acpidev_dr_free_cpu_numa_info(hdl);
+ break;
+
+ case LGRP_CONFIG_MEM_ADD:
+ mp = (update_membounds_t *)arg;
+ ASSERT(mp != NULL);
+
+ /* Update latency information among lgrps. */
+ if (mp->u_sli_cnt != 0 && mp->u_sli_ptr != NULL) {
+ if (lgrp_plat_process_sli(mp->u_domain,
+ mp->u_sli_ptr, mp->u_sli_cnt,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt,
+ &lgrp_plat_lat_stats) != 0) {
+ cmn_err(CE_WARN, "!lgrp: failed to update "
+ "latency information for domain (%u).",
+				    mp->u_domain);
+ }
+ }
+
+ if (lgrp_plat_memnode_info_update(lgrp_plat_node_domain,
+ lgrp_plat_node_cnt, lgrp_plat_memnode_info, max_mem_nodes,
+ mp->u_base, mp->u_base + mp->u_length,
+ mp->u_domain, mp->u_device_id) < 0) {
+ cmn_err(CE_WARN,
+			    "!lgrp: failed to update memnode information for "
+ "memory (0x%" PRIx64 " - 0x%" PRIx64 ").",
+ mp->u_base, mp->u_base + mp->u_length);
+ }
+ break;
+
+ default:
+ break;
+ }
+#endif /* __xpv */
}
/*
* Return the platform handle for the lgroup containing the given CPU
*/
-/* ARGSUSED */
lgrp_handle_t
lgrp_plat_cpu_to_hand(processorid_t id)
{
lgrp_handle_t hand;
+ ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));
+
if (lgrp_plat_node_cnt == 1)
return (LGRP_DEFAULT_HANDLE);
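
The block comment at the top of lgrp_plat_config() describes a write-once
publication protocol: fill in the entry, membar_producer(), then set "exists";
readers test "exists" and membar_consumer() before trusting the payload (see
lgrp_plat_domain_to_node() and plat_pfn_to_mem_node() below). A C11 analogue
using fences in place of the kernel's membar routines:

    #include <stdatomic.h>
    #include <stdio.h>

    struct entry {
        unsigned prox_domain;      /* payload */
        volatile int exists;       /* publication flag */
    };

    static void
    publish(struct entry *e, unsigned domain)
    {
        e->prox_domain = domain;
        atomic_thread_fence(memory_order_release);  /* membar_producer() */
        e->exists = 1;
    }

    static int
    lookup(struct entry *e, unsigned *domainp)
    {
        if (!e->exists)
            return (0);
        atomic_thread_fence(memory_order_acquire);  /* membar_consumer() */
        *domainp = e->prox_domain;
        return (1);
    }

    int
    main(void)
    {
        struct entry e = { 0, 0 };
        unsigned d;

        publish(&e, 7);
        if (lookup(&e, &d))
            (void) printf("domain %u visible\n", d);
        return (0);
    }
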
@@ -763,6 +973,7 @@ lgrp_plat_init(lgrp_init_stages_t stage)
*/
lgrp_plat_node_cnt = max_mem_nodes = 1;
#else /* __xpv */
+
/*
* Get boot property for lgroup topology height limit
*/
@@ -782,14 +993,32 @@ lgrp_plat_init(lgrp_init_stages_t stage)
lgrp_plat_slit_enable = (int)value;
/*
+ * Get boot property for enabling/disabling MSCT
+ */
+ if (bootprop_getval(BP_LGRP_MSCT_ENABLE, &value) == 0)
+ lgrp_plat_msct_enable = (int)value;
+
+ /*
* Initialize as a UMA machine
*/
if (lgrp_topo_ht_limit() == 1) {
lgrp_plat_node_cnt = max_mem_nodes = 1;
+ lgrp_plat_max_mem_node = 1;
return;
}
lgrp_plat_get_numa_config();
+
+ /*
+ * Each lgrp node needs MAX_MEM_NODES_PER_LGROUP memnodes
+ * to support memory DR operations if memory DR is enabled.
+ */
+ lgrp_plat_max_mem_node = lgrp_plat_node_cnt;
+ if (plat_dr_support_memory() && lgrp_plat_node_cnt != 1) {
+ max_mem_nodes = MAX_MEM_NODES_PER_LGROUP *
+ lgrp_plat_node_cnt;
+ ASSERT(max_mem_nodes <= MAX_MEM_NODES);
+ }
#endif /* __xpv */
break;
@@ -817,7 +1046,6 @@ lgrp_plat_init(lgrp_init_stages_t stage)
* specific, so platform gets to decide its value. It would be nice if the
* number was at least proportional to make comparisons more meaningful though.
*/
-/* ARGSUSED */
int
lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
{
@@ -844,13 +1072,29 @@ lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
/*
* Probe from current CPU if its lgroup latencies haven't been set yet
- * and we are trying to get latency from current CPU to some node
+ * and we are trying to get latency from current CPU to some node.
+ * Avoid probing if CPU/memory DR is enabled.
*/
- node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node,
- lgrp_plat_cpu_node_nentries);
- ASSERT(node >= 0 && node < lgrp_plat_node_cnt);
- if (lgrp_plat_lat_stats.latencies[src][src] == 0 && node == src)
- lgrp_plat_probe();
+ if (lgrp_plat_lat_stats.latencies[src][src] == 0) {
+ /*
+ * Latency information should be updated by lgrp_plat_config()
+ * for DR operations. Something is wrong if reaches here.
+		 * for DR operations. Something is wrong if we reach here.
+ */
+ if (plat_dr_support_cpu() || plat_dr_support_memory()) {
+ ASSERT(lgrp_plat_lat_stats.latencies[src][src]);
+ cmn_err(CE_WARN,
+ "lgrp: failed to get latency information, "
+			    "falling back to two-level topology.");
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
+ } else {
+ node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node,
+ lgrp_plat_cpu_node_nentries);
+ ASSERT(node >= 0 && node < lgrp_plat_node_cnt);
+ if (node == src)
+ lgrp_plat_probe();
+ }
+ }
return (lgrp_plat_lat_stats.latencies[src][dest]);
}
@@ -859,21 +1103,45 @@ lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
/*
* Return the maximum number of lgrps supported by the platform.
* Before lgrp topology is known it returns an estimate based on the number of
- * nodes. Once topology is known it returns the actual maximim number of lgrps
- * created. Since x86/x64 doesn't support Dynamic Reconfiguration (DR) and
- * dynamic addition of new nodes, this number may not grow during system
- * lifetime (yet).
+ * nodes. Once topology is known it returns:
+ * 1) the actual maximum number of lgrps created if CPU/memory DR operations
+ *    are not supported.
+ * 2) the maximum possible number of lgrps if CPU/memory DR operations are
+ * supported.
*/
int
lgrp_plat_max_lgrps(void)
{
- return (lgrp_topo_initialized ?
- lgrp_alloc_max + 1 :
- lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1);
+ if (!lgrp_topo_initialized || plat_dr_support_cpu() ||
+ plat_dr_support_memory()) {
+ return (lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1);
+ } else {
+ return (lgrp_alloc_max + 1);
+ }
}
/*
+ * Count number of memory pages (_t) for mnode (_n) and query type (_q).
+ */
+#define _LGRP_PLAT_MEM_SIZE(_n, _q, _t) \
+ if (mem_node_config[_n].exists) { \
+ switch (_q) { \
+ case LGRP_MEM_SIZE_FREE: \
+ _t += MNODE_PGCNT(_n); \
+ break; \
+ case LGRP_MEM_SIZE_AVAIL: \
+ _t += mem_node_memlist_pages(_n, phys_avail); \
+ break; \
+ case LGRP_MEM_SIZE_INSTALL: \
+ _t += mem_node_memlist_pages(_n, phys_install); \
+ break; \
+ default: \
+ break; \
+ } \
+ }
+
+/*
* Return the number of free pages in an lgroup.
*
* For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize
@@ -896,25 +1164,19 @@ lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query)
return (lgrp_plat_mem_size_default(plathand, query));
if (plathand != LGRP_NULL_HANDLE) {
- mnode = plat_lgrphand_to_mem_node(plathand);
- if (mnode >= 0 && mem_node_config[mnode].exists) {
- switch (query) {
- case LGRP_MEM_SIZE_FREE:
- npgs = MNODE_PGCNT(mnode);
- break;
- case LGRP_MEM_SIZE_AVAIL:
- npgs = mem_node_memlist_pages(mnode,
- phys_avail);
- break;
- case LGRP_MEM_SIZE_INSTALL:
- npgs = mem_node_memlist_pages(mnode,
- phys_install);
- break;
- default:
- break;
- }
+ /* Count memory node present at boot. */
+ mnode = (int)plathand;
+ ASSERT(mnode < lgrp_plat_node_cnt);
+ _LGRP_PLAT_MEM_SIZE(mnode, query, npgs);
+
+ /* Count possible hot-added memory nodes. */
+ for (mnode = lgrp_plat_node_cnt;
+ mnode < lgrp_plat_max_mem_node; mnode++) {
+ if (lgrp_plat_memnode_info[mnode].lgrphand == plathand)
+ _LGRP_PLAT_MEM_SIZE(mnode, query, npgs);
}
}
+
return (npgs);
}
@@ -923,7 +1185,6 @@ lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query)
* Return the platform handle of the lgroup that contains the physical memory
* corresponding to the given page frame number
*/
-/* ARGSUSED */
lgrp_handle_t
lgrp_plat_pfn_to_hand(pfn_t pfn)
{
@@ -979,6 +1240,10 @@ lgrp_plat_probe(void)
max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2)
return;
+ /* SRAT and SLIT should be enabled if DR operations are enabled. */
+ if (plat_dr_support_cpu() || plat_dr_support_memory())
+ return;
+
/*
* Determine ID of node containing current CPU
*/
@@ -1051,14 +1316,13 @@ lgrp_plat_probe(void)
* - Fallback to just optimizing for local and remote if
* latencies didn't look right
*/
- lgrp_plat_latency_adjust(lgrp_plat_node_memory, &lgrp_plat_lat_stats,
+ lgrp_plat_latency_adjust(lgrp_plat_memnode_info, &lgrp_plat_lat_stats,
&lgrp_plat_probe_stats);
lgrp_plat_probe_stats.probe_error_code =
- lgrp_plat_latency_verify(lgrp_plat_node_memory,
+ lgrp_plat_latency_verify(lgrp_plat_memnode_info,
&lgrp_plat_lat_stats);
if (lgrp_plat_probe_stats.probe_error_code)
- lgrp_plat_2level_setup(lgrp_plat_node_memory,
- &lgrp_plat_lat_stats);
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
}
@@ -1078,8 +1342,11 @@ lgrp_plat_root_hand(void)
/*
- * Update CPU to node mapping for given CPU and proximity domain (and returns
- * negative numbers for errors and positive ones for success)
+ * Update CPU to node mapping for given CPU and proximity domain.
+ * Return values:
+ * - zero for success
+ * - positive numbers for warnings
+ * - negative numbers for errors
*/
static int
lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
@@ -1119,6 +1386,13 @@ lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
return (1);
/*
+ * It's invalid to have more than one entry with the same
+ * local APIC ID in SRAT table.
+ */
+ if (cpu_node[i].node != UINT_MAX)
+ return (-2);
+
+ /*
* Fill in node and proximity domain IDs
*/
cpu_node[i].prox_domain = domain;
@@ -1128,9 +1402,11 @@ lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt,
}
/*
- * Return error when entry for APIC ID wasn't found in table
+	 * It's possible that an apicid doesn't exist in the cpu_node map
+	 * because the user limited the number of CPUs powered on at boot by
+	 * specifying the boot_ncpus kernel option.
*/
- return (-2);
+ return (2);
}
@@ -1189,9 +1465,11 @@ lgrp_plat_domain_to_node(node_domain_map_t *node_domain, int node_cnt,
*/
node = start = NODE_DOMAIN_HASH(domain, node_cnt);
do {
- if (node_domain[node].prox_domain == domain &&
- node_domain[node].exists)
- return (node);
+ if (node_domain[node].exists) {
+ membar_consumer();
+ if (node_domain[node].prox_domain == domain)
+ return (node);
+ }
node = (node + 1) % node_cnt;
} while (node != start);
return (-1);
@@ -1219,19 +1497,26 @@ lgrp_plat_get_numa_config(void)
if (lgrp_plat_apic_ncpus > 0) {
int retval;
+ /* Reserve enough resources if CPU DR is enabled. */
+ if (plat_dr_support_cpu() && max_ncpus > lgrp_plat_apic_ncpus)
+ lgrp_plat_cpu_node_nentries = max_ncpus;
+ else
+ lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus;
+
/*
* Temporarily allocate boot memory to use for CPU to node
* mapping since kernel memory allocator isn't alive yet
*/
lgrp_plat_cpu_node = (cpu_node_map_t *)BOP_ALLOC(bootops,
- NULL, lgrp_plat_apic_ncpus * sizeof (cpu_node_map_t),
+ NULL, lgrp_plat_cpu_node_nentries * sizeof (cpu_node_map_t),
sizeof (int));
ASSERT(lgrp_plat_cpu_node != NULL);
if (lgrp_plat_cpu_node) {
- lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus;
bzero(lgrp_plat_cpu_node, lgrp_plat_cpu_node_nentries *
sizeof (cpu_node_map_t));
+ } else {
+ lgrp_plat_cpu_node_nentries = 0;
}
/*
@@ -1240,10 +1525,10 @@ lgrp_plat_get_numa_config(void)
*/
(void) lgrp_plat_process_cpu_apicids(lgrp_plat_cpu_node);
- retval = lgrp_plat_process_srat(srat_ptr,
+ retval = lgrp_plat_process_srat(srat_ptr, msct_ptr,
&lgrp_plat_prox_domain_min,
lgrp_plat_node_domain, lgrp_plat_cpu_node,
- lgrp_plat_apic_ncpus, lgrp_plat_node_memory);
+ lgrp_plat_apic_ncpus, lgrp_plat_memnode_info);
if (retval <= 0) {
lgrp_plat_srat_error = retval;
lgrp_plat_node_cnt = 1;
@@ -1260,15 +1545,12 @@ lgrp_plat_get_numa_config(void)
if ((lgrp_plat_apic_ncpus <= 0 || lgrp_plat_srat_error != 0) &&
is_opteron())
opt_get_numa_config(&lgrp_plat_node_cnt, &lgrp_plat_mem_intrlv,
- lgrp_plat_node_memory);
+ lgrp_plat_memnode_info);
/*
* Don't bother to setup system for multiple lgroups and only use one
* memory node when memory is interleaved between any nodes or there is
* only one NUMA node
- *
- * NOTE: May need to change this for Dynamic Reconfiguration (DR)
- * when and if it happens for x86/x64
*/
if (lgrp_plat_mem_intrlv || lgrp_plat_node_cnt == 1) {
lgrp_plat_node_cnt = max_mem_nodes = 1;
@@ -1287,7 +1569,7 @@ lgrp_plat_get_numa_config(void)
/*
* There should be one memnode (physical page free list(s)) for
- * each node
+ * each node if memory DR is disabled.
*/
max_mem_nodes = lgrp_plat_node_cnt;
@@ -1303,8 +1585,35 @@ lgrp_plat_get_numa_config(void)
* exists
*/
lgrp_plat_slit_error = lgrp_plat_process_slit(slit_ptr,
- lgrp_plat_node_cnt, lgrp_plat_node_memory,
+ lgrp_plat_node_domain, lgrp_plat_node_cnt, lgrp_plat_memnode_info,
&lgrp_plat_lat_stats);
+
+ /*
+ * Disable support of CPU/memory DR operations if multiple locality
+	 * domains exist in the system and either of the following is true.
+	 * 1) Failed to process the SLIT table.
+	 * 2) Latency probing is enabled by the user.
+ */
+ if (lgrp_plat_node_cnt > 1 &&
+ (plat_dr_support_cpu() || plat_dr_support_memory())) {
+ if (!lgrp_plat_slit_enable || lgrp_plat_slit_error != 0 ||
+ !lgrp_plat_srat_enable || lgrp_plat_srat_error != 0 ||
+ lgrp_plat_apic_ncpus <= 0) {
+ cmn_err(CE_CONT,
+ "?lgrp: failed to process ACPI SRAT/SLIT table, "
+			    "disabling support of CPU/memory DR operations.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ } else if (lgrp_plat_probe_flags & LGRP_PLAT_PROBE_ENABLE) {
+ cmn_err(CE_CONT,
+ "?lgrp: latency probing enabled by user, "
+			    "disabling support of CPU/memory DR operations.");
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ }
+ }
+
+	/* Done if the SLIT table was processed successfully. */
if (lgrp_plat_slit_error == 0)
return;
@@ -1366,7 +1675,7 @@ int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT;
* latencies be same
*/
static void
-lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
+lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats, lgrp_plat_probe_stats_t *probe_stats)
{
int i;
@@ -1387,7 +1696,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
if (max_mem_nodes == 1)
return;
- ASSERT(node_memory != NULL && lat_stats != NULL &&
+ ASSERT(memnode_info != NULL && lat_stats != NULL &&
probe_stats != NULL);
/*
@@ -1395,11 +1704,11 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
* (ie. latency(node0, node1) == latency(node1, node0))
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t1 = lat_stats->latencies[i][j];
@@ -1452,7 +1761,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
/*
* Pick one pair of nodes (i, j)
@@ -1469,7 +1778,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
for (k = 0; k < lgrp_plat_node_cnt; k++) {
for (l = 0; l < lgrp_plat_node_cnt; l++) {
- if (!node_memory[l].exists)
+ if (!memnode_info[l].exists)
continue;
/*
* Pick another pair of nodes (k, l)
@@ -1545,7 +1854,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
min = -1;
max = 0;
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
t = lat_stats->latencies[i][i];
if (t == 0)
@@ -1559,7 +1868,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
for (i = 0; i < lgrp_plat_node_cnt; i++) {
int local;
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
local = lat_stats->latencies[i][i];
@@ -1590,7 +1899,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
lat_stats->latency_max = 0;
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t = lat_stats->latencies[i][j];
if (t > lat_stats->latency_max)
@@ -1616,7 +1925,7 @@ lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory,
* -3 Local >= remote
*/
static int
-lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
+lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info,
lgrp_plat_latency_stats_t *lat_stats)
{
int i;
@@ -1624,7 +1933,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
u_longlong_t t1;
u_longlong_t t2;
- ASSERT(node_memory != NULL && lat_stats != NULL);
+ ASSERT(memnode_info != NULL && lat_stats != NULL);
/*
* Nothing to do when this is an UMA machine, lgroup topology is
@@ -1639,10 +1948,10 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
* (ie. latency(node0, node1) == latency(node1, node0))
*/
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t1 = lat_stats->latencies[i][j];
t2 = lat_stats->latencies[j][i];
@@ -1659,7 +1968,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
*/
t1 = lat_stats->latencies[0][0];
for (i = 1; i < lgrp_plat_node_cnt; i++) {
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
t2 = lat_stats->latencies[i][i];
@@ -1681,7 +1990,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory,
if (t1) {
for (i = 0; i < lgrp_plat_node_cnt; i++) {
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
+ if (!memnode_info[j].exists)
continue;
t2 = lat_stats->latencies[i][j];
if (i == j || t2 == 0)
@@ -1729,8 +2038,7 @@ lgrp_plat_main_init(void)
*/
if (ht_limit == 2 && lgrp_plat_lat_stats.latency_min == -1 &&
lgrp_plat_lat_stats.latency_max == 0)
- lgrp_plat_2level_setup(lgrp_plat_node_memory,
- &lgrp_plat_lat_stats);
+ lgrp_plat_2level_setup(&lgrp_plat_lat_stats);
return;
}
@@ -1769,7 +2077,7 @@ lgrp_plat_main_init(void)
* Skip this node and leave its probe page NULL
* if it doesn't have any memory
*/
- mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i);
+ mnode = i;
if (!mem_node_config[mnode].exists) {
lgrp_plat_probe_mem_config.probe_va[i] = NULL;
continue;
@@ -1815,10 +2123,11 @@ lgrp_plat_main_init(void)
* This is a copy of the MAX_MEM_NODES == 1 version of the routine
* used when MPO is disabled (i.e. single lgroup) or this is the root lgroup
*/
-/* ARGSUSED */
static pgcnt_t
lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query)
{
+ _NOTE(ARGUNUSED(lgrphand));
+
struct memlist *mlist;
pgcnt_t npgs = 0;
extern struct memlist *phys_avail;
@@ -1866,8 +2175,9 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt,
* domain and return node ID which is index into mapping table.
*/
if (!node_domain[node].exists) {
- node_domain[node].exists = 1;
node_domain[node].prox_domain = domain;
+ membar_producer();
+ node_domain[node].exists = 1;
return (node);
}
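
The node_domain and memnode_info updates above and below switch to a publish pattern: payload fields are stored first, membar_producer() orders those stores, and the exists flag is set last, so a concurrent reader that sees exists != 0 also sees initialized fields. A minimal sketch of the pattern with hypothetical type and function names (readers are assumed to pair it with membar_consumer()):

```c
#include <sys/types.h>
#include <sys/atomic.h>

/* Hypothetical entry mirroring the map structures above. */
typedef struct entry {
	uint32_t	prox_domain;	/* payload */
	volatile int	exists;		/* publish flag, set last */
} entry_t;

static void
entry_publish(entry_t *ep, uint32_t domain)
{
	ep->prox_domain = domain;	/* initialize payload first */
	membar_producer();		/* drain payload stores before flag */
	ep->exists = 1;			/* entry is now visible to readers */
}

static int
entry_lookup(entry_t *ep, uint32_t *domainp)
{
	if (!ep->exists)
		return (0);
	membar_consumer();		/* order flag load before payload */
	*domainp = ep->prox_domain;
	return (1);
}
```
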
@@ -1887,18 +2197,17 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt,
return (-1);
}
-
/*
* Update node memory information for given proximity domain with specified
* starting and ending physical address range (and return positive numbers for
* success and negative ones for errors)
*/
static int
-lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
- node_phys_addr_map_t *node_memory, uint64_t start, uint64_t end,
- uint32_t domain)
+lgrp_plat_memnode_info_update(node_domain_map_t *node_domain, int node_cnt,
+ memnode_phys_addr_map_t *memnode_info, int memnode_cnt, uint64_t start,
+ uint64_t end, uint32_t domain, uint32_t device_id)
{
- int node;
+ int node, mnode;
/*
* Get node number for proximity domain
@@ -1912,13 +2221,53 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
}
/*
+ * This function is called during boot if device_id is
+ * ACPI_MEMNODE_DEVID_BOOT, otherwise it's called at runtime for
+ * memory DR operations.
+ */
+ if (device_id != ACPI_MEMNODE_DEVID_BOOT) {
+ ASSERT(lgrp_plat_max_mem_node <= memnode_cnt);
+
+ for (mnode = lgrp_plat_node_cnt;
+ mnode < lgrp_plat_max_mem_node; mnode++) {
+ if (memnode_info[mnode].exists &&
+ memnode_info[mnode].prox_domain == domain &&
+ memnode_info[mnode].device_id == device_id) {
+ if (btop(start) < memnode_info[mnode].start)
+ memnode_info[mnode].start = btop(start);
+ if (btop(end) > memnode_info[mnode].end)
+ memnode_info[mnode].end = btop(end);
+ return (1);
+ }
+ }
+
+ if (lgrp_plat_max_mem_node >= memnode_cnt) {
+ return (-3);
+ } else {
+ lgrp_plat_max_mem_node++;
+ memnode_info[mnode].start = btop(start);
+ memnode_info[mnode].end = btop(end);
+ memnode_info[mnode].prox_domain = domain;
+ memnode_info[mnode].device_id = device_id;
+ memnode_info[mnode].lgrphand = node;
+ membar_producer();
+ memnode_info[mnode].exists = 1;
+ return (0);
+ }
+ }
+
+ /*
* Create entry in table for node if it doesn't exist
*/
- if (!node_memory[node].exists) {
- node_memory[node].exists = 1;
- node_memory[node].start = btop(start);
- node_memory[node].end = btop(end);
- node_memory[node].prox_domain = domain;
+ ASSERT(node < memnode_cnt);
+ if (!memnode_info[node].exists) {
+ memnode_info[node].start = btop(start);
+ memnode_info[node].end = btop(end);
+ memnode_info[node].prox_domain = domain;
+ memnode_info[node].device_id = device_id;
+ memnode_info[node].lgrphand = node;
+ membar_producer();
+ memnode_info[node].exists = 1;
return (0);
}
@@ -1928,11 +2277,11 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
* There may be more than one SRAT memory entry for a domain, so we may
* need to update existing start or end address for the node.
*/
- if (node_memory[node].prox_domain == domain) {
- if (btop(start) < node_memory[node].start)
- node_memory[node].start = btop(start);
- if (btop(end) > node_memory[node].end)
- node_memory[node].end = btop(end);
+ if (memnode_info[node].prox_domain == domain) {
+ if (btop(start) < memnode_info[node].start)
+ memnode_info[node].start = btop(start);
+ if (btop(end) > memnode_info[node].end)
+ memnode_info[node].end = btop(end);
return (1);
}
return (-2);
@@ -1940,16 +2289,14 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt,
/*
- * Have to sort node by starting physical address because VM system (physical
- * page free list management) assumes and expects memnodes to be sorted in
- * ascending order by physical address. If not, the kernel will panic in
- * potentially a number of different places. (:-(
- * NOTE: This workaround will not be sufficient if/when hotplugging memory is
- * supported on x86/x64.
+ * Have to sort nodes by starting physical address because plat_mnode_xcheck()
+ * assumes and expects memnodes to be sorted in ascending order by physical
+ * address.
*/
static void
lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
- cpu_node_map_t *cpu_node, int cpu_count, node_phys_addr_map_t *node_memory)
+ cpu_node_map_t *cpu_node, int cpu_count,
+ memnode_phys_addr_map_t *memnode_info)
{
boolean_t found;
int i;
@@ -1959,7 +2306,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
boolean_t swapped;
if (!lgrp_plat_node_sort_enable || node_cnt <= 1 ||
- node_domain == NULL || node_memory == NULL)
+ node_domain == NULL || memnode_info == NULL)
return;
/*
@@ -1970,7 +2317,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Skip entries that don't exist
*/
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
/*
@@ -1978,7 +2325,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
*/
found = B_FALSE;
for (j = i + 1; j < node_cnt; j++) {
- if (node_memory[j].exists) {
+ if (memnode_info[j].exists) {
found = B_TRUE;
break;
}
@@ -1994,7 +2341,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
* Not sorted if starting address of current entry is bigger
* than starting address of next existing entry
*/
- if (node_memory[i].start > node_memory[j].start) {
+ if (memnode_info[i].start > memnode_info[j].start) {
sorted = B_FALSE;
break;
}
@@ -2017,7 +2364,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Skip entries that don't exist
*/
- if (!node_memory[i].exists)
+ if (!memnode_info[i].exists)
continue;
/*
@@ -2025,7 +2372,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
*/
found = B_FALSE;
for (j = i + 1; j <= n; j++) {
- if (node_memory[j].exists) {
+ if (memnode_info[j].exists) {
found = B_TRUE;
break;
}
@@ -2037,8 +2384,8 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
if (found == B_FALSE)
break;
- if (node_memory[i].start > node_memory[j].start) {
- node_phys_addr_map_t save_addr;
+ if (memnode_info[i].start > memnode_info[j].start) {
+ memnode_phys_addr_map_t save_addr;
node_domain_map_t save_node;
/*
@@ -2054,12 +2401,12 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt,
/*
* Swap node to physical memory assignments
*/
- bcopy(&node_memory[i], &save_addr,
- sizeof (node_phys_addr_map_t));
- bcopy(&node_memory[j], &node_memory[i],
- sizeof (node_phys_addr_map_t));
- bcopy(&save_addr, &node_memory[j],
- sizeof (node_phys_addr_map_t));
+ bcopy(&memnode_info[i], &save_addr,
+ sizeof (memnode_phys_addr_map_t));
+ bcopy(&memnode_info[j], &memnode_info[i],
+ sizeof (memnode_phys_addr_map_t));
+ bcopy(&save_addr, &memnode_info[j],
+ sizeof (memnode_phys_addr_map_t));
swapped = B_TRUE;
}
}
@@ -2226,11 +2573,12 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
return (-1);
/*
- * Calculate number of entries in array and return when there's just
- * one CPU since that's not very interesting for NUMA
+	 * Calculate the number of entries in the array and return early when
+	 * the system is not interesting for NUMA, that is, when it has only
+	 * one CPU and doesn't support CPU hotplug.
*/
n = boot_prop_len / sizeof (uint8_t);
- if (n == 1)
+ if (n == 1 && !plat_dr_support_cpu())
return (-2);
/*
@@ -2243,21 +2591,31 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
* Just return number of CPU APIC IDs if CPU to node mapping table is
* NULL
*/
- if (cpu_node == NULL)
- return (n);
+ if (cpu_node == NULL) {
+ if (plat_dr_support_cpu() && n >= boot_ncpus) {
+ return (boot_ncpus);
+ } else {
+ return (n);
+ }
+ }
/*
* Fill in CPU to node ID mapping table with APIC ID for each CPU
*/
for (i = 0; i < n; i++) {
+		/* With CPU DR enabled, add only the boot CPUs into the map. */
+ if (plat_dr_support_cpu() && i >= boot_ncpus)
+ break;
cpu_node[i].exists = 1;
cpu_node[i].apicid = cpu_apicid_array[i];
+ cpu_node[i].prox_domain = UINT32_MAX;
+ cpu_node[i].node = UINT_MAX;
}
/*
* Return number of CPUs based on number of APIC IDs
*/
- return (n);
+ return (i);
}
@@ -2266,11 +2624,14 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node)
* NUMA node is from each other
*/
static int
-lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
- node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats)
+lgrp_plat_process_slit(struct slit *tp,
+ node_domain_map_t *node_domain, uint_t node_cnt,
+ memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats)
{
int i;
int j;
+ int src;
+ int dst;
int localities;
hrtime_t max;
hrtime_t min;
@@ -2284,8 +2645,6 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
return (2);
localities = tp->number;
- if (localities != node_cnt)
- return (3);
min = lat_stats->latency_min;
max = lat_stats->latency_max;
@@ -2295,11 +2654,21 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
*/
slit_entries = tp->entry;
for (i = 0; i < localities; i++) {
+ src = lgrp_plat_domain_to_node(node_domain,
+ node_cnt, i);
+ if (src == -1)
+ continue;
+
for (j = 0; j < localities; j++) {
uint8_t latency;
+ dst = lgrp_plat_domain_to_node(node_domain,
+ node_cnt, j);
+ if (dst == -1)
+ continue;
+
latency = slit_entries[(i * localities) + j];
- lat_stats->latencies[i][j] = latency;
+ lat_stats->latencies[src][dst] = latency;
if (latency < min || min == -1)
min = latency;
if (latency > max)
@@ -2310,7 +2679,7 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
/*
* Verify that latencies/distances given in SLIT look reasonable
*/
- retval = lgrp_plat_latency_verify(node_memory, lat_stats);
+ retval = lgrp_plat_latency_verify(memnode_info, lat_stats);
if (retval) {
/*
@@ -2334,20 +2703,110 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt,
/*
+ * Update lgrp latencies according to information returned by ACPI _SLI method.
+ */
+static int
+lgrp_plat_process_sli(uint32_t domain_id, uchar_t *sli_info,
+ uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt,
+ lgrp_plat_latency_stats_t *lat_stats)
+{
+ int i;
+ int src, dst;
+ uint8_t latency;
+ hrtime_t max, min;
+
+ if (lat_stats == NULL || sli_info == NULL ||
+ sli_cnt == 0 || domain_id >= sli_cnt)
+ return (-1);
+
+ src = lgrp_plat_domain_to_node(node_domain, node_cnt, domain_id);
+ if (src == -1) {
+ src = lgrp_plat_node_domain_update(node_domain, node_cnt,
+ domain_id);
+ if (src == -1)
+ return (-1);
+ }
+
+ /*
+ * Don't update latency info if topology has been flattened to 2 levels.
+ */
+ if (lgrp_plat_topo_flatten != 0) {
+ return (0);
+ }
+
+ /*
+ * Latency information for proximity domain is ready.
+ * TODO: support adjusting latency information at runtime.
+ */
+ if (lat_stats->latencies[src][src] != 0) {
+ return (0);
+ }
+
+ /* Validate latency information. */
+ for (i = 0; i < sli_cnt; i++) {
+ if (i == domain_id) {
+ if (sli_info[i] != ACPI_SLIT_SELF_LATENCY ||
+ sli_info[sli_cnt + i] != ACPI_SLIT_SELF_LATENCY) {
+ return (-1);
+ }
+ } else {
+ if (sli_info[i] <= ACPI_SLIT_SELF_LATENCY ||
+ sli_info[sli_cnt + i] <= ACPI_SLIT_SELF_LATENCY ||
+ sli_info[i] != sli_info[sli_cnt + i]) {
+ return (-1);
+ }
+ }
+ }
+
+ min = lat_stats->latency_min;
+ max = lat_stats->latency_max;
+ for (i = 0; i < sli_cnt; i++) {
+ dst = lgrp_plat_domain_to_node(node_domain, node_cnt, i);
+ if (dst == -1)
+ continue;
+
+ ASSERT(sli_info[i] == sli_info[sli_cnt + i]);
+
+ /* Update row in latencies matrix. */
+ latency = sli_info[i];
+ lat_stats->latencies[src][dst] = latency;
+ if (latency < min || min == -1)
+ min = latency;
+ if (latency > max)
+ max = latency;
+
+ /* Update column in latencies matrix. */
+ latency = sli_info[sli_cnt + i];
+ lat_stats->latencies[dst][src] = latency;
+ if (latency < min || min == -1)
+ min = latency;
+ if (latency > max)
+ max = latency;
+ }
+ lat_stats->latency_min = min;
+ lat_stats->latency_max = max;
+
+ return (0);
+}
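
The validation loop above encodes the expected _SLI buffer layout: 2 * sli_cnt one-byte entries, where the first half holds latencies from this domain to every domain and the second half the reverse direction, with the self entry in each half equal to the ACPI self latency of 10. A hypothetical helper making that layout explicit:

```c
#include <sys/types.h>

#define	EXAMPLE_SELF_LATENCY	10	/* ACPI_SLIT_SELF_LATENCY */

/*
 * Fetch both directions for domain i from a 2 * sli_cnt byte _SLI
 * buffer; returns -1 on a malformed buffer. Hypothetical helper.
 */
static int
sli_lookup(const uchar_t *sli_info, uint32_t sli_cnt, uint32_t domain_id,
    uint32_t i, uchar_t *to, uchar_t *from)
{
	if (i >= sli_cnt)
		return (-1);
	*to = sli_info[i];		/* this domain -> domain i */
	*from = sli_info[sli_cnt + i];	/* domain i -> this domain */
	if (i == domain_id &&
	    (*to != EXAMPLE_SELF_LATENCY || *from != EXAMPLE_SELF_LATENCY))
		return (-1);		/* self latency must be 10 */
	return (0);
}
```
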
+
+
+/*
* Read ACPI System Resource Affinity Table (SRAT) to determine which CPUs
* and memory are local to each other in the same NUMA node and return number
* of nodes
*/
static int
-lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
- node_domain_map_t *node_domain, cpu_node_map_t *cpu_node, int cpu_count,
- node_phys_addr_map_t *node_memory)
+lgrp_plat_process_srat(struct srat *tp, struct msct *mp,
+ uint32_t *prox_domain_min, node_domain_map_t *node_domain,
+ cpu_node_map_t *cpu_node, int cpu_count,
+ memnode_phys_addr_map_t *memnode_info)
{
struct srat_item *srat_end;
int i;
struct srat_item *item;
int node_cnt;
int proc_entry_count;
+ int rc;
/*
* Nothing to do when no SRAT or disabled
@@ -2356,11 +2815,21 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
return (-1);
/*
- * Determine number of nodes by counting number of proximity domains in
- * SRAT and return if number of nodes is 1 or less since don't need to
- * read SRAT then
+ * Try to get domain information from MSCT table.
+ * ACPI4.0: OSPM will use information provided by the MSCT only
+ * when the System Resource Affinity Table (SRAT) exists.
+ */
+ node_cnt = lgrp_plat_msct_domains(mp, prox_domain_min);
+ if (node_cnt <= 0) {
+ /*
+ * Determine number of nodes by counting number of proximity
+ * domains in SRAT.
+ */
+ node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min);
+ }
+ /*
+	 * Return if the number of nodes is 1 or less, since the SRAT need
+	 * not be read in that case.
*/
- node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min);
if (node_cnt == 1)
return (1);
else if (node_cnt <= 0)
@@ -2397,16 +2866,17 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
}
apic_id = item->i.p.apic_id;
- if (lgrp_plat_cpu_node_update(node_domain, node_cnt,
- cpu_node, cpu_count, apic_id, domain) < 0)
+ rc = lgrp_plat_cpu_node_update(node_domain, node_cnt,
+ cpu_node, cpu_count, apic_id, domain);
+ if (rc < 0)
return (-3);
-
- proc_entry_count++;
+ else if (rc == 0)
+ proc_entry_count++;
break;
case SRAT_MEMORY: /* memory entry */
if (!(item->i.m.flags & SRAT_ENABLED) ||
- node_memory == NULL)
+ memnode_info == NULL)
break;
/*
@@ -2418,10 +2888,52 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
length = item->i.m.len;
end = start + length - 1;
- if (lgrp_plat_node_memory_update(node_domain, node_cnt,
- node_memory, start, end, domain) < 0)
+ /*
+ * According to ACPI 4.0, both ENABLE and HOTPLUG flags
+ * may be set for memory address range entries in SRAT
+ * table which are reserved for memory hot plug.
+ * We intersect memory address ranges in SRAT table
+ * with memory ranges in physinstalled to filter out
+ * memory address ranges reserved for hot plug.
+ */
+ if (item->i.m.flags & SRAT_HOT_PLUG) {
+ uint64_t rstart = UINT64_MAX;
+ uint64_t rend = 0;
+ struct memlist *ml;
+ extern struct bootops *bootops;
+
+ memlist_read_lock();
+ for (ml = bootops->boot_mem->physinstalled;
+ ml; ml = ml->ml_next) {
+ uint64_t tstart = ml->ml_address;
+ uint64_t tend;
+
+ tend = ml->ml_address + ml->ml_size;
+ if (tstart > end || tend < start)
+ continue;
+ if (start > tstart)
+ tstart = start;
+ if (rstart > tstart)
+ rstart = tstart;
+ if (end < tend)
+ tend = end;
+ if (rend < tend)
+ rend = tend;
+ }
+ memlist_read_unlock();
+ start = rstart;
+ end = rend;
+ /* Skip this entry if no memory installed. */
+ if (start > end)
+ break;
+ }
+
+ if (lgrp_plat_memnode_info_update(node_domain,
+ node_cnt, memnode_info, node_cnt,
+ start, end, domain, ACPI_MEMNODE_DEVID_BOOT) < 0)
return (-4);
break;
+
case SRAT_X2APIC: /* x2apic CPU entry */
if (!(item->i.xp.flags & SRAT_ENABLED) ||
cpu_node == NULL)
@@ -2434,11 +2946,12 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
domain = item->i.xp.domain;
apic_id = item->i.xp.x2apic_id;
- if (lgrp_plat_cpu_node_update(node_domain, node_cnt,
- cpu_node, cpu_count, apic_id, domain) < 0)
+ rc = lgrp_plat_cpu_node_update(node_domain, node_cnt,
+ cpu_node, cpu_count, apic_id, domain);
+ if (rc < 0)
return (-3);
-
- proc_entry_count++;
+ else if (rc == 0)
+ proc_entry_count++;
break;
default:
@@ -2460,7 +2973,7 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min,
* physical address
*/
lgrp_plat_node_sort(node_domain, node_cnt, cpu_node, cpu_count,
- node_memory);
+ memnode_info);
return (node_cnt);
}
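
For SRAT entries flagged SRAT_HOT_PLUG, the code above clamps the SRAT range against every installed memory range and keeps the smallest and largest clamped bounds, so ranges reserved purely for future hot plug drop out. A stand-alone sketch of the same computation over plain arrays (hypothetical types and names):

```c
#include <sys/types.h>

typedef struct range {
	uint64_t	base;
	uint64_t	len;
} range_t;

/*
 * Clamp [start, end] (inclusive) against each installed range and
 * accumulate the lowest/highest clamped bounds; returns -1 when no
 * installed memory overlaps the SRAT range. Hypothetical helper.
 */
static int
clip_to_installed(const range_t *ml, int nranges, uint64_t start,
    uint64_t end, uint64_t *rs, uint64_t *re)
{
	uint64_t rstart = (uint64_t)-1;		/* UINT64_MAX */
	uint64_t rend = 0;
	int i;

	for (i = 0; i < nranges; i++) {
		uint64_t tstart = ml[i].base;
		uint64_t tend = ml[i].base + ml[i].len;

		if (tstart > end || tend < start)
			continue;		/* no overlap */
		if (tstart < start)
			tstart = start;		/* clamp low end */
		if (tend > end)
			tend = end;		/* clamp high end */
		if (tstart < rstart)
			rstart = tstart;
		if (tend > rend)
			rend = tend;
	}
	if (rstart > rend)
		return (-1);			/* nothing installed */
	*rs = rstart;
	*re = rend;
	return (0);
}
```
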
@@ -2665,27 +3178,70 @@ lgrp_plat_srat_domains(struct srat *tp, uint32_t *prox_domain_min)
/*
+ * Parse domain information in ACPI Maximum System Capability Table (MSCT).
+ * MSCT table has been verified in function process_msct() in fakebop.c.
+ */
+static int
+lgrp_plat_msct_domains(struct msct *tp, uint32_t *prox_domain_min)
+{
+ int last_seen = 0;
+ uint32_t proxmin = UINT32_MAX;
+ struct msct_proximity_domain *item, *end;
+
+ if (tp == NULL || lgrp_plat_msct_enable == 0)
+ return (-1);
+
+ if (tp->maximum_proximity_domains >= MAX_NODES) {
+ cmn_err(CE_CONT,
+ "?lgrp: too many proximity domains (%d), max %d supported, "
+ "disable support of CPU/memory DR operations.",
+ tp->maximum_proximity_domains + 1, MAX_NODES);
+ plat_dr_disable_cpu();
+ plat_dr_disable_memory();
+ return (-1);
+ }
+
+ if (prox_domain_min != NULL) {
+ end = (void *)(tp->hdr.len + (uintptr_t)tp);
+ for (item = (void *)((uintptr_t)tp +
+ tp->proximity_domain_offset); item < end;
+ item = (void *)(item->length + (uintptr_t)item)) {
+ if (item->domain_min < proxmin) {
+ proxmin = item->domain_min;
+ }
+
+ last_seen = item->domain_max - item->domain_min + 1;
+ /*
+ * Break out if all proximity domains have been
+ * processed. Some BIOSes may have unused items
+ * at the end of MSCT table.
+ */
+ if (last_seen > tp->maximum_proximity_domains) {
+ break;
+ }
+ }
+ *prox_domain_min = proxmin;
+ }
+
+ return (tp->maximum_proximity_domains + 1);
+}
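
The MSCT walk above advances by item->length bytes per record, the standard way to iterate ACPI's variable-length subtables. A generic, hypothetical walker over such a buffer, with the bounds checks such a loop needs:

```c
#include <sys/types.h>

/* Hypothetical generic header shared by variable-length subtables. */
typedef struct subtable_hdr {
	uint8_t	type;
	uint8_t	length;		/* total record length in bytes */
} subtable_hdr_t;

static void
walk_subtables(const void *buf, size_t buflen,
    void (*cb)(const subtable_hdr_t *))
{
	const uint8_t *p = buf;
	const uint8_t *end = p + buflen;

	while (p + sizeof (subtable_hdr_t) <= end) {
		const subtable_hdr_t *h = (const subtable_hdr_t *)p;

		if (h->length == 0 || p + h->length > end)
			break;		/* malformed record; stop */
		cb(h);
		p += h->length;		/* records are variable length */
	}
}
```
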
+
+
+/*
* Set lgroup latencies for 2 level lgroup topology
*/
static void
-lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
- lgrp_plat_latency_stats_t *lat_stats)
+lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats)
{
- int i;
+ int i, j;
- ASSERT(node_memory != NULL && lat_stats != NULL);
+ ASSERT(lat_stats != NULL);
if (lgrp_plat_node_cnt >= 4)
cmn_err(CE_NOTE,
"MPO only optimizing for local and remote\n");
for (i = 0; i < lgrp_plat_node_cnt; i++) {
- int j;
-
- if (!node_memory[i].exists)
- continue;
for (j = 0; j < lgrp_plat_node_cnt; j++) {
- if (!node_memory[j].exists)
- continue;
if (i == j)
lat_stats->latencies[i][j] = 2;
else
@@ -2694,7 +3250,9 @@ lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
}
lat_stats->latency_min = 2;
lat_stats->latency_max = 3;
+ /* TODO: check it. */
lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0);
+ lgrp_plat_topo_flatten = 1;
}
@@ -2884,7 +3442,7 @@ is_opteron(void)
*/
static void
opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
- node_phys_addr_map_t *node_memory)
+ memnode_phys_addr_map_t *memnode_info)
{
uint_t bus;
uint_t dev;
@@ -2999,10 +3557,10 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
(base_lo & OPT_DRAMBASE_LO_MASK_WE) == 0) {
/*
* Mark node memory as non-existent and set start and
- * end addresses to be same in node_memory[]
+ * end addresses to be same in memnode_info[]
*/
- node_memory[node].exists = 0;
- node_memory[node].start = node_memory[node].end =
+ memnode_info[node].exists = 0;
+ memnode_info[node].start = memnode_info[node].end =
(pfn_t)-1;
continue;
}
@@ -3011,11 +3569,11 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv,
* Mark node memory as existing and remember physical address
* range of each node for use later
*/
- node_memory[node].exists = 1;
+ memnode_info[node].exists = 1;
- node_memory[node].start = btop(OPT_DRAMADDR(base_hi, base_lo));
+ memnode_info[node].start = btop(OPT_DRAMADDR(base_hi, base_lo));
- node_memory[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) |
+ memnode_info[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) |
OPT_DRAMADDR_LO_MASK_OFF);
}
diff --git a/usr/src/uts/i86pc/os/machdep.c b/usr/src/uts/i86pc/os/machdep.c
index c9f97fc046..e9d08a9fa7 100644
--- a/usr/src/uts/i86pc/os/machdep.c
+++ b/usr/src/uts/i86pc/os/machdep.c
@@ -23,6 +23,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/t_lock.h>
@@ -147,6 +151,16 @@ int force_shutdown_method = AD_UNKNOWN;
char panicbuf[PANICBUFSIZE];
/*
+ * Flags to control Dynamic Reconfiguration features.
+ */
+uint64_t plat_dr_options;
+
+/*
+ * Maximum physical address for memory DR operations.
+ */
+uint64_t plat_dr_physmax;
+
+/*
* maxphys - used during physio
* klustsize - used for klustering by swapfs and specfs
*/
@@ -1390,3 +1404,33 @@ lbolt_softint_post(void)
{
(*setsoftint)(CBE_LOCK_PIL, lbolt_softint_hdl.ih_pending);
}
+
+boolean_t
+plat_dr_check_capability(uint64_t features)
+{
+ return ((plat_dr_options & features) == features);
+}
+
+boolean_t
+plat_dr_support_cpu(void)
+{
+ return (plat_dr_options & PLAT_DR_FEATURE_CPU);
+}
+
+boolean_t
+plat_dr_support_memory(void)
+{
+ return (plat_dr_options & PLAT_DR_FEATURE_MEMORY);
+}
+
+void
+plat_dr_enable_capability(uint64_t features)
+{
+ atomic_or_64(&plat_dr_options, features);
+}
+
+void
+plat_dr_disable_capability(uint64_t features)
+{
+ atomic_and_64(&plat_dr_options, ~features);
+}
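
Because plat_dr_options is only updated with atomic_or_64()/atomic_and_64() and queried with mask tests, callers can gate DR paths without a lock. A hypothetical caller requiring both feature bits, assuming the PLAT_DR_FEATURE_* definitions and declarations this patch adds to the DDI headers:

```c
#include <sys/types.h>
#include <sys/sunddi.h>

/*
 * Hypothetical lock-free gate for a combined CPU+memory DR operation;
 * plat_dr_check_capability() succeeds only when every requested
 * feature bit is set.
 */
static boolean_t
example_dr_allowed(void)
{
	return (plat_dr_check_capability(
	    PLAT_DR_FEATURE_CPU | PLAT_DR_FEATURE_MEMORY));
}
```
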
diff --git a/usr/src/uts/i86pc/os/mem_config_arch.c b/usr/src/uts/i86pc/os/mem_config_arch.c
new file mode 100644
index 0000000000..191e8cb641
--- /dev/null
+++ b/usr/src/uts/i86pc/os/mem_config_arch.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1997-1998 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/page.h>
+#include <sys/mem_config.h>
+
+/*ARGSUSED*/
+int
+arch_kphysm_del_span_ok(pfn_t base, pgcnt_t npgs)
+{
+ ASSERT(npgs != 0);
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+arch_kphysm_relocate(pfn_t base, pgcnt_t npgs)
+{
+ ASSERT(npgs != 0);
+ return (ENOTSUP);
+}
+
+int
+arch_kphysm_del_supported(void)
+{
+ return (0);
+}
diff --git a/usr/src/uts/i86pc/os/memnode.c b/usr/src/uts/i86pc/os/memnode.c
index 908b2e8cd2..35185bcd84 100644
--- a/usr/src/uts/i86pc/os/memnode.c
+++ b/usr/src/uts/i86pc/os/memnode.c
@@ -84,7 +84,7 @@ mem_node_add_slice(pfn_t start, pfn_t end)
}
mnode = PFN_2_MEM_NODE(start);
- ASSERT(mnode < max_mem_nodes);
+ ASSERT(mnode >= 0 && mnode < max_mem_nodes);
if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
/*
@@ -130,7 +130,7 @@ mem_node_del_slice(pfn_t start, pfn_t end)
}
mnode = PFN_2_MEM_NODE(start);
- ASSERT(mnode < max_mem_nodes);
+ ASSERT(mnode >= 0 && mnode < max_mem_nodes);
ASSERT(mem_node_config[mnode].exists == 1);
delta_pgcnt = end - start;
diff --git a/usr/src/uts/i86pc/os/microcode.c b/usr/src/uts/i86pc/os/microcode.c
index 6f32a86c71..90d9585640 100644
--- a/usr/src/uts/i86pc/os/microcode.c
+++ b/usr/src/uts/i86pc/os/microcode.c
@@ -141,6 +141,7 @@ void
ucode_alloc_space(cpu_t *cp)
{
ASSERT(cp->cpu_id != 0);
+ ASSERT(cp->cpu_m.mcpu_ucode_info == NULL);
cp->cpu_m.mcpu_ucode_info =
kmem_zalloc(sizeof (*cp->cpu_m.mcpu_ucode_info), KM_SLEEP);
}
@@ -148,9 +149,11 @@ ucode_alloc_space(cpu_t *cp)
void
ucode_free_space(cpu_t *cp)
{
- ASSERT(cp->cpu_id != 0);
+ ASSERT(cp->cpu_m.mcpu_ucode_info != NULL);
+ ASSERT(cp->cpu_m.mcpu_ucode_info != &cpu_ucode_info0);
kmem_free(cp->cpu_m.mcpu_ucode_info,
sizeof (*cp->cpu_m.mcpu_ucode_info));
+ cp->cpu_m.mcpu_ucode_info = NULL;
}
/*
@@ -1126,7 +1129,10 @@ ucode_check(cpu_t *cp)
uint32_t new_rev = 0;
ASSERT(cp);
- if (cp->cpu_id == 0)
+ /*
+ * Space statically allocated for BSP, ensure pointer is set
+ */
+ if (cp->cpu_id == 0 && cp->cpu_m.mcpu_ucode_info == NULL)
cp->cpu_m.mcpu_ucode_info = &cpu_ucode_info0;
uinfop = cp->cpu_m.mcpu_ucode_info;
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 3f8752638f..bf57dd3c50 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -107,6 +111,7 @@ mlsetup(struct regs *rp)
extern char t0stack[];
extern int post_fastreboot;
extern int console;
+ extern uint64_t plat_dr_options;
ASSERT_STACK_ALIGNED();
@@ -354,15 +359,70 @@ mlsetup(struct regs *rp)
prom_init("kernel", (void *)NULL);
- if (bootprop_getval("boot-ncpus", &prop_value) != 0)
+ /* User-set option overrides firmware value. */
+ if (bootprop_getval(PLAT_DR_OPTIONS_NAME, &prop_value) == 0) {
+ plat_dr_options = (uint64_t)prop_value;
+ }
+#if defined(__xpv)
+ /* No support of DR operations on xpv */
+ plat_dr_options = 0;
+#else /* __xpv */
+ /* Flag PLAT_DR_FEATURE_ENABLED should only be set by DR driver. */
+ plat_dr_options &= ~PLAT_DR_FEATURE_ENABLED;
+#ifndef __amd64
+ /* Only enable CPU/memory DR on 64 bits kernel. */
+ plat_dr_options &= ~PLAT_DR_FEATURE_MEMORY;
+ plat_dr_options &= ~PLAT_DR_FEATURE_CPU;
+#endif /* __amd64 */
+#endif /* __xpv */
+
+ /*
+ * Get value of "plat_dr_physmax" boot option.
+ * It overrides values calculated from MSCT or SRAT table.
+ */
+ if (bootprop_getval(PLAT_DR_PHYSMAX_NAME, &prop_value) == 0) {
+ plat_dr_physmax = ((uint64_t)prop_value) >> PAGESHIFT;
+ }
+
+ /* Get value of boot_ncpus. */
+ if (bootprop_getval(BOOT_NCPUS_NAME, &prop_value) != 0) {
boot_ncpus = NCPU;
- else {
+ } else {
boot_ncpus = (int)prop_value;
if (boot_ncpus <= 0 || boot_ncpus > NCPU)
boot_ncpus = NCPU;
}
- max_ncpus = boot_max_ncpus = boot_ncpus;
+ /*
+ * Set max_ncpus and boot_max_ncpus to boot_ncpus if platform doesn't
+ * support CPU DR operations.
+ */
+ if (plat_dr_support_cpu() == 0) {
+ max_ncpus = boot_max_ncpus = boot_ncpus;
+ } else {
+ if (bootprop_getval(PLAT_MAX_NCPUS_NAME, &prop_value) != 0) {
+ max_ncpus = NCPU;
+ } else {
+ max_ncpus = (int)prop_value;
+ if (max_ncpus <= 0 || max_ncpus > NCPU) {
+ max_ncpus = NCPU;
+ }
+ if (boot_ncpus > max_ncpus) {
+ boot_ncpus = max_ncpus;
+ }
+ }
+
+ if (bootprop_getval(BOOT_MAX_NCPUS_NAME, &prop_value) != 0) {
+ boot_max_ncpus = boot_ncpus;
+ } else {
+ boot_max_ncpus = (int)prop_value;
+ if (boot_max_ncpus <= 0 || boot_max_ncpus > NCPU) {
+ boot_max_ncpus = boot_ncpus;
+ } else if (boot_max_ncpus > max_ncpus) {
+ boot_max_ncpus = max_ncpus;
+ }
+ }
+ }
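
The clamping above establishes a small set of bounds: with CPU DR supported, 1 <= boot_ncpus <= max_ncpus <= NCPU and 1 <= boot_max_ncpus <= max_ncpus; without it, all three counts are equal. A hypothetical DEBUG-only self-check of those relationships (not part of the patch):

```c
#include <sys/param.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>

/* Hypothetical DEBUG-only self-check of the bounds set in mlsetup(). */
static void
check_ncpu_bounds(void)
{
	if (plat_dr_support_cpu()) {
		ASSERT(boot_ncpus >= 1 && boot_ncpus <= max_ncpus);
		ASSERT(boot_max_ncpus >= 1 && boot_max_ncpus <= max_ncpus);
		ASSERT(max_ncpus <= NCPU);
	} else {
		ASSERT(max_ncpus == boot_ncpus);
		ASSERT(boot_max_ncpus == boot_ncpus);
	}
}
```
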
/*
* Initialize the lgrp framework
diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c
index b5742b2c69..3bded8f124 100644
--- a/usr/src/uts/i86pc/os/mp_implfuncs.c
+++ b/usr/src/uts/i86pc/os/mp_implfuncs.c
@@ -22,12 +22,12 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+#define PSMI_1_7
#include <sys/vmem.h>
#include <vm/hat.h>
#include <sys/modctl.h>
#include <vm/seg_kmem.h>
-#define PSMI_1_6
#include <sys/psm.h>
#include <sys/psm_modctl.h>
#include <sys/smp_impldefs.h>
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 80e371850b..dc4023007c 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -24,11 +24,11 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/smp_impldefs.h>
#include <sys/psm.h>
#include <sys/psm_modctl.h>
@@ -157,6 +157,7 @@ void (*notify_error)(int, char *) = (void (*)(int, char *))return_instr;
void (*hrtime_tick)(void) = return_instr;
int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **) = mach_cpu_create_devinfo;
+int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **) = NULL;
/*
* True if the generic TSC code is our source of hrtime, rather than whatever
@@ -864,9 +865,17 @@ mach_get_platform(int owner)
total_ops = OFFSETOF(struct psm_ops, psm_preshutdown) /
sizeof (void (*)(void));
else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_4)
- /* no psm_preshutdown function */
+ /* no psm_intr_ops function */
total_ops = OFFSETOF(struct psm_ops, psm_intr_ops) /
sizeof (void (*)(void));
+ else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_5)
+ /* no psm_state function */
+ total_ops = OFFSETOF(struct psm_ops, psm_state) /
+ sizeof (void (*)(void));
+ else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_6)
+ /* no psm_cpu_ops function */
+ total_ops = OFFSETOF(struct psm_ops, psm_cpu_ops) /
+ sizeof (void (*)(void));
else
total_ops = sizeof (struct psm_ops) / sizeof (void (*)(void));
@@ -1042,7 +1051,11 @@ mach_smpinit(void)
cpu_id = -1;
cpu_id = (*pops->psm_get_next_processorid)(cpu_id);
- for (cnt = 0; cpu_id != -1; cnt++) {
+ /*
+ * Only add boot_ncpus CPUs to mp_cpus. Other CPUs will be handled
+ * by CPU DR driver at runtime.
+ */
+ for (cnt = 0; cpu_id != -1 && cnt < boot_ncpus; cnt++) {
CPUSET_ADD(cpumask, cpu_id);
cpu_id = (*pops->psm_get_next_processorid)(cpu_id);
}
@@ -1092,7 +1105,7 @@ mach_smpinit(void)
psm_enable_intr = pops->psm_enable_intr;
/* check for multiple CPUs */
- if (cnt < 2)
+ if (cnt < 2 && plat_dr_support_cpu() == B_FALSE)
return;
/* check for MP platforms */
@@ -1480,6 +1493,63 @@ mach_cpuid_start(processorid_t id, void *ctx)
return ((*pops->psm_cpu_start)(id, ctx));
}
+int
+mach_cpu_stop(cpu_t *cp, void *ctx)
+{
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ ASSERT(cp->cpu_id != -1);
+ request.pcr_cmd = PSM_CPU_STOP;
+ request.req.cpu_stop.cpuid = cp->cpu_id;
+ request.req.cpu_stop.ctx = ctx;
+
+ return ((*pops->psm_cpu_ops)(&request));
+}
+
+int
+mach_cpu_add(mach_cpu_add_arg_t *argp, processorid_t *cpuidp)
+{
+ int rc;
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ request.pcr_cmd = PSM_CPU_ADD;
+ request.req.cpu_add.argp = argp;
+ request.req.cpu_add.cpuid = -1;
+ rc = (*pops->psm_cpu_ops)(&request);
+ if (rc == 0) {
+ ASSERT(request.req.cpu_add.cpuid != -1);
+ *cpuidp = request.req.cpu_add.cpuid;
+ }
+
+ return (rc);
+}
+
+int
+mach_cpu_remove(processorid_t cpuid)
+{
+ struct psm_ops *pops = mach_set[0];
+ psm_cpu_request_t request;
+
+ if (pops->psm_cpu_ops == NULL) {
+ return (ENOTSUP);
+ }
+
+ request.pcr_cmd = PSM_CPU_REMOVE;
+ request.req.cpu_remove.cpuid = cpuid;
+
+ return ((*pops->psm_cpu_ops)(&request));
+}
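
mach_cpu_stop(), mach_cpu_add() and mach_cpu_remove() all marshal their arguments into a psm_cpu_request_t and hand it to the PSM module's psm_cpu_ops entry point. A hypothetical receiving side, dispatching on pcr_cmd with stubbed-out bodies (real PSM modules implement these against their interrupt hardware state):

```c
#include <sys/psm_types.h>
#include <sys/errno.h>

/* Hypothetical psm_cpu_ops handler; bodies are stubs. */
static int
example_psm_cpu_ops(psm_cpu_request_t *reqp)
{
	switch (reqp->pcr_cmd) {
	case PSM_CPU_ADD:
		/* Allocate interrupt resources, report the new CPU ID. */
		reqp->req.cpu_add.cpuid = 1;	/* stub value */
		return (0);
	case PSM_CPU_REMOVE:
		/* Release resources held for req.cpu_remove.cpuid. */
		return (0);
	case PSM_CPU_STOP:
		/* Halt req.cpu_stop.cpuid using req.cpu_stop.ctx. */
		return (0);
	default:
		return (ENOTSUP);
	}
}
```
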
+
/*
* Default handler to create device node for CPU.
* One reference count will be held on created device node.
@@ -1569,6 +1639,24 @@ mach_cpu_create_device_node(struct cpu *cp, dev_info_t **dipp)
return (rv);
}
+/*
+ * The dipp contains one of following values on return:
+ * - NULL if no device node found
+ * - pointer to device node if found
+ */
+int
+mach_cpu_get_device_node(struct cpu *cp, dev_info_t **dipp)
+{
+ *dipp = NULL;
+ if (psm_cpu_get_devinfo != NULL) {
+ if (psm_cpu_get_devinfo(cp, dipp) == PSM_SUCCESS) {
+ return (PSM_SUCCESS);
+ }
+ }
+
+ return (PSM_FAILURE);
+}
+
/*ARGSUSED*/
static int
mach_translate_irq(dev_info_t *dip, int irqno)
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index 6fc571b445..bc21812187 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* Welcome to the world of the "real mode platter".
@@ -31,6 +35,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
+#include <sys/cpu_module.h>
#include <sys/kmem.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
@@ -41,10 +46,21 @@
#include <sys/mach_mmu.h>
#include <sys/promif.h>
#include <sys/cpu.h>
+#include <sys/cpu_event.h>
+#include <sys/sunndi.h>
+#include <sys/fs/dv_node.h>
#include <vm/hat_i86.h>
+#include <vm/as.h>
+
+extern cpuset_t cpu_ready_set;
-extern void real_mode_start(void);
-extern void real_mode_end(void);
+extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
+extern void real_mode_start_cpu(void);
+extern void real_mode_start_cpu_end(void);
+extern void real_mode_stop_cpu_stage1(void);
+extern void real_mode_stop_cpu_stage1_end(void);
+extern void real_mode_stop_cpu_stage2(void);
+extern void real_mode_stop_cpu_stage2_end(void);
extern void *(*cpu_pause_func)(void *);
void rmp_gdt_init(rm_platter_t *);
@@ -61,22 +77,36 @@ int
mach_cpucontext_init(void)
{
ushort_t *vec;
+ ulong_t addr;
+ struct rm_platter *rm = (struct rm_platter *)rm_platter_va;
if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
sizeof (vec), PROT_READ | PROT_WRITE)))
return (-1);
+
/*
* setup secondary cpu bios boot up vector
+	 * Write the page offset to 0x467 and the real-mode segment of the
+	 * page base to 0x469.
*/
- *vec = (ushort_t)((caddr_t)
- ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
- + ((ulong_t)rm_platter_va & 0xf));
- vec[1] = (ushort_t)(rm_platter_pa >> 4);
+ addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
+ vec[0] = (ushort_t)(addr & PAGEOFFSET);
+ vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
warm_reset_vector = vec;
- bcopy((caddr_t)real_mode_start,
- (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
- (size_t)real_mode_end - (size_t)real_mode_start);
+ /* Map real mode platter into kas so kernel can access it. */
+ hat_devload(kas.a_hat,
+ (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
+ btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
+ HAT_LOAD_NOCONSIST);
+
+ /* Copy CPU startup code to rm_platter if it's still during boot. */
+ if (!plat_dr_enabled()) {
+ ASSERT((size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
+ bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
+ (size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu);
+ }
return (0);
}
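
The warm-reset vector at 0x467 is a real-mode far pointer: a 16-bit offset followed by a 16-bit segment, with physical address = segment * 16 + offset. The code above stores the page offset and the page-aligned part of the platter address shifted right by four. A worked example of the same encoding, using a hypothetical address:

```c
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t addr = 0x9f345;		/* hypothetical platter PA */
	uint16_t off = addr & 0xfff;		/* page offset: 0x345 */
	uint16_t seg = (addr & 0xff000) >> 4;	/* segment: 0x9f00 */

	/* Prints 9f00:0345 -> 9f345, reconstructing the address. */
	printf("%04x:%04x -> %05x\n", seg, off, seg * 16 + off);
	return (0);
}
```
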
@@ -95,12 +125,53 @@ mach_cpucontext_fini(void)
extern void *long_mode_64(void);
#endif /* __amd64 */
-void *
-mach_cpucontext_alloc(struct cpu *cp)
+/*ARGSUSED*/
+void
+rmp_gdt_init(rm_platter_t *rm)
+{
+
+#if defined(__amd64)
+ /* Use the kas address space for the CPU startup thread. */
+ if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
+ panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
+ "located above 4G in physical memory (@ 0x%lx)",
+ MAKECR3(kas.a_hat->hat_htable->ht_pfn));
+
+ /*
+ * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
+ * by code in real_mode_start_cpu():
+ *
+ * GDT[0]: NULL selector
+ * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
+ *
+ * Clear the IDT as interrupts will be off and a limit of 0 will cause
+ * the CPU to triple fault and reset on an NMI, seemingly as reasonable
+ * a course of action as any other, though it may cause the entire
+ * platform to reset in some cases...
+ */
+ rm->rm_temp_gdt[0] = 0;
+ rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
+
+ rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
+ rm->rm_temp_gdt_base = rm_platter_pa +
+ (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
+ rm->rm_temp_idt_lim = 0;
+ rm->rm_temp_idt_base = 0;
+
+ /*
+ * Since the CPU needs to jump to protected mode using an identity
+ * mapped address, we need to calculate it here.
+ */
+ rm->rm_longmode64_addr = rm_platter_pa +
+ ((uint32_t)long_mode_64 - (uint32_t)real_mode_start_cpu);
+#endif /* __amd64 */
+}
+
+static void *
+mach_cpucontext_alloc_tables(struct cpu *cp)
{
- rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
- struct cpu_tables *ct;
struct tss *ntss;
+ struct cpu_tables *ct;
/*
* Allocate space for stack, tss, gdt and idt. We round the size
@@ -109,7 +180,8 @@ mach_cpucontext_alloc(struct cpu *cp)
*/
ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
if ((uintptr_t)ct & PAGEOFFSET)
- panic("mp_startup_init: cpu%d misaligned tables", cp->cpu_id);
+ panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
+ cp->cpu_id);
ntss = cp->cpu_tss = &ct->ct_tss;
@@ -149,93 +221,174 @@ mach_cpucontext_alloc(struct cpu *cp)
set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);
+ return (ct);
+}
+
+void *
+mach_cpucontext_xalloc(struct cpu *cp, int optype)
+{
+ size_t len;
+ struct cpu_tables *ct;
+ rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+ static int cpu_halt_code_ready;
+
+ if (optype == MACH_CPUCONTEXT_OP_STOP) {
+ ASSERT(plat_dr_enabled());
+
+ /*
+ * The WARM_RESET_VECTOR has a limitation that the physical
+ * address written to it must be page-aligned. To work around
+ * this limitation, the CPU stop code has been splitted into
+ * two stages.
+ * The stage 2 code, which implements the real logic to halt
+		 * this limitation, the CPU stop code has been split into
+ * mode platter. The stage 1 code, which simply jumps to the
+ * stage 2 code in the rm_cpu_halt_code field, is copied to
+ * rm_code field in the real mode platter and it may be
+ * overwritten after the CPU has been stopped.
+ */
+ if (!cpu_halt_code_ready) {
+ /*
+ * The rm_cpu_halt_code field in the real mode platter
+ * is used by the CPU stop code only. So only copy the
+ * CPU stop stage 2 code into the rm_cpu_halt_code
+ * field on the first call.
+ */
+ len = (size_t)real_mode_stop_cpu_stage2_end -
+ (size_t)real_mode_stop_cpu_stage2;
+ ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
+ bcopy((caddr_t)real_mode_stop_cpu_stage2,
+ (caddr_t)rm->rm_cpu_halt_code, len);
+ cpu_halt_code_ready = 1;
+ }
+
+ /*
+ * The rm_code field in the real mode platter is shared by
+ * the CPU start, CPU stop, CPR and fast reboot code. So copy
+ * the CPU stop stage 1 code into the rm_code field every time.
+ */
+ len = (size_t)real_mode_stop_cpu_stage1_end -
+ (size_t)real_mode_stop_cpu_stage1;
+ ASSERT(len <= RM_PLATTER_CODE_SIZE);
+ bcopy((caddr_t)real_mode_stop_cpu_stage1,
+ (caddr_t)rm->rm_code, len);
+ rm->rm_cpu_halted = 0;
+
+ return (cp->cpu_m.mcpu_mach_ctx_ptr);
+ } else if (optype != MACH_CPUCONTEXT_OP_START) {
+ return (NULL);
+ }
+
+ /*
+ * Only need to allocate tables when starting CPU.
+ * Tables allocated when starting CPU will be reused when stopping CPU.
+ */
+ ct = mach_cpucontext_alloc_tables(cp);
+ if (ct == NULL) {
+ return (NULL);
+ }
+
+ /* Copy CPU startup code to rm_platter for CPU hot-add operations. */
+ if (plat_dr_enabled()) {
+ bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
+ (size_t)real_mode_start_cpu_end -
+ (size_t)real_mode_start_cpu);
+ }
+
/*
* Now copy all that we've set up onto the real mode platter
* for the real mode code to digest as part of starting the cpu.
*/
-
rm->rm_idt_base = cp->cpu_idt;
rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
rm->rm_gdt_base = cp->cpu_gdt;
rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
- rm->rm_pdbr = getcr3();
+ /*
+ * CPU needs to access kernel address space after powering on.
+	 * When hot-adding a CPU at runtime, directly use the top level page
+	 * table of kas rather than the return value of getcr3(), which is
+	 * the current process's top level page table and may differ from
+	 * that of kas.
+ */
+ rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
rm->rm_cpu = cp->cpu_id;
rm->rm_x86feature = x86_feature;
+
+ /*
+ * For hot-adding CPU at runtime, Machine Check and Performance Counter
+ * should be disabled. They will be enabled on demand after CPU powers
+	 * on successfully.
+ */
rm->rm_cr4 = getcr4();
+ rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
rmp_gdt_init(rm);
return (ct);
}
-/*ARGSUSED*/
void
-rmp_gdt_init(rm_platter_t *rm)
+mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
{
+ struct cpu_tables *ct = arg;
-#if defined(__amd64)
-
- if (getcr3() > 0xffffffffUL)
- panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
- "located above 4G in physical memory (@ 0x%lx)", getcr3());
-
- /*
- * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
- * by code in real_mode_start():
- *
- * GDT[0]: NULL selector
- * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
- *
- * Clear the IDT as interrupts will be off and a limit of 0 will cause
- * the CPU to triple fault and reset on an NMI, seemingly as reasonable
- * a course of action as any other, though it may cause the entire
- * platform to reset in some cases...
- */
- rm->rm_temp_gdt[0] = 0;
- rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
-
- rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
- rm->rm_temp_gdt_base = rm_platter_pa +
- (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
- rm->rm_temp_idt_lim = 0;
- rm->rm_temp_idt_base = 0;
+ ASSERT(&ct->ct_tss == cp->cpu_tss);
+ if (optype == MACH_CPUCONTEXT_OP_START) {
+ switch (err) {
+ case 0:
+ /*
+ * Save pointer for reuse when stopping CPU.
+ */
+ cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+ break;
+ case ETIMEDOUT:
+ /*
+ * The processor was poked, but failed to start before
+ * we gave up waiting for it. In case it starts later,
+ * don't free anything.
+ */
+ cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+ break;
+ default:
+ /*
+ * Some other, passive, error occurred.
+ */
+ kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+ cp->cpu_tss = NULL;
+ break;
+ }
+ } else if (optype == MACH_CPUCONTEXT_OP_STOP) {
+ switch (err) {
+ case 0:
+ /*
+ * Free resources allocated when starting CPU.
+ */
+ kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+ cp->cpu_tss = NULL;
+ cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
+ break;
+ default:
+ /*
+ * Don't touch table pointer in case of failure.
+ */
+ break;
+ }
+ } else {
+ ASSERT(0);
+ }
+}
- /*
- * Since the CPU needs to jump to protected mode using an identity
- * mapped address, we need to calculate it here.
- */
- rm->rm_longmode64_addr = rm_platter_pa +
- ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
-#endif /* __amd64 */
+void *
+mach_cpucontext_alloc(struct cpu *cp)
+{
+ return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}
-/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
- struct cpu_tables *ct = arg;
-
- ASSERT(&ct->ct_tss == cp->cpu_tss);
-
- switch (err) {
- case 0:
- break;
- case ETIMEDOUT:
- /*
- * The processor was poked, but failed to start before
- * we gave up waiting for it. In case it starts later,
- * don't free anything.
- */
- break;
- default:
- /*
- * Some other, passive, error occurred.
- */
- kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
- cp->cpu_tss = NULL;
- break;
- }
+ mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}
/*
@@ -276,25 +429,221 @@ mach_cpu_pause(volatile char *safe)
}
/*
- * Power on CPU.
+ * Power on the target CPU.
*/
-/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
- ASSERT(MUTEX_HELD(&cpu_lock));
- return (ENOTSUP); /* not supported */
+ int error;
+ cpuset_t tempset;
+ processorid_t cpuid;
+
+ ASSERT(cp != NULL);
+ cpuid = cp->cpu_id;
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ } else if (cpuid < 0 || cpuid >= max_ncpus) {
+ return (EINVAL);
+ }
+
+ /*
+	 * The current x86 implementation of mp_cpu_configure() and
+	 * mp_cpu_poweron() has a limitation: mp_cpu_poweron() may only be
+	 * called once after calling mp_cpu_configure() for a specific CPU,
+	 * because mp_cpu_poweron() destroys the data structures created by
+	 * mp_cpu_configure(). So reject the request if the CPU has already
+	 * been powered on once after calling mp_cpu_configure().
+	 * This limitation only affects the p_online syscall; the DR driver
+	 * is unaffected because it always invokes the public CPU management
+	 * interfaces in the predefined order:
+ * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
+ */
+ if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
+ /*
+ * Kick off a reap in case that helps us with
+ * later attempts ..
+ */
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ affinity_set(CPU->cpu_id);
+
+ /*
+ * Start the target CPU. No need to call mach_cpucontext_fini()
+ * if mach_cpucontext_init() fails.
+ */
+ if ((error = mach_cpucontext_init()) == 0) {
+ error = mp_start_cpu_common(cp, B_FALSE);
+ mach_cpucontext_fini();
+ }
+ if (error != 0) {
+ affinity_clear();
+ return (error);
+ }
+
+ /* Wait for the target cpu to reach READY state. */
+ tempset = cpu_ready_set;
+ while (!CPU_IN_SET(tempset, cpuid)) {
+ delay(1);
+ tempset = *((volatile cpuset_t *)&cpu_ready_set);
+ }
+
+ /* Mark the target CPU as available for mp operation. */
+ CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
+
+ /* Free the space allocated to hold the microcode file */
+ ucode_cleanup();
+
+ affinity_clear();
+
+ return (0);
+}
+
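The ordering constraint above means a DR driver must drive the public CPU interfaces strictly as configure, power on, (online/offline), power off, unconfigure. A condensed, hypothetical sequence under cpu_lock, with error unwinding and the online/offline steps elided:

```c
#include <sys/cpuvar.h>
#include <sys/mutex.h>
#include <sys/debug.h>

/* Hypothetical DR-driver cycle honoring the required call order. */
static int
example_dr_cpu_cycle(processorid_t cpuid)
{
	int rv;
	cpu_t *cp;

	mutex_enter(&cpu_lock);
	if ((rv = cpu_configure(cpuid)) == 0) {
		cp = cpu_get(cpuid);
		ASSERT(cp != NULL);
		rv = cpu_poweron(cp);	/* valid only once per configure */
		if (rv == 0) {
			/* ... cpu_online(), workload, cpu_offline() ... */
			rv = cpu_poweroff(cp);
		}
		if (rv == 0)
			rv = cpu_unconfigure(cpuid);
	}
	mutex_exit(&cpu_lock);
	return (rv);
}
```
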
+#define MP_CPU_DETACH_MAX_TRIES 5
+#define MP_CPU_DETACH_DELAY 100
+
+static int
+mp_cpu_detach_driver(dev_info_t *dip)
+{
+ int i;
+ int rv = EBUSY;
+ dev_info_t *pdip;
+
+ pdip = ddi_get_parent(dip);
+ ASSERT(pdip != NULL);
+ /*
+ * Check if caller holds pdip busy - can cause deadlocks in
+ * e_ddi_branch_unconfigure(), which calls devfs_clean().
+ */
+ if (DEVI_BUSY_OWNED(pdip)) {
+ return (EDEADLOCK);
+ }
+
+ for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
+ if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
+ rv = 0;
+ break;
+ }
+ DELAY(MP_CPU_DETACH_DELAY);
+ }
+
+ return (rv);
}
/*
- * Power off CPU.
+ * Power off the target CPU.
+ * Note: cpu_lock will be released and then reacquired.
*/
-/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
- ASSERT(MUTEX_HELD(&cpu_lock));
- return (ENOTSUP); /* not supported */
+ int rv = 0;
+ void *ctx;
+ dev_info_t *dip = NULL;
+ rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+ extern void cpupm_start(cpu_t *);
+ extern void cpupm_stop(cpu_t *);
+
+ ASSERT(cp != NULL);
+ ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
+ ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ }
+ /*
+ * There is no support for powering off cpu0 yet.
+ * There are many pieces of code which have a hard dependency on cpu0.
+ */
+ if (cp->cpu_id == 0) {
+ return (ENOTSUP);
+	}
+
+ if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
+ return (ENXIO);
+ }
+ ASSERT(dip != NULL);
+ if (mp_cpu_detach_driver(dip) != 0) {
+ rv = EBUSY;
+ goto out_online;
+ }
+
+ /* Allocate CPU context for stopping */
+ if (mach_cpucontext_init() != 0) {
+ rv = ENXIO;
+ goto out_online;
+ }
+ ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
+ if (ctx == NULL) {
+ rv = ENXIO;
+ goto out_context_fini;
+ }
+
+ cpupm_stop(cp);
+ cpu_event_fini_cpu(cp);
+
+ if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
+ cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
+ cp->cpu_m.mcpu_cmi_hdl = NULL;
+ }
+
+ rv = mach_cpu_stop(cp, ctx);
+ if (rv != 0) {
+ goto out_enable_cmi;
+ }
+
+ /* Wait until the target CPU has been halted. */
+ while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
+ delay(1);
+ }
+ rm->rm_cpu_halted = 0xffff;
+
+ /* CPU_READY has been cleared by mach_cpu_stop. */
+ ASSERT((cp->cpu_flags & CPU_READY) == 0);
+ ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
+ CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);
+
+ mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
+ mach_cpucontext_fini();
+
+ return (0);
+
+out_enable_cmi:
+ {
+ cmi_hdl_t hdl;
+
+ if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
+ if (x86_feature & X86_MCA)
+ cmi_mca_init(hdl);
+ cp->cpu_m.mcpu_cmi_hdl = hdl;
+ }
+ }
+ cpu_event_init_cpu(cp);
+ cpupm_start(cp);
+ mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);
+
+out_context_fini:
+ mach_cpucontext_fini();
+
+out_online:
+ (void) e_ddi_branch_configure(dip, NULL, 0);
+
+ if (rv != EAGAIN && rv != ETIME) {
+ rv = ENXIO;
+ }
+
+ return (rv);
}
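
The error paths above use the staged-unwind idiom: each label undoes exactly the steps that completed before the failure, in reverse order, and the success path returns before the labels. A self-contained hypothetical skeleton of the same shape:

```c
#include <sys/types.h>

/* Stub steps standing in for detach/context-init/stop above. */
static int step_detach(void)  { return (0); }
static int step_ctxinit(void) { return (0); }
static int step_stop(void)    { return (0); }
static void undo_ctxinit(void) { }
static void undo_detach(void)  { }

static int
example_staged_teardown(void)
{
	int rv;

	if ((rv = step_detach()) != 0)
		return (rv);		/* nothing to undo yet */
	if ((rv = step_ctxinit()) != 0)
		goto out_detach;	/* undo only the detach */
	if ((rv = step_stop()) != 0)
		goto out_ctx;		/* undo context, then detach */
	return (0);

out_ctx:
	undo_ctxinit();
out_detach:
	undo_detach();
	return (rv);
}
```
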
/*
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 0ba377f67c..146f879a66 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -23,10 +23,15 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
+#include <sys/cpu.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/proc.h>
@@ -34,6 +39,7 @@
#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
+#include <sys/note.h>
#include <sys/asm_linkage.h>
#include <sys/x_call.h>
#include <sys/systm.h>
@@ -60,6 +66,7 @@
#include <sys/reboot.h>
#include <sys/kdi_machimpl.h>
#include <vm/hat_i86.h>
+#include <vm/vm_dep.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
#include <sys/mach_mmu.h>
@@ -71,8 +78,11 @@
struct cpu cpus[1]; /* CPU data */
struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */
+struct cpu *cpu_free_list; /* list for released CPUs */
cpu_core_t cpu_core[NCPU]; /* cpu_core structures */
+#define cpu_next_free cpu_prev
+
/*
* Useful for disabling MP bring-up on a MP capable system.
*/
@@ -94,7 +104,8 @@ int flushes_require_xcalls;
cpuset_t cpu_ready_set; /* initialized in startup() */
-static void mp_startup(void);
+static void mp_startup_boot(void);
+static void mp_startup_hotplug(void);
static void cpu_sep_enable(void);
static void cpu_sep_disable(void);
@@ -108,7 +119,6 @@ void
init_cpu_info(struct cpu *cp)
{
processor_info_t *pi = &cp->cpu_type_info;
- char buf[CPU_IDSTRLEN];
/*
* Get clock-frequency property for the CPU.
@@ -131,14 +141,18 @@ init_cpu_info(struct cpu *cp)
if (fpu_exists)
(void) strcpy(pi->pi_fputypes, "i387 compatible");
- (void) cpuid_getidstr(cp, buf, sizeof (buf));
+ cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
+ cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP);
- cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
- (void) strcpy(cp->cpu_idstr, buf);
-
- (void) cpuid_getbrandstr(cp, buf, sizeof (buf));
- cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP);
- (void) strcpy(cp->cpu_brandstr, buf);
+ /*
+ * If called for the BSP, cp is equal to current CPU.
+ * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info
+ * of current CPU as default values for cpu_idstr and cpu_brandstr.
+ * They will be corrected in mp_startup_common() after cpuid_pass1()
+ * has been invoked on target CPU.
+ */
+ (void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN);
+ (void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN);
}
/*
@@ -228,9 +242,11 @@ init_cpu_syscall(struct cpu *cp)
*
* Allocate and initialize the cpu structure, TRAPTRACE buffer, and the
* startup and idle threads for the specified CPU.
+ * Parameter boot is true for boot time operations and is false for CPU
+ * DR operations.
*/
-struct cpu *
-mp_startup_init(int cpun)
+static struct cpu *
+mp_cpu_configure_common(int cpun, boolean_t boot)
{
struct cpu *cp;
kthread_id_t tp;
@@ -247,27 +263,25 @@ mp_startup_init(int cpun)
trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun];
#endif
+ ASSERT(MUTEX_HELD(&cpu_lock));
ASSERT(cpun < NCPU && cpu[cpun] == NULL);
- cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
-#if !defined(__xpv)
- if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
- cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU);
- cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
- } else
-#endif
- cp->cpu_m.mcpu_idle_cpu = cpu_idle;
+ if (cpu_free_list == NULL) {
+ cp = kmem_zalloc(sizeof (*cp), KM_SLEEP);
+ } else {
+ cp = cpu_free_list;
+ cpu_free_list = cp->cpu_next_free;
+ }
cp->cpu_m.mcpu_istamp = cpun << 16;
- procp = curthread->t_procp;
+ /* Create per CPU specific threads in the process p0. */
+ procp = &p0;
- mutex_enter(&cpu_lock);
/*
* Initialize the dispatcher first.
*/
disp_cpu_init(cp);
- mutex_exit(&cpu_lock);
cpu_vm_data_init(cp);
@@ -294,14 +308,21 @@ mp_startup_init(int cpun)
tp->t_disp_queue = cp->cpu_disp;
/*
- * Setup thread to start in mp_startup.
+ * Setup thread to start in mp_startup_common.
*/
sp = tp->t_stk;
- tp->t_pc = (uintptr_t)mp_startup;
tp->t_sp = (uintptr_t)(sp - MINFRAME);
#if defined(__amd64)
tp->t_sp -= STACK_ENTRY_ALIGN; /* fake a call */
#endif
+ /*
+ * Setup thread start entry point for boot or hotplug.
+ */
+ if (boot) {
+ tp->t_pc = (uintptr_t)mp_startup_boot;
+ } else {
+ tp->t_pc = (uintptr_t)mp_startup_hotplug;
+ }
cp->cpu_id = cpun;
cp->cpu_self = cp;
@@ -312,12 +333,13 @@ mp_startup_init(int cpun)
/*
* cpu_base_spl must be set explicitly here to prevent any blocking
- * operations in mp_startup from causing the spl of the cpu to drop
- * to 0 (allowing device interrupts before we're ready) in resume().
+ * operations in mp_startup_common from causing the spl of the cpu
+ * to drop to 0 (allowing device interrupts before we're ready) in
+ * resume().
* cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY.
* As an extra bit of security on DEBUG kernels, this is enforced with
- * an assertion in mp_startup() -- before cpu_base_spl is set to its
- * proper value.
+ * an assertion in mp_startup_common() -- before cpu_base_spl is set
+ * to its proper value.
*/
cp->cpu_base_spl = ipltospl(LOCK_LEVEL);
@@ -392,6 +414,15 @@ mp_startup_init(int cpun)
* alloc space for cpuid info
*/
cpuid_alloc_space(cp);
+#if !defined(__xpv)
+ if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) {
+ cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp);
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait;
+ } else
+#endif
+ cp->cpu_m.mcpu_idle_cpu = cpu_idle;
+
+ init_cpu_info(cp);
/*
* alloc space for ucode_info
@@ -408,31 +439,34 @@ mp_startup_init(int cpun)
ttc->ttc_next = ttc->ttc_first;
ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize;
#endif
+
/*
* Record that we have another CPU.
*/
- mutex_enter(&cpu_lock);
/*
* Initialize the interrupt threads for this CPU
*/
cpu_intr_alloc(cp, NINTR_THREADS);
+
+ cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
+ cpu_set_state(cp);
+
/*
* Add CPU to list of available CPUs. It'll be on the active list
- * after mp_startup().
+ * after mp_startup_common().
*/
cpu_add_unit(cp);
- mutex_exit(&cpu_lock);
return (cp);
}
/*
- * Undo what was done in mp_startup_init
+ * Undo what was done in mp_cpu_configure_common
*/
static void
-mp_startup_fini(struct cpu *cp, int error)
+mp_cpu_unconfigure_common(struct cpu *cp, int error)
{
- mutex_enter(&cpu_lock);
+ ASSERT(MUTEX_HELD(&cpu_lock));
/*
* Remove the CPU from the list of available CPUs.
@@ -451,7 +485,6 @@ mp_startup_fini(struct cpu *cp, int error)
* leave the fundamental data structures intact.
*/
cp->cpu_flags = 0;
- mutex_exit(&cpu_lock);
return;
}
@@ -468,8 +501,7 @@ mp_startup_fini(struct cpu *cp, int error)
*/
segkp_release(segkp,
cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME)));
-
- mutex_exit(&cpu_lock);
+ cp->cpu_intr_stack = NULL;
#ifdef TRAPTRACE
/*
@@ -485,10 +517,26 @@ mp_startup_fini(struct cpu *cp, int error)
hat_cpu_offline(cp);
- cpuid_free_space(cp);
-
ucode_free_space(cp);
+ /* Free CPU ID string and brand string. */
+ if (cp->cpu_idstr) {
+ kmem_free(cp->cpu_idstr, CPU_IDSTRLEN);
+ cp->cpu_idstr = NULL;
+ }
+ if (cp->cpu_brandstr) {
+ kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN);
+ cp->cpu_brandstr = NULL;
+ }
+
+#if !defined(__xpv)
+ if (cp->cpu_m.mcpu_mwait != NULL) {
+ cpuid_mwait_free(cp);
+ cp->cpu_m.mcpu_mwait = NULL;
+ }
+#endif
+ cpuid_free_space(cp);
+
if (cp->cpu_idt != CPU->cpu_idt)
kmem_free(cp->cpu_idt, PAGESIZE);
cp->cpu_idt = NULL;
@@ -496,6 +544,12 @@ mp_startup_fini(struct cpu *cp, int error)
kmem_free(cp->cpu_gdt, PAGESIZE);
cp->cpu_gdt = NULL;
+ if (cp->cpu_supp_freqs != NULL) {
+ size_t len = strlen(cp->cpu_supp_freqs) + 1;
+ kmem_free(cp->cpu_supp_freqs, len);
+ cp->cpu_supp_freqs = NULL;
+ }
+
teardown_vaddr_for_ppcopy(cp);
kcpc_hw_fini(cp);
@@ -505,15 +559,13 @@ mp_startup_fini(struct cpu *cp, int error)
cpu_vm_data_destroy(cp);
- mutex_enter(&cpu_lock);
+ xc_fini_cpu(cp);
disp_cpu_fini(cp);
- mutex_exit(&cpu_lock);
-#if !defined(__xpv)
- if (cp->cpu_m.mcpu_mwait != NULL)
- cpuid_mwait_free(cp);
-#endif
- kmem_free(cp, sizeof (*cp));
+ ASSERT(cp != CPU0);
+ bzero(cp, sizeof (*cp));
+ cp->cpu_next_free = cpu_free_list;
+ cpu_free_list = cp;
}
/*
@@ -529,8 +581,8 @@ mp_startup_fini(struct cpu *cp, int error)
* system.
*
* workaround_errata is invoked early in mlsetup() for CPU 0, and in
- * mp_startup() for all slave CPUs. Slaves process workaround_errata prior
- * to acknowledging their readiness to the master, so this routine will
+ * mp_startup_common() for all slave CPUs. Slaves process workaround_errata
+ * prior to acknowledging their readiness to the master, so this routine will
* never be executed by multiple CPUs in parallel, thus making updates to
* global data safe.
*
@@ -1214,77 +1266,82 @@ workaround_errata_end()
#endif
}
-static cpuset_t procset;
-
/*
- * Start a single cpu, assuming that the kernel context is available
- * to successfully start another cpu.
- *
- * (For example, real mode code is mapped into the right place
- * in memory and is ready to be run.)
+ * The procset_slave and procset_master are used to synchronize
+ * between the control CPU and the target CPU when starting CPUs.
*/
+static cpuset_t procset_slave, procset_master;
+
+static void
+mp_startup_wait(cpuset_t *sp, processorid_t cpuid)
+{
+ cpuset_t tempset;
+
+ for (tempset = *sp; !CPU_IN_SET(tempset, cpuid);
+ tempset = *(volatile cpuset_t *)sp) {
+ SMT_PAUSE();
+ }
+ CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid);
+}
+
+static void
+mp_startup_signal(cpuset_t *sp, processorid_t cpuid)
+{
+ cpuset_t tempset;
+
+ CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid);
+ for (tempset = *sp; CPU_IN_SET(tempset, cpuid);
+ tempset = *(volatile cpuset_t *)sp) {
+ SMT_PAUSE();
+ }
+}
+
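/*
 * [Sketch] mp_startup_wait()/mp_startup_signal() above implement a
 * one-shot rendezvous: the signaler sets its bit and spins until the
 * waiter consumes (clears) it, so a set bit always means "signaled but
 * not yet acknowledged".  A minimal userland model of the same pattern,
 * using C11 atomics in place of CPUSET_ATOMIC_ADD/DEL (all names here
 * are hypothetical):
 */
#include <stdatomic.h>

static _Atomic unsigned long rendezvous_set;

static void
rv_signal(int id)			/* target-CPU side */
{
	atomic_fetch_or(&rendezvous_set, 1UL << id);
	while (atomic_load(&rendezvous_set) & (1UL << id))
		;			/* SMT_PAUSE() in the kernel version */
}

static void
rv_wait(int id)				/* control-CPU side */
{
	while (!(atomic_load(&rendezvous_set) & (1UL << id)))
		;
	atomic_fetch_and(&rendezvous_set, ~(1UL << id));
}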
int
-start_cpu(processorid_t who)
+mp_start_cpu_common(cpu_t *cp, boolean_t boot)
{
+ _NOTE(ARGUNUSED(boot));
+
void *ctx;
- cpu_t *cp;
int delays;
int error = 0;
+ cpuset_t tempset;
+ processorid_t cpuid;
+#ifndef __xpv
+ extern void cpupm_init(cpu_t *);
+#endif
- ASSERT(who != 0);
-
- /*
- * Check if there's at least a Mbyte of kmem available
- * before attempting to start the cpu.
- */
- if (kmem_avail() < 1024 * 1024) {
- /*
- * Kick off a reap in case that helps us with
- * later attempts ..
- */
- kmem_reap();
- return (ENOMEM);
+ ASSERT(cp != NULL);
+ cpuid = cp->cpu_id;
+ ctx = mach_cpucontext_alloc(cp);
+ if (ctx == NULL) {
+ cmn_err(CE_WARN,
+ "cpu%d: failed to allocate context", cp->cpu_id);
+ return (EAGAIN);
}
-
- cp = mp_startup_init(who);
- if ((ctx = mach_cpucontext_alloc(cp)) == NULL ||
- (error = mach_cpu_start(cp, ctx)) != 0) {
-
- /*
- * Something went wrong before we even started it
- */
- if (ctx)
- cmn_err(CE_WARN,
- "cpu%d: failed to start error %d",
- cp->cpu_id, error);
- else
- cmn_err(CE_WARN,
- "cpu%d: failed to allocate context", cp->cpu_id);
-
- if (ctx)
- mach_cpucontext_free(cp, ctx, error);
- else
- error = EAGAIN; /* hmm. */
- mp_startup_fini(cp, error);
+ error = mach_cpu_start(cp, ctx);
+ if (error != 0) {
+ cmn_err(CE_WARN,
+ "cpu%d: failed to start, error %d", cp->cpu_id, error);
+ mach_cpucontext_free(cp, ctx, error);
return (error);
}
- for (delays = 0; !CPU_IN_SET(procset, who); delays++) {
+ for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid);
+ delays++) {
if (delays == 500) {
/*
* After five seconds, things are probably looking
* a bit bleak - explain the hang.
*/
cmn_err(CE_NOTE, "cpu%d: started, "
- "but not running in the kernel yet", who);
+ "but not running in the kernel yet", cpuid);
} else if (delays > 2000) {
/*
* We waited at least 20 seconds, bail ..
*/
error = ETIMEDOUT;
- cmn_err(CE_WARN, "cpu%d: timed out", who);
+ cmn_err(CE_WARN, "cpu%d: timed out", cpuid);
mach_cpucontext_free(cp, ctx, error);
- mp_startup_fini(cp, error);
return (error);
}
@@ -1292,37 +1349,105 @@ start_cpu(processorid_t who)
* wait at least 10ms, then check again..
*/
delay(USEC_TO_TICK_ROUNDUP(10000));
+ tempset = *((volatile cpuset_t *)&procset_slave);
}
+ CPUSET_ATOMIC_DEL(procset_slave, cpuid);
mach_cpucontext_free(cp, ctx, 0);
#ifndef __xpv
if (tsc_gethrtime_enable)
- tsc_sync_master(who);
+ tsc_sync_master(cpuid);
#endif
if (dtrace_cpu_init != NULL) {
+ (*dtrace_cpu_init)(cpuid);
+ }
+
+ /*
+ * During CPU DR operations, the cpu_lock is held by current
+ * (the control) thread. We can't release the cpu_lock here
+ * because that will break the CPU DR logic.
+ * On the other hand, CPUPM and processor group initialization
+ * routines need to access the cpu_lock. So we invoke those
+ * routines here on behalf of mp_startup_common().
+ *
+ * CPUPM and processor group initialization routines depend
+ * on the cpuid probing results. Wait for mp_startup_common()
+ * to signal that cpuid probing is done.
+ */
+ mp_startup_wait(&procset_slave, cpuid);
+#ifndef __xpv
+ cpupm_init(cp);
+#endif
+ (void) pg_cpu_init(cp, B_FALSE);
+ cpu_set_state(cp);
+ mp_startup_signal(&procset_master, cpuid);
+
+ return (0);
+}
+
+/*
+ * Start a single cpu, assuming that the kernel context is available
+ * to successfully start another cpu.
+ *
+ * (For example, real mode code is mapped into the right place
+ * in memory and is ready to be run.)
+ */
+int
+start_cpu(processorid_t who)
+{
+ cpu_t *cp;
+ int error = 0;
+ cpuset_t tempset;
+
+ ASSERT(who != 0);
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
/*
- * DTrace CPU initialization expects cpu_lock to be held.
+ * Kick off a reap in case that helps us with
+ * later attempts ..
*/
- mutex_enter(&cpu_lock);
- (*dtrace_cpu_init)(who);
- mutex_exit(&cpu_lock);
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ /*
+ * First configure cpu.
+ */
+ cp = mp_cpu_configure_common(who, B_TRUE);
+ ASSERT(cp != NULL);
+
+ /*
+ * Then start cpu.
+ */
+ error = mp_start_cpu_common(cp, B_TRUE);
+ if (error != 0) {
+ mp_cpu_unconfigure_common(cp, error);
+ return (error);
}
- while (!CPU_IN_SET(cpu_ready_set, who))
- delay(1);
+ mutex_exit(&cpu_lock);
+ tempset = cpu_ready_set;
+ while (!CPU_IN_SET(tempset, who)) {
+ drv_usecwait(1);
+ tempset = *((volatile cpuset_t *)&cpu_ready_set);
+ }
+ mutex_enter(&cpu_lock);
return (0);
}
-
-/*ARGSUSED*/
void
start_other_cpus(int cprboot)
{
+ _NOTE(ARGUNUSED(cprboot));
+
uint_t who;
- uint_t skipped = 0;
uint_t bootcpuid = 0;
/*
@@ -1347,9 +1472,12 @@ start_other_cpus(int cprboot)
CPUSET_ADD(cpu_ready_set, bootcpuid);
/*
- * if only 1 cpu or not using MP, skip the rest of this
+ * skip the rest of this if
+ * . only 1 cpu detected and the system isn't hotplug-capable
+ * . not using MP
*/
- if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
+ if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
+ use_mp == 0) {
if (use_mp == 0)
cmn_err(CE_CONT, "?***** Not in MP mode\n");
goto done;
@@ -1375,18 +1503,13 @@ start_other_cpus(int cprboot)
affinity_set(CPU_CURRENT);
for (who = 0; who < NCPU; who++) {
-
if (!CPU_IN_SET(mp_cpus, who))
continue;
ASSERT(who != bootcpuid);
- if (ncpus >= max_ncpus) {
- skipped = who;
- continue;
- }
- if (start_cpu(who) != 0)
- CPUSET_DEL(mp_cpus, who);
mutex_enter(&cpu_lock);
+ if (start_cpu(who) != 0)
+ CPUSET_DEL(mp_cpus, who);
cpu_state_change_notify(who, CPU_SETUP);
mutex_exit(&cpu_lock);
}
@@ -1396,59 +1519,92 @@ start_other_cpus(int cprboot)
affinity_clear();
- if (skipped) {
+ mach_cpucontext_fini();
+
+done:
+ if (get_hwenv() == HW_NATIVE)
+ workaround_errata_end();
+ cmi_post_mpstartup();
+
+ if (use_mp && ncpus != boot_max_ncpus) {
cmn_err(CE_NOTE,
"System detected %d cpus, but "
"only %d cpu(s) were enabled during boot.",
- skipped + 1, ncpus);
+ boot_max_ncpus, ncpus);
cmn_err(CE_NOTE,
"Use \"boot-ncpus\" parameter to enable more CPU(s). "
"See eeprom(1M).");
}
-
-done:
- if (get_hwenv() == HW_NATIVE)
- workaround_errata_end();
- mach_cpucontext_fini();
-
- cmi_post_mpstartup();
}
-/*
- * Dummy functions - no i86pc platforms support dynamic cpu allocation.
- */
-/*ARGSUSED*/
int
mp_cpu_configure(int cpuid)
{
- return (ENOTSUP); /* not supported */
+ cpu_t *cp;
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ }
+
+ cp = cpu_get(cpuid);
+ if (cp != NULL) {
+ return (EALREADY);
+ }
+
+ /*
+ * Check if there's at least a Mbyte of kmem available
+ * before attempting to start the cpu.
+ */
+ if (kmem_avail() < 1024 * 1024) {
+ /*
+ * Kick off a reap in case that helps us with
+ * later attempts ..
+ */
+ kmem_reap();
+ return (ENOMEM);
+ }
+
+ cp = mp_cpu_configure_common(cpuid, B_FALSE);
+ ASSERT(cp != NULL && cpu_get(cpuid) == cp);
+
+ return (cp != NULL ? 0 : EAGAIN);
}
-/*ARGSUSED*/
int
mp_cpu_unconfigure(int cpuid)
{
- return (ENOTSUP); /* not supported */
+ cpu_t *cp;
+
+ if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+ return (ENOTSUP);
+ } else if (cpuid < 0 || cpuid >= max_ncpus) {
+ return (EINVAL);
+ }
+
+ cp = cpu_get(cpuid);
+ if (cp == NULL) {
+ return (ENODEV);
+ }
+ mp_cpu_unconfigure_common(cp, 0);
+
+ return (0);
}
/*
* Startup function for 'other' CPUs (besides boot cpu).
* Called from real_mode_start.
*
- * WARNING: until CPU_READY is set, mp_startup and routines called by
- * mp_startup should not call routines (e.g. kmem_free) that could call
+ * WARNING: until CPU_READY is set, mp_startup_common and routines called by
+ * mp_startup_common should not call routines (e.g. kmem_free) that could call
* hat_unload which requires CPU_READY to be set.
*/
-void
-mp_startup(void)
+static void
+mp_startup_common(boolean_t boot)
{
- struct cpu *cp = CPU;
+ cpu_t *cp = CPU;
uint_t new_x86_feature;
- extern void cpu_event_init_cpu(cpu_t *);
-#ifndef __xpv
- extern void cpupm_init(cpu_t *);
-#endif
const char *fmt = "?cpu%d: %b\n";
+ extern void cpu_event_init_cpu(cpu_t *);
/*
* We need to get TSC on this proc synced (i.e., any delta
@@ -1457,8 +1613,8 @@ mp_startup(void)
* interrupts, cmn_err, etc.
*/
- /* Let cpu0 continue into tsc_sync_master() */
- CPUSET_ATOMIC_ADD(procset, cp->cpu_id);
+ /* Let the control CPU continue into tsc_sync_master() */
+ mp_startup_signal(&procset_slave, cp->cpu_id);
#ifndef __xpv
if (tsc_gethrtime_enable)
@@ -1537,39 +1693,42 @@ mp_startup(void)
if (workaround_errata(cp) != 0)
panic("critical workaround(s) missing for cpu%d", cp->cpu_id);
+ /*
+ * We can touch cpu_flags here without acquiring the cpu_lock
+ * because the cpu_lock is held by the control CPU, which is running
+ * mp_start_cpu_common().
+ * We need to clear the CPU_QUIESCED flag before calling any function
+ * that may cause a thread context switch, such as kmem_alloc().
+ * The idle thread checks the CPU_QUIESCED flag and loops forever if
+ * it's set, so the startup thread would have no chance to switch back
+ * if it were switched away with CPU_QUIESCED set.
+ */
+ cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
+
cpuid_pass2(cp);
cpuid_pass3(cp);
(void) cpuid_pass4(cp);
- init_cpu_info(cp);
-
- mutex_enter(&cpu_lock);
-
- cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;
-
- cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
- cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
-
- if (dtrace_cpu_init != NULL) {
- (*dtrace_cpu_init)(cp->cpu_id);
- }
-
/*
- * Fill out cpu_ucode_info. Update microcode if necessary.
+ * Correct cpu_idstr and cpu_brandstr on target CPU after
+ * cpuid_pass1() is done.
*/
- ucode_check(cp);
+ (void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN);
+ (void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN);
- mutex_exit(&cpu_lock);
+ cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS;
post_startup_cpu_fixups();
+ cpu_event_init_cpu(cp);
+
/*
* Enable preemption here so that contention for any locks acquired
- * later in mp_startup may be preempted if the thread owning those
- * locks is continuously executing on other CPUs (for example, this
- * CPU must be preemptible to allow other CPUs to pause it during their
- * startup phases). It's safe to enable preemption here because the
- * CPU state is pretty-much fully constructed.
+ * later in mp_startup_common may be preempted if the thread owning
+ * those locks is continuously executing on other CPUs (for example,
+ * this CPU must be preemptible to allow other CPUs to pause it during
+ * their startup phases). It's safe to enable preemption here because
+ * the CPU state is pretty-much fully constructed.
*/
curthread->t_preempt = 0;
@@ -1577,29 +1736,31 @@ mp_startup(void)
ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL));
set_base_spl(); /* Restore the spl to its proper value */
- cpu_event_init_cpu(cp);
-#ifndef __xpv
- cpupm_init(cp);
-#endif
-
+ pghw_physid_create(cp);
/*
- * Processor group initialization for this CPU is dependent on the
- * cpuid probing, which must be done in the context of the current
- * CPU, as well as the CPU's device node initialization (for ACPI).
+ * Delegate initialization tasks, which need to access the cpu_lock,
+ * to mp_start_cpu_common() because we can't acquire the cpu_lock here
+ * during CPU DR operations.
*/
- mutex_enter(&cpu_lock);
- pghw_physid_create(cp);
- (void) pg_cpu_init(cp, B_FALSE);
+ mp_startup_signal(&procset_slave, cp->cpu_id);
+ mp_startup_wait(&procset_master, cp->cpu_id);
pg_cmt_cpu_startup(cp);
- mutex_exit(&cpu_lock);
+
+ if (boot) {
+ mutex_enter(&cpu_lock);
+ cp->cpu_flags &= ~CPU_OFFLINE;
+ cpu_enable_intr(cp);
+ cpu_add_active(cp);
+ mutex_exit(&cpu_lock);
+ }
/* Enable interrupts */
(void) spl0();
- mutex_enter(&cpu_lock);
- cpu_enable_intr(cp);
- cpu_add_active(cp);
- mutex_exit(&cpu_lock);
+ /*
+ * Fill out cpu_ucode_info. Update microcode if necessary.
+ */
+ ucode_check(cp);
#ifndef __xpv
{
@@ -1616,6 +1777,7 @@ mp_startup(void)
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) {
if (x86_feature & X86_MCA)
cmi_mca_init(hdl);
+ cp->cpu_m.mcpu_cmi_hdl = hdl;
}
}
#endif /* __xpv */
@@ -1630,16 +1792,13 @@ mp_startup(void)
*/
CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id);
- /*
- * Because mp_startup() gets fired off after init() starts, we
- * can't use the '?' trick to do 'boot -v' printing - so we
- * always direct the 'cpu .. online' messages to the log.
- */
- cmn_err(CE_CONT, "!cpu%d initialization complete - online\n",
- cp->cpu_id);
-
(void) mach_cpu_create_device_node(cp, NULL);
+ cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr);
+ cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr);
+ cmn_err(CE_CONT, "?cpu%d initialization complete - online\n",
+ cp->cpu_id);
+
/*
* Now we are done with the startup thread, so free it up.
*/
@@ -1648,6 +1807,23 @@ mp_startup(void)
/*NOTREACHED*/
}
+/*
+ * Startup function for 'other' CPUs at boot time (besides boot cpu).
+ */
+static void
+mp_startup_boot(void)
+{
+ mp_startup_common(B_TRUE);
+}
+
+/*
+ * Startup function for hotplug CPUs at runtime.
+ */
+void
+mp_startup_hotplug(void)
+{
+ mp_startup_common(B_FALSE);
+}
/*
* Start CPU on user request.
@@ -1663,7 +1839,6 @@ mp_cpu_start(struct cpu *cp)
/*
* Stop CPU on user request.
*/
-/* ARGSUSED */
int
mp_cpu_stop(struct cpu *cp)
{
@@ -1713,31 +1888,42 @@ cpu_enable_intr(struct cpu *cp)
psm_enable_intr(cp->cpu_id);
}
-
-/*ARGSUSED*/
void
mp_cpu_faulted_enter(struct cpu *cp)
{
-#ifndef __xpv
- cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
- cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+#ifdef __xpv
+ _NOTE(ARGUNUSED(cp));
+#else
+ cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
if (hdl != NULL) {
+ cmi_hdl_hold(hdl);
+ } else {
+ hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+ }
+ if (hdl != NULL) {
cmi_faulted_enter(hdl);
cmi_hdl_rele(hdl);
}
#endif
}
-/*ARGSUSED*/
void
mp_cpu_faulted_exit(struct cpu *cp)
{
-#ifndef __xpv
- cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
- cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+#ifdef __xpv
+ _NOTE(ARGUNUSED(cp));
+#else
+ cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl;
if (hdl != NULL) {
+ cmi_hdl_hold(hdl);
+ } else {
+ hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
+ cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp));
+ }
+ if (hdl != NULL) {
cmi_faulted_exit(hdl);
cmi_hdl_rele(hdl);
}
diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c
index b4b235cf5c..c0e9c2404b 100644
--- a/usr/src/uts/i86pc/os/startup.c
+++ b/usr/src/uts/i86pc/os/startup.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/t_lock.h>
@@ -63,6 +67,7 @@
#include <sys/dumphdr.h>
#include <sys/bootconf.h>
+#include <sys/memlist_plat.h>
#include <sys/varargs.h>
#include <sys/promif.h>
#include <sys/modctl.h>
@@ -116,7 +121,7 @@
#include <sys/systeminfo.h>
#include <sys/multiboot.h>
-#ifdef __xpv
+#ifdef __xpv
#include <sys/hypervisor.h>
#include <sys/xen_mmu.h>
@@ -130,6 +135,10 @@ extern void xen_late_startup(void);
struct xen_evt_data cpu0_evt_data;
+#else /* __xpv */
+#include <sys/memlist_impl.h>
+
+extern void mem_config_init(void);
#endif /* __xpv */
extern void progressbar_init(void);
@@ -315,6 +324,17 @@ pgcnt_t segkpsize = 0;
pgcnt_t segziosize = 0; /* size of zio segment in pages */
/*
+ * A static DR page_t VA map is reserved that can map the page structures
+ * for a domain's entire RA space. The pages that back this space are
+ * dynamically allocated and need not be physically contiguous. The DR
+ * map size is derived from KPM size.
+ * This mechanism isn't used on x86 yet, so these are just stubs.
+ */
+int ppvm_enable = 0; /* Static virtual map for page structs */
+page_t *ppvm_base = NULL; /* Base of page struct map */
+pgcnt_t ppvm_size = 0; /* Size of page struct map */
+
+/*
* VA range available to the debugger
*/
const caddr_t kdi_segdebugbase = (const caddr_t)SEGDEBUGBASE;
@@ -965,20 +985,38 @@ startup_memlist(void)
if (prom_debug)
print_memlist("boot physinstalled",
bootops->boot_mem->physinstalled);
- installed_top_size(bootops->boot_mem->physinstalled, &physmax,
+ installed_top_size_ex(bootops->boot_mem->physinstalled, &physmax,
&physinstalled, &memblocks);
PRM_DEBUG(physmax);
PRM_DEBUG(physinstalled);
PRM_DEBUG(memblocks);
/*
+ * Compute maximum physical address for memory DR operations.
+ * Memory DR operations are unsupported on xpv or 32bit OSes.
+ */
+#ifdef __amd64
+ if (plat_dr_support_memory()) {
+ if (plat_dr_physmax == 0) {
+ uint_t pabits = UINT_MAX;
+
+ cpuid_get_addrsize(CPU, &pabits, NULL);
+ plat_dr_physmax = btop(1ULL << pabits);
+ }
+ if (plat_dr_physmax > PHYSMEM_MAX64)
+ plat_dr_physmax = PHYSMEM_MAX64;
+ } else
+#endif
+ plat_dr_physmax = 0;
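/*
 * [Worked example, assuming a CPU that reports 46 physical address
 * bits] btop(1ULL << 46) covers 64 TB of physical address space;
 * anything above PHYSMEM_MAX64 is clamped so the KPM reservation for
 * hot-added memory stays bounded.
 */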
+
+ /*
* Examine the bios reserved memory to find out:
* - the number of discontiguous segments of memory.
*/
if (prom_debug)
print_memlist("boot reserved mem",
bootops->boot_mem->rsvdmem);
- installed_top_size(bootops->boot_mem->rsvdmem, &rsvd_high_pfn,
+ installed_top_size_ex(bootops->boot_mem->rsvdmem, &rsvd_high_pfn,
&rsvd_pgcnt, &rsvdmemblocks);
PRM_DEBUG(rsvd_high_pfn);
PRM_DEBUG(rsvd_pgcnt);
@@ -1137,9 +1175,14 @@ startup_memlist(void)
* memory is at addresses above 1 TB. When adjusted, segkpm_base must
* be aligned on KERNEL_REDZONE_SIZE boundary (span of top level pte).
*/
- if (physmax + 1 > mmu_btop(TERABYTE)) {
+ if (physmax + 1 > mmu_btop(TERABYTE) ||
+ plat_dr_physmax > mmu_btop(TERABYTE)) {
uint64_t kpm_resv_amount = mmu_ptob(physmax + 1);
+ if (kpm_resv_amount < mmu_ptob(plat_dr_physmax)) {
+ kpm_resv_amount = mmu_ptob(plat_dr_physmax);
+ }
+
segkpm_base = -(P2ROUNDUP((2 * kpm_resv_amount),
KERNEL_REDZONE_SIZE)); /* down from top VA */
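/*
 * [Worked example] Twice the KPM span of VA is reserved below the top
 * of the kernel address range: with plat_dr_physmax covering 2 TB,
 * kpm_resv_amount is 2 TB and segkpm_base lands 4 TB below the top VA,
 * leaving headroom for memory that may be hot-added later.
 */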
@@ -1186,6 +1229,16 @@ startup_memlist(void)
panic("physavail was too big!");
if (prom_debug)
print_memlist("phys_avail", phys_avail);
+#ifndef __xpv
+ /*
+ * Free unused memlist items, which may be used by memory DR driver
+ * at runtime.
+ */
+ if ((caddr_t)current < (caddr_t)memlist + memlist_sz) {
+ memlist_free_block((caddr_t)current,
+ (caddr_t)memlist + memlist_sz - (caddr_t)current);
+ }
+#endif
/*
* Build bios reserved memspace
@@ -1196,6 +1249,16 @@ startup_memlist(void)
panic("bios_rsvd was too big!");
if (prom_debug)
print_memlist("bios_rsvd", bios_rsvd);
+#ifndef __xpv
+ /*
+ * Free unused memlist items, which may be used by memory DR driver
+ * at runtime.
+ */
+ if ((caddr_t)current < (caddr_t)bios_rsvd + rsvdmemlist_sz) {
+ memlist_free_block((caddr_t)current,
+ (caddr_t)bios_rsvd + rsvdmemlist_sz - (caddr_t)current);
+ }
+#endif
/*
* setup page coloring
@@ -1391,7 +1454,11 @@ startup_kmem(void)
}
#endif
-#ifdef __xpv
+#ifndef __xpv
+ if (plat_dr_support_memory()) {
+ mem_config_init();
+ }
+#else /* __xpv */
/*
* Some of the xen start information has to be relocated up
* into the kernel's permanent address space.
@@ -1406,7 +1473,7 @@ startup_kmem(void)
*/
CPU->cpu_m.mcpu_vcpu_info =
&HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id];
-#endif
+#endif /* __xpv */
PRM_POINT("startup_kmem() done");
}
@@ -1591,8 +1658,10 @@ startup_modules(void)
if ((get_hwenv() != HW_XEN_HVM) &&
(hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU),
cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL &&
- (x86_feature & X86_MCA))
+ (x86_feature & X86_MCA)) {
cmi_mca_init(hdl);
+ CPU->cpu_m.mcpu_cmi_hdl = hdl;
+ }
#endif /* __xpv */
/*
@@ -1701,7 +1770,11 @@ layout_kernel_va(void)
#if defined(__amd64)
kpm_vbase = (caddr_t)segkpm_base;
- kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1));
+ if (physmax + 1 < plat_dr_physmax) {
+ kpm_size = ROUND_UP_LPAGE(mmu_ptob(plat_dr_physmax));
+ } else {
+ kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1));
+ }
if ((uintptr_t)kpm_vbase + kpm_size > (uintptr_t)valloc_base)
panic("not enough room for kpm!");
PRM_DEBUG(kpm_size);
@@ -2342,10 +2415,6 @@ release_bootstrap(void)
rm_platter_va = i86devmap(pfn, 1,
PROT_READ | PROT_WRITE | PROT_EXEC);
rm_platter_pa = ptob(pfn);
- hat_devload(kas.a_hat,
- (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
- pfn, PROT_READ | PROT_WRITE | PROT_EXEC,
- HAT_LOAD_NOCONSIST);
break;
}
if (pfn == btop(1*1024*1024) && use_mp)
diff --git a/usr/src/uts/i86pc/os/x_call.c b/usr/src/uts/i86pc/os/x_call.c
index 33b75c1a5a..17d5123508 100644
--- a/usr/src/uts/i86pc/os/x_call.c
+++ b/usr/src/uts/i86pc/os/x_call.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#include <sys/types.h>
#include <sys/param.h>
@@ -186,11 +190,11 @@ xc_extract(xc_msg_t **queue)
return (old_head);
}
-
/*
* Initialize the machcpu fields used for cross calls
*/
static uint_t xc_initialized = 0;
+
void
xc_init_cpu(struct cpu *cpup)
{
@@ -198,34 +202,100 @@ xc_init_cpu(struct cpu *cpup)
int c;
/*
- * add a new msg to each existing CPU's free list, as well as one for
- * my list for each of them. ncpus has an inconsistent value when this
- * function is called, so use cpup->cpu_id.
+ * Allocate message buffers for the new CPU.
*/
- for (c = 0; c < cpup->cpu_id; ++c) {
- if (cpu[c] == NULL)
- continue;
- msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
- msg->xc_command = XC_MSG_FREE;
- msg->xc_master = c;
- xc_insert(&cpu[c]->cpu_m.xc_free, msg);
+ for (c = 0; c < max_ncpus; ++c) {
+ if (plat_dr_support_cpu()) {
+ /*
+ * Allocate a message buffer for every CPU possible
+ * in the system, including our own, and add them to our xc
+ * message queue.
+ */
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = cpup->cpu_id;
+ xc_insert(&cpup->cpu_m.xc_free, msg);
+ } else if (cpu[c] != NULL && cpu[c] != cpup) {
+ /*
+ * Add a new message buffer to each existing CPU's free
+ * list, as well as one for my list for each of them.
+ * Note: cpu0 is statically inserted into the cpu[] array,
+ * so we need to check that cpu[c] isn't cpup itself to
+ * avoid allocating extra message buffers for cpu0.
+ */
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = c;
+ xc_insert(&cpu[c]->cpu_m.xc_free, msg);
+
+ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
+ msg->xc_command = XC_MSG_FREE;
+ msg->xc_master = cpup->cpu_id;
+ xc_insert(&cpup->cpu_m.xc_free, msg);
+ }
+ }
+ if (!plat_dr_support_cpu()) {
+ /*
+ * Add one for self messages if CPU hotplug is disabled.
+ */
msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
msg->xc_command = XC_MSG_FREE;
msg->xc_master = cpup->cpu_id;
xc_insert(&cpup->cpu_m.xc_free, msg);
}
+ if (!xc_initialized)
+ xc_initialized = 1;
+}
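/*
 * [Sketch] xc_insert()/xc_extract() keep these per-CPU free lists usable
 * without locks.  A simplified userland model of the pattern with C11
 * atomics (hypothetical names; note the naive pop shown here is
 * ABA-prone, which the kernel sidesteps by having only the owning CPU
 * extract from its own queue):
 */
#include <stdatomic.h>
#include <stddef.h>

typedef struct xmsg {
	struct xmsg *next;
} xmsg_t;

static void
list_insert(_Atomic(xmsg_t *)*head, xmsg_t *m)
{
	xmsg_t *old = atomic_load(head);

	do {
		m->next = old;		/* link ahead of the current head */
	} while (!atomic_compare_exchange_weak(head, &old, m));
}

static xmsg_t *
list_extract(_Atomic(xmsg_t *)*head)
{
	xmsg_t *old = atomic_load(head);

	while (old != NULL &&
	    !atomic_compare_exchange_weak(head, &old, old->next))
		;			/* 'old' is refreshed on failure */
	return (old);
}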
+
+void
+xc_fini_cpu(struct cpu *cpup)
+{
+ xc_msg_t *msg;
+
+ ASSERT((cpup->cpu_flags & CPU_READY) == 0);
+ ASSERT(cpup->cpu_m.xc_msgbox == NULL);
+ ASSERT(cpup->cpu_m.xc_work_cnt == 0);
+
+ while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) {
+ kmem_free(msg, sizeof (*msg));
+ }
+}
+
+#define XC_FLUSH_MAX_WAITS 1000
+
+/* Flush inflight message buffers. */
+int
+xc_flush_cpu(struct cpu *cpup)
+{
+ int i;
+
+ ASSERT((cpup->cpu_flags & CPU_READY) == 0);
+
/*
- * Add one for self messages
+ * Pause all working CPUs, which ensures that there's no CPU in
+ * function xc_common().
+ * This is used to work around a race-condition window in xc_common()
+ * between checking the CPU_READY flag and increasing the working
+ * item count.
*/
- msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
- msg->xc_command = XC_MSG_FREE;
- msg->xc_master = cpup->cpu_id;
- xc_insert(&cpup->cpu_m.xc_free, msg);
+ pause_cpus(cpup);
+ start_cpus();
- if (!xc_initialized)
- xc_initialized = 1;
+ for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) {
+ if (cpup->cpu_m.xc_work_cnt == 0) {
+ break;
+ }
+ DELAY(1);
+ }
+ for (; i < XC_FLUSH_MAX_WAITS; i++) {
+ if (!BT_TEST(xc_priority_set, cpup->cpu_id)) {
+ break;
+ }
+ DELAY(1);
+ }
+
+ return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0);
}
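/*
 * [Note] The drain above is two-phase: first wait for the target's
 * xc_work_cnt to reach zero (ordinary cross calls in flight), then for
 * its bit to clear in xc_priority_set (a pending priority request),
 * with an overall bound of XC_FLUSH_MAX_WAITS polls before giving up
 * with ETIME.
 */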
/*
@@ -416,7 +486,7 @@ xc_common(
* Post messages to all CPUs involved that are CPU_READY
*/
CPU->cpu_m.xc_wait_cnt = 0;
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
if (!BT_TEST(set, c))
continue;
cpup = cpu[c];
@@ -487,7 +557,7 @@ xc_priority_common(
/*
* Wait briefly for any previous xc_priority to have finished.
*/
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
cpup = cpu[c];
if (cpup == NULL || !(cpup->cpu_flags & CPU_READY))
continue;
@@ -527,7 +597,7 @@ xc_priority_common(
* Post messages to all CPUs involved that are CPU_READY
* We'll always IPI, plus bang on the xc_msgbox for i86_mwait()
*/
- for (c = 0; c < ncpus; ++c) {
+ for (c = 0; c < max_ncpus; ++c) {
if (!BT_TEST(set, c))
continue;
cpup = cpu[c];
diff --git a/usr/src/uts/i86pc/sys/Makefile b/usr/src/uts/i86pc/sys/Makefile
index 11ebb58187..7fa9f8f6ef 100644
--- a/usr/src/uts/i86pc/sys/Makefile
+++ b/usr/src/uts/i86pc/sys/Makefile
@@ -60,6 +60,7 @@ HDRS= \
psm_types.h \
rm_platter.h \
smp_impldefs.h \
+ sbd_ioctl.h \
vm_machparam.h \
x_call.h \
xc_levels.h \
diff --git a/usr/src/uts/i86pc/sys/acpidev.h b/usr/src/uts/i86pc/sys/acpidev.h
index e0fbaa4999..6d11277aaf 100644
--- a/usr/src/uts/i86pc/sys/acpidev.h
+++ b/usr/src/uts/i86pc/sys/acpidev.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -28,8 +28,10 @@
#include <sys/types.h>
#include <sys/obpdefs.h>
#include <sys/sunddi.h>
+#ifdef _KERNEL
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
+#endif
#ifdef __cplusplus
extern "C" {
@@ -55,23 +57,19 @@ extern "C" {
#define ACPIDEV_HID_CPU "ACPI0007"
#define ACPIDEV_HID_PCI_HOSTBRIDGE "PNP0A03"
#define ACPIDEV_HID_PCIE_HOSTBRIDGE "PNP0A08"
+#define ACPIDEV_HID_PCIEX_HOSTBRIDGE "PNP0A08"
#define ACPIDEV_HID_MEMORY "PNP0C80"
-/* Common ACPI object names. */
-#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
-#define ACPIDEV_OBJECT_NAME_PR "_PR_"
-
-/* Common ACPI method names. */
-#define ACPIDEV_METHOD_NAME_MAT "_MAT"
-
/* Device names for ACPI objects. */
#define ACPIDEV_NODE_NAME_ROOT "fw"
+#define ACPIDEV_NODE_NAME_ACPIDR "acpidr"
#define ACPIDEV_NODE_NAME_CONTAINER "container"
#define ACPIDEV_NODE_NAME_MODULE_SBD "sb"
#define ACPIDEV_NODE_NAME_MODULE_CPU "socket"
#define ACPIDEV_NODE_NAME_CPU "cpu"
#define ACPIDEV_NODE_NAME_PROCESSOR "cpus"
#define ACPIDEV_NODE_NAME_MEMORY "mem"
+#define ACPIDEV_NODE_NAME_PCI "pci"
/* Device types for ACPI objects. */
#define ACPIDEV_TYPE_ROOTNEX "acpirootnex"
@@ -81,15 +79,31 @@ extern "C" {
#define ACPIDEV_TYPE_CONTAINER "acpicontainer"
#define ACPIDEV_TYPE_CPU "acpicpu"
#define ACPIDEV_TYPE_MEMORY "acpimemory"
+#define ACPIDEV_TYPE_PCI "pci"
+#define ACPIDEV_TYPE_PCIEX "pciex"
/* Device property names for ACPI objects. */
#define ACPIDEV_PROP_NAME_UNIT_ADDR "unit-address"
#define ACPIDEV_PROP_NAME_ACPI_UID "acpi-uid"
#define ACPIDEV_PROP_NAME_PROCESSOR_ID "acpi-processor-id"
#define ACPIDEV_PROP_NAME_LOCALAPIC_ID "apic-id"
+#define ACPIDEV_PROP_NAME_PROXIMITY_ID "proximity-id"
#define ACPIDEV_PROP_NAME_UID_FORMAT "acpidev-uid-format"
+/* Miscellaneous strings. */
+#define ACPIDEV_CMD_OST_PREFIX "acpi-update-status"
+#define ACPIDEV_CMD_OST_INPROGRESS "acpi-update-status=inprogress"
+#define ACPIDEV_CMD_OST_SUCCESS "acpi-update-status=success"
+#define ACPIDEV_CMD_OST_FAILURE "acpi-update-status=failure"
+#define ACPIDEV_CMD_OST_NOOP "acpi-update-status=noop"
+
+#define ACPIDEV_EVENT_TYPE_ATTR_NAME "acpi-event-type"
+#define ACPIDEV_EVENT_TYPE_BUS_CHECK "bus_check"
+#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK "device_check"
+#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT "device_check_light"
+#define ACPIDEV_EVENT_TYPE_EJECT_REQUEST "eject_request"
+
/* ACPI device class Id. */
typedef enum acpidev_class_id {
ACPIDEV_CLASS_ID_INVALID = 0,
@@ -99,6 +113,8 @@ typedef enum acpidev_class_id {
ACPIDEV_CLASS_ID_CONTAINER = 4,
ACPIDEV_CLASS_ID_CPU = 5,
ACPIDEV_CLASS_ID_MEMORY = 6,
+ ACPIDEV_CLASS_ID_PCI = 7,
+ ACPIDEV_CLASS_ID_PCIEX = 8,
ACPIDEV_CLASS_ID_MAX
} acpidev_class_id_t;
@@ -106,9 +122,88 @@ typedef enum acpidev_class_id {
#define ACPIDEV_OUSER_NO_CPU 0x1
#define ACPIDEV_OUSER_NO_MEM 0x2
#define ACPIDEV_OUSER_NO_CONTAINER 0x4
+#define ACPIDEV_OUSER_NO_PCI 0x8
+#define ACPIDEV_OUSER_NO_CACHE 0x10000
#ifdef _KERNEL
+/* Common ACPI object names. */
+#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
+#define ACPIDEV_OBJECT_NAME_PR "_PR_"
+
+/* Common ACPI method names. */
+#define ACPIDEV_METHOD_NAME_MAT "_MAT"
+#define ACPIDEV_METHOD_NAME_EJ0 "_EJ0"
+#define ACPIDEV_METHOD_NAME_EDL "_EDL"
+#define ACPIDEV_METHOD_NAME_EJD "_EJD"
+#define ACPIDEV_METHOD_NAME_OST "_OST"
+#define ACPIDEV_METHOD_NAME_PXM "_PXM"
+#define ACPIDEV_METHOD_NAME_SLI "_SLI"
+
+/* Source event code for _OST. */
+#define ACPI_OST_EVENT_EJECTING 0x103
+#define ACPI_OST_EVENT_INSERTING 0x200
+
+/* Status code for _OST. */
+#define ACPI_OST_STA_SUCCESS 0x0
+
+/* Non-specific failure. */
+#define ACPI_OST_STA_FAILURE 0x1
+
+/* Unrecognized Notify Code. */
+#define ACPI_OST_STA_NOT_SUPPORT 0x2
+
+/* Device ejection not supported by OSPM. */
+#define ACPI_OST_STA_EJECT_NOT_SUPPORT 0x80
+
+/* Device in use by application. */
+#define ACPI_OST_STA_EJECT_IN_USE 0x81
+
+/* Device Busy. */
+#define ACPI_OST_STA_EJECT_BUSY 0x82
+
+/* Ejection dependency is busy or not supported for ejection by OSPM. */
+#define ACPI_OST_STA_EJECT_DEPENDENCY 0x83
+
+/* Ejection is in progress (pending). */
+#define ACPI_OST_STA_EJECT_IN_PROGRESS 0x84
+
+/* Device insertion in progress (pending). */
+#define ACPI_OST_STA_INSERT_IN_PROGRESS 0x80
+
+/* Device driver load failure. */
+#define ACPI_OST_STA_INSERT_DRIVER 0x81
+
+/* Device insertion not supported by OSPM. */
+#define ACPI_OST_STA_INSERT_NOT_SUPPORT 0x82
+
+/*
+ * Insertion failure
+ * Resources Unavailable as described by the following bit encodings:
+ * Bit[3] Bus Numbers
+ * Bit[2] Interrupts
+ * Bit[1] I/O
+ * Bit[0] Memory
+ */
+#define ACPI_OST_STA_INSERT_NO_RESOURCE 0x90
+#define ACPI_OST_STA_INSERT_NO_BUS 0x8
+#define ACPI_OST_STA_INSERT_NO_INTR 0x4
+#define ACPI_OST_STA_INSERT_NO_IO 0x2
+#define ACPI_OST_STA_INSERT_NO_MEM 0x1
+
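/*
 * [Worked example] To report an insertion failure caused by a shortage
 * of memory and interrupts, OR the encodings together:
 *	(ACPI_OST_STA_INSERT_NO_RESOURCE | ACPI_OST_STA_INSERT_NO_MEM |
 *	ACPI_OST_STA_INSERT_NO_INTR) == 0x95
 */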
+/*
+ * According to the ACPI specification, self latency (entry[n][n]) in the
+ * SLIT table should be 10.
+ */
+#define ACPI_SLIT_SELF_LATENCY 10
+
+/*
+ * The DR driver assigns a unique device id to each hot-added memory device.
+ * ACPI_MEMNODE_DEVID_BOOT is assigned to memory devices present at boot,
+ * which distinguishes them from the device ids assigned by the DR driver.
+ */
+#define ACPI_MEMNODE_DEVID_BOOT UINT32_MAX
+
/* Forward declaration */
typedef struct acpidev_data_impl *acpidev_data_handle_t;
typedef struct acpidev_walk_info acpidev_walk_info_t;
@@ -197,7 +292,7 @@ typedef ACPI_STATUS (* acpidev_probe_t)(acpidev_walk_info_t *);
typedef acpidev_filter_result_t (* acpidev_filter_t)(acpidev_walk_info_t *,
char *, int);
typedef ACPI_STATUS (* acpidev_init_t)(acpidev_walk_info_t *);
-typedef ACPI_STATUS (* acpidev_fini_t)(ACPI_HANDLE, dev_info_t *,
+typedef void (* acpidev_fini_t)(ACPI_HANDLE, acpidev_data_handle_t,
acpidev_class_t *);
/* Device class driver interface. */
@@ -237,6 +332,7 @@ extern acpidev_class_t acpidev_class_device;
extern acpidev_class_t acpidev_class_container;
extern acpidev_class_t acpidev_class_cpu;
extern acpidev_class_t acpidev_class_memory;
+extern acpidev_class_t acpidev_class_pci;
/*
* Class driver lists.
@@ -309,6 +405,7 @@ extern ACPI_STATUS acpidev_process_object(acpidev_walk_info_t *infop,
#define ACPIDEV_PROCESS_FLAG_OFFLINE 0x400 /* Put device into offline. */
#define ACPIDEV_PROCESS_FLAG_NOTAG 0x800 /* Skip tag dip with object. */
#define ACPIDEV_PROCESS_FLAG_SYNCSTATUS 0x1000 /* Sync object status. */
+#define ACPIDEV_PROCESS_FLAG_HOLDBRANCH 0x10000 /* Hold device branch. */
/*
* Filter ACPI objects according to filter rules, generate devname if needed.
@@ -346,10 +443,16 @@ extern void acpidev_data_destroy_handle(ACPI_HANDLE hdl);
extern ACPI_HANDLE acpidev_data_get_object(acpidev_data_handle_t hdl);
extern dev_info_t *acpidev_data_get_devinfo(acpidev_data_handle_t hdl);
extern int acpidev_data_get_status(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_capable(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_ready(acpidev_data_handle_t hdl);
+extern boolean_t acpidev_data_dr_failed(acpidev_data_handle_t hdl);
extern void acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag);
extern void acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag);
extern uint32_t acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag);
+/* ACPI system event handler has been registered. */
+#define ACPIDEV_DATA_HANDLER_READY 0x1
+
/*
* Try to generate meaningful device unit address from uid.
* Return buf on success and NULL on failure.
@@ -430,6 +533,26 @@ typedef ACPI_STATUS (* acpidev_apic_walker_t)(ACPI_SUBTABLE_HEADER *, void *);
extern ACPI_STATUS acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl,
char *method, acpidev_apic_walker_t func, void *context);
+/*
+ * Evaluate the _OST method under an object; used to support hotplug events.
+ * hdl: object handle
+ * code: _OST source event code
+ * status: _OST result status code
+ * bufp and len: optional third parameter for _OST.
+ */
+extern ACPI_STATUS acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code,
+ uint32_t status, char *bufp, size_t len);
+
+/*
+ * Evaluate _EJ0 method under object.
+ */
+extern ACPI_STATUS acpidev_eval_ej0(ACPI_HANDLE hdl);
+
+/*
+ * Evaluate _PXM method under object.
+ */
+extern ACPI_STATUS acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/acpidev_dr.h b/usr/src/uts/i86pc/sys/acpidev_dr.h
new file mode 100644
index 0000000000..6219f98bf1
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/acpidev_dr.h
@@ -0,0 +1,252 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+/*
+ * Interfaces to support System Board Dynamic Reconfiguration.
+ */
+
+#ifndef _SYS_ACPIDEV_DR_H
+#define _SYS_ACPIDEV_DR_H
+#include <sys/types.h>
+#include <sys/obpdefs.h>
+#include <sys/cpuvar.h>
+#include <sys/memlist.h>
+#include <sys/sunddi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/* Maximum number of DR capable system boards supported. */
+#define ACPIDEV_DR_MAX_BOARDS 0x40
+#define ACPIDEV_DR_SEGS_PER_MEM_DEV 0x10
+#define ACPIDEV_DR_MEMLISTS_PER_SEG 0x10
+#define ACPIDEV_DR_MAX_MEMLIST_ENTRIES 0x10000
+
+#define ACPIDEV_DR_PROP_PORTID "portid"
+#define ACPIDEV_DR_PROP_BOARDNUM OBP_BOARDNUM
+#define ACPIDEV_DR_PROP_DEVNAME OBP_NAME
+
+/*
+ * Format strings for DR capable system boards.
+ * They will be used as attachment point names.
+ */
+#define ACPIDEV_DR_CPU_BD_FMT "CPU%u"
+#define ACPIDEV_DR_MEMORY_BD_FMT "MEM%u"
+#define ACPIDEV_DR_IO_BD_FMT "IO%u"
+#define ACPIDEV_DR_SYSTEM_BD_FMT "SB%u"
+
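/*
 * [Sketch] For example, a DR-capable system board numbered 2 gets the
 * attachment point name "SB2" (buf, len and bnum are hypothetical
 * caller-supplied values):
 *
 *	(void) snprintf(buf, len, ACPIDEV_DR_SYSTEM_BD_FMT, bnum);
 */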
+typedef enum {
+ ACPIDEV_INVALID_BOARD = 0,
+ ACPIDEV_CPU_BOARD,
+ ACPIDEV_MEMORY_BOARD,
+ ACPIDEV_IO_BOARD,
+ ACPIDEV_SYSTEM_BOARD
+} acpidev_board_type_t;
+
+/* Check whether the system is DR capable. */
+extern int acpidev_dr_capable(void);
+
+extern uint32_t acpidev_dr_max_boards(void);
+extern uint32_t acpidev_dr_max_mem_units_per_board(void);
+extern uint32_t acpidev_dr_max_io_units_per_board(void);
+extern uint32_t acpidev_dr_max_cmp_units_per_board(void);
+extern uint32_t acpidev_dr_max_cpu_units_per_cmp(void);
+extern uint32_t acpidev_dr_max_segments_per_mem_device(void);
+extern uint32_t acpidev_dr_max_memlists_per_segment(void);
+extern ACPI_STATUS acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap);
+
+/* Initialize support of DR operations. */
+extern void acpidev_dr_init(void);
+
+/* Scan for DR capable boards and setup environment for DR operations. */
+extern void acpidev_dr_check(acpidev_walk_info_t *infop);
+
+/*
+ * Initialize DR interfaces to enable DR operations.
+ */
+extern ACPI_STATUS acpidev_dr_initialize(dev_info_t *pdip);
+
+/* Get ACPI handle of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_handle(uint_t board,
+ ACPI_HANDLE *hdlp);
+
+/* Get board type of the DR capable board. */
+extern acpidev_board_type_t acpidev_dr_get_board_type(ACPI_HANDLE hdl);
+
+/* Get board number of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_number(ACPI_HANDLE hdl,
+ uint32_t *bnump);
+
+/* Get board name of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_board_name(ACPI_HANDLE hdl,
+ char *buf, size_t len);
+
+/* Get attachment point of the DR capable board. */
+extern ACPI_STATUS acpidev_dr_get_attachment_point(ACPI_HANDLE hdl,
+ char *buf, size_t len);
+
+/*
+ * Figure out device type of the object/device.
+ * It only supports device types which may be involved in DR operations.
+ */
+extern acpidev_class_id_t acpidev_dr_device_get_class(ACPI_HANDLE hdl);
+
+/* Get memory device index/id. */
+extern ACPI_STATUS acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl,
+ uint32_t *idxp);
+
+/* Check whether the device is a DR capable board or not. */
+extern int acpidev_dr_device_is_board(ACPI_HANDLE hdl);
+
+/* Check whether the device is present or not. */
+extern int acpidev_dr_device_is_present(ACPI_HANDLE hdl);
+
+/* Check whether the device is powered-on or not. */
+extern int acpidev_dr_device_is_powered(ACPI_HANDLE hdl);
+
+/* Check whether the device is DR capable. */
+extern int acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl);
+
+/* Check whether the device has an eject device list. */
+extern int acpidev_dr_device_has_edl(ACPI_HANDLE hdl);
+
+/*
+ * Simulate OBP property interfaces to support the drmach driver,
+ * so we can keep drmach consistent with the SPARC version.
+ * Return size of data copied to buf if it's big enough,
+ * otherwise return size of buffer needed.
+ */
+extern int acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name,
+ caddr_t buf, size_t len);
+
+/*
+ * Get "reg" or "assigned-address" property of the device.
+ * Return "assigned-address" property if assigned is non-zero,
+ * otherwise return "reg" property.
+ * Caller needs to release returned resources by calling
+ * acpidev_dr_device_free_regspec().
+ */
+extern ACPI_STATUS acpidev_dr_device_get_regspec(ACPI_HANDLE hdl,
+ boolean_t assigned, acpidev_regspec_t **regpp, uint_t *cntp);
+
+/* Free resources returned by acpidev_dr_device_get_regspec(). */
+extern void acpidev_dr_device_free_regspec(acpidev_regspec_t *regp,
+ uint_t count);
+
+/* Walk devices in eject device list (ACPI _EDL method). */
+extern ACPI_STATUS acpidev_dr_device_walk_edl(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/* Walk devices in eject dependency list (ACPI _EJD method). */
+extern ACPI_STATUS acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl,
+ ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/*
+ * Walk child and dependent devices which may be involved in DR operations.
+ * PCI host bridges embedded in physical processors may be presented in the
+ * eject device list instead of as children of the processors.
+ */
+extern ACPI_STATUS acpidev_dr_device_walk_device(ACPI_HANDLE hdl,
+ uint_t max_lvl, ACPI_WALK_CALLBACK cb, void *arg, void **retval);
+
+/* Check whether the device is in working state without any error. */
+extern ACPI_STATUS acpidev_dr_device_check_status(ACPI_HANDLE hdl);
+
+/* Power on the device. */
+extern ACPI_STATUS acpidev_dr_device_poweron(ACPI_HANDLE hdl);
+
+/* Power off the device. */
+extern ACPI_STATUS acpidev_dr_device_poweroff(ACPI_HANDLE hdl);
+
+/*
+ * Create device nodes for hot-added devices under hdl.
+ * Return:
+ * AE_OK: on success
+ * AE_SUPPORT: if it's not capable of DR operation.
+ * AE_ERROR: for other errors
+ */
+extern ACPI_STATUS acpidev_dr_device_insert(ACPI_HANDLE hdl);
+
+/*
+ * Destroy device nodes to be removed under hdl.
+ * Return:
+ * AE_OK: on success
+ * AE_SUPPORT: if it's not capable of DR operation.
+ * AE_ERROR: for other errors
+ */
+extern ACPI_STATUS acpidev_dr_device_remove(ACPI_HANDLE hdl);
+
+/* Block dynamic reconfiguration operations. */
+extern void acpidev_dr_lock_all(void);
+
+/* Unblock dynamic reconfiguration operations. */
+extern void acpidev_dr_unlock_all(void);
+
+extern ACPI_STATUS acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl,
+ processorid_t *idp);
+extern ACPI_STATUS acpidev_dr_free_cpuid(ACPI_HANDLE hdl);
+
+/*
+ * Query NUMA-related information for the CPU device.
+ * It returns APIC id, Proximity id and latency information of the CPU device.
+ * Latency information is retrieved from the ACPI _SLI method or the ACPI SLIT
+ * table.
+ */
+extern int acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp,
+ uint32_t *apicidp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp);
+
+/*
+ * Release resources allocated by acpidev_dr_get_cpu_numa_info().
+ */
+extern void acpidev_dr_free_cpu_numa_info(void *hdlp);
+
+/*
+ * Query NUMA-related information for a memory device.
+ * It returns proximity id and latency information of the memory device.
+ * Latency information is obtained from the ACPI _SLI method or the ACPI
+ * SLIT table.
+ */
+extern ACPI_STATUS acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl,
+ struct memlist *ml, void **hdlpp, uint32_t *pxmidp,
+ uint32_t *slicntp, uchar_t **slipp);
+
+/*
+ * Release resources allocated by acpidev_dr_get_mem_numa_info().
+ */
+extern void acpidev_dr_free_mem_numa_info(void *hdlp);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ACPIDEV_DR_H */
diff --git a/usr/src/uts/i86pc/sys/acpidev_impl.h b/usr/src/uts/i86pc/sys/acpidev_impl.h
index 666ab5578d..c01ea9f5cb 100644
--- a/usr/src/uts/i86pc/sys/acpidev_impl.h
+++ b/usr/src/uts/i86pc/sys/acpidev_impl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -27,10 +27,13 @@
#define _SYS_ACPIDEV_IMPL_H
#include <sys/types.h>
#include <sys/cmn_err.h>
+#include <sys/bitmap.h>
+#include <sys/synch.h>
#include <sys/sunddi.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/acpidev.h>
+#include <sys/acpidev_dr.h>
#ifdef __cplusplus
extern "C" {
@@ -54,12 +57,52 @@ struct acpidev_data_impl {
dev_info_t *aod_dip;
acpidev_class_t *aod_class;
acpidev_class_list_t **aod_class_list;
+ acpidev_board_type_t aod_bdtype; /* Type of board. */
+ uint32_t aod_bdnum; /* Board # for DR. */
+ uint32_t aod_portid; /* Port id for DR. */
+ uint32_t aod_bdidx; /* Index # of AP */
+ volatile uint32_t aod_chidx; /* Index # of child */
+ uint32_t aod_memidx; /* Index # of memory */
+ acpidev_class_id_t aod_class_id; /* Dev type for DR. */
};
#define ACPIDEV_ODF_STATUS_VALID 0x1
#define ACPIDEV_ODF_DEVINFO_CREATED 0x2
#define ACPIDEV_ODF_DEVINFO_TAGGED 0x4
-#define ACPIDEV_ODF_DEVINFO_OFFLINE 0x8
+#define ACPIDEV_ODF_HOTPLUG_CAPABLE 0x100
+#define ACPIDEV_ODF_HOTPLUG_READY 0x200
+#define ACPIDEV_ODF_HOTPLUG_FAILED 0x400
+
+#define ACPIDEV_DR_IS_BOARD(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE)
+
+#define ACPIDEV_DR_SET_BOARD(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_CAPABLE
+
+#define ACPIDEV_DR_IS_READY(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY)
+
+#define ACPIDEV_DR_SET_READY(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_READY
+
+#define ACPIDEV_DR_IS_FAILED(hdl) \
+ ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED)
+
+#define ACPIDEV_DR_SET_FAILED(hdl) \
+ (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_FAILED
+
+#define ACPIDEV_DR_IS_WORKING(hdl) \
+ (((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \
+ ACPIDEV_ODF_HOTPLUG_FAILED)) == ACPIDEV_ODF_HOTPLUG_READY)
+
+#define ACPIDEV_DR_IS_PROCESSED(hdl) \
+ ((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \
+ ACPIDEV_ODF_HOTPLUG_FAILED | ACPIDEV_ODF_HOTPLUG_CAPABLE))
+
+#define ACPIDEV_DR_BOARD_READY(hdl) \
+ (((hdl)->aod_iflag & \
+ (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE)) == \
+ (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE))
/*
* List of registered device class drivers.
@@ -83,6 +126,23 @@ typedef struct acpidev_pseudo_uid_head {
acpidev_pseudo_uid_t *apuh_first;
} acpidev_pseudo_uid_head_t;
+typedef struct acpidev_dr_capacity {
+ uint_t cpu_vendor;
+ uint_t cpu_family;
+ uint_t cpu_model_min;
+ uint_t cpu_model_max;
+ uint_t cpu_step_min;
+ uint_t cpu_step_max;
+ boolean_t hotplug_supported;
+ uint64_t memory_alignment;
+} acpidev_dr_capacity_t;
+
+extern int acpidev_dr_enable;
+extern krwlock_t acpidev_class_lock;
+extern ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)];
+extern ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr;
+extern ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/acpinex.h b/usr/src/uts/i86pc/sys/acpinex.h
index be21c8e0e2..4aa6f19636 100644
--- a/usr/src/uts/i86pc/sys/acpinex.h
+++ b/usr/src/uts/i86pc/sys/acpinex.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -62,6 +62,10 @@ typedef struct {
char ans_path[MAXPATHLEN];
} acpinex_softstate_t;
+extern void acpinex_event_init(void);
+extern void acpinex_event_fini(void);
+extern int acpinex_event_scan(acpinex_softstate_t *, boolean_t);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index d0aea5d877..48500c4396 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -502,7 +502,7 @@ typedef struct apic_irq {
* adjust APIC_PADSZ as we add/modify any member of apic_cpus_info. We also
* don't want the compiler to optimize apic_cpus_info.
*/
-#define APIC_PADSZ 19
+#define APIC_PADSZ 15
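/*
 * [Note] The pad shrinks by 4 bytes (19 -> 15) to make room for the
 * new 4-byte aci_processor_id field below, keeping each apic_cpus_info
 * entry in its own 64-byte cache line.
 */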
#pragma pack(1)
typedef struct apic_cpus_info {
@@ -517,6 +517,7 @@ typedef struct apic_cpus_info {
uchar_t aci_current[MAXIPL]; /* Current IRQ at each IPL */
uint32_t aci_bound; /* # of user requested binds ? */
uint32_t aci_temp_bound; /* # of non user IRQ binds */
+ uint32_t aci_processor_id; /* Only used in ACPI mode. */
uchar_t aci_idle; /* The CPU is idle */
/*
* Fill to make sure each struct is in separate 64-byte cache line.
@@ -527,6 +528,8 @@ typedef struct apic_cpus_info {
#define APIC_CPU_ONLINE 1
#define APIC_CPU_INTR_ENABLE 2
+#define APIC_CPU_FREE 4 /* APIC CPU slot is free */
+#define APIC_CPU_DIRTY 8 /* Slot was once used */
/*
* APIC ops to support various flavors of APIC like APIC and x2APIC.
@@ -784,6 +787,7 @@ extern int apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
psm_intr_op_t intr_op, int *result);
extern int apic_state(psm_state_request_t *);
extern boolean_t apic_cpu_in_range(int cpu);
+extern processorid_t apic_find_next_cpu_intr(void);
extern int apic_check_msi_support();
extern apic_irq_t *apic_find_irq(dev_info_t *dip, struct intrspec *ispec,
int type);
@@ -849,6 +853,7 @@ extern int apic_diff_for_redistribution;
extern int apic_poweroff_method;
extern int apic_enable_acpi;
extern int apic_nproc;
+extern int apic_max_nproc;
extern int apic_next_bind_cpu;
extern int apic_redistribute_sample_interval;
extern int apic_multi_msi_enable;
diff --git a/usr/src/uts/i86pc/sys/dr.h b/usr/src/uts/i86pc/sys/dr.h
new file mode 100644
index 0000000000..492eaeaae9
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/dr.h
@@ -0,0 +1,500 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DR_H
+#define _SYS_DR_H
+#include <sys/types.h>
+#include <sys/note.h>
+#include <sys/processor.h>
+#include <sys/obpdefs.h>
+#include <sys/memlist.h>
+#include <sys/mem_config.h>
+#include <sys/param.h> /* for MAXPATHLEN */
+#include <sys/varargs.h>
+#include <sys/sbd_ioctl.h>
+#include <sys/dr_util.h>
+#include <sys/drmach.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * helper macros for constructing and reporting internal error messages.
+ * NOTE: each module which uses one or more of these macros is expected
+ * to supply a char *dr_ie_fmt string containing the SCCS filename
+ * expansion macro (percent M percent) and a sprintf %d to render the
+ * line number argument.
+ */
+#define DR_INTERNAL_ERROR(hp) \
+ drerr_new(1, ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+#define DR_OP_INTERNAL_ERROR(hp) \
+ drerr_set_c(CE_WARN, &(hp)->h_err, \
+ ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+#define DR_DEV_INTERNAL_ERROR(cp) \
+ drerr_set_c(CE_WARN, &(cp)->sbdev_error, \
+ ESBD_INTERNAL, dr_ie_fmt, __LINE__)
+
+/*
+ * Macros for keeping an error code and an associated list of integers.
+ */
+#define DR_MAX_ERR_INT (32)
+#define DR_GET_E_CODE(sep) ((sep)->e_code)
+#define DR_SET_E_CODE(sep, en) ((sep)->e_code = (en))
+#define DR_GET_E_RSC(sep) ((sep)->e_rsc)
+
+/* Number of device node types. */
+#define DR_MAXNUM_NT 3
+
+/* used to map sbd_comp_type_t to array index */
+#define DEVSET_NIX(t) \
+ (((t) == SBD_COMP_CPU) ? 0 : \
+ ((t) == SBD_COMP_MEM) ? 1 : \
+ ((t) == SBD_COMP_IO) ? 2 : \
+ ((t) == SBD_COMP_CMP) ? 0 : DR_MAXNUM_NT)
+
+/*
+ * Format of dr_devset_t bit masks:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * |....|IIII|IIII|IIII|IIII|MMMM|MMMM|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|
+ *
+ * 1 = indicates respective component present/attached.
+ * I = I/O, M = Memory, C = CPU.
+ */
+#define DEVSET_CPU_OFFSET 0
+#define DEVSET_CPU_NUMBER 32
+#define DEVSET_MEM_OFFSET (DEVSET_CPU_OFFSET + DEVSET_CPU_NUMBER)
+#define DEVSET_MEM_NUMBER 8
+#define DEVSET_IO_OFFSET (DEVSET_MEM_OFFSET + DEVSET_MEM_NUMBER)
+#define DEVSET_IO_NUMBER 16
+#define DEVSET_MAX_BITS (DEVSET_IO_OFFSET + DEVSET_IO_NUMBER)
+
+#define DEVSET_BIX(t) \
+ (((t) == SBD_COMP_CPU) ? DEVSET_CPU_OFFSET : \
+ ((t) == SBD_COMP_MEM) ? DEVSET_MEM_OFFSET : \
+ ((t) == SBD_COMP_IO) ? DEVSET_IO_OFFSET : \
+ ((t) == SBD_COMP_CMP) ? DEVSET_CPU_OFFSET : 0)
+
+#define DEVSET_NT2DEVPOS(t, u) (((t) == SBD_COMP_CMP) ?\
+ (DEVSET_BIX(t) + (u) * MAX_CORES_PER_CMP) : DEVSET_BIX(t) + (u))
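+
+/*
+ * Worked example (illustrative): with the offsets above,
+ * DEVSET_NT2DEVPOS(SBD_COMP_MEM, 1) == DEVSET_MEM_OFFSET + 1 == 33,
+ * i.e. the second memory unit occupies bit 33 of a dr_devset_t, while
+ * a CMP's unit number is scaled by MAX_CORES_PER_CMP so that each CMP
+ * covers a contiguous run of CPU bits.
+ */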
+
+#if (DEVSET_MAX_BITS <= 64)
+typedef uint64_t dr_devset_t;
+
+#define DEVSET_ONEUNIT ((dr_devset_t)1)
+#define DEVSET_ANYUNIT ((dr_devset_t)(-1))
+#define DEVSET_CPU_NMASK ((dr_devset_t)((1ULL << DEVSET_CPU_NUMBER) - 1))
+#define DEVSET_MEM_NMASK ((dr_devset_t)((1ULL << DEVSET_MEM_NUMBER) - 1))
+#define DEVSET_IO_NMASK ((dr_devset_t)((1ULL << DEVSET_IO_NUMBER) - 1))
+#define DEVSET_CMP_NMASK ((dr_devset_t)((1ULL << MAX_CORES_PER_CMP) - 1))
+
+#define DEVSET_NMASK(t) \
+ (((t) == SBD_COMP_CPU) ? DEVSET_CPU_NMASK : \
+ ((t) == SBD_COMP_MEM) ? DEVSET_MEM_NMASK : \
+ ((t) == SBD_COMP_IO) ? DEVSET_IO_NMASK : \
+ ((t) == SBD_COMP_CMP) ? DEVSET_CPU_NMASK : 0)
+
+#define DEVSET_MASK \
+ ((DEVSET_CPU_NMASK << DEVSET_CPU_OFFSET) | \
+ (DEVSET_MEM_NMASK << DEVSET_MEM_OFFSET) | \
+ (DEVSET_IO_NMASK << DEVSET_IO_OFFSET))
+
+#define DEVSET(t, u) \
+ (((u) == DEVSET_ANYUNIT) ? \
+ ((DEVSET_NMASK(t) << DEVSET_NT2DEVPOS((t), 0)) & \
+ DEVSET_MASK) : \
+ ((t) == SBD_COMP_CMP) ? \
+ (DEVSET_CMP_NMASK << DEVSET_NT2DEVPOS((t), (u))) : \
+ (DEVSET_ONEUNIT << DEVSET_NT2DEVPOS((t), (u))))
+
+#define DEVSET_IS_NULL(ds) ((ds) == 0)
+#define DEVSET_IN_SET(ds, t, u) (((ds) & DEVSET((t), (u))) != 0)
+#define DEVSET_ADD(ds, t, u) ((ds) |= DEVSET((t), (u)))
+#define DEVSET_DEL(ds, t, u) ((ds) &= ~DEVSET((t), (u)))
+#define DEVSET_AND(ds1, ds2) ((ds1) & (ds2))
+#define DEVSET_OR(ds1, ds2) ((ds1) | (ds2))
+#define DEVSET_NAND(ds1, ds2) ((ds1) & ~(ds2))
+#define DEVSET_GET_UNITSET(ds, t) \
+ (((ds) & DEVSET((t), DEVSET_ANYUNIT)) >> DEVSET_NT2DEVPOS((t), 0))
+#define DEVSET_FMT_STR "0x%" PRIx64 ""
+#define DEVSET_FMT_ARG(ds) (ds)
+#else /* DEVSET_MAX_BITS <= 64 */
+#error please implement devset with a bitmap to support more than 64 devices
+#endif /* DEVSET_MAX_BITS <= 64 */
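+
+/*
+ * Worked examples (illustrative, assuming MAX_CORES_PER_CMP == 2):
+ *
+ *	DEVSET(SBD_COMP_CPU, 3)             == 0x0000000000000008
+ *	DEVSET(SBD_COMP_MEM, 0)             == 0x0000000100000000
+ *	DEVSET(SBD_COMP_CMP, 1)             == 0x000000000000000c
+ *	DEVSET(SBD_COMP_IO, DEVSET_ANYUNIT) == 0x00ffff0000000000
+ */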
+
+/*
+ * Ops for dr_board_t.b_dev_*
+ */
+#define DR_DEV_IS(ds, cp) DEVSET_IN_SET( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+#define DR_DEV_ADD(ds, cp) DEVSET_ADD( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+#define DR_DEV_DEL(ds, cp) DEVSET_DEL( \
+ (cp)->sbdev_bp->b_dev_##ds, \
+ (cp)->sbdev_type, \
+ (cp)->sbdev_unum)
+
+/*
+ * Ops for dr_board_t.b_dev_present
+ */
+#define DR_DEV_IS_PRESENT(cp) DR_DEV_IS(present, cp)
+#define DR_DEV_SET_PRESENT(cp) DR_DEV_ADD(present, cp)
+#define DR_DEV_CLR_PRESENT(cp) DR_DEV_DEL(present, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_attached
+ */
+#define DR_DEV_IS_ATTACHED(cp) DR_DEV_IS(attached, cp)
+#define DR_DEV_SET_ATTACHED(cp) DR_DEV_ADD(attached, cp)
+#define DR_DEV_CLR_ATTACHED(cp) DR_DEV_DEL(attached, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_released
+ */
+#define DR_DEV_IS_RELEASED(cp) DR_DEV_IS(released, cp)
+#define DR_DEV_SET_RELEASED(cp) DR_DEV_ADD(released, cp)
+#define DR_DEV_CLR_RELEASED(cp) DR_DEV_DEL(released, cp)
+
+/*
+ * Ops for dr_board_t.b_dev_unreferenced
+ */
+#define DR_DEV_IS_UNREFERENCED(cp) DR_DEV_IS(unreferenced, cp)
+#define DR_DEV_SET_UNREFERENCED(cp) DR_DEV_ADD(unreferenced, cp)
+#define DR_DEV_CLR_UNREFERENCED(cp) DR_DEV_DEL(unreferenced, cp)
+
+#define DR_DEVS_PRESENT(bp) \
+ ((bp)->b_dev_present)
+#define DR_DEVS_ATTACHED(bp) \
+ ((bp)->b_dev_attached)
+#define DR_DEVS_RELEASED(bp) \
+ ((bp)->b_dev_released)
+#define DR_DEVS_UNREFERENCED(bp) \
+ ((bp)->b_dev_unreferenced)
+#define DR_DEVS_UNATTACHED(bp) \
+ ((bp)->b_dev_present & ~(bp)->b_dev_attached)
+#define DR_DEVS_CONFIGURE(bp, devs) \
+ ((bp)->b_dev_attached = (devs))
+#define DR_DEVS_DISCONNECT(bp, devs) \
+ ((bp)->b_dev_present &= ~(devs))
+#define DR_DEVS_CANCEL(bp, devs) \
+ ((bp)->b_dev_released &= ~(devs), \
+ (bp)->b_dev_unreferenced &= ~(devs))
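+
+/*
+ * Illustrative example: if a board has CPU units 0 and 1 present but
+ * only unit 0 attached (b_dev_present == 0x3, b_dev_attached == 0x1),
+ * DR_DEVS_UNATTACHED(bp) yields 0x2, selecting just CPU unit 1.
+ */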
+
+/*
+ * CMP Specific Helpers
+ */
+#define DR_CMP_CORE_UNUM(cmp, core) ((cmp) * MAX_CORES_PER_CMP + (core))
+
+/*
+ * For CPU and CMP devices, DR_UNUM2SBD_UNUM is used to extract the physical
+ * CPU/CMP id from the device id.
+ */
+#define DR_UNUM2SBD_UNUM(n, d) \
+ ((d) == SBD_COMP_CPU ? ((n) / MAX_CORES_PER_CMP) : \
+ (d) == SBD_COMP_CMP ? ((n) / MAX_CORES_PER_CMP) : (n))
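+
+/*
+ * Illustrative example: assuming MAX_CORES_PER_CMP == 2, CPU device
+ * ids 4 and 5 both map to physical CMP id 2, i.e.
+ * DR_UNUM2SBD_UNUM(5, SBD_COMP_CPU) == 2.
+ */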
+
+/*
+ * Some stuff to assist in debug.
+ */
+#ifdef DEBUG
+#define DRDBG_STATE 0x00000001
+#define DRDBG_QR 0x00000002
+#define DRDBG_CPU 0x00000004
+#define DRDBG_MEM 0x00000008
+#define DRDBG_IO 0x00000010
+
+#define PR_ALL if (dr_debug) printf
+#define PR_STATE if (dr_debug & DRDBG_STATE) printf
+#define PR_QR if (dr_debug & DRDBG_QR) prom_printf
+#define PR_CPU if (dr_debug & DRDBG_CPU) printf
+#define PR_MEM if (dr_debug & DRDBG_MEM) printf
+#define PR_IO if (dr_debug & DRDBG_IO) printf
+#define PR_MEMLIST_DUMP if (dr_debug & DRDBG_MEM) MEMLIST_DUMP
+
+extern uint_t dr_debug;
+#else /* DEBUG */
+#define PR_ALL _NOTE(CONSTANTCONDITION) if (0) printf
+#define PR_STATE PR_ALL
+#define PR_QR PR_ALL
+#define PR_CPU PR_ALL
+#define PR_MEM PR_ALL
+#define PR_IO PR_ALL
+#define PR_MEMLIST_DUMP _NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP
+
+#endif /* DEBUG */
+
+/*
+ * dr_board_t b_sflags.
+ */
+#define DR_BSLOCK 0x01 /* for blocking status (protected by b_slock) */
+
+typedef const char *fn_t;
+
+/*
+ * Unsafe devices based on dr.conf prop "unsupported-io-drivers"
+ */
+typedef struct {
+ char **devnames;
+ uint_t ndevs;
+} dr_unsafe_devs_t;
+
+/*
+ * Device states.
+ * PARTIAL state is really only relevant for board state.
+ */
+typedef enum {
+ DR_STATE_EMPTY = 0,
+ DR_STATE_OCCUPIED,
+ DR_STATE_CONNECTED,
+ DR_STATE_UNCONFIGURED,
+ DR_STATE_PARTIAL, /* part connected, part configured */
+ DR_STATE_CONFIGURED,
+ DR_STATE_RELEASE,
+ DR_STATE_UNREFERENCED,
+ DR_STATE_FATAL,
+ DR_STATE_MAX
+} dr_state_t;
+
+typedef struct dr_handle {
+ struct dr_board *h_bd;
+ sbd_error_t *h_err;
+ int h_op_intr; /* nz if op interrupted */
+ dev_t h_dev; /* dev_t of opened device */
+ int h_cmd; /* PIM ioctl argument */
+ int h_mode; /* device open mode */
+ sbd_cmd_t h_sbdcmd; /* copied-in ioctl cmd struct */
+ sbd_ioctl_arg_t *h_iap; /* ptr to caller-space cmd struct */
+ dr_devset_t h_devset; /* based on h_dev */
+ uint_t h_ndi;
+ drmach_opts_t h_opts; /* command-line platform options */
+} dr_handle_t;
+
+typedef struct dr_common_unit {
+ dr_state_t sbdev_state;
+ sbd_state_t sbdev_ostate;
+ sbd_cond_t sbdev_cond;
+ time_t sbdev_time;
+ int sbdev_busy;
+ struct dr_board *sbdev_bp;
+ int sbdev_unum;
+ sbd_comp_type_t sbdev_type;
+ drmachid_t sbdev_id;
+ char sbdev_path[MAXNAMELEN];
+ sbd_error_t *sbdev_error;
+} dr_common_unit_t;
+
+typedef struct dr_mem_unit {
+ dr_common_unit_t sbm_cm; /* mem-unit state */
+ uint_t sbm_flags;
+ pfn_t sbm_basepfn;
+ pgcnt_t sbm_npages;
+ pgcnt_t sbm_pageslost;
+ struct memlist *sbm_dyn_segs; /* kphysm_add_dynamic segs */
+ /*
+ * The following fields are used during
+ * the memory detach process only. sbm_mlist
+ * will be used to store the board memlist
+ * following a detach. The memlist will be
+ * used to re-attach the board when configuring
+ * the unit directly after an unconfigure.
+ */
+ struct dr_mem_unit *sbm_peer;
+ struct memlist *sbm_mlist;
+ struct memlist *sbm_del_mlist;
+ memhandle_t sbm_memhandle;
+ uint64_t sbm_alignment_mask;
+ uint64_t sbm_slice_base;
+ uint64_t sbm_slice_top;
+ uint64_t sbm_slice_size;
+} dr_mem_unit_t;
+
+/*
+ * Currently, we only maintain state information for individual
+ * components.
+ */
+typedef struct dr_cpu_unit {
+ dr_common_unit_t sbc_cm; /* cpu-unit state */
+ processorid_t sbc_cpu_id;
+ cpu_flag_t sbc_cpu_flags; /* snapshot of CPU flags */
+ ushort_t sbc_pad1; /* padded for compatibility */
+ int sbc_speed;
+ int sbc_ecache;
+ int sbc_cpu_impl;
+} dr_cpu_unit_t;
+
+typedef struct dr_io_unit {
+ dr_common_unit_t sbi_cm; /* io-unit state */
+} dr_io_unit_t;
+
+typedef union {
+ dr_common_unit_t du_common;
+ dr_mem_unit_t du_mem;
+ dr_cpu_unit_t du_cpu;
+ dr_io_unit_t du_io;
+} dr_dev_unit_t;
+
+typedef struct dr_board {
+ kmutex_t b_lock; /* lock for this board struct */
+ kmutex_t b_slock; /* lock for status on the board */
+ kcondvar_t b_scv; /* condvar for status on the board */
+ int b_sflags; /* for serializing status */
+ sbd_state_t b_rstate; /* board's cfgadm receptacle state */
+ sbd_state_t b_ostate; /* board's cfgadm occupant state */
+ sbd_cond_t b_cond; /* cfgadm condition */
+ int b_busy;
+ int b_assigned;
+ time_t b_time; /* time of last board operation */
+ char b_type[MAXNAMELEN];
+ drmachid_t b_id;
+ int b_num; /* board number */
+ int b_ndev; /* # of devices on board */
+ dev_info_t *b_dip; /* dip for make-nodes */
+ dr_state_t b_state; /* board DR state */
+ dr_devset_t b_dev_present; /* present mask */
+ dr_devset_t b_dev_attached; /* attached mask */
+ dr_devset_t b_dev_released; /* released mask */
+ dr_devset_t b_dev_unreferenced; /* unreferenced mask */
+ char b_path[MAXNAMELEN];
+ dr_dev_unit_t *b_dev[DR_MAXNUM_NT];
+} dr_board_t;
+
+/*
+ * dr_quiesce.c interfaces
+ */
+struct dr_sr_handle;
+typedef struct dr_sr_handle dr_sr_handle_t;
+
+extern dr_sr_handle_t *dr_get_sr_handle(dr_handle_t *handle);
+extern void dr_release_sr_handle(dr_sr_handle_t *srh);
+extern int dr_suspend(dr_sr_handle_t *srh);
+extern void dr_resume(dr_sr_handle_t *srh);
+extern void dr_check_devices(dev_info_t *dip, int *refcount,
+ dr_handle_t *handle, uint64_t *arr, int *idx,
+ int len, int *refcount_non_gldv3);
+extern int dr_pt_test_suspend(dr_handle_t *hp);
+
+/*
+ * dr_cpu.c interface
+ */
+extern void dr_init_cpu_unit(dr_cpu_unit_t *cp);
+extern int dr_pre_attach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_detach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_detach_cpu(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_cpu_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_cancel_cpu(dr_cpu_unit_t *cp);
+extern int dr_disconnect_cpu(dr_cpu_unit_t *cp);
+
+
+/*
+ * dr_mem.c interface
+ */
+extern void dr_init_mem_unit(dr_mem_unit_t *mp);
+extern int dr_pre_attach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_release_mem(dr_common_unit_t *cp);
+extern void dr_release_mem_done(dr_common_unit_t *cp);
+extern int dr_pre_detach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_mem(dr_handle_t *, dr_common_unit_t *);
+extern int dr_post_detach_mem(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_mem_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_cancel_mem(dr_mem_unit_t *mp);
+extern int dr_disconnect_mem(dr_mem_unit_t *mp);
+
+/*
+ * dr_io.c interface
+ */
+extern void dr_init_io_unit(dr_io_unit_t *io);
+extern int dr_pre_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_attach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_release_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_pre_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern void dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp);
+extern int dr_post_detach_io(dr_handle_t *hp,
+ dr_common_unit_t **devlist, int devnum);
+extern int dr_io_status(dr_handle_t *hp, dr_devset_t devset,
+ sbd_dev_stat_t *dsp);
+extern int dr_disconnect_io(dr_io_unit_t *ip);
+
+
+/*
+ * dr.c interface
+ */
+extern void dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...);
+extern void dr_dev_err(int ce, dr_common_unit_t *cp, int code);
+
+extern dr_cpu_unit_t *dr_get_cpu_unit(dr_board_t *bp, int unit_num);
+extern dr_mem_unit_t *dr_get_mem_unit(dr_board_t *bp, int unit_num);
+extern dr_io_unit_t *dr_get_io_unit(dr_board_t *bp, int unit_num);
+
+extern dr_board_t *dr_lookup_board(int board_num);
+extern int dr_release_dev_done(dr_common_unit_t *cp);
+extern char *dr_nt_to_dev_type(int type);
+extern void dr_device_transition(dr_common_unit_t *cp,
+ dr_state_t new_state);
+extern void dr_lock_status(dr_board_t *bp);
+extern void dr_unlock_status(dr_board_t *bp);
+extern int dr_cmd_flags(dr_handle_t *hp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DR_H */
diff --git a/usr/src/uts/i86pc/sys/dr_util.h b/usr/src/uts/i86pc/sys/dr_util.h
new file mode 100644
index 0000000000..46a66c759b
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/dr_util.h
@@ -0,0 +1,99 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_DR_UTIL_H_
+#define _SYS_DR_UTIL_H_
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/memlist.h>
+#include <sys/varargs.h>
+#include <sys/sbd_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define GETSTRUCT(t, n) \
+ ((t *)kmem_zalloc((size_t)(n) * sizeof (t), KM_SLEEP))
+#define FREESTRUCT(p, t, n) \
+ (kmem_free((caddr_t)(p), sizeof (t) * (size_t)(n)))
+
+#define DRERR_SET_C(epps, eppn) \
+ if (*(epps) == NULL) \
+ *(epps) = *(eppn); \
+ else \
+ sbd_err_clear(eppn)
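+
+/*
+ * Illustrative example: DRERR_SET_C keeps the first error recorded.
+ * Given sbd_error_t *err = NULL, DRERR_SET_C(&err, &new_err) stores
+ * new_err in err; on any later call with err already set, the new
+ * error is discarded via sbd_err_clear().
+ */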
+
+#ifdef DEBUG
+#define MEMLIST_DUMP(ml) memlist_dump(ml)
+#else
+#define MEMLIST_DUMP(ml)
+#endif
+
+extern sbd_error_t *sbd_err_new(int e_code, char *fmt, va_list args);
+extern void sbd_err_log(sbd_error_t *ep, int ce);
+extern void sbd_err_clear(sbd_error_t **ep);
+extern void sbd_err_set_c(sbd_error_t **ep, int ce,
+ int e_code, char *fmt, ...);
+extern void sbd_err_set(sbd_error_t **ep, int ce,
+ int e_code, char *fmt, ...);
+
+extern sbd_error_t *drerr_new(int log, int e_code, char *fmt, ...);
+extern sbd_error_t *drerr_new_v(int e_code, char *fmt, va_list args);
+extern void drerr_set_c(int log, sbd_error_t **ep,
+ int e_code, char *fmt, ...);
+
+extern void dr_memlist_delete(struct memlist *mlist);
+extern void memlist_dump(struct memlist *mlist);
+extern int dr_memlist_intersect(struct memlist *al,
+ struct memlist *bl);
+extern void dr_memlist_coalesce(struct memlist *mlist);
+extern struct memlist *dr_memlist_dup(struct memlist *mlist);
+extern struct memlist *dr_memlist_add_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+extern struct memlist *dr_memlist_del_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+extern struct memlist *dr_memlist_cat_span(struct memlist *mlist,
+ uint64_t base, uint64_t len);
+
+/*
+ * These are all utilities internal to DR. Functions with similar
+ * names exist in common/os, so we rename ours to avoid any
+ * namespace conflict.
+ */
+#define memlist_delete dr_memlist_delete
+#define memlist_intersect dr_memlist_intersect
+#define memlist_coalesce dr_memlist_coalesce
+#define memlist_dup dr_memlist_dup
+#define memlist_add_span dr_memlist_add_span
+#define memlist_del_span dr_memlist_del_span
+#define memlist_cat_span dr_memlist_cat_span
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DR_UTIL_H_ */
diff --git a/usr/src/uts/i86pc/sys/drmach.h b/usr/src/uts/i86pc/sys/drmach.h
new file mode 100644
index 0000000000..2d2a2cff7b
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/drmach.h
@@ -0,0 +1,177 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_DRMACH_H_
+#define _SYS_DRMACH_H_
+#include <sys/types.h>
+#include <sys/memlist.h>
+#include <sys/ddi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/sysevent.h>
+#include <sys/x86_archext.h>
+#include <sys/sbd_ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BOARDS drmach_max_boards()
+#define MAX_MEM_UNITS_PER_BOARD drmach_max_mem_units_per_board()
+#define MAX_IO_UNITS_PER_BOARD drmach_max_io_units_per_board()
+#define MAX_CMP_UNITS_PER_BOARD drmach_max_cmp_units_per_board()
+
+/* DR uses MAX_CORES_PER_CMP as the number of logical CPUs within a CMP. */
+#define MAX_CORES_PER_CMP drmach_max_core_per_cmp()
+
+/* Maximum possible logical CPUs per board. */
+#define MAX_CPU_UNITS_PER_BOARD (MAX_CMP_UNITS_PER_BOARD * MAX_CORES_PER_CMP)
+
+/* Check whether the CPU is a CMP, i.e. has more than one core/thread. */
+#define CPU_IMPL_IS_CMP(impl) (MAX_CORES_PER_CMP > 1)
+
+/* CPU implementation ID for Intel Nehalem CPU. */
+#define X86_CPU_IMPL_NEHALEM_EX 0x062E0000
+#define X86_CPU_IMPL_UNKNOWN 0x00000000
+
+/* returned with drmach_board_find_devices callback */
+#define DRMACH_DEVTYPE_CPU "cpu"
+#define DRMACH_DEVTYPE_MEM "memory"
+#define DRMACH_DEVTYPE_PCI "pci"
+
+/*
+ * x86 platform-specific routines, currently only defined
+ * in drmach_acpi.c and referenced by DR.
+ */
+
+typedef void *drmachid_t;
+
+typedef struct {
+ boolean_t assigned;
+ boolean_t powered;
+ boolean_t configured;
+ boolean_t busy;
+ boolean_t empty;
+ sbd_cond_t cond;
+ char type[SBD_TYPE_LEN];
+ char info[SBD_MAX_INFO];
+} drmach_status_t;
+
+typedef struct {
+ int size;
+ char *copts;
+} drmach_opts_t;
+
+typedef struct {
+ uint64_t mi_basepa;
+ uint64_t mi_size;
+ uint64_t mi_slice_base;
+ uint64_t mi_slice_top;
+ uint64_t mi_slice_size;
+ uint64_t mi_alignment_mask;
+} drmach_mem_info_t;
+
+extern uint_t drmach_max_boards(void);
+extern uint_t drmach_max_io_units_per_board(void);
+extern uint_t drmach_max_cmp_units_per_board(void);
+extern uint_t drmach_max_mem_units_per_board(void);
+extern uint_t drmach_max_core_per_cmp(void);
+
+extern sbd_error_t *drmach_get_dip(drmachid_t id, dev_info_t **dip);
+extern sbd_error_t *drmach_release(drmachid_t id);
+extern sbd_error_t *drmach_pre_op(int cmd, drmachid_t id,
+ drmach_opts_t *opts, void *devsetp);
+extern sbd_error_t *drmach_post_op(int cmd, drmachid_t id,
+ drmach_opts_t *opts, int rv);
+extern sbd_error_t *drmach_configure(drmachid_t id, int flags);
+extern sbd_error_t *drmach_unconfigure(drmachid_t id, int flags);
+extern sbd_error_t *drmach_status(drmachid_t id, drmach_status_t *stat);
+extern sbd_error_t *drmach_passthru(drmachid_t id,
+ drmach_opts_t *opts);
+
+extern sbd_error_t *drmach_board_find_devices(drmachid_t id, void *a,
+ sbd_error_t *(*found)(void *a, const char *, int, drmachid_t));
+extern int drmach_board_lookup(int bnum, drmachid_t *id);
+extern sbd_error_t *drmach_board_name(int bnum, char *buf, int buflen);
+extern sbd_error_t *drmach_board_assign(int bnum, drmachid_t *id);
+extern sbd_error_t *drmach_board_unassign(drmachid_t id);
+extern sbd_error_t *drmach_board_poweroff(drmachid_t id);
+extern sbd_error_t *drmach_board_poweron(drmachid_t id);
+extern sbd_error_t *drmach_board_test(drmachid_t id, drmach_opts_t *opts,
+ int force);
+extern sbd_error_t *drmach_board_connect(drmachid_t id,
+ drmach_opts_t *opts);
+extern sbd_error_t *drmach_board_disconnect(drmachid_t id,
+ drmach_opts_t *opts);
+extern sbd_error_t *drmach_board_deprobe(drmachid_t id);
+extern int drmach_board_is_floating(drmachid_t);
+
+extern sbd_error_t *drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid);
+extern sbd_error_t *drmach_cpu_get_impl(drmachid_t id, int *ip);
+extern sbd_error_t *drmach_cpu_disconnect(drmachid_t id);
+
+extern sbd_error_t *drmach_io_is_attached(drmachid_t id, int *yes);
+extern sbd_error_t *drmach_io_post_attach(drmachid_t id);
+extern sbd_error_t *drmach_io_pre_release(drmachid_t id);
+extern sbd_error_t *drmach_io_unrelease(drmachid_t id);
+extern sbd_error_t *drmach_io_post_release(drmachid_t id);
+
+extern sbd_error_t *drmach_mem_get_slice_info(drmachid_t id,
+ uint64_t *basepa, uint64_t *endpa,
+ uint64_t *sizep);
+extern sbd_error_t *drmach_mem_get_memlist(drmachid_t id,
+ struct memlist **ml);
+extern sbd_error_t *drmach_mem_get_info(drmachid_t, drmach_mem_info_t *);
+extern sbd_error_t *drmach_mem_enable(drmachid_t id);
+extern sbd_error_t *drmach_mem_disable(drmachid_t id);
+extern sbd_error_t *drmach_mem_add_span(drmachid_t id,
+ uint64_t basepa, uint64_t size);
+extern sbd_error_t *drmach_mem_del_span(drmachid_t id,
+ uint64_t basepa, uint64_t size);
+extern sbd_error_t *drmach_copy_rename_init(
+ drmachid_t dst_id, drmachid_t src_id,
+ struct memlist *src_copy_ml,
+ drmachid_t *pgm_id);
+extern sbd_error_t *drmach_copy_rename_fini(drmachid_t id);
+extern void drmach_copy_rename(drmachid_t id);
+extern int drmach_copy_rename_need_suspend(drmachid_t id);
+
+extern int drmach_log_sysevent(int board, char *hint, int flag,
+ int verbose);
+
+extern int drmach_verify_sr(dev_info_t *dip, int sflag);
+extern void drmach_suspend_last();
+extern void drmach_resume_first();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_DRMACH_H_ */
diff --git a/usr/src/uts/i86pc/sys/fastboot_msg.h b/usr/src/uts/i86pc/sys/fastboot_msg.h
index a35fcf2f67..4c7ee7f8b5 100644
--- a/usr/src/uts/i86pc/sys/fastboot_msg.h
+++ b/usr/src/uts/i86pc/sys/fastboot_msg.h
@@ -45,6 +45,7 @@
fastboot_nosup_msg(FBNS_DEFAULT, "")
fastboot_nosup_msg(FBNS_SUSPEND, " after suspend/resume")
fastboot_nosup_msg(FBNS_FMAHWERR, " due to FMA recovery from hardware error")
+fastboot_nosup_msg(FBNS_HOTPLUG, " after DR operations")
/*
* Should ALWAYS be the last one.
diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h
index b652331715..67b9a07439 100644
--- a/usr/src/uts/i86pc/sys/machcpuvar.h
+++ b/usr/src/uts/i86pc/sys/machcpuvar.h
@@ -63,6 +63,7 @@ typedef void *cpu_pri_lev_t;
struct cpuid_info;
struct cpu_ucode_info;
+struct cmi_hdl;
/*
* A note about the hypervisor affinity bits: a one bit in the affinity mask
@@ -135,7 +136,9 @@ struct machcpu {
struct cpu_ucode_info *mcpu_ucode_info;
- void *mcpu_pm_mach_state;
+ void *mcpu_pm_mach_state;
+ struct cmi_hdl *mcpu_cmi_hdl;
+ void *mcpu_mach_ctx_ptr;
/*
* A stamp that is unique per processor and changes
diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h
index b98a48326e..8c79243a29 100644
--- a/usr/src/uts/i86pc/sys/machsystm.h
+++ b/usr/src/uts/i86pc/sys/machsystm.h
@@ -23,6 +23,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_MACHSYSTM_H
#define _SYS_MACHSYSTM_H
@@ -51,11 +55,30 @@ extern "C" {
#ifdef _KERNEL
+typedef enum mach_cpu_add_arg_type {
+ MACH_CPU_ARG_LOCAL_APIC,
+ MACH_CPU_ARG_LOCAL_X2APIC,
+} mach_cpu_add_arg_type_t;
+
+typedef struct mach_cpu_add_arg {
+ mach_cpu_add_arg_type_t type;
+ union {
+ struct {
+ uint32_t apic_id;
+ uint32_t proc_id;
+ } apic;
+ } arg;
+} mach_cpu_add_arg_t;
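+
+/*
+ * Illustrative sketch (hypothetical ids): adding a CPU by local APIC
+ * id through mach_cpu_add() below might look like
+ *
+ *	mach_cpu_add_arg_t arg;
+ *	processorid_t cpuid;
+ *
+ *	arg.type = MACH_CPU_ARG_LOCAL_APIC;
+ *	arg.arg.apic.apic_id = 4;	(from the ACPI MADT entry)
+ *	arg.arg.apic.proc_id = 4;
+ *	(void) mach_cpu_add(&arg, &cpuid);
+ */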
+
extern void mach_cpu_idle(void);
extern void mach_cpu_halt(char *);
extern int mach_cpu_start(cpu_t *, void *);
extern int mach_cpuid_start(processorid_t, void *);
+extern int mach_cpu_stop(cpu_t *, void *);
+extern int mach_cpu_add(mach_cpu_add_arg_t *, processorid_t *);
+extern int mach_cpu_remove(processorid_t);
extern int mach_cpu_create_device_node(cpu_t *, dev_info_t **);
+extern int mach_cpu_get_device_node(cpu_t *, dev_info_t **);
extern int Cpudelay;
extern void setcpudelay(void);
@@ -120,10 +143,16 @@ extern int use_mp;
extern struct cpu cpus[]; /* pointer to other cpus */
extern struct cpu *cpu[]; /* pointer to all cpus */
+/* Operation types for extended mach_cpucontext interfaces */
+#define MACH_CPUCONTEXT_OP_START 0
+#define MACH_CPUCONTEXT_OP_STOP 1
+
extern int mach_cpucontext_init(void);
extern void mach_cpucontext_fini(void);
extern void *mach_cpucontext_alloc(struct cpu *);
extern void mach_cpucontext_free(struct cpu *, void *, int);
+extern void *mach_cpucontext_xalloc(struct cpu *, int);
+extern void mach_cpucontext_xfree(struct cpu *, void *, int, int);
extern void rmp_gdt_init(rm_platter_t *);
extern uintptr_t hole_start, hole_end;
@@ -145,6 +174,52 @@ extern int dtrace_linear_pc(struct regs *rp, proc_t *p, caddr_t *linearp);
extern int force_shutdown_method;
+/* Dynamic Reconfiguration capability interface. */
+#define PLAT_DR_OPTIONS_NAME "plat-dr-options"
+#define PLAT_DR_PHYSMAX_NAME "plat-dr-physmax"
+#define PLAT_MAX_NCPUS_NAME "plat-max-ncpus"
+#define BOOT_MAX_NCPUS_NAME "boot-max-ncpus"
+#define BOOT_NCPUS_NAME "boot-ncpus"
+
+#define PLAT_DR_FEATURE_CPU 0x1
+#define PLAT_DR_FEATURE_MEMORY 0x2
+#define PLAT_DR_FEATURE_ENABLED 0x1000000
+
+#define plat_dr_enabled() \
+ plat_dr_check_capability(PLAT_DR_FEATURE_ENABLED)
+
+#define plat_dr_enable() \
+ plat_dr_enable_capability(PLAT_DR_FEATURE_ENABLED)
+
+#define plat_dr_disable_cpu() \
+ plat_dr_disable_capability(PLAT_DR_FEATURE_CPU)
+#define plat_dr_disable_memory() \
+ plat_dr_disable_capability(PLAT_DR_FEATURE_MEMORY)
+
+extern boolean_t plat_dr_support_cpu(void);
+extern boolean_t plat_dr_support_memory(void);
+extern boolean_t plat_dr_check_capability(uint64_t features);
+extern void plat_dr_enable_capability(uint64_t features);
+extern void plat_dr_disable_capability(uint64_t features);
+
+#pragma weak plat_dr_support_cpu
+#pragma weak plat_dr_support_memory
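+
+/*
+ * Illustrative note: plat_dr_support_cpu() and plat_dr_support_memory()
+ * are weak symbols, so a caller would typically confirm the platform
+ * supplies them before calling, e.g.
+ *
+ *	if (&plat_dr_support_cpu != NULL && plat_dr_support_cpu())
+ *		(CPU hotplug is available)
+ */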
+
+/*
+ * Used to communicate DR updates to the platform lgroup framework.
+ */
+typedef struct {
+ uint64_t u_base;
+ uint64_t u_length;
+ uint32_t u_domain;
+ uint32_t u_device_id;
+ uint32_t u_sli_cnt;
+ uchar_t *u_sli_ptr;
+} update_membounds_t;
+
+/* Maximum physical page number (PFN) for memory DR operations. */
+extern uint64_t plat_dr_physmax;
+
#ifdef __xpv
#include <sys/xen_mmu.h>
extern page_t *page_get_high_mfn(mfn_t);
diff --git a/usr/src/uts/i86pc/sys/memnode.h b/usr/src/uts/i86pc/sys/memnode.h
index dde3931627..2bcb623617 100644
--- a/usr/src/uts/i86pc/sys/memnode.h
+++ b/usr/src/uts/i86pc/sys/memnode.h
@@ -41,8 +41,9 @@ extern "C" {
* memory slice ID and the high-order bits are the SSM nodeid.
*/
+#define MAX_MEM_NODES_PER_LGROUP 3
#ifndef MAX_MEM_NODES
-#define MAX_MEM_NODES (8)
+#define MAX_MEM_NODES (8 * MAX_MEM_NODES_PER_LGROUP)
#endif /* MAX_MEM_NODES */
#define PFN_2_MEM_NODE(pfn) \
@@ -57,14 +58,12 @@ extern "C" {
*/
extern int plat_pfn_to_mem_node(pfn_t);
-extern int plat_lgrphand_to_mem_node(lgrp_handle_t);
extern void plat_assign_lgrphand_to_mem_node(lgrp_handle_t, int);
extern lgrp_handle_t plat_mem_node_to_lgrphand(int);
extern void plat_slice_add(pfn_t, pfn_t);
extern void plat_slice_del(pfn_t, pfn_t);
#pragma weak plat_pfn_to_mem_node
-#pragma weak plat_lgrphand_to_mem_node
#pragma weak plat_mem_node_to_lgrphand
#pragma weak plat_slice_add
#pragma weak plat_slice_del
diff --git a/usr/src/uts/i86pc/sys/psm_types.h b/usr/src/uts/i86pc/sys/psm_types.h
index 17e9db17b1..24ad7d700f 100644
--- a/usr/src/uts/i86pc/sys/psm_types.h
+++ b/usr/src/uts/i86pc/sys/psm_types.h
@@ -23,12 +23,14 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_PSM_TYPES_H
#define _SYS_PSM_TYPES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Platform Specific Module Types
*/
@@ -88,6 +90,31 @@ typedef struct psm_state_req {
} req;
} psm_state_request_t;
+typedef enum psm_cpu_op_e {
+ PSM_CPU_ADD = 1,
+ PSM_CPU_REMOVE,
+ PSM_CPU_STOP
+} psm_cpu_op_t;
+
+typedef struct psm_cpu_request {
+ psm_cpu_op_t pcr_cmd;
+ union {
+ struct {
+ processorid_t cpuid;
+ void *argp;
+ } cpu_add;
+
+ struct {
+ processorid_t cpuid;
+ } cpu_remove;
+
+ struct {
+ processorid_t cpuid;
+ void *ctx;
+ } cpu_stop;
+ } req;
+} psm_cpu_request_t;
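+
+/*
+ * Illustrative sketch (hypothetical values): asking the platform
+ * module to stop CPU 2 through the PSMI 1.7 psm_cpu_ops entry point
+ * might fill the request as
+ *
+ *	psm_cpu_request_t req;
+ *
+ *	req.pcr_cmd = PSM_CPU_STOP;
+ *	req.req.cpu_stop.cpuid = 2;
+ *	req.req.cpu_stop.ctx = NULL;
+ */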
+
struct psm_ops {
int (*psm_probe)(void);
@@ -106,7 +133,7 @@ struct psm_ops {
void (*psm_unset_idlecpu)(processorid_t cpun);
#if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \
- defined(PSMI_1_6)
+ defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_clkinit)(int hertz);
#else
void (*psm_clkinit)(int hertz);
@@ -117,14 +144,14 @@ struct psm_ops {
hrtime_t (*psm_gethrtime)(void);
processorid_t (*psm_get_next_processorid)(processorid_t cpu_id);
-#if defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_cpu_start)(processorid_t cpun, caddr_t ctxt);
#else
void (*psm_cpu_start)(processorid_t cpun, caddr_t rm_code);
#endif
int (*psm_post_cpu_start)(void);
#if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \
- defined(PSMI_1_5) || defined(PSMI_1_6)
+ defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_shutdown)(int cmd, int fcn);
#else
void (*psm_shutdown)(void);
@@ -140,26 +167,30 @@ struct psm_ops {
#endif
void (*psm_notify_error)(int level, char *errmsg);
#if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \
- defined(PSMI_1_5) || defined(PSMI_1_6)
+ defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_notify_func)(int msg);
#endif
#if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \
- defined(PSMI_1_6)
+ defined(PSMI_1_6) || defined(PSMI_1_7)
void (*psm_timer_reprogram)(hrtime_t time);
void (*psm_timer_enable)(void);
void (*psm_timer_disable)(void);
void (*psm_post_cyclic_setup)(void *arg);
#endif
-#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6) || \
+ defined(PSMI_1_7)
void (*psm_preshutdown)(int cmd, int fcn);
#endif
-#if defined(PSMI_1_5) || defined(PSMI_1_6)
+#if defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_intr_ops)(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
psm_intr_op_t op, int *result);
#endif
-#if defined(PSMI_1_6)
+#if defined(PSMI_1_6) || defined(PSMI_1_7)
int (*psm_state)(psm_state_request_t *request);
#endif
+#if defined(PSMI_1_7)
+ int (*psm_cpu_ops)(psm_cpu_request_t *reqp);
+#endif
};
@@ -183,7 +214,8 @@ struct psm_info {
#define PSM_INFO_VER01_3 0x8604
#define PSM_INFO_VER01_4 0x8605
#define PSM_INFO_VER01_5 0x8606
-#define PSM_INFO_VER01_6 0x8706
+#define PSM_INFO_VER01_6 0x8607
+#define PSM_INFO_VER01_7 0x8608
#define PSM_INFO_VER01_X (PSM_INFO_VER01_1 & 0xFFF0) /* ver 1.X */
/*
diff --git a/usr/src/uts/i86pc/sys/rm_platter.h b/usr/src/uts/i86pc/sys/rm_platter.h
index b21c54dd63..48e141126b 100644
--- a/usr/src/uts/i86pc/sys/rm_platter.h
+++ b/usr/src/uts/i86pc/sys/rm_platter.h
@@ -22,12 +22,14 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
#ifndef _SYS_RM_PLATTER_H
#define _SYS_RM_PLATTER_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/tss.h>
#include <sys/segments.h>
@@ -36,8 +38,12 @@
extern "C" {
#endif
+#define RM_PLATTER_CODE_SIZE 0x400
+#define RM_PLATTER_CPU_HALT_CODE_SIZE 0x100
+
typedef struct rm_platter {
- char rm_code[1024];
+ char rm_code[RM_PLATTER_CODE_SIZE];
+ char rm_cpu_halt_code[RM_PLATTER_CPU_HALT_CODE_SIZE];
#if defined(__amd64)
/*
* The compiler will want to 64-bit align the 64-bit rm_gdt_base
@@ -59,7 +65,7 @@ typedef struct rm_platter {
*/
uint32_t rm_idt_pad;
#endif /* __amd64 */
- ushort_t rm_filler2; /* till I am sure that pragma works */
+ ushort_t rm_cpu_halted; /* non-zero if CPU has been halted */
ushort_t rm_idt_lim; /* stuff for lidt */
gate_desc_t *rm_idt_base;
uint_t rm_pdbr; /* cr3 value */
diff --git a/usr/src/uts/i86pc/sys/sbd_ioctl.h b/usr/src/uts/i86pc/sys/sbd_ioctl.h
new file mode 100644
index 0000000000..1eab543aa3
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/sbd_ioctl.h
@@ -0,0 +1,655 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SBD_IOCTL_H
+#define _SBD_IOCTL_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/obpdefs.h>
+#include <sys/processor.h>
+#include <sys/param.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _ASM
+typedef enum {
+ SBD_COMP_NONE,
+ SBD_COMP_CPU,
+ SBD_COMP_MEM,
+ SBD_COMP_IO,
+ SBD_COMP_CMP,
+ SBD_COMP_UNKNOWN
+} sbd_comp_type_t;
+
+typedef enum {
+ SBD_STAT_NONE = 0,
+ SBD_STAT_EMPTY,
+ SBD_STAT_DISCONNECTED,
+ SBD_STAT_CONNECTED,
+ SBD_STAT_UNCONFIGURED,
+ SBD_STAT_CONFIGURED
+} sbd_state_t;
+
+typedef enum {
+ SBD_COND_UNKNOWN = 0,
+ SBD_COND_OK,
+ SBD_COND_FAILING,
+ SBD_COND_FAILED,
+ SBD_COND_UNUSABLE
+} sbd_cond_t;
+
+typedef int sbd_busy_t;
+
+#define SBD_MAX_UNSAFE 16
+#define SBD_TYPE_LEN 12
+#define SBD_NULL_UNIT -1
+
+typedef struct {
+ sbd_comp_type_t c_type;
+ int c_unit;
+ char c_name[OBP_MAXPROPNAME];
+} sbd_comp_id_t;
+
+typedef struct {
+ sbd_comp_id_t c_id;
+ sbd_state_t c_ostate;
+ sbd_cond_t c_cond;
+ sbd_busy_t c_busy;
+ uint_t c_sflags;
+ time_t c_time;
+} sbd_cm_stat_t;
+
+#define ci_type c_id.c_type
+#define ci_unit c_id.c_unit
+#define ci_name c_id.c_name
+
+typedef struct {
+ sbd_cm_stat_t cs_cm;
+ int cs_isbootproc;
+ processorid_t cs_cpuid;
+ int cs_speed;
+ int cs_ecache;
+} sbd_cpu_stat_t;
+
+#define cs_type cs_cm.ci_type
+#define cs_unit cs_cm.ci_unit
+#define cs_name cs_cm.ci_name
+#define cs_ostate cs_cm.c_ostate
+#define cs_cond cs_cm.c_cond
+#define cs_busy cs_cm.c_busy
+#define cs_suspend cs_cm.c_sflags
+#define cs_time cs_cm.c_time
+
+typedef struct {
+ sbd_cm_stat_t ms_cm;
+ int ms_interleave;
+ pfn_t ms_basepfn;
+ pgcnt_t ms_totpages;
+ pgcnt_t ms_detpages;
+ pgcnt_t ms_pageslost;
+ pgcnt_t ms_managed_pages;
+ pgcnt_t ms_noreloc_pages;
+ pgcnt_t ms_noreloc_first;
+ pgcnt_t ms_noreloc_last;
+ int ms_cage_enabled;
+ int ms_peer_is_target; /* else peer is source */
+ char ms_peer_ap_id[MAXPATHLEN]; /* board's AP name */
+} sbd_mem_stat_t;
+
+#define ms_type ms_cm.ci_type
+#define ms_unit ms_cm.ci_unit
+#define ms_name ms_cm.ci_name
+#define ms_ostate ms_cm.c_ostate
+#define ms_cond ms_cm.c_cond
+#define ms_busy ms_cm.c_busy
+#define ms_suspend ms_cm.c_sflags
+#define ms_time ms_cm.c_time
+
+typedef struct {
+ sbd_cm_stat_t is_cm;
+ int is_referenced;
+ int is_unsafe_count;
+ int is_unsafe_list[SBD_MAX_UNSAFE];
+ char is_pathname[MAXPATHLEN];
+} sbd_io_stat_t;
+
+#define is_type is_cm.ci_type
+#define is_unit is_cm.ci_unit
+#define is_name is_cm.ci_name
+#define is_ostate is_cm.c_ostate
+#define is_cond is_cm.c_cond
+#define is_busy is_cm.c_busy
+#define is_suspend is_cm.c_sflags
+#define is_time is_cm.c_time
+
+/* This constant must be the maximum cores per CMP across all platforms. */
+#define SBD_MAX_CORES_PER_CMP 64
+
+typedef struct {
+ sbd_cm_stat_t ps_cm;
+ processorid_t ps_cpuid[SBD_MAX_CORES_PER_CMP];
+ int ps_ncores;
+ int ps_speed;
+ int ps_ecache;
+} sbd_cmp_stat_t;
+
+#define ps_type ps_cm.ci_type
+#define ps_unit ps_cm.ci_unit
+#define ps_name ps_cm.ci_name
+#define ps_ostate ps_cm.c_ostate
+#define ps_cond ps_cm.c_cond
+#define ps_busy ps_cm.c_busy
+#define ps_suspend ps_cm.c_sflags
+#define ps_time ps_cm.c_time
+
+typedef union {
+ sbd_cm_stat_t d_cm;
+ sbd_cpu_stat_t d_cpu;
+ sbd_mem_stat_t d_mem;
+ sbd_io_stat_t d_io;
+ sbd_cmp_stat_t d_cmp;
+} sbd_dev_stat_t;
+
+#define ds_type d_cm.ci_type
+#define ds_unit d_cm.ci_unit
+#define ds_name d_cm.ci_name
+#define ds_ostate d_cm.c_ostate
+#define ds_cond d_cm.c_cond
+#define ds_busy d_cm.c_busy
+#define ds_suspend d_cm.c_sflags
+#define ds_time d_cm.c_time
+
+#define SBD_MAX_INFO 256
+
+typedef struct {
+ int s_board;
+ char s_type[SBD_TYPE_LEN];
+ char s_info[SBD_MAX_INFO];
+ sbd_state_t s_rstate;
+ sbd_state_t s_ostate;
+ sbd_cond_t s_cond;
+ sbd_busy_t s_busy;
+ time_t s_time;
+ uint_t s_power:1;
+ uint_t s_assigned:1;
+ uint_t s_platopts;
+ int s_nstat;
+ sbd_dev_stat_t s_stat[1];
+} sbd_stat_t;
+
+typedef struct {
+ sbd_comp_id_t c_id;
+ uint_t c_flags;
+ int c_len;
+ caddr_t c_opts;
+} sbd_cm_cmd_t;
+
+typedef struct {
+ sbd_cm_cmd_t g_cm;
+ int g_ncm;
+} sbd_getncm_cmd_t;
+
+typedef struct {
+ sbd_cm_cmd_t s_cm;
+ int s_nbytes;
+ caddr_t s_statp;
+} sbd_stat_cmd_t;
+
+typedef union {
+ sbd_cm_cmd_t cmd_cm;
+ sbd_getncm_cmd_t cmd_getncm;
+ sbd_stat_cmd_t cmd_stat;
+} sbd_cmd_t;
+
+typedef struct {
+ int e_code;
+ char e_rsc[MAXPATHLEN];
+} sbd_error_t;
+
+typedef struct {
+ sbd_cmd_t i_cmd;
+ sbd_error_t i_err;
+} sbd_ioctl_arg_t;
+
+typedef struct {
+ int t_base;
+ int t_bnd;
+ char **t_text;
+} sbd_etab_t;
+
+#define i_flags i_cmd.cmd_cm.c_flags
+#define i_len i_cmd.cmd_cm.c_len
+#define i_opts i_cmd.cmd_cm.c_opts
+#define ic_type i_cmd.cmd_cm.ci_type
+#define ic_name i_cmd.cmd_cm.ci_name
+#define ic_unit i_cmd.cmd_cm.ci_unit
+#define ie_code i_err.e_code
+#define ie_rsc i_err.e_rsc
+
+#define _SBD_IOC (('D' << 16) | ('R' << 8))
+
+#define SBD_CMD_ASSIGN (_SBD_IOC | 0x01)
+#define SBD_CMD_UNASSIGN (_SBD_IOC | 0x02)
+#define SBD_CMD_POWERON (_SBD_IOC | 0x03)
+#define SBD_CMD_POWEROFF (_SBD_IOC | 0x04)
+#define SBD_CMD_TEST (_SBD_IOC | 0x05)
+#define SBD_CMD_CONNECT (_SBD_IOC | 0x06)
+#define SBD_CMD_CONFIGURE (_SBD_IOC | 0x07)
+#define SBD_CMD_UNCONFIGURE (_SBD_IOC | 0x08)
+#define SBD_CMD_DISCONNECT (_SBD_IOC | 0x09)
+#define SBD_CMD_STATUS (_SBD_IOC | 0x0a)
+#define SBD_CMD_GETNCM (_SBD_IOC | 0x0b)
+#define SBD_CMD_PASSTHRU (_SBD_IOC | 0x0c)
+
+#define SBD_CHECK_SUSPEND(cmd, c_sflags) \
+ (((c_sflags) >> (((cmd) & 0xf) - 1)) & 0x01)
+
+#define SBD_SET_SUSPEND(cmd, c_sflags) \
+ ((c_sflags) |= (0x01 << (((cmd) & 0xf) - 1)))
+
+#define SBD_CHECK_PLATOPTS(cmd, c_platopts) \
+ (((c_platopts) >> (((cmd) & 0xf) - 1)) & 0x01)
+
+#define SBD_SET_PLATOPTS(cmd, c_platopts) \
+ ((c_platopts) &= ~(0x01 << (((cmd) & 0xf) - 1)))
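+
+/*
+ * Worked example (illustrative): SBD_CMD_CONNECT is (_SBD_IOC | 0x06),
+ * so ((cmd) & 0xf) - 1 == 5 and SBD_CHECK_SUSPEND(SBD_CMD_CONNECT, f)
+ * tests bit 5 of the suspend flag word f.
+ */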
+
+#define SBD_FLAG_FORCE 0x1
+#define SBD_FLAG_ALLCMP 0x2
+#define SBD_FLAG_QUIESCE_OKAY 0x4
+
+#if defined(_SYSCALL32)
+
+typedef struct {
+ int32_t c_type;
+ int32_t c_unit;
+ char c_name[OBP_MAXPROPNAME];
+} sbd_comp_id32_t;
+
+typedef struct {
+ sbd_comp_id32_t c_id;
+ int32_t c_ostate;
+ int32_t c_cond;
+ int32_t c_busy;
+ uint32_t c_sflags;
+ time32_t c_time;
+} sbd_cm_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t cs_cm;
+ int32_t cs_isbootproc;
+ int32_t cs_cpuid;
+ int32_t cs_speed;
+ int32_t cs_ecache;
+} sbd_cpu_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t ms_cm;
+ int32_t ms_interleave;
+ uint32_t ms_basepfn;
+ uint32_t ms_totpages;
+ uint32_t ms_detpages;
+ int32_t ms_pageslost;
+ uint32_t ms_managed_pages;
+ uint32_t ms_noreloc_pages;
+ uint32_t ms_noreloc_first;
+ uint32_t ms_noreloc_last;
+ int32_t ms_cage_enabled;
+ int32_t ms_peer_is_target;
+ char ms_peer_ap_id[MAXPATHLEN];
+} sbd_mem_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t is_cm;
+ int32_t is_referenced;
+ int32_t is_unsafe_count;
+ int32_t is_unsafe_list[SBD_MAX_UNSAFE];
+ char is_pathname[MAXPATHLEN];
+} sbd_io_stat32_t;
+
+typedef struct {
+ sbd_cm_stat32_t ps_cm;
+ int32_t ps_cpuid[SBD_MAX_CORES_PER_CMP];
+ int32_t ps_ncores;
+ int32_t ps_speed;
+ int32_t ps_ecache;
+} sbd_cmp_stat32_t;
+
+typedef union {
+ sbd_cm_stat32_t d_cm;
+ sbd_cpu_stat32_t d_cpu;
+ sbd_mem_stat32_t d_mem;
+ sbd_io_stat32_t d_io;
+ sbd_cmp_stat32_t d_cmp;
+} sbd_dev_stat32_t;
+
+typedef struct {
+ int32_t s_board;
+ char s_type[SBD_TYPE_LEN];
+ char s_info[SBD_MAX_INFO];
+ int32_t s_rstate;
+ int32_t s_ostate;
+ int32_t s_cond;
+ int32_t s_busy;
+ time32_t s_time;
+ uint32_t s_power:1;
+ uint32_t s_assigned:1;
+ uint32_t s_platopts;
+ int32_t s_nstat;
+ sbd_dev_stat32_t s_stat[1];
+} sbd_stat32_t;
+
+typedef struct {
+ int32_t e_code;
+ char e_rsc[MAXPATHLEN];
+} sbd_error32_t;
+
+typedef struct {
+ sbd_comp_id32_t c_id;
+ uint32_t c_flags;
+ int32_t c_len;
+ caddr32_t c_opts;
+} sbd_cm_cmd32_t;
+
+typedef struct {
+ sbd_cm_cmd32_t g_cm;
+ int32_t g_ncm;
+} sbd_getncm_cmd32_t;
+
+typedef struct {
+ sbd_cm_cmd32_t s_cm;
+ int32_t s_nbytes;
+ caddr32_t s_statp;
+} sbd_stat_cmd32_t;
+
+typedef union {
+ sbd_cm_cmd32_t cmd_cm;
+ sbd_getncm_cmd32_t cmd_getncm;
+ sbd_stat_cmd32_t cmd_stat;
+} sbd_cmd32_t;
+
+typedef struct {
+ sbd_cmd32_t i_cmd;
+ sbd_error32_t i_err;
+} sbd_ioctl_arg32_t;
+
+typedef struct {
+ int32_t t_base;
+ int32_t t_bnd;
+ char **t_text;
+} sbd_etab32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _ASM */
+
+/* Common error codes */
+
+#define ESBD_NOERROR 0 /* no error */
+#define ESBD_INTERNAL 1 /* Internal error */
+#define ESBD_NOMEM 2 /* Insufficient memory */
+#define ESBD_PROTO 3 /* Protocol error */
+#define ESBD_BUSY 4 /* Device busy */
+#define ESBD_NODEV 5 /* No such device */
+#define ESBD_ALREADY 6 /* Operation already in progress */
+#define ESBD_IO 7 /* I/O error */
+#define ESBD_FAULT 8 /* Bad address */
+#define ESBD_EMPTY_BD 9 /* No device(s) on board */
+#define ESBD_INVAL 10 /* Invalid argument */
+#define ESBD_STATE 11 /* Invalid state transition */
+#define ESBD_FATAL_STATE 12 /* Device in fatal state */
+#define ESBD_OUTSTANDING 13 /* Outstanding error */
+#define ESBD_SUSPEND 14 /* Device failed to suspend */
+#define ESBD_RESUME 15 /* Device failed to resume */
+#define ESBD_UTHREAD 16 /* Cannot stop user thread */
+#define ESBD_RTTHREAD 17 /* Cannot quiesce realtime thread */
+#define ESBD_KTHREAD 18 /* Cannot stop kernel thread */
+#define ESBD_OFFLINE 19 /* Failed to off-line */
+#define ESBD_ONLINE 20 /* Failed to on-line */
+#define ESBD_CPUSTART 21 /* Failed to start CPU */
+#define ESBD_CPUSTOP 22 /* Failed to stop CPU */
+#define ESBD_INVAL_COMP 23 /* Invalid component type */
+#define ESBD_KCAGE_OFF 24 /* Kernel cage is disabled */
+#define ESBD_NO_TARGET 25 /* No available memory target */
+#define ESBD_HW_PROGRAM 26 /* Hardware programming error */
+#define ESBD_MEM_NOTVIABLE 27 /* VM viability test failed */
+#define ESBD_MEM_REFUSED 28 /* Memory operation refused */
+#define ESBD_MEM_NONRELOC 29 /* Non-relocatable pages in span */
+#define ESBD_MEM_CANCELLED 30 /* Memory operation cancelled */
+#define ESBD_MEMFAIL 31 /* Memory operation failed */
+#define ESBD_MEMONLINE 32 /* Can't unconfig cpu if mem online */
+#define ESBD_QUIESCE_REQD 33
+ /* Operator confirmation for quiesce is required */
+#define ESBD_MEMINTLV 34
+ /* Memory is interleaved across boards */
+#define ESBD_CPUONLINE 35
+ /* Can't config memory if not all cpus are online */
+#define ESBD_UNSAFE 36 /* Unsafe driver present */
+#define ESBD_INVAL_OPT 37 /* option invalid */
+
+/* Starcat error codes */
+
+#define ESTC_NONE 1000 /* No error */
+#define ESTC_GETPROP 1001 /* Cannot read property value */
+#define ESTC_BNUM 1002 /* Invalid board number */
+#define ESTC_CONFIGBUSY 1003
+ /* Cannot proceed; Board is configured or busy */
+#define ESTC_PROBE 1004 /* Solaris failed to probe */
+#define ESTC_DEPROBE 1005 /* Solaris failed to deprobe */
+#define ESTC_MOVESIGB 1006 /* Firmware move-cpu0 failed */
+#define ESTC_SUPPORT 1007 /* Operation not supported */
+#define ESTC_DRVFAIL 1008 /* Device driver failure */
+#define ESTC_UNKPTCMD 1012 /* Unrecognized platform command */
+#define ESTC_NOTID 1013
+ /* drmach parameter is not a valid ID */
+#define ESTC_INAPPROP 1014
+ /* drmach parameter is inappropriate for operation */
+#define ESTC_INTERNAL 1015 /* Unexpected internal condition */
+#define ESTC_MBXRQST 1016
+ /* Mailbox framework failure: outgoing */
+#define ESTC_MBXRPLY 1017
+ /* Mailbox framework failure: incoming */
+#define ESTC_NOACL 1018 /* Board is not in domain ACL */
+#define ESTC_NOT_ASSIGNED 1019 /* Board is not assigned to domain */
+#define ESTC_NOT_ACTIVE 1020 /* Board is not active */
+#define ESTC_EMPTY_SLOT 1021 /* Slot is empty */
+#define ESTC_POWER_OFF 1022 /* Board is powered off */
+#define ESTC_TEST_IN_PROGRESS 1023 /* Board is already being tested */
+#define ESTC_TESTING_BUSY 1024
+ /* Wait: All SC test resources are in use */
+#define ESTC_TEST_REQUIRED 1025 /* Board requires test prior to use */
+#define ESTC_TEST_ABORTED 1026 /* Board test has been aborted */
+#define ESTC_MBOX_UNKNOWN 1027
+ /* Unknown error type received from SC */
+#define ESTC_TEST_STATUS_UNKNOWN 1028
+ /* Test completed with unknown status */
+#define ESTC_TEST_RESULT_UNKNOWN 1029
+ /* Unknown test result returned by SC */
+#define ESTC_TEST_FAILED 1030
+ /* SMS hpost reported error, see POST log for details */
+#define ESTC_UNAVAILABLE 1031 /* Slot is unavailable to the domain */
+#define ESTC_NZ_LPA 1032 /* Nonzero LPA not yet supported */
+#define ESTC_IOSWITCH 1033
+ /* Cannot unconfigure I/O board: tunnel switch failed */
+#define ESTC_IOCAGE_NO_CPU_AVAIL 1034
+ /* No CPU available for I/O cage test. */
+#define ESTC_SMS_ERR_RECOVERABLE 1035
+ /* SMS reported recoverable error: check SMS status and Retry */
+#define ESTC_SMS_ERR_UNRECOVERABLE 1036
+ /* SMS reported unrecoverable error: Board is Unusable */
+#define ESTC_NWSWITCH 1037
+ /* Cannot unconfigure I/O board: network switch failed */
+
+/* Starfire error codes */
+
+#define ESTF_NONE 2000 /* No error */
+#define ESTF_GETPROP 2001 /* Cannot read property value */
+#define ESTF_GETPROPLEN 2002 /* Cannot determine property length */
+#define ESTF_BNUM 2003 /* Invalid board number */
+#define ESTF_CONFIGBUSY 2004
+ /* Cannot proceed; Board is configured or busy */
+#define ESTF_NOCPUID 2005 /* No CPU specified for connect */
+#define ESTF_PROBE 2006 /* Firmware probe failed */
+#define ESTF_DEPROBE 2007 /* Firmware deprobe failed */
+#define ESTF_MOVESIGB 2008 /* Firmware move-cpu0 failed */
+#define ESTF_JUGGLE 2009 /* Cannot move SIGB assignment */
+#define ESTF_HASSIGB 2010
+ /* Cannot disconnect CPU; SIGB is currently assigned */
+#define ESTF_SUPPORT 2011 /* Operation not supported */
+#define ESTF_DRVFAIL 2012 /* Device driver failure */
+#define ESTF_SETCPUVAL 2013
+ /* Must specify a CPU on the given board */
+#define ESTF_NODEV 2014 /* No such device */
+#define ESTF_INTERBOARD 2015
+ /* Memory configured with inter-board interleaving */
+#define ESTF_UNKPTCMD 2016 /* Unrecognized platform command */
+#define ESTF_NOTID 2017 /* drmach parameter is not a valid ID */
+#define ESTF_INAPPROP 2018
+ /* drmach parameter is inappropriate for operation */
+#define ESTF_INTERNAL 2019 /* Unexpected internal condition */
+
+/* Daktari error codes */
+
+#define EDAK_NONE 3000 /* no error */
+#define EDAK_INTERNAL 3001 /* Internal error */
+#define EDAK_NOFRUINFO 3002 /* Didn't receive fru info */
+#define EDAK_NONDR_BOARD 3003
+ /* DR is not supported on this board type */
+#define EDAK_POWERON 3004 /* Power on request failed */
+#define EDAK_POWEROK 3005 /* Failed to power on */
+#define EDAK_INTERRUPTED 3006 /* Operation interrupted */
+#define EDAK_BOARDINIT 3007 /* Board initialization failed */
+#define EDAK_CPUINIT 3008 /* CPU initialization failed */
+#define EDAK_MEMFAIL 3009 /* Memory operation failed */
+
+/* Serengeti error codes */
+
+#define ESGT_NONE 4000 /* no error */
+#define ESGT_INTERNAL 4001 /* Internal error */
+#define ESGT_INVAL 4002 /* Invalid argument */
+#define ESGT_MEMFAIL 4003 /* Memory operation failed */
+#define ESGT_PROBE 4004 /* Board probe failed */
+#define ESGT_DEPROBE 4005 /* Board deprobe failed */
+#define ESGT_JUGGLE_BOOTPROC 4006 /* Failed to juggle bootproc */
+#define ESGT_NOT_CPUTYPE 4007 /* Not a cpu device */
+#define ESGT_NO_DEV_TYPE 4008 /* Cannot find device type */
+#define ESGT_BAD_PORTID 4009 /* Bad port id */
+#define ESGT_RESUME 4010 /* Failed to resume device */
+#define ESGT_SUSPEND 4011 /* Failed to suspend device */
+#define ESGT_KTHREAD 4012 /* failed to stop kernel thd */
+#define ESGT_UNSAFE 4013 /* unsafe */
+#define ESGT_RTTHREAD 4014 /* real time threads */
+#define ESGT_UTHREAD 4015 /* failed to stop user thd */
+#define ESGT_PROM_ATTACH 4016 /* prom failed attach board */
+#define ESGT_PROM_DETACH 4017 /* prom failed detach board */
+#define ESGT_SC_ERR 4018 /* sc return a failure */
+#define ESGT_GET_BOARD_STAT 4019 /* Failed to obtain board information */
+#define ESGT_WAKEUPCPU 4020 /* Failed to wake up cpu */
+#define ESGT_STOPCPU 4021 /* Failed to stop cpu */
+/* Serengeti SC return codes */
+#define ESGT_HW_FAIL 4022 /* Hardware Failure */
+#define ESGT_BD_ACCESS 4023 /* Board access denied */
+#define ESGT_STALE_CMP 4024 /* Stale components */
+#define ESGT_STALE_OBJ 4025 /* Stale objects */
+#define ESGT_NO_SEPROM_SPACE 4026 /* No SEPROM space */
+#define ESGT_NOT_SUPP 4027 /* Operation not supported */
+#define ESGT_NO_MEM 4028 /* No Memory */
+
+/* OPL error codes */
+
+#define EOPL_GETPROP 5001 /* Cannot read property value */
+#define EOPL_BNUM 5002 /* Invalid board number */
+#define EOPL_CONFIGBUSY 5003
+ /* Cannot proceed; Board is configured or busy */
+#define EOPL_PROBE 5004 /* Firmware probe failed */
+#define EOPL_DEPROBE 5005 /* Firmware deprobe failed */
+#define EOPL_SUPPORT 5006 /* Operation not supported */
+#define EOPL_DRVFAIL 5007 /* Device driver failure */
+#define EOPL_UNKPTCMD 5008 /* Unrecognized platform command */
+#define EOPL_NOTID 5009 /* drmach parameter is not a valid ID */
+#define EOPL_INAPPROP 5010
+ /* drmach parameter is inappropriate for operation */
+#define EOPL_INTERNAL 5011 /* Unexpected internal condition */
+#define EOPL_FINDDEVICE 5012 /* Firmware cannot find node. */
+#define EOPL_MC_SETUP 5013 /* Cannot setup memory node */
+#define EOPL_CPU_STATE 5014 /* Invalid CPU/core state */
+#define EOPL_MC_OPL 5015 /* Cannot find mc-opl interface */
+#define EOPL_SCF_FMEM 5016 /* Cannot find scf_fmem interface */
+#define EOPL_FMEM_SETUP 5017 /* Error setting up FMEM buffer */
+#define EOPL_SCF_FMEM_START 5018 /* scf_fmem_start error */
+#define EOPL_FMEM_ERROR 5019 /* FMEM error */
+#define EOPL_SCF_FMEM_CANCEL 5020 /* scf_fmem_cancel error */
+#define EOPL_FMEM_XC_TIMEOUT 5021 /* xcall timeout */
+#define EOPL_FMEM_COPY_TIMEOUT 5022 /* DR parallel copy timeout */
+#define EOPL_FMEM_SCF_BUSY 5023 /* SCF busy */
+#define EOPL_FMEM_RETRY_OUT 5024 /* SCF IO Retry Error */
+#define EOPL_FMEM_TIMEOUT 5025 /* FMEM command timeout */
+#define EOPL_FMEM_HW_ERROR 5026 /* Hardware error */
+#define EOPL_FMEM_TERMINATE 5027 /* FMEM operation Terminated */
+#define EOPL_FMEM_COPY_ERROR 5028 /* Memory copy error */
+#define EOPL_FMEM_SCF_ERR 5029 /* SCF error */
+#define EOPL_MIXED_CPU 5030
+ /* Cannot add SPARC64-VI to domain booted with all SPARC64-VII CPUs */
+#define EOPL_FMEM_SCF_OFFLINE 5031 /* SCF OFFLINE */
+
+/* X86 error codes */
+
+#define EX86_GETPROP 10001 /* Cannot read property value */
+#define EX86_BNUM 10002 /* Invalid board number */
+#define EX86_NOTID 10003 /* drmach parameter is not a valid ID */
+#define EX86_INAPPROP 10004
+ /* drmach parameter is inappropriate for operation */
+#define EX86_PROBE 10005 /* Firmware probe failed */
+#define EX86_DEPROBE 10006 /* Firmware deprobe failed */
+#define EX86_SUPPORT 10007 /* Operation not supported */
+#define EX86_INTERNAL 10008 /* Unexpected internal condition */
+#define EX86_CONFIGBUSY 10009
+ /* Cannot proceed, board is configured or busy */
+#define EX86_POWERBUSY 10010 /* Cannot proceed, board is powered */
+#define EX86_CONNECTBUSY 10011 /* Cannot proceed, board is connected */
+#define EX86_INVALID_ARG 10012 /* Invalid argument */
+#define EX86_DRVFAIL 10013 /* Device driver failure */
+#define EX86_UNKPTCMD 10014 /* Unrecognized platform command */
+#define EX86_ALLOC_CPUID 10015 /* Failed to allocate processor id */
+#define EX86_FREE_CPUID 10016 /* Failed to release processor id */
+#define EX86_POWERON 10017 /* Failed to power on board */
+#define EX86_POWEROFF 10018 /* Failed to power off board */
+#define EX86_MC_SETUP 10019 /* Cannot setup memory node */
+#define EX86_ACPIWALK 10020 /* Cannot walk ACPI namespace */
+#define EX86_WALK_DEPENDENCY 10021
+ /* Failed to check dependency for board */
+#define EX86_IN_FAILURE 10022 /* Board is in failure state */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SBD_IOCTL_H */
diff --git a/usr/src/uts/i86pc/sys/x_call.h b/usr/src/uts/i86pc/sys/x_call.h
index ca778a4c21..65b5813b95 100644
--- a/usr/src/uts/i86pc/sys/x_call.h
+++ b/usr/src/uts/i86pc/sys/x_call.h
@@ -68,6 +68,8 @@ typedef struct xc_msg {
#if defined(_KERNEL)
extern void xc_init_cpu(struct cpu *);
+extern void xc_fini_cpu(struct cpu *);
+extern int xc_flush_cpu(struct cpu *);
extern uint_t xc_serv(caddr_t, caddr_t);
#define CPUSET2BV(set) ((ulong_t *)(void *)&(set))
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index ade54caab2..6ff1216d76 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -22,6 +22,10 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
@@ -188,6 +192,10 @@ int pt_kern;
extern void atomic_orb(uchar_t *addr, uchar_t val);
extern void atomic_andb(uchar_t *addr, uchar_t val);
+#ifndef __xpv
+extern pfn_t memseg_get_start(struct memseg *);
+#endif
+
#define PP_GETRM(pp, rmmask) (pp->p_nrm & rmmask)
#define PP_ISMOD(pp) PP_GETRM(pp, P_MOD)
#define PP_ISREF(pp) PP_GETRM(pp, P_REF)
@@ -4050,8 +4058,8 @@ hat_cpu_offline(struct cpu *cpup)
ASSERT(cpup != CPU);
CPUSET_ATOMIC_DEL(khat_cpuset, cpup->cpu_id);
- x86pte_cpu_fini(cpup);
hat_vlp_teardown(cpup);
+ x86pte_cpu_fini(cpup);
}
/*
@@ -4363,7 +4371,96 @@ void
hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
{}
-#ifdef __xpv
+#ifndef __xpv
+void
+hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
+ offset_t kpm_pages_off)
+{
+ _NOTE(ARGUNUSED(nkpmpgs, kpm_pages_off));
+ pfn_t base, end;
+
+ /*
+ * kphysm_add_memory_dynamic() does not set nkpmpgs
+ * when page_t memory is externally allocated. That
+ * code must properly calculate nkpmpgs in all cases
+ * if nkpmpgs needs to be used at some point.
+ */
+
+ /*
+ * The meta (page_t) pages for dynamically added memory are allocated
+ * either from the incoming memory itself or from existing memory.
+	 * In the former case the base of the incoming pages will be different
+	 * from the base of the dynamic segment, so call memseg_get_start() to
+	 * get the actual base of the incoming memory in either case.
+ */
+
+ base = memseg_get_start(msp);
+ end = msp->pages_end;
+
+ hat_devload(kas.a_hat, kpm_vbase + mmu_ptob(base),
+ mmu_ptob(end - base), base, PROT_READ | PROT_WRITE,
+ HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
+}
+
+void
+hat_kpm_addmem_mseg_insert(struct memseg *msp)
+{
+ _NOTE(ARGUNUSED(msp));
+}
+
+void
+hat_kpm_addmem_memsegs_update(struct memseg *msp)
+{
+ _NOTE(ARGUNUSED(msp));
+}
+
+/*
+ * Return end of metadata for an already setup memseg.
+ * X86 platforms don't need per-page meta data to support kpm.
+ */
+caddr_t
+hat_kpm_mseg_reuse(struct memseg *msp)
+{
+ return ((caddr_t)msp->epages);
+}
+
+void
+hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
+{
+ _NOTE(ARGUNUSED(msp, mspp));
+ ASSERT(0);
+}
+
+void
+hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
+ struct memseg *lo, struct memseg *mid, struct memseg *hi)
+{
+ _NOTE(ARGUNUSED(msp, mspp, lo, mid, hi));
+ ASSERT(0);
+}
+
+/*
+ * Walk the memsegs chain, applying func to each memseg span.
+ */
+void
+hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
+{
+ pfn_t pbase, pend;
+ void *base;
+ size_t size;
+ struct memseg *msp;
+
+ for (msp = memsegs; msp; msp = msp->next) {
+ pbase = msp->pages_base;
+ pend = msp->pages_end;
+ base = ptob(pbase) + kpm_vbase;
+ size = ptob(pend - pbase);
+ func(arg, base, size);
+ }
+}
+
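(Illustrative aside, not part of the patch: hat_kpm_walk() above hands each memseg's kpm-mapped span, as a virtual base and size, to a caller-supplied callback. The standalone sketch below mirrors that loop over a toy memseg list with a callback that totals the mapped bytes; the types and PAGESIZE value are simplified stand-ins for the kernel's.)

#include <stdio.h>
#include <stddef.h>

typedef unsigned long pfn_t;
struct memseg {
	pfn_t		pages_base;
	pfn_t		pages_end;
	struct memseg	*next;
};

#define	PAGESIZE	4096UL
#define	ptob(n)		((n) * PAGESIZE)

static unsigned long kpm_vbase = 0;	/* the kernel sets the real base */

/* Mirror of the walk above: apply func to each memseg's kpm span. */
static void
kpm_walk(struct memseg *memsegs, void (*func)(void *, void *, size_t),
    void *arg)
{
	struct memseg *msp;

	for (msp = memsegs; msp != NULL; msp = msp->next) {
		void *base = (void *)(kpm_vbase + ptob(msp->pages_base));
		size_t size = ptob(msp->pages_end - msp->pages_base);

		func(arg, base, size);
	}
}

/* Callback totalling the bytes covered by all spans. */
static void
count_span(void *arg, void *base, size_t size)
{
	(void) base;
	*(size_t *)arg += size;
}

int
main(void)
{
	struct memseg b = { 0x800, 0x1000, NULL };
	struct memseg a = { 0x100, 0x200, &b };
	size_t total = 0;

	kpm_walk(&a, count_span, &total);
	printf("kpm spans cover %zu bytes\n", total);
	return (0);
}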
+#else /* __xpv */
+
/*
* There are specific Hypervisor calls to establish and remove mappings
* to grant table references and the privcmd driver. We have to ensure
@@ -4406,5 +4503,5 @@ hat_release_mapping(hat_t *hat, caddr_t addr)
htable_release(ht);
htable_release(ht);
XPV_ALLOW_MIGRATE();
- }
-#endif
+}
+#endif /* __xpv */
diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h
index ae2b6c12c1..753aa9d146 100644
--- a/usr/src/uts/i86pc/vm/vm_dep.h
+++ b/usr/src/uts/i86pc/vm/vm_dep.h
@@ -22,6 +22,10 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/*
* UNIX machine dependent virtual memory support.
@@ -71,7 +75,14 @@ extern void plcnt_inc_dec(page_t *, int, int, long, int);
*/
#define PLCNT_XFER_NORELOC(pp)
-#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt)
+/*
+ * macro to modify the page list max counts when memory is added to
+ * the page lists during startup (add_physmem) or during a DR operation
+ * when memory is added (kphysm_add_memory_dynamic) or deleted
+ * (kphysm_del_cleanup).
+ */
+#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, cnt)
+
extern int memrange_num(pfn_t);
extern int pfn_2_mtype(pfn_t);
extern int mtype_func(int, int, uint_t);
@@ -82,7 +93,7 @@ extern int mnode_range_cnt(int);
/*
* candidate counters in vm_pagelist.c are indexed by color and range
*/
-#define NUM_MEM_RANGES 4 /* memory range types */
+#define NUM_MEM_RANGES 4 /* memory range types */
#define MAX_MNODE_MRANGES NUM_MEM_RANGES
#define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode)
#define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase)
@@ -161,6 +172,25 @@ extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int);
rw_exit(&page_ctrs_rwlock[(mnode)]); \
}
+/*
+ * macro to call page_ctrs_adjust() when memory is added
+ * during a DR operation.
+ */
+#define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \
+ spgcnt_t _cnt = (spgcnt_t)(cnt); \
+ int _mn; \
+ pgcnt_t _np; \
+ pfn_t _pfn = (pfn); \
+ pfn_t _endpfn = _pfn + _cnt; \
+ while (_pfn < _endpfn) { \
+ _mn = PFN_2_MEM_NODE(_pfn); \
+ _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \
+ _pfn += _np; \
+ if ((rv = page_ctrs_adjust(_mn)) != 0) \
+ break; \
+ } \
+}
+
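(Illustrative aside, not part of the patch: PAGE_CTRS_ADJUST above clips the pfn range against each memory node's physmax so page_ctrs_adjust() runs once per node the range touches. The same loop in function form, against a made-up node table and a stubbed page_ctrs_adjust().)

#include <stdio.h>

typedef unsigned long pfn_t;
typedef long spgcnt_t;

#define	MAX_NODES	4

/* Stand-in for mem_node_config[]: per-node highest pfn. */
static pfn_t node_physmax[MAX_NODES] = { 0xfff, 0x1fff, 0x2fff, 0x3fff };

static int
pfn_to_node(pfn_t pfn)
{
	int n;

	for (n = 0; n < MAX_NODES; n++)
		if (pfn <= node_physmax[n])
			return (n);
	return (MAX_NODES - 1);
}

/* Stand-in for page_ctrs_adjust(); returns 0 on success. */
static int
adjust_node(int node)
{
	printf("adjusting counters for mem node %d\n", node);
	return (0);
}

/* Function form of PAGE_CTRS_ADJUST: visit each node the range spans. */
static int
page_ctrs_adjust_range(pfn_t pfn, spgcnt_t cnt)
{
	pfn_t endpfn = pfn + cnt;
	int rv = 0;

	while (pfn < endpfn) {
		int mn = pfn_to_node(pfn);
		pfn_t lim = node_physmax[mn] + 1;

		pfn = (endpfn < lim) ? endpfn : lim;
		if ((rv = adjust_node(mn)) != 0)
			break;
	}
	return (rv);
}

int
main(void)
{
	/* Range 0x800..0x27ff spans nodes 0, 1 and 2. */
	return (page_ctrs_adjust_range(0x800, 0x2000));
}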
#define PAGE_GET_COLOR_SHIFT(szc, nszc) \
(hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift)
@@ -524,6 +554,7 @@ extern page_t *page_freelist_split(uchar_t,
uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *);
extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int,
pfn_t);
+extern void page_freelist_coalesce_all(int);
extern uint_t page_get_pagecolors(uint_t);
extern void pfnzero(pfn_t, uint_t, uint_t);
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index 63c8129ddf..79c0ee073e 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -22,6 +22,10 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
@@ -127,7 +131,6 @@ int largepagesupport = 0;
extern uint_t page_create_new;
extern uint_t page_create_exists;
extern uint_t page_create_putbacks;
-extern uint_t page_create_putbacks;
/*
* Allow users to disable the kernel's use of SSE.
*/
@@ -142,6 +145,8 @@ typedef struct {
pfn_t mnr_pfnhi;
int mnr_mnode;
int mnr_memrange; /* index into memranges[] */
+ int mnr_next; /* next lower PA mnoderange */
+ int mnr_exists;
/* maintain page list stats */
pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */
pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */
@@ -175,6 +180,11 @@ typedef struct {
*/
#define PFN_4GIG 0x100000
#define PFN_16MEG 0x1000
+/* Indices into the memory range (arch_memranges) array. */
+#define MRI_4G 0
+#define MRI_2G 1
+#define MRI_16M 2
+#define MRI_0 3
static pfn_t arch_memranges[NUM_MEM_RANGES] = {
PFN_4GIG, /* pfn range for 4G and above */
0x80000, /* pfn range for 2G-4G */
@@ -191,6 +201,8 @@ int nranges = NUM_MEM_RANGES;
mnoderange_t *mnoderanges;
int mnoderangecnt;
int mtype4g;
+int mtype16m;
+int mtypetop; /* index of highest pfn'ed mnoderange */
/*
* 4g memory management variables for systems with more than 4g of memory:
@@ -220,7 +232,6 @@ int mtype4g;
* for requests that don't specifically require it.
*/
-#define LOTSFREE4G (maxmem4g >> lotsfree4gshift)
#define DESFREE4G (maxmem4g >> desfree4gshift)
#define RESTRICT4G_ALLOC \
@@ -230,7 +241,6 @@ static pgcnt_t maxmem4g;
static pgcnt_t freemem4g;
static int physmax4g;
static int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */
-static int lotsfree4gshift = 3;
/*
* 16m memory management:
@@ -246,7 +256,7 @@ static int lotsfree4gshift = 3;
* are not restricted.
*/
-#define FREEMEM16M MTYPE_FREEMEM(0)
+#define FREEMEM16M MTYPE_FREEMEM(mtype16m)
#define DESFREE16M desfree16m
#define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \
((freemem != 0) && ((flags & PG_PANIC) == 0) && \
@@ -337,6 +347,9 @@ hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];
kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];
+/* Lock to protect mnoderanges array for memory DR operations. */
+static kmutex_t mnoderange_lock;
+
/*
* Only let one thread at a time try to coalesce large pages, to
* prevent them from working against each other.
@@ -951,7 +964,8 @@ pfn_2_mtype(pfn_t pfn)
#else
int n;
- for (n = mnoderangecnt - 1; n >= 0; n--) {
+ /* Always start from highest pfn and work our way down */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
if (pfn >= mnoderanges[n].mnr_pfnlo) {
break;
}
@@ -1063,7 +1077,7 @@ retry:
* verify that pages being returned from allocator have correct DMA attribute
*/
#ifndef DEBUG
-#define check_dma(a, b, c) (0)
+#define check_dma(a, b, c) (void)(0)
#else
static void
check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt)
@@ -1253,7 +1267,12 @@ mnode_range_cnt(int mnode)
void
mnode_range_setup(mnoderange_t *mnoderanges)
{
+ mnoderange_t *mp = mnoderanges;
int mnode, mri;
+ int mindex = 0; /* current index into mnoderanges array */
+ int i, j;
+ pfn_t hipfn;
+ int last, hi;
for (mnode = 0; mnode < max_mem_nodes; mnode++) {
if (mem_node_config[mnode].exists == 0)
@@ -1272,26 +1291,168 @@ mnode_range_setup(mnoderange_t *mnoderanges)
mem_node_config[mnode].physmax);
mnoderanges->mnr_mnode = mnode;
mnoderanges->mnr_memrange = mri;
+ mnoderanges->mnr_exists = 1;
mnoderanges++;
+ mindex++;
if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
mri--;
else
break;
}
}
+
+ /*
+	 * For now do a simple sort of the mnoderanges array to fill in
+	 * the mnr_next fields.  Since mindex is expected to be relatively
+	 * small, a simple O(N^2) algorithm suffices.
+ */
+ for (i = 0; i < mindex; i++) {
+ if (mp[i].mnr_pfnlo == 0) /* find lowest */
+ break;
+ }
+ ASSERT(i < mindex);
+ last = i;
+ mtype16m = last;
+ mp[last].mnr_next = -1;
+ for (i = 0; i < mindex - 1; i++) {
+ hipfn = (pfn_t)(-1);
+ hi = -1;
+ /* find next highest mnode range */
+ for (j = 0; j < mindex; j++) {
+ if (mp[j].mnr_pfnlo > mp[last].mnr_pfnlo &&
+ mp[j].mnr_pfnlo < hipfn) {
+ hipfn = mp[j].mnr_pfnlo;
+ hi = j;
+ }
+ }
+ mp[hi].mnr_next = last;
+ last = hi;
+ }
+ mtypetop = last;
}
+#ifndef __xpv
+/*
+ * Update mnoderanges for memory hot-add DR operations.
+ */
+static void
+mnode_range_add(int mnode)
+{
+ int *prev;
+ int n, mri;
+ pfn_t start, end;
+ extern void membar_sync(void);
+
+ ASSERT(0 <= mnode && mnode < max_mem_nodes);
+ ASSERT(mem_node_config[mnode].exists);
+ start = mem_node_config[mnode].physbase;
+ end = mem_node_config[mnode].physmax;
+ ASSERT(start <= end);
+ mutex_enter(&mnoderange_lock);
+
+#ifdef DEBUG
+ /* Check whether it interleaves with other memory nodes. */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
+ ASSERT(mnoderanges[n].mnr_exists);
+ if (mnoderanges[n].mnr_mnode == mnode)
+ continue;
+ ASSERT(start > mnoderanges[n].mnr_pfnhi ||
+ end < mnoderanges[n].mnr_pfnlo);
+ }
+#endif /* DEBUG */
+
+ mri = nranges - 1;
+ while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
+ mri--;
+ while (mri >= 0 && mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
+ /* Check whether mtype already exists. */
+ for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) {
+ if (mnoderanges[n].mnr_mnode == mnode &&
+ mnoderanges[n].mnr_memrange == mri) {
+ mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri),
+ start);
+ mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri),
+ end);
+ break;
+ }
+ }
+
+ /* Add a new entry if it doesn't exist yet. */
+ if (n == -1) {
+ /* Try to find an unused entry in mnoderanges array. */
+ for (n = 0; n < mnoderangecnt; n++) {
+ if (mnoderanges[n].mnr_exists == 0)
+ break;
+ }
+ ASSERT(n < mnoderangecnt);
+ mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri), start);
+ mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri), end);
+ mnoderanges[n].mnr_mnode = mnode;
+ mnoderanges[n].mnr_memrange = mri;
+ mnoderanges[n].mnr_exists = 1;
+ /* Page 0 should always be present. */
+ for (prev = &mtypetop;
+ mnoderanges[*prev].mnr_pfnlo > start;
+ prev = &mnoderanges[*prev].mnr_next) {
+ ASSERT(mnoderanges[*prev].mnr_next >= 0);
+ ASSERT(mnoderanges[*prev].mnr_pfnlo > end);
+ }
+ mnoderanges[n].mnr_next = *prev;
+ membar_sync();
+ *prev = n;
+ }
+
+ if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
+ mri--;
+ else
+ break;
+ }
+
+ mutex_exit(&mnoderange_lock);
+}
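(Illustrative aside, not part of the patch: note the ordering in mnode_range_add(). The new entry is fully initialized, membar_sync() fences the stores, and only then is the predecessor's mnr_next swung, so readers that traverse the chain without the lock, such as pfn_2_mtype(), never observe a half-built entry. A generic sketch of that publish pattern, substituting GCC's __sync_synchronize() for the kernel's membar_sync().)

#include <stdio.h>

typedef unsigned long pfn_t;

#define	NRANGES	8

struct range {
	pfn_t	pfnlo;
	int	next;		/* descending chain, -1 terminates */
	int	exists;
};

static struct range ranges[NRANGES];
static int top = -1;

/*
 * Insert a new range into the descending chain.  Fill in the entry
 * completely, fence, and only then swing the predecessor's link,
 * which is the single store that makes the entry visible to
 * lockless readers.
 */
static void
range_insert(pfn_t pfnlo)
{
	int *prev, n;

	for (n = 0; n < NRANGES; n++)	/* find an unused slot */
		if (!ranges[n].exists)
			break;
	ranges[n].pfnlo = pfnlo;
	ranges[n].exists = 1;

	for (prev = &top; *prev != -1 && ranges[*prev].pfnlo > pfnlo;
	    prev = &ranges[*prev].next)
		;
	ranges[n].next = *prev;
	__sync_synchronize();	/* stand-in for membar_sync() */
	*prev = n;
}

int
main(void)
{
	int n;

	range_insert(0);
	range_insert(0x100000);
	range_insert(0x80000);
	for (n = top; n != -1; n = ranges[n].next)
		printf("pfnlo 0x%lx\n", ranges[n].pfnlo);
	return (0);
}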
+
+/*
+ * Update mnoderanges for memory hot-removal DR operations.
+ */
+static void
+mnode_range_del(int mnode)
+{
+ _NOTE(ARGUNUSED(mnode));
+ ASSERT(0 <= mnode && mnode < max_mem_nodes);
+ /* TODO: support deletion operation. */
+ ASSERT(0);
+}
+
+void
+plat_slice_add(pfn_t start, pfn_t end)
+{
+ mem_node_add_slice(start, end);
+ if (plat_dr_enabled()) {
+ mnode_range_add(PFN_2_MEM_NODE(start));
+ }
+}
+
+void
+plat_slice_del(pfn_t start, pfn_t end)
+{
+ ASSERT(PFN_2_MEM_NODE(start) == PFN_2_MEM_NODE(end));
+ ASSERT(plat_dr_enabled());
+ mnode_range_del(PFN_2_MEM_NODE(start));
+ mem_node_del_slice(start, end);
+}
+#endif /* __xpv */
+
/*ARGSUSED*/
int
mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz)
{
- int mtype = mnoderangecnt - 1;
+ int mtype = mtypetop;
#if !defined(__xpv)
#if defined(__i386)
/*
* set the mtype range
- * - kmem requests needs to be below 4g if restricted_kmemalloc is set.
+ * - kmem requests need to be below 4g if restricted_kmemalloc is set.
* - for non kmem requests, set range to above 4g if memory below 4g
* runs low.
*/
@@ -1334,8 +1495,8 @@ mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz)
int
mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt)
{
- int mtype = mnoderangecnt - 1;
-#if !defined(__ixpv)
+ int mtype = mtypetop;
+#if !defined(__xpv)
if (RESTRICT16M_ALLOC(freemem, pgcnt, *flags)) {
*flags |= PGI_MT_RANGE16M;
} else {
@@ -1349,7 +1510,7 @@ mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt)
/*
* Determine if the mnode range specified in mtype contains memory belonging
* to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains
- * the range of indices from high pfn to 0, 16m or 4g.
+ * the range from high pfn to 0, 16m or 4g.
*
* Return first mnode range type index found otherwise return -1 if none found.
*/
@@ -1357,18 +1518,20 @@ int
mtype_func(int mnode, int mtype, uint_t flags)
{
if (flags & PGI_MT_RANGE) {
- int mtlim = 0;
+ int mnr_lim = MRI_0;
- if (flags & PGI_MT_NEXT)
- mtype--;
+ if (flags & PGI_MT_NEXT) {
+ mtype = mnoderanges[mtype].mnr_next;
+ }
if (flags & PGI_MT_RANGE4G)
- mtlim = mtype4g + 1; /* exclude 0-4g range */
+ mnr_lim = MRI_4G; /* exclude 0-4g range */
else if (flags & PGI_MT_RANGE16M)
- mtlim = 1; /* exclude 0-16m range */
- while (mtype >= mtlim) {
+ mnr_lim = MRI_16M; /* exclude 0-16m range */
+ while (mtype != -1 &&
+ mnoderanges[mtype].mnr_memrange <= mnr_lim) {
if (mnoderanges[mtype].mnr_mnode == mnode)
return (mtype);
- mtype--;
+ mtype = mnoderanges[mtype].mnr_next;
}
} else if (mnoderanges[mtype].mnr_mnode == mnode) {
return (mtype);
@@ -1378,34 +1541,40 @@ mtype_func(int mnode, int mtype, uint_t flags)
/*
* Update the page list max counts with the pfn range specified by the
- * input parameters. Called from add_physmem() when physical memory with
- * page_t's are initially added to the page lists.
+ * input parameters.
*/
void
mtype_modify_max(pfn_t startpfn, long cnt)
{
- int mtype = 0;
- pfn_t endpfn = startpfn + cnt, pfn;
- pgcnt_t inc;
-
- ASSERT(cnt > 0);
+ int mtype;
+ pgcnt_t inc;
+ spgcnt_t scnt = (spgcnt_t)(cnt);
+ pgcnt_t acnt = ABS(scnt);
+ pfn_t endpfn = startpfn + acnt;
+ pfn_t pfn, lo;
if (!physmax4g)
return;
- for (pfn = startpfn; pfn < endpfn; ) {
- if (pfn <= mnoderanges[mtype].mnr_pfnhi) {
- if (endpfn < mnoderanges[mtype].mnr_pfnhi) {
- inc = endpfn - pfn;
+ mtype = mtypetop;
+ for (pfn = endpfn; pfn > startpfn; ) {
+ ASSERT(mtype != -1);
+ lo = mnoderanges[mtype].mnr_pfnlo;
+ if (pfn > lo) {
+ if (startpfn >= lo) {
+ inc = pfn - startpfn;
} else {
- inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1;
+ inc = pfn - lo;
+ }
+ if (mnoderanges[mtype].mnr_memrange != MRI_4G) {
+ if (scnt > 0)
+ maxmem4g += inc;
+ else
+ maxmem4g -= inc;
}
- if (mtype <= mtype4g)
- maxmem4g += inc;
- pfn += inc;
+ pfn -= inc;
}
- mtype++;
- ASSERT(mtype < mnoderangecnt || pfn >= endpfn);
+ mtype = mnoderanges[mtype].mnr_next;
}
}
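(Illustrative aside, not part of the patch: the rewritten mtype_modify_max() walks ranges from mtypetop down, credits each range with the portion of [startpfn, endpfn) at or above its mnr_pfnlo, and adds or subtracts that count from maxmem4g for every range below 4G, that is, every range whose mnr_memrange is not MRI_4G. The per-range overlap arithmetic in isolation; the helper name is invented.)

#include <stdio.h>

typedef unsigned long pfn_t;

/*
 * Number of pages in [startpfn, pfn) that fall at or above a range's
 * low pfn `lo`, where `pfn` is the current upper bound left over after
 * crediting higher ranges.  Mirrors the inc computation above.
 */
static pfn_t
range_overlap(pfn_t startpfn, pfn_t pfn, pfn_t lo)
{
	if (pfn <= lo)
		return (0);
	return (startpfn >= lo ? pfn - startpfn : pfn - lo);
}

int
main(void)
{
	/* Range bottom at pfn 0x1000; span is [0x800, 0x3000). */
	pfn_t inc = range_overlap(0x800, 0x3000, 0x1000);

	/* 0x2000 pages here; [0x800, 0x1000) is left for lower ranges. */
	printf("pages credited to this range: 0x%lx\n", inc);
	return (0);
}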
@@ -1418,6 +1587,7 @@ mtype_2_mrange(int mtype)
void
mnodetype_2_pfn(int mnode, int mtype, pfn_t *pfnlo, pfn_t *pfnhi)
{
+ _NOTE(ARGUNUSED(mnode));
ASSERT(mnoderanges[mtype].mnr_mnode == mnode);
*pfnlo = mnoderanges[mtype].mnr_pfnlo;
*pfnhi = mnoderanges[mtype].mnr_pfnhi;
@@ -1462,6 +1632,7 @@ plcnt_init(caddr_t addr)
void
plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
{
+ _NOTE(ARGUNUSED(pp));
#ifdef DEBUG
int bin = PP_2_BIN(pp);
@@ -1470,7 +1641,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
cnt);
#endif
ASSERT(mtype == PP_2_MTYPE(pp));
- if (physmax4g && mtype <= mtype4g)
+ if (physmax4g && mnoderanges[mtype].mnr_memrange != MRI_4G)
atomic_add_long(&freemem4g, cnt);
if (flags & PG_CACHE_LIST)
atomic_add_long(&mnoderanges[mtype].mnr_mt_clpgcnt, cnt);
@@ -1485,7 +1656,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags)
int
mnode_pgcnt(int mnode)
{
- int mtype = mnoderangecnt - 1;
+ int mtype = mtypetop;
int flags = PGI_MT_RANGE0;
pgcnt_t pgcnt = 0;
@@ -1505,6 +1676,7 @@ mnode_pgcnt(int mnode)
size_t
page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
{
+ _NOTE(ARGUNUSED(l2_linesz));
size_t colorsz = 0;
int i;
int colors;
@@ -1519,14 +1691,19 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
/*
* Reduce the memory ranges lists if we don't have large amounts
* of memory. This avoids searching known empty free lists.
+	 * To support memory DR operations, we need to keep the memory
+	 * ranges that future hot-add operations may populate.
*/
- i = memrange_num(physmax);
+ if (plat_dr_physmax > physmax)
+ i = memrange_num(plat_dr_physmax);
+ else
+ i = memrange_num(physmax);
#if defined(__i386)
- if (i > 0)
+ if (i > MRI_4G)
restricted_kmemalloc = 0;
#endif
/* physmax greater than 4g */
- if (i == 0)
+ if (i == MRI_4G)
physmax4g = 1;
#endif /* !__xpv */
memranges += i;
@@ -1619,6 +1796,16 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
/* size for mnoderanges */
for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++)
mnoderangecnt += mnode_range_cnt(i);
+ if (plat_dr_support_memory()) {
+ /*
+ * Reserve enough space for memory DR operations.
+	 * Two extra mnoderanges for possible fragmentation,
+ * one for the 2G boundary and the other for the 4G boundary.
+ * We don't expect a memory board crossing the 16M boundary
+ * for memory hot-add operations on x86 platforms.
+ */
+ mnoderangecnt += 2 + max_mem_nodes - lgrp_plat_node_cnt;
+ }
colorsz = mnoderangecnt * sizeof (mnoderange_t);
/* size for fpc_mutex and cpc_mutex */
@@ -3122,8 +3309,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
* ordering.
*/
if (dma_attr == NULL) {
- n = 0;
- m = mnoderangecnt - 1;
+ n = mtype16m;
+ m = mtypetop;
fullrange = 1;
VM_STAT_ADD(pga_vmstats.pga_nulldmaattr);
} else {
@@ -3136,6 +3323,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
if (dma_attr->dma_attr_align > MMU_PAGESIZE)
return (NULL);
+ /* Sanity check the dma_attr */
+ if (pfnlo > pfnhi)
+ return (NULL);
+
n = pfn_2_mtype(pfnlo);
m = pfn_2_mtype(pfnhi);
@@ -3144,13 +3335,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
}
VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);
- if (n > m)
- return (NULL);
-
szc = 0;
- /* cylcing thru mtype handled by RANGE0 if n == 0 */
- if (n == 0) {
+	/* cycling thru mtype handled by RANGE0 if n == mtype16m */
+ if (n == mtype16m) {
flags |= PGI_MT_RANGE0;
n = m;
}
@@ -3164,7 +3352,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
/*
* allocate pages from high pfn to low.
*/
- for (mtype = m; mtype >= n; mtype--) {
+ mtype = m;
+ do {
if (fullrange != 0) {
pp = page_get_mnode_freelist(mnode,
bin, mtype, szc, flags);
@@ -3181,7 +3370,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
check_dma(dma_attr, pp, 1);
return (pp);
}
- }
+ } while (mtype != n &&
+ (mtype = mnoderanges[mtype].mnr_next) != -1);
if (!local_failed_stat) {
lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
local_failed_stat = 1;
diff --git a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
index f0a257d1fd..8bc46f8e3e 100644
--- a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
+++ b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c
@@ -161,6 +161,12 @@ gcpu_mca_poll_init(cmi_hdl_t hdl)
}
}
+/* Undo the effects of gcpu_mca_poll_init() */
+void
+gcpu_mca_poll_fini(cmi_hdl_t hdl)
+{
+}
+
void
gcpu_mca_poll_start(cmi_hdl_t hdl)
{
diff --git a/usr/src/uts/i86xpv/io/psm/xpv_psm.c b/usr/src/uts/i86xpv/io/psm/xpv_psm.c
index 07db030ed5..c59d1015a0 100644
--- a/usr/src/uts/i86xpv/io/psm/xpv_psm.c
+++ b/usr/src/uts/i86xpv/io/psm/xpv_psm.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -1672,7 +1672,8 @@ static struct psm_ops xen_psm_ops = {
(void (*)(void *arg))NULL, /* psm_post_cyclic_setup */
(void (*)(int, int))NULL, /* psm_preshutdown */
xen_intr_ops, /* Advanced DDI Interrupt framework */
- (int (*)(psm_state_request_t *))NULL /* psm_state */
+ (int (*)(psm_state_request_t *))NULL, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info xen_psm_info = {
diff --git a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
index 9427ead955..06049ed9e0 100644
--- a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
+++ b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
-#define PSMI_1_6
+#define PSMI_1_7
#include <sys/mutex.h>
#include <sys/types.h>
@@ -891,7 +891,8 @@ static struct psm_ops xen_uppc_ops = {
(int (*)(dev_info_t *, ddi_intr_handle_impl_t *,
psm_intr_op_t, int *))NULL, /* psm_intr_ops */
- (int (*)(psm_state_request_t *))NULL /* psm_state */
+ (int (*)(psm_state_request_t *))NULL, /* psm_state */
+ (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */
};
static struct psm_info xen_uppc_info = {
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index 87e1323772..807cb8f790 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -1359,6 +1359,17 @@ fcnname/**/_info: \
END_MODULE(acpica);
#endif
+/*
+ * Stubs for acpidev
+ */
+#ifndef ACPIDEV_MODULE
+ MODULE(acpidev,misc);
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ;
+ NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info,
+ nomod_minus_one) ;
+ END_MODULE(acpidev);
+#endif
+
#ifndef IPNET_MODULE
MODULE(ipnet,drv);
STUB(ipnet, ipnet_if_getdev, nomod_zero);
diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c
index 9aca9f4ea1..13f04658aa 100644
--- a/usr/src/uts/intel/ia32/os/sundep.c
+++ b/usr/src/uts/intel/ia32/os/sundep.c
@@ -115,9 +115,10 @@ check_boot_version(int boots_version)
* 1) the pfn of the highest installed physical page,
* 2) the number of pages installed
* 3) the number of distinct contiguous regions these pages fall into.
+ * 4) the number of contiguous memory ranges (installed_top_size_ex() only)
*/
void
-installed_top_size(
+installed_top_size_ex(
struct memlist *list, /* pointer to start of installed list */
pfn_t *high_pfn, /* return ptr for top value */
pgcnt_t *pgcnt, /* return ptr for sum of installed pages */
@@ -141,6 +142,21 @@ installed_top_size(
*ranges = cnt;
}
+void
+installed_top_size(
+ struct memlist *list, /* pointer to start of installed list */
+ pfn_t *high_pfn, /* return ptr for top value */
+ pgcnt_t *pgcnt) /* return ptr for sum of installed pages */
+{
+ int ranges;
+
+ installed_top_size_ex(list, high_pfn, pgcnt, &ranges);
+}
+
+void
+phys_install_has_changed(void)
+{}
+
/*
* Copy in a memory list from boot to kernel, with a filter function
* to remove pages. The filter function can increase the address and/or
diff --git a/usr/src/uts/intel/io/acpica/acpica.c b/usr/src/uts/intel/io/acpica/acpica.c
index 088b1f0eb4..30a8d79be1 100644
--- a/usr/src/uts/intel/io/acpica/acpica.c
+++ b/usr/src/uts/intel/io/acpica/acpica.c
@@ -36,6 +36,7 @@
#include <sys/modctl.h>
#include <sys/open.h>
#include <sys/stat.h>
+#include <sys/spl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
@@ -121,8 +122,11 @@ _init(void)
int error = EBUSY;
int status;
extern int (*acpi_fp_setwake)();
+ extern kmutex_t cpu_map_lock;
mutex_init(&acpica_module_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&cpu_map_lock, NULL, MUTEX_SPIN,
+ (ddi_iblock_cookie_t)ipltospl(DISP_LEVEL));
if ((error = mod_install(&modlinkage)) != 0) {
mutex_destroy(&acpica_module_lock);
diff --git a/usr/src/uts/intel/io/acpica/osl.c b/usr/src/uts/intel/io/acpica/osl.c
index 7593bde61a..6cb08ee27e 100644
--- a/usr/src/uts/intel/io/acpica/osl.c
+++ b/usr/src/uts/intel/io/acpica/osl.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
/*
@@ -98,7 +98,7 @@ static int scanning_d2a_map = 0;
static int d2a_done = 0;
/* features supported by ACPICA and ACPI device configuration. */
-uint64_t acpica_core_features = 0;
+uint64_t acpica_core_features = ACPI_FEATURE_OSI_MODULE;
static uint64_t acpica_devcfg_features = 0;
/* set by acpi_poweroff() in PSMs and appm_ioctl() in acpippm for S3 */
@@ -112,7 +112,7 @@ struct cpu_map_item {
ACPI_HANDLE obj;
};
-static kmutex_t cpu_map_lock;
+kmutex_t cpu_map_lock;
static struct cpu_map_item **cpu_map = NULL;
static int cpu_map_count_max = 0;
static int cpu_map_count = 0;
@@ -1552,7 +1552,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh)
}
}
if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) {
- *rh = cpu_map[cpu_id]->obj;
+ *rh = cpu_map[i]->obj;
mutex_exit(&cpu_map_lock);
return (AE_OK);
}
@@ -1567,7 +1567,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh)
}
}
if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) {
- *rh = cpu_map[cpu_id]->obj;
+ *rh = cpu_map[i]->obj;
mutex_exit(&cpu_map_lock);
return (AE_OK);
}
@@ -2209,6 +2209,82 @@ acpica_get_cpu_object_by_apicid(UINT32 apicid, ACPI_HANDLE *hdlp)
return (rc);
}
+ACPI_STATUS
+acpica_get_cpu_id_by_object(ACPI_HANDLE hdl, processorid_t *cpuidp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(cpuidp != NULL);
+ if (hdl == NULL || cpuidp == NULL) {
+ return (rc);
+ }
+
+ *cpuidp = -1;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl && cpu_map[i]->cpu_id != -1) {
+ *cpuidp = cpu_map[i]->cpu_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpica_get_apicid_by_object(ACPI_HANDLE hdl, UINT32 *rp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(rp != NULL);
+ if (hdl == NULL || rp == NULL) {
+ return (rc);
+ }
+
+ *rp = UINT32_MAX;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl &&
+ cpu_map[i]->apic_id != UINT32_MAX) {
+ *rp = cpu_map[i]->apic_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
+ACPI_STATUS
+acpica_get_procid_by_object(ACPI_HANDLE hdl, UINT32 *rp)
+{
+ int i;
+ ACPI_STATUS rc = AE_NOT_EXIST;
+
+ ASSERT(rp != NULL);
+ if (hdl == NULL || rp == NULL) {
+ return (rc);
+ }
+
+ *rp = UINT32_MAX;
+ mutex_enter(&cpu_map_lock);
+ for (i = 0; i < cpu_map_count; i++) {
+ if (cpu_map[i]->obj == hdl) {
+ *rp = cpu_map[i]->proc_id;
+ rc = AE_OK;
+ break;
+ }
+ }
+ mutex_exit(&cpu_map_lock);
+
+ return (rc);
+}
+
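(Illustrative aside, not part of the patch: these three accessors invert the cpu_map. Given the ACPI handle delivered with, say, a processor hotplug notification, DR code can recover the Solaris CPU id, APIC id, or ACPI processor id. A standalone sketch of the acpica_get_cpu_id_by_object() scan, with simplified types and the locking omitted.)

#include <stdio.h>

typedef unsigned long acpi_handle_t;	/* stand-in for ACPI_HANDLE */
typedef int processorid_t;

struct cpu_map_item {
	processorid_t	cpu_id;		/* -1 until the CPU is mapped */
	acpi_handle_t	obj;
};

static struct cpu_map_item cpu_map[] = {
	{ 0, 0x1000 },
	{ -1, 0x2000 },			/* present but not yet mapped */
};
static int cpu_map_count = 2;

/*
 * Reverse lookup in the style of acpica_get_cpu_id_by_object(): scan
 * the map for the handle, skipping entries whose cpu_id is still -1.
 * Returns 0 on success, -1 if no mapped entry exists.
 */
static int
cpu_id_by_object(acpi_handle_t hdl, processorid_t *cpuidp)
{
	int i;

	for (i = 0; i < cpu_map_count; i++) {
		if (cpu_map[i].obj == hdl && cpu_map[i].cpu_id != -1) {
			*cpuidp = cpu_map[i].cpu_id;
			return (0);
		}
	}
	return (-1);
}

int
main(void)
{
	processorid_t id;

	if (cpu_id_by_object(0x1000, &id) == 0)
		printf("handle maps to cpu %d\n", id);
	if (cpu_id_by_object(0x2000, &id) != 0)
		printf("second handle not mapped yet\n");
	return (0);
}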
void
acpica_set_core_feature(uint64_t features)
{
diff --git a/usr/src/uts/intel/sys/acpica.h b/usr/src/uts/intel/sys/acpica.h
index 5f74655418..ff55dcb302 100644
--- a/usr/src/uts/intel/sys/acpica.h
+++ b/usr/src/uts/intel/sys/acpica.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
*/
@@ -133,6 +133,7 @@ typedef struct iflag {
#define ACPI_DEVCFG_CPU 0x1
#define ACPI_DEVCFG_MEMORY 0x2
#define ACPI_DEVCFG_CONTAINER 0x4
+#define ACPI_DEVCFG_PCI 0x8
/*
* master_ops.c
@@ -187,6 +188,9 @@ extern ACPI_STATUS acpica_unmap_cpu(processorid_t);
extern ACPI_STATUS acpica_get_cpu_object_by_cpuid(processorid_t, ACPI_HANDLE *);
extern ACPI_STATUS acpica_get_cpu_object_by_procid(UINT32, ACPI_HANDLE *);
extern ACPI_STATUS acpica_get_cpu_object_by_apicid(UINT32, ACPI_HANDLE *);
+extern ACPI_STATUS acpica_get_cpu_id_by_object(ACPI_HANDLE, processorid_t *);
+extern ACPI_STATUS acpica_get_apicid_by_object(ACPI_HANDLE, UINT32 *);
+extern ACPI_STATUS acpica_get_procid_by_object(ACPI_HANDLE, UINT32 *);
extern uint64_t acpica_get_core_feature(uint64_t);
extern void acpica_set_core_feature(uint64_t);
diff --git a/usr/src/uts/intel/sys/bootconf.h b/usr/src/uts/intel/sys/bootconf.h
index f626fbbb79..4f6c48c395 100644
--- a/usr/src/uts/intel/sys/bootconf.h
+++ b/usr/src/uts/intel/sys/bootconf.h
@@ -50,6 +50,7 @@ extern "C" {
#define BP_CPU_APICID_ARRAY "cpu_apicid_array"
#define BP_LGRP_SLIT_ENABLE "lgrp_slit_enable"
#define BP_LGRP_SRAT_ENABLE "lgrp_srat_enable"
+#define BP_LGRP_MSCT_ENABLE "lgrp_msct_enable"
#define BP_LGRP_TOPO_LEVELS "lgrp_topo_levels"
/*
@@ -213,18 +214,6 @@ extern int octet_to_hexascii(const void *, uint_t, char *, uint_t *);
extern int dhcpinit(void);
-/*
- * XXX The memlist stuff belongs in a header of its own
- */
-extern int check_boot_version(int);
-extern void size_physavail(struct memlist *, pgcnt_t *, int *, pfn_t);
-extern int copy_physavail(struct memlist *, struct memlist **,
- uint_t, uint_t);
-extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *, int *);
-extern int check_memexp(struct memlist *, uint_t);
-extern void copy_memlist_filter(struct memlist *, struct memlist **,
- void (*filter)(uint64_t *, uint64_t *));
-
extern struct bootops *bootops;
extern int netboot;
extern int swaploaded;
diff --git a/usr/src/uts/intel/sys/cpu_module.h b/usr/src/uts/intel/sys/cpu_module.h
index 9b64b8527a..8d801b07f5 100644
--- a/usr/src/uts/intel/sys/cpu_module.h
+++ b/usr/src/uts/intel/sys/cpu_module.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -143,7 +143,6 @@ extern cmi_errno_t cmi_hdl_rdmsr(cmi_hdl_t, uint_t, uint64_t *);
extern cmi_errno_t cmi_hdl_wrmsr(cmi_hdl_t, uint_t, uint64_t);
extern void cmi_hdl_enable_mce(cmi_hdl_t);
-
extern uint_t cmi_hdl_vendor(cmi_hdl_t);
extern const char *cmi_hdl_vendorstr(cmi_hdl_t);
extern uint_t cmi_hdl_family(cmi_hdl_t);
@@ -172,6 +171,7 @@ extern uint_t cmi_ntv_hwchipid(cpu_t *);
extern uint_t cmi_ntv_hwprocnodeid(cpu_t *);
extern uint_t cmi_ntv_hwcoreid(cpu_t *);
extern uint_t cmi_ntv_hwstrandid(cpu_t *);
+extern void cmi_ntv_hwdisable_mce(cmi_hdl_t);
#endif /* __xpv */
typedef struct cmi_mca_regs {
@@ -210,6 +210,7 @@ extern void cmi_mca_trap(struct regs *);
extern boolean_t cmi_panic_on_ue(void);
extern void cmi_mc_register(cmi_hdl_t, const struct cmi_mc_ops *, void *);
+extern cmi_errno_t cmi_mc_register_global(const struct cmi_mc_ops *, void *);
extern void cmi_mc_sw_memscrub_disable(void);
extern cmi_errno_t cmi_mc_patounum(uint64_t, uint8_t, uint8_t, uint32_t, int,
mc_unum_t *);
diff --git a/usr/src/uts/intel/sys/memlist_plat.h b/usr/src/uts/intel/sys/memlist_plat.h
new file mode 100644
index 0000000000..065f8ed37a
--- /dev/null
+++ b/usr/src/uts/intel/sys/memlist_plat.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_MEMLIST_PLAT_H
+#define _SYS_MEMLIST_PLAT_H
+
+/*
+ * Boot time configuration information objects
+ */
+
+#include <sys/types.h>
+#include <sys/memlist.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int check_boot_version(int);
+extern void copy_memlist_filter(struct memlist *, struct memlist **,
+ void (*filter)(uint64_t *, uint64_t *));
+extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *);
+extern void installed_top_size_ex(struct memlist *, pfn_t *, pgcnt_t *, int *);
+extern void phys_install_has_changed(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MEMLIST_PLAT_H */
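(Illustrative aside, not part of the patch: the new header gathers the memlist helpers that bootconf.h used to declare. The heart of installed_top_size_ex() is a single pass over the list, sketched below against a simplified struct memlist; the address/size field names follow the kernel memlist of this era, and 4K pages are assumed.)

#include <stdio.h>
#include <stdint.h>

typedef unsigned long pfn_t;
typedef unsigned long pgcnt_t;

#define	PAGESHIFT	12	/* assuming 4K pages */

struct memlist {
	uint64_t	address;
	uint64_t	size;
	struct memlist	*next;
};

/*
 * One-pass sketch of installed_top_size_ex(): highest installed pfn,
 * total installed pages, and the number of contiguous ranges.
 */
static void
top_size_ex(struct memlist *list, pfn_t *high_pfn, pgcnt_t *pgcnt,
    int *ranges)
{
	pfn_t top = 0;
	pgcnt_t sum = 0;
	int cnt = 0;

	for (; list != NULL; list = list->next) {
		pfn_t end = (list->address + list->size) >> PAGESHIFT;

		if (end > top)
			top = end;
		sum += list->size >> PAGESHIFT;
		cnt++;
	}
	*high_pfn = (top == 0) ? 0 : top - 1;
	*pgcnt = sum;
	*ranges = cnt;
}

int
main(void)
{
	struct memlist b = { 0x100000000ULL, 0x40000000ULL, NULL };
	struct memlist a = { 0x0, 0x80000000ULL, &b };
	pfn_t high;
	pgcnt_t pages;
	int ranges;

	top_size_ex(&a, &high, &pages, &ranges);
	printf("high pfn 0x%lx, %lu pages, %d ranges\n",
	    high, pages, ranges);
	return (0);
}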
diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c
index d01eabae19..f3efc433c5 100644
--- a/usr/src/uts/sun4/os/ddi_impl.c
+++ b/usr/src/uts/sun4/os/ddi_impl.c
@@ -48,12 +48,8 @@
#include <sys/systeminfo.h>
#include <sys/fpu/fpusystm.h>
#include <sys/vm.h>
-#include <sys/fs/dv_node.h>
-#include <sys/fs/snode.h>
#include <sys/ddi_isa.h>
-#include <sys/modhash.h>
#include <sys/modctl.h>
-#include <sys/sunldi_impl.h>
dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *,
ddi_intr_handle_impl_t *);
@@ -1954,937 +1950,3 @@ peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
return (err);
}
-
-/*
- * Platform independent DR routines
- */
-
-static int
-ndi2errno(int n)
-{
- int err = 0;
-
- switch (n) {
- case NDI_NOMEM:
- err = ENOMEM;
- break;
- case NDI_BUSY:
- err = EBUSY;
- break;
- case NDI_FAULT:
- err = EFAULT;
- break;
- case NDI_FAILURE:
- err = EIO;
- break;
- case NDI_SUCCESS:
- break;
- case NDI_BADHANDLE:
- default:
- err = EINVAL;
- break;
- }
- return (err);
-}
-
-/*
- * Prom tree node list
- */
-struct ptnode {
- pnode_t nodeid;
- struct ptnode *next;
-};
-
-/*
- * Prom tree walk arg
- */
-struct pta {
- dev_info_t *pdip;
- devi_branch_t *bp;
- uint_t flags;
- dev_info_t *fdip;
- struct ptnode *head;
-};
-
-static void
-visit_node(pnode_t nodeid, struct pta *ap)
-{
- struct ptnode **nextp;
- int (*select)(pnode_t, void *, uint_t);
-
- ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
-
- select = ap->bp->create.prom_branch_select;
-
- ASSERT(select);
-
- if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
-
- for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
- ;
-
- *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
-
- (*nextp)->nodeid = nodeid;
- }
-
- if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
- return;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-}
-
-/*
- * NOTE: The caller of this function must check for device contracts
- * or LDI callbacks against this dip before setting the dip offline.
- */
-static int
-set_infant_dip_offline(dev_info_t *dip, void *arg)
-{
- char *path = (char *)arg;
-
- ASSERT(dip);
- ASSERT(arg);
-
- if (i_ddi_node_state(dip) >= DS_ATTACHED) {
- (void) ddi_pathname(dip, path);
- cmn_err(CE_WARN, "Attempt to set offline flag on attached "
- "node: %s", path);
- return (DDI_FAILURE);
- }
-
- mutex_enter(&(DEVI(dip)->devi_lock));
- if (!DEVI_IS_DEVICE_OFFLINE(dip))
- DEVI_SET_DEVICE_OFFLINE(dip);
- mutex_exit(&(DEVI(dip)->devi_lock));
-
- return (DDI_SUCCESS);
-}
-
-typedef struct result {
- char *path;
- int result;
-} result_t;
-
-static int
-dip_set_offline(dev_info_t *dip, void *arg)
-{
- int end;
- result_t *resp = (result_t *)arg;
-
- ASSERT(dip);
- ASSERT(resp);
-
- /*
- * We stop the walk if e_ddi_offline_notify() returns
- * failure, because this implies that one or more consumers
- * (either LDI or contract based) has blocked the offline.
- * So there is no point in conitnuing the walk
- */
- if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
- resp->result = DDI_FAILURE;
- return (DDI_WALK_TERMINATE);
- }
-
- /*
- * If set_infant_dip_offline() returns failure, it implies
- * that we failed to set a particular dip offline. This
- * does not imply that the offline as a whole should fail.
- * We want to do the best we can, so we continue the walk.
- */
- if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
- end = DDI_SUCCESS;
- else
- end = DDI_FAILURE;
-
- e_ddi_offline_finalize(dip, end);
-
- return (DDI_WALK_CONTINUE);
-}
-
-/*
- * The call to e_ddi_offline_notify() exists for the
- * unlikely error case that a branch we are trying to
- * create already exists and has device contracts or LDI
- * event callbacks against it.
- *
- * We allow create to succeed for such branches only if
- * no constraints block the offline.
- */
-static int
-branch_set_offline(dev_info_t *dip, char *path)
-{
- int circ;
- int end;
- result_t res;
-
-
- if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
- return (DDI_FAILURE);
- }
-
- if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
- end = DDI_SUCCESS;
- else
- end = DDI_FAILURE;
-
- e_ddi_offline_finalize(dip, end);
-
- if (end == DDI_FAILURE)
- return (DDI_FAILURE);
-
- res.result = DDI_SUCCESS;
- res.path = path;
-
- ndi_devi_enter(dip, &circ);
- ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
- ndi_devi_exit(dip, circ);
-
- return (res.result);
-}
-
-/*ARGSUSED*/
-static int
-create_prom_branch(void *arg, int has_changed)
-{
- int circ;
- int exists, rv;
- pnode_t nodeid;
- struct ptnode *tnp;
- dev_info_t *dip;
- struct pta *ap = arg;
- devi_branch_t *bp;
- char *path;
-
- ASSERT(ap);
- ASSERT(ap->fdip == NULL);
- ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
-
- bp = ap->bp;
-
- nodeid = ddi_get_nodeid(ap->pdip);
- if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
- cmn_err(CE_WARN, "create_prom_branch: invalid "
- "nodeid: 0x%x", nodeid);
- return (EINVAL);
- }
-
- ap->head = NULL;
-
- nodeid = prom_childnode(nodeid);
- while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
- visit_node(nodeid, ap);
- nodeid = prom_nextnode(nodeid);
- }
-
- if (ap->head == NULL)
- return (ENODEV);
-
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- rv = 0;
- while ((tnp = ap->head) != NULL) {
- ap->head = tnp->next;
-
- ndi_devi_enter(ap->pdip, &circ);
-
- /*
- * Check if the branch already exists.
- */
- exists = 0;
- dip = e_ddi_nodeid_to_dip(tnp->nodeid);
- if (dip != NULL) {
- exists = 1;
-
- /* Parent is held busy, so release hold */
- ndi_rele_devi(dip);
-#ifdef DEBUG
- cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
- " for nodeid 0x%x", (void *)dip, tnp->nodeid);
-#endif
- } else {
- dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
- }
-
- kmem_free(tnp, sizeof (struct ptnode));
-
- /*
- * Hold the branch if it is not already held
- */
- if (dip && !exists) {
- e_ddi_branch_hold(dip);
- }
-
- ASSERT(dip == NULL || e_ddi_branch_held(dip));
-
- /*
- * Set all dips in the newly created branch offline so that
- * only a "configure" operation can attach
- * the branch
- */
- if (dip == NULL || branch_set_offline(dip, path)
- == DDI_FAILURE) {
- ndi_devi_exit(ap->pdip, circ);
- rv = EIO;
- continue;
- }
-
- ASSERT(ddi_get_parent(dip) == ap->pdip);
-
- ndi_devi_exit(ap->pdip, circ);
-
- if (ap->flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * Invoke devi_branch_callback() (if it exists) only for
- * newly created branches
- */
- if (bp->devi_branch_callback && !exists)
- bp->devi_branch_callback(dip, bp->arg, 0);
- }
-
- kmem_free(path, MAXPATHLEN);
-
- return (rv);
-}
-
-static int
-sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
-{
- int rv, circ, len;
- int i, flags, ret;
- dev_info_t *dip;
- char *nbuf;
- char *path;
- static const char *noname = "<none>";
-
- ASSERT(pdip);
- ASSERT(DEVI_BUSY_OWNED(pdip));
-
- flags = 0;
-
- /*
- * Creating the root of a branch ?
- */
- if (rdipp) {
- *rdipp = NULL;
- flags = DEVI_BRANCH_ROOT;
- }
-
- ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
- rv = bp->create.sid_branch_create(dip, bp->arg, flags);
-
- nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
-
- if (rv == DDI_WALK_ERROR) {
- cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
- " properties on devinfo node %p", (void *)dip);
- goto fail;
- }
-
- len = OBP_MAXDRVNAME;
- if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
- DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
- != DDI_PROP_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
- "no name property", (void *)dip);
- goto fail;
- }
-
- ASSERT(i_ddi_node_state(dip) == DS_PROTO);
- if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
- cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
- " for devinfo node %p", nbuf, (void *)dip);
- goto fail;
- }
-
- kmem_free(nbuf, OBP_MAXDRVNAME);
-
- /*
- * Ignore bind failures just like boot does
- */
- (void) ndi_devi_bind_driver(dip, 0);
-
- switch (rv) {
- case DDI_WALK_CONTINUE:
- case DDI_WALK_PRUNESIB:
- ndi_devi_enter(dip, &circ);
-
- i = DDI_WALK_CONTINUE;
- for (; i == DDI_WALK_CONTINUE; ) {
- i = sid_node_create(dip, bp, NULL);
- }
-
- ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
- if (i == DDI_WALK_ERROR)
- rv = i;
- /*
- * If PRUNESIB stop creating siblings
- * of dip's child. Subsequent walk behavior
- * is determined by rv returned by dip.
- */
-
- ndi_devi_exit(dip, circ);
- break;
- case DDI_WALK_TERMINATE:
- /*
- * Don't create children and ask our parent
- * to not create siblings either.
- */
- rv = DDI_WALK_PRUNESIB;
- break;
- case DDI_WALK_PRUNECHILD:
- /*
- * Don't create children, but ask parent to continue
- * with siblings.
- */
- rv = DDI_WALK_CONTINUE;
- break;
- default:
- ASSERT(0);
- break;
- }
-
- if (rdipp)
- *rdipp = dip;
-
- /*
- * Set device offline - only the "configure" op should cause an attach.
- * Note that it is safe to set the dip offline without checking
- * for either device contract or layered driver (LDI) based constraints
- * since there cannot be any contracts or LDI opens of this device.
- * This is because this node is a newly created dip with the parent busy
- * held, so no other thread can come in and attach this dip. A dip that
- * has never been attached cannot have contracts since by definition
- * a device contract (an agreement between a process and a device minor
- * node) can only be created against a device that has minor nodes
- * i.e is attached. Similarly an LDI open will only succeed if the
- * dip is attached. We assert below that the dip is not attached.
- */
- ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- ret = set_infant_dip_offline(dip, path);
- ASSERT(ret == DDI_SUCCESS);
- kmem_free(path, MAXPATHLEN);
-
- return (rv);
-fail:
- (void) ndi_devi_free(dip);
- kmem_free(nbuf, OBP_MAXDRVNAME);
- return (DDI_WALK_ERROR);
-}
-
-static int
-create_sid_branch(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int rv = 0, state = DDI_WALK_CONTINUE;
- dev_info_t *rdip;
-
- while (state == DDI_WALK_CONTINUE) {
- int circ;
-
- ndi_devi_enter(pdip, &circ);
-
- state = sid_node_create(pdip, bp, &rdip);
- if (rdip == NULL) {
- ndi_devi_exit(pdip, circ);
- ASSERT(state == DDI_WALK_ERROR);
- break;
- }
-
- e_ddi_branch_hold(rdip);
-
- ndi_devi_exit(pdip, circ);
-
- if (flags & DEVI_BRANCH_CONFIGURE) {
- int error = e_ddi_branch_configure(rdip, dipp, 0);
- if (error && rv == 0)
- rv = error;
- }
-
- /*
- * devi_branch_callback() is optional
- */
- if (bp->devi_branch_callback)
- bp->devi_branch_callback(rdip, bp->arg, 0);
- }
-
- ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
-
- return (state == DDI_WALK_ERROR ? EIO : rv);
-}
-
-int
-e_ddi_branch_create(
- dev_info_t *pdip,
- devi_branch_t *bp,
- dev_info_t **dipp,
- uint_t flags)
-{
- int prom_devi, sid_devi, error;
-
- if (pdip == NULL || bp == NULL || bp->type == 0)
- return (EINVAL);
-
- prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
- sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
-
- if (prom_devi && bp->create.prom_branch_select == NULL)
- return (EINVAL);
- else if (sid_devi && bp->create.sid_branch_create == NULL)
- return (EINVAL);
- else if (!prom_devi && !sid_devi)
- return (EINVAL);
-
- if (flags & DEVI_BRANCH_EVENT)
- return (EINVAL);
-
- if (prom_devi) {
- struct pta pta = {0};
-
- pta.pdip = pdip;
- pta.bp = bp;
- pta.flags = flags;
-
- error = prom_tree_access(create_prom_branch, &pta, NULL);
-
- if (dipp)
- *dipp = pta.fdip;
- else if (pta.fdip)
- ndi_rele_devi(pta.fdip);
- } else {
- error = create_sid_branch(pdip, bp, dipp, flags);
- }
-
- return (error);
-}
-
-int
-e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
-{
- int rv;
- char *devnm;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ndi_hold_devi(pdip);
-
- if (!e_ddi_branch_held(rdip)) {
- ndi_rele_devi(pdip);
- cmn_err(CE_WARN, "e_ddi_branch_configure: "
- "dip(%p) not held", (void *)rdip);
- return (EINVAL);
- }
-
- if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
- /*
- * First attempt to bind a driver. If we fail, return
- * success (On some platforms, dips for some device
- * types (CPUs) may not have a driver)
- */
- if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
- ndi_rele_devi(pdip);
- return (0);
- }
-
- if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
- rv = NDI_FAILURE;
- goto out;
- }
- }
-
- ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- (void) ddi_deviname(rdip, devnm);
-
- if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
- NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
- /* release hold from ndi_devi_config_one() */
- ndi_rele_devi(rdip);
- }
-
- kmem_free(devnm, MAXNAMELEN + 1);
-out:
- if (rv != NDI_SUCCESS && dipp && rdip) {
- ndi_hold_devi(rdip);
- *dipp = rdip;
- }
- ndi_rele_devi(pdip);
- return (ndi2errno(rv));
-}
-
-void
-e_ddi_branch_hold(dev_info_t *rdip)
-{
- if (e_ddi_branch_held(rdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
- return;
- }
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
- DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref++;
- }
- ASSERT(DEVI(rdip)->devi_ref > 0);
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_held(dev_info_t *rdip)
-{
- int rv = 0;
-
- mutex_enter(&DEVI(rdip)->devi_lock);
- if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
- DEVI(rdip)->devi_ref > 0) {
- rv = 1;
- }
- mutex_exit(&DEVI(rdip)->devi_lock);
-
- return (rv);
-}
-void
-e_ddi_branch_rele(dev_info_t *rdip)
-{
- mutex_enter(&DEVI(rdip)->devi_lock);
- DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
- DEVI(rdip)->devi_ref--;
- mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_unconfigure(
- dev_info_t *rdip,
- dev_info_t **dipp,
- uint_t flags)
-{
- int circ, rv;
- int destroy;
- char *devnm;
- uint_t nflags;
- dev_info_t *pdip;
-
- if (dipp)
- *dipp = NULL;
-
- if (rdip == NULL)
- return (EINVAL);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_clean()
- */
- if (DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
- " devinfo node(%p) is busy held", (void *)pdip);
- return (EINVAL);
- }
-
- destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
-
- devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_deviname(rdip, devnm);
- ndi_devi_exit(pdip, circ);
-
- /*
- * ddi_deviname() returns a component name with / prepended.
- */
- (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
-
- ndi_devi_enter(pdip, &circ);
-
- /*
- * Recreate device name as it may have changed state (init/uninit)
- * when parent busy lock was dropped for devfs_clean()
- */
- (void) ddi_deviname(rdip, devnm);
-
- if (!e_ddi_branch_held(rdip)) {
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
- destroy ? "destroy" : "unconfigure", (void *)rdip);
- return (EINVAL);
- }
-
- /*
- * Release hold on the branch. This is ok since we are holding the
- * parent busy. If rdip is not removed, we must do a hold on the
- * branch before returning.
- */
- e_ddi_branch_rele(rdip);
-
- nflags = NDI_DEVI_OFFLINE;
- if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
- nflags |= NDI_DEVI_REMOVE;
- destroy = 1;
- } else {
- nflags |= NDI_UNCONFIG; /* uninit but don't remove */
- }
-
- if (flags & DEVI_BRANCH_EVENT)
- nflags |= NDI_POST_EVENT;
-
- if (i_ddi_devi_attached(pdip) &&
- (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
- rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
- } else {
- rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
- if (rv == NDI_SUCCESS) {
- ASSERT(!destroy || ddi_get_child(rdip) == NULL);
- rv = ndi_devi_offline(rdip, nflags);
- }
- }
-
- if (!destroy || rv != NDI_SUCCESS) {
- /* The dip still exists, so do a hold */
- e_ddi_branch_hold(rdip);
- }
-out:
- kmem_free(devnm, MAXNAMELEN + 1);
- ndi_devi_exit(pdip, circ);
- return (ndi2errno(rv));
-}
-
-int
-e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
-{
- return (e_ddi_branch_unconfigure(rdip, dipp,
- flag|DEVI_BRANCH_DESTROY));
-}
-
-/*
- * Number of chains for hash table
- */
-#define NUMCHAINS 17
-
-/*
- * Devinfo busy arg
- */
-struct devi_busy {
- int dv_total;
- int s_total;
- mod_hash_t *dv_hash;
- mod_hash_t *s_hash;
- int (*callback)(dev_info_t *, void *, uint_t);
- void *arg;
-};
-
-static int
-visit_dip(dev_info_t *dip, void *arg)
-{
- uintptr_t sbusy, dvbusy, ref;
- struct devi_busy *bsp = arg;
-
- ASSERT(bsp->callback);
-
- /*
- * A dip cannot be busy if its reference count is 0
- */
- if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
- return (bsp->callback(dip, bsp->arg, 0));
- }
-
- if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
- dvbusy = 0;
-
- /*
- * To catch device opens currently maintained on specfs common snodes.
- */
- if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = 0;
-
-#ifdef DEBUG
- if (ref < sbusy || ref < dvbusy) {
- cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
- "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
- }
-#endif
-
- dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
-
- return (bsp->callback(dip, bsp->arg, dvbusy));
-}
-
-static int
-visit_snode(struct snode *sp, void *arg)
-{
- uintptr_t sbusy;
- dev_info_t *dip;
- int count;
- struct devi_busy *bsp = arg;
-
- ASSERT(sp);
-
- /*
- * The stable lock is held. This prevents
- * the snode and its associated dip from
- * going away.
- */
- dip = NULL;
- count = spec_devi_open_count(sp, &dip);
-
- if (count <= 0)
- return (DDI_WALK_CONTINUE);
-
- ASSERT(dip);
-
- if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
- sbusy = count;
- else
- sbusy += count;
-
- if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
- cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
- "sbusy = %lu", "e_ddi_branch_referenced",
- (void *)dip, sbusy);
- }
-
- bsp->s_total += count;
-
- return (DDI_WALK_CONTINUE);
-}
-
-static void
-visit_dvnode(struct dv_node *dv, void *arg)
-{
- uintptr_t dvbusy;
- uint_t count;
- struct vnode *vp;
- struct devi_busy *bsp = arg;
-
- ASSERT(dv && dv->dv_devi);
-
- vp = DVTOV(dv);
-
- mutex_enter(&vp->v_lock);
- count = vp->v_count;
- mutex_exit(&vp->v_lock);
-
- if (!count)
- return;
-
- if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t *)&dvbusy))
- dvbusy = count;
- else
- dvbusy += count;
-
- if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
- (mod_hash_val_t)dvbusy)) {
- cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
- "dvbusy=%lu", "e_ddi_branch_referenced",
- (void *)dv->dv_devi, dvbusy);
- }
-
- bsp->dv_total += count;
-}
-
-/*
- * Returns reference count on success or -1 on failure.
- */
-int
-e_ddi_branch_referenced(
- dev_info_t *rdip,
- int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
- void *arg)
-{
- int circ;
- char *path;
- dev_info_t *pdip;
- struct devi_busy bsa = {0};
-
- ASSERT(rdip);
-
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
- ndi_hold_devi(rdip);
-
- pdip = ddi_get_parent(rdip);
-
- ASSERT(pdip);
-
- /*
- * Check if caller holds pdip busy - can cause deadlocks during
- * devfs_walk()
- */
- if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
- "devinfo branch(%p) not held or parent busy held",
- (void *)rdip);
- ndi_rele_devi(rdip);
- kmem_free(path, MAXPATHLEN);
- return (-1);
- }
-
- ndi_devi_enter(pdip, &circ);
- (void) ddi_pathname(rdip, path);
- ndi_devi_exit(pdip, circ);
-
- bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct dev_info));
-
- bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
- mod_hash_null_valdtor, sizeof (struct snode));
-
- if (devfs_walk(path, visit_dvnode, &bsa)) {
- cmn_err(CE_WARN, "e_ddi_branch_referenced: "
- "devfs walk failed for: %s", path);
- kmem_free(path, MAXPATHLEN);
- bsa.s_total = bsa.dv_total = -1;
- goto out;
- }
-
- kmem_free(path, MAXPATHLEN);
-
- /*
- * Walk the snode table to detect device opens, which are currently
- * maintained on specfs common snodes.
- */
- spec_snode_walk(visit_snode, &bsa);
-
- if (callback == NULL)
- goto out;
-
- bsa.callback = callback;
- bsa.arg = arg;
-
- if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
- ndi_devi_enter(rdip, &circ);
- ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
- ndi_devi_exit(rdip, circ);
- }
-
-out:
- ndi_rele_devi(rdip);
- mod_hash_destroy_ptrhash(bsa.s_hash);
- mod_hash_destroy_ptrhash(bsa.dv_hash);
- return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
-}
diff --git a/usr/src/uts/sun4u/sys/machsystm.h b/usr/src/uts/sun4u/sys/machsystm.h
index 76e96a56d6..ccacd922b4 100644
--- a/usr/src/uts/sun4u/sys/machsystm.h
+++ b/usr/src/uts/sun4u/sys/machsystm.h
@@ -26,8 +26,6 @@
#ifndef _SYS_MACHSYSTM_H
#define _SYS_MACHSYSTM_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Numerous platform-dependent interfaces that don't seem to belong
* in any other header file.
@@ -357,46 +355,6 @@ extern uint64_t cpu_pa[];
extern void ptl1_init_cpu(struct cpu *);
/*
- * Defines for DR interfaces
- */
-#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
-#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
-#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
-#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
-#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
-#define DEVI_BRANCH_SID 0x20 /* SID node branches */
-#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
-
-typedef struct devi_branch {
- void *arg;
- void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
- int type;
- union {
- int (*prom_branch_select)(pnode_t, void *, uint_t);
- int (*sid_branch_create)(dev_info_t *, void *, uint_t);
- } create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
- dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
- int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
* Constants which define the "hole" in the 64-bit sfmmu address space.
* These are set to specific values by the CPU module code.
*/
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index 832a634d0f..d9c795ecf9 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -341,46 +341,6 @@ extern uint64_t cpu_pa[];
extern void ptl1_init_cpu(struct cpu *);
/*
- * Defines for DR interfaces
- */
-#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */
-#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */
-#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */
-#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */
-#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */
-#define DEVI_BRANCH_SID 0x20 /* SID node branches */
-#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */
-
-typedef struct devi_branch {
- void *arg;
- void (*devi_branch_callback)(dev_info_t *, void *, uint_t);
- int type;
- union {
- int (*prom_branch_select)(pnode_t, void *, uint_t);
- int (*sid_branch_create)(dev_info_t *, void *, uint_t);
- } create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
- dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
- uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
- int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
* Constants which define the "hole" in the 64-bit sfmmu address space.
* These are set to specific values by the CPU module code.
*/