131 files changed, 22855 insertions, 2347 deletions
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 90b76afe47..2182a61ff9 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -477,11 +477,13 @@ $(CLOSED_BUILD)COMMON_SUBDIRS += \
 	$(CLOSED)/lib/libc_i18n
 
 i386_SUBDIRS= \
+	cmd/acpihpd \
 	cmd/biosdev \
 	cmd/rtc \
 	cmd/ucodeadm \
 	lib/brand/lx \
 	lib/cfgadm_plugins/sata \
+	lib/cfgadm_plugins/sbd \
 	lib/libfdisk
 
 sparc_SUBDIRS= \
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 2d0ce5e6d5..b493fa6ef1 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -476,6 +476,7 @@ ODE
 	$(CLOSED)/cmd/tail \
 	$(CLOSED)/cmd/tr_xpg4
 
 i386_SUBDIRS= \
+	acpihpd \
 	addbadsec \
 	biosdev \
 	diskscan \
diff --git a/usr/src/cmd/acpihpd/Makefile b/usr/src/cmd/acpihpd/Makefile
new file mode 100644
index 0000000000..33a26fb2be
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile
+#
+
+PLATFORM = i86pc
+MANIFEST = acpihpd.xml
+SVCMETHOD = svc-acpihpd
+
+include ../Makefile.cmd
+
+$(64ONLY)SUBDIRS= $(MACH)
+
+ROOTMANIFESTDIR = $(ROOTSVCPLATFORM)/${PLATFORM}
+
+all	:= TARGET = all
+install	:= TARGET = install
+clean	:= TARGET = clean
+clobber	:= TARGET = clobber
+lint	:= TARGET = lint
+
+.KEEP_STATE:
+
+all: $(SUBDIRS)
+
+install: $(SUBDIRS) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+
+clean clobber lint: $(SUBDIRS)
+
+check: $(CHKMANIFEST)
+
+$(SUBDIRS): FRC
+	@cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/Makefile.com b/usr/src/cmd/acpihpd/Makefile.com
new file mode 100644
index 0000000000..287a48d896
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.com
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.com
+#
+
+LIBS = acpihpd
+PLATFORM = i86pc
+
+include ../../Makefile.cmd
+include ../../../Makefile.psm
+
+OBJS = acpihpd.o notify.o
+SRCS = $(OBJS:%.o=../%.c)
+LINTFILES = $(SRCS:%.c=%.ln)
+CLOBBERFILES = $(LIBS)
+
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(USR_PSM_INCL_DIR)
+LDLIBS += -lsysevent -lnvpair -lcfgadm
+
+FILEMODE = 0755
diff --git a/usr/src/cmd/acpihpd/Makefile.targ b/usr/src/cmd/acpihpd/Makefile.targ
new file mode 100644
index 0000000000..89da583847
--- /dev/null
+++ b/usr/src/cmd/acpihpd/Makefile.targ
@@ -0,0 +1,62 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+# cmd/acpihpd/Makefile.targ
+#
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+clean:
+	$(RM) $(OBJS)
+
+lint:
+	$(LINT.c) $(SRCS) $(LDLIBS)
+
+$(LIBS): $(OBJS)
+	$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+	$(POST_PROCESS)
+
+%.o: ../%.c
+	$(COMPILE.c) $<
+	$(POST_PROCESS_O)
+
+$(USR_PLAT_DIR):
+	-$(INS.dir)
+
+$(USR_PSM_DIR): $(USR_PLAT_DIR)
+	-$(INS.dir)
+
+$(USR_PSM_LIB_DIR): $(USR_PSM_DIR)
+	-$(INS.dir)
+
+$(USR_PSM_LIB_DIR)/%: % $(USR_PSM_LIB_DIR)
+	$(INS.file)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/acpihpd/acpihpd.c b/usr/src/cmd/acpihpd/acpihpd.c
new file mode 100644
index 0000000000..6beee85f54
--- /dev/null
+++ b/usr/src/cmd/acpihpd/acpihpd.c
@@ -0,0 +1,325 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/dr.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <syslog.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <config_admin.h>
+#include <libscf.h>
+#include <libsysevent.h>
+#include <stdarg.h>
+
+/* Signal handler type */
+typedef void (sig_handler_t)(int);
+
+#define	ACPIHPD_PID_FILE	"/var/run/acpihpd.pid"	/* lock file path */
+
+/* Program Name */
+char *g_prog_name;
+int g_debuglevel = 0;
+
+static int s_pid_fd;
+static sysevent_handle_t *s_acpihpd_hdl;
+
+static int daemon_init(void);
+static void daemon_quit(int);
+static int set_sig_handler(int, sig_handler_t *);
+static int acpihpd_init(void);
+static void acpihpd_fini(void);
+static void acpihpd_event(sysevent_t *);
+extern void notify_hotplug(sysevent_t *ev);
+void debug_print(int, const char *, ...);
+
+int
+main(int argc, char *argv[])
+{
+	int c;
+
+	/* Get Program Name */
+	if ((g_prog_name = strrchr(argv[0], '/')) == NULL) {
+		g_prog_name = argv[0];
+	} else {
+		g_prog_name++;
+	}
+
+	while ((c = getopt(argc, argv, ":d:")) != -1) {
+		switch (c) {
+		case 'd':
+			g_debuglevel = atoi(optarg);
+			if ((g_debuglevel < 0) || (g_debuglevel > 2)) {
+				g_debuglevel = 0;
+			}
+			break;
+
+		case ':':
+			syslog(LOG_ERR,
+			    "missed argument for option %c.", optopt);
+			break;
+
+		case '?':
+			syslog(LOG_ERR, "unrecognized option %c.", optopt);
+			break;
+		}
+	}
+
+	s_acpihpd_hdl = NULL;
+
+	/* Check the daemon running lock and initialize the signal */
+	if (daemon_init() != 0) {
+		debug_print(0, "%s could not startup!", g_prog_name);
+		exit(SMF_EXIT_ERR_FATAL);
+	}
+
+	/* Subscribe to the hotplug event */
+	if (acpihpd_init() != 0) {
+		debug_print(0, "%s could not startup!", g_prog_name);
+		daemon_quit(SMF_EXIT_ERR_FATAL);
+	}
+
+	debug_print(2, "daemon is running.");
+	/*CONSTCOND*/
+	while (1) {
+		(void) pause();
+	}
+
+	return (SMF_EXIT_OK);
+}
+
+static int
+daemon_init(void)
+{
+	int i, ret;
+	pid_t pid;
+	char pid_str[32];
+
+	if (geteuid() != 0) {
+		debug_print(0, "must be root to execute %s", g_prog_name);
+		return (1);
+	}
+
+	if ((pid = fork()) < 0) {
+		return (1);
+	}
+
+	if (pid > 0) {
+		/* Parent to exit. */
+		exit(SMF_EXIT_OK);
+	}
+
+	(void) setsid();
+	(void) chdir("/");
+	(void) umask(0);
+	(void) closefrom(0);
+	(void) open("/dev/null", O_RDONLY);
+	(void) open("/dev/null", O_WRONLY);
+	(void) dup(1);
+	(void) openlog(g_prog_name, LOG_PID, LOG_DAEMON);
+
+	/*
+	 * Create the lock file for singleton
+	 */
+	if ((s_pid_fd = open(ACPIHPD_PID_FILE, O_RDWR | O_CREAT, 0644)) < 0) {
+		debug_print(0, "could not create pid file: %s",
+		    strerror(errno));
+		return (1);
+	}
+
+	if (lockf(s_pid_fd, F_TLOCK, 0L) < 0) {
+		if (errno == EACCES || errno == EAGAIN) {
+			debug_print(0, "another acpihpd is already running");
+		} else {
+			debug_print(0, "could not lock pid file");
+		}
+
+		return (1);
+	}
+
+	(void) ftruncate(s_pid_fd, 0);
+	i = sprintf(pid_str, "%ld", (long)getpid());
+	while ((ret = write(s_pid_fd, pid_str, i)) != i) {
+		if (errno == EINTR) {
+			continue;
+		}
+		if (ret < 0) {
+			debug_print(0, "pid file write failed: %s",
+			    strerror(errno));
+			return (1);
+		}
+	}
+
+	if (set_sig_handler(SIGTERM, (sig_handler_t *)daemon_quit) != 0) {
+		debug_print(2, "could not set signal handler(SIGTERM)");
+		return (1);
+	}
+
+	if (set_sig_handler(SIGQUIT, (sig_handler_t *)daemon_quit) != 0) {
+		debug_print(2, "could not set signal handler(SIGQUIT)");
+		return (1);
+	}
+
+	if (set_sig_handler(SIGINT, (sig_handler_t *)daemon_quit) != 0) {
+		debug_print(2, "could not set signal handler(SIGINT)");
+		return (1);
+	}
+
+	if (set_sig_handler(SIGCHLD, SIG_IGN) != 0) {
+		debug_print(2, "could not set signal handler(SIGCHLD)");
+		return (1);
+	}
+
+	return (0);
+}
+
+static void
+daemon_quit(int signo)
+{
+	int status = 0;
+	id_t pgid;
+
+	debug_print(1, "daemon quit [signal#:%d].", signo);
+
+	acpihpd_fini();
+	(void) set_sig_handler(SIGTERM, SIG_IGN);
+	pgid = getpgrp();
+	(void) kill(-pgid, SIGTERM);
+	(void) close(s_pid_fd);
+	(void) unlink(ACPIHPD_PID_FILE);
+
+	if (signo < 0) {
+		status = signo;
+	}
+	_exit(status);
+}
+
+static int
+set_sig_handler(int sig, sig_handler_t *handler)
+{
+	struct sigaction act;
+
+	act.sa_handler = handler;
+	act.sa_flags = 0;
+	if (sig == SIGCHLD && handler == SIG_IGN) {
+		act.sa_flags |= SA_NOCLDWAIT;
+	}
+
+	(void) sigemptyset(&act.sa_mask);
+	if (sigaction(sig, &act, NULL) < 0) {
+		return (1);
+	}
+
+	return (0);
+}
+
+static int
+acpihpd_init(void)
+{
+	const char *subclass = ESC_DR_REQ;
+
+	debug_print(2, "acpihpd_init");
+
+	if ((s_acpihpd_hdl = sysevent_bind_handle(acpihpd_event)) == NULL) {
+		debug_print(2, "could not bind to sysevent.");
+		return (-1);
+	}
+
+	if (sysevent_subscribe_event(s_acpihpd_hdl, EC_DR, &subclass, 1) != 0) {
+		debug_print(2, "could not subscribe an event.");
+		sysevent_unbind_handle(s_acpihpd_hdl);
+		s_acpihpd_hdl = NULL;
+		return (-1);
+	}
+
+	return (0);
+}
+
+static void
+acpihpd_fini(void)
+{
+	debug_print(2, "acpihpd_fini");
+
+	if (s_acpihpd_hdl != NULL) {
+		sysevent_unsubscribe_event(s_acpihpd_hdl, EC_DR);
+		sysevent_unbind_handle(s_acpihpd_hdl);
+	}
+}
+
+static void
+acpihpd_event(sysevent_t *ev)
+{
+	debug_print(2, "*** got an event ***");
+
+	/* Inform cfgadm of the hot-plug event. */
+	notify_hotplug(ev);
+}
+
+void
+debug_print(int level, const char *fmt, ...)
+{
+	va_list ap;
+	int pri, pr_out = 0;
+
+	if (level <= g_debuglevel) {
+		switch (level) {
+		case 0:
+			pri = LOG_ERR;
+			pr_out = 1;
+			break;
+
+		case 1:
+			pri = LOG_NOTICE;
+			pr_out = 1;
+			break;
+
+		case 2:
+			pri = LOG_DEBUG;
+			pr_out = 1;
+			break;
+		}
+
+		if (pr_out) {
+			va_start(ap, fmt);
+			vsyslog(pri, fmt, ap);
+			va_end(ap);
+		}
+	}
+}
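/*
 * The daemon above reduces to a small libsysevent pattern: bind a
 * delivery handle to a callback, then subscribe to class EC_DR with
 * subclass ESC_DR_REQ.  A minimal standalone consumer of the same
 * interfaces is sketched below for reference; the handler name and
 * the pause() loop are illustrative, not part of the patch.
 */
#include <libsysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dr.h>
#include <stdio.h>
#include <unistd.h>

static void
dr_req_handler(sysevent_t *ev)
{
	/* Runs on a libsysevent delivery thread, one call per event. */
	(void) printf("DR event: class=%s subclass=%s\n",
	    sysevent_get_class_name(ev), sysevent_get_subclass_name(ev));
}

int
main(void)
{
	const char *subclass = ESC_DR_REQ;
	sysevent_handle_t *hdl;

	if ((hdl = sysevent_bind_handle(dr_req_handler)) == NULL)
		return (1);
	if (sysevent_subscribe_event(hdl, EC_DR, &subclass, 1) != 0) {
		sysevent_unbind_handle(hdl);
		return (1);
	}
	for (;;)
		(void) pause();		/* events arrive asynchronously */
	/* NOTREACHED */
}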
+{ + va_list ap; + int pri, pr_out = 0; + + if (level <= g_debuglevel) { + switch (level) { + case 0: + pri = LOG_ERR; + pr_out = 1; + break; + + case 1: + pri = LOG_NOTICE; + pr_out = 1; + break; + + case 2: + pri = LOG_DEBUG; + pr_out = 1; + break; + } + + if (pr_out) { + va_start(ap, fmt); + vsyslog(pri, fmt, ap); + va_end(ap); + } + } +} diff --git a/usr/src/cmd/acpihpd/acpihpd.xml b/usr/src/cmd/acpihpd/acpihpd.xml new file mode 100644 index 0000000000..e8e799de08 --- /dev/null +++ b/usr/src/cmd/acpihpd/acpihpd.xml @@ -0,0 +1,93 @@ +<?xml version="1.0"?> +<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1"> +<!-- + + Copyright (c) 2010, Intel Corporation. + All rights reserved. + + CDDL HEADER START + + The contents of this file are subject to the terms of the + Common Development and Distribution License (the "License"). + You may not use this file except in compliance with the License. + + You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + or http://www.opensolaris.org/os/licensing. + See the License for the specific language governing permissions + and limitations under the License. + + When distributing Covered Code, include this CDDL HEADER in each + file and include the License file at usr/src/OPENSOLARIS.LICENSE. + If applicable, add the following below this CDDL HEADER, with the + fields enclosed by brackets "[]" replaced with your own identifying + information: Portions Copyright [yyyy] [name of copyright owner] + + CDDL HEADER END + + Note: + NOTE: This service manifest is not editable; its contents will + be overwritten by package or patch operations, including + operating system upgrade. Make customizations in a different + file. +--> + +<service_bundle type='manifest' name='SUNWacpihpd:acpihpd'> + +<service + name='platform/i86pc/acpihpd' + type='service' + version='1'> + + <create_default_instance enabled='true' /> + + <single_instance/> + + <!-- + acpihpd should be started after syseventd is running. + --> + <dependency + name='startup_req' + grouping='require_all' + restart_on='none' + type='service'> + <service_fmri value='svc:/system/sysevent' /> + </dependency> + + <exec_method + type='method' + name='start' + exec='/lib/svc/method/svc-acpihpd' + timeout_seconds='60' > + <method_context> + <method_credential user='root' group='root' + privileges='basic,!proc_info,sys_config,sys_mount,sys_devices' /> + </method_context> + </exec_method> + + <exec_method + type='method' + name='stop' + exec=':kill' + timeout_seconds='60' > + <method_context> + <method_credential user='root' group='root' /> + </method_context> + </exec_method> + + <stability value='Unstable' /> + + <template> + <common_name> + <loctext xml:lang='C'> + Intel ACPI hot-plug daemon + </loctext> + </common_name> + <documentation> + <manpage title='acpihpd' section='1M' + manpath='/usr/share/man' /> + </documentation> + </template> +</service> + +</service_bundle> + diff --git a/usr/src/cmd/acpihpd/i386/Makefile b/usr/src/cmd/acpihpd/i386/Makefile new file mode 100644 index 0000000000..4356b38b82 --- /dev/null +++ b/usr/src/cmd/acpihpd/i386/Makefile @@ -0,0 +1,37 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2010, Intel Corporation. +# All rights reserved. +# +# cmd/acpihpd/i386/Makefile +# + +include ../Makefile.com + +.KEEP_STATE: + +install: all $(USR_PSM_LIBS) + +include ../Makefile.targ diff --git a/usr/src/cmd/acpihpd/notify.c b/usr/src/cmd/acpihpd/notify.c new file mode 100644 index 0000000000..138b1a233d --- /dev/null +++ b/usr/src/cmd/acpihpd/notify.c @@ -0,0 +1,353 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <config_admin.h> +#include <strings.h> +#include <syslog.h> +#include <libsysevent.h> +#include <libdevinfo.h> +#include <libnvpair.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <stropts.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/sysevent/dr.h> +#include <sys/sbd_ioctl.h> +#include <sys/acpidev.h> + +#define PMCONFIG_PATH "/usr/sbin/pmconfig" + +#define CFGADM_CMD_ASSIGN "assign" +#define CFGADM_CMD_POWERON "poweron" +#define CFGADM_CMD_PASSTHRU "passthru" + +#define STATUS_INPROGRESS 0 +#define STATUS_SUCCESS 1 +#define STATUS_FAILURE 2 +#define STATUS_NOOP 3 + +static char *s_status_array[] = { + ACPIDEV_CMD_OST_INPROGRESS, + ACPIDEV_CMD_OST_SUCCESS, + ACPIDEV_CMD_OST_FAILURE, + ACPIDEV_CMD_OST_NOOP +}; + +extern void debug_print(int, const char *, ...); + +/*ARGSUSED*/ +static int +confirm_no(void *appdata_ptr, const char *message) +{ + return (0); +} + +/*ARGSUSED*/ +static int +message_output(void *appdata_ptr, const char *message) +{ + debug_print(2, "cfgadm message: %s", message); + return (CFGA_OK); +} + +static char * +plat_opt_str_alloc(int cmd, char *acpi_event_type, int status) +{ + char *opt; + size_t len; + + if (cmd == SBD_CMD_PASSTHRU) { + len = strlen(s_status_array[status]) + + strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) + + strlen(acpi_event_type) + 10; + if ((opt = malloc(len)) != NULL) { + (void) snprintf(opt, len, "%s %s=%s", + s_status_array[status], + ACPIDEV_EVENT_TYPE_ATTR_NAME, + acpi_event_type); + debug_print(2, "plat_opt_str_alloc = '%s'", opt); + } + } else { + len = strlen("platform=''") + + strlen(s_status_array[status]) + + strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME) + + strlen(acpi_event_type) + 10; + if ((opt = malloc(len)) != NULL) { + (void) snprintf(opt, len, "platform='%s %s=%s'", + s_status_array[status], + ACPIDEV_EVENT_TYPE_ATTR_NAME, + acpi_event_type); + debug_print(2, "plat_opt_str_alloc = '%s'", opt); + } + } + + return (opt); +} + +static int +cfgadm_cmd_wrapper(int cmd, int apid_num, char **apids, + char *acpi_event_type, int status, + struct cfga_confirm *confirm, struct cfga_msg *message) +{ + cfga_err_t ret; + char *plat_opts; + char *estrp = NULL; + + assert(apid_num == 1); + assert(apids != NULL); + + plat_opts = plat_opt_str_alloc(cmd, acpi_event_type, status); + if (plat_opts == NULL) { + debug_print(0, + "failed to generate platform option string for cfgadm"); + return (-1); + } + + switch (cmd) { + case SBD_CMD_CONNECT: + ret = config_change_state(CFGA_CMD_CONNECT, apid_num, apids, + plat_opts, confirm, message, &estrp, 0); + if (ret != CFGA_OK) { + debug_print(0, "cfgadm('connect', '%s') failed, " + "ret = %d, errstr = '%s'", apids[0], ret, estrp); + } + break; + + case SBD_CMD_CONFIGURE: + ret = config_change_state(CFGA_CMD_CONFIGURE, apid_num, apids, + plat_opts, confirm, message, &estrp, 0); + if (ret != CFGA_OK) { + debug_print(0, "cfgadm('configure', '%s') failed, " + "ret = %d, errstr = '%s'", apids[0], ret, estrp); + } + break; + + case SBD_CMD_ASSIGN: + ret = config_private_func(CFGADM_CMD_ASSIGN, apid_num, apids, + plat_opts, confirm, message, &estrp, 0); + if (ret != CFGA_OK) { + debug_print(0, "cfgadm('assign', '%s') failed, " + "ret = %d, errstr = '%s'", apids[0], ret, estrp); + } + break; + + case SBD_CMD_POWERON: + ret = config_private_func(CFGADM_CMD_POWERON, apid_num, apids, + plat_opts, confirm, message, &estrp, 0); + if (ret != CFGA_OK) { + 
debug_print(0, "cfgadm('poweron', '%s') failed, " + "ret = %d, errstr = '%s'", apids[0], ret, estrp); + } + break; + + case SBD_CMD_PASSTHRU: + ret = config_private_func(CFGADM_CMD_PASSTHRU, apid_num, apids, + plat_opts, confirm, message, &estrp, 0); + if (ret != CFGA_OK) { + debug_print(0, "cfgadm('passthru', '%s') failed, " + "ret = %d, errstr = '%s'", apids[0], ret, estrp); + } + break; + + default: + debug_print(2, "unknown command (%d) to cfgadm_cmd_wrapper()"); + ret = CFGA_ERROR; + break; + } + + if (plat_opts != NULL) + free(plat_opts); + + return (ret == CFGA_OK ? 0 : -1); +} + +static int +event_process(char *ap_id, char *req, char *acpi_event_type) +{ + char *apids[1]; + struct cfga_msg message; + struct cfga_confirm confirm; + + if (strcmp(req, DR_REQ_INCOMING_RES) != 0) { + debug_print(2, + "Event is not supported (ap_id = %s, req = %s)", + ap_id, req); + return (-1); + } + + apids[0] = ap_id; + (void) memset(&confirm, 0, sizeof (confirm)); + confirm.confirm = confirm_no; + (void) memset(&message, 0, sizeof (message)); + message.message_routine = message_output; + + if (cfgadm_cmd_wrapper(SBD_CMD_ASSIGN, 1, apids, + acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) { + goto L_ERR; + } + syslog(LOG_NOTICE, + "board '%s' has been assigned successfully", ap_id); + (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids, + acpi_event_type, STATUS_INPROGRESS, &confirm, &message); + + if (cfgadm_cmd_wrapper(SBD_CMD_POWERON, 1, apids, + acpi_event_type, STATUS_NOOP, &confirm, &message) != 0) { + goto L_ERR_SIG; + } + syslog(LOG_NOTICE, + "board '%s' has been powered on successfully", ap_id); + (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids, + acpi_event_type, STATUS_INPROGRESS, &confirm, &message); + + if (cfgadm_cmd_wrapper(SBD_CMD_CONNECT, 1, apids, + acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) { + goto L_ERR_SIG; + } + syslog(LOG_NOTICE, + "board '%s' has been connected successfully", ap_id); + (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids, + acpi_event_type, STATUS_INPROGRESS, &confirm, &message); + + if (cfgadm_cmd_wrapper(SBD_CMD_CONFIGURE, 1, apids, + acpi_event_type, STATUS_INPROGRESS, &confirm, &message) != 0) { + goto L_ERR_SIG; + } + syslog(LOG_NOTICE, + "board '%s' has been configured successfully", ap_id); + (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids, + acpi_event_type, STATUS_SUCCESS, &confirm, &message); + + (void) system(PMCONFIG_PATH); + syslog(LOG_NOTICE, + "board '%s' has been added into system successfully", ap_id); + + return (0); + +L_ERR_SIG: + (void) cfgadm_cmd_wrapper(SBD_CMD_PASSTHRU, 1, apids, + acpi_event_type, STATUS_FAILURE, &confirm, &message); +L_ERR: + syslog(LOG_ERR, "failed to add board '%s' into system", ap_id); + + return (-1); +} + +void +notify_hotplug(sysevent_t *ev) +{ + char *vendor = NULL; + nvlist_t *attr_list = NULL; + char *class, *subclass; + char *ap_id, *req, *acpi_event_type; + + vendor = sysevent_get_vendor_name(ev); + debug_print(2, "message_vendor = '%s'", vendor ? vendor : "unknown"); + if (vendor == NULL || strcmp(vendor, SUNW_VENDOR) != 0) { + debug_print(2, + "vendor id of message is not '%s'", SUNW_VENDOR); + goto L_EXIT; + } + + class = sysevent_get_class_name(ev); + debug_print(2, "message_class = '%s'", class ? class : "unknown"); + if (class == NULL || strcmp(class, EC_DR) != 0) { + debug_print(2, "class of message is not '%s'", EC_DR); + goto L_EXIT; + } + + subclass = sysevent_get_subclass_name(ev); + debug_print(2, + "message_subclass = '%s'", subclass ? 
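/*
 * Each step of event_process() above funnels into libcfgadm.  Stripped
 * of the passthru status reporting, a single state change against one
 * attachment point follows the shape sketched below; the "SB0" ap_id
 * is a hypothetical placeholder, and the callbacks mirror confirm_no()
 * and message_output() from notify.c.
 */
#include <config_admin.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
confirm_no(void *arg, const char *msg)
{
	return (0);		/* never confirm destructive prompts */
}

static int
print_msg(void *arg, const char *msg)
{
	(void) printf("cfgadm: %s\n", msg);
	return (CFGA_OK);
}

int
main(void)
{
	char *apids[1] = { "SB0" };	/* hypothetical board ap_id */
	struct cfga_confirm confirm;
	struct cfga_msg message;
	char *errstr = NULL;

	(void) memset(&confirm, 0, sizeof (confirm));
	confirm.confirm = confirm_no;
	(void) memset(&message, 0, sizeof (message));
	message.message_routine = print_msg;

	/* Roughly equivalent to: cfgadm -c configure SB0 */
	if (config_change_state(CFGA_CMD_CONFIGURE, 1, apids, NULL,
	    &confirm, &message, &errstr, 0) != CFGA_OK) {
		(void) fprintf(stderr, "configure failed: %s\n",
		    errstr != NULL ? errstr : "unknown");
		free(errstr);
		return (1);
	}
	return (0);
}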
diff --git a/usr/src/cmd/acpihpd/svc-acpihpd b/usr/src/cmd/acpihpd/svc-acpihpd
new file mode 100644
index 0000000000..f45f05ed06
--- /dev/null
+++ b/usr/src/cmd/acpihpd/svc-acpihpd
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+. /lib/svc/share/smf_include.sh
+
+PLATFORM="i86pc"
+ACPIHPD=/usr/platform/${PLATFORM}/lib/acpihpd
+
+/usr/sbin/prtconf -D /devices/fw/acpidr@0 > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+	svcadm disable -t $SMF_FMRI
+	sleep 5&
+	exit $SMF_EXIT_OK
+fi
+
+$ACPIHPD && exit $SMF_EXIT_OK || exit $SMF_EXIT_ERR_FATAL
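/*
 * The method script above gates the service on the ACPI DR nexus by
 * probing "prtconf -D /devices/fw/acpidr@0"; the temporary
 * "svcadm disable -t" keeps SMF from restarting the daemon on hardware
 * that lacks the nexus.  Programmatically, a similar check can be
 * approximated with libdevinfo; the snapshot path and flag choice
 * below are assumptions for illustration, not taken from the patch.
 */
#include <libdevinfo.h>
#include <stdio.h>

int
main(void)
{
	di_node_t node;

	/* Snapshot only the subtree rooted at the ACPI DR node. */
	node = di_init("/fw/acpidr@0", DINFOSUBTREE);
	if (node == DI_NODE_NIL) {
		(void) printf("acpidr: absent\n");
		return (1);
	}
	di_fini(node);
	(void) printf("acpidr: present\n");
	return (0);
}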
diff --git a/usr/src/lib/cfgadm_plugins/Makefile b/usr/src/lib/cfgadm_plugins/Makefile
index bb37297b11..9fc029e2ec 100644
--- a/usr/src/lib/cfgadm_plugins/Makefile
+++ b/usr/src/lib/cfgadm_plugins/Makefile
@@ -27,8 +27,8 @@
 include $(SRC)/Makefile.master
 
-COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp
-sparc_SUBDIRS= sbd ac sysctrl
+COMMON_SUBDIRS= scsi sdcard pci usb ib fp shp sbd
+sparc_SUBDIRS= ac sysctrl
 i386_SUBDIRS= sata
diff --git a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
index fb3046ba42..e28db75c53 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
+++ b/usr/src/lib/cfgadm_plugins/sbd/Makefile.com
@@ -23,7 +23,6 @@
 # Use is subject to license terms.
 #
 
-PLATFORM= sun4u
 LIBRARY= sbd.a
 VERS= .1
@@ -33,7 +32,7 @@ OBJECTS= ap.o ap_msg.o ap_rcm.o ap_sbd.o ap_seq.o cfga.o ap_err.o
 include ../../../Makefile.lib
 
 USR_PLAT_DIR = $(ROOT)/usr/platform
-USR_PSM_DIR = $(USR_PLAT_DIR)/sun4u
+USR_PSM_DIR = $(USR_PLAT_DIR)/$(PLATFORM)
 USR_PSM_LIB_DIR = $(USR_PSM_DIR)/lib
 USR_PSM_LIB_CFG_DIR = $(USR_PSM_LIB_DIR)/cfgadm
 USR_PSM_LIB_CFG_DIR_64 = $(USR_PSM_LIB_CFG_DIR)/$(MACH64)
@@ -54,7 +53,7 @@ CPPFLAGS += -I$(ROOT)/usr/platform/$(PLATFORM)/include -DSBD_DEBUG
 #
 GENERRDIR= $(SRC)/lib/cfgadm_plugins/sbd
 GENERR= $(GENERRDIR)/sbdgenerr
-ERRSRC= $(ROOT)/usr/platform/sun4u/include/sys/sbd_ioctl.h
+ERRSRC= $(ROOT)/usr/platform/$(PLATFORM)/include/sys/sbd_ioctl.h
 
 .KEEP_STATE:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
new file mode 100644
index 0000000000..9fd69cde68
--- /dev/null
+++ b/usr/src/lib/cfgadm_plugins/sbd/amd64/Makefile
@@ -0,0 +1,40 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2010, Intel Corporation.
+# All rights reserved.
+#
+
+PLATFORM = i86pc
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+CFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS
+LDLIBS += -lscf
+
+.KEEP_STATE:
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
index c7c3e96f69..502d192014 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
@@ -40,6 +37,7 @@
 #include <config_admin.h>
 #include "ap.h"
 
+/*ARGSUSED0*/
 int
 ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
 {
@@ -78,13 +76,13 @@ ap_symid(apd_t *a, char *apid, char *symid, size_t bufsize)
 		cp--;	/* Get the '/' */
 
 		if (strcmp(cp, apid) == 0) {
-			snprintf(symid, bufsize, "%s", dp->d_name);
+			(void) snprintf(symid, bufsize, "%s", dp->d_name);
 			rc = 0;
 			break;
 		}
 	}
 
-	closedir(dirp);
+	(void) closedir(dirp);
 	return (rc);
 }
 
@@ -293,7 +291,7 @@ done:
 	DBG("path=<%s> ", a->path ? a->path : "");
 	DBG("drv=<%s> inst=%d minor=<%s> ",
-	    a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
+	    a->drv ? a->drv : "", a->inst, a->minor ? a->minor : "");
 	DBG("target=<%s>\n", a->target ? a->target : "");
 	DBG("cid=<%s> ", a->cid ? a->cid : "");
 	DBG("cname=<%s> ", a->cname ? a->cname : "");
@@ -612,7 +610,7 @@ ap_opt_parse(apd_t *a, ap_cmd_t *acp, const char *options)
 	 * during sequencing.
 	 */
 	for (p = strtok(value, ":"); p != NULL;
-	    p = strtok(NULL, ":")) {
+	    p = strtok(NULL, ":")) {
 		if ((i = ap_cmd(p)) == CMD_NONE) {
 			ap_err(a, ERR_CMD_INVAL, p);
 			return (-1);
@@ -711,8 +709,7 @@ ap_cmd_parse(apd_t *a, const char *f, const char *options, int *cmd)
 	target = mask(tgt);
 
 	DBG("cmd=%s(%d) tmask=0x%x cmask=0x%x omask=0x%x\n",
-	    ap_cmd_name(c), c, target,
-	    acp->cmask, acp->omask[tgt]);
+	    ap_cmd_name(c), c, target, acp->cmask, acp->omask[tgt]);
 
 	if ((acp->cmask & target) == 0)
 		ap_err(a, ERR_CMD_NOTSUPP, c);
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
index d2049bdc1d..1005dab325 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_msg.c
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -63,8 +61,7 @@ debugging(void)
 		if ((debug_fp = fopen(ep, "a")) == NULL)
 			return (0);
 	}
-	(void) fprintf(debug_fp,
-	    "\nDebug started, pid=%d\n", (int)getpid());
+	(void) fprintf(debug_fp, "\nDebug started, pid=%d\n", (int)getpid());
 	return (1);
 }
 
@@ -110,7 +107,7 @@ ap_err_fmts[] = {
 	NULL
 };
 
-#define	ap_err_fmt(i)	ap_err_fmts[min((i), ERR_NONE)]
+#define	ap_err_fmt(i)	ap_err_fmts[min((uint_t)(i), ERR_NONE)]
 
 static char *
 ap_msg_fmts[] = {
@@ -125,7 +122,7 @@ ap_msg_fmts[] = {
 	NULL
 };
 
-#define	ap_msg_fmt(i)	ap_msg_fmts[min((i), MSG_NONE)]
+#define	ap_msg_fmt(i)	ap_msg_fmts[min((uint_t)(i), MSG_NONE)]
 
 #define	STR_BD		"board"
 #define	STR_SEP		": "
@@ -366,7 +363,7 @@ ap_err(apd_t *a, ...)
 	switch (err) {
 	case ERR_CMD_FAIL:
 		(void) snprintf(p, len, fmt, cmd, target,
-		    syserr, rsep, sysrsrc);
+		    syserr, rsep, sysrsrc);
 		break;
 	case ERR_CMD_ABORT:
 	case ERR_CMD_NACK:
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
index f917bb5ae4..770784f16f 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_rcm.c
@@ -23,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -145,6 +143,8 @@ ap_rcm_ops[] = {
 
 #ifdef __sparcv9
 #define	RCMLIB	"/lib/sparcv9/librcm.so";
+#elif defined(__amd64)
+#define	RCMLIB	"/lib/amd64/librcm.so";
 #else
 #define	RCMLIB	"/lib/librcm.so";
 #endif
@@ -323,22 +323,21 @@ ap_rcm_init(apd_t *a)
 		switch (i) {
 		case ALLOC_HANDLE:
 			rcm->alloc_handle = (int(*)
-			    (char *, uint_t, void *, rcm_handle_t **))sym;
+			    (char *, uint_t, void *, rcm_handle_t **))sym;
 			break;
 		case FREE_HANDLE:
 			rcm->free_handle = (void (*)(rcm_handle_t *))sym;
 			break;
 		case GET_INFO:
 			rcm->get_info = (int (*)
-			    (rcm_handle_t *, char *, uint_t,
-			    rcm_info_t **))sym;
+			    (rcm_handle_t *, char *, uint_t, rcm_info_t **))sym;
 			break;
 		case FREE_INFO:
 			rcm->free_info = (void (*)(rcm_info_t *))sym;
 			break;
 		case INFO_TUPLE_NEXT:
 			rcm->info_next = (rcm_info_tuple_t *(*)
-			    (rcm_info_t *, rcm_info_tuple_t *))sym;
+			    (rcm_info_t *, rcm_info_tuple_t *))sym;
 			break;
 		case INFO_TUPLE_STATE:
 			rcm->info_state = (int (*)(rcm_info_tuple_t *))sym;
@@ -348,50 +347,50 @@ ap_rcm_init(apd_t *a)
 			break;
 		case INFO_TUPLE_ERROR:
 			rcm->info_error = (const char *(*)
-			    (rcm_info_tuple_t *))sym;
+			    (rcm_info_tuple_t *))sym;
 			break;
 		case INFO_TUPLE_INFO:
 			rcm->info_info = (const char *(*)
-			    (rcm_info_tuple_t *))sym;
+			    (rcm_info_tuple_t *))sym;
 			break;
 		case INFO_TUPLE_RSRC:
 			rcm->info_rsrc = (const char *(*)
-			    (rcm_info_tuple_t *))sym;
+			    (rcm_info_tuple_t *))sym;
 			break;
 		case REQUEST_OFFLINE:
 			rcm->request_offline_list = (int (*)
-			    (rcm_handle_t *, char **, uint_t,
-			    rcm_info_t **))sym;
+			    (rcm_handle_t *, char **, uint_t,
+			    rcm_info_t **))sym;
 			break;
 		case NOTIFY_ONLINE:
 			rcm->notify_online_list = (int (*)
-			    (rcm_handle_t *, char **, uint_t,
-			    rcm_info_t **))sym;
+			    (rcm_handle_t *, char **, uint_t,
+			    rcm_info_t **))sym;
 			break;
 		case REQUEST_SUSPEND:
 			rcm->request_suspend = (int (*)
-			    (rcm_handle_t *, char *, uint_t,
-			    timespec_t *, rcm_info_t **))sym;
+			    (rcm_handle_t *, char *, uint_t,
+			    timespec_t *, rcm_info_t **))sym;
 			break;
 		case NOTIFY_RESUME:
 			rcm->notify_resume = (int (*)
-			    (rcm_handle_t *, char *, uint_t,
-			    rcm_info_t **))sym;
+			    (rcm_handle_t *, char *, uint_t,
+			    rcm_info_t **))sym;
 			break;
 		case NOTIFY_REMOVE:
 			rcm->notify_remove_list = (int (*)
-			    (rcm_handle_t *, char **, uint_t,
-			    rcm_info_t **))sym;
+			    (rcm_handle_t *, char **, uint_t,
+			    rcm_info_t **))sym;
 			break;
 		case REQUEST_CAP_CHANGE:
 			rcm->request_capacity_change = (int (*)
-			    (rcm_handle_t *, char *, uint_t,
-			    nvlist_t *, rcm_info_t **))sym;
+			    (rcm_handle_t *, char *, uint_t,
+			    nvlist_t *, rcm_info_t **))sym;
 			break;
 		case NOTIFY_CAP_CHANGE:
 			rcm->notify_capacity_change = (int (*)
-			    (rcm_handle_t *, char *, uint_t,
-			    nvlist_t *, rcm_info_t **))sym;
+			    (rcm_handle_t *, char *, uint_t,
+			    nvlist_t *, rcm_info_t **))sym;
 			break;
 		default:
 			break;
@@ -400,7 +399,7 @@ ap_rcm_init(apd_t *a)
 
 	if (rcm->alloc_handle == NULL ||
 	    (*rcm->alloc_handle)(NULL, RCM_NOPID, NULL, &rcm->hd)
-	    != RCM_SUCCESS) {
+	    != RCM_SUCCESS) {
 		ap_err(a, ERR_RCM_HANDLE);
 		return (CFGA_LIB_ERROR);
 	}
@@ -441,7 +440,7 @@ ap_rcm_init(apd_t *a)
 	rcm->ncpus = ncpu;
 
 	if ((rcm->cpuids = (cpuid_t *)calloc(ncpu, sizeof (cpuid_t)))
-	    == NULL) {
+	    == NULL) {
 		ap_err(a, ERR_NOMEM);
 		return (CFGA_LIB_ERROR);
 	}
@@ -781,20 +780,21 @@ ap_rcm_cap_cpu(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
 	    nvlist_add_int32(nvl, "old_total", oldncpuids) != 0 ||
 	    nvlist_add_int32(nvl, "new_total", newncpuids) != 0 ||
 	    nvlist_add_int32_array(nvl, "old_cpu_list", oldcpuids,
-	    oldncpuids) != 0 ||
+	    oldncpuids) != 0 ||
 	    nvlist_add_int32_array(nvl, "new_cpu_list", newcpuids,
-	    newncpuids) != 0)
+	    newncpuids) != 0)
 		goto done;
 
-	snprintf(buf, sizeof (buf), fmt, change);
+	(void) snprintf(buf, sizeof (buf), fmt, change);
 	ap_msg(a, MSG_ISSUE, cmd, buf);
 
-	if (cmd == CMD_RCM_CAP_DEL)
+	if (cmd == CMD_RCM_CAP_DEL) {
 		rv = (*rcm->request_capacity_change)(hd, "SUNW_cpu",
-		    flags, nvl, rinfo);
-	else
+		    flags, nvl, rinfo);
+	} else {
 		rv = (*rcm->notify_capacity_change)(hd, "SUNW_cpu",
-		    flags & ~RCM_FORCE, nvl, rinfo);
+		    flags & ~RCM_FORCE, nvl, rinfo);
+	}
 
 done:
 	if (nvl)
@@ -865,12 +865,13 @@ ap_rcm_cap_mem(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd, uint_t flags,
 	(void) snprintf(buf, sizeof (buf), "(%ld pages)", change);
 	ap_msg(a, MSG_ISSUE, cmd, buf);
 
-	if (cmd == CMD_RCM_CAP_DEL)
+	if (cmd == CMD_RCM_CAP_DEL) {
 		rv = (*rcm->request_capacity_change)(hd, "SUNW_memory",
-		    flags, nvl, rinfo);
-	else
+		    flags, nvl, rinfo);
+	} else {
 		rv = (*rcm->notify_capacity_change)(hd, "SUNW_memory",
-		    flags & ~RCM_FORCE, nvl, rinfo);
+		    flags & ~RCM_FORCE, nvl, rinfo);
+	}
 
 	nvlist_free(nvl);
 
@@ -923,11 +924,13 @@ ap_rcm_request_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
 	}
 
 	if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
-	    CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS))
+	    CMD_RCM_CAP_DEL, ncpus)) != RCM_SUCCESS)) {
 		return (CFGA_LIB_ERROR);
+	}
 	if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
-	    CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS))
+	    CMD_RCM_CAP_DEL, npages)) != RCM_SUCCESS)) {
 		return (CFGA_LIB_ERROR);
+	}
 
 	return (CFGA_OK);
 }
@@ -945,8 +948,8 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
 	DBG("ap_rcm_add_cap(%p)\n", (void *)a);
 
 	/* Get the new capacity info to figure out what has changed */
-	if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
-	    != CFGA_OK)
+	if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+	    CFGA_OK)
 		return (rc);
 
 	if (capinfo == NULL) {
@@ -981,7 +984,7 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
 		type = ap_cm_type(a, cm);
 
 		DBG("cm=%d valid=%d type=%d, prevos=%d os=%d\n",
-		    cm, prevvalidity, type, prevos, os);
+		    cm, prevvalidity, type, prevos, os);
 
 		/*
 		 * We are interested only in those components
@@ -1008,11 +1011,13 @@ ap_rcm_add_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
 	free(capinfo);
 
 	if (ncpus && ((*rv = ap_rcm_cap_cpu(a, rcm, hd, flags, rinfo,
-	    CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS))
+	    CMD_RCM_CAP_ADD, ncpus)) != RCM_SUCCESS)) {
 		return (CFGA_LIB_ERROR);
+	}
 	if (npages && ((*rv = ap_rcm_cap_mem(a, rcm, hd, flags, rinfo,
-	    CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS))
+	    CMD_RCM_CAP_ADD, npages)) != RCM_SUCCESS)) {
 		return (CFGA_LIB_ERROR);
+	}
 
 	return (CFGA_OK);
 }
@@ -1045,8 +1050,8 @@ ap_rcm_notify_cap(apd_t *a, rcmd_t *rcm, rcm_handle_t *hd,
 	DBG("ap_rcm_notify_cap(%p)\n", (void *)a);
 
 	/* Get the new capacity info to figure out what has changed */
-	if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo))
-	    != CFGA_OK)
+	if ((rc = ap_capinfo(a, rcm->firstcm, rcm->lastcm, &capinfo)) !=
+	    CFGA_OK)
 		return (rc);
 
 	if (capinfo == NULL) {
@@ -1183,8 +1188,7 @@ ap_rcm_ctl(apd_t *a, int cmd)
 		if (rcm->capinfo == NULL)
 			noop++;
 		else
-			rc = ap_rcm_request_cap(a, rcm, hd, &rv,
-			    flags, &rinfo);
+			rc = ap_rcm_request_cap(a, rcm, hd, &rv, flags, &rinfo);
 		break;
 	case CMD_RCM_CAP_ADD:
 		rc = ap_rcm_add_cap(a, rcm, hd, &rv, flags, &rinfo);
@@ -1266,13 +1270,13 @@ ap_rcm_ctl(apd_t *a, int cmd)
 			ap_msg(a, MSG_ISSUE, cmd, rlist[nrsrc]);
 		if (cmd == CMD_RCM_OFFLINE)
 			rv = (*rcm->request_offline_list)(hd, rlist, flags,
-			    &rinfo);
+			    &rinfo);
 		else if (cmd == CMD_RCM_ONLINE)
 			rv = (*rcm->notify_online_list)(hd, rlist,
-			    flags & ~RCM_FORCE, &rinfo);
+			    flags & ~RCM_FORCE, &rinfo);
 		else
 			rv = (*rcm->notify_remove_list)(hd, rlist,
-			    flags & ~RCM_FORCE, &rinfo);
+			    flags & ~RCM_FORCE, &rinfo);
 		break;
 	}
 	case CMD_RCM_SUSPEND: {
@@ -1440,7 +1444,7 @@ ap_rcm_info(apd_t *a, char **msg)
 		if ((infostr = (*rcm->info_info)(tuple)) != NULL) {
 			(void) strcat(*msg, "\n");
 			(void) sprintf(&((*msg)[strlen(*msg)]), format,
-			    (*rcm->info_rsrc)(tuple), infostr);
+			    (*rcm->info_rsrc)(tuple), infostr);
 		}
 	}
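/*
 * The reflowed ap_rcm_cap_cpu() hunk above hands RCM an nvlist that
 * describes the CPU capacity change.  Built in isolation with the same
 * property names, the construction looks like the sketch below; the
 * CPU IDs and totals are made-up sample values.
 */
#include <libnvpair.h>
#include <stdio.h>

int
main(void)
{
	nvlist_t *nvl;
	int32_t old_ids[] = { 0, 1 };
	int32_t new_ids[] = { 0, 1, 2, 3 };

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		return (1);

	/* Same property names the capacity-change hunk uses. */
	if (nvlist_add_int32(nvl, "old_total", 2) != 0 ||
	    nvlist_add_int32(nvl, "new_total", 4) != 0 ||
	    nvlist_add_int32_array(nvl, "old_cpu_list", old_ids, 2) != 0 ||
	    nvlist_add_int32_array(nvl, "new_cpu_list", new_ids, 4) != 0) {
		nvlist_free(nvl);
		return (1);
	}

	nvlist_print(stdout, nvl);	/* dump for inspection */
	nvlist_free(nvl);
	return (0);
}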
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
index 70c3fe7dee..7c3b34b1f0 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_sbd.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -175,11 +172,11 @@ ap_stat(apd_t *a, int all)
 		 */
 		ctl->ic_type = SBD_COMP_UNKNOWN;
 		ctl->ic_unit = a->cnum;
-		strcpy(ctl->ic_name, a->cname);
+		(void) strcpy(ctl->ic_name, a->cname);
 	}
 
 	DBG("ioctl(%d SBD_CMD_STATUS, sc=0x%p sz=%d flags=%d",
-	    fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
+	    fd, (void *)sc->s_statp, sc->s_nbytes, ctl->i_flags);
 	if (select)
 		DBG(" cname=<%s> cnum=%d", a->cname, a->cnum);
 	DBG(")\n");
@@ -282,7 +279,7 @@ apd_init(apd_t *a, int all)
 	for (dst = st->s_stat, i = 0; i < st->s_nstat; i++, dst++) {
 		DBG("ds_name,ds_unit,ds_type=<%s,%d,%d> ",
-		    dst->ds_name, dst->ds_unit, dst->ds_type);
+		    dst->ds_name, dst->ds_unit, dst->ds_type);
 
 		if (dst->ds_unit != a->cnum)
 			continue;
@@ -331,7 +328,7 @@ apd_free(apd_t *a)
 	ap_rcm_fini(a);
 
 	if (a->fd != -1)
-		close(a->fd);
+		(void) close(a->fd);
 
 	s_free(a->options);
 	s_free(a->path);
@@ -405,7 +402,7 @@ ap_init(apd_t *a, cfga_list_data_t *ap)
 	st = (sbd_stat_t *)a->stat;
 
 	DBG("ap_init bd=%d rs=%d os=%d type=<%s>\n",
-	    a->bnum, st->s_rstate, st->s_ostate, st->s_type);
+	    a->bnum, st->s_rstate, st->s_ostate, st->s_type);
 
 	parsable_strncpy(ap->ap_type, st->s_type, sizeof (ap->ap_type));
 	ap->ap_r_state = (cfga_stat_t)st->s_rstate;
@@ -555,7 +552,7 @@ ap_ioctl(apd_t *a, int cmd)
 	else {
 		ctl->ic_type = SBD_COMP_UNKNOWN;
 		ctl->ic_unit = a->cnum;
-		strcpy(ctl->ic_name, a->cname);
+		(void) strcpy(ctl->ic_name, a->cname);
 	}
 
 	if (!(ioc = ap_ioc(cmd))) {
@@ -638,7 +635,7 @@ mod_estr(int code)
 	if (i == sbd_etab_len) {
 		char buf[32];
 
-		snprintf(buf, sizeof (buf), "error %d", code);
+		(void) snprintf(buf, sizeof (buf), "error %d", code);
 		s = strdup(buf);
 	}
 
@@ -760,7 +757,7 @@ ap_help(struct cfga_msg *msgp, const char *options, cfga_flags_t flags)
 			continue;
 		if ((q = (char *)calloc(len + 1, 1)) == NULL)
 			continue;
-		strcpy(q, *p);
+		(void) strcpy(q, *p);
 		(*msgp->message_routine)(msgp->appdata_ptr, q);
 		free(q);
 	}
@@ -823,7 +820,7 @@ ap_cm_devpath(apd_t *a, int seq)
 	 * Assume an io component so that we can get
 	 * the path if the component is indeed of type io.
 	 */
-	if (seq == -1)
+	if (seq == CM_DFLT)
 		dst = (sbd_io_stat_t *)a->cmstat;
 	else {
 		sbd_stat_t *st;
@@ -1002,7 +999,7 @@ ap_cm_init(apd_t *a, cfga_list_data_t *ap, int seq)
 	a->cmstat = (void *)dst;
 
 	DBG("ap_cm_init bd=%d rs=%d os=%d type=<%s> seq=%d\n",
-	    a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
+	    a->bnum, st->s_rstate, dst->ds_ostate, type, seq);
 
 	(void) strncpy(ap->ap_type, type, sizeof (ap->ap_type));
 	ap->ap_r_state = (cfga_stat_t)st->s_rstate;
diff --git a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
index 929dc0251c..0536b838f4 100644
--- a/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
+++ b/usr/src/lib/cfgadm_plugins/sbd/common/ap_seq.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,8 +23,6 @@
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
@@ -40,6 +37,12 @@
 #include <config_admin.h>
 #include "ap.h"
 
+#ifdef	__x86
+#include <libscf_priv.h>
+
+static int fastreboot_disabled;
+#endif	/* __x86 */
+
 static cfga_err_t
 ap_suspend_check(apd_t *a, int cmd, int first, int last, int *suspend)
 {
@@ -202,7 +205,7 @@ ap_seq_get(apd_t *a, int cmd, int *first, int *last)
 	*last = l;
 
 	DBG("ap_seq(%d, %d, %d, %p, %p) = (%d, %d)\n",
-	    rs, os, cmd, (void *)first, (void *)last, f, l);
+	    rs, os, cmd, (void *)first, (void *)last, f, l);
 
 	return (AP_SEQ_OK);
 }
@@ -268,7 +271,7 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
 			 * interpose on the suspend operation.
 			 */
 			rc = ap_suspend_check(a, cmd,
-			    first + 1, last, &suspend);
+			    first + 1, last, &suspend);
 			break;
 		case CMD_RCM_SUSPEND:
 			if (suspend && ((rc = ap_rcm_ctl(a, c)) == CFGA_OK)) {
@@ -297,6 +300,41 @@ ap_seq_exec(apd_t *a, int cmd, int first, int last)
 		case CMD_RCM_CAP_NOTIFY:
 			(void) ap_rcm_ctl(a, c);
 			break;
+
+#ifdef	__x86
+		/*
+		 * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+		 * Note: this is a temporary solution and will be revised when
+		 * fast reboot can support CPU/MEM/IOH DR operations in the
+		 * future.
+		 *
+		 * ACPI BIOS generates some static ACPI tables, such as MADT,
+		 * SRAT and SLIT, to describe the system hardware configuration
+		 * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+		 * static tables won't be updated and will become stale.
+		 *
+		 * If we reset the system by fast reboot, BIOS will have no
+		 * chance to regenerate those staled static tables. Fast reboot
+		 * can't tolerate such inconsistency between staled ACPI tables
+		 * and real hardware configuration yet.
+		 *
+		 * A temporary solution is introduced to disable fast reboot if
+		 * CPU/MEM/IOH hotplug event happens. This solution should be
+		 * revised when fast reboot is enhanced to support CPU/MEM/IOH
+		 * DR operations.
+		 */
+		case CMD_ASSIGN:
+		case CMD_POWERON:
+		case CMD_POWEROFF:
+		case CMD_UNASSIGN:
+			if (!fastreboot_disabled &&
+			    scf_fastreboot_default_set_transient(B_FALSE) ==
+			    SCF_SUCCESS) {
+				fastreboot_disabled = 1;
+			}
+			/* FALLTHROUGH */
+#endif	/* __x86 */
+
 		default:
 			rc = ap_ioctl(a, c);
 			break;
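/*
 * The new x86 cases in ap_seq_exec() latch fastreboot_disabled so the
 * libscf round trip happens at most once per plugin instance.  Pulled
 * out of the sequencer, the guard is just the pattern below; the
 * wrapper name is illustrative, while
 * scf_fastreboot_default_set_transient() is the private libscf
 * interface the patch itself calls via libscf_priv.h.
 */
#ifdef	__x86
#include <sys/types.h>
#include <libscf_priv.h>

static int fastreboot_disabled;

static void
dr_disable_fastreboot(void)
{
	/* Transient setting: it does not persist across reboot. */
	if (!fastreboot_disabled &&
	    scf_fastreboot_default_set_transient(B_FALSE) == SCF_SUCCESS)
		fastreboot_disabled = 1;
}
#endif	/* __x86 */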
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <ctype.h> #include <stdio.h> #include <stdlib.h> @@ -243,14 +240,14 @@ cfga_list_ext( ap_cm_id(a, i, dyn, sizeof (dyn)); (void) snprintf(ap->ap_log_id, szl, "%s::%s", - a->target, dyn); + a->target, dyn); (void) snprintf(ap->ap_phys_id, szp, "%s::%s", - a->path, dyn); + a->path, dyn); ap_cm_init(a, ap, i); DBG("ap_phys_id=%s ap_log_id=%s\n", - ap->ap_phys_id, ap->ap_log_id); + ap->ap_phys_id, ap->ap_log_id); } } else diff --git a/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile new file mode 100644 index 0000000000..7a382d291f --- /dev/null +++ b/usr/src/lib/cfgadm_plugins/sbd/i386/Makefile @@ -0,0 +1,39 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2010, Intel Corporation. +# All rights reserved. +# + +PLATFORM = i86pc + +include ../Makefile.com + +CFLAGS += -D_POSIX_PTHREAD_SEMANTICS +LINTFLAGS += -D_POSIX_PTHREAD_SEMANTICS +LDLIBS += -lscf + +.KEEP_STATE: + +install: all $(ROOTLIBS) $(ROOTLINKS) diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile index dcaf7b8438..39bd47a725 100644 --- a/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile +++ b/usr/src/lib/cfgadm_plugins/sbd/sparc/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -23,10 +22,10 @@ # Copyright (c) 2000 by Sun Microsystems, Inc. # All rights reserved. # -#ident "%Z%%M% %I% %E% SMI" -# # lib/sbd/sparc/Makefile +PLATFORM = sun4u + include ../Makefile.com .KEEP_STATE: diff --git a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile index 4ac3f6310c..1d9c67c4e1 100644 --- a/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile +++ b/usr/src/lib/cfgadm_plugins/sbd/sparcv9/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
# # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -23,10 +22,10 @@ # Copyright (c) 2000 by Sun Microsystems, Inc. # All rights reserved. # -#ident "%Z%%M% %I% %E% SMI" -# # lib/sbd/sparcv9/Makefile +PLATFORM = sun4u + include ../Makefile.com include ../../../Makefile.lib.64 diff --git a/usr/src/pkg/license_files/cr_Intel.hotplug b/usr/src/pkg/license_files/cr_Intel.hotplug new file mode 100644 index 0000000000..bf0648aabc --- /dev/null +++ b/usr/src/pkg/license_files/cr_Intel.hotplug @@ -0,0 +1 @@ + * Copyright(c) 2009 - 2010 Intel Corporation. All rights reserved. diff --git a/usr/src/pkg/manifests/system-header.mf b/usr/src/pkg/manifests/system-header.mf index 113817866b..b95dd46026 100644 --- a/usr/src/pkg/manifests/system-header.mf +++ b/usr/src/pkg/manifests/system-header.mf @@ -1660,6 +1660,7 @@ $(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_defs.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_modctl.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/psm_types.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/rm_platter.h +$(i386_ONLY)file path=usr/platform/i86pc/include/sys/sbd_ioctl.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/smp_impldefs.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/vm_machparam.h $(i386_ONLY)file path=usr/platform/i86pc/include/sys/x_call.h diff --git a/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf new file mode 100644 index 0000000000..28ddc833bb --- /dev/null +++ b/usr/src/pkg/manifests/system-kernel-dynamic-reconfiguration-i86pc.mf @@ -0,0 +1,83 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2010 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# This package will install successfully into any zone, global or +# non-global. The files, directories, links, and hardlinks, however, +# will only be installed into the global zone. +# +<include hollow_zone_pkg> +set name=pkg.fmri \ + value=pkg:/system/kernel/dynamic-reconfiguration/i86pc@$(PKGVERS) +set name=pkg.description \ + value="Dynamic Reconfiguration Modules for i86pc required to support \ + hotplug on i86pc based systems. Includes the ACPI Hotplug Daemon, SMF \ + manifests/methods and drivers for hotplug operations." 
+set name=pkg.summary value="Dynamic Reconfiguration Modules for i86pc" +set name=info.classification \ + value="org.opensolaris.category.2008:System/Administration and Configuration" \ + value="org.opensolaris.category.2008:System/Enterprise Management" +set name=variant.arch value=i386 +set name=variant.opensolaris.zone value=global value=nonglobal +dir path=platform group=sys +dir path=platform/i86pc group=sys +dir path=platform/i86pc/kernel group=sys +dir path=platform/i86pc/kernel/drv group=sys +dir path=platform/i86pc/kernel/drv/$(ARCH64) group=sys +dir path=platform/i86pc/kernel/misc group=sys +dir path=platform/i86pc/kernel/misc/$(ARCH64) group=sys +file path=platform/i86pc/kernel/misc/$(ARCH64)/drmach_acpi group=sys \ + mode=0755 +dir path=usr group=sys +dir path=usr/platform group=sys +dir path=usr/platform/i86pc group=sys +dir path=usr/platform/i86pc/lib group=sys +dir path=var group=sys +dir path=var/svc group=sys +dir path=var/svc/manifest group=sys +dir path=var/svc/manifest/platform group=sys +dir path=var/svc/manifest/platform/i86pc group=sys +file path=platform/i86pc/kernel/drv/$(ARCH64)/dr group=sys +driver name=dr alias=acpidr_sbd +file path=platform/i86pc/kernel/drv/dr.conf group=sys mode=0644 +file path=usr/platform/i86pc/lib/acpihpd group=sys mode=0755 +file path=var/svc/manifest/platform/i86pc/acpihpd.xml group=sys mode=0444 +dir path=lib group=sys +dir path=lib/svc group=sys +dir path=lib/svc/method group=sys +file path=lib/svc/method/svc-acpihpd group=sys mode=0555 +dir path=usr/platform/i86pc/lib/cfgadm +dir path=usr/platform/i86pc/lib/cfgadm/$(ARCH64) +file path=usr/platform/i86pc/lib/cfgadm/sbd.so.1 +link path=usr/platform/i86pc/lib/cfgadm/sbd.so \ + target=./sbd.so.1 +file path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so.1 +link path=usr/platform/i86pc/lib/cfgadm/$(ARCH64)/sbd.so \ + target=./sbd.so.1 +license cr_Intel.hotplug license=cr_Intel +license cr_Sun license=cr_Sun +license lic_CDDL license=lic_CDDL +depend fmri=pkg:/system/kernel/power type=require diff --git a/usr/src/uts/common/os/cpu_event.c b/usr/src/uts/common/os/cpu_event.c index 11162ccfc9..b3ed323f92 100644 --- a/usr/src/uts/common/os/cpu_event.c +++ b/usr/src/uts/common/os/cpu_event.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. */ @@ -152,8 +152,13 @@ typedef struct cpu_idle_cb_item { /* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */ typedef union cpu_idle_cb_state { struct { + /* Index of already-invoked callbacks. */ int index; + /* Invoke registered callbacks if true. */ boolean_t enabled; + /* Property values are valid if true. */ boolean_t ready; + /* Pointers to per-CPU properties. */ cpu_idle_prop_value_t *idle_state; cpu_idle_prop_value_t *enter_ts; cpu_idle_prop_value_t *exit_ts; @@ -182,6 +187,17 @@ static int cpu_idle_cb_max = 0; static cpu_idle_cb_state_t *cpu_idle_cb_state; +#ifdef __x86 +/* + * cpuset used to intercept CPUs before powering them off. + * The control CPU sets the bit corresponding to the target CPU and waits + * until the bit is cleared. + * The target CPU disables interrupts before clearing the corresponding bit + * and then loops forever. + */ +static cpuset_t cpu_idle_intercept_set; +#endif + static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum, cpu_idle_prop_value_t *valp); @@ -326,17 +342,26 @@ cpu_event_init(void) #endif } +/* + * This function is called to initialize per-CPU state when starting CPUs.
+ */ void cpu_event_init_cpu(cpu_t *cp) { ASSERT(cp->cpu_seqid < max_ncpus); + cpu_idle_cb_state[cp->cpu_seqid].v.index = 0; cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE; + cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_TRUE; } +/* + * This function is called to clean up per CPU state when stopping CPUs. + */ void cpu_event_fini_cpu(cpu_t *cp) { ASSERT(cp->cpu_seqid < max_ncpus); + cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_FALSE; cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE; } @@ -618,6 +643,21 @@ cpu_idle_enter(int state, int flag, ASSERT(CPU->cpu_seqid < max_ncpus); sp = &cpu_idle_cb_state[CPU->cpu_seqid]; ASSERT(sp->v.index == 0); + if (sp->v.enabled == B_FALSE) { +#if defined(__x86) + /* Intercept CPU at a safe point before powering off it. */ + if (CPU_IN_SET(cpu_idle_intercept_set, CPU->cpu_id)) { + iflags = intr_clear(); + CPUSET_ATOMIC_DEL(cpu_idle_intercept_set, CPU->cpu_id); + /*CONSTCOND*/ + while (1) { + SMT_PAUSE(); + } + } +#endif + + return (0); + } /* * On x86, cpu_idle_enter can be called from idle thread with either @@ -1091,3 +1131,26 @@ cpu_idle_get_cpu_state(cpu_t *cp) cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, CPU_IDLE_GET_CTX(cp))); } + +#if defined(__x86) +/* + * Intercept CPU at a safe point in idle() before powering it off. + */ +void +cpu_idle_intercept_cpu(cpu_t *cp) +{ + ASSERT(cp->cpu_seqid < max_ncpus); + ASSERT(cpu_idle_cb_state[cp->cpu_seqid].v.enabled == B_FALSE); + + /* Set flag to intercept CPU. */ + CPUSET_ATOMIC_ADD(cpu_idle_intercept_set, cp->cpu_id); + /* Wake up CPU from possible sleep state. */ + poke_cpu(cp->cpu_id); + while (CPU_IN_SET(cpu_idle_intercept_set, cp->cpu_id)) { + DELAY(1); + } + /* + * Now target CPU is spinning in a pause loop with interrupts disabled. + */ +} +#endif diff --git a/usr/src/uts/common/os/cpu_pm.c b/usr/src/uts/common/os/cpu_pm.c index 324e4168d7..b885e421b8 100644 --- a/usr/src/uts/common/os/cpu_pm.c +++ b/usr/src/uts/common/os/cpu_pm.c @@ -677,9 +677,13 @@ cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level) cpupm_state_t *new_state = NULL; did = cpupm_domain_id(cp, type); - mutex_enter(&cpu_lock); - dom = cpupm_domain_find(did, type); - mutex_exit(&cpu_lock); + if (MUTEX_HELD(&cpu_lock)) { + dom = cpupm_domain_find(did, type); + } else { + mutex_enter(&cpu_lock); + dom = cpupm_domain_find(did, type); + mutex_exit(&cpu_lock); + } /* * Can use a lock to avoid changing the power state of the cpu when diff --git a/usr/src/uts/common/os/mem_config.c b/usr/src/uts/common/os/mem_config.c index be67866592..20a89b0611 100644 --- a/usr/src/uts/common/os/mem_config.c +++ b/usr/src/uts/common/os/mem_config.c @@ -59,9 +59,6 @@ extern struct memlist *phys_avail; -extern void mem_node_add(pfn_t, pfn_t); -extern void mem_node_del(pfn_t, pfn_t); - extern uint_t page_ctrs_adjust(int); void page_ctrs_cleanup(void); static void kphysm_setup_post_add(pgcnt_t); @@ -107,6 +104,13 @@ static void memseg_cpu_vm_flush(void); int meta_alloc_enable; +#ifdef DEBUG +static int memseg_debug; +#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args) +#else +#define MEMSEG_DEBUG(...) +#endif + /* * Add a chunk of memory to the system. * base: starting PAGESIZE page of new memory. @@ -117,13 +121,6 @@ int meta_alloc_enable; * dynamically most likely means more hash misses, since the tables will * be smaller than they otherwise would be. */ -#ifdef DEBUG -static int memseg_debug; -#define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args) -#else -#define MEMSEG_DEBUG(...) 
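A note on the intercept mechanism added to cpu_event.c above: cpu_idle_intercept_cpu() and the check at the top of cpu_idle_enter() form a two-sided handshake over a single cpuset bit. A minimal user-level sketch of the same protocol, using C11 atomics as stand-ins for CPUSET_ATOMIC_ADD/DEL, poke_cpu() and DELAY() (the names below are illustrative, not the kernel's):

#include <stdatomic.h>
#include <stdbool.h>

/* Stand-in for the target CPU's bit in cpu_idle_intercept_set. */
static atomic_bool intercept_requested;

/* Control-CPU side, mirroring cpu_idle_intercept_cpu(). */
void
control_cpu(void)
{
	atomic_store(&intercept_requested, true);	/* CPUSET_ATOMIC_ADD */
	/* poke_cpu() would wake the target from a sleep state here. */
	while (atomic_load(&intercept_requested))
		;					/* DELAY(1) */
	/* The target is now parked with interrupts disabled. */
}

/* Target-CPU side, mirroring the top of cpu_idle_enter(). */
void
target_cpu(void)
{
	if (atomic_load(&intercept_requested)) {
		/* intr_clear() would disable interrupts first. */
		atomic_store(&intercept_requested, false); /* CPUSET_ATOMIC_DEL */
		for (;;)
			;				/* SMT_PAUSE() loop */
	}
}

The ordering is the point: the target clears the bit only after interrupts are off, so once the control CPU sees the bit drop it knows the target can no longer take interrupts or migrate work.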
-#endif - int kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs) { @@ -341,6 +338,9 @@ mapalloc: * this may change with COD or in larger SSM systems with * nested latency groups, so we must not assume that the * node does not yet exist. + * + * Note that there may be multiple memory nodes associated with + * a single lgrp node on x86 systems. */ pnum = pt_base + tpgs - 1; mem_node_add_range(pt_base, pnum); @@ -569,8 +569,8 @@ mapalloc: (uint64_t)(tpgs) << PAGESHIFT); delspan_unreserve(pt_base, tpgs); - return (KPHYSM_OK); /* Successfully added system memory */ + return (KPHYSM_OK); /* Successfully added system memory */ } /* @@ -2566,7 +2566,7 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs) { int phase; - ASSERT(IS_P2ALIGNED((uint64_t)va, PAGESIZE)); + ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE)); /* * We may start remapping at a non-zero page offset @@ -2574,7 +2574,8 @@ remap_to_dummy(caddr_t va, pgcnt_t metapgs) * of the outgoing pp's could be shared by other * memsegs (see memseg_remap_meta). */ - phase = btop((uint64_t)va) % pp_dummy_npages; + phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages; + /*CONSTCOND*/ ASSERT(PAGESIZE % sizeof (page_t) || phase == 0); while (metapgs != 0) { diff --git a/usr/src/uts/common/os/mem_config_stubs.c b/usr/src/uts/common/os/mem_config_stubs.c index d3226ff633..1a265173f4 100644 --- a/usr/src/uts/common/os/mem_config_stubs.c +++ b/usr/src/uts/common/os/mem_config_stubs.c @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/cmn_err.h> #include <sys/errno.h> @@ -44,6 +42,7 @@ pgcnt_t kcage_desfree; pgcnt_t kcage_needfree; pgcnt_t kcage_lotsfree = 1; +#if !defined(__x86) || defined(__xpv) /*ARGSUSED*/ int kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg) @@ -56,6 +55,7 @@ void kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg) { } +#endif /*ARGSUSED*/ int diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c index 91e2b72dc3..33cc970aea 100644 --- a/usr/src/uts/common/os/sunddi.c +++ b/usr/src/uts/common/os/sunddi.c @@ -85,6 +85,10 @@ #include <sys/zone.h> #include <sys/clock_impl.h> #include <sys/ddi.h> +#include <sys/modhash.h> +#include <sys/sunldi_impl.h> +#include <sys/fs/dv_node.h> +#include <sys/fs/snode.h> extern pri_t minclsyspri; @@ -9369,3 +9373,938 @@ ddi_cb_unregister(ddi_cb_handle_t hdl) return (DDI_SUCCESS); } + +/* + * Platform independent DR routines + */ + +static int +ndi2errno(int n) +{ + int err = 0; + + switch (n) { + case NDI_NOMEM: + err = ENOMEM; + break; + case NDI_BUSY: + err = EBUSY; + break; + case NDI_FAULT: + err = EFAULT; + break; + case NDI_FAILURE: + err = EIO; + break; + case NDI_SUCCESS: + break; + case NDI_BADHANDLE: + default: + err = EINVAL; + break; + } + return (err); +} + +/* + * Prom tree node list + */ +struct ptnode { + pnode_t nodeid; + struct ptnode *next; +}; + +/* + * Prom tree walk arg + */ +struct pta { + dev_info_t *pdip; + devi_branch_t *bp; + uint_t flags; + dev_info_t *fdip; + struct ptnode *head; +}; + +static void +visit_node(pnode_t nodeid, struct pta *ap) +{ + struct ptnode **nextp; + int (*select)(pnode_t, void *, uint_t); + + ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE); + + select = ap->bp->create.prom_branch_select; + + ASSERT(select); + + if (select(nodeid, ap->bp->arg, 0) == 
DDI_SUCCESS) { + + for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next) + ; + + *nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP); + + (*nextp)->nodeid = nodeid; + } + + if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD) + return; + + nodeid = prom_childnode(nodeid); + while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) { + visit_node(nodeid, ap); + nodeid = prom_nextnode(nodeid); + } +} + +/* + * NOTE: The caller of this function must check for device contracts + * or LDI callbacks against this dip before setting the dip offline. + */ +static int +set_infant_dip_offline(dev_info_t *dip, void *arg) +{ + char *path = (char *)arg; + + ASSERT(dip); + ASSERT(arg); + + if (i_ddi_node_state(dip) >= DS_ATTACHED) { + (void) ddi_pathname(dip, path); + cmn_err(CE_WARN, "Attempt to set offline flag on attached " + "node: %s", path); + return (DDI_FAILURE); + } + + mutex_enter(&(DEVI(dip)->devi_lock)); + if (!DEVI_IS_DEVICE_OFFLINE(dip)) + DEVI_SET_DEVICE_OFFLINE(dip); + mutex_exit(&(DEVI(dip)->devi_lock)); + + return (DDI_SUCCESS); +} + +typedef struct result { + char *path; + int result; +} result_t; + +static int +dip_set_offline(dev_info_t *dip, void *arg) +{ + int end; + result_t *resp = (result_t *)arg; + + ASSERT(dip); + ASSERT(resp); + + /* + * We stop the walk if e_ddi_offline_notify() returns + * failure, because this implies that one or more consumers + * (either LDI or contract based) have blocked the offline. + * So there is no point in continuing the walk. + */ + if (e_ddi_offline_notify(dip) == DDI_FAILURE) { + resp->result = DDI_FAILURE; + return (DDI_WALK_TERMINATE); + } + + /* + * If set_infant_dip_offline() returns failure, it implies + * that we failed to set a particular dip offline. This + * does not imply that the offline as a whole should fail. + * We want to do the best we can, so we continue the walk. + */ + if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS) + end = DDI_SUCCESS; + else + end = DDI_FAILURE; + + e_ddi_offline_finalize(dip, end); + + return (DDI_WALK_CONTINUE); +} + +/* + * The call to e_ddi_offline_notify() exists for the + * unlikely error case that a branch we are trying to + * create already exists and has device contracts or LDI + * event callbacks against it. + * + * We allow create to succeed for such branches only if + * no constraints block the offline.
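dip_set_offline() above is a two-phase pattern: every consumer is asked first, a veto terminates the walk, and e_ddi_offline_finalize() is always paired with a completed notify. A condensed sketch of the control flow, where notify_consumers(), try_offline() and finalize() are hypothetical stand-ins for e_ddi_offline_notify(), set_infant_dip_offline() and e_ddi_offline_finalize():

enum { WALK_CONTINUE, WALK_TERMINATE };
enum { OK = 0, FAILED = -1 };

/* Hypothetical stand-ins for the DDI offline protocol calls. */
static int notify_consumers(void *node) { (void)node; return (OK); }
static int try_offline(void *node) { (void)node; return (OK); }
static void finalize(void *node, int result) { (void)node; (void)result; }

int
offline_one_node(void *node)
{
	/* Phase 1: any LDI or contract consumer may veto the offline. */
	if (notify_consumers(node) == FAILED)
		return (WALK_TERMINATE);	/* one veto stops the walk */

	/* Phase 2: best effort; a failure here does not stop the walk. */
	finalize(node, try_offline(node));
	return (WALK_CONTINUE);
}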
+ */ +static int +branch_set_offline(dev_info_t *dip, char *path) +{ + int circ; + int end; + result_t res; + + + if (e_ddi_offline_notify(dip) == DDI_FAILURE) { + return (DDI_FAILURE); + } + + if (set_infant_dip_offline(dip, path) == DDI_SUCCESS) + end = DDI_SUCCESS; + else + end = DDI_FAILURE; + + e_ddi_offline_finalize(dip, end); + + if (end == DDI_FAILURE) + return (DDI_FAILURE); + + res.result = DDI_SUCCESS; + res.path = path; + + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res); + ndi_devi_exit(dip, circ); + + return (res.result); +} + +/*ARGSUSED*/ +static int +create_prom_branch(void *arg, int has_changed) +{ + int circ; + int exists, rv; + pnode_t nodeid; + struct ptnode *tnp; + dev_info_t *dip; + struct pta *ap = arg; + devi_branch_t *bp; + char *path; + + ASSERT(ap); + ASSERT(ap->fdip == NULL); + ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip)); + + bp = ap->bp; + + nodeid = ddi_get_nodeid(ap->pdip); + if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) { + cmn_err(CE_WARN, "create_prom_branch: invalid " + "nodeid: 0x%x", nodeid); + return (EINVAL); + } + + ap->head = NULL; + + nodeid = prom_childnode(nodeid); + while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) { + visit_node(nodeid, ap); + nodeid = prom_nextnode(nodeid); + } + + if (ap->head == NULL) + return (ENODEV); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + rv = 0; + while ((tnp = ap->head) != NULL) { + ap->head = tnp->next; + + ndi_devi_enter(ap->pdip, &circ); + + /* + * Check if the branch already exists. + */ + exists = 0; + dip = e_ddi_nodeid_to_dip(tnp->nodeid); + if (dip != NULL) { + exists = 1; + + /* Parent is held busy, so release hold */ + ndi_rele_devi(dip); +#ifdef DEBUG + cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists" + " for nodeid 0x%x", (void *)dip, tnp->nodeid); +#endif + } else { + dip = i_ddi_create_branch(ap->pdip, tnp->nodeid); + } + + kmem_free(tnp, sizeof (struct ptnode)); + + /* + * Hold the branch if it is not already held + */ + if (dip && !exists) { + e_ddi_branch_hold(dip); + } + + ASSERT(dip == NULL || e_ddi_branch_held(dip)); + + /* + * Set all dips in the newly created branch offline so that + * only a "configure" operation can attach + * the branch + */ + if (dip == NULL || branch_set_offline(dip, path) + == DDI_FAILURE) { + ndi_devi_exit(ap->pdip, circ); + rv = EIO; + continue; + } + + ASSERT(ddi_get_parent(dip) == ap->pdip); + + ndi_devi_exit(ap->pdip, circ); + + if (ap->flags & DEVI_BRANCH_CONFIGURE) { + int error = e_ddi_branch_configure(dip, &ap->fdip, 0); + if (error && rv == 0) + rv = error; + } + + /* + * Invoke devi_branch_callback() (if it exists) only for + * newly created branches + */ + if (bp->devi_branch_callback && !exists) + bp->devi_branch_callback(dip, bp->arg, 0); + } + + kmem_free(path, MAXPATHLEN); + + return (rv); +} + +static int +sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp) +{ + int rv, circ, len; + int i, flags, ret; + dev_info_t *dip; + char *nbuf; + char *path; + static const char *noname = "<none>"; + + ASSERT(pdip); + ASSERT(DEVI_BUSY_OWNED(pdip)); + + flags = 0; + + /* + * Creating the root of a branch ? 
+ */ + if (rdipp) { + *rdipp = NULL; + flags = DEVI_BRANCH_ROOT; + } + + ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip); + rv = bp->create.sid_branch_create(dip, bp->arg, flags); + + nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP); + + if (rv == DDI_WALK_ERROR) { + cmn_err(CE_WARN, "e_ddi_branch_create: Error setting" + " properties on devinfo node %p", (void *)dip); + goto fail; + } + + len = OBP_MAXDRVNAME; + if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len) + != DDI_PROP_SUCCESS) { + cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has " + "no name property", (void *)dip); + goto fail; + } + + ASSERT(i_ddi_node_state(dip) == DS_PROTO); + if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) { + cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)" + " for devinfo node %p", nbuf, (void *)dip); + goto fail; + } + + kmem_free(nbuf, OBP_MAXDRVNAME); + + /* + * Ignore bind failures just like boot does + */ + (void) ndi_devi_bind_driver(dip, 0); + + switch (rv) { + case DDI_WALK_CONTINUE: + case DDI_WALK_PRUNESIB: + ndi_devi_enter(dip, &circ); + + i = DDI_WALK_CONTINUE; + for (; i == DDI_WALK_CONTINUE; ) { + i = sid_node_create(dip, bp, NULL); + } + + ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB); + if (i == DDI_WALK_ERROR) + rv = i; + /* + * If PRUNESIB, stop creating siblings + * of dip's child. Subsequent walk behavior + * is determined by rv returned by dip. + */ + + ndi_devi_exit(dip, circ); + break; + case DDI_WALK_TERMINATE: + /* + * Don't create children and ask our parent + * not to create siblings either. + */ + rv = DDI_WALK_PRUNESIB; + break; + case DDI_WALK_PRUNECHILD: + /* + * Don't create children, but ask parent to continue + * with siblings. + */ + rv = DDI_WALK_CONTINUE; + break; + default: + ASSERT(0); + break; + } + + if (rdipp) + *rdipp = dip; + + /* + * Set device offline - only the "configure" op should cause an attach. + * Note that it is safe to set the dip offline without checking + * for either device contract or layered driver (LDI) based constraints + * since there cannot be any contracts or LDI opens of this device. + * This is because this node is a newly created dip with the parent busy + * held, so no other thread can come in and attach this dip. A dip that + * has never been attached cannot have contracts since by definition + * a device contract (an agreement between a process and a device minor + * node) can only be created against a device that has minor nodes + * i.e. is attached. Similarly an LDI open will only succeed if the + * dip is attached. We assert below that the dip is not attached.
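The switch in sid_node_create() above decides what walk code a node hands back to its parent based on what its callback returned for the node itself. The mapping in isolation, as a sketch (the enum is a stand-in for the DDI_WALK_* constants):

enum walk_code { WALK_CONTINUE, WALK_PRUNESIB, WALK_PRUNECHILD,
    WALK_TERMINATE, WALK_ERROR };

static enum walk_code
translate_walk_code(enum walk_code rv)
{
	switch (rv) {
	case WALK_TERMINATE:
		return (WALK_PRUNESIB);		/* no children, no siblings */
	case WALK_PRUNECHILD:
		return (WALK_CONTINUE);		/* no children, siblings OK */
	default:
		/*
		 * CONTINUE/PRUNESIB: children are created first; the
		 * original rv (or WALK_ERROR) then propagates upward.
		 */
		return (rv);
	}
}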
+ */ + ASSERT(i_ddi_node_state(dip) < DS_ATTACHED); + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + ret = set_infant_dip_offline(dip, path); + ASSERT(ret == DDI_SUCCESS); + kmem_free(path, MAXPATHLEN); + + return (rv); +fail: + (void) ndi_devi_free(dip); + kmem_free(nbuf, OBP_MAXDRVNAME); + return (DDI_WALK_ERROR); +} + +static int +create_sid_branch( + dev_info_t *pdip, + devi_branch_t *bp, + dev_info_t **dipp, + uint_t flags) +{ + int rv = 0, state = DDI_WALK_CONTINUE; + dev_info_t *rdip; + + while (state == DDI_WALK_CONTINUE) { + int circ; + + ndi_devi_enter(pdip, &circ); + + state = sid_node_create(pdip, bp, &rdip); + if (rdip == NULL) { + ndi_devi_exit(pdip, circ); + ASSERT(state == DDI_WALK_ERROR); + break; + } + + e_ddi_branch_hold(rdip); + + ndi_devi_exit(pdip, circ); + + if (flags & DEVI_BRANCH_CONFIGURE) { + int error = e_ddi_branch_configure(rdip, dipp, 0); + if (error && rv == 0) + rv = error; + } + + /* + * devi_branch_callback() is optional + */ + if (bp->devi_branch_callback) + bp->devi_branch_callback(rdip, bp->arg, 0); + } + + ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB); + + return (state == DDI_WALK_ERROR ? EIO : rv); +} + +int +e_ddi_branch_create( + dev_info_t *pdip, + devi_branch_t *bp, + dev_info_t **dipp, + uint_t flags) +{ + int prom_devi, sid_devi, error; + + if (pdip == NULL || bp == NULL || bp->type == 0) + return (EINVAL); + + prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0; + sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0; + + if (prom_devi && bp->create.prom_branch_select == NULL) + return (EINVAL); + else if (sid_devi && bp->create.sid_branch_create == NULL) + return (EINVAL); + else if (!prom_devi && !sid_devi) + return (EINVAL); + + if (flags & DEVI_BRANCH_EVENT) + return (EINVAL); + + if (prom_devi) { + struct pta pta = {0}; + + pta.pdip = pdip; + pta.bp = bp; + pta.flags = flags; + + error = prom_tree_access(create_prom_branch, &pta, NULL); + + if (dipp) + *dipp = pta.fdip; + else if (pta.fdip) + ndi_rele_devi(pta.fdip); + } else { + error = create_sid_branch(pdip, bp, dipp, flags); + } + + return (error); +} + +int +e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags) +{ + int rv; + char *devnm; + dev_info_t *pdip; + + if (dipp) + *dipp = NULL; + + if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT)) + return (EINVAL); + + pdip = ddi_get_parent(rdip); + + ndi_hold_devi(pdip); + + if (!e_ddi_branch_held(rdip)) { + ndi_rele_devi(pdip); + cmn_err(CE_WARN, "e_ddi_branch_configure: " + "dip(%p) not held", (void *)rdip); + return (EINVAL); + } + + if (i_ddi_node_state(rdip) < DS_INITIALIZED) { + /* + * First attempt to bind a driver. 
If we fail, return + * success (On some platforms, dips for some device + * types (CPUs) may not have a driver) + */ + if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) { + ndi_rele_devi(pdip); + return (0); + } + + if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) { + rv = NDI_FAILURE; + goto out; + } + } + + ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED); + + devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); + + (void) ddi_deviname(rdip, devnm); + + if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip, + NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) { + /* release hold from ndi_devi_config_one() */ + ndi_rele_devi(rdip); + } + + kmem_free(devnm, MAXNAMELEN + 1); +out: + if (rv != NDI_SUCCESS && dipp && rdip) { + ndi_hold_devi(rdip); + *dipp = rdip; + } + ndi_rele_devi(pdip); + return (ndi2errno(rv)); +} + +void +e_ddi_branch_hold(dev_info_t *rdip) +{ + if (e_ddi_branch_held(rdip)) { + cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held"); + return; + } + + mutex_enter(&DEVI(rdip)->devi_lock); + if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) { + DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD; + DEVI(rdip)->devi_ref++; + } + ASSERT(DEVI(rdip)->devi_ref > 0); + mutex_exit(&DEVI(rdip)->devi_lock); +} + +int +e_ddi_branch_held(dev_info_t *rdip) +{ + int rv = 0; + + mutex_enter(&DEVI(rdip)->devi_lock); + if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) && + DEVI(rdip)->devi_ref > 0) { + rv = 1; + } + mutex_exit(&DEVI(rdip)->devi_lock); + + return (rv); +} + +void +e_ddi_branch_rele(dev_info_t *rdip) +{ + mutex_enter(&DEVI(rdip)->devi_lock); + DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD; + DEVI(rdip)->devi_ref--; + mutex_exit(&DEVI(rdip)->devi_lock); +} + +int +e_ddi_branch_unconfigure( + dev_info_t *rdip, + dev_info_t **dipp, + uint_t flags) +{ + int circ, rv; + int destroy; + char *devnm; + uint_t nflags; + dev_info_t *pdip; + + if (dipp) + *dipp = NULL; + + if (rdip == NULL) + return (EINVAL); + + pdip = ddi_get_parent(rdip); + + ASSERT(pdip); + + /* + * Check if caller holds pdip busy - can cause deadlocks during + * devfs_clean() + */ + if (DEVI_BUSY_OWNED(pdip)) { + cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent" + " devinfo node(%p) is busy held", (void *)pdip); + return (EINVAL); + } + + destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0; + + devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); + + ndi_devi_enter(pdip, &circ); + (void) ddi_deviname(rdip, devnm); + ndi_devi_exit(pdip, circ); + + /* + * ddi_deviname() returns a component name with / prepended. + */ + (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); + + ndi_devi_enter(pdip, &circ); + + /* + * Recreate device name as it may have changed state (init/uninit) + * when parent busy lock was dropped for devfs_clean() + */ + (void) ddi_deviname(rdip, devnm); + + if (!e_ddi_branch_held(rdip)) { + kmem_free(devnm, MAXNAMELEN + 1); + ndi_devi_exit(pdip, circ); + cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held", + destroy ? "destroy" : "unconfigure", (void *)rdip); + return (EINVAL); + } + + /* + * Release hold on the branch. This is ok since we are holding the + * parent busy. If rdip is not removed, we must do a hold on the + * branch before returning. 
+ */ + e_ddi_branch_rele(rdip); + + nflags = NDI_DEVI_OFFLINE; + if (destroy || (flags & DEVI_BRANCH_DESTROY)) { + nflags |= NDI_DEVI_REMOVE; + destroy = 1; + } else { + nflags |= NDI_UNCONFIG; /* uninit but don't remove */ + } + + if (flags & DEVI_BRANCH_EVENT) + nflags |= NDI_POST_EVENT; + + if (i_ddi_devi_attached(pdip) && + (i_ddi_node_state(rdip) >= DS_INITIALIZED)) { + rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags); + } else { + rv = e_ddi_devi_unconfig(rdip, dipp, nflags); + if (rv == NDI_SUCCESS) { + ASSERT(!destroy || ddi_get_child(rdip) == NULL); + rv = ndi_devi_offline(rdip, nflags); + } + } + + if (!destroy || rv != NDI_SUCCESS) { + /* The dip still exists, so do a hold */ + e_ddi_branch_hold(rdip); + } +out: + kmem_free(devnm, MAXNAMELEN + 1); + ndi_devi_exit(pdip, circ); + return (ndi2errno(rv)); +} + +int +e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag) +{ + return (e_ddi_branch_unconfigure(rdip, dipp, + flag|DEVI_BRANCH_DESTROY)); +} + +/* + * Number of chains for hash table + */ +#define NUMCHAINS 17 + +/* + * Devinfo busy arg + */ +struct devi_busy { + int dv_total; + int s_total; + mod_hash_t *dv_hash; + mod_hash_t *s_hash; + int (*callback)(dev_info_t *, void *, uint_t); + void *arg; +}; + +static int +visit_dip(dev_info_t *dip, void *arg) +{ + uintptr_t sbusy, dvbusy, ref; + struct devi_busy *bsp = arg; + + ASSERT(bsp->callback); + + /* + * A dip cannot be busy if its reference count is 0 + */ + if ((ref = e_ddi_devi_holdcnt(dip)) == 0) { + return (bsp->callback(dip, bsp->arg, 0)); + } + + if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy)) + dvbusy = 0; + + /* + * To catch device opens currently maintained on specfs common snodes. + */ + if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy)) + sbusy = 0; + +#ifdef DEBUG + if (ref < sbusy || ref < dvbusy) { + cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu " + "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref); + } +#endif + + dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy; + + return (bsp->callback(dip, bsp->arg, dvbusy)); +} + +static int +visit_snode(struct snode *sp, void *arg) +{ + uintptr_t sbusy; + dev_info_t *dip; + int count; + struct devi_busy *bsp = arg; + + ASSERT(sp); + + /* + * The stable lock is held. This prevents + * the snode and its associated dip from + * going away. 
+ */ + dip = NULL; + count = spec_devi_open_count(sp, &dip); + + if (count <= 0) + return (DDI_WALK_CONTINUE); + + ASSERT(dip); + + if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy)) + sbusy = count; + else + sbusy += count; + + if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) { + cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, " + "sbusy = %lu", "e_ddi_branch_referenced", + (void *)dip, sbusy); + } + + bsp->s_total += count; + + return (DDI_WALK_CONTINUE); +} + +static void +visit_dvnode(struct dv_node *dv, void *arg) +{ + uintptr_t dvbusy; + uint_t count; + struct vnode *vp; + struct devi_busy *bsp = arg; + + ASSERT(dv && dv->dv_devi); + + vp = DVTOV(dv); + + mutex_enter(&vp->v_lock); + count = vp->v_count; + mutex_exit(&vp->v_lock); + + if (!count) + return; + + if (mod_hash_remove(bsp->dv_hash, dv->dv_devi, + (mod_hash_val_t *)&dvbusy)) + dvbusy = count; + else + dvbusy += count; + + if (mod_hash_insert(bsp->dv_hash, dv->dv_devi, + (mod_hash_val_t)dvbusy)) { + cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, " + "dvbusy=%lu", "e_ddi_branch_referenced", + (void *)dv->dv_devi, dvbusy); + } + + bsp->dv_total += count; +} + +/* + * Returns reference count on success or -1 on failure. + */ +int +e_ddi_branch_referenced( + dev_info_t *rdip, + int (*callback)(dev_info_t *dip, void *arg, uint_t ref), + void *arg) +{ + int circ; + char *path; + dev_info_t *pdip; + struct devi_busy bsa = {0}; + + ASSERT(rdip); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + ndi_hold_devi(rdip); + + pdip = ddi_get_parent(rdip); + + ASSERT(pdip); + + /* + * Check if caller holds pdip busy - can cause deadlocks during + * devfs_walk() + */ + if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) { + cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: " + "devinfo branch(%p) not held or parent busy held", + (void *)rdip); + ndi_rele_devi(rdip); + kmem_free(path, MAXPATHLEN); + return (-1); + } + + ndi_devi_enter(pdip, &circ); + (void) ddi_pathname(rdip, path); + ndi_devi_exit(pdip, circ); + + bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS, + mod_hash_null_valdtor, sizeof (struct dev_info)); + + bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS, + mod_hash_null_valdtor, sizeof (struct snode)); + + if (devfs_walk(path, visit_dvnode, &bsa)) { + cmn_err(CE_WARN, "e_ddi_branch_referenced: " + "devfs walk failed for: %s", path); + kmem_free(path, MAXPATHLEN); + bsa.s_total = bsa.dv_total = -1; + goto out; + } + + kmem_free(path, MAXPATHLEN); + + /* + * Walk the snode table to detect device opens, which are currently + * maintained on specfs common snodes. + */ + spec_snode_walk(visit_snode, &bsa); + + if (callback == NULL) + goto out; + + bsa.callback = callback; + bsa.arg = arg; + + if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) { + ndi_devi_enter(rdip, &circ); + ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa); + ndi_devi_exit(rdip, circ); + } + +out: + ndi_rele_devi(rdip); + mod_hash_destroy_ptrhash(bsa.s_hash); + mod_hash_destroy_ptrhash(bsa.dv_hash); + return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total); +} diff --git a/usr/src/uts/common/sys/esunddi.h b/usr/src/uts/common/sys/esunddi.h index 3cde0b2b14..e2f860d520 100644 --- a/usr/src/uts/common/sys/esunddi.h +++ b/usr/src/uts/common/sys/esunddi.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). 
You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -26,9 +25,6 @@ #ifndef _SYS_ESUNDDI_H #define _SYS_ESUNDDI_H - -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 */ - #include <sys/sunddi.h> #include <sys/proc.h> #include <sys/autoconf.h> @@ -214,6 +210,41 @@ dev_info_t * e_ddi_nodeid_to_dip(pnode_t nodeid); /* + * Defines for DR interfaces + */ +#define DEVI_BRANCH_CHILD 0x01 /* Walk immediate children of root */ +#define DEVI_BRANCH_CONFIGURE 0x02 /* Configure branch after create */ +#define DEVI_BRANCH_DESTROY 0x04 /* Destroy branch after unconfigure */ +#define DEVI_BRANCH_EVENT 0x08 /* Post NDI event */ +#define DEVI_BRANCH_PROM 0x10 /* Branches derived from PROM nodes */ +#define DEVI_BRANCH_SID 0x20 /* SID node branches */ +#define DEVI_BRANCH_ROOT 0x40 /* Node is the root of a branch */ + +typedef struct devi_branch { + void *arg; + void (*devi_branch_callback)(dev_info_t *, void *, uint_t); + int type; + union { + int (*prom_branch_select)(pnode_t, void *, uint_t); + int (*sid_branch_create)(dev_info_t *, void *, uint_t); + } create; +} devi_branch_t; + +extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp, + dev_info_t **dipp, uint_t flags); +extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, + uint_t flags); +extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp, + uint_t flags); +extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, + uint_t flags); +extern void e_ddi_branch_hold(dev_info_t *rdip); +extern void e_ddi_branch_rele(dev_info_t *rdip); +extern int e_ddi_branch_held(dev_info_t *rdip); +extern int e_ddi_branch_referenced(dev_info_t *rdip, + int (*cb)(dev_info_t *dip, void *, uint_t), void *arg); + +/* * Obsolete interfaces, no longer used, to be removed. * Retained only for driver compatibility. 
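Taken together, the declarations above define a branch lifecycle for platform DR code: create the branch held and offline, configure it to attach, and unconfigure or destroy it to remove. A sketch of creating and attaching a one-node SID branch; my_sbd_create() and attach_board_branch() are hypothetical, and the flag choice is illustrative:

#include <sys/note.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>

/* Hypothetical callback: decorate the freshly allocated SID node. */
static int
my_sbd_create(dev_info_t *dip, void *arg, uint_t flags)
{
	_NOTE(ARGUNUSED(arg, flags));

	/* Set the "name" property and any bus-specific properties on dip. */
	return (DDI_WALK_TERMINATE);	/* one node: no children, stop */
}

static int
attach_board_branch(dev_info_t *pdip)
{
	devi_branch_t br;
	dev_info_t *dip = NULL;

	br.arg = NULL;
	br.devi_branch_callback = NULL;
	br.type = DEVI_BRANCH_SID;
	br.create.sid_branch_create = my_sbd_create;

	/* The branch is created held and offline, then attached. */
	return (e_ddi_branch_create(pdip, &br, &dip, DEVI_BRANCH_CONFIGURE));
}

Teardown is the mirror image: e_ddi_branch_referenced() to look for outstanding opens, then e_ddi_branch_destroy(), which is e_ddi_branch_unconfigure() with DEVI_BRANCH_DESTROY set.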
*/ diff --git a/usr/src/uts/common/sys/pci_impl.h b/usr/src/uts/common/sys/pci_impl.h index d485800b96..429d6860bd 100644 --- a/usr/src/uts/common/sys/pci_impl.h +++ b/usr/src/uts/common/sys/pci_impl.h @@ -116,6 +116,9 @@ extern struct pci_bus_resource *pci_bus_res; /* * For now, x86-only to avoid conflicts with <sys/memlist_impl.h> */ +#define memlist_find memlist_find_pci +#define memlist_insert memlist_insert_pci + extern struct memlist *memlist_alloc(void); extern void memlist_free(struct memlist *); extern void memlist_free_all(struct memlist **); diff --git a/usr/src/uts/common/sys/processor.h b/usr/src/uts/common/sys/processor.h index 3a76c8c9b4..c0fe6e21b8 100644 --- a/usr/src/uts/common/sys/processor.h +++ b/usr/src/uts/common/sys/processor.h @@ -32,8 +32,6 @@ #ifndef _SYS_PROCESSOR_H #define _SYS_PROCESSOR_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/procset.h> @@ -140,6 +138,7 @@ extern lgrpid_t gethomelgroup(); * Internal interface prototypes */ extern int p_online_internal(processorid_t, int, int *); +extern int p_online_internal_locked(processorid_t, int, int *); #endif /* !_KERNEL */ diff --git a/usr/src/uts/common/syscall/p_online.c b/usr/src/uts/common/syscall/p_online.c index 004627569c..88a0340bf7 100644 --- a/usr/src/uts/common/syscall/p_online.c +++ b/usr/src/uts/common/syscall/p_online.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -24,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/param.h> #include <sys/var.h> @@ -49,7 +46,7 @@ * P_FAULTED */ int -p_online_internal(processorid_t cpun, int new_status, int *old_status) +p_online_internal_locked(processorid_t cpun, int new_status, int *old_status) { cpu_t *cp; int status; @@ -59,7 +56,7 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status) /* * Try to get a pointer to the requested CPU structure. */ - mutex_enter(&cpu_lock); /* protects CPU states */ + ASSERT(MUTEX_HELD(&cpu_lock)); if ((cp = cpu_get(cpun)) == NULL) { error = EINVAL; goto out; @@ -219,10 +216,21 @@ p_online_internal(processorid_t cpun, int new_status, int *old_status) break; } out: - mutex_exit(&cpu_lock); return (error); } +int +p_online_internal(processorid_t cpun, int new_status, int *old_status) +{ + int rc; + + mutex_enter(&cpu_lock); /* protects CPU states */ + rc = p_online_internal_locked(cpun, new_status, old_status); + mutex_exit(&cpu_lock); + + return (rc); +} + /* * p_online(2) - get/change processor operational status. 
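The p_online_internal() change above is the standard locked-variant split: DR code paths that already hold cpu_lock call p_online_internal_locked() directly, while the historical entry point keeps its name and becomes a thin wrapper that takes the lock. The shape of the refactoring, sketched with a POSIX mutex (do_transition() stands in for the original function body):

#include <pthread.h>

static pthread_mutex_t cpu_lock = PTHREAD_MUTEX_INITIALIZER;

static int
do_transition(int cpun, int new_status)
{
	(void) cpun;
	return (new_status);	/* stand-in for the real state machine */
}

/* Callers that already hold cpu_lock use this variant directly. */
static int
p_online_locked(int cpun, int new_status)
{
	/* the kernel version asserts MUTEX_HELD(&cpu_lock) here */
	return (do_transition(cpun, new_status));
}

/* The historical entry point becomes a thin locking wrapper. */
int
p_online(int cpun, int new_status)
{
	int rc;

	(void) pthread_mutex_lock(&cpu_lock);
	rc = p_online_locked(cpun, new_status);
	(void) pthread_mutex_unlock(&cpu_lock);
	return (rc);
}

The cpupm_redefine_max_activepwr_state() hunk further up takes the other common route for the same problem, testing MUTEX_HELD() at run time; the explicit _locked variant is usually preferred because the caller's locking contract stays visible in the name.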
* diff --git a/usr/src/uts/common/vm/page.h b/usr/src/uts/common/vm/page.h index 8739e8a366..298b8d4415 100644 --- a/usr/src/uts/common/vm/page.h +++ b/usr/src/uts/common/vm/page.h @@ -1085,8 +1085,8 @@ struct memseg { page_t *pages, *epages; /* [from, to] in page array */ pfn_t pages_base, pages_end; /* [from, to] in page numbers */ struct memseg *next; /* next segment in list */ -#if defined(__sparc) struct memseg *lnext; /* next segment in deleted list */ +#if defined(__sparc) uint64_t pagespa, epagespa; /* [from, to] page array physical */ uint64_t nextpa; /* physical next pointer */ pfn_t kpm_pbase; /* start of kpm range */ @@ -1096,8 +1096,8 @@ struct memseg { kpm_spage_t *kpm_spgs; /* ptr to kpm_spage array */ } mseg_un; uint64_t kpm_pagespa; /* physical ptr to kpm (s)pages array */ - uint_t msegflags; /* memseg flags */ #endif /* __sparc */ + uint_t msegflags; /* memseg flags */ }; /* memseg union aliases */ diff --git a/usr/src/uts/common/vm/seg_kpm.h b/usr/src/uts/common/vm/seg_kpm.h index 0b766bbaf4..b61980df01 100644 --- a/usr/src/uts/common/vm/seg_kpm.h +++ b/usr/src/uts/common/vm/seg_kpm.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -27,8 +26,6 @@ #ifndef _VM_SEG_KPM_H #define _VM_SEG_KPM_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -85,8 +82,13 @@ extern int segmap_kpm; #define IS_KPM_ADDR(addr) \ ((addr) >= segkpm->s_base && (addr) < (segkpm->s_base + segkpm->s_size)) +#ifdef __x86 +/* x86 systems use neither kpm_page_t nor kpm_spage_t when supporting kpm. */ +#define KPMPAGE_T_SZ (0) +#else /* __x86 */ #define KPMPAGE_T_SZ \ ((kpm_smallpages == 0) ? 
sizeof (kpm_page_t) : sizeof (kpm_spage_t)) +#endif /* __x86 */ #else /* SEGKPM_SUPPORT */ diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files index 8a1cf4b2b7..eeaaa379ab 100644 --- a/usr/src/uts/i86pc/Makefile.files +++ b/usr/src/uts/i86pc/Makefile.files @@ -80,7 +80,10 @@ CORE_OBJS += \ mach_kdi.o \ mach_sysconfig.o \ machdep.o \ + mem_config.o \ mem_config_stubs.o \ + mem_config_arch.o \ + memlist_new.o \ memnode.o \ microcode.o \ microfind.o \ @@ -187,7 +190,7 @@ PCPLUSMP_OBJS += apic.o apic_regops.o psm_common.o apic_introp.o \ mp_platform_common.o hpet_acpi.o ACPI_DRV_OBJS += acpi_drv.o acpi_video.o -ACPINEX_OBJS += acpinex_drv.o +ACPINEX_OBJS += acpinex_drv.o acpinex_event.o CPUDRV_OBJS += \ cpudrv.o \ @@ -200,10 +203,16 @@ ACPIDEV_OBJS += acpidev_drv.o \ acpidev_scope.o acpidev_device.o \ acpidev_container.o \ acpidev_cpu.o \ + acpidev_dr.o \ acpidev_memory.o \ + acpidev_pci.o \ acpidev_resource.o \ acpidev_util.o +DRMACH_ACPI_OBJS += drmach_acpi.o dr_util.o drmach_err.o + +DR_OBJS += dr.o dr_cpu.o dr_err.o dr_io.o dr_mem_acpi.o dr_quiesce.o dr_util.o + ROOTNEX_OBJS += rootnex.o immu.o immu_dmar.o immu_dvma.o \ immu_intrmap.o immu_qinv.o immu_regs.o diff --git a/usr/src/uts/i86pc/Makefile.i86pc.shared b/usr/src/uts/i86pc/Makefile.i86pc.shared index 89b47413f3..a8fbb83fdf 100644 --- a/usr/src/uts/i86pc/Makefile.i86pc.shared +++ b/usr/src/uts/i86pc/Makefile.i86pc.shared @@ -61,6 +61,9 @@ LINTS_DIR = $(OBJS_DIR) LINT_LIB_DIR = $(UTSBASE)/$(PLATFORM)/lint-libs/$(OBJS_DIR) GEN_LINT_LIB_DIR = $(UTSBASE)/intel/lint-libs/$(OBJS_DIR) +LINT32_DIRS = $(LINT32_BUILDS:%=$(UTSBASE)/$(PLATFORM)/lint-libs/%) +LINT32_FILES = $(LINT32_DIRS:%=%/llib-l$(MODULE).ln) + DTRACESTUBS_O = $(OBJS_DIR)/dtracestubs.o DTRACESTUBS = $(OBJS_DIR)/libdtracestubs.so @@ -256,6 +259,7 @@ DRV_KMODS += tzmon DRV_KMODS += acpi_drv DRV_KMODS += acpinex DRV_KMODS += amd_iommu +DRV_KMODS += dr DRV_KMODS += ioat DRV_KMODS += fipe @@ -311,6 +315,7 @@ SYS_KMODS += # MISC_KMODS += gfx_private pcie MISC_KMODS += acpidev +MISC_KMODS += drmach_acpi # # 'Dacf' modules (/kernel/dacf) diff --git a/usr/src/uts/i86pc/Makefile.rules b/usr/src/uts/i86pc/Makefile.rules index ab82a3537f..592250db83 100644 --- a/usr/src/uts/i86pc/Makefile.rules +++ b/usr/src/uts/i86pc/Makefile.rules @@ -79,10 +79,42 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/acpinex/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +SBD_IOCTL = $(UTSBASE)/i86pc/sys/sbd_ioctl.h +DRMACH_IO = $(UTSBASE)/i86pc/io/acpi/drmach_acpi +DRMACH_GENERR = $(DRMACH_IO)/sbdgenerr +DR_IO = $(UTSBASE)/i86pc/io/dr +DR_GENERR = $(DR_IO)/sbdgenerr + +$(DRMACH_GENERR): $(DR_IO)/sbdgenerr.pl + $(RM) $@ + $(CAT) $(DR_IO)/sbdgenerr.pl > $@ + $(CHMOD) +x $@ + +$(DRMACH_IO)/drmach_err.c: $(DRMACH_GENERR) $(SBD_IOCTL) + $(RM) $@ + $(DRMACH_GENERR) EX86 < $(SBD_IOCTL) > $(DRMACH_IO)/drmach_err.c + +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/amd_iommu/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) +$(DR_GENERR): $(DR_IO)/sbdgenerr.pl + $(RM) $@ + $(CAT) $(DR_IO)/sbdgenerr.pl > $@ + $(CHMOD) +x $@ + +$(DR_IO)/dr_err.c: $(DR_GENERR) $(SBD_IOCTL) + $(RM) $@ + $(DR_GENERR) ESBD < $(SBD_IOCTL) > $(DR_IO)/dr_err.c + +$(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/dr/%.c + $(COMPILE.c) -o $@ $< + $(CTFCONVERT_O) + $(OBJS_DIR)/%.o: $(UTSBASE)/i86pc/io/ioat/%.c $(COMPILE.c) -o $@ $< $(CTFCONVERT_O) @@ -307,9 +339,15 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/acpidev/%.c $(LINTS_DIR)/%.ln: 
$(UTSBASE)/i86pc/io/acpi/acpinex/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/acpi/drmach_acpi/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/amd_iommu/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) +$(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/dr/%.c + @($(LHEAD) $(LINT.c) $< $(LTAIL)) + $(LINTS_DIR)/%.ln: $(UTSBASE)/i86pc/io/ioat/%.c @($(LHEAD) $(LINT.c) $< $(LTAIL)) diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h index f5d852b01a..c520e06be7 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu.h @@ -189,6 +189,7 @@ struct gcpu_chipshared { kmutex_t gcpus_cfglock; /* serial MCA config from chip cores */ kmutex_t gcpus_poll_lock; /* serialize pollers on the same chip */ uint32_t gcpus_actv_banks; /* MCA bank numbers active on chip */ + volatile uint32_t gcpus_actv_cnt; /* active cpu count in this chip */ }; struct gcpu_data { @@ -205,11 +206,13 @@ struct regs; * CMI implementation */ extern int gcpu_init(cmi_hdl_t, void **); +extern void gcpu_fini(cmi_hdl_t); extern void gcpu_post_startup(cmi_hdl_t); extern void gcpu_post_mpstartup(cmi_hdl_t); extern void gcpu_faulted_enter(cmi_hdl_t); extern void gcpu_faulted_exit(cmi_hdl_t); extern void gcpu_mca_init(cmi_hdl_t); +extern void gcpu_mca_fini(cmi_hdl_t hdl); extern cmi_errno_t gcpu_msrinject(cmi_hdl_t, cmi_mca_regs_t *, uint_t, int); #ifndef __xpv extern uint64_t gcpu_mca_trap(cmi_hdl_t, struct regs *); @@ -228,6 +231,7 @@ extern int cmi_enable_cmci; * Local functions */ extern void gcpu_mca_poll_init(cmi_hdl_t); +extern void gcpu_mca_poll_fini(cmi_hdl_t); extern void gcpu_mca_poll_start(cmi_hdl_t); extern void gcpu_poll_trace_init(gcpu_poll_trace_ctl_t *); extern void gcpu_poll_trace(gcpu_poll_trace_ctl_t *, uint8_t, uint8_t); diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c index db4a695b7d..761d942f27 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_main.c @@ -23,6 +23,10 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * Generic x86 CPU Module @@ -78,22 +82,52 @@ gcpu_init(cmi_hdl_t hdl, void **datap) */ if ((sp = gcpu_shared[chipid]) == NULL) { sp = kmem_zalloc(sizeof (struct gcpu_chipshared), KM_SLEEP); + mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER, NULL); osp = atomic_cas_ptr(&gcpu_shared[chipid], NULL, sp); - if (osp == NULL) { - mutex_init(&sp->gcpus_poll_lock, NULL, MUTEX_DRIVER, - NULL); - mutex_init(&sp->gcpus_cfglock, NULL, MUTEX_DRIVER, - NULL); - } else { + if (osp != NULL) { + mutex_destroy(&sp->gcpus_cfglock); + mutex_destroy(&sp->gcpus_poll_lock); kmem_free(sp, sizeof (struct gcpu_chipshared)); sp = osp; } } + + atomic_inc_32(&sp->gcpus_actv_cnt); gcpu->gcpu_shared = sp; return (0); } +/* + * deconfigure gcpu_init() + */ +void +gcpu_fini(cmi_hdl_t hdl) +{ + uint_t chipid = cmi_hdl_chipid(hdl); + gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl); + struct gcpu_chipshared *sp; + + if (gcpu_disable || chipid >= GCPU_MAX_CHIPID) + return; + + gcpu_mca_fini(hdl); + + /* + * Keep shared data in cache for reuse. 
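gcpu_init() above now initializes the per-chip mutexes before publishing the structure with atomic_cas_ptr(), and destroys them again if another CPU won the race. That ordering closes the window in the old code where a CPU that lost the race could start using the winner's structure before its mutexes were initialized, which matters once CPUs can be configured in at run time. The same publish pattern in a self-contained C11 sketch (names are illustrative; allocation-failure handling elided):

#include <stdatomic.h>
#include <stdlib.h>

struct chipshared { int nlocks; /* stand-in for the kmutex_t pair */ };

static _Atomic(struct chipshared *) chip_slot;	/* one slot per chip id */

struct chipshared *
get_chipshared(void)
{
	struct chipshared *sp = atomic_load(&chip_slot);
	struct chipshared *expected = NULL;

	if (sp != NULL)
		return (sp);

	/* Construct the candidate fully before it becomes visible. */
	sp = calloc(1, sizeof (*sp));
	sp->nlocks = 2;				/* mutex_init() x 2 */
	if (!atomic_compare_exchange_strong(&chip_slot, &expected, sp)) {
		sp->nlocks = 0;			/* mutex_destroy() x 2 */
		free(sp);			/* we lost the race */
		sp = expected;			/* use the winner's copy */
	}
	return (sp);
}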
+ */ + sp = gcpu_shared[chipid]; + ASSERT(sp != NULL); + atomic_dec_32(&sp->gcpus_actv_cnt); + + if (gcpu != NULL) + kmem_free(gcpu, sizeof (gcpu_data_t)); + + /* Release reference count held in gcpu_init(). */ + cmi_hdl_rele(hdl); +} + void gcpu_post_startup(cmi_hdl_t hdl) { @@ -150,7 +184,7 @@ const cmi_ops_t _cmi_ops = { GCPU_OP(gcpu_cmci_trap, NULL), /* cmi_cmci_trap */ gcpu_msrinject, /* cmi_msrinject */ GCPU_OP(gcpu_hdl_poke, NULL), /* cmi_hdl_poke */ - NULL, /* cmi_fini */ + gcpu_fini, /* cmi_fini */ GCPU_OP(NULL, gcpu_xpv_panic_callback), /* cmi_panic_callback */ }; diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c index 911a7c6af7..730411ebf1 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_mca.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/mca_x86.h> #include <sys/cpu_module_impl.h> @@ -1064,7 +1068,7 @@ gcpu_mca_init(cmi_hdl_t hdl) /* * CPU startup code only calls cmi_mca_init if x86_feature indicates * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier - * processors, which have their own * more primitive way of doing + * processors, which have their own more primitive way of doing * machine checks, will not have cmi_mca_init called since their * CPUID information will not indicate both MCA and MCE features. */ @@ -2002,3 +2006,50 @@ gcpu_msrinject(cmi_hdl_t hdl, cmi_mca_regs_t *regs, uint_t nregs, return (errs == 0 ? CMI_SUCCESS : CMIERR_UNKNOWN); } + +/* deconfigure gcpu_mca_init() */ +void +gcpu_mca_fini(cmi_hdl_t hdl) +{ + gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl); + gcpu_mca_t *mca = &gcpu->gcpu_mca; + int i; + + /* + * CPU startup code only calls cmi_mca_init if x86_feature indicates + * both MCA and MCE support (i.e., X86_MCA). P5, K6, and earlier + * processors, which have their own more primitive way of doing + * machine checks, will not have cmi_mca_init called since their + * CPUID information will not indicate both MCA and MCE features. + */ + if ((x86_feature & X86_MCA) == 0) + return; +#ifndef __xpv + /* + * disable machine check in CR4 + */ + cmi_ntv_hwdisable_mce(hdl); +#endif + mutex_enter(&gcpu->gcpu_shared->gcpus_cfglock); + gcpu_mca_poll_fini(hdl); + mutex_exit(&gcpu->gcpu_shared->gcpus_cfglock); + + /* + * free resources allocated during init + */ + if (mca->gcpu_bank_cmci != NULL) { + kmem_free(mca->gcpu_bank_cmci, sizeof (gcpu_mca_cmci_t) * + mca->gcpu_mca_nbanks); + } + + for (i = 0; i < GCPU_MCA_LOGOUT_NUM; i++) { + if (mca->gcpu_mca_logout[i] != NULL) { + kmem_free(mca->gcpu_mca_logout[i], mca->gcpu_mca_lgsz); + } + } + + if (mca->gcpu_mca_bioscfg.bios_bankcfg != NULL) { + kmem_free(mca->gcpu_mca_bioscfg.bios_bankcfg, + sizeof (struct gcpu_bios_bankcfg) * mca->gcpu_mca_nbanks); + } +} diff --git a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c index f68f96259c..361d147b56 100644 --- a/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c +++ b/usr/src/uts/i86pc/cpu/generic_cpu/gcpu_poll_ntv.c @@ -23,6 +23,10 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * Native MCA polling. 
We establish an omnipresent cyclic to fire on all @@ -31,6 +35,7 @@ */ #include <sys/types.h> +#include <sys/atomic.h> #include <sys/cyclic.h> #include <sys/x86_archext.h> #include <sys/mca_x86.h> @@ -39,7 +44,9 @@ hrtime_t gcpu_mca_poll_interval = NANOSEC * 10ULL; /* tuneable */ static cyclic_id_t gcpu_mca_poll_cycid; -static int gcpu_mca_poll_inits; +static volatile uint_t gcpu_mca_poll_inits; +extern int gcpu_poll_trace_always; +extern uint_t gcpu_poll_trace_nent; /* * Return nonzero if the given handle should poll the MCH. We stick with @@ -280,7 +287,24 @@ gcpu_mca_poll_init(cmi_hdl_t hdl) gcpu_poll_trace_init(ptc); - gcpu_mca_poll_inits++; + atomic_inc_uint(&gcpu_mca_poll_inits); +} + +/* deconfigure gcpu_mca_poll_init() */ +void +gcpu_mca_poll_fini(cmi_hdl_t hdl) +{ + gcpu_data_t *gcpu = cmi_hdl_getcmidata(hdl); + gcpu_poll_trace_ctl_t *ptc = &gcpu->gcpu_mca.gcpu_polltrace; + + ASSERT(cmi_hdl_class(hdl) == CMI_HDL_NATIVE); + + if (gcpu_poll_trace_always && (ptc->mptc_tbufs != NULL)) { + kmem_free(ptc->mptc_tbufs, sizeof (gcpu_poll_trace_t) * + gcpu_poll_trace_nent); + } + + atomic_dec_uint(&gcpu_mca_poll_inits); } void diff --git a/usr/src/uts/i86pc/dr/Makefile b/usr/src/uts/i86pc/dr/Makefile new file mode 100644 index 0000000000..03aa5f615e --- /dev/null +++ b/usr/src/uts/i86pc/dr/Makefile @@ -0,0 +1,104 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2010, Intel Corporation. +# All rights reserved. +# +# This makefile drives the production of the dr driver module. +# +# i86pc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. +PLATFORM = i86pc + +# +# Define the module and object file sets. +# +MODULE = dr +OBJECTS = $(DR_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DR_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_PSM_DRV_DIR)/$(MODULE) +CONF_SRCDIR = $(UTSBASE)/i86pc/io/dr + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/Makefile.i86pc + +# +# Define targets +# +ALL_TARGET = $(BINARY) $(SRC_CONFILE) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Overrides +# +DEF_BUILDS = $(DEF_BUILDS64) +ALL_BUILDS = $(ALL_BUILDS64) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +# +# module dependencies +# +LDFLAGS += -dy -Nmisc/drmach_acpi + +CLEANFILES += $(DR_GENERR) +CLEANFILES += $(DR_IO)/dr_err.c + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) lint32 + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) $(CONF_INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/Makefile.targ + diff --git a/usr/src/uts/i86pc/drmach_acpi/Makefile b/usr/src/uts/i86pc/drmach_acpi/Makefile new file mode 100644 index 0000000000..24f9197b32 --- /dev/null +++ b/usr/src/uts/i86pc/drmach_acpi/Makefile @@ -0,0 +1,101 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# Copyright (c) 2010, Intel Corporation. +# All rights reserved. +# +# This makefile drives the production of the drmach_acpi loadable module. +# +# i86pc architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../.. + +# +# Define the module and object file sets. +# +MODULE = drmach_acpi +OBJECTS = $(DRMACH_ACPI_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(DRMACH_ACPI_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_PSM_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/Makefile.i86pc + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides +# +DEF_BUILDS = $(DEF_BUILDS64) +ALL_BUILDS = $(ALL_BUILDS64) + +# +# lint pass one enforcement +# +CFLAGS += $(CCVERBOSE) + +# +# module dependencies +# +LDFLAGS += -dy -Nmisc/acpica -Nmisc/acpidev + +CLEANFILES += $(DRMACH_GENERR) +CLEANFILES += $(DRMACH_IO)/drmach_err.c + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) lint32 + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/Makefile.targ diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c index 33de316493..5c58266634 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_container.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -54,11 +54,13 @@ #include <sys/types.h> #include <sys/atomic.h> +#include <sys/note.h> #include <sys/sunddi.h> #include <sys/sunndi.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> #include <sys/acpidev.h> +#include <sys/acpidev_dr.h> #include <sys/acpidev_impl.h> static ACPI_STATUS acpidev_container_probe(acpidev_walk_info_t *infop); @@ -124,7 +126,7 @@ static acpidev_filter_rule_t acpidev_container_filters[] = { static ACPI_STATUS acpidev_container_probe(acpidev_walk_info_t *infop) { - ACPI_STATUS rc; + ACPI_STATUS rc = AE_OK; int flags; ASSERT(infop != NULL); @@ -137,19 +139,35 @@ acpidev_container_probe(acpidev_walk_info_t *infop) return (AE_OK); } - if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in " + flags = ACPIDEV_PROCESS_FLAG_SCAN; + switch (infop->awi_op_type) { + case ACPIDEV_OP_BOOT_PROBE: + if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE; + acpidev_dr_check(infop); + } + break; + + case ACPIDEV_OP_BOOT_REPROBE: + break; + + case ACPIDEV_OP_HOTPLUG_PROBE: + if (acpica_get_devcfg_feature(ACPI_DEVCFG_CONTAINER)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE | + ACPIDEV_PROCESS_FLAG_SYNCSTATUS | + ACPIDEV_PROCESS_FLAG_HOLDBRANCH; + } + break; + + default: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in " "acpidev_container_probe().", infop->awi_op_type); rc = AE_BAD_PARAMETER; + break; + } + + if (rc == AE_OK) { + rc = acpidev_process_object(infop, flags); } if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) { cmn_err(CE_WARN, @@ -162,11 +180,12 @@ acpidev_container_probe(acpidev_walk_info_t *infop) return (rc); } -/*ARGSUSED*/ static ACPI_STATUS acpidev_container_search_dev(ACPI_HANDLE hdl, UINT32 lvl, void *ctx, void **retval) { + _NOTE(ARGUNUSED(hdl, retval)); + int *fp = (int *)ctx; *fp = lvl; @@ -221,11 +240,11 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, } if (cpu_lvl == 1) { /* CPU as child, most likely a physical CPU. */ - (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU, + (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_CPU, devnamelen); } else if (cpu_lvl == 2 && module_lvl == 1) { /* CPU as grandchild, most likely a system board. */ - (void) strncpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD, + (void) strlcpy(devname, ACPIDEV_NODE_NAME_MODULE_SBD, devnamelen); } else if (ACPI_FAILURE(AcpiGetName(infop->awi_hdl, ACPI_SINGLE_NAME, &buf))) { @@ -233,7 +252,7 @@ acpidev_container_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, * Failed to get ACPI object name; use ACPI object name * as the default name. */ - (void) strncpy(devname, ACPIDEV_NODE_NAME_CONTAINER, + (void) strlcpy(devname, ACPIDEV_NODE_NAME_CONTAINER, devnamelen); } diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c index 0576591147..fa93871fb4 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_cpu.c @@ -24,7 +24,7 @@ * Use is subject to license terms. 
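Several hunks in this file replace strncpy() with strlcpy(): strncpy() leaves the destination unterminated when the source is at least as long as the buffer, while strlcpy() always NUL-terminates, truncating if necessary. A minimal illustration (strlcpy() is available in Solaris libc via <string.h>):

#include <string.h>

void
copy_names(void)
{
	char buf[4];

	/* strncpy(): "sbd0" exactly fills buf, leaving no terminating NUL. */
	(void) strncpy(buf, "sbd0", sizeof (buf));

	/* strlcpy(): buf is always terminated; here it holds "sbd". */
	(void) strlcpy(buf, "sbd0", sizeof (buf));
}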
*/ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. */ @@ -43,6 +43,7 @@ #include <sys/bootconf.h> #include <sys/cpuvar.h> #include <sys/machsystm.h> +#include <sys/note.h> #include <sys/psm_types.h> #include <sys/x86_archext.h> #include <sys/sunddi.h> @@ -70,11 +71,14 @@ static ACPI_STATUS acpidev_cpu_probe(acpidev_walk_info_t *infop); static acpidev_filter_result_t acpidev_cpu_filter(acpidev_walk_info_t *infop, char *devname, int maxlen); static ACPI_STATUS acpidev_cpu_init(acpidev_walk_info_t *infop); +static void acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl, + acpidev_class_t *clsp); static acpidev_filter_result_t acpidev_cpu_filter_func( acpidev_walk_info_t *infop, ACPI_HANDLE hdl, acpidev_filter_rule_t *afrp, char *devname, int len); -static int acpidev_cpu_query_dip(cpu_t *, dev_info_t **); +static int acpidev_cpu_create_dip(cpu_t *, dev_info_t **); +static int acpidev_cpu_get_dip(cpu_t *, dev_info_t **); /* * Default class driver for ACPI processor/CPU objects. @@ -91,7 +95,7 @@ acpidev_class_t acpidev_class_cpu = { acpidev_cpu_probe, /* adc_probe */ acpidev_cpu_filter, /* adc_filter */ acpidev_cpu_init, /* adc_init */ - NULL, /* adc_fini */ + acpidev_cpu_fini, /* adc_fini */ }; /* @@ -133,12 +137,14 @@ static char *acpidev_cpu_uid_formats[] = { "SCK%x-CPU%x", }; -static ACPI_HANDLE acpidev_cpu_map_hdl = NULL; -static uint32_t acpidev_cpu_map_count = 0; -static struct acpidev_cpu_map_item *acpidev_cpu_map = NULL; +static ACPI_HANDLE acpidev_cpu_map_hdl; +static uint32_t acpidev_cpu_map_count; +static struct acpidev_cpu_map_item *acpidev_cpu_map; extern int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **); -static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **) = NULL; +static int (*psm_cpu_create_devinfo_old)(cpu_t *, dev_info_t **); +extern int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **); +static int (*psm_cpu_get_devinfo_old)(cpu_t *, dev_info_t **); /* Count how many enabled CPUs are in the MADT table. */ static ACPI_STATUS @@ -200,7 +206,7 @@ acpidev_cpu_parse_MADT(ACPI_SUBTABLE_HEADER *ap, void *context) /* See comment at beginning about 255 limitation. */ if (mpx2a->LocalApicId < 255) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: encountered CPU with X2APIC Id < 255."); + "!acpidev: encountered CPU with X2APIC Id < 255."); } else if (mpx2a->LapicFlags & ACPI_MADT_ENABLED) { ASSERT(*cntp < acpidev_cpu_map_count); acpidev_cpu_map[*cntp].proc_id = mpx2a->Uid; @@ -270,7 +276,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context) } return (AE_CTRL_TERMINATE); } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: encountered CPU " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: encountered CPU " "with X2APIC Id < 255 in _MAT."); } break; @@ -292,7 +298,7 @@ acpidev_cpu_query_MAT(ACPI_SUBTABLE_HEADER *ap, void *context) * x2APIC and x2APIC NMI Structure. 
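 * For reference, _MAT evaluates to a buffer of MADT-style subtables;
 * the walker above scans ACPI_SUBTABLE_HEADER entries and matches on
 * Type (Local APIC or Local x2APIC) to recover the CPU's APIC id and
 * enabled flag, terminating the walk once a match is found.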
*/ ACPIDEV_DEBUG(CE_NOTE, - "acpidev: unknown APIC entry type %u in _MAT.", ap->Type); + "!acpidev: unknown APIC entry type %u in _MAT.", ap->Type); break; } @@ -313,7 +319,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp) if (infop->awi_info->Type != ACPI_TYPE_PROCESSOR && infop->awi_info->Type != ACPI_TYPE_DEVICE) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: object %s is not PROCESSOR or DEVICE.", + "!acpidev: object %s is not PROCESSOR or DEVICE.", infop->awi_name); return (AE_BAD_PARAMETER); } @@ -345,7 +351,7 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp) return (AE_OK); } else { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to evaluate ACPI object %s.", + "!acpidev: failed to evaluate ACPI object %s.", infop->awi_name); } } @@ -365,6 +371,59 @@ acpidev_cpu_get_procid(acpidev_walk_info_t *infop, uint32_t *idp) } static ACPI_STATUS +acpidev_cpu_get_proximity_id(ACPI_HANDLE hdl, uint32_t apicid, uint32_t *pxmidp) +{ + int len, off; + ACPI_SUBTABLE_HEADER *sp; + ACPI_SRAT_CPU_AFFINITY *xp; + ACPI_SRAT_X2APIC_CPU_AFFINITY *x2p; + + ASSERT(hdl != NULL); + ASSERT(pxmidp != NULL); + *pxmidp = UINT32_MAX; + + if (ACPI_SUCCESS(acpidev_eval_pxm(hdl, pxmidp))) { + return (AE_OK); + } + if (acpidev_srat_tbl_ptr == NULL) { + return (AE_NOT_FOUND); + } + + /* Search the static ACPI SRAT table for proximity domain id. */ + sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1); + len = acpidev_srat_tbl_ptr->Header.Length; + off = sizeof (*acpidev_srat_tbl_ptr); + while (off < len) { + switch (sp->Type) { + case ACPI_SRAT_TYPE_CPU_AFFINITY: + xp = (ACPI_SRAT_CPU_AFFINITY *)sp; + if ((xp->Flags & ACPI_SRAT_CPU_ENABLED) && + xp->ApicId == apicid) { + *pxmidp = xp->ProximityDomainLo; + *pxmidp |= xp->ProximityDomainHi[0] << 8; + *pxmidp |= xp->ProximityDomainHi[1] << 16; + *pxmidp |= xp->ProximityDomainHi[2] << 24; + return (AE_OK); + } + break; + + case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: + x2p = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)sp; + if ((x2p->Flags & ACPI_SRAT_CPU_ENABLED) && + x2p->ApicId == apicid) { + *pxmidp = x2p->ProximityDomain; + return (AE_OK); + } + break; + } + off += sp->Length; + sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length); + } + + return (AE_NOT_FOUND); +} + +static ACPI_STATUS acpidev_cpu_pre_probe(acpidev_walk_info_t *infop) { uint32_t count = 0; @@ -373,6 +432,7 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop) ASSERT(infop != NULL); if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE && acpidev_cpu_map_hdl == NULL) { + /* Parse CPU relative information in the ACPI MADT table. */ (void) acpidev_walk_apic(NULL, NULL, NULL, acpidev_cpu_count_MADT, &acpidev_cpu_map_count); acpidev_cpu_map = kmem_zalloc(sizeof (acpidev_cpu_map[0]) @@ -381,6 +441,12 @@ acpidev_cpu_pre_probe(acpidev_walk_info_t *infop) acpidev_cpu_parse_MADT, &count); ASSERT(count == acpidev_cpu_map_count); acpidev_cpu_map_hdl = infop->awi_hdl; + + /* Cache pointer to the ACPI SRAT table. */ + if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SRAT, 1, + (ACPI_TABLE_HEADER **)&acpidev_srat_tbl_ptr))) { + acpidev_srat_tbl_ptr = NULL; + } } return (AE_OK); @@ -404,7 +470,9 @@ acpidev_cpu_post_probe(acpidev_walk_info_t *infop) /* replace psm_cpu_create_devinfo with local implementation. 
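 * This is the usual hook-interposition pattern: save the current
 * handler, install a local one, and have the local handler fall back
 * to the saved pointer when no ACPI dip matches the CPU (see
 * acpidev_cpu_create_dip() and acpidev_cpu_get_dip()).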
*/ psm_cpu_create_devinfo_old = psm_cpu_create_devinfo; - psm_cpu_create_devinfo = acpidev_cpu_query_dip; + psm_cpu_create_devinfo = acpidev_cpu_create_dip; + psm_cpu_get_devinfo_old = psm_cpu_get_devinfo; + psm_cpu_get_devinfo = acpidev_cpu_get_dip; } return (AE_OK); @@ -427,27 +495,40 @@ acpidev_cpu_probe(acpidev_walk_info_t *infop) return (AE_OK); } - /* - * Mark device as offline. It will be changed to online state - * when the corresponding CPU starts up. - */ - if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | - ACPIDEV_PROCESS_FLAG_CREATE | - ACPIDEV_PROCESS_FLAG_OFFLINE; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | - ACPIDEV_PROCESS_FLAG_CREATE | - ACPIDEV_PROCESS_FLAG_OFFLINE; - rc = acpidev_process_object(infop, flags); - } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u in " + flags = ACPIDEV_PROCESS_FLAG_SCAN; + switch (infop->awi_op_type) { + case ACPIDEV_OP_BOOT_PROBE: + /* + * Mark device as offline. It will be changed to online state + * when the corresponding CPU starts up. + */ + if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE | + ACPIDEV_PROCESS_FLAG_OFFLINE; + } + break; + + case ACPIDEV_OP_BOOT_REPROBE: + break; + + case ACPIDEV_OP_HOTPLUG_PROBE: + if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE | + ACPIDEV_PROCESS_FLAG_OFFLINE | + ACPIDEV_PROCESS_FLAG_SYNCSTATUS | + ACPIDEV_PROCESS_FLAG_HOLDBRANCH; + } + break; + + default: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u in " "acpidev_cpu_probe().", infop->awi_op_type); rc = AE_BAD_PARAMETER; + break; + } + + if (rc == AE_OK) { + rc = acpidev_process_object(infop, flags); } if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) { cmn_err(CE_WARN, @@ -474,12 +555,12 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, if (acpidev_cpu_get_procid(infop, &procid) != 0) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to query processor id for %s.", + "!acpidev: failed to query processor id for %s.", infop->awi_name); return (ACPIDEV_FILTER_SKIP); } else if (acpidev_cpu_get_apicid(procid, &apicid) != 0) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to query apic id for %s.", + "!acpidev: failed to query apic id for %s.", infop->awi_name); return (ACPIDEV_FILTER_SKIP); } @@ -499,7 +580,7 @@ acpidev_cpu_filter_func(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, return (ACPIDEV_FILTER_SKIP); } else if (!mat.enabled) { ACPIDEV_DEBUG(CE_NOTE, - "acpidev: CPU %s has been disabled.", + "!acpidev: CPU %s has been disabled.", infop->awi_name); return (ACPIDEV_FILTER_SKIP); } @@ -536,6 +617,7 @@ static ACPI_STATUS acpidev_cpu_init(acpidev_walk_info_t *infop) { int count; + uint32_t pxmid; dev_info_t *dip; ACPI_HANDLE hdl; char unitaddr[64]; @@ -550,7 +632,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop) dip = infop->awi_dip; hdl = infop->awi_hdl; - /* Create "apic_id" and "processor_id" properties. */ + /* Create "apic_id", "processor_id" and "proximity_id" properties. 
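+ * As an illustration (not part of this change), a consumer could read
+ * one back with ddi_prop_get_int(9F):
+ *	id = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ *	    "apic_id", -1);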
*/ if (ndi_prop_update_int(DDI_DEV_T_NONE, dip, ACPIDEV_PROP_NAME_PROCESSOR_ID, infop->awi_scratchpad[0]) != NDI_SUCCESS) { @@ -567,6 +649,15 @@ acpidev_cpu_init(acpidev_walk_info_t *infop) infop->awi_name); return (AE_ERROR); } + if (ACPI_SUCCESS(acpidev_cpu_get_proximity_id(infop->awi_hdl, + infop->awi_scratchpad[1], &pxmid))) { + if (ndi_prop_update_int(DDI_DEV_T_NONE, dip, + ACPIDEV_PROP_NAME_PROXIMITY_ID, pxmid) != NDI_SUCCESS) { + cmn_err(CE_WARN, "!acpidev: failed to set proximity id " + "property for %s.", infop->awi_name); + return (AE_ERROR); + } + } /* Set "compatible" property for CPU dip */ count = sizeof (compatible) / sizeof (compatible[0]); @@ -616,7 +707,7 @@ acpidev_cpu_init(acpidev_walk_info_t *infop) } } else { ACPIDEV_DEBUG(CE_WARN, - "acpidev: unknown operation type %u in acpidev_cpu_init.", + "!acpidev: unknown operation type %u in acpidev_cpu_init.", infop->awi_op_type); return (AE_BAD_PARAMETER); } @@ -624,18 +715,38 @@ acpidev_cpu_init(acpidev_walk_info_t *infop) return (AE_OK); } +static void +acpidev_cpu_fini(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl, + acpidev_class_t *clsp) +{ + _NOTE(ARGUNUSED(clsp, dhdl)); + + int rc; + uint32_t procid; + + rc = acpica_get_procid_by_object(hdl, &procid); + ASSERT(ACPI_SUCCESS(rc)); + if (ACPI_SUCCESS(rc)) { + rc = acpica_remove_processor_from_map(procid); + ASSERT(ACPI_SUCCESS(rc)); + if (ACPI_FAILURE(rc)) { + cmn_err(CE_WARN, "!acpidev: failed to remove " + "processor from ACPICA."); + } + } +} + +/* + * Lookup the dip for a CPU if ACPI CPU autoconfig is enabled. + */ static int -acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp) +acpidev_cpu_lookup_dip(cpu_t *cp, dev_info_t **dipp) { uint32_t apicid; ACPI_HANDLE hdl; dev_info_t *dip = NULL; *dipp = NULL; - /* - * Try to get the dip associated with the CPU if ACPI_DEVCFG_CPU is - * enabled. - */ if (acpica_get_devcfg_feature(ACPI_DEVCFG_CPU)) { apicid = cpuid_get_apicid(cp); if (acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl) == 0 || @@ -644,18 +755,41 @@ acpidev_cpu_query_dip(cpu_t *cp, dev_info_t **dipp) ASSERT(hdl != NULL); if (ACPI_SUCCESS(acpica_get_devinfo(hdl, &dip))) { ASSERT(dip != NULL); - ndi_hold_devi(dip); *dipp = dip; return (PSM_SUCCESS); } } + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to lookup dip for cpu %d(%p).", + cp->cpu_id, (void *)cp); } - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get dip for cpu %d(%p).", - cp->cpu_id, (void *)cp); + return (PSM_FAILURE); +} + +static int +acpidev_cpu_create_dip(cpu_t *cp, dev_info_t **dipp) +{ + if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) { + ndi_hold_devi(*dipp); + return (PSM_SUCCESS); + } if (psm_cpu_create_devinfo_old != NULL) { return (psm_cpu_create_devinfo_old(cp, dipp)); } else { return (PSM_FAILURE); } } + +static int +acpidev_cpu_get_dip(cpu_t *cp, dev_info_t **dipp) +{ + if (acpidev_cpu_lookup_dip(cp, dipp) == PSM_SUCCESS) { + return (PSM_SUCCESS); + } + if (psm_cpu_get_devinfo_old != NULL) { + return (psm_cpu_get_devinfo_old(cp, dipp)); + } else { + return (PSM_FAILURE); + } +} diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c index fbf6e944b4..cf65cf4084 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_device.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -103,7 +103,7 @@ static acpidev_filter_rule_t acpidev_device_filters[] = { static ACPI_STATUS acpidev_device_probe(acpidev_walk_info_t *infop) { - ACPI_STATUS rc; + ACPI_STATUS rc = AE_OK; int flags; ASSERT(infop != NULL); @@ -114,20 +114,31 @@ acpidev_device_probe(acpidev_walk_info_t *infop) return (AE_OK); } - if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else { + flags = ACPIDEV_PROCESS_FLAG_SCAN; + switch (infop->awi_op_type) { + case ACPIDEV_OP_BOOT_PROBE: + flags |= ACPIDEV_PROCESS_FLAG_CREATE; + break; + + case ACPIDEV_OP_BOOT_REPROBE: + break; + + case ACPIDEV_OP_HOTPLUG_PROBE: + flags |= ACPIDEV_PROCESS_FLAG_CREATE | + ACPIDEV_PROCESS_FLAG_SYNCSTATUS | + ACPIDEV_PROCESS_FLAG_HOLDBRANCH; + break; + + default: ACPIDEV_DEBUG(CE_WARN, - "acpidev: unknown operation type %u in " + "!acpidev: unknown operation type %u in " "acpi_device_probe().", infop->awi_op_type); rc = AE_BAD_PARAMETER; + break; + } + + if (rc == AE_OK) { + rc = acpidev_process_object(infop, flags); } if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) { cmn_err(CE_WARN, @@ -153,7 +164,7 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen) ACPIDEV_ARRAY_PARAM(acpidev_device_filters), devname, maxlen); } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u " "in acpidev_device_filter().", infop->awi_op_type); res = ACPIDEV_FILTER_FAILED; } @@ -161,7 +172,6 @@ acpidev_device_filter(acpidev_walk_info_t *infop, char *devname, int maxlen) return (res); } -/*ARGSUSED*/ static ACPI_STATUS acpidev_device_init(acpidev_walk_info_t *infop) { diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c new file mode 100644 index 0000000000..e346c65cfb --- /dev/null +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_dr.c @@ -0,0 +1,2743 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ + +#include <sys/types.h> +#include <sys/atomic.h> +#include <sys/cmn_err.h> +#include <sys/cpuvar.h> +#include <sys/memlist.h> +#include <sys/memlist_impl.h> +#include <sys/note.h> +#include <sys/obpdefs.h> +#include <sys/synch.h> +#include <sys/sysmacros.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/x86_archext.h> +#include <sys/machsystm.h> +#include <sys/memnode.h> /* for lgrp_plat_node_cnt */ +#include <sys/psm_types.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/acpidev.h> +#include <sys/acpidev_rsc.h> +#include <sys/acpidev_dr.h> +#include <sys/acpidev_impl.h> + +struct acpidev_dr_set_prop_arg { + uint32_t level; + uint32_t bdnum; + uint32_t cpu_id; + uint32_t mem_id; + uint32_t io_id; + uint32_t mod_id; +}; + +struct acpidev_dr_device_remove_arg { + uint32_t level; +}; + +extern int acpidev_options; + +/* User configurable option to enable/disable ACPI based DR operations. */ +int acpidev_dr_enable = 1; +int acpidev_dr_hierarchy_name = 1; +uint32_t acpidev_dr_max_segs_per_mem_device = ACPIDEV_DR_SEGS_PER_MEM_DEV; +uint32_t acpidev_dr_max_memlists_per_seg = ACPIDEV_DR_MEMLISTS_PER_SEG; + +ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr; +ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr; + +/* ACPI based DR operations are unsupported if zero. */ +static int acpidev_dr_supported = -1; + +/* Failed to initialize support of DR operations if non-zero. */ +static int acpidev_dr_failed; + +static volatile uint32_t acpidev_dr_boards; +static volatile uint32_t acpidev_dr_board_index; +static uint32_t acpidev_dr_max_cmp_per_board; +static uint32_t acpidev_dr_max_memory_per_board; +static uint32_t acpidev_dr_max_io_per_board; +static uint32_t acpidev_dr_memory_device_cnt; + +static ACPI_HANDLE *acpidev_dr_board_handles[ACPIDEV_DR_MAX_BOARDS]; + +/* Lock to protect/block DR operations at runtime. */ +static kmutex_t acpidev_dr_lock; + +static acpidev_dr_capacity_t acpidev_dr_capacities[] = { + { /* Nehalem-EX */ + X86_VENDOR_Intel, 0x6, 0x2e, 0x2e, 0, UINT_MAX, + B_TRUE, /* Hotplug capable */ + 1ULL << 30, /* Align on 1GB boundary */ + }, + { /* the last item is used to mark end of the table */ + UINT_MAX, UINT_MAX, UINT_MAX, 0, UINT_MAX, 0, + B_FALSE, + 0, + }, +}; + +static ACPI_STATUS acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg, + void **retval); + +static acpidev_dr_capacity_t * +acpidev_dr_get_capacity(void) +{ + acpidev_dr_capacity_t *cp, *cp1; + uint_t vendor, family, model, step; + static acpidev_dr_capacity_t *acpidev_dr_capacity_curr = NULL; + + if (acpidev_dr_capacity_curr != NULL) { + return (acpidev_dr_capacity_curr); + } + + kpreempt_disable(); + vendor = cpuid_getvendor(CPU); + family = cpuid_getfamily(CPU); + model = cpuid_getmodel(CPU); + step = cpuid_getstep(CPU); + kpreempt_enable(); + + for (cp = acpidev_dr_capacities; ; cp++) { + ASSERT(cp < acpidev_dr_capacities + + sizeof (acpidev_dr_capacities) / sizeof (*cp)); + + /* Check whether it reaches the last item of the table. */ + if (cp->cpu_vendor == UINT_MAX && cp->cpu_family == UINT_MAX && + cp->cpu_model_min == UINT_MAX && cp->cpu_model_max == 0 && + cp->cpu_step_min == UINT_MAX && cp->cpu_step_max == 0) { + break; + } + if (cp->cpu_vendor == vendor && cp->cpu_family == family && + model >= cp->cpu_model_min && model <= cp->cpu_model_max && + step >= cp->cpu_step_min && step <= cp->cpu_step_max) { + break; + } + } + + /* Assume all CPUs in system are homogeneous. 
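+ * The lookup is memoized lock-free: the first caller publishes its
+ * matching table entry with atomic_cas_ptr() and later callers reuse
+ * it; a racer that observes a different cached entry returns NULL.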
*/ + cp1 = atomic_cas_ptr(&acpidev_dr_capacity_curr, NULL, cp); + ASSERT(cp1 == NULL || cp1 == cp); + if (cp1 != NULL && cp1 != cp) { + return (NULL); + } + + return (cp); +} + +int +acpidev_dr_capable(void) +{ + uint64_t flags1, flags2; + acpidev_dr_capacity_t *cp; + + /* + * Disable support of DR operations if: + * 1) acpidev fails to initialize DR interfaces. + * 2) ACPI based DR has been disabled by user. + * 3) No DR capable devices have been detected. + * 4) The system doesn't support DR operations. + * 5) Some acpidev features have been disabled by user. + */ + if (acpidev_dr_failed != 0 || acpidev_dr_enable == 0 || + acpidev_dr_supported == 0) { + return (0); + } + + flags1 = ACPI_FEATURE_DEVCFG | ACPI_FEATURE_OSI_MODULE; + flags2 = ACPI_DEVCFG_CPU | ACPI_DEVCFG_MEMORY | + ACPI_DEVCFG_CONTAINER | ACPI_DEVCFG_PCI; + if (acpica_get_core_feature(flags1) != flags1 || + acpica_get_devcfg_feature(flags2) != flags2) { + cmn_err(CE_CONT, + "?acpidev: disable support of ACPI based DR because " + "some acpidev features have been disabled by user.\n"); + acpidev_dr_supported = 0; + return (0); + } + + cp = acpidev_dr_get_capacity(); + if (cp == NULL || cp->hotplug_supported == B_FALSE) { + return (0); + } + + return (1); +} + +uint32_t +acpidev_dr_max_boards(void) +{ + return (acpidev_dr_boards); +} + +uint32_t +acpidev_dr_max_io_units_per_board(void) +{ + return (acpidev_dr_max_io_per_board); +} + +uint32_t +acpidev_dr_max_mem_units_per_board(void) +{ + return (acpidev_dr_max_memory_per_board); +} + +uint32_t +acpidev_dr_max_cmp_units_per_board(void) +{ + return (acpidev_dr_max_cmp_per_board); +} + +uint32_t +acpidev_dr_max_cpu_units_per_cmp(void) +{ + static int max_cnt; + + if (max_cnt == 0) { + kpreempt_disable(); + max_cnt = cpuid_get_ncpu_per_chip(CPU); + kpreempt_enable(); + } + + return (max_cnt); +} + +uint32_t +acpidev_dr_max_segments_per_mem_device(void) +{ + if (acpidev_dr_max_segs_per_mem_device < 1) { + return (ACPIDEV_DR_SEGS_PER_MEM_DEV); + } else { + return (acpidev_dr_max_segs_per_mem_device); + } +} + +uint32_t +acpidev_dr_max_memlists_per_segment(void) +{ + if (acpidev_dr_max_memlists_per_seg < ACPIDEV_DR_MEMLISTS_PER_SEG) { + return (ACPIDEV_DR_MEMLISTS_PER_SEG); + } else { + return (acpidev_dr_max_memlists_per_seg); + } +} + +void +acpidev_dr_init(void) +{ + mutex_init(&acpidev_dr_lock, NULL, MUTEX_DRIVER, NULL); +} + +static void +acpidev_dr_check_board_type(acpidev_data_handle_t dhdl, + struct acpidev_dr_set_prop_arg *ap, char *objname) +{ + if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_MEMORY) { + /* Memory board should have only one memory device. */ + ASSERT(ap->cpu_id == 0); + ASSERT(ap->mem_id == 1); + ASSERT(ap->io_id == 0); + ASSERT(ap->mod_id == 0); + dhdl->aod_bdtype = ACPIDEV_MEMORY_BOARD; + } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI || + dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) { + /* IO board should have only one IO device. */ + ASSERT(ap->cpu_id == 0); + ASSERT(ap->mem_id == 0); + ASSERT(ap->io_id == 1); + ASSERT(ap->mod_id == 0); + dhdl->aod_bdtype = ACPIDEV_IO_BOARD; + } else if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_CONTAINER) { + if (ap->mod_id == 1 && ap->mem_id == 0) { + dhdl->aod_bdtype = ACPIDEV_CPU_BOARD; + } else { + dhdl->aod_bdtype = ACPIDEV_SYSTEM_BOARD; + } + } else { + cmn_err(CE_WARN, + "!acpidev: unknown type of hotplug capable board %s.", + objname); + ASSERT(0); + } +} + +/* + * Check for hotplug capable boards and create environment to support + * ACPI based DR operations. 
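+ * For each board found this assigns a board number and board index,
+ * records the board type, and sets DR properties on all descendants
+ * via acpidev_dr_scan_topo().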
No need to acquire lock here, it's called + * from single-threaded context during boot. + */ +void +acpidev_dr_check(acpidev_walk_info_t *infop) +{ + uint_t cmp; + boolean_t found = B_FALSE; + ACPI_HANDLE phdl; + acpidev_data_handle_t dhdl, pdhdl; + struct acpidev_dr_set_prop_arg arg; + + if (infop == NULL || + infop->awi_op_type != ACPIDEV_OP_BOOT_PROBE) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: invalid parameter to acpidev_dr_check()."); + return; + } + + if (acpidev_dr_capable() == 0) { + return; + } + + dhdl = infop->awi_data; + ASSERT(dhdl != NULL); + + /* This device has already been handled before. */ + if (ACPIDEV_DR_IS_PROCESSED(dhdl)) { + return; + } + + /* + * It implies that the device is hotplug capable if ACPI _EJ0 method + * is available. + */ + if (!ACPIDEV_DR_IS_BOARD(dhdl) && + acpidev_dr_device_hotplug_capable(infop->awi_hdl)) { + ACPIDEV_DR_SET_BOARD(dhdl); + } + + /* All things are done if the device isn't hotplug capable. */ + if (!ACPIDEV_DR_IS_BOARD(dhdl)) { + return; + } + + /* Check whether hardware topology is supported or not. */ + if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, NULL, + NULL))) { + ACPIDEV_DR_SET_FAILED(dhdl); + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: hardware topology under %s " + "is unsupported for DR operations.", infop->awi_name); + return; + } + + /* Generate board/index/port number for the hotplug capable board. */ + dhdl->aod_bdnum = atomic_inc_32_nv(&acpidev_dr_boards) - 1; + dhdl->aod_portid = 0; + phdl = infop->awi_hdl; + while (ACPI_SUCCESS(AcpiGetParent(phdl, &phdl)) && + phdl != ACPI_ROOT_OBJECT) { + pdhdl = acpidev_data_get_handle(phdl); + if (pdhdl != NULL && ACPIDEV_DR_IS_BOARD(pdhdl)) { + dhdl->aod_bdidx = atomic_inc_32_nv(&pdhdl->aod_chidx); + found = B_TRUE; + break; + } + } + if (found == B_FALSE) { + dhdl->aod_bdidx = atomic_inc_32_nv(&acpidev_dr_board_index); + } + dhdl->aod_bdidx -= 1; + + /* Found too many hotplug capable boards. */ + if (dhdl->aod_bdnum >= ACPIDEV_DR_MAX_BOARDS) { + ACPIDEV_DR_SET_FAILED(dhdl); + cmn_err(CE_WARN, "!acpidev: too many hotplug capable boards, " + "max %d, found %d.", + ACPIDEV_DR_MAX_BOARDS, dhdl->aod_bdnum + 1); + return; + } + + /* Scan all descendant devices to prepare info for DR operations. */ + bzero(&arg, sizeof (arg)); + arg.bdnum = dhdl->aod_bdnum; + arg.level = infop->awi_level; + if (ACPI_FAILURE(acpidev_dr_scan_topo(infop->awi_hdl, 0, &arg, + NULL))) { + ACPIDEV_DR_SET_FAILED(dhdl); + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to set DR properties " + "for descendants of %s.", infop->awi_name); + return; + } + + /* Get type of the hotplug capable board. */ + acpidev_dr_check_board_type(dhdl, &arg, infop->awi_name); + + /* + * Save ACPI handle of the hotplug capable board to speed up lookup + * board handle if caching is enabled. + */ + if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) { + acpidev_dr_board_handles[dhdl->aod_bdnum] = infop->awi_hdl; + } + + /* Update system maximum DR capabilities. 
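+ * The CMP count is a ceiling division of enumerated CPU devices by
+ * threads per chip; e.g. (numbers illustrative) 16 CPU objects with
+ * 8 threads per CMP gives (16 + 8 - 1) / 8 = 2 CMP units per board.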
*/ + cmp = (arg.cpu_id + acpidev_dr_max_cpu_units_per_cmp() - 1); + cmp /= acpidev_dr_max_cpu_units_per_cmp(); + if (cmp > acpidev_dr_max_cmp_per_board) { + acpidev_dr_max_cmp_per_board = cmp; + } + if (arg.mem_id > acpidev_dr_max_memory_per_board) { + acpidev_dr_max_memory_per_board = arg.mem_id; + } + if (arg.io_id > acpidev_dr_max_io_per_board) { + acpidev_dr_max_io_per_board = arg.io_id; + } +} + +static void +acpidev_dr_initialize_memory_hotplug(void) +{ + caddr_t buf; + uint32_t cnt; + acpidev_dr_capacity_t *cp; + + /* + * We have already checked that the platform supports DR operations. + */ + cp = acpidev_dr_get_capacity(); + ASSERT(cp != NULL && cp->hotplug_supported); + ASSERT(ISP2(cp->memory_alignment)); + ASSERT(cp->memory_alignment > MMU_PAGESIZE); + mem_node_physalign = cp->memory_alignment; + + /* Pre-populate memlist cache. */ + cnt = acpidev_dr_memory_device_cnt; + cnt *= acpidev_dr_max_segments_per_mem_device(); + cnt *= acpidev_dr_max_memlists_per_segment(); + if (cnt > ACPIDEV_DR_MAX_MEMLIST_ENTRIES) { + cmn_err(CE_WARN, "!acpidev: attempted to reserve too many " + "memlist entries (%u), max %u. Falling back to %u and " + "some memory hot add operations may fail.", + cnt, ACPIDEV_DR_MAX_MEMLIST_ENTRIES, + ACPIDEV_DR_MAX_MEMLIST_ENTRIES); + cnt = ACPIDEV_DR_MAX_MEMLIST_ENTRIES; + } + cnt *= sizeof (struct memlist); + buf = kmem_zalloc(cnt, KM_SLEEP); + memlist_free_block(buf, cnt); +} + +/* + * Create pseudo DR control device node if the system is hotplug capable. + * No need to acquire lock, it's called from single-threaded context + * during boot. pdip has been held by the caller. + */ +static ACPI_STATUS +acpidev_dr_create_node(dev_info_t *pdip) +{ + dev_info_t *dip; + char unit[32]; + char *path; + char *comps[] = { + "acpidr_sbd", + }; + + /* + * Disable support of DR operations if no hotplug capable board has + * been detected. + */ + if (acpidev_dr_boards == 0) { + acpidev_dr_supported = 0; + } else { + acpidev_dr_supported = 1; + } + + /* + * Don't create control device node if the system isn't hotplug capable. + */ + if (acpidev_dr_capable() == 0) { + return (AE_SUPPORT); + } + + /* Cache pointer to the ACPI SLIT table. */ + if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_SLIT, 1, + (ACPI_TABLE_HEADER **)&acpidev_slit_tbl_ptr))) { + acpidev_slit_tbl_ptr = NULL; + } + if (acpidev_srat_tbl_ptr == NULL || acpidev_slit_tbl_ptr == NULL) { + if (lgrp_plat_node_cnt != 1) { + /* + * Disable support of CPU/memory DR operations if lgrp + * is enabled but failed to cache SRAT/SLIT table + * pointers. + */ + cmn_err(CE_WARN, + "!acpidev: failed to get ACPI SRAT/SLIT table."); + plat_dr_disable_cpu(); + plat_dr_disable_memory(); + } + } + + ndi_devi_alloc_sleep(pdip, ACPIDEV_NODE_NAME_ACPIDR, + (pnode_t)DEVI_PSEUDO_NODEID, &dip); + + /* Set "unit-address" device property. */ + (void) snprintf(unit, sizeof (unit), "%u", 0); + if (ndi_prop_update_string(DDI_DEV_T_NONE, dip, + ACPIDEV_PROP_NAME_UNIT_ADDR, unit) != NDI_SUCCESS) { + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + cmn_err(CE_CONT, + "?acpidev: failed to set unit-address property for %s.\n", + ddi_pathname(dip, path)); + kmem_free(path, MAXPATHLEN); + (void) ddi_remove_child(dip, 0); + acpidev_dr_failed = 1; + return (AE_ERROR); + } + + /* Set "compatible" device property. 
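+ * The single "acpidr_sbd" entry is what allows ndi_devi_bind_driver()
+ * below to match this pseudo node to the sbd-based DR driver.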
*/ + if (ndi_prop_update_string_array(DDI_DEV_T_NONE, dip, OBP_COMPATIBLE, + comps, sizeof (comps) / sizeof (comps[0])) != NDI_SUCCESS) { + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + cmn_err(CE_CONT, "?acpidev: failed to set compatible " + "property for %s.\n", ddi_pathname(dip, path)); + kmem_free(path, MAXPATHLEN); + (void) ddi_remove_child(dip, 0); + acpidev_dr_failed = 1; + return (AE_ERROR); + } + + (void) ndi_devi_bind_driver(dip, 0); + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_initialize(dev_info_t *pdip) +{ + ACPI_STATUS rc; + + rc = acpidev_dr_create_node(pdip); + if (ACPI_FAILURE(rc)) { + return (rc); + } + + /* Initialize support of memory DR operations. */ + if (plat_dr_support_memory()) { + acpidev_dr_initialize_memory_hotplug(); + } + + /* Mark the DR subsystem is ready for use. */ + plat_dr_enable(); + + return (AE_OK); +} + +static ACPI_STATUS +acpidev_dr_find_board(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval) +{ + _NOTE(ARGUNUSED(lvl)); + + acpidev_data_handle_t dhdl; + + ASSERT(hdl != NULL); + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + /* No data handle available, not ready for DR operations. */ + return (AE_CTRL_DEPTH); + } else if (ACPIDEV_DR_IS_BOARD(dhdl) && ACPIDEV_DR_IS_WORKING(dhdl) && + dhdl->aod_bdnum == (intptr_t)ctx) { + ASSERT(retval != NULL); + *(ACPI_HANDLE *)retval = hdl; + return (AE_CTRL_TERMINATE); + } + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_get_board_handle(uint_t board, ACPI_HANDLE *hdlp) +{ + ACPI_STATUS rc = AE_OK; + ACPI_HANDLE hdl; + + ASSERT(hdlp != NULL); + if (hdlp == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_board_handle()."); + return (AE_BAD_PARAMETER); + } + + if (board >= acpidev_dr_boards) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: board number %d is out of range, max %d.", + board, acpidev_dr_boards); + return (AE_NOT_FOUND); + } + + /* Use cached handles if caching is enabled. */ + if ((acpidev_options & ACPIDEV_OUSER_NO_CACHE) == 0) { + if (acpidev_dr_board_handles[board] != NULL) { + hdl = acpidev_dr_board_handles[board]; + if (ACPI_FAILURE(acpidev_dr_find_board(hdl, 1, + (void *)(intptr_t)board, (void **)hdlp)) && + *hdlp != NULL) { + return (AE_OK); + } + } + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: board %d doesn't exist.", board); + *hdlp = NULL; + return (AE_NOT_FOUND); + } + + /* All hotplug capable boards should exist under \_SB_. 
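+ * When handle caching is disabled, the board is instead located by
+ * walking the namespace below \_SB_ with acpidev_dr_find_board() as
+ * the match callback, comparing each device's recorded board number.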
*/ + if (ACPI_FAILURE(AcpiGetHandle(ACPI_ROOT_OBJECT, + ACPIDEV_OBJECT_NAME_SB, &hdl))) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get handle of %s.", + ACPIDEV_OBJECT_NAME_SB); + return (AE_ERROR); + } + + *hdlp = NULL; + if (ACPI_FAILURE(AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl, + ACPIDEV_MAX_ENUM_LEVELS - 1, acpidev_dr_find_board, NULL, + (void *)(intptr_t)board, (void **)hdlp))) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to find ACPI handle " + "for board %d.", board); + rc = AE_NOT_FOUND; + } else if (*hdlp == NULL) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: board %d doesn't exist.", board); + rc = AE_NOT_FOUND; + } + + return (rc); +} + +acpidev_board_type_t +acpidev_dr_get_board_type(ACPI_HANDLE hdl) +{ + acpidev_data_handle_t dhdl; + acpidev_board_type_t type = ACPIDEV_INVALID_BOARD; + + if (hdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_board_type()."); + return (type); + } + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get data associated with %p.", hdl); + } else { + type = dhdl->aod_bdtype; + } + + return (type); +} + +ACPI_STATUS +acpidev_dr_get_board_number(ACPI_HANDLE hdl, uint32_t *bnump) +{ + acpidev_data_handle_t dhdl; + + if (hdl == NULL || bnump == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_board_number()."); + return (AE_BAD_PARAMETER); + } + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get data associated with %p.", hdl); + return (AE_ERROR); + } + *bnump = dhdl->aod_bdnum; + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_get_board_name(ACPI_HANDLE hdl, char *buf, size_t len) +{ + char *fmt; + int count = 0; + size_t rlen = 0; + ACPI_HANDLE thdl; + acpidev_data_handle_t dhdl; + acpidev_data_handle_t dhdls[ACPIDEV_MAX_ENUM_LEVELS]; + + if (hdl == NULL || buf == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_board_name()."); + return (AE_BAD_PARAMETER); + } + + /* Find ancestors of the device which are hotplug capable. */ + for (thdl = hdl; thdl != NULL; ) { + dhdl = acpidev_data_get_handle(thdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data " + "associated with %p.", thdl); + return (AE_ERROR); + } + + if (!ACPIDEV_DR_IS_BOARD(dhdl)) { + /* The board itself should be hotplug capable. */ + if (count == 0) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is " + "not hotplug capable.", thdl); + return (AE_ERROR); + } + } else { + if (ACPIDEV_DR_IS_FAILED(dhdl)) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is " + "in the FAILED state.", thdl); + } + + if (count >= ACPIDEV_MAX_ENUM_LEVELS) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: recursive level for hotplug " + "capable board is too deep."); + return (AE_ERROR); + } + + dhdls[count] = dhdl; + count++; + } + + if (acpidev_dr_hierarchy_name == 0) { + thdl = NULL; + } else if (ACPI_FAILURE(AcpiGetParent(thdl, &thdl))) { + thdl = NULL; + } + } + + /* Generate hierarchy board name for the board. 
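+ * Components are emitted outermost board first and joined with '.',
+ * so a CPU board nested under a system board could yield a name of
+ * the form SB0.CPU1 (illustrative; the component text comes from the
+ * ACPIDEV_DR_*_BD_FMT format strings and the per-board index).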
*/ + ASSERT(count > 0); + for (count--; count >= 0 && rlen < len; count--) { + dhdl = dhdls[count]; + switch (dhdl->aod_bdtype) { + case ACPIDEV_CPU_BOARD: + fmt = ACPIDEV_DR_CPU_BD_FMT; + break; + case ACPIDEV_MEMORY_BOARD: + fmt = ACPIDEV_DR_MEMORY_BD_FMT; + break; + case ACPIDEV_IO_BOARD: + fmt = ACPIDEV_DR_IO_BD_FMT; + break; + case ACPIDEV_SYSTEM_BOARD: + fmt = ACPIDEV_DR_SYSTEM_BD_FMT; + break; + case ACPIDEV_INVALID_BOARD: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid board type."); + return (AE_ERROR); + default: + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: unknown board type %u.", + dhdl->aod_bdtype); + return (AE_ERROR); + } + + /* Add "." before component name except first item. */ + if (rlen != 0) { + rlen += snprintf(buf + rlen, len - rlen, "."); + } + if (rlen < len) { + rlen += snprintf(buf + rlen, len - rlen, fmt, + dhdl->aod_bdidx); + } + } + + /* Check whether the buffer is sufficient. */ + if (rlen >= len) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer length to " + "acpidev_dr_get_board_name() is too small."); + return (AE_NO_MEMORY); + } + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_get_attachment_point(ACPI_HANDLE hdl, char *buf, size_t len) +{ + size_t rlen; + + if (hdl == NULL || buf == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_attachment_point()."); + return (AE_BAD_PARAMETER); + } + + rlen = snprintf(buf, len, "/devices/%s/%s@%u:", + ACPIDEV_NODE_NAME_ROOT, ACPIDEV_NODE_NAME_ACPIDR, 0); + if (rlen >= len) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: buffer to " + "acpidev_dr_get_attachment_point() is too small."); + return (AE_NO_MEMORY); + } + + return (acpidev_dr_get_board_name(hdl, buf + rlen, len - rlen)); +} + +/* + * Existence of ACPI _EJ0 method implies that the device is hotplug capable. + */ +int +acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl) +{ + ACPI_HANDLE ej0; + + ASSERT(hdl != NULL); + if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EJ0, &ej0))) { + return (0); + } + + return (1); +} + +int +acpidev_dr_device_has_edl(ACPI_HANDLE hdl) +{ + ACPI_HANDLE edl; + + ASSERT(hdl != NULL); + if (ACPI_FAILURE(AcpiGetHandle(hdl, ACPIDEV_METHOD_NAME_EDL, &edl))) { + return (0); + } + + return (1); +} + +int +acpidev_dr_device_is_present(ACPI_HANDLE hdl) +{ + int status; + + ASSERT(hdl != NULL); + + status = acpidev_query_device_status(hdl); + if (acpidev_check_device_present(status)) { + return (1); + } + + return (0); +} + +int +acpidev_dr_device_is_powered(ACPI_HANDLE hdl) +{ + int status; + + ASSERT(hdl != NULL); + + /* + * Check device status returned by ACPI _STA method. + * It implies that the device is powered if status is both PRESENT + * and ENABLED. + */ + status = acpidev_query_device_status(hdl); + if (acpidev_check_device_enabled(status)) { + return (1); + } + + return (0); +} + +ACPI_STATUS +acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap) +{ + acpidev_dr_capacity_t *cp; + + ASSERT(hdl != NULL); + ASSERT(ap != NULL); + if (ap == NULL || hdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_get_mem_alignment()."); + return (AE_BAD_PARAMETER); + } + + cp = acpidev_dr_get_capacity(); + if (cp == NULL || cp->hotplug_supported == B_FALSE) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get memory alignment."); + return (AE_SUPPORT); + } + *ap = cp->memory_alignment; + + return (AE_OK); +} + +/* + * Get the device property for the given name and store it into buf. + * Returns the amount of data copied to buf if len is large enough to + * hold all of the data. 
If len is not large enough, then the required + * len would be returned and buf would not be modified. On any errors, + * -1 is returned and buf is not modified. + */ +ACPI_STATUS +acpidev_dr_device_get_regspec(ACPI_HANDLE hdl, boolean_t assigned, + acpidev_regspec_t **regpp, uint_t *cntp) +{ + int *valp; + uint_t count; + char *propname; + dev_info_t *dip; + acpidev_data_handle_t dhdl; + + ASSERT(hdl != NULL); + ASSERT(regpp != NULL && cntp != NULL); + if (hdl == NULL || regpp == NULL || cntp == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to " + "acpidev_dr_device_get_regspec()."); + return (AE_BAD_PARAMETER); + } + + /* Set default return value. */ + *regpp = NULL; + *cntp = 0; + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get data associated with %p.", hdl); + return (AE_ERROR); + } else if ((dip = acpidev_data_get_devinfo(dhdl)) == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get dip associated with %p.", hdl); + return (AE_NOT_FOUND); + } + + propname = assigned ? "assigned-addresses" : "reg"; + if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + propname, &valp, &count) != DDI_PROP_SUCCESS) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to lookup device property %s.", propname); + return (AE_NOT_FOUND); + } + + if (count % (sizeof (**regpp) / sizeof (int)) != 0) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: device property %s is invalid.", propname); + ddi_prop_free(valp); + return (AE_ERROR); + } + + *regpp = (acpidev_regspec_t *)valp; + *cntp = count / (sizeof (**regpp) / sizeof (int)); + + return (AE_OK); +} + +void +acpidev_dr_device_free_regspec(acpidev_regspec_t *regp, uint_t count) +{ + _NOTE(ARGUNUSED(count)); + + if (regp != NULL) { + ddi_prop_free(regp); + } +} + +/* + * Return values + * . negative values on error + * . size of data copied to buffer if it's bigger enough + * . size of buffer needed if buffer is too small + */ +int +acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name, caddr_t buf, size_t len) +{ + int rlen = -1; + acpidev_data_handle_t dhdl; + + if (hdl == NULL) { + return (-1); + } + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + return (-1); + } else if (!ACPIDEV_DR_IS_WORKING(dhdl)) { + return (-1); + } + + if (strcmp(name, ACPIDEV_DR_PROP_PORTID) == 0) { + if (len >= sizeof (uint32_t)) { + *(uint32_t *)(void *)buf = dhdl->aod_portid; + } + rlen = sizeof (uint32_t); + } else if (strcmp(name, ACPIDEV_DR_PROP_BOARDNUM) == 0) { + if (len >= sizeof (uint32_t)) { + *(uint32_t *)(void *)buf = dhdl->aod_bdnum; + } + rlen = sizeof (uint32_t); + } else if (strcmp(name, ACPIDEV_DR_PROP_DEVNAME) == 0) { + switch (dhdl->aod_class_id) { + case ACPIDEV_CLASS_ID_CPU: + if (len >= sizeof (ACPIDEV_NODE_NAME_CPU)) { + (void) strlcpy((char *)buf, + ACPIDEV_NODE_NAME_CPU, len); + } + rlen = sizeof (ACPIDEV_NODE_NAME_CPU); + break; + + case ACPIDEV_CLASS_ID_MEMORY: + if (len >= sizeof (ACPIDEV_NODE_NAME_MEMORY)) { + (void) strlcpy((char *)buf, + ACPIDEV_NODE_NAME_MEMORY, len); + } + rlen = sizeof (ACPIDEV_NODE_NAME_MEMORY); + break; + + case ACPIDEV_CLASS_ID_PCI: + case ACPIDEV_CLASS_ID_PCIEX: + if (len >= sizeof (ACPIDEV_NODE_NAME_PCI)) { + (void) strlcpy((char *)buf, + ACPIDEV_NODE_NAME_PCI, len); + } + rlen = sizeof (ACPIDEV_NODE_NAME_PCI); + break; + + default: + break; + } + } + + return (rlen); +} + +/* + * Figure out device class of the device. + * It only supports device classes which may be involved in DR operations. 
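+ * Classification is two-stage: ACPI_TYPE_PROCESSOR objects are CPUs
+ * outright, while ACPI_TYPE_DEVICE objects are matched against the
+ * _HID/_CID tables below (e.g. the memory class keys off
+ * ACPIDEV_HID_MEMORY, conventionally the PNP0C80 memory-device id).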
+ */ +acpidev_class_id_t +acpidev_dr_device_get_class(ACPI_HANDLE hdl) +{ + ACPI_OBJECT_TYPE type; + ACPI_DEVICE_INFO *infop; + acpidev_class_id_t id = ACPIDEV_CLASS_ID_INVALID; + + static char *acpidev_id_cpu[] = { + ACPIDEV_HID_CPU, + }; + static char *acpidev_id_mem[] = { + ACPIDEV_HID_MEMORY, + }; + static char *acpidev_id_mod[] = { + ACPIDEV_HID_MODULE, + }; + static char *acpidev_id_pci[] = { + ACPIDEV_HID_PCI_HOSTBRIDGE, + }; + static char *acpidev_id_pciex[] = { + ACPIDEV_HID_PCIEX_HOSTBRIDGE, + }; + + /* Figure out device type by checking ACPI object type. */ + if (ACPI_FAILURE(AcpiGetType(hdl, &type))) { + return (ACPIDEV_CLASS_ID_INVALID); + } else if (type == ACPI_TYPE_PROCESSOR) { + return (ACPIDEV_CLASS_ID_CPU); + } else if (type != ACPI_TYPE_DEVICE) { + return (ACPIDEV_CLASS_ID_INVALID); + } + + if (ACPI_FAILURE(AcpiGetObjectInfo(hdl, &infop))) { + return (ACPIDEV_CLASS_ID_INVALID); + } + + /* Figure out device type by checking _HID and _CID. */ + if (acpidev_match_device_id(infop, + ACPIDEV_ARRAY_PARAM(acpidev_id_cpu))) { + id = ACPIDEV_CLASS_ID_CPU; + } else if (acpidev_match_device_id(infop, + ACPIDEV_ARRAY_PARAM(acpidev_id_mem))) { + id = ACPIDEV_CLASS_ID_MEMORY; + } else if (acpidev_match_device_id(infop, + ACPIDEV_ARRAY_PARAM(acpidev_id_mod))) { + id = ACPIDEV_CLASS_ID_CONTAINER; + } else if (acpidev_match_device_id(infop, + ACPIDEV_ARRAY_PARAM(acpidev_id_pciex))) { + id = ACPIDEV_CLASS_ID_PCIEX; + } else if (acpidev_match_device_id(infop, + ACPIDEV_ARRAY_PARAM(acpidev_id_pci))) { + id = ACPIDEV_CLASS_ID_PCI; + } + + AcpiOsFree(infop); + + return (id); +} + +ACPI_STATUS +acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl, uint32_t *idxp) +{ + acpidev_data_handle_t dhdl; + + ASSERT(idxp != NULL); + ASSERT(hdl != NULL); + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get data handle for %p.", hdl); + return (AE_ERROR); + } else if (dhdl->aod_class_id != ACPIDEV_CLASS_ID_MEMORY) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: object %p is not a memory device.", hdl); + return (AE_ERROR); + } else { + *idxp = dhdl->aod_memidx; + } + + return (AE_OK); +} + +int +acpidev_dr_device_is_board(ACPI_HANDLE hdl) +{ + acpidev_data_handle_t dhdl; + + ASSERT(hdl != NULL); + if (hdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_is_board()."); + return (0); + } + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + return (0); + } else if (!ACPIDEV_DR_IS_BOARD(dhdl)) { + return (0); + } + + return (1); +} + +ACPI_STATUS +acpidev_dr_device_walk_edl(ACPI_HANDLE hdl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval) +{ + ACPI_STATUS rc = AE_OK; + int i; + char *objname; + ACPI_OBJECT *obj; + ACPI_BUFFER buf; + char *method = ACPIDEV_METHOD_NAME_EDL; + + ASSERT(hdl != NULL); + ASSERT(cb != NULL); + if (hdl == NULL || cb == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_device_walk_edl()."); + return (AE_BAD_PARAMETER); + } + + objname = acpidev_get_object_name(hdl); + buf.Length = ACPI_ALLOCATE_BUFFER; + rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf, + ACPI_TYPE_PACKAGE); + if (rc == AE_NOT_FOUND) { + acpidev_free_object_name(objname); + return (AE_OK); + } else if (ACPI_FAILURE(rc)) { + cmn_err(CE_WARN, + "!acpidev: failed to evaluate method %s under %s.", + method, objname); + acpidev_free_object_name(objname); + return (AE_ERROR); + } + + /* Validate the package structure. 
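+ * _EDL is expected to evaluate to a package of device references,
+ * roughly (ASL sketch): Name (_EDL, Package () { \_SB.PCI0 }).
+ * Every element must be a local reference resolving to a device;
+ * anything else fails the walk below.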
*/ + obj = buf.Pointer; + for (i = 0; i < obj->Package.Count; i++) { + if (obj->Package.Elements[i].Type != + ACPI_TYPE_LOCAL_REFERENCE) { + cmn_err(CE_WARN, "!acpidev: element %d in package " + "returned by %s of %s is not local reference.", + i, method, objname); + AcpiOsFree(buf.Pointer); + acpidev_free_object_name(objname); + return (AE_ERROR); + } else if (obj->Package.Elements[i].Reference.ActualType != + ACPI_TYPE_DEVICE) { + cmn_err(CE_WARN, "!acpidev: element %d in package " + "returned by %s of %s doesn't refer to device.", + i, method, objname); + AcpiOsFree(buf.Pointer); + acpidev_free_object_name(objname); + return (AE_ERROR); + } + } + + for (i = 0; i < obj->Package.Count; i++) { + if (obj->Package.Elements[i].Reference.Handle == NULL) { + cmn_err(CE_WARN, "!acpidev: handle of element %d in " + "package returned by %s of %s is NULL.", + i, method, objname); + continue; + } + rc = (*cb)(obj->Package.Elements[i].Reference.Handle, + UINT32_MAX, arg, retval); + if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) { + rc = AE_OK; + } + if (ACPI_FAILURE(rc)) { + break; + } + } + + AcpiOsFree(buf.Pointer); + acpidev_free_object_name(objname); + + return (rc); +} + +ACPI_STATUS +acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval) +{ + ACPI_STATUS rc = AE_OK; + char *objname; + ACPI_OBJECT *obj; + ACPI_BUFFER buf; + ACPI_HANDLE chdl; + char *method = ACPIDEV_METHOD_NAME_EJD; + + ASSERT(hdl != NULL); + ASSERT(cb != NULL); + if (hdl == NULL || cb == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_device_walk_ejd()."); + return (AE_BAD_PARAMETER); + } + + objname = acpidev_get_object_name(hdl); + buf.Length = ACPI_ALLOCATE_BUFFER; + rc = AcpiEvaluateObjectTyped(hdl, method, NULL, &buf, + ACPI_TYPE_STRING); + if (rc == AE_NOT_FOUND) { + acpidev_free_object_name(objname); + return (AE_OK); + } else if (ACPI_FAILURE(rc)) { + cmn_err(CE_WARN, + "!acpidev: failed to evaluate method %s under %s.", + method, objname); + acpidev_free_object_name(objname); + return (AE_ERROR); + } + + obj = buf.Pointer; + ASSERT(obj->String.Pointer); + if (ACPI_FAILURE(AcpiGetHandle(NULL, obj->String.Pointer, &chdl))) { + cmn_err(CE_WARN, "!acpidev: failed to get handle for %s.", + obj->String.Pointer); + rc = AE_ERROR; + } else { + rc = (*cb)(chdl, UINT32_MAX, arg, retval); + if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) { + rc = AE_OK; + } + } + + AcpiOsFree(buf.Pointer); + acpidev_free_object_name(objname); + + return (rc); +} + +/* + * Walk all child devices and special devices in the eject device list. + */ +static ACPI_STATUS +acpidev_dr_device_walk_child(ACPI_HANDLE hdl, boolean_t init, uint_t max_lvl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval) +{ + ACPI_STATUS rc = AE_OK; + + ASSERT(hdl != NULL); + ASSERT(cb != NULL); + if (hdl == NULL || cb == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_device_walk_child()."); + return (AE_BAD_PARAMETER); + } + + /* + * Walk the eject device list first when destroying. + * According to ACPI spec, devices in _EDL list must be handled first + * when the ejecting device. + */ + if (init == B_FALSE) { + rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval); + if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: failed to walk eject device list in " + "acpidev_dr_device_walk_child()."); + } + } + + /* Walk all child ACPI DEVICE objects. 
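+ * Ordering matters: on teardown (init == B_FALSE) the _EDL devices
+ * were handled first above, matching the ACPI requirement that
+ * dependents be ejected before the ejecting device; on initialization
+ * _EDL is walked last, after the DEVICE and PROCESSOR children.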
*/ + if (ACPI_SUCCESS(rc)) { + rc = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl, + max_lvl, cb, NULL, arg, retval); + if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: failed to walk DEVICE objects in " + "acpidev_dr_device_walk_child()."); + } + } + + /* Walk all child ACPI PROCESSOR objects. */ + if (ACPI_SUCCESS(rc)) { + rc = AcpiWalkNamespace(ACPI_TYPE_PROCESSOR, hdl, + max_lvl, cb, NULL, arg, retval); + if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: failed to walk PROCESSOR objects in " + "acpidev_dr_device_walk_child()."); + } + } + + /* + * Walk the eject device list last when initializing. + */ + if (init == B_TRUE && ACPI_SUCCESS(rc)) { + rc = acpidev_dr_device_walk_edl(hdl, cb, arg, retval); + if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: failed to walk eject device list in " + "acpidev_dr_device_walk_child()."); + } + } + + return (rc); +} + +ACPI_STATUS +acpidev_dr_device_walk_device(ACPI_HANDLE hdl, uint_t max_lvl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval) +{ + ACPI_STATUS rc = AE_OK; + char *objname; + + ASSERT(hdl != NULL); + ASSERT(cb != NULL); + if (hdl == NULL || cb == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter to " + "acpidev_dr_walk_device()."); + return (AE_BAD_PARAMETER); + } + + /* Walk the top object itself first. */ + rc = (*cb)(hdl, 0, arg, retval); + if (rc == AE_CTRL_DEPTH || rc == AE_CTRL_TERMINATE) { + rc = AE_OK; + } else if (ACPI_FAILURE(rc)) { + objname = acpidev_get_object_name(hdl); + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to handle top node %s " + "in acpidev_dr_walk_device().", objname); + acpidev_free_object_name(objname); + } else { + rc = acpidev_dr_device_walk_child(hdl, B_TRUE, max_lvl, + cb, arg, retval); + if (ACPI_FAILURE(rc)) { + objname = acpidev_get_object_name(hdl); + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to handle descendant nodes of %s " + "in acpidev_dr_walk_device().", objname); + acpidev_free_object_name(objname); + } + } + + return (rc); +} + +static ACPI_STATUS +acpidev_dr_no_support(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval) +{ + _NOTE(ARGUNUSED(arg, retval)); + + char *objname; + + ASSERT(hdl != NULL); + + objname = acpidev_get_object_name(hdl); + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: device %s at level 0x%x is unsupported.", + objname, lvl); + acpidev_free_object_name(objname); + + return (AE_SUPPORT); +} + +static ACPI_STATUS +acpidev_dr_set_prop(ACPI_HANDLE hdl, char *objname, + struct acpidev_dr_set_prop_arg *ap, uint32_t lvl, + acpidev_class_id_t clsid, uint_t *devid) +{ + acpidev_data_handle_t dhdl; + + /* Create data handle first if it doesn't exist yet. */ + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + uint32_t rlvl; + ACPI_HANDLE phdl; + + /* + * Compute level by walking ACPI namespace if it's a device + * from the eject device list. + */ + if (lvl == UINT32_MAX) { + /* + * AcpiGetParent() fails when it tries to get + * the parent of the ACPI namespace root node. 
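+ * The loop below therefore counts AcpiGetParent() hops until it sees
+ * ACPI_ROOT_OBJECT, which yields the object's depth in the namespace;
+ * an rlvl of 0 afterwards is rejected as an error.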
+ */ + for (rlvl = 0, phdl = hdl; + ACPI_SUCCESS(AcpiGetParent(phdl, &phdl)); + rlvl++) { + if (phdl == ACPI_ROOT_OBJECT) { + break; + } + } + if (rlvl == 0) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get level of %s.", + objname); + return (AE_BAD_PARAMETER); + } + } else { + rlvl = ap->level; + } + if (rlvl >= ACPIDEV_MAX_ENUM_LEVELS) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: recursive level of %s is too deep.", + objname); + return (AE_SUPPORT); + } + + dhdl = acpidev_data_create_handle(hdl); + if (dhdl != NULL) { + dhdl->aod_hdl = hdl; + dhdl->aod_level = rlvl; + } + } + + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create data handle " + "for device %s.", objname); + return (AE_NO_MEMORY); + } + + if (ACPIDEV_DR_IS_READY(dhdl)) { + /* + * The same device may be enumerated twice at most. Once as + * child devices, another time from the eject device list. + */ + if (dhdl->aod_bdnum == ap->bdnum) { + return (AE_OK); + } else { + /* + * A device has been enumerated more than once from + * different paths. It's dangerous to support such + * a topology. Disable support of DR operations. + */ + ACPIDEV_DEBUG(CE_WARN, "!acpidev: device %s has been " + "enumerated more than once for DR.", objname); + acpidev_dr_failed = 1; + return (AE_SUPPORT); + } + } + + /* Set properties for DR operations. */ + dhdl->aod_class_id = clsid; + dhdl->aod_bdnum = ap->bdnum; + dhdl->aod_portid = atomic_inc_32_nv(devid) - 1; + if (clsid == ACPIDEV_CLASS_ID_MEMORY) { + dhdl->aod_memidx = acpidev_dr_memory_device_cnt; + ASSERT(dhdl->aod_memidx < ACPI_MEMNODE_DEVID_BOOT); + } + ACPIDEV_DR_SET_READY(dhdl); + + return (AE_OK); +} + +/* + * Verify whether the hardware topology is supported by the DR driver. + * The ACPI specification is so flexible that for safety reasons, only + * a few well defined topologies are supported. + * Possible values of parameter lvl: + * 0: the device is the board itself. + * UINT32_MAX: the device is from the _EDL list of the board. + * other: the device is a descendant of the board. + * Return values: + * AE_OK: the topology is supported + * AE_SUPPORT: the topology is unsupported + * AE_ERROR: other errors + */ +static ACPI_STATUS +acpidev_dr_scan_topo(ACPI_HANDLE hdl, UINT32 lvl, void *arg, void **retval) +{ + _NOTE(ARGUNUSED(retval)); + + ACPI_STATUS rc = AE_OK; + char *objname; + acpidev_class_id_t cid; + struct acpidev_dr_set_prop_arg *ap = arg; + + ASSERT(hdl != NULL); + ASSERT(lvl == 0 || lvl == 1 || lvl == UINT32_MAX); + objname = acpidev_get_object_name(hdl); + + /* + * Validate descendants of the hotplug capable board. + * lvl is zero if it's the hotplug capable board itself, otherwise + * non-zero for descendants. + */ + if (lvl != 0) { + /* + * Skip subtree if the device is hotplug capable. + * It will be treated as another hotplug capable board. + */ + if (acpidev_dr_device_hotplug_capable(hdl)) { + acpidev_free_object_name(objname); + return (AE_CTRL_DEPTH); + } + + /* + * Don't support the _EDL list of a non-hotplug-capable device. + */ + if (acpidev_dr_device_has_edl(hdl)) { + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: non-hotplug-capable " + "object %s has _EDL method.", objname); + acpidev_free_object_name(objname); + return (AE_SUPPORT); + } + } + + cid = acpidev_dr_device_get_class(hdl); + switch (cid) { + case ACPIDEV_CLASS_ID_CPU: + /* Don't support logical CPUs in the _EDL list. 
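+ * (lvl == UINT32_MAX marks objects reached through an _EDL list
+ * rather than as children; CPUs are only supported as direct
+ * descendants of the board, so such topologies get AE_SUPPORT.)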
*/ + if (lvl == UINT32_MAX) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: logical CPU %s in " + "_EDL is unsupported.", objname); + rc = AE_SUPPORT; + break; + } + + /* Don't support logical CPUs with children. */ + ap->level++; + rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1, + acpidev_dr_no_support, arg, NULL); + ap->level--; + if (rc == AE_SUPPORT) { + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: logical CPU %s has " + "child or dependent devices.", objname); + break; + } else if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to scan " + "children of logical CPU %s.", objname); + rc = AE_ERROR; + break; + } else if (ap != NULL) { + rc = acpidev_dr_set_prop(hdl, objname, ap, lvl, + ACPIDEV_CLASS_ID_CPU, &ap->cpu_id); + } + break; + + case ACPIDEV_CLASS_ID_MEMORY: + /* Don't support memory devices with children. */ + ap->level++; + rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1, + acpidev_dr_no_support, arg, NULL); + ap->level--; + if (rc == AE_SUPPORT) { + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: memory device %s has child or " + "dependent devices.", objname); + } else if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to scan children of " + "memory device %s.", objname); + rc = AE_ERROR; + } else if (ap != NULL) { + acpidev_dr_memory_device_cnt++; + rc = acpidev_dr_set_prop(hdl, objname, ap, lvl, + ACPIDEV_CLASS_ID_MEMORY, &ap->mem_id); + } + break; + + case ACPIDEV_CLASS_ID_PCI: + case ACPIDEV_CLASS_ID_PCIEX: + /* Don't scan child/descendant devices of PCI/PCIex devices. */ + if (ap != NULL) { + rc = acpidev_dr_set_prop(hdl, objname, ap, lvl, + cid, &ap->io_id); + } + break; + + case ACPIDEV_CLASS_ID_CONTAINER: + /* Don't support module devices in the _EDL list. */ + if (lvl == UINT32_MAX) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: module device %s in " + "_EDL is unsupported.", objname); + rc = AE_SUPPORT; + break; + } + + /* Don't support recurrence of module devices. */ + if (lvl > 0) { + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: recursion level of " + "module device %s is too deep.", objname); + rc = AE_SUPPORT; + break; + } + + ap->level++; + rc = acpidev_dr_device_walk_child(hdl, B_TRUE, 1, + acpidev_dr_scan_topo, arg, NULL); + ap->level--; + if (ACPI_SUCCESS(rc) && ap != NULL) { + rc = acpidev_dr_set_prop(hdl, objname, ap, lvl, + ACPIDEV_CLASS_ID_CONTAINER, &ap->mod_id); + } + break; + + case ACPIDEV_CLASS_ID_INVALID: + /*FALLTHROUGH*/ + default: + ACPIDEV_DEBUG(CE_NOTE, + "!acpidev: device %s is unsupported.", objname); + rc = AE_SUPPORT; + break; + } + + acpidev_free_object_name(objname); + + return (rc); +} + +/* Create walk information structures. 
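+ * Two acpidev_walk_info_t structures are built for a hotplug probe:
+ * one for the parent, whose dip is found by walking up the namespace
+ * if necessary, and one for the device itself chained to the parent.
+ * The caller releases both with acpidev_free_walk_info().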
+ */
+static ACPI_STATUS
+acpidev_dr_create_walk_info(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl,
+    char *objname, acpidev_walk_info_t **infopp, acpidev_walk_info_t **cinfopp)
+{
+	ACPI_HANDLE phdl = NULL;
+	dev_info_t *pdip = NULL;
+	acpidev_data_handle_t pdhdl, tdhdl;
+	acpidev_walk_info_t *infop = NULL, *cinfop = NULL;
+
+	ASSERT(hdl != NULL);
+	ASSERT(dhdl != NULL);
+	ASSERT(dhdl->aod_class_list != NULL);
+	ASSERT(objname != NULL);
+	ASSERT(infopp != NULL);
+	ASSERT(cinfopp != NULL);
+
+	if (ACPI_FAILURE(AcpiGetParent(hdl, &phdl))) {
+		cmn_err(CE_WARN,
+		    "!acpidev: failed to get parent object of %s.", objname);
+		return (AE_ERROR);
+	}
+
+	pdhdl = acpidev_data_get_handle(phdl);
+	if (pdhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+		    "associated with parent of %s.", objname);
+		return (AE_ERROR);
+	}
+	if (pdhdl->aod_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: recursion level (%d) of %s is too deep.",
+		    pdhdl->aod_level, objname);
+		return (AE_ERROR);
+	}
+	ASSERT(pdhdl->aod_class_list != NULL);
+	if (pdhdl->aod_class_list == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: class list for parent of %s is NULL.", objname);
+		return (AE_ERROR);
+	}
+
+	/* Allocate a walk info structure for its parent. */
+	infop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+	    pdhdl->aod_level, phdl, dhdl->aod_class_list, NULL);
+	if (infop == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+		    "structure for parent of %s.", objname);
+		return (AE_ERROR);
+	}
+
+	/* Get the parent dip if it's not ready yet. */
+	while (infop->awi_dip == NULL) {
+		if (ACPI_FAILURE(AcpiGetParent(phdl, &phdl))) {
+			ACPIDEV_DEBUG(CE_WARN,
+			    "!acpidev: failed to get parent of object %p.",
+			    phdl);
+			break;
+		}
+		tdhdl = acpidev_data_get_handle(phdl);
+		if (tdhdl == NULL) {
+			ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get data "
+			    "associated with object %p.", phdl);
+			break;
+		}
+		pdip = acpidev_data_get_devinfo(tdhdl);
+		if (pdip != NULL) {
+			infop->awi_dip = pdip;
+			break;
+		}
+		/* Give up if we reach the ACPI namespace root node. */
+		if (phdl == ACPI_ROOT_OBJECT) {
+			break;
+		}
+	}
+	if (infop->awi_dip == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get parent dip of %s.", objname);
+		acpidev_free_walk_info(infop);
+		return (AE_ERROR);
+	}
+
+	/* Allocate a walk info for the child. */
+	cinfop = acpidev_alloc_walk_info(ACPIDEV_OP_HOTPLUG_PROBE,
+	    infop->awi_level + 1, hdl, NULL, infop);
+	if (cinfop == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info "
+		    "structure for %s.", objname);
+		acpidev_free_walk_info(infop);
+		return (AE_ERROR);
+	}
+
+	*infopp = infop;
+	*cinfopp = cinfop;
+
+	return (AE_OK);
+}
+
+static ACPI_STATUS
+acpidev_dr_probe_object(ACPI_HANDLE hdl, acpidev_data_handle_t dhdl)
+{
+	ACPI_STATUS rc = AE_OK;
+	int circ;
+	char *objname;
+	dev_info_t *pdip;
+	ACPI_STATUS res;
+	ACPI_OBJECT_TYPE type;
+	acpidev_class_list_t *it;
+	acpidev_walk_info_t *infop, *cinfop;
+
+	ASSERT(hdl != NULL);
+	ASSERT(dhdl != NULL);
+	if (hdl == NULL || dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: hdl or dhdl is NULL in "
+		    "acpidev_dr_probe_object().");
+		return (AE_BAD_PARAMETER);
+	}
+	objname = acpidev_get_object_name(hdl);
+
+	/* Check whether the device is of interest.
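+	 * Only ACPI object types set in acpidev_object_type_mask take part
+	 * in enumeration; everything else is rejected up front.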
+	 */
+	if (ACPI_FAILURE(AcpiGetType(hdl, &type)) ||
+	    type > ACPI_TYPE_NS_NODE_MAX ||
+	    BT_TEST(acpidev_object_type_mask, type) == 0) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: ACPI object %s is unsupported.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_SUPPORT);
+	}
+
+	if (dhdl->aod_class_list == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: class list is NULL in data associated with %s.",
+		    objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	pdip = NULL;
+	infop = NULL;
+	cinfop = NULL;
+	rc = acpidev_dr_create_walk_info(hdl, dhdl, objname, &infop, &cinfop);
+	if (ACPI_FAILURE(rc)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to create walk info structures for %s.",
+		    objname);
+		acpidev_free_object_name(objname);
+		return (rc);
+	}
+	ASSERT(infop != NULL);
+	ASSERT(infop->awi_dip != NULL);
+	ASSERT(infop->awi_class_list != NULL);
+	ASSERT(cinfop != NULL);
+	ASSERT(cinfop->awi_data == dhdl);
+
+	/* Lock the parent dip before touching children. */
+	pdip = infop->awi_dip;
+	ndi_devi_enter(pdip, &circ);
+	rw_enter(&acpidev_class_lock, RW_READER);
+
+	/* Call pre-probe callback functions to prepare for probing. */
+	for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+		if (it->acl_class->adc_pre_probe == NULL) {
+			continue;
+		}
+		infop->awi_class_curr = it->acl_class;
+		if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) {
+			ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to pre-probe "
+			    "device of type %s under %s.",
+			    it->acl_class->adc_class_name, infop->awi_name);
+		}
+	}
+
+	/* Call registered probe callback functions to probe devices. */
+	for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+		if (it->acl_class->adc_probe == NULL) {
+			continue;
+		}
+		cinfop->awi_class_curr = it->acl_class;
+		res = it->acl_class->adc_probe(cinfop);
+		if (ACPI_FAILURE(res)) {
+			rc = res;
+			ACPIDEV_DEBUG(CE_NOTE,
+			    "!acpidev: failed to process object %s under %s.",
+			    objname, infop->awi_name);
+		}
+	}
+
+	/* Call post-probe callback functions to clean up. */
+	for (it = *(infop->awi_class_list); it != NULL; it = it->acl_next) {
+		if (it->acl_class->adc_post_probe == NULL) {
+			continue;
+		}
+		infop->awi_class_curr = it->acl_class;
+		if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) {
+			ACPIDEV_DEBUG(CE_NOTE, "!acpidev: failed to post-probe "
+			    "device of type %s under %s.",
+			    it->acl_class->adc_class_name, infop->awi_name);
+		}
+	}
+
+	rw_exit(&acpidev_class_lock);
+	ndi_devi_exit(pdip, circ);
+
+	acpidev_free_walk_info(cinfop);
+	acpidev_free_walk_info(infop);
+	acpidev_free_object_name(objname);
+
+	return (rc);
+}
+
+/*
+ * Some PCI/PCIex buses embedded in physical processors may be presented in
+ * the eject device list instead of being presented as child devices.
+ * This function identifies such devices and creates device nodes for them.
+ */
+static ACPI_STATUS
+acpidev_dr_probe_dependent(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+    void **retval)
+{
+	_NOTE(ARGUNUSED(retval));
+
+	ACPI_STATUS rc = AE_OK;
+	int status;
+	char *objname;
+	ACPI_HANDLE phdl, thdl;
+	acpidev_data_handle_t dhdl;
+
+	ASSERT(lvl == UINT32_MAX);
+	ASSERT(hdl != NULL);
+	ASSERT(ctx != NULL);
+	phdl = ctx;
+	objname = acpidev_get_object_name(hdl);
+
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data associated with %s.",
+		    objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	/*
+	 * It should be treated as another board if the device is hotplug
+	 * capable.
+	 */
+	if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+		acpidev_free_object_name(objname);
+		return (AE_OK);
+	} else if (!ACPIDEV_DR_IS_WORKING(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: %s is unusable for DR operations.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_SUPPORT);
+	}
+
+	/*
+	 * Skip hdl if it's a descendant of phdl because it should have
+	 * already been handled when handling phdl itself.
+	 */
+	for (thdl = hdl; ACPI_SUCCESS(AcpiGetParent(thdl, &thdl)); ) {
+		/* Return when we reach phdl. */
+		if (thdl == phdl) {
+			acpidev_free_object_name(objname);
+			return (AE_OK);
+		}
+		/* Break out when we reach the ACPI namespace root node. */
+		if (thdl == ACPI_ROOT_OBJECT) {
+			break;
+		}
+	}
+
+	/*
+	 * No support of enumerating PCI/PCIex Host Bridge devices yet.
+	 * It will be enabled when PCI/PCIex Host Bridge hotplug is ready.
+	 */
+	if (dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCI ||
+	    dhdl->aod_class_id == ACPIDEV_CLASS_ID_PCIEX) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: PCI/PCIEX host bridge %s is "
+		    "unsupported, skip it.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_OK);
+	}
+
+	/* Check whether the device exists and has been enabled. */
+	status = acpidev_query_device_status(hdl);
+	if (!acpidev_check_device_enabled(status)) {
+		ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s is disabled/absent "
+		    "when trying to connect it.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_OK);
+	}
+
+	/* Probe the device and its children. */
+	rc = acpidev_dr_probe_object(hdl, dhdl);
+	if (ACPI_FAILURE(rc)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to probe object %s in eject device list.",
+		    objname);
+		return (rc);
+	}
+
+	return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_device_insert(ACPI_HANDLE hdl)
+{
+	ACPI_STATUS rc = AE_OK;
+	int status, circ;
+	char *objname;
+	dev_info_t *dip;
+	acpidev_data_handle_t dhdl;
+
+	ASSERT(acpidev_root_node() != NULL);
+	ASSERT(hdl != NULL);
+	if (hdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+		    "acpidev_dr_device_insert() is NULL.");
+		return (AE_BAD_PARAMETER);
+	}
+
+	objname = acpidev_get_object_name(hdl);
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data handle associated with %s.",
+		    objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	/* Validate that the object is hotplug capable. */
+	if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: object %s is not hotplug capable.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_SUPPORT);
+	} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+		    "state, unusable for DR.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	/* Check whether the device exists and has been enabled. */
+	status = acpidev_query_device_status(hdl);
+	if (!acpidev_check_device_enabled(status)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is disabled/absent "
+		    "when trying to connect it.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_NOT_EXIST);
+	}
+
+	/* Check that there's no device node created for the object yet.
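+	 * An existing dip means the board is already connected, so report
+	 * AE_ALREADY_EXISTS instead of probing it twice.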
+	 */
+	dip = acpidev_data_get_devinfo(dhdl);
+	if (dip != NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: device node for object %s "
+		    "already exists when trying to connect it.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_ALREADY_EXISTS);
+	}
+
+	/*
+	 * Solaris has a limitation that all device nodes for PCI/PCIex host
+	 * bridges must exist directly under /devices.
+	 * Special care is needed here to deal with hot-adding PCI/PCIex host
+	 * bridges to avoid deadlock caused by ndi_devi_enter().
+	 * Here the lock on ddi_root_node() is held first, which breaks the
+	 * potential deadlock cycle.
+	 */
+	ndi_devi_enter(ddi_root_node(), &circ);
+
+	rc = acpidev_dr_probe_object(hdl, dhdl);
+	if (ACPI_SUCCESS(rc)) {
+		rc = acpidev_dr_device_walk_edl(hdl,
+		    &acpidev_dr_probe_dependent, hdl, NULL);
+	}
+	if (ACPI_FAILURE(rc)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device "
+		    "nodes for children of %s.", objname);
+		cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+		    "due to failure when creating device nodes for it.",
+		    objname);
+		ACPIDEV_DR_SET_FAILED(dhdl);
+	}
+
+	ndi_devi_exit(ddi_root_node(), circ);
+	acpidev_free_object_name(objname);
+
+	return (rc);
+}
+
+static ACPI_STATUS
+acpidev_dr_device_remove_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx,
+    void **retval)
+{
+	_NOTE(ARGUNUSED(lvl));
+
+	ACPI_STATUS rc = AE_OK;
+	int status;
+	char *objname;
+	dev_info_t *dip;
+	acpidev_data_handle_t dhdl;
+	struct acpidev_dr_device_remove_arg *argp;
+
+	ASSERT(hdl != NULL && ctx != NULL);
+	if (hdl == NULL || ctx == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter to "
+		    "acpidev_dr_device_remove_cb() is NULL.");
+		return (AE_BAD_PARAMETER);
+	}
+
+	argp = (struct acpidev_dr_device_remove_arg *)ctx;
+	objname = acpidev_get_object_name(hdl);
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data handle associated with %s.",
+		    objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	/* Validate that the object is hotplug capable. */
+	/* It's the hotplug capable board itself if level is zero. */
+	if (argp->level == 0) {
+		if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+			ACPIDEV_DEBUG(CE_WARN,
+			    "!acpidev: object %s is not hotplug capable.",
+			    objname);
+			acpidev_free_object_name(objname);
+			return (AE_SUPPORT);
+		} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+			ACPIDEV_DEBUG(CE_WARN,
+			    "!acpidev: object %s is unusable for DR.", objname);
+			acpidev_free_object_name(objname);
+			return (AE_SUPPORT);
+		}
+	} else {
+		/* It's a device under the hotplug capable board. */
+		/*
+		 * Skip it if the device itself is hotplug capable.
+		 * It will be treated as another hotplug capable board.
+		 */
+		if (ACPIDEV_DR_IS_BOARD(dhdl)) {
+			acpidev_free_object_name(objname);
+			return (AE_OK);
+		}
+
+		if (!ACPIDEV_DR_IS_READY(dhdl)) {
+			ACPIDEV_DEBUG(CE_WARN,
+			    "!acpidev: object %s is not hotplug capable.",
+			    objname);
+			acpidev_free_object_name(objname);
+			return (AE_SUPPORT);
+		} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+			ACPIDEV_DEBUG(CE_WARN,
+			    "!acpidev: object %s is unusable for DR.", objname);
+			acpidev_free_object_name(objname);
+			return (AE_SUPPORT);
+		}
+	}
+
+	/* Skip the device if it hasn't been enabled at all.
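+	 * A device that was never enabled never had a device node created
+	 * for it, so there is nothing to tear down.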
*/ + status = acpidev_data_get_status(dhdl); + if (!acpidev_check_device_enabled(status)) { + acpidev_free_object_name(objname); + return (AE_OK); + } + + dip = acpidev_data_get_devinfo(dhdl); + if (dip == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get dev_info associated with %s.", + objname); + acpidev_free_object_name(objname); + return (AE_SUPPORT); + } + + /* For safety, only handle supported device types when unconfiguring. */ + switch (dhdl->aod_class_id) { + case ACPIDEV_CLASS_ID_CONTAINER: + /*FALLTHROUGH*/ + case ACPIDEV_CLASS_ID_CPU: + /*FALLTHROUGH*/ + case ACPIDEV_CLASS_ID_MEMORY: + break; + + default: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s (type %d) doesn't " + "support unconfiguration.", objname, dhdl->aod_class_id); + acpidev_free_object_name(objname); + return (AE_SUPPORT); + } + + /* Destroy descendants first. */ + argp->level++; + rc = acpidev_dr_device_walk_child(hdl, B_FALSE, 1, + acpidev_dr_device_remove_cb, ctx, retval); + argp->level--; + if (ACPI_FAILURE(rc)) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to destroy descendants of %s.", objname); + acpidev_free_object_name(objname); + return (rc); + } + + /* Untag dip and ACPI object before destroying the dip. */ + if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) && + ACPI_FAILURE(acpica_untag_devinfo(dip, hdl))) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to untag object %s.", objname); + /* Mark the node as unusable. */ + ACPIDEV_DR_SET_FAILED(dhdl); + acpidev_free_object_name(objname); + return (AE_ERROR); + } + + /* Destroy the node itself. */ + if (e_ddi_branch_destroy(dip, NULL, 0) != 0) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + if ((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) && + ACPI_FAILURE(acpica_tag_devinfo(dip, hdl))) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to retag object %s.", objname); + } + + /* Mark the node as unusable. */ + ACPIDEV_DR_SET_FAILED(dhdl); + + (void) ddi_pathname(dip, path); + cmn_err(CE_WARN, + "acpidev: failed to remove node %s (%s).", path, objname); + kmem_free(path, MAXPATHLEN); + acpidev_free_object_name(objname); + + return (AE_ERROR); + } + + /* Update status and information associated with the device. */ + dhdl->aod_dip = NULL; + dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_CREATED; + dhdl->aod_iflag &= ~ACPIDEV_ODF_DEVINFO_TAGGED; + if (dhdl->aod_class != NULL) { + if (dhdl->aod_class->adc_fini != NULL) { + (*(dhdl->aod_class->adc_fini))(hdl, dhdl, + dhdl->aod_class); + } + atomic_dec_32(&(dhdl->aod_class->adc_refcnt)); + dhdl->aod_class = NULL; + } + dhdl->aod_iflag &= ~ACPIDEV_ODF_STATUS_VALID; + dhdl->aod_status = 0; + + acpidev_free_object_name(objname); + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_device_remove(ACPI_HANDLE hdl) +{ + ACPI_STATUS rc = AE_OK; + int circ; + char *objname; + acpidev_data_handle_t dhdl; + struct acpidev_dr_device_remove_arg arg; + + ASSERT(acpidev_root_node() != NULL); + ASSERT(hdl != NULL); + if (hdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to " + "acpidev_dr_device_remove() is NULL."); + return (AE_BAD_PARAMETER); + } + + objname = acpidev_get_object_name(hdl); + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get data handle associated with %s.", + objname); + acpidev_free_object_name(objname); + return (AE_ERROR); + } + + /* Validate that the device is hotplug capable. 
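+	 * Only boards marked ready by the boot-time scan may be removed;
+	 * boards in the FAILED state are left untouched.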
+	 */
+	if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: object %s is not hotplug capable.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_SUPPORT);
+	} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %s is in the FAILED "
+		    "state, unusable for DR.", objname);
+		acpidev_free_object_name(objname);
+		return (AE_ERROR);
+	}
+
+	/*
+	 * Recursively destroy descendants under the top node.
+	 * No need to undo what has been done if an error occurs; the DR
+	 * driver will handle the cleanup.
+	 */
+	/*
+	 * Lock ddi_root_node() to avoid deadlock.
+	 */
+	ndi_devi_enter(ddi_root_node(), &circ);
+
+	arg.level = 0;
+	rc = acpidev_dr_device_remove_cb(hdl, 0, &arg, NULL);
+	ASSERT(arg.level == 0);
+	if (ACPI_FAILURE(rc)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to destroy device "
+		    "nodes for children of %s.", objname);
+		cmn_err(CE_WARN, "!acpidev: disable DR support for object %s "
+		    "due to failure when destroying device nodes for it.",
+		    objname);
+		ACPIDEV_DR_SET_FAILED(dhdl);
+	}
+
+	ndi_devi_exit(ddi_root_node(), circ);
+	acpidev_free_object_name(objname);
+
+	return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_poweron(ACPI_HANDLE hdl)
+{
+	acpidev_data_handle_t dhdl;
+
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data handle associated with %p.",
+		    hdl);
+		return (AE_ERROR);
+	}
+
+	/* Check whether the device is hotplug capable. */
+	if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: object %p is not hotplug capable.", hdl);
+		return (AE_SUPPORT);
+	} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+		    "state, unusable for DR.", hdl);
+		return (AE_ERROR);
+	}
+
+	return (AE_OK);
+}
+
+ACPI_STATUS
+acpidev_dr_device_poweroff(ACPI_HANDLE hdl)
+{
+	ACPI_STATUS rc;
+	acpidev_data_handle_t dhdl;
+
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data handle associated with %p.",
+		    hdl);
+		return (AE_ERROR);
+	}
+
+	/* Check whether the device is hotplug capable. */
+	if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: object %p is not hotplug capable.", hdl);
+		return (AE_SUPPORT);
+	} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+		    "state, unusable for DR.", hdl);
+		return (AE_ERROR);
+	}
+
+	rc = acpidev_eval_ej0(hdl);
+	if (ACPI_FAILURE(rc)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to evaluate _EJ0 for object %p.", hdl);
+	}
+
+	return (rc);
+}
+
+ACPI_STATUS
+acpidev_dr_device_check_status(ACPI_HANDLE hdl)
+{
+	acpidev_data_handle_t dhdl;
+
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data handle associated with %p.",
+		    hdl);
+		return (AE_ERROR);
+	}
+
+	/* Check whether the device is hotplug capable.
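+	 * This mirrors the gate used by poweron/poweroff above. As an
+	 * illustrative (not prescriptive) usage, a DR driver would check
+	 * the board state before attempting to power it on:
+	 *
+	 *	if (ACPI_SUCCESS(acpidev_dr_device_check_status(hdl)))
+	 *		(void) acpidev_dr_device_poweron(hdl);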
+	 */
+	if (!ACPIDEV_DR_BOARD_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: object %p is not hotplug capable.", hdl);
+		return (AE_SUPPORT);
+	} else if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: object %p is in the FAILED "
+		    "state, unusable for DR.", hdl);
+		return (AE_ERROR);
+	}
+
+	return (AE_OK);
+}
+
+void
+acpidev_dr_lock_all(void)
+{
+	mutex_enter(&acpidev_dr_lock);
+}
+
+void
+acpidev_dr_unlock_all(void)
+{
+	mutex_exit(&acpidev_dr_lock);
+}
+
+ACPI_STATUS
+acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl, processorid_t *idp)
+{
+	int rv;
+	processorid_t cpuid;
+	uint32_t procid, apicid;
+	mach_cpu_add_arg_t arg;
+	acpidev_data_handle_t dhdl;
+	dev_info_t *dip = NULL;
+
+	ASSERT(MUTEX_HELD(&cpu_lock));
+	ASSERT(hdl != NULL);
+	if (hdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to "
+		    "acpidev_dr_allocate_cpuid() is NULL.");
+		return (AE_BAD_PARAMETER);
+	}
+
+	/* Validate that the device is ready for hotplug. */
+	if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get devinfo for object %p.", hdl);
+		return (AE_ERROR);
+	}
+	ASSERT(dip != NULL);
+	dhdl = acpidev_data_get_handle(hdl);
+	if (dhdl == NULL) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to get data associated with object %p.",
+		    hdl);
+		return (AE_SUPPORT);
+	}
+	if (!ACPIDEV_DR_IS_READY(dhdl)) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: dip %p is not hotplug ready.", (void *)dip);
+		return (AE_SUPPORT);
+	}
+	if (ACPIDEV_DR_IS_FAILED(dhdl)) {
+		ACPIDEV_DEBUG(CE_NOTE,
+		    "!acpidev: dip %p is in the FAILED state.", (void *)dip);
+		return (AE_SUPPORT);
+	}
+
+	/* Query CPU-related information. */
+	apicid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+	    DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX);
+	procid = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+	    DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_PROCESSOR_ID, UINT32_MAX);
+	if (procid == UINT32_MAX || apicid == UINT32_MAX || apicid == 255) {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: dip %p is malformed, "
+		    "procid(0x%x) or apicid(0x%x) is invalid.",
+		    (void *)dip, procid, apicid);
+		return (AE_ERROR);
+	}
+
+	/* Check whether the CPU device is in the offline state. */
+	mutex_enter(&(DEVI(dip)->devi_lock));
+	if (!DEVI_IS_DEVICE_OFFLINE(dip)) {
+		mutex_exit(&DEVI(dip)->devi_lock);
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: dip %p isn't in the offline state.",
+		    (void *)dip);
+		return (AE_ERROR);
+	}
+	mutex_exit(&DEVI(dip)->devi_lock);
+
+	/* Check whether the CPU already exists.
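+	 * acpica keeps an ACPI-object-to-cpuid mapping for CPUs that have
+	 * already been added, so a successful lookup means there is
+	 * nothing left to allocate.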
*/ + if (ACPI_SUCCESS(acpica_get_cpu_id_by_object(hdl, &cpuid))) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: dip %p already has CPU id(%d) assigned.", + (void *)dip, cpuid); + return (AE_ALREADY_EXISTS); + } + + /* Allocate cpuid for the CPU */ + arg.arg.apic.apic_id = apicid; + arg.arg.apic.proc_id = procid; + if (apicid >= 255) { + arg.type = MACH_CPU_ARG_LOCAL_X2APIC; + } else { + arg.type = MACH_CPU_ARG_LOCAL_APIC; + } + rv = mach_cpu_add(&arg, &cpuid); + if (rv != PSM_SUCCESS) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to allocate cpu id for dip %p.", + (void *)dip); + return (AE_NOT_EXIST); + } + + ASSERT(cpuid >= 0 && cpuid < NCPU && cpuid < max_ncpus); + if (idp != NULL) { + *idp = cpuid; + } + + return (AE_OK); +} + +ACPI_STATUS +acpidev_dr_free_cpuid(ACPI_HANDLE hdl) +{ + ACPI_STATUS rv = AE_OK; + processorid_t cpuid; + + ASSERT(MUTEX_HELD(&cpu_lock)); + ASSERT(hdl != NULL); + if (hdl == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: parameter hdl to " + "acpidev_dr_free_cpuid() is NULL."); + return (AE_BAD_PARAMETER); + } + + if (ACPI_FAILURE(acpica_get_cpu_id_by_object(hdl, &cpuid))) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get cpuid for object %p.", hdl); + rv = AE_NOT_EXIST; + } else if (cpuid < 0 || cpuid > max_ncpus) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: cpuid(%d) of object %p is invalid.", + cpuid, hdl); + rv = AE_ERROR; + } else if (mach_cpu_remove(cpuid) != PSM_SUCCESS) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to free cpuid(%d) for object %p.", + cpuid, hdl); + rv = AE_ERROR; + } + + return (rv); +} + +static ACPI_STATUS +acpidev_dr_get_latency(ACPI_HANDLE hdl, void **hdlpp, + uint32_t pxmid, uint32_t *slicntp, uchar_t **slipp) +{ + ACPI_STATUS rc; + ACPI_BUFFER buf; + uint32_t i, pxmcnt; + uchar_t *valp, *sp, *ep; + + /* Evaluate the ACPI _SLI method under the object. */ + buf.Length = ACPI_ALLOCATE_BUFFER; + rc = AcpiEvaluateObjectTyped(hdl, ACPIDEV_METHOD_NAME_SLI, NULL, &buf, + ACPI_TYPE_BUFFER); + if (ACPI_SUCCESS(rc)) { + valp = (uchar_t *)buf.Pointer; + if (acpidev_slit_tbl_ptr->LocalityCount > pxmid) { + pxmcnt = acpidev_slit_tbl_ptr->LocalityCount; + } else { + pxmcnt = pxmid + 1; + } + + /* + * Validate data returned by the ACPI _SLI method. + * Please refer to 6.2.14 "_SLI (System Locality Information)" + * in ACPI4.0 for data format returned by _SLI method. + */ + if (buf.Length != pxmcnt * 2 * sizeof (uchar_t)) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: buffer length returned by _SLI method " + "under %p is invalid.", hdl); + AcpiOsFree(buf.Pointer); + } else if (valp[pxmid] != ACPI_SLIT_SELF_LATENCY || + valp[pxmid + pxmcnt] != ACPI_SLIT_SELF_LATENCY) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: local latency returned by _SLI method " + "under %p is not %u.", hdl, ACPI_SLIT_SELF_LATENCY); + AcpiOsFree(buf.Pointer); + } else { + *slicntp = pxmcnt; + *slipp = (uchar_t *)buf.Pointer; + *hdlpp = buf.Pointer; + return (AE_OK); + } + } else if (rc != AE_NOT_FOUND) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to evaluate " + "_SLI method under object %p.", hdl); + } + + /* Return data from the ACPI SLIT table. 
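+	 * The SLIT is a pxmcnt x pxmcnt byte matrix in which
+	 * Entry[pxmcnt * i + j] holds the latency from domain i to
+	 * domain j. The buffer assembled below packs row pxmid (outgoing
+	 * latencies) followed by column pxmid (incoming latencies), the
+	 * same layout the _SLI method returns.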
*/ + ASSERT(acpidev_slit_tbl_ptr != NULL); + pxmcnt = acpidev_slit_tbl_ptr->LocalityCount; + if (pxmid >= pxmcnt) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: proximity domain id " + "(%u) is too big, max %u.", pxmid, pxmcnt - 1); + *slicntp = 0; + *slipp = NULL; + return (AE_ERROR); + } else { + sp = AcpiOsAllocate(pxmcnt * 2 * sizeof (uchar_t)); + ep = acpidev_slit_tbl_ptr->Entry; + for (i = 0; i < pxmcnt; i++) { + sp[i] = ep[pxmcnt * pxmid + i]; + sp[i + pxmcnt] = ep[pxmcnt * i + pxmid]; + } + *slicntp = pxmcnt; + *slipp = sp; + *hdlpp = sp; + return (AE_OK); + } +} + +/* + * Query NUMA information for the CPU device. + * It returns APIC id, Proximity id and latency information of the CPU device. + */ +int +acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp, uint32_t *apicidp, + uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp) +{ + dev_info_t *dip = NULL; + ACPI_HANDLE hdl = NULL; + + ASSERT(cp != NULL); + ASSERT(hdlpp != NULL); + ASSERT(apicidp != NULL); + ASSERT(pxmidp != NULL); + if (cp == NULL || hdlpp == NULL || apicidp == NULL || pxmidp == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to " + "acpidev_dr_get_cpu_numa_info()."); + return (-1); + } + + *hdlpp = NULL; + *apicidp = UINT32_MAX; + *pxmidp = UINT32_MAX; + if (lgrp_plat_node_cnt == 1) { + return (-1); + } + ASSERT(acpidev_slit_tbl_ptr != NULL); + + /* Query APIC id and Proximity id from device properties. */ + if (ACPI_FAILURE(acpica_get_cpu_object_by_cpuid(cp->cpu_id, &hdl))) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get ACPI object " + "for CPU(%d).", cp->cpu_id); + return (-1); + } + if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get device node " + "for CPU(%d).", cp->cpu_id); + return (-1); + } + *apicidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + ACPIDEV_PROP_NAME_LOCALAPIC_ID, UINT32_MAX); + *pxmidp = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, + ACPIDEV_PROP_NAME_PROXIMITY_ID, UINT32_MAX); + if (*apicidp == UINT32_MAX || *pxmidp == UINT32_MAX) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get local APIC id " + "or proximity id for CPU(%d).", cp->cpu_id); + return (-1); + } + + ASSERT((slicntp && slipp) || (!slicntp && !slipp)); + if (slicntp != NULL && slipp != NULL) { + if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp, + slicntp, slipp))) { + return (-1); + } + } + + return (0); +} + +void +acpidev_dr_free_cpu_numa_info(void *hdlp) +{ + if (hdlp != NULL) { + AcpiOsFree(hdlp); + } +} + +static ACPI_STATUS +acpidev_dr_mem_search_srat(struct memlist *ml, uint32_t *pxmidp) +{ + int len, off; + uint64_t start, end; + boolean_t found = B_FALSE; + ACPI_SUBTABLE_HEADER *sp; + ACPI_SRAT_MEM_AFFINITY *mp; + + ASSERT(ml != NULL); + ASSERT(pxmidp != NULL); + ASSERT(acpidev_srat_tbl_ptr != NULL); + + /* Search the static ACPI SRAT table for proximity domain. 
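+	 * Walk the variable-length subtables that follow the fixed SRAT
+	 * header and take the first enabled, hot-pluggable memory affinity
+	 * entry whose address range covers the start of the memory list.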
*/ + sp = (ACPI_SUBTABLE_HEADER *)(acpidev_srat_tbl_ptr + 1); + len = acpidev_srat_tbl_ptr->Header.Length; + off = sizeof (*acpidev_srat_tbl_ptr); + while (off < len) { + if (sp->Type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { + mp = (ACPI_SRAT_MEM_AFFINITY *)sp; + if ((mp->Flags & ACPI_SRAT_MEM_ENABLED) && + (mp->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && + ml->ml_address >= mp->BaseAddress && + ml->ml_address <= mp->BaseAddress + mp->Length) { + found = B_TRUE; + break; + } + } + off += sp->Length; + sp = (ACPI_SUBTABLE_HEADER *)(((char *)sp) + sp->Length); + } + if (!found) + return (AE_NOT_FOUND); + + /* + * Verify that all memory regions in the list belong to the same domain. + */ + start = mp->BaseAddress; + end = mp->BaseAddress + mp->Length; + while (ml) { + if (ml->ml_address < start || + ml->ml_address + ml->ml_size > end) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: memory for hot-adding doesn't belong " + "to the same proximity domain."); + return (AE_ERROR); + } + ml = ml->ml_next; + } + + return (AE_OK); +} + +/* + * Query lgrp information for a memory device. + * It returns proximity domain id and latency information of the memory device. + */ +ACPI_STATUS +acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl, struct memlist *ml, + void **hdlpp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp) +{ + ASSERT(ml != NULL); + ASSERT(hdlpp != NULL); + ASSERT(pxmidp != NULL); + if (ml == NULL || hdlpp == NULL || pxmidp == NULL) { + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters to " + "acpidev_dr_get_mem_numa_info()."); + return (AE_BAD_PARAMETER); + } + + *pxmidp = UINT32_MAX; + if (lgrp_plat_node_cnt == 1) { + return (AE_SUPPORT); + } + + if (ACPI_FAILURE(acpidev_eval_pxm(hdl, pxmidp))) { + /* + * Try to get proximity domain id from SRAT table if failed to + * evaluate ACPI _PXM method for memory device. + */ + if (ACPI_FAILURE(acpidev_dr_mem_search_srat(ml, pxmidp))) { + ACPIDEV_DEBUG(CE_WARN, + "!acpidev: failed to get proximity domain id for " + "memory device %p.", hdl); + return (AE_ERROR); + } + } + + ASSERT((slicntp && slipp) || (!slicntp && !slipp)); + if (slicntp != NULL && slipp != NULL) { + if (ACPI_FAILURE(acpidev_dr_get_latency(hdl, hdlpp, *pxmidp, + slicntp, slipp))) { + return (AE_ERROR); + } + } + + return (AE_OK); +} + +void +acpidev_dr_free_mem_numa_info(void *hdlp) +{ + if (hdlp != NULL) { + AcpiOsFree(hdlp); + } +} diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c index eee5d84715..2c0dab936b 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_drv.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -60,19 +60,23 @@ #include <sys/errno.h> #include <sys/modctl.h> #include <sys/mutex.h> +#include <sys/note.h> #include <sys/obpdefs.h> #include <sys/sunddi.h> #include <sys/sunndi.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> #include <sys/acpidev.h> +#include <sys/acpidev_dr.h> #include <sys/acpidev_impl.h> /* Patchable through /etc/system */ int acpidev_options = 0; int acpidev_debug = 0; +krwlock_t acpidev_class_lock; acpidev_class_list_t *acpidev_class_list_root = NULL; +ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)]; /* ACPI device autoconfig global status */ typedef enum acpidev_status { @@ -86,9 +90,7 @@ typedef enum acpidev_status { static acpidev_status_t acpidev_status = ACPIDEV_STATUS_UNKNOWN; static kmutex_t acpidev_drv_lock; -static krwlock_t acpidev_class_lock; static dev_info_t *acpidev_root_dip = NULL; -static ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)]; /* Boot time ACPI device enumerator. */ static void acpidev_boot_probe(int type); @@ -117,6 +119,7 @@ _init(void) sizeof (acpidev_object_type_mask)); mutex_init(&acpidev_drv_lock, NULL, MUTEX_DRIVER, NULL); rw_init(&acpidev_class_lock, NULL, RW_DEFAULT, NULL); + acpidev_dr_init(); impl_bus_add_probe(acpidev_boot_probe); } else { cmn_err(CE_WARN, "!acpidev: failed to install driver."); @@ -157,6 +160,13 @@ acpidev_class_list_fini(void) { acpidev_unload_plat_modules(); + if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) { + (void) acpidev_unregister_class(&acpidev_class_list_scope, + &acpidev_class_pci); + (void) acpidev_unregister_class(&acpidev_class_list_device, + &acpidev_class_pci); + } + if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) { (void) acpidev_unregister_class(&acpidev_class_list_device, &acpidev_class_memory); @@ -252,7 +262,7 @@ acpidev_class_list_init(uint64_t *fp) *fp |= ACPI_DEVCFG_CPU; } - /* Check support for ACPI memory device. */ + /* Check support of ACPI memory devices. */ if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) { /* * Register the ACPI memory class driver onto the @@ -267,10 +277,36 @@ acpidev_class_list_init(uint64_t *fp) *fp |= ACPI_DEVCFG_MEMORY; } + /* Check support of PCI/PCIex Host Bridge devices. */ + if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) { + /* + * Register pci/pciex class drivers onto + * the acpidev_class_list_device class list because ACPI + * module class driver uses that list. + */ + if (ACPI_FAILURE(acpidev_register_class( + &acpidev_class_list_device, &acpidev_class_pci, + B_FALSE))) { + goto error_device_pci; + } + + /* + * Register pci/pciex class drivers onto the + * acpidev_class_list_scope class list. + */ + if (ACPI_FAILURE(acpidev_register_class( + &acpidev_class_list_scope, &acpidev_class_pci, + B_FALSE))) { + goto error_scope_pci; + } + + *fp |= ACPI_DEVCFG_PCI; + } + /* Check blacklist and load platform specific modules. 
 */
 	rc = acpidev_load_plat_modules();
 	if (ACPI_FAILURE(rc)) {
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to check blacklist "
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to check blacklist "
 		    "or load platform modules.");
 		goto error_plat;
 	}
@@ -278,6 +314,16 @@
 	return (AE_OK);
 
 error_plat:
+	if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+		(void) acpidev_unregister_class(&acpidev_class_list_scope,
+		    &acpidev_class_pci);
+	}
+error_scope_pci:
+	if ((acpidev_options & ACPIDEV_OUSER_NO_PCI) == 0) {
+		(void) acpidev_unregister_class(&acpidev_class_list_device,
+		    &acpidev_class_pci);
+	}
+error_device_pci:
 	if ((acpidev_options & ACPIDEV_OUSER_NO_MEM) == 0) {
 		(void) acpidev_unregister_class(&acpidev_class_list_device,
 		    &acpidev_class_memory);
@@ -313,7 +359,7 @@ error_root_device:
 	    &acpidev_class_scope);
 error_out:
 	ACPIDEV_DEBUG(CE_WARN,
-	    "acpidev: failed to register built-in class drivers.");
+	    "!acpidev: failed to register built-in class drivers.");
 	*fp = 0;
 
 	return (AE_ERROR);
@@ -353,7 +399,7 @@
 	    (pnode_t)DEVI_SID_NODEID, &dip);
 	if (rv != NDI_SUCCESS) {
 		ndi_devi_exit(ddi_root_node(), circ);
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create device node "
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create device node "
 		    "for ACPI root with errcode %d.", rv);
 		return (AE_ERROR);
 	}
@@ -362,7 +408,7 @@
 	if (ACPI_FAILURE(acpica_tag_devinfo(dip, ACPI_ROOT_OBJECT))) {
 		(void) ddi_remove_child(dip, 0);
 		ndi_devi_exit(ddi_root_node(), circ);
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to tag object %s.",
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to tag object %s.",
 		    ACPIDEV_OBJECT_NAME_SB);
 		return (AE_ERROR);
 	}
@@ -376,7 +422,7 @@
 	}
 	if (rv != DDI_SUCCESS) {
 		ACPIDEV_DEBUG(CE_WARN,
-		    "acpidev: failed to set device property for /devices/%s.",
+		    "!acpidev: failed to set device property for /devices/%s.",
 		    ACPIDEV_NODE_NAME_ROOT);
 		goto error_out;
 	}
@@ -384,7 +430,7 @@
 	/* Manually create an object handle for the root node */
 	objhdl = acpidev_data_create_handle(ACPI_ROOT_OBJECT);
 	if (objhdl == NULL) {
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object "
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object "
 		    "handle for the root node.");
 		goto error_out;
 	}
@@ -425,7 +471,7 @@
 		return;
 	} else if (acpidev_status != ACPIDEV_STATUS_UNKNOWN) {
 		ACPIDEV_DEBUG(CE_NOTE,
-		    "acpidev: initialization called more than once.");
+		    "!acpidev: initialization called more than once.");
 		return;
 	}
@@ -476,6 +522,18 @@
 	acpidev_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(),
 	    DDI_PROP_DONTPASS, "acpidev-options", acpidev_options);
 
+	/* Check whether ACPI based DR has been disabled by the user. */
+	rc = ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
+	    DDI_PROP_DONTPASS, "acpidev-dr", &str);
+	if (rc == DDI_SUCCESS) {
+		if (strcasecmp(str, "off") == 0 || strcasecmp(str, "no") == 0) {
+			cmn_err(CE_CONT, "?acpidev: ACPI based DR has been "
+			    "disabled by user.\n");
+			acpidev_dr_enable = 0;
+		}
+		ddi_prop_free(str);
+	}
+
 	/* Register all device class drivers.
*/ if (ACPI_FAILURE(acpidev_class_list_init(&features))) { cmn_err(CE_WARN, @@ -518,7 +576,7 @@ acpidev_boot_probe_device(acpidev_op_type_t op_type) infop = acpidev_alloc_walk_info(op_type, 0, ACPI_ROOT_OBJECT, &acpidev_class_list_root, NULL); if (infop == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate walk info " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate walk info " "object in acpi_boot_probe_device()."); return (AE_ERROR); } @@ -558,9 +616,18 @@ acpidev_boot_probe(int type) if (type == 0 && acpidev_status == ACPIDEV_STATUS_INITIALIZED) { rc = acpidev_boot_probe_device(ACPIDEV_OP_BOOT_PROBE); if (ACPI_SUCCESS(rc)) { + /* + * Support of DR operations will be disabled + * if failed to initialize DR subsystem. + */ + rc = acpidev_dr_initialize(acpidev_root_dip); + if (ACPI_FAILURE(rc) && rc != AE_SUPPORT) { + cmn_err(CE_CONT, "?acpidev: failed to " + "initialize DR subsystem."); + } acpidev_status = ACPIDEV_STATUS_FIRST_PASS; } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to probe ACPI " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to probe ACPI " "devices during boot."); acpidev_status = ACPIDEV_STATUS_FAILED; } @@ -569,7 +636,7 @@ acpidev_boot_probe(int type) if (ACPI_SUCCESS(rc)) { acpidev_status = ACPIDEV_STATUS_READY; } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to reprobe " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to reprobe " "ACPI devices during boot."); acpidev_status = ACPIDEV_STATUS_FAILED; } @@ -598,12 +665,12 @@ acpidev_probe_child(acpidev_walk_info_t *infop) ASSERT(infop != NULL); if (infop == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: infop is NULL in acpidev_probe_child()."); + "!acpidev: infop is NULL in acpidev_probe_child()."); return (AE_BAD_PARAMETER); } ASSERT(infop->awi_level < ACPIDEV_MAX_ENUM_LEVELS - 1); if (infop->awi_level >= ACPIDEV_MAX_ENUM_LEVELS - 1) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: recursive level is too deep " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: recursive level is too deep " "in acpidev_probe_child()."); return (AE_BAD_PARAMETER); } @@ -615,14 +682,14 @@ acpidev_probe_child(acpidev_walk_info_t *infop) if (infop->awi_class_list == NULL || infop->awi_hdl == NULL || infop->awi_info == NULL || infop->awi_name == NULL || infop->awi_data == NULL) { - ACPIDEV_DEBUG(CE_WARN, - "acpidev: infop has NULL fields in acpidev_probe_child()."); + ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL fields in " + "acpidev_probe_child()."); return (AE_BAD_PARAMETER); } pdip = acpidev_walk_info_get_pdip(infop); if (pdip == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: pdip is NULL in acpidev_probe_child()."); + "!acpidev: pdip is NULL in acpidev_probe_child()."); return (AE_BAD_PARAMETER); } @@ -636,7 +703,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop) } infop->awi_class_curr = it->acl_class; if (ACPI_FAILURE(it->acl_class->adc_pre_probe(infop))) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to pre-probe " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to pre-probe " "device of type %s under %s.", it->acl_class->adc_class_name, infop->awi_name); } @@ -653,11 +720,17 @@ acpidev_probe_child(acpidev_walk_info_t *infop) continue; } + /* It's another hotplug-capable board, skip it. */ + if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE && + acpidev_dr_device_is_board(child)) { + continue; + } + /* Allocate the walk info structure. 
*/ cinfop = acpidev_alloc_walk_info(infop->awi_op_type, infop->awi_level + 1, child, NULL, infop); if (cinfop == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to allocate " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to allocate " "walk info child object of %s.", infop->awi_name); /* Mark error and continue to handle next child. */ @@ -673,12 +746,6 @@ acpidev_probe_child(acpidev_walk_info_t *infop) datap = cinfop->awi_data; if (cinfop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { datap->aod_class_list = infop->awi_class_list; - } else if (datap->aod_class_list != infop->awi_class_list) { - ACPIDEV_DEBUG(CE_WARN, - "acpidev: class list for %s has been changed", - infop->awi_name); - acpidev_free_walk_info(cinfop); - continue; } /* Call registered process callbacks. */ @@ -691,7 +758,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop) res = it->acl_class->adc_probe(cinfop); if (ACPI_FAILURE(res)) { rc = res; - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "process object of type %s under %s.", it->acl_class->adc_class_name, infop->awi_name); @@ -709,7 +776,7 @@ acpidev_probe_child(acpidev_walk_info_t *infop) } infop->awi_class_curr = it->acl_class; if (ACPI_FAILURE(it->acl_class->adc_post_probe(infop))) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to post-probe " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to post-probe " "device of type %s under %s.", it->acl_class->adc_class_name, infop->awi_name); } @@ -737,7 +804,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) ASSERT(infop != NULL); if (infop == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: infop is NULL in acpidev_process_object()."); + "!acpidev: infop is NULL in acpidev_process_object()."); return (AE_BAD_PARAMETER); } ASSERT(infop->awi_hdl != NULL); @@ -751,13 +818,13 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) clsp = infop->awi_class_curr; if (hdl == NULL || datap == NULL || adip == NULL || clsp == NULL || clsp->adc_filter == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: infop has NULL pointer in " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: infop has NULL pointer in " "acpidev_process_object()."); return (AE_BAD_PARAMETER); } pdip = acpidev_walk_info_get_pdip(infop); if (pdip == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to get pdip for %s " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to get pdip for %s " "in acpidev_process_object().", infop->awi_name); return (AE_BAD_PARAMETER); } @@ -782,7 +849,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) (infop->awi_flags & ACPIDEV_WI_DEVICE_CREATED)) { ASSERT(infop->awi_dip != NULL); ACPIDEV_DEBUG(CE_NOTE, - "acpidev: device has already been created for object %s.", + "!acpidev: device has already been created for object %s.", infop->awi_name); return (AE_ALREADY_EXISTS); } @@ -793,9 +860,9 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) * 6.3.1 and 6.5.1. 
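+ * (_STA bits: bit 0 = present, bit 1 = enabled, bit 3 = functioning.)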
* present functioning enabled Action * 0 0 x Do nothing - * 1 x 0 Create node in OFFLINE and scan child + * 1 x 0 Do nothing * 1 x 1 Create node and scan child - * x 1 0 Create node in OFFLINE and scan child + * x 1 0 Do nothing * x 1 1 Create node and scan child */ if ((datap->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0 || @@ -807,31 +874,34 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) } datap->aod_iflag |= ACPIDEV_ODF_STATUS_VALID; } - if (!acpidev_check_device_present(datap->aod_status)) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: object %s doesn't exist.", + if (!acpidev_check_device_enabled(datap->aod_status)) { + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: object %s doesn't exist.", infop->awi_name); - return (AE_NOT_EXIST); + /* + * Need to scan for hotplug-capable boards even if object + * doesn't exist or has been disabled during the first pass. + * So just disable creating device node and keep on scanning. + */ + if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { + flags &= ~ACPIDEV_PROCESS_FLAG_CREATE; + } else { + return (AE_NOT_EXIST); + } } ASSERT(infop->awi_data != NULL); ASSERT(infop->awi_parent != NULL); ASSERT(infop->awi_parent->awi_data != NULL); - /* Put device into offline state if parent is in offline state. */ - if (infop->awi_parent->awi_data->aod_iflag & - ACPIDEV_ODF_DEVINFO_OFFLINE) { - flags |= ACPIDEV_PROCESS_FLAG_OFFLINE; - /* Put device into offline state if it's disabled. */ - } else if (!acpidev_check_device_enabled(datap->aod_status)) { - flags |= ACPIDEV_PROCESS_FLAG_OFFLINE; - } - /* - * Mark current node status as OFFLINE even if a device node will not - * be created for it. This is needed to handle the case when the current - * node is SKIPPED (no device node will be created for it), so that all - * descedants of current nodes could be correctly marked as OFFLINE. - */ - if (flags & ACPIDEV_PROCESS_FLAG_OFFLINE) { - infop->awi_data->aod_iflag |= ACPIDEV_ODF_DEVINFO_OFFLINE; + if (flags & ACPIDEV_PROCESS_FLAG_CREATE) { + mutex_enter(&(DEVI(pdip)->devi_lock)); + /* + * Put the device into offline state if its parent is in + * offline state. + */ + if (DEVI_IS_DEVICE_OFFLINE(pdip)) { + flags |= ACPIDEV_PROCESS_FLAG_OFFLINE; + } + mutex_exit(&(DEVI(pdip)->devi_lock)); } /* Evaluate filtering rules and generate device name. */ @@ -862,7 +932,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) OBP_DEVICETYPE, clsp->adc_dev_type); if (ret != NDI_SUCCESS) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to set device property for %s.", + "!acpidev: failed to set device property for %s.", infop->awi_name); (void) ddi_remove_child(dip, 0); infop->awi_dip = NULL; @@ -886,7 +956,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) if (clsp->adc_init != NULL && ACPI_FAILURE(clsp->adc_init(infop))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to initialize device %s.", + "!acpidev: failed to initialize device %s.", infop->awi_name); if ((flags & ACPIDEV_PROCESS_FLAG_NOTAG) == 0) { (void) acpica_untag_devinfo(dip, hdl); @@ -923,6 +993,11 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) } else { (void) ndi_devi_bind_driver(dip, 0); } + + /* Hold reference on branch when hot-adding devices. 
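+		 * The hold keeps the freshly created branch from being
+		 * detached until DR processing of the board has completed.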
*/ + if (flags & ACPIDEV_PROCESS_FLAG_HOLDBRANCH) { + e_ddi_branch_hold(dip); + } } /* Free resources */ @@ -942,7 +1017,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) rc = acpidev_probe_child(infop); if (ACPI_FAILURE(rc)) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to probe subtree of %s.", + "!acpidev: failed to probe subtree of %s.", infop->awi_name); rc = AE_ERROR; } @@ -960,7 +1035,7 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) case ACPIDEV_FILTER_FAILED: ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to probe device for %s.", + "!acpidev: failed to probe device for %s.", infop->awi_name); rc = AE_ERROR; break; @@ -975,11 +1050,12 @@ acpidev_process_object(acpidev_walk_info_t *infop, int flags) return (rc); } -/*ARGSUSED*/ acpidev_filter_result_t acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, acpidev_filter_rule_t *afrp, char *devname, int len) { + _NOTE(ARGUNUSED(hdl)); + ASSERT(afrp != NULL); ASSERT(devname == NULL || len >= ACPIDEV_MAX_NAMELEN); if (infop->awi_level < afrp->adf_minlvl || @@ -992,8 +1068,7 @@ acpidev_filter_default(acpidev_walk_info_t *infop, ACPI_HANDLE hdl, return (ACPIDEV_FILTER_CONTINUE); } if (afrp->adf_replace != NULL && devname != NULL) { - (void) strncpy(devname, afrp->adf_replace, len - 1); - devname[len - 1] = 0; + (void) strlcpy(devname, afrp->adf_replace, len); } return (afrp->adf_retcode); @@ -1042,7 +1117,7 @@ acpidev_register_class(acpidev_class_list_t **listpp, acpidev_class_t *clsp, ASSERT(listpp != NULL); if (listpp == NULL || clsp == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: invalid parameter in acpidev_register_class()."); + "!acpidev: invalid parameter in acpidev_register_class()."); return (AE_BAD_PARAMETER); } else if (clsp->adc_version != ACPIDEV_CLASS_REV) { cmn_err(CE_WARN, @@ -1092,7 +1167,7 @@ acpidev_unregister_class(acpidev_class_list_t **listpp, ASSERT(clsp != NULL); ASSERT(listpp != NULL); if (listpp == NULL || clsp == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter " "in acpidev_unregister_class()."); return (AE_BAD_PARAMETER); } @@ -1106,12 +1181,12 @@ acpidev_unregister_class(acpidev_class_list_t **listpp, } } if (temp == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) doesn't exist " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) doesn't exist " "in acpidev_unregister_class().", (void *)clsp, clsp->adc_class_name); rc = AE_NOT_FOUND; } else if (temp->acl_class->adc_refcnt != 0) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: class %p(%s) is still in use " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: class %p(%s) is still in use " "in acpidev_unregister_class()..", (void *)clsp, clsp->adc_class_name); rc = AE_ERROR; diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c index e4dcbcdd67..4185817dd9 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_memory.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -31,6 +31,7 @@ #include <sys/acpica.h> #include <sys/acpidev.h> #include <sys/acpidev_rsc.h> +#include <sys/acpidev_dr.h> #include <sys/acpidev_impl.h> static ACPI_STATUS acpidev_memory_probe(acpidev_walk_info_t *infop); @@ -97,7 +98,7 @@ static acpidev_filter_rule_t acpidev_memory_filters[] = { static ACPI_STATUS acpidev_memory_probe(acpidev_walk_info_t *infop) { - ACPI_STATUS rc; + ACPI_STATUS rc = AE_OK; int flags; ASSERT(infop != NULL); @@ -109,19 +110,35 @@ acpidev_memory_probe(acpidev_walk_info_t *infop) return (AE_OK); } - if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u " + flags = ACPIDEV_PROCESS_FLAG_SCAN; + switch (infop->awi_op_type) { + case ACPIDEV_OP_BOOT_PROBE: + if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE; + acpidev_dr_check(infop); + } + break; + + case ACPIDEV_OP_BOOT_REPROBE: + break; + + case ACPIDEV_OP_HOTPLUG_PROBE: + if (acpica_get_devcfg_feature(ACPI_DEVCFG_MEMORY)) { + flags |= ACPIDEV_PROCESS_FLAG_CREATE | + ACPIDEV_PROCESS_FLAG_SYNCSTATUS | + ACPIDEV_PROCESS_FLAG_HOLDBRANCH; + } + break; + + default: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u " "in acpidev_memory_probe.", infop->awi_op_type); rc = AE_BAD_PARAMETER; + break; + } + + if (rc == AE_OK) { + rc = acpidev_process_object(infop, flags); } if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) { cmn_err(CE_WARN, @@ -153,7 +170,6 @@ acpidev_memory_filter(acpidev_walk_info_t *infop, char *devname, int maxlen) return (res); } -/*ARGSUSED*/ static ACPI_STATUS acpidev_memory_init(acpidev_walk_info_t *infop) { diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c new file mode 100644 index 0000000000..5f0c6c6c13 --- /dev/null +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_pci.c @@ -0,0 +1,172 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */
+
+#include <sys/types.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/acpi/acpi.h>
+#include <sys/acpica.h>
+#include <sys/acpidev.h>
+#include <sys/acpidev_rsc.h>
+#include <sys/acpidev_dr.h>
+#include <sys/acpidev_impl.h>
+
+static ACPI_STATUS acpidev_pci_probe(acpidev_walk_info_t *infop);
+
+/*
+ * Default class driver for PCI/PCIEX Host Bridge devices.
+ */
+acpidev_class_t acpidev_class_pci = {
+	0,				/* adc_refcnt */
+	ACPIDEV_CLASS_REV1,		/* adc_version */
+	ACPIDEV_CLASS_ID_PCI,		/* adc_class_id */
+	"PCI/PCIex Host Bridge",	/* adc_class_name */
+	ACPIDEV_TYPE_PCI,		/* adc_dev_type */
+	NULL,				/* adc_private */
+	NULL,				/* adc_pre_probe */
+	NULL,				/* adc_post_probe */
+	acpidev_pci_probe,		/* adc_probe */
+	NULL,				/* adc_filter */
+	NULL,				/* adc_init */
+	NULL,				/* adc_fini */
+};
+
+static char *acpidev_pci_device_ids[] = {
+	ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+	ACPIDEV_HID_PCI_HOSTBRIDGE,
+};
+
+static char *acpidev_pciex_device_ids[] = {
+	ACPIDEV_HID_PCIEX_HOSTBRIDGE,
+};
+
+static void
+acpidev_pci_update_status(acpidev_walk_info_t *infop)
+{
+	int status;
+	dev_info_t *dip = NULL;
+	acpidev_data_handle_t dhdl;
+
+	dhdl = infop->awi_data;
+	ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_CREATED) == 0);
+	ASSERT((dhdl->aod_iflag & ACPIDEV_ODF_DEVINFO_TAGGED) == 0);
+	if ((dhdl->aod_iflag & ACPIDEV_ODF_STATUS_VALID) == 0) {
+		status = acpidev_query_device_status(infop->awi_hdl);
+		dhdl->aod_status = status;
+		dhdl->aod_iflag |= ACPIDEV_ODF_STATUS_VALID;
+	} else {
+		status = dhdl->aod_status;
+	}
+	dhdl->aod_level = infop->awi_level;
+	dhdl->aod_hdl = infop->awi_hdl;
+	dhdl->aod_class = NULL;
+	dhdl->aod_class_list = NULL;
+	if (acpidev_match_device_id(infop->awi_info,
+	    ACPIDEV_ARRAY_PARAM(acpidev_pciex_device_ids))) {
+		dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCIEX;
+	} else {
+		dhdl->aod_class_id = ACPIDEV_CLASS_ID_PCI;
+	}
+
+	if (ACPI_FAILURE(acpica_get_devinfo(infop->awi_hdl, &dip))) {
+		dip = NULL;
+	} else {
+		ASSERT(dip != NULL);
+	}
+	if (acpidev_check_device_enabled(status)) {
+		/*
+		 * Mark the device as DISABLED if no device node was created.
+		 * BIOS may hide some special PCI/PCIex buses from the OS.
+		 */
+		if (dip == NULL) {
+			dhdl->aod_dip = NULL;
+			dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+		} else {
+			dhdl->aod_dip = dip;
+			dhdl->aod_iflag |= ACPIDEV_ODF_DEVINFO_CREATED;
+		}
+	} else {
+		ASSERT(dip == NULL);
+		dhdl->aod_dip = NULL;
+		dhdl->aod_status &= ~ACPI_STA_DEVICE_ENABLED;
+	}
+}
+
+static ACPI_STATUS
+acpidev_pci_probe(acpidev_walk_info_t *infop)
+{
+	ACPI_STATUS rc = AE_OK;
+
+	ASSERT(infop != NULL);
+	ASSERT(infop->awi_hdl != NULL);
+	ASSERT(infop->awi_info != NULL);
+	if (infop->awi_info->Type != ACPI_TYPE_DEVICE ||
+	    acpidev_match_device_id(infop->awi_info,
+	    ACPIDEV_ARRAY_PARAM(acpidev_pci_device_ids)) == B_FALSE) {
+		return (AE_OK);
+	}
+
+	if (acpica_get_devcfg_feature(ACPI_DEVCFG_PCI) == 0) {
+		return (AE_OK);
+	}
+
+	if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) {
+		/*
+		 * Check hotplug capability on the first pass.
+		 */
+		acpidev_dr_check(infop);
+	} else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) {
+		/*
+		 * Check whether the PCI device enumerator has created device
+		 * nodes for PCI/PCIEX host bridges.
+		 */
+		acpidev_pci_update_status(infop);
+	} else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) {
+		/*
+		 * No support of PCI/PCIEX host bridge hotplug yet.
+		 * It will come in the next phase.
+		 */
+		cmn_err(CE_WARN,
+		    "!acpidev: no support of PCI/PCIEX host bridge hotplug.");
+		/*
+		 * Don't block the hot-adding process, just skip it.
+		 */
+		rc = AE_OK;
+	} else {
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u "
+		    "in acpidev_pci_probe().", infop->awi_op_type);
+		rc = AE_BAD_PARAMETER;
+	}
+	if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) {
+		cmn_err(CE_CONT, "?acpidev: failed to process PCI/PCIEX host "
+		    "bridge object %s.\n", infop->awi_name);
+	} else {
+		rc = AE_OK;
+	}
+
+	return (rc);
+}
diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
index 17dd9c774b..e9664c7614 100644
--- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
+++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_resource.c
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
  * All rights reserved.
 */
@@ -331,7 +331,7 @@ acpidev_resource_insert_dma(acpidev_resource_handle_t rhdl, int dma)
 	ASSERT(rhdl != NULL);
 	if (rhdl->acpidev_dma_count >= ACPIDEV_RES_DMA_MAX) {
 		ACPIDEV_DEBUG(CE_WARN,
-		    "acpidev: too many DMA resources, max %u.",
+		    "!acpidev: too many DMA resources, max %u.",
 		    ACPIDEV_RES_DMA_MAX);
 		return (AE_LIMIT);
 	}
@@ -380,7 +380,7 @@ acpidev_resource_insert_irq(acpidev_resource_handle_t rhdl, int irq)
 	ASSERT(rhdl != NULL);
 	if (rhdl->acpidev_irq_count >= ACPIDEV_RES_IRQ_MAX) {
 		ACPIDEV_DEBUG(CE_WARN,
-		    "acpidev: too many IRQ resources, max %u.",
+		    "!acpidev: too many IRQ resources, max %u.",
 		    ACPIDEV_RES_IRQ_MAX);
 		return (AE_LIMIT);
 	}
@@ -456,7 +456,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl,
 				high |= ACPIDEV_REG_MEM_COHERENT_PF;
 			} else {
 				ACPIDEV_DEBUG(CE_WARN,
-				    "acpidev: unknown memory caching type %u.",
+				    "!acpidev: unknown memory caching type %u.",
 				    addrp->Info.Mem.Caching);
 				rc = AE_ERROR;
 				break;
 			}
@@ -477,7 +477,7 @@
 			reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
 			rc = acpidev_resource_insert_reg(rhdl, &reg);
 			if (ACPI_FAILURE(rc)) {
-				ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+				ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
 				    "insert regspec into resource handle.");
 			}
 		/* Generate 'ranges' for producer. */
@@ -502,7 +502,7 @@
 			range.size_low = addrp->AddressLength & 0xFFFFFFFF;
 			rc = acpidev_resource_insert_range(rhdl, &range);
 			if (ACPI_FAILURE(rc)) {
-				ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+				ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
 				    "insert range into resource handle.");
 			}
 		}
@@ -524,7 +524,7 @@
 				high |= ACPIDEV_REG_IO_RANGE_FULL;
 			} else {
 				ACPIDEV_DEBUG(CE_WARN,
-				    "acpidev: unknown IO range type %u.",
+				    "!acpidev: unknown IO range type %u.",
 				    addrp->Info.Io.RangeType);
 				rc = AE_ERROR;
 				break;
 			}
@@ -545,7 +545,7 @@
 			reg.size_low = addrp->AddressLength & 0xFFFFFFFF;
 			rc = acpidev_resource_insert_reg(rhdl, &reg);
 			if (ACPI_FAILURE(rc)) {
-				ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to "
+				ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to "
 				    "insert regspec into resource handle.");
 			}
 			/* Generate 'ranges' for producer.
*/ @@ -570,7 +570,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl, range.size_low = addrp->AddressLength & 0xFFFFFFFF; rc = acpidev_resource_insert_range(rhdl, &range); if (ACPI_FAILURE(rc)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "insert range into resource handle."); } } @@ -585,7 +585,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl, end = addrp->Minimum + addrp->AddressLength; if (end < addrp->Minimum || end > UINT_MAX) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: bus range " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: bus range " "in ADDRESS64 is invalid."); rc = AE_ERROR; break; @@ -595,7 +595,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl, ASSERT(bus.bus_start <= bus.bus_end); rc = acpidev_resource_insert_bus(rhdl, &bus); if (ACPI_FAILURE(rc)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "insert bus range into resource handle."); } } @@ -603,7 +603,7 @@ acpidev_resource_address64(acpidev_resource_handle_t rhdl, default: ACPIDEV_DEBUG(CE_WARN, - "acpidev: unknown resource type %u in ADDRESS64.", + "!acpidev: unknown resource type %u in ADDRESS64.", addrp->ResourceType); rc = AE_BAD_PARAMETER; } @@ -632,7 +632,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: case ACPI_RESOURCE_TYPE_VENDOR: ACPIDEV_DEBUG(CE_NOTE, - "acpidev: unsupported producer resource type %u, ignored.", + "!acpidev: unsupported producer resource type %u, ignored.", rscp->Type); break; @@ -651,13 +651,13 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) range.size_hi = 0; range.size_low = rscp->Data.Io.AddressLength; if ((uint64_t)range.child_low + range.size_low > UINT16_MAX) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO record, " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO record, " "IO max is out of range."); rc = AE_ERROR; } else if (range.size_low != 0) { rc = acpidev_resource_insert_range(rhdl, &range); if (ACPI_FAILURE(rc)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "insert range into resource handle."); } } @@ -671,16 +671,16 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) ACPI_RESOURCE_ADDRESS64 addr64; if (rscp->Data.Address.ProducerConsumer != ACPI_PRODUCER) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered " "a CONSUMER resource, ignored."); } else if (ACPI_FAILURE(AcpiResourceToAddress64(rscp, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to convert resource to ADDR64."); + "!acpidev: failed to convert resource to ADDR64."); } else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to handle ADDRESS resource."); + "!acpidev: failed to handle ADDRESS resource."); } break; } @@ -690,7 +690,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) ACPI_RESOURCE_ADDRESS64 addr64; if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_PRODUCER) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered " "a CONSUMER resource, ignored."); break; } @@ -705,14 +705,14 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to handle EXTADDRESS resource."); + "!acpidev: failed to handle EXTADDRESS 
resource."); } break; } case ACPI_RESOURCE_TYPE_START_DEPENDENT: case ACPI_RESOURCE_TYPE_END_DEPENDENT: - ACPIDEV_DEBUG(CE_NOTE, "acpidev: producer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: producer encountered " "START_DEPENDENT or END_DEPENDENT tag, ignored."); break; @@ -723,7 +723,7 @@ acpidev_resource_walk_producer(ACPI_RESOURCE *rscp, void *ctxp) default: ACPIDEV_DEBUG(CE_NOTE, - "acpidev: unknown ACPI resource type %u, ignored.", + "!acpidev: unknown ACPI resource type %u, ignored.", rscp->Type); break; } @@ -746,7 +746,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: case ACPI_RESOURCE_TYPE_VENDOR: ACPIDEV_DEBUG(CE_NOTE, - "acpidev: unsupported consumer resource type %u, ignored.", + "!acpidev: unsupported consumer resource type %u, ignored.", rscp->Type); break; @@ -755,7 +755,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) int i; if (rscp->Data.ExtendedIrq.ProducerConsumer != ACPI_CONSUMER) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered " "a PRODUCER resource, ignored."); break; } @@ -764,7 +764,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) rscp->Data.ExtendedIrq.Interrupts[i]))) { continue; } - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert" + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert" "Extended IRQ into resource handle."); rc = AE_ERROR; break; @@ -781,7 +781,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) rscp->Data.Irq.Interrupts[i]))) { continue; } - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert" + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert" "IRQ into resource handle."); rc = AE_ERROR; break; @@ -798,7 +798,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) rscp->Data.Dma.Channels[i]))) { continue; } - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to insert" + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to insert" "dma into resource handle."); rc = AE_ERROR; break; @@ -827,13 +827,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) reg.phys_mid = 0; reg.size_hi = 0; if ((uint64_t)reg.phys_low + reg.size_low > UINT16_MAX) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid IO/FIXEDIO " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid IO/FIXEDIO " "record, IO max is out of range."); rc = AE_ERROR; } else if (reg.size_low != 0) { rc = acpidev_resource_insert_reg(rhdl, ®); if (ACPI_FAILURE(rc)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "insert reg into resource handle."); } } @@ -866,13 +866,13 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) reg.size_hi = 0; if ((uint64_t)reg.phys_low + reg.size_low > UINT32_MAX) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: invalid MEMORY32/FIXEDMEMORY32 record, " + "!acpidev: invalid MEMORY32/FIXEDMEMORY32 record, " "memory max is out of range."); rc = AE_ERROR; } else if (reg.size_low != 0) { rc = acpidev_resource_insert_reg(rhdl, ®); if (ACPI_FAILURE(rc)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to " "insert reg into resource handle."); } } @@ -886,16 +886,16 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) ACPI_RESOURCE_ADDRESS64 addr64; if (rscp->Data.Address.ProducerConsumer != ACPI_CONSUMER) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered " "a PRODUCER resource, ignored."); } else if 
(ACPI_FAILURE(AcpiResourceToAddress64(rscp, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to convert resource to ADDR64."); + "!acpidev: failed to convert resource to ADDR64."); } else if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to handle ADDRESS resource."); + "!acpidev: failed to handle ADDRESS resource."); } break; } @@ -905,7 +905,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) ACPI_RESOURCE_ADDRESS64 addr64; if (rscp->Data.ExtAddress64.ProducerConsumer != ACPI_CONSUMER) { - ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered " "a PRODUCER resource, ignored."); break; } @@ -920,14 +920,14 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) if (ACPI_FAILURE(rc = acpidev_resource_address64(rhdl, &addr64))) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to handle EXTADDRESS resource."); + "!acpidev: failed to handle EXTADDRESS resource."); } break; } case ACPI_RESOURCE_TYPE_START_DEPENDENT: case ACPI_RESOURCE_TYPE_END_DEPENDENT: - ACPIDEV_DEBUG(CE_NOTE, "acpidev: consumer encountered " + ACPIDEV_DEBUG(CE_NOTE, "!acpidev: consumer encountered " "START_DEPENDENT or END_DEPENDENT tag, ignored."); break; @@ -938,7 +938,7 @@ acpidev_resource_walk_consumer(ACPI_RESOURCE *rscp, void *ctxp) default: ACPIDEV_DEBUG(CE_NOTE, - "acpidev: unknown ACPI resource type %u, ignored.", + "!acpidev: unknown ACPI resource type %u, ignored.", rscp->Type); break; } @@ -959,14 +959,14 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method, ASSERT(rhdlp != NULL); if (hdl == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: hdl is NULL in acpidev_resource_walk()."); + "!acpidev: hdl is NULL in acpidev_resource_walk()."); return (AE_BAD_PARAMETER); } else if (method == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: method is NULL in acpidev_resource_walk()."); + "!acpidev: method is NULL in acpidev_resource_walk()."); return (AE_BAD_PARAMETER); } else if (rhdlp == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: resource handle ptr is NULL " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: resource handle ptr is NULL " "in acpidev_resource_walk()."); return (AE_BAD_PARAMETER); } @@ -975,7 +975,7 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method, if (ACPI_FAILURE(AcpiGetHandle(hdl, method, &mhdl))) { char *objname = acpidev_get_object_name(hdl); ACPIDEV_DEBUG(CE_NOTE, - "acpidev: method %s doesn't exist under %s", + "!acpidev: method %s doesn't exist under %s", method, objname); acpidev_free_object_name(objname); return (AE_NOT_FOUND); @@ -997,11 +997,9 @@ acpidev_resource_walk(ACPI_HANDLE hdl, char *method, } if (ACPI_FAILURE(rc)) { char *objname = acpidev_get_object_name(hdl); - ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to walk resource from method %s under %s.", - method, objname); + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to walk resource from " + "method %s under %s.", method, objname); acpidev_free_object_name(objname); - } return (rc); @@ -1016,7 +1014,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) ASSERT(infop != NULL); if (infop == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameter " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameter " "in acpidev_resource_process()."); return (AE_BAD_PARAMETER); } @@ -1027,7 +1025,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) consumer, &rhdl); if (ACPI_FAILURE(rc)) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: failed to walk ACPI resources of %s(%s).", + "!acpidev: 
failed to walk ACPI resources of %s(%s).", path, infop->awi_name); return (rc); } @@ -1041,7 +1039,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) "reg", (int *)rhdl->acpidev_regp, rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) / sizeof (int)) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'reg' property for %s.", path); rc = AE_ERROR; goto out; @@ -1051,7 +1049,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) "assigned-addresses", (int *)rhdl->acpidev_regp, rhdl->acpidev_reg_count * sizeof (acpidev_regspec_t) / sizeof (int)) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'assigned-addresses' property for %s.", path); rc = AE_ERROR; goto out; @@ -1062,7 +1060,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip, "interrupts", (int *)rhdl->acpidev_irqp, rhdl->acpidev_irq_count) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'interrupts' property for %s.", path); rc = AE_ERROR; goto out; @@ -1073,7 +1071,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) ndi_prop_update_int_array(DDI_DEV_T_NONE, infop->awi_dip, "dma-channels", (int *)rhdl->acpidev_dmap, rhdl->acpidev_dma_count) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'dma-channels' property for %s.", path); rc = AE_ERROR; goto out; @@ -1088,7 +1086,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) "ranges", (int *)rhdl->acpidev_rangep, rhdl->acpidev_range_count * sizeof (acpidev_ranges_t) / sizeof (int)) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'ranges' property for %s.", path); rc = AE_ERROR; goto out; @@ -1100,7 +1098,7 @@ acpidev_resource_process(acpidev_walk_info_t *infop, boolean_t consumer) "bus-range", (int *)rhdl->acpidev_busp, rhdl->acpidev_bus_count * sizeof (acpidev_bus_range_t) / sizeof (int)) != NDI_SUCCESS) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to set " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to set " "'bus-range' property for %s.", path); rc = AE_ERROR; goto out; diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c index 144cec2dbc..e19a6e92e4 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_scope.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -102,7 +102,7 @@ static acpidev_filter_rule_t acpidev_scope_filters[] = { static ACPI_STATUS acpidev_scope_probe(acpidev_walk_info_t *infop) { - ACPI_STATUS rc; + ACPI_STATUS rc = AE_OK; int flags; ASSERT(infop != NULL); @@ -112,19 +112,29 @@ acpidev_scope_probe(acpidev_walk_info_t *infop) return (AE_OK); } - if (infop->awi_op_type == ACPIDEV_OP_BOOT_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN | ACPIDEV_PROCESS_FLAG_CREATE; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_BOOT_REPROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else if (infop->awi_op_type == ACPIDEV_OP_HOTPLUG_PROBE) { - flags = ACPIDEV_PROCESS_FLAG_SCAN; - rc = acpidev_process_object(infop, flags); - } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u " + flags = ACPIDEV_PROCESS_FLAG_SCAN; + switch (infop->awi_op_type) { + case ACPIDEV_OP_BOOT_PROBE: + flags |= ACPIDEV_PROCESS_FLAG_CREATE; + break; + + case ACPIDEV_OP_BOOT_REPROBE: + break; + + case ACPIDEV_OP_HOTPLUG_PROBE: + flags |= ACPIDEV_PROCESS_FLAG_SYNCSTATUS | + ACPIDEV_PROCESS_FLAG_HOLDBRANCH; + break; + + default: + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u " "in acpidev_scope_probe().", infop->awi_op_type); rc = AE_BAD_PARAMETER; + break; + } + + if (rc == AE_OK) { + rc = acpidev_process_object(infop, flags); } if (ACPI_FAILURE(rc) && rc != AE_NOT_EXIST && rc != AE_ALREADY_EXISTS) { cmn_err(CE_WARN, @@ -150,7 +160,7 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen) ACPIDEV_ARRAY_PARAM(acpidev_scope_filters), devname, maxlen); } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: unknown operation type %u " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: unknown operation type %u " "in acpidev_scope_filter().", infop->awi_op_type); res = ACPIDEV_FILTER_FAILED; } @@ -158,7 +168,6 @@ acpidev_scope_filter(acpidev_walk_info_t *infop, char *devname, int maxlen) return (res); } -/*ARGSUSED*/ static ACPI_STATUS acpidev_scope_init(acpidev_walk_info_t *infop) { diff --git a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c index 869a935f0d..1b1a0cf3ad 100644 --- a/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c +++ b/usr/src/uts/i86pc/io/acpi/acpidev/acpidev_util.c @@ -24,12 +24,13 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ #include <sys/types.h> #include <sys/cmn_err.h> +#include <sys/note.h> #include <sys/sysmacros.h> #include <sys/sunddi.h> #include <sys/sunndi.h> @@ -55,7 +56,7 @@ acpidev_query_device_status(ACPI_HANDLE hdl) ASSERT(hdl != NULL); if (hdl == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: hdl is NULL in acpidev_query_device_status()."); + "!acpidev: hdl is NULL in acpidev_query_device_status()."); return (0); } @@ -114,7 +115,7 @@ acpidev_match_device_id(ACPI_DEVICE_INFO *infop, char **ids, int count) if (count == 0) { return (B_TRUE); } else if (infop == NULL || ids == NULL) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters in " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters in " "acpidev_match_device_id()."); return (B_FALSE); } @@ -206,7 +207,7 @@ acpidev_get_device_by_id(ACPI_HANDLE hdl, char **ids, int count, ASSERT(userfunc != NULL); if (hdl == NULL || userfunc == NULL || (ids == NULL && count != 0)) { - ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters " "in acpidev_get_device_by_id()."); return (AE_BAD_PARAMETER); } @@ -237,7 +238,7 @@ acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl, char *method, ASSERT(func != NULL); if (func == NULL) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: invalid parameters for acpidev_walk_apic()."); + "!acpidev: invalid parameters for acpidev_walk_apic()."); return (AE_BAD_PARAMETER); } @@ -361,7 +362,7 @@ acpidev_alloc_walk_info(acpidev_op_type_t op_type, int lvl, ACPI_HANDLE hdl, datap->aod_level = lvl; datap->aod_hdl = hdl; } else { - ACPIDEV_DEBUG(CE_WARN, "acpidev: failed to create object " + ACPIDEV_DEBUG(CE_WARN, "!acpidev: failed to create object " "handle for %s in acpidev_alloc_walk_info().", infop->awi_name); AcpiOsFree(infop->awi_info); @@ -416,12 +417,17 @@ acpidev_walk_info_get_pdip(acpidev_walk_info_t *infop) * Called to release resources when the corresponding object is going * to be destroyed. 
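+ * The handler is attached with AcpiAttachData() when the handle data is
+ * created below, so ACPICA only invokes it if the underlying ACPI object
+ * itself is deleted; the normal teardown path is
+ * acpidev_data_destroy_handle(), which detaches the data first and then
+ * frees it directly.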
*/ -/*ARGSUSED*/ static void -acpidev_get_object_handler(ACPI_HANDLE hdl, void *data) +acpidev_free_object_handler(ACPI_HANDLE hdl, void *data) { + _NOTE(ARGUNUSED(hdl)); + acpidev_data_handle_t objhdl = data; + if (objhdl->aod_class != NULL) { + atomic_dec_32(&objhdl->aod_class->adc_refcnt); + objhdl->aod_class = NULL; + } kmem_free(objhdl, sizeof (acpidev_data_handle_t)); } @@ -431,7 +437,7 @@ acpidev_data_get_handle(ACPI_HANDLE hdl) void *ptr; acpidev_data_handle_t objhdl = NULL; - if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr))) { + if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr))) { objhdl = (acpidev_data_handle_t)ptr; } @@ -444,7 +450,12 @@ acpidev_data_create_handle(ACPI_HANDLE hdl) acpidev_data_handle_t objhdl; objhdl = kmem_zalloc(sizeof (*objhdl), KM_SLEEP); - if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_get_object_handler, + objhdl->aod_bdtype = ACPIDEV_INVALID_BOARD; + objhdl->aod_bdnum = UINT32_MAX; + objhdl->aod_portid = UINT32_MAX; + objhdl->aod_class_id = ACPIDEV_CLASS_ID_INVALID; + + if (ACPI_FAILURE(AcpiAttachData(hdl, acpidev_free_object_handler, (void *)objhdl))) { cmn_err(CE_WARN, "!acpidev: failed to attach handle data to object."); @@ -459,9 +470,15 @@ void acpidev_data_destroy_handle(ACPI_HANDLE hdl) { void *ptr; + acpidev_data_handle_t objhdl = NULL; - if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_get_object_handler, &ptr)) && - ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_get_object_handler))) { + if (ACPI_SUCCESS(AcpiGetData(hdl, acpidev_free_object_handler, &ptr)) && + ACPI_SUCCESS(AcpiDetachData(hdl, acpidev_free_object_handler))) { + objhdl = ptr; + if (objhdl->aod_class != NULL) { + atomic_dec_32(&objhdl->aod_class->adc_refcnt); + objhdl->aod_class = NULL; + } kmem_free(ptr, sizeof (acpidev_data_handle_t)); } } @@ -502,14 +519,14 @@ void acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag) { ASSERT(hdl != NULL); - hdl->aod_eflag |= flag; + atomic_or_32(&hdl->aod_eflag, flag); } void acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag) { ASSERT(hdl != NULL); - hdl->aod_eflag &= ~flag; + atomic_and_32(&hdl->aod_eflag, ~flag); } uint32_t @@ -519,6 +536,27 @@ acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag) return (hdl->aod_eflag & flag); } +boolean_t +acpidev_data_dr_capable(acpidev_data_handle_t hdl) +{ + ASSERT(hdl != NULL); + return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE); +} + +boolean_t +acpidev_data_dr_ready(acpidev_data_handle_t hdl) +{ + ASSERT(hdl != NULL); + return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY); +} + +boolean_t +acpidev_data_dr_failed(acpidev_data_handle_t hdl) +{ + ASSERT(hdl != NULL); + return (hdl->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED); +} + static char * acpidev_generate_pseudo_unitaddr(char *uid, acpidev_class_id_t cid, char *buf, size_t len) @@ -591,7 +629,7 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len) } if (cnt != 1 && cnt != 2) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: invalid uid format string '%s'.", fmt); + "!acpidev: invalid uid format string '%s'.", fmt); return (NULL); } @@ -605,11 +643,11 @@ acpidev_gen_unitaddr(char *uid, char *fmt, char *buf, size_t len) */ if (cnt == 2 && snprintf(buf, len, "%u,%u", id2, id1) >= len) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: generated unitaddr is too long."); + "!acpidev: generated unitaddr is too long."); return (NULL); } else if (cnt == 1 && snprintf(buf, len, "%u", id1) >= len) { ACPIDEV_DEBUG(CE_WARN, - "acpidev: generated unitaddr is too long."); + "!acpidev: generated unitaddr is too 
long.");
 		return (NULL);
 	}
 
@@ -681,7 +719,7 @@ acpidev_set_unitaddr(acpidev_walk_info_t *infop, char **fmts, size_t nfmt,
 	if (infop == NULL || infop->awi_dip == NULL ||
 	    infop->awi_info == NULL) {
 		ACPIDEV_DEBUG(CE_WARN,
-		    "acpidev: invalid parameters in acpidev_set_unitaddr().");
+		    "!acpidev: invalid parameters in acpidev_set_unitaddr().");
 		return (AE_BAD_PARAMETER);
 	}
 
@@ -758,7 +796,7 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
 	ASSERT(compat != NULL || acount == 0);
 	if (infop == NULL || infop->awi_dip == NULL || infop->awi_info == NULL ||
 	    (compat == NULL && acount != 0)) {
-		ACPIDEV_DEBUG(CE_WARN, "acpidev: invalid parameters "
+		ACPIDEV_DEBUG(CE_WARN, "!acpidev: invalid parameters "
 		    "in acpidev_set_compatible().");
 		return (AE_BAD_PARAMETER);
 	}
 
@@ -802,3 +840,90 @@ acpidev_set_compatible(acpidev_walk_info_t *infop, char **compat, int acount)
 
 	return (AE_OK);
 }
+
+/* Evaluate _OST method under object, which is used to support hotplug events. */
+ACPI_STATUS
+acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code, uint32_t status,
+    char *bufp, size_t len)
+{
+	ACPI_STATUS rc;
+	ACPI_OBJECT args[3];
+	ACPI_OBJECT_LIST arglist;
+
+	args[0].Type = ACPI_TYPE_INTEGER;
+	args[0].Integer.Value = code;
+	args[1].Type = ACPI_TYPE_INTEGER;
+	args[1].Integer.Value = status;
+	args[2].Type = ACPI_TYPE_BUFFER;
+	args[2].Buffer.Pointer = (UINT8 *)bufp;
+	args[2].Buffer.Length = (UINT32)len;
+	if (bufp == NULL || len == 0) {
+		arglist.Count = 2;
+	} else {
+		arglist.Count = 3;
+	}
+	arglist.Pointer = args;
+	rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_OST, &arglist, NULL);
+	if (rc != AE_OK && rc != AE_NOT_FOUND) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to evaluate _OST method, code 0x%x.", rc);
+	}
+
+	return (rc);
+}
+
+ACPI_STATUS
+acpidev_eval_ej0(ACPI_HANDLE hdl)
+{
+	ACPI_STATUS rc;
+	ACPI_OBJECT args[1];
+	ACPI_OBJECT_LIST arglist;
+
+	/*
+	 * Quotation from ACPI spec 4.0 section 6.3.3.
+	 * Arg0 An Integer containing a device ejection control
+	 * 0 Cancel a mark for ejection request (EJ0 will never be called
+	 *   with this value)
+	 * 1 Hot eject or mark for ejection
+	 */
+	args[0].Type = ACPI_TYPE_INTEGER;
+	args[0].Integer.Value = 1;
+	arglist.Count = 1;
+	arglist.Pointer = args;
+	rc = AcpiEvaluateObject(hdl, ACPIDEV_METHOD_NAME_EJ0, &arglist, NULL);
+	if (rc != AE_OK) {
+		ACPIDEV_DEBUG(CE_WARN,
+		    "!acpidev: failed to evaluate _EJ0 method, code 0x%x.", rc);
+	}
+
+	return (rc);
+}
+
+ACPI_STATUS
+acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp)
+{
+	int pxmid;
+
+	ASSERT(idp != NULL);
+
+	/*
+	 * Try to evaluate ACPI _PXM method to get proximity domain id.
+	 * Quotation from ACPI 4.0:
+	 * If the Local APIC ID / Local SAPIC ID / Local x2APIC ID of a
+	 * dynamically added processor is not present in the System Resource
+	 * Affinity Table (SRAT), a _PXM object must exist for the processor's
+	 * device or one of its ancestors in the ACPI Namespace.
+	 */
+	while (hdl != NULL) {
+		if (ACPI_SUCCESS(acpica_eval_int(hdl,
+		    ACPIDEV_METHOD_NAME_PXM, &pxmid))) {
+			*idp = (uint32_t)pxmid;
+			return (AE_OK);
+		}
+		if (ACPI_FAILURE(AcpiGetParent(hdl, &hdl))) {
+			break;
+		}
+	}
+
+	return (AE_NOT_FOUND);
+}
diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
index d9908aec59..35fb4b6af5 100644
--- a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
+++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_drv.c
@@ -24,7 +24,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. */ /* @@ -40,6 +40,7 @@ #include <sys/ddi.h> #include <sys/ddi_impldefs.h> #include <sys/ddifm.h> +#include <sys/note.h> #include <sys/ndifm.h> #include <sys/sunddi.h> #include <sys/sunndi.h> @@ -175,6 +176,9 @@ _init(void) return (error); } + /* Initialize event subsystem. */ + acpinex_event_init(); + /* Install the module. */ if ((error = mod_install(&modlinkage)) != 0) { cmn_err(CE_WARN, "acpinex: failed to install module."); @@ -197,6 +201,9 @@ _fini(void) return (error); } + /* Shut down event subsystem. */ + acpinex_event_fini(); + /* Free the soft state info. */ ddi_soft_state_fini(&acpinex_softstates); @@ -211,10 +218,11 @@ _info(struct modinfo *modinfop) return (mod_info(&modlinkage, modinfop)); } -/* ARGSUSED */ static int acpinex_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { + _NOTE(ARGUNUSED(dip)); + dev_t dev; int instance; @@ -268,16 +276,24 @@ acpinex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) (void) ddi_pathname(devi, softsp->ans_path); if (ACPI_FAILURE(acpica_get_handle(devi, &softsp->ans_hdl))) { ACPINEX_DEBUG(CE_WARN, - "acpinex: failed to get ACPI handle for %s.", + "!acpinex: failed to get ACPI handle for %s.", softsp->ans_path); ddi_soft_state_free(acpinex_softstates, instance); return (DDI_FAILURE); } mutex_init(&softsp->ans_lock, NULL, MUTEX_DRIVER, NULL); + /* Install event handler for child/descendant objects. */ + if (acpinex_event_scan(softsp, B_TRUE) != DDI_SUCCESS) { + cmn_err(CE_WARN, "!acpinex: failed to install event handler " + "for children of %s.", softsp->ans_path); + } + /* nothing to suspend/resume here */ (void) ddi_prop_update_string(DDI_DEV_T_NONE, devi, "pm-hardware-state", "no-suspend-resume"); + (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, + DDI_NO_AUTODETACH, 1); acpinex_fm_init(softsp); ddi_report_dev(devi); @@ -301,17 +317,24 @@ acpinex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) softsp = ddi_get_soft_state(acpinex_softstates, instance); if (softsp == NULL) { - ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state " + ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state " "object for instance %d in acpinex_detach()", instance); return (DDI_FAILURE); } switch (cmd) { case DDI_DETACH: + if (acpinex_event_scan(softsp, B_FALSE) != DDI_SUCCESS) { + cmn_err(CE_WARN, "!acpinex: failed to uninstall event " + "handler for children of %s.", softsp->ans_path); + return (DDI_FAILURE); + } ddi_remove_minor_node(devi, NULL); acpinex_fm_fini(softsp); mutex_destroy(&softsp->ans_lock); ddi_soft_state_free(acpinex_softstates, instance); + (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, + DDI_NO_AUTODETACH, 0); return (DDI_SUCCESS); case DDI_SUSPEND: @@ -332,13 +355,11 @@ name_child(dev_info_t *child, char *name, int namelen) name[0] = '\0'; if (ddi_prop_lookup_string(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS, ACPIDEV_PROP_NAME_UNIT_ADDR, &unitaddr) == DDI_SUCCESS) { - (void) strncpy(name, unitaddr, namelen - 1); - name[namelen - 1] = '\0'; + (void) strlcpy(name, unitaddr, namelen); ddi_prop_free(unitaddr); } else { - ACPINEX_DEBUG(CE_NOTE, - "acpinex: failed to lookup child unit-address prop for %p.", - (void *)child); + ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to lookup child " + "unit-address prop for %p.", (void *)child); } return (DDI_SUCCESS); @@ -406,21 +427,22 @@ acpinex_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp, off_t offset, off_t len, caddr_t *vaddrp) { ACPINEX_DEBUG(CE_WARN, - "acpinex: acpinex_bus_map called and 
it's unimplemented."); + "!acpinex: acpinex_bus_map called and it's unimplemented."); return (DDI_ME_UNIMPLEMENTED); } -/* ARGSUSED */ static int acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp) { + _NOTE(ARGUNUSED(flags, otyp, credp)); + minor_t minor, instance; acpinex_softstate_t *softsp; minor = getminor(*devi); instance = ACPINEX_GET_INSTANCE(minor); if (instance >= ACPINEX_INSTANCE_MAX) { - ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of " + ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of " "range in acpinex_open, max %d.", instance, ACPINEX_INSTANCE_MAX - 1); return (EINVAL); @@ -428,7 +450,7 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp) softsp = ddi_get_soft_state(acpinex_softstates, instance); if (softsp == NULL) { - ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state " + ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state " "object for instance %d in acpinex_open().", instance); return (EINVAL); } @@ -437,23 +459,24 @@ acpinex_open(dev_t *devi, int flags, int otyp, cred_t *credp) return (0); } else { ACPINEX_DEBUG(CE_WARN, - "acpinex: invalid minor number %d in acpinex_open().", + "!acpinex: invalid minor number %d in acpinex_open().", minor); return (EINVAL); } } -/* ARGSUSED */ static int acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp) { + _NOTE(ARGUNUSED(flags, otyp, credp)); + minor_t minor, instance; acpinex_softstate_t *softsp; minor = getminor(dev); instance = ACPINEX_GET_INSTANCE(minor); if (instance >= ACPINEX_INSTANCE_MAX) { - ACPINEX_DEBUG(CE_WARN, "acpinex: instance number %d out of " + ACPINEX_DEBUG(CE_WARN, "!acpinex: instance number %d out of " "range in acpinex_close(), max %d.", instance, ACPINEX_INSTANCE_MAX - 1); return (EINVAL); @@ -461,7 +484,7 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp) softsp = ddi_get_soft_state(acpinex_softstates, instance); if (softsp == NULL) { - ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state " + ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state " "object for instance %d in acpinex_close().", instance); return (EINVAL); } @@ -470,17 +493,18 @@ acpinex_close(dev_t dev, int flags, int otyp, cred_t *credp) return (0); } else { ACPINEX_DEBUG(CE_WARN, - "acpinex: invalid minor number %d in acpinex_close().", + "!acpinex: invalid minor number %d in acpinex_close().", minor); return (EINVAL); } } -/* ARGSUSED */ static int acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) { + _NOTE(ARGUNUSED(cmd, arg, mode, credp, rvalp)); + int rv = 0; minor_t minor, instance; acpinex_softstate_t *softsp; @@ -488,21 +512,21 @@ acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, minor = getminor(dev); instance = ACPINEX_GET_INSTANCE(minor); if (instance >= ACPINEX_INSTANCE_MAX) { - ACPINEX_DEBUG(CE_NOTE, "acpinex: instance number %d out of " + ACPINEX_DEBUG(CE_NOTE, "!acpinex: instance number %d out of " "range in acpinex_ioctl(), max %d.", instance, ACPINEX_INSTANCE_MAX - 1); return (EINVAL); } softsp = ddi_get_soft_state(acpinex_softstates, instance); if (softsp == NULL) { - ACPINEX_DEBUG(CE_WARN, "acpinex: failed to get soft state " + ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to get soft state " "object for instance %d in acpinex_ioctl().", instance); return (EINVAL); } rv = ENOTSUP; ACPINEX_DEBUG(CE_WARN, - "acpinex: invalid minor number %d in acpinex_ioctl().", minor); + "!acpinex: invalid minor number %d in acpinex_ioctl().", minor); return (rv); } @@ -512,11 +536,12 @@ 
acpinex_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, * Register error handling callback with our parent. We will just call * our children's error callbacks and return their status. */ -/*ARGSUSED*/ static int acpinex_err_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) { + _NOTE(ARGUNUSED(impl_data)); + /* Call our childrens error handlers */ return (ndi_fm_handler_dispatch(dip, NULL, derr)); } @@ -560,11 +585,12 @@ acpinex_fm_fini(acpinex_softstate_t *softsp) * Initialize FMA resources for child devices. * Called when child calls ddi_fm_init(). */ -/*ARGSUSED*/ static int acpinex_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap, ddi_iblock_cookie_t *ibc) { + _NOTE(ARGUNUSED(tdip, cap)); + acpinex_softstate_t *softsp = ddi_get_soft_state(acpinex_softstates, ddi_get_instance(dip)); diff --git a/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c new file mode 100644 index 0000000000..633c99b442 --- /dev/null +++ b/usr/src/uts/i86pc/io/acpi/acpinex/acpinex_event.c @@ -0,0 +1,712 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +#include <sys/types.h> +#include <sys/atomic.h> +#include <sys/bitmap.h> +#include <sys/cmn_err.h> +#include <sys/note.h> +#include <sys/sunndi.h> +#include <sys/fastboot_impl.h> +#include <sys/sysevent.h> +#include <sys/sysevent/dr.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/acpidev.h> +#include <sys/acpidev_dr.h> +#include <sys/acpinex.h> + +int acpinex_event_support_remove = 0; + +static volatile uint_t acpinex_dr_event_cnt = 0; +static ulong_t acpinex_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)]; + +/* + * Generate DR_REQ event to syseventd. + * Please refer to sys/sysevent/dr.h for message definition. + */ +static int +acpinex_event_generate_event(dev_info_t *dip, ACPI_HANDLE hdl, int req, + int event, char *objname) +{ + int rv = 0; + sysevent_id_t eid; + sysevent_value_t evnt_val; + sysevent_attr_list_t *evnt_attr_list = NULL; + char *attach_pnt; + char event_type[32]; + + /* Add "attachment point" attribute. 
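+	 * The finished sysevent carries class EC_DR and subclass ESC_DR_REQ
+	 * with three string attributes: DR_AP_ID (built here), DR_REQ_TYPE
+	 * and ACPIDEV_EVENT_TYPE_ATTR_NAME (both added below); these are
+	 * what userland DR consumers match on.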
*/ + attach_pnt = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + if (ACPI_FAILURE(acpidev_dr_get_attachment_point(hdl, + attach_pnt, MAXPATHLEN))) { + cmn_err(CE_WARN, + "!acpinex: failed to generate AP name for %s.", objname); + kmem_free(attach_pnt, MAXPATHLEN); + return (-1); + } + ASSERT(attach_pnt[0] != '\0'); + evnt_val.value_type = SE_DATA_TYPE_STRING; + evnt_val.value.sv_string = attach_pnt; + rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val, KM_SLEEP); + if (rv != 0) { + cmn_err(CE_WARN, + "!acpinex: failed to add attr [%s] for %s event.", + DR_AP_ID, EC_DR); + kmem_free(attach_pnt, MAXPATHLEN); + return (rv); + } + + /* Add "request type" attribute. */ + evnt_val.value_type = SE_DATA_TYPE_STRING; + evnt_val.value.sv_string = SE_REQ2STR(req); + rv = sysevent_add_attr(&evnt_attr_list, DR_REQ_TYPE, &evnt_val, + KM_SLEEP); + if (rv != 0) { + cmn_err(CE_WARN, + "!acpinex: failed to add attr [%s] for %s event.", + DR_REQ_TYPE, EC_DR); + sysevent_free_attr(evnt_attr_list); + kmem_free(attach_pnt, MAXPATHLEN); + return (rv); + } + + /* Add "acpi-event-type" attribute. */ + switch (event) { + case ACPI_NOTIFY_BUS_CHECK: + (void) snprintf(event_type, sizeof (event_type), + ACPIDEV_EVENT_TYPE_BUS_CHECK); + break; + case ACPI_NOTIFY_DEVICE_CHECK: + (void) snprintf(event_type, sizeof (event_type), + ACPIDEV_EVENT_TYPE_DEVICE_CHECK); + break; + case ACPI_NOTIFY_DEVICE_CHECK_LIGHT: + (void) snprintf(event_type, sizeof (event_type), + ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT); + break; + case ACPI_NOTIFY_EJECT_REQUEST: + (void) snprintf(event_type, sizeof (event_type), + ACPIDEV_EVENT_TYPE_EJECT_REQUEST); + break; + default: + cmn_err(CE_WARN, + "!acpinex: unknown ACPI event type %d.", event); + sysevent_free_attr(evnt_attr_list); + kmem_free(attach_pnt, MAXPATHLEN); + return (-1); + } + evnt_val.value_type = SE_DATA_TYPE_STRING; + evnt_val.value.sv_string = event_type; + rv = sysevent_add_attr(&evnt_attr_list, ACPIDEV_EVENT_TYPE_ATTR_NAME, + &evnt_val, KM_SLEEP); + if (rv != 0) { + cmn_err(CE_WARN, + "!acpinex: failed to add attr [%s] for %s event.", + ACPIDEV_EVENT_TYPE_ATTR_NAME, EC_DR); + sysevent_free_attr(evnt_attr_list); + kmem_free(attach_pnt, MAXPATHLEN); + return (rv); + } + + rv = ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DR, ESC_DR_REQ, + evnt_attr_list, &eid, KM_SLEEP); + if (rv != DDI_SUCCESS) { + cmn_err(CE_WARN, + "!acpinex: failed to log DR_REQ event for %s.", objname); + rv = -1; + } + + nvlist_free(evnt_attr_list); + kmem_free(attach_pnt, MAXPATHLEN); + + return (rv); +} + +/* + * Event handler for ACPI EJECT_REQUEST notifications. + * EJECT_REQUEST notifications should be generated on the device to be ejected, + * so no need to scan subtree of it. + * It also invokes ACPI _OST method to update event status if call_ost is true. 
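+ *
+ * End to end, the eject handshake built from the helpers this change
+ * introduces is roughly the following (an illustrative sketch only; the
+ * _EJ0 evaluation is driven later by the DR framework, not from this
+ * handler):
+ *
+ *	(void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST,
+ *	    ACPI_OST_STA_EJECT_IN_PROGRESS, NULL, 0);
+ *	... offline and release the affected devices ...
+ *	(void) acpidev_eval_ej0(hdl);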
+ */ +static void +acpinex_event_handle_eject_request(ACPI_HANDLE hdl, acpinex_softstate_t *sp, + boolean_t call_ost) +{ + int code; + char *objname; + + ASSERT(hdl != NULL); + objname = acpidev_get_object_name(hdl); + + ASSERT(sp != NULL); + ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL); + if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) { + if (call_ost) { + (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST, + ACPI_OST_STA_FAILURE, NULL, 0); + } + ACPINEX_DEBUG(CE_WARN, + "!acpinex: softstate data structure is invalid."); + cmn_err(CE_WARN, + "!acpinex: failed to handle EJECT_REQUEST event from %s.", + objname); + acpidev_free_object_name(objname); + return; + } + + if (acpinex_event_support_remove == 0) { + cmn_err(CE_WARN, + "!acpinex: hot-removing of device %s is unsupported.", + objname); + code = ACPI_OST_STA_EJECT_NOT_SUPPORT; + } else if (acpinex_event_generate_event(sp->ans_dip, hdl, + SE_OUTGOING_RES, ACPI_NOTIFY_EJECT_REQUEST, objname) != 0) { + cmn_err(CE_WARN, "!acpinex: failed to generate ESC_DR_REQ " + "event for device eject request from %s.", objname); + code = ACPI_OST_STA_FAILURE; + } else { + cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event for " + "device eject request from %s.", objname); + code = ACPI_OST_STA_EJECT_IN_PROGRESS; + } + if (call_ost) { + (void) acpidev_eval_ost(hdl, ACPI_NOTIFY_EJECT_REQUEST, + code, NULL, 0); + } + + acpidev_free_object_name(objname); +} + +struct acpinex_event_check_arg { + acpinex_softstate_t *softstatep; + int event_type; + uint32_t device_insert; + uint32_t device_remove; + uint32_t device_fail; +}; + +static ACPI_STATUS +acpinex_event_handle_check_one(ACPI_HANDLE hdl, UINT32 lvl, void *ctx, + void **retval) +{ + _NOTE(ARGUNUSED(lvl, retval)); + + char *objname; + int status, psta, csta; + acpidev_data_handle_t dhdl; + struct acpinex_event_check_arg *argp; + + ASSERT(hdl != NULL); + ASSERT(ctx != NULL); + argp = (struct acpinex_event_check_arg *)ctx; + + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + /* Skip subtree if failed to get the data handle. */ + ACPINEX_DEBUG(CE_NOTE, + "!acpinex: failed to get data associated with %p.", hdl); + return (AE_CTRL_DEPTH); + } else if (!acpidev_data_dr_capable(dhdl)) { + return (AE_OK); + } + + objname = acpidev_get_object_name(hdl); + + status = 0; + /* Query previous device status. */ + psta = acpidev_data_get_status(dhdl); + if (acpidev_check_device_enabled(psta)) { + status |= 0x1; + } + /* Query current device status. */ + csta = acpidev_query_device_status(hdl); + if (acpidev_check_device_enabled(csta)) { + status |= 0x2; + } + + switch (status) { + case 0x0: + /*FALLTHROUGH*/ + case 0x3: + /* No status changes, keep on walking. */ + acpidev_free_object_name(objname); + return (AE_OK); + + case 0x1: + /* Surprising removal. */ + cmn_err(CE_WARN, + "!acpinex: device %s has been surprisingly removed.", + objname); + if (argp->event_type == ACPI_NOTIFY_BUS_CHECK) { + /* + * According to ACPI spec, BUS_CHECK notification + * should be triggered for hot-adding events only. + */ + ACPINEX_DEBUG(CE_WARN, + "!acpinex: device %s has been surprisingly removed " + "when handling BUS_CHECK event.", objname); + } + acpidev_free_object_name(objname); + argp->device_remove++; + return (AE_CTRL_DEPTH); + + case 0x2: + /* Hot-adding. 
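+		 * (status is a two-bit vector: bit 0 records whether the
+		 * device was enabled on the previous query, bit 1 whether it
+		 * is enabled now. 0x2 therefore means newly enabled, i.e.
+		 * hot-added, just as 0x1 above means it has vanished and
+		 * 0x0/0x3 mean no change.)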
*/ + ACPINEX_DEBUG(CE_NOTE, + "!acpinex: device %s has been inserted.", objname); + argp->device_insert++; + if (acpinex_event_generate_event(argp->softstatep->ans_dip, hdl, + SE_INCOMING_RES, argp->event_type, objname) != 0) { + cmn_err(CE_WARN, + "!acpinex: failed to generate ESC_DR_REQ event for " + "device insert request from %s.", objname); + argp->device_fail++; + } else { + cmn_err(CE_NOTE, "!acpinex: generate ESC_DR_REQ event " + "for device insert request from %s.", objname); + } + acpidev_free_object_name(objname); + return (AE_OK); + + default: + ASSERT(0); + break; + } + + return (AE_ERROR); +} + +/* + * Event handler for BUS_CHECK/DEVICE_CHECK/DEVICE_CHECK_LIGHT notifications. + * These events may be signaled on parent/ancestor of devices to be hot-added, + * so need to scan ACPI namespace to figure out devices in question. + * It also invokes ACPI _OST method to update event status if call_ost is true. + */ +static void +acpinex_event_handle_check_request(int event, ACPI_HANDLE hdl, + acpinex_softstate_t *sp, boolean_t call_ost) +{ + ACPI_STATUS rv; + int code; + char *objname; + struct acpinex_event_check_arg arg; + + ASSERT(hdl != NULL); + objname = acpidev_get_object_name(hdl); + + ASSERT(sp != NULL); + ASSERT(sp->ans_dip != NULL && sp->ans_hdl != NULL); + if (sp == NULL || sp->ans_dip == NULL || sp->ans_hdl == NULL) { + if (call_ost) { + (void) acpidev_eval_ost(hdl, event, + ACPI_OST_STA_FAILURE, NULL, 0); + } + ACPINEX_DEBUG(CE_WARN, + "!acpinex: softstate data structure is invalid."); + cmn_err(CE_WARN, "!acpinex: failed to handle " + "BUS/DEVICE_CHECK event from %s.", objname); + acpidev_free_object_name(objname); + return; + } + + bzero(&arg, sizeof (arg)); + arg.event_type = event; + arg.softstatep = sp; + rv = acpinex_event_handle_check_one(hdl, 0, &arg, NULL); + if (ACPI_SUCCESS(rv)) { + rv = AcpiWalkNamespace(ACPI_TYPE_DEVICE, hdl, + ACPIDEV_MAX_ENUM_LEVELS, + &acpinex_event_handle_check_one, NULL, &arg, NULL); + } + + if (ACPI_FAILURE(rv)) { + /* Failed to scan the ACPI namespace. */ + cmn_err(CE_WARN, "!acpinex: failed to handle event %d from %s.", + event, objname); + code = ACPI_OST_STA_FAILURE; + } else if (arg.device_remove != 0) { + /* Surprising removal happened. */ + ACPINEX_DEBUG(CE_WARN, + "!acpinex: some devices have been surprisingly removed."); + code = ACPI_OST_STA_NOT_SUPPORT; + } else if (arg.device_fail != 0) { + /* Failed to handle some devices. */ + ACPINEX_DEBUG(CE_WARN, + "!acpinex: failed to check status of some devices."); + code = ACPI_OST_STA_FAILURE; + } else if (arg.device_insert == 0) { + /* No hot-added devices found. */ + cmn_err(CE_WARN, + "!acpinex: no hot-added devices under %s found.", objname); + code = ACPI_OST_STA_FAILURE; + } else { + code = ACPI_OST_STA_INSERT_IN_PROGRESS; + } + if (call_ost) { + (void) acpidev_eval_ost(hdl, event, code, NULL, 0); + } + + acpidev_free_object_name(objname); +} + +static void +acpinex_event_system_handler(ACPI_HANDLE hdl, UINT32 type, void *arg) +{ + acpinex_softstate_t *sp; + + ASSERT(hdl != NULL); + ASSERT(arg != NULL); + sp = (acpinex_softstate_t *)arg; + + acpidev_dr_lock_all(); + mutex_enter(&sp->ans_lock); + + switch (type) { + case ACPI_NOTIFY_BUS_CHECK: + /* + * Bus Check. This notification is performed on a device object + * to indicate to OSPM that it needs to perform the Plug and + * Play re-enumeration operation on the device tree starting + * from the point where it has been notified. OSPM will only + * perform this operation at boot, and when notified. 
It is
+	 * the responsibility of the ACPI AML code to notify OSPM at
+	 * any other times that this operation is required. The more
+	 * accurately and closer to the actual device tree change the
+	 * notification can be done, the more efficient the operating
+	 * system response will be; however, it can also be an issue
+	 * when a device change cannot be confirmed. For example, if
+	 * the hardware cannot notice a device change for a particular
+	 * location during a system sleeping state, it issues a Bus
+	 * Check notification on wake to inform OSPM that it needs to
+	 * check the configuration for a device change.
+	 */
+	/*FALLTHROUGH*/
+	case ACPI_NOTIFY_DEVICE_CHECK:
+		/*
+		 * Device Check. Used to notify OSPM that the device either
+		 * appeared or disappeared. If the device has appeared, OSPM
+		 * will re-enumerate from the parent. If the device has
+		 * disappeared, OSPM will invalidate the state of the device.
+		 * OSPM may optimize out re-enumeration. If _DCK is present,
+		 * then Notify(object,1) is assumed to indicate an undock
+		 * request.
+		 */
+		/*FALLTHROUGH*/
+	case ACPI_NOTIFY_DEVICE_CHECK_LIGHT:
+		/*
+		 * Device Check Light. Used to notify OSPM that the device
+		 * either appeared or disappeared. If the device has appeared,
+		 * OSPM will re-enumerate from the device itself, not the
+		 * parent. If the device has disappeared, OSPM will invalidate
+		 * the state of the device.
+		 */
+		atomic_inc_uint(&acpinex_dr_event_cnt);
+		acpinex_event_handle_check_request(type, hdl, sp, B_TRUE);
+		break;
+
+	case ACPI_NOTIFY_EJECT_REQUEST:
+		/*
+		 * Eject Request. Used to notify OSPM that the device should
+		 * be ejected, and that OSPM needs to perform the Plug and Play
+		 * ejection operation. OSPM will run the _EJx method.
+		 */
+		atomic_inc_uint(&acpinex_dr_event_cnt);
+		acpinex_event_handle_eject_request(hdl, sp, B_TRUE);
+		break;
+
+	default:
+		ACPINEX_DEBUG(CE_NOTE,
+		    "!acpinex: unhandled event (%d) on hdl %p under %s.",
+		    type, hdl, sp->ans_path);
+		(void) acpidev_eval_ost(hdl, type, ACPI_OST_STA_NOT_SUPPORT,
+		    NULL, 0);
+		break;
+	}
+
+	if (acpinex_dr_event_cnt != 0) {
+		/*
+		 * Disable fast reboot if a CPU/MEM/IOH hotplug event happens.
+		 * Note: this is a temporary solution and will be revised when
+		 * fast reboot can support CPU/MEM/IOH DR operations in the
+		 * future.
+		 *
+		 * ACPI BIOS generates some static ACPI tables, such as MADT,
+		 * SRAT and SLIT, to describe the system hardware configuration
+		 * on power-on. When a CPU/MEM/IOH hotplug event happens, those
+		 * static tables won't be updated and will become stale.
+		 *
+		 * If we reset the system by fast reboot, BIOS will have no
+		 * chance to regenerate those stale static tables. Fast reboot
+		 * can't tolerate such inconsistency between stale ACPI tables
+		 * and the real hardware configuration yet, so it must stay
+		 * disabled until it is enhanced to support CPU/MEM/IOH DR
+		 * operations.
+		 */
+		fastreboot_disable(FBNS_HOTPLUG);
+	}
+
+	mutex_exit(&sp->ans_lock);
+	acpidev_dr_unlock_all();
+}
+
+/*
+ * Install event handler for ACPI system events.
+ * The acpinex driver handles ACPI system events for its children;
+ * device-specific events will be handled by the device drivers themselves.
+ * Return DDI_SUCCESS on success, and DDI_FAILURE on failure.
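+ * ACPI_SYSTEM_NOTIFY registers for the system-defined notify values
+ * (0x00-0x7f, which cover the BUS_CHECK/DEVICE_CHECK/EJECT_REQUEST codes
+ * handled above); device-specific values (0x80-0xff) would require
+ * ACPI_DEVICE_NOTIFY or ACPI_ALL_NOTIFY instead.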
+ */ +static int +acpinex_event_install_handler(ACPI_HANDLE hdl, void *arg, + ACPI_DEVICE_INFO *infop, acpidev_data_handle_t dhdl) +{ + int rc = DDI_SUCCESS; + + ASSERT(hdl != NULL); + ASSERT(dhdl != NULL); + ASSERT(infop != NULL); + + /* + * Check whether the event handler has already been installed on the + * device object. With the introduction of ACPI Alias objects, which are + * similar to symlinks in file systems, there may be multiple name + * objects in the ACPI namespace pointing to the same underlying device + * object. Those Alias objects need to be filtered out, otherwise + * it will attempt to install the event handler multiple times on the + * same device object which will fail. + */ + if (acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) { + return (DDI_SUCCESS); + } + if (ACPI_SUCCESS(AcpiInstallNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY, + acpinex_event_system_handler, arg))) { + acpidev_data_set_flag(dhdl, ACPIDEV_DATA_HANDLER_READY); + } else { + char *objname; + + objname = acpidev_get_object_name(hdl); + cmn_err(CE_WARN, + "!acpinex: failed to install system event handler for %s.", + objname); + acpidev_free_object_name(objname); + rc = DDI_FAILURE; + } + + return (rc); +} + +/* + * Uninstall event handler for ACPI system events. + * Return DDI_SUCCESS on success, and DDI_FAILURE on failure. + */ +static int +acpinex_event_uninstall_handler(ACPI_HANDLE hdl, ACPI_DEVICE_INFO *infop, + acpidev_data_handle_t dhdl) +{ + ASSERT(hdl != NULL); + ASSERT(dhdl != NULL); + ASSERT(infop != NULL); + + if (!acpidev_data_get_flag(dhdl, ACPIDEV_DATA_HANDLER_READY)) { + return (DDI_SUCCESS); + } + if (ACPI_SUCCESS(AcpiRemoveNotifyHandler(hdl, ACPI_SYSTEM_NOTIFY, + acpinex_event_system_handler))) { + acpidev_data_clear_flag(dhdl, ACPIDEV_DATA_HANDLER_READY); + } else { + char *objname; + + objname = acpidev_get_object_name(hdl); + cmn_err(CE_WARN, "!acpinex: failed to uninstall system event " + "handler for %s.", objname); + acpidev_free_object_name(objname); + return (DDI_FAILURE); + } + + return (DDI_SUCCESS); +} + +/* + * Install/uninstall ACPI system event handler for child objects of hdl. + * Return DDI_SUCCESS on success, and DDI_FAILURE on failure. + */ +static int +acpinex_event_walk(boolean_t init, acpinex_softstate_t *sp, ACPI_HANDLE hdl) +{ + int rc; + int retval = DDI_SUCCESS; + dev_info_t *dip; + ACPI_HANDLE child = NULL; + ACPI_OBJECT_TYPE type; + ACPI_DEVICE_INFO *infop; + acpidev_data_handle_t dhdl; + + /* Walk all child objects. */ + ASSERT(hdl != NULL); + while (ACPI_SUCCESS(AcpiGetNextObject(ACPI_TYPE_ANY, hdl, child, + &child))) { + /* Skip unwanted object types. */ + if (ACPI_FAILURE(AcpiGetType(child, &type)) || + type > ACPI_TYPE_NS_NODE_MAX || + BT_TEST(acpinex_object_type_mask, type) == 0) { + continue; + } + + /* Get data associated with the object. Skip it if fails. */ + dhdl = acpidev_data_get_handle(child); + if (dhdl == NULL) { + ACPINEX_DEBUG(CE_NOTE, "!acpinex: failed to get data " + "associated with %p, skip.", child); + continue; + } + + /* Query ACPI object info for the object. 
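+		 * AcpiGetObjectInfo() allocates the ACPI_DEVICE_INFO buffer
+		 * on the caller's behalf; every path below must release it
+		 * with AcpiOsFree(), as done at the bottom of this loop.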
*/
+		if (ACPI_FAILURE(AcpiGetObjectInfo(child, &infop))) {
+			cmn_err(CE_WARN,
+			    "!acpinex: failed to get object info for %p.",
+			    child);
+			continue;
+		}
+
+		if (init) {
+			rc = acpinex_event_install_handler(child, sp, infop,
+			    dhdl);
+			if (rc != DDI_SUCCESS) {
+				ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+				    "install handler for child %p of %s.",
+				    child, sp->ans_path);
+				retval = DDI_FAILURE;
+			/*
+			 * Try to handle descendants if both of the
+			 * following two conditions are true:
+			 * 1) Device corresponding to the current object is
+			 *    enabled. If the device is absent/disabled,
+			 *    no notification should be generated from
+			 *    descendant objects of it.
+			 * 2) No Solaris device node has been created for the
+			 *    current object yet. If the device node has been
+			 *    created for the current object, notification
+			 *    events from child objects should be handled by
+			 *    the corresponding driver.
+			 */
+			} else if (acpidev_check_device_enabled(
+			    acpidev_data_get_status(dhdl)) &&
+			    ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+				rc = acpinex_event_walk(B_TRUE, sp, child);
+				if (rc != DDI_SUCCESS) {
+					ACPINEX_DEBUG(CE_WARN,
+					    "!acpinex: failed to install "
+					    "handler for descendants of %s.",
+					    sp->ans_path);
+					retval = DDI_FAILURE;
+				}
+			}
+		} else {
+			rc = DDI_SUCCESS;
+			/* Uninstall handler for descendants if needed. */
+			if (ACPI_FAILURE(acpica_get_devinfo(child, &dip))) {
+				rc = acpinex_event_walk(B_FALSE, sp, child);
+			}
+			if (rc == DDI_SUCCESS) {
+				rc = acpinex_event_uninstall_handler(child,
+				    infop, dhdl);
+			}
+			/* Undo will be done by caller in case of failure. */
+			if (rc != DDI_SUCCESS) {
+				ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+				    "uninstall handler for descendants of %s.",
+				    sp->ans_path);
+				AcpiOsFree(infop);
+				retval = DDI_FAILURE;
+				break;
+			}
+		}
+
+		/* Release cached resources. */
+		AcpiOsFree(infop);
+	}
+
+	return (retval);
+}
+
+int
+acpinex_event_scan(acpinex_softstate_t *sp, boolean_t init)
+{
+	int rc;
+
+	ASSERT(sp != NULL);
+	ASSERT(sp->ans_hdl != NULL);
+	ASSERT(sp->ans_dip != NULL);
+	if (sp == NULL || sp->ans_hdl == NULL || sp->ans_dip == NULL) {
+		ACPINEX_DEBUG(CE_WARN,
+		    "!acpinex: invalid parameter to acpinex_event_scan().");
+		return (DDI_FAILURE);
+	}
+
+	/* Lock current device node and walk all child device nodes of it. */
+	mutex_enter(&sp->ans_lock);
+
+	rc = acpinex_event_walk(init, sp, sp->ans_hdl);
+	if (rc != DDI_SUCCESS) {
+		if (init) {
+			ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+			    "configure child objects of %s.", sp->ans_path);
+			rc = DDI_FAILURE;
+		} else {
+			ACPINEX_DEBUG(CE_WARN, "!acpinex: failed to "
+			    "unconfigure child objects of %s.", sp->ans_path);
+			/* Undo in case of errors */
+			(void) acpinex_event_walk(B_TRUE, sp, sp->ans_hdl);
+			rc = DDI_FAILURE;
+		}
+	}
+
+	mutex_exit(&sp->ans_lock);
+
+	return (rc);
+}
+
+void
+acpinex_event_init(void)
+{
+	/*
+	 * According to the ACPI specification, notification is only
+	 * supported on Device, Processor and ThermalZone objects.
+	 * Currently we only need to handle Device and Processor objects.
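+	 *
+	 * Should ThermalZone notifications ever be needed, the mask would
+	 * simply grow one more bit (a hypothetical extension, not part of
+	 * this change):
+	 *
+	 *	BT_SET(acpinex_object_type_mask, ACPI_TYPE_THERMAL);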
+ */ + BT_SET(acpinex_object_type_mask, ACPI_TYPE_PROCESSOR); + BT_SET(acpinex_object_type_mask, ACPI_TYPE_DEVICE); +} + +void +acpinex_event_fini(void) +{ + bzero(acpinex_object_type_mask, sizeof (acpinex_object_type_mask)); +} diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c new file mode 100644 index 0000000000..35ad4353aa --- /dev/null +++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.c @@ -0,0 +1,2927 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/conf.h> +#include <sys/debug.h> +#include <sys/errno.h> +#include <sys/note.h> +#include <sys/dditypes.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ddi_impldefs.h> +#include <sys/ndi_impldefs.h> +#include <sys/varargs.h> +#include <sys/modctl.h> +#include <sys/kmem.h> +#include <sys/cpuvar.h> +#include <sys/cpupart.h> +#include <sys/mem_config.h> +#include <sys/mem_cage.h> +#include <sys/memnode.h> +#include <sys/callb.h> +#include <sys/ontrap.h> +#include <sys/obpdefs.h> +#include <sys/promif.h> +#include <sys/synch.h> +#include <sys/systm.h> +#include <sys/sysmacros.h> +#include <sys/archsystm.h> +#include <sys/machsystm.h> +#include <sys/x_call.h> +#include <sys/x86_archext.h> +#include <sys/fastboot_impl.h> +#include <sys/sysevent.h> +#include <sys/sysevent/dr.h> +#include <sys/sysevent/eventdefs.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/acpidev.h> +#include <sys/acpidev_rsc.h> +#include <sys/acpidev_dr.h> +#include <sys/dr.h> +#include <sys/dr_util.h> +#include <sys/drmach.h> +#include "drmach_acpi.h" + +/* utility */ +#define MBYTE (1048576ull) +#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) +#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) + +static int drmach_init(void); +static void drmach_fini(void); +static int drmach_name2type_idx(char *); +static sbd_error_t *drmach_mem_update_lgrp(drmachid_t); + +#ifdef DEBUG +int drmach_debug = 1; /* set to non-zero to enable debug messages */ +#endif /* DEBUG */ + +drmach_domain_info_t drmach_domain; + +static char *drmach_ie_fmt = "drmach_acpi.c %d"; +static drmach_array_t *drmach_boards; + +/* rwlock to protect drmach_boards. */ +static krwlock_t drmach_boards_rwlock; + +/* rwlock to block out CPR thread. */ +static krwlock_t drmach_cpr_rwlock; + +/* CPR callb id. 
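+ * Registration is expected to follow the usual kernel callb pattern; an
+ * illustrative sketch with assumed names (the callback body itself lives
+ * elsewhere in this file):
+ *
+ *	drmach_cpr_cid = callb_add(drmach_cpr_callb, NULL,
+ *	    CB_CL_CPR_PM, "drmach");
+ *	...
+ *	(void) callb_delete(drmach_cpr_cid);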
*/ +static callb_id_t drmach_cpr_cid; + +static struct { + const char *name; + const char *type; + sbd_error_t *(*new)(drmach_device_t *, drmachid_t *); +} drmach_name2type[] = { + { ACPIDEV_NODE_NAME_CPU, DRMACH_DEVTYPE_CPU, drmach_cpu_new }, + { ACPIDEV_NODE_NAME_MEMORY, DRMACH_DEVTYPE_MEM, drmach_mem_new }, + { ACPIDEV_NODE_NAME_PCI, DRMACH_DEVTYPE_PCI, drmach_io_new }, +}; + +/* + * drmach autoconfiguration data structures and interfaces + */ +static struct modlmisc modlmisc = { + &mod_miscops, + "ACPI based DR v1.0" +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modlmisc, + NULL +}; + +int +_init(void) +{ + int err; + + if ((err = drmach_init()) != 0) { + return (err); + } + + if ((err = mod_install(&modlinkage)) != 0) { + drmach_fini(); + } + + return (err); +} + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modlinkage)) == 0) { + drmach_fini(); + } + + return (err); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/* + * Internal support functions. + */ +static DRMACH_HANDLE +drmach_node_acpi_get_dnode(drmach_node_t *np) +{ + return ((DRMACH_HANDLE)(uintptr_t)np->here); +} + +static dev_info_t * +drmach_node_acpi_get_dip(drmach_node_t *np) +{ + dev_info_t *dip = NULL; + + if (ACPI_FAILURE(acpica_get_devinfo((DRMACH_HANDLE)(np->here), &dip))) { + return (NULL); + } + + return (dip); +} + +static int +drmach_node_acpi_get_prop(drmach_node_t *np, char *name, void *buf, int len) +{ + int rv = 0; + DRMACH_HANDLE hdl; + + hdl = np->get_dnode(np); + if (hdl == NULL) { + DRMACH_PR("!drmach_node_acpi_get_prop: NULL handle"); + rv = -1; + } else { + rv = acpidev_dr_device_getprop(hdl, name, buf, len); + if (rv >= 0) { + ASSERT(rv <= len); + rv = 0; + } + } + + return (rv); +} + +static int +drmach_node_acpi_get_proplen(drmach_node_t *np, char *name, int *len) +{ + int rv = 0; + DRMACH_HANDLE hdl; + + hdl = np->get_dnode(np); + if (hdl == NULL) { + DRMACH_PR("!drmach_node_acpi_get_proplen: NULL handle"); + rv = -1; + } else { + rv = acpidev_dr_device_getprop(hdl, name, NULL, 0); + if (rv >= 0) { + *len = rv; + return (0); + } + } + + return (-1); +} + +static ACPI_STATUS +drmach_node_acpi_callback(ACPI_HANDLE hdl, uint_t lvl, void *ctx, void **retval) +{ + _NOTE(ARGUNUSED(lvl)); + + int rv; + dev_info_t *dip; + drmach_node_walk_args_t *argp = ctx; + int (*cb)(drmach_node_walk_args_t *args); + acpidev_class_id_t clsid; + + ASSERT(hdl != NULL); + ASSERT(ctx != NULL); + ASSERT(retval != NULL); + + /* Skip subtree if the device is not powered. */ + if (!acpidev_dr_device_is_powered(hdl)) { + return (AE_CTRL_DEPTH); + } + + /* + * Keep scanning subtree if it fails to lookup device node. + * There may be some ACPI objects without device nodes created. + */ + if (ACPI_FAILURE(acpica_get_devinfo(hdl, &dip))) { + return (AE_OK); + } + + argp->node->here = hdl; + cb = (int (*)(drmach_node_walk_args_t *args))argp->func; + rv = (*cb)(argp); + argp->node->here = NULL; + if (rv) { + *(int *)retval = rv; + return (AE_CTRL_TERMINATE); + } + + /* + * Skip descendants of PCI/PCIex host bridges. + * PCI/PCIex devices will be handled by pcihp. 
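+ * Returning AE_CTRL_DEPTH from the callback (assuming + * acpidev_dr_device_walk_device() follows the usual ACPICA walk + * conventions) prunes the walk below this object while continuing with + * its siblings; AE_CTRL_TERMINATE, used above when the per-node callback + * fails, stops the whole walk instead.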
*/ + clsid = acpidev_dr_device_get_class(hdl); + if (clsid == ACPIDEV_CLASS_ID_PCI || clsid == ACPIDEV_CLASS_ID_PCIEX) { + return (AE_CTRL_DEPTH); + } + + return (AE_OK); +} + +static int +drmach_node_acpi_walk(drmach_node_t *np, void *data, + int (*cb)(drmach_node_walk_args_t *args)) +{ + DRMACH_HANDLE hdl; + int rv = 0; + drmach_node_walk_args_t args; + + /* initialize the args structure for the callback */ + args.node = np; + args.data = data; + args.func = (void *)cb; + + /* save the handle; it will be modified when walking the tree. */ + hdl = np->get_dnode(np); + if (hdl == NULL) { + DRMACH_PR("!drmach_node_acpi_walk: failed to get device node."); + return (EX86_INAPPROP); + } + + if (ACPI_FAILURE(acpidev_dr_device_walk_device(hdl, + ACPIDEV_MAX_ENUM_LEVELS, drmach_node_acpi_callback, + &args, (void *)&rv))) { + /* + * If acpidev_dr_device_walk_device() itself fails, rv won't + * be set to a suitable error code. Set it here. + */ + if (rv == 0) { + cmn_err(CE_WARN, "!drmach_node_acpi_walk: failed to " + "walk ACPI namespace."); + rv = EX86_ACPIWALK; + } + } + + /* restore the handle to its original value after walking the tree. */ + np->here = (void *)hdl; + + return ((int)rv); +} + +static drmach_node_t * +drmach_node_new(void) +{ + drmach_node_t *np; + + np = kmem_zalloc(sizeof (drmach_node_t), KM_SLEEP); + + np->get_dnode = drmach_node_acpi_get_dnode; + np->getdip = drmach_node_acpi_get_dip; + np->getproplen = drmach_node_acpi_get_proplen; + np->getprop = drmach_node_acpi_get_prop; + np->walk = drmach_node_acpi_walk; + + return (np); +} + +static drmachid_t +drmach_node_dup(drmach_node_t *np) +{ + drmach_node_t *dup; + + dup = drmach_node_new(); + dup->here = np->here; + dup->get_dnode = np->get_dnode; + dup->getdip = np->getdip; + dup->getproplen = np->getproplen; + dup->getprop = np->getprop; + dup->walk = np->walk; + + return (dup); +} + +static void +drmach_node_dispose(drmach_node_t *np) +{ + kmem_free(np, sizeof (*np)); +} + +static int +drmach_node_walk(drmach_node_t *np, void *param, + int (*cb)(drmach_node_walk_args_t *args)) +{ + return (np->walk(np, param, cb)); +} + +static DRMACH_HANDLE +drmach_node_get_dnode(drmach_node_t *np) +{ + return (np->get_dnode(np)); +} + +/* + * drmach_array provides convenient array construction, access, + * bounds checking and array destruction logic. + */ +static drmach_array_t * +drmach_array_new(uint_t min_index, uint_t max_index) +{ + drmach_array_t *arr; + + arr = kmem_zalloc(sizeof (drmach_array_t), KM_SLEEP); + + arr->arr_sz = (max_index - min_index + 1) * sizeof (void *); + if (arr->arr_sz > 0) { + arr->min_index = min_index; + arr->max_index = max_index; + + arr->arr = kmem_zalloc(arr->arr_sz, KM_SLEEP); + return (arr); + } else { + kmem_free(arr, sizeof (*arr)); + return (0); + } +} + +static int +drmach_array_set(drmach_array_t *arr, uint_t idx, drmachid_t val) +{ + if (idx < arr->min_index || idx > arr->max_index) + return (-1); + arr->arr[idx - arr->min_index] = val; + return (0); +} + +/* + * Get the item with index idx. + * Return 0 with the value stored in val if it succeeds, otherwise return -1.
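+ * + * A typical iteration over a drmach_array (illustrative sketch only; + * process() stands for arbitrary per-element work): + * + * uint_t idx; + * drmachid_t val; + * int rv; + * + * for (rv = drmach_array_first(arr, &idx, &val); rv == 0; + * rv = drmach_array_next(arr, &idx, &val)) + * process(val); + * + * Both iterators skip NULL slots and return -1 once idx passes + * max_index, which terminates the loop.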
*/ +static int +drmach_array_get(drmach_array_t *arr, uint_t idx, drmachid_t *val) +{ + if (idx < arr->min_index || idx > arr->max_index) + return (-1); + *val = arr->arr[idx - arr->min_index]; + return (0); +} + +static int +drmach_array_first(drmach_array_t *arr, uint_t *idx, drmachid_t *val) +{ + int rv; + + *idx = arr->min_index; + while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL) + *idx += 1; + + return (rv); +} + +static int +drmach_array_next(drmach_array_t *arr, uint_t *idx, drmachid_t *val) +{ + int rv; + + *idx += 1; + while ((rv = drmach_array_get(arr, *idx, val)) == 0 && *val == NULL) + *idx += 1; + + return (rv); +} + +static void +drmach_array_dispose(drmach_array_t *arr, void (*disposer)(drmachid_t)) +{ + drmachid_t val; + uint_t idx; + int rv; + + rv = drmach_array_first(arr, &idx, &val); + while (rv == 0) { + (*disposer)(val); + rv = drmach_array_next(arr, &idx, &val); + } + + kmem_free(arr->arr, arr->arr_sz); + kmem_free(arr, sizeof (*arr)); +} + +static drmach_board_t * +drmach_get_board_by_bnum(uint_t bnum) +{ + drmachid_t id; + + if (drmach_array_get(drmach_boards, bnum, &id) == 0) + return ((drmach_board_t *)id); + else + return (NULL); +} + +sbd_error_t * +drmach_device_new(drmach_node_t *node, + drmach_board_t *bp, int portid, drmachid_t *idp) +{ + int i; + int rv; + drmach_device_t proto; + sbd_error_t *err; + char name[OBP_MAXDRVNAME]; + + rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME, name, OBP_MAXDRVNAME); + if (rv) { + /* every node is expected to have a name */ + err = drerr_new(1, EX86_GETPROP, "device node %s: property %s", + ddi_node_name(node->getdip(node)), + ACPIDEV_DR_PROP_DEVNAME); + return (err); + } + + /* + * The node currently being examined is not listed in the name2type[] + * array. In this case, the node is of no interest to drmach. Both + * *idp and the returned error are initialized here to yield nothing + * (no device or error structure) for this case.
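+ * + * For example: a node whose ACPIDEV_DR_PROP_DEVNAME matches + * ACPIDEV_NODE_NAME_CPU hits the first drmach_name2type[] entry and is + * handed to drmach_cpu_new(), while an unrecognized name simply yields + * *idp == NULL together with a NULL error return.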
+ */ + i = drmach_name2type_idx(name); + if (i < 0) { + *idp = (drmachid_t)0; + return (NULL); + } + + /* device specific new function will set unum */ + bzero(&proto, sizeof (proto)); + proto.type = drmach_name2type[i].type; + proto.bp = bp; + proto.node = node; + proto.portid = portid; + + return (drmach_name2type[i].new(&proto, idp)); +} + +static void +drmach_device_dispose(drmachid_t id) +{ + drmach_device_t *self = id; + + self->cm.dispose(id); +} + +static sbd_error_t * +drmach_device_status(drmachid_t id, drmach_status_t *stat) +{ + drmach_common_t *cp; + + if (!DRMACH_IS_ID(id)) + return (drerr_new(0, EX86_NOTID, NULL)); + cp = id; + + return (cp->status(id, stat)); +} + +drmach_board_t * +drmach_board_new(uint_t bnum, int boot_board) +{ + static void drmach_board_dispose(drmachid_t id); + static sbd_error_t *drmach_board_release(drmachid_t); + static sbd_error_t *drmach_board_status(drmachid_t, drmach_status_t *); + + sbd_error_t *err; + drmach_board_t *bp; + dev_info_t *dip = NULL; + + bp = kmem_zalloc(sizeof (drmach_board_t), KM_SLEEP); + bp->cm.isa = (void *)drmach_board_new; + bp->cm.release = drmach_board_release; + bp->cm.status = drmach_board_status; + + bp->bnum = bnum; + bp->devices = NULL; + bp->tree = drmach_node_new(); + + acpidev_dr_lock_all(); + if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &bp->tree->here))) { + acpidev_dr_unlock_all(); + drmach_board_dispose(bp); + return (NULL); + } + acpidev_dr_unlock_all(); + ASSERT(bp->tree->here != NULL); + + err = drmach_board_name(bnum, bp->cm.name, sizeof (bp->cm.name)); + if (err != NULL) { + sbd_err_clear(&err); + drmach_board_dispose(bp); + return (NULL); + } + + if (acpidev_dr_device_is_powered(bp->tree->here)) { + bp->boot_board = boot_board; + bp->powered = 1; + } else { + bp->boot_board = 0; + bp->powered = 0; + } + bp->assigned = boot_board; + if (ACPI_SUCCESS(acpica_get_devinfo(bp->tree->here, &dip))) { + bp->connected = 1; + } else { + bp->connected = 0; + } + + (void) drmach_array_set(drmach_boards, bnum, bp); + + return (bp); +} + +static void +drmach_board_dispose(drmachid_t id) +{ + drmach_board_t *bp; + + ASSERT(DRMACH_IS_BOARD_ID(id)); + bp = id; + + if (bp->tree) + drmach_node_dispose(bp->tree); + + if (bp->devices) + drmach_array_dispose(bp->devices, drmach_device_dispose); + + kmem_free(bp, sizeof (drmach_board_t)); +} + +static sbd_error_t * +drmach_board_release(drmachid_t id) +{ + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +static int +drmach_board_check_power(drmach_board_t *bp) +{ + DRMACH_HANDLE hdl; + + hdl = drmach_node_get_dnode(bp->tree); + + return (acpidev_dr_device_is_powered(hdl)); +} + +struct drmach_board_list_dep_arg { + int count; + size_t len; + ssize_t off; + char *buf; + char temp[MAXPATHLEN]; +}; + +static ACPI_STATUS +drmach_board_generate_name(ACPI_HANDLE hdl, UINT32 lvl, void *ctx, + void **retval) +{ + _NOTE(ARGUNUSED(retval)); + + struct drmach_board_list_dep_arg *argp = ctx; + + ASSERT(hdl != NULL); + ASSERT(lvl == UINT32_MAX); + ASSERT(ctx != NULL); + + /* Skip non-board devices. */ + if (!acpidev_dr_device_is_board(hdl)) { + return (AE_OK); + } + + if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, argp->temp, + sizeof (argp->temp)))) { + DRMACH_PR("!drmach_board_generate_name: failed to " + "generate board name for handle %p.", hdl); + /* Keep on walking. 
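(a name-generation failure for one board must not abort the whole EDL/EJD walk.)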
*/ + return (AE_OK); + } + argp->count++; + argp->off += snprintf(argp->buf + argp->off, argp->len - argp->off, + " %s", argp->temp); + if (argp->off >= argp->len) { + return (AE_CTRL_TERMINATE); + } + + return (AE_OK); +} + +static ssize_t +drmach_board_list_dependency(ACPI_HANDLE hdl, boolean_t edl, char *prefix, + char *buf, size_t len) +{ + ACPI_STATUS rc; + ssize_t off; + struct drmach_board_list_dep_arg *ap; + + ASSERT(buf != NULL && len != 0); + if (buf == NULL || len == 0) { + return (-1); + } + + ap = kmem_zalloc(sizeof (*ap), KM_SLEEP); + ap->buf = buf; + ap->len = len; + ap->off = snprintf(buf, len, "%s", prefix); + if (ap->off >= len) { + *buf = '\0'; + kmem_free(ap, sizeof (*ap)); + return (-1); + } + + /* Generate the device dependency list. */ + if (edl) { + rc = acpidev_dr_device_walk_edl(hdl, + drmach_board_generate_name, ap, NULL); + } else { + rc = acpidev_dr_device_walk_ejd(hdl, + drmach_board_generate_name, ap, NULL); + } + if (ACPI_FAILURE(rc)) { + *buf = '\0'; + ap->off = -1; + /* No device has dependency on this board. */ + } else if (ap->count == 0) { + *buf = '\0'; + ap->off = 0; + } + + off = ap->off; + kmem_free(ap, sizeof (*ap)); + + return (off); +} + +static sbd_error_t * +drmach_board_status(drmachid_t id, drmach_status_t *stat) +{ + sbd_error_t *err = NULL; + drmach_board_t *bp; + DRMACH_HANDLE hdl; + size_t off; + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = id; + + if (bp->tree == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + stat->busy = 0; /* assume not busy */ + stat->configured = 0; /* assume not configured */ + stat->assigned = bp->assigned; + stat->powered = bp->powered = acpidev_dr_device_is_powered(hdl); + stat->empty = !acpidev_dr_device_is_present(hdl); + if (ACPI_SUCCESS(acpidev_dr_device_check_status(hdl))) { + stat->cond = bp->cond = SBD_COND_OK; + } else { + stat->cond = bp->cond = SBD_COND_FAILED; + } + stat->info[0] = '\0'; + + /* Generate the eject device list. */ + if (drmach_board_list_dependency(hdl, B_TRUE, "EDL:", + stat->info, sizeof (stat->info)) < 0) { + DRMACH_PR("!drmach_board_status: failed to generate " + "eject device list for board %d.", bp->bnum); + stat->info[0] = '\0'; + } + off = strlen(stat->info); + if (off < sizeof (stat->info)) { + if (drmach_board_list_dependency(hdl, B_FALSE, + off ? 
", EJD:" : "EJD:", + stat->info + off, sizeof (stat->info) - off) < 0) { + DRMACH_PR("!drmach_board_status: failed to generate " + "eject dependent device for board %d.", bp->bnum); + stat->info[off] = '\0'; + } + } + + switch (acpidev_dr_get_board_type(bp->tree->get_dnode(bp->tree))) { + case ACPIDEV_CPU_BOARD: + (void) strlcpy(stat->type, "CPU Board", sizeof (stat->type)); + break; + case ACPIDEV_MEMORY_BOARD: + (void) strlcpy(stat->type, "MemoryBoard", sizeof (stat->type)); + break; + case ACPIDEV_IO_BOARD: + (void) strlcpy(stat->type, "IO Board", sizeof (stat->type)); + break; + case ACPIDEV_SYSTEM_BOARD: + /*FALLTHROUGH*/ + default: + (void) strlcpy(stat->type, "SystemBoard", sizeof (stat->type)); + break; + } + + if (bp->devices) { + int rv; + uint_t d_idx; + drmachid_t d_id; + + rv = drmach_array_first(bp->devices, &d_idx, &d_id); + while (rv == 0) { + drmach_status_t d_stat; + + err = drmach_device_status(d_id, &d_stat); + if (err) + break; + + stat->busy |= d_stat.busy; + stat->configured |= d_stat.configured; + + rv = drmach_array_next(bp->devices, &d_idx, &d_id); + } + } + + return (err); +} + +/* + * When DR is initialized, we walk the device tree and acquire a hold on + * all the nodes that are interesting to DR. This is so that the corresponding + * branches cannot be deleted. + */ +static int +drmach_hold_rele_devtree(dev_info_t *rdip, void *arg) +{ + int *holdp = (int *)arg; + ACPI_HANDLE hdl = NULL; + acpidev_data_handle_t dhdl; + + /* Skip nodes and subtrees which are not created by acpidev. */ + if (ACPI_FAILURE(acpica_get_handle(rdip, &hdl))) { + return (DDI_WALK_PRUNECHILD); + } + ASSERT(hdl != NULL); + dhdl = acpidev_data_get_handle(hdl); + if (dhdl == NULL) { + return (DDI_WALK_PRUNECHILD); + } + + /* Hold/release devices which are interesting to DR operations. */ + if (acpidev_data_dr_ready(dhdl)) { + if (*holdp) { + ASSERT(!e_ddi_branch_held(rdip)); + e_ddi_branch_hold(rdip); + } else { + ASSERT(e_ddi_branch_held(rdip)); + e_ddi_branch_rele(rdip); + } + } + + return (DDI_WALK_CONTINUE); +} + +static void +drmach_hold_devtree(void) +{ + dev_info_t *dip; + int circ; + int hold = 1; + + dip = ddi_root_node(); + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold); + ndi_devi_exit(dip, circ); +} + +static void +drmach_release_devtree(void) +{ + dev_info_t *dip; + int circ; + int hold = 0; + + dip = ddi_root_node(); + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), drmach_hold_rele_devtree, &hold); + ndi_devi_exit(dip, circ); +} + +static boolean_t +drmach_cpr_callb(void *arg, int code) +{ + _NOTE(ARGUNUSED(arg)); + + if (code == CB_CODE_CPR_CHKPT) { + /* + * Temporarily block CPR operations if there are DR operations + * ongoing. 
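+ * + * The locking protocol, as a sketch: drmach_pre_op() takes + * drmach_cpr_rwlock as READER for every DR command except status and + * getncm, and drmach_post_op() drops it. Taking the lock as WRITER at + * CB_CODE_CPR_CHKPT time therefore waits for in-flight DR operations to + * drain and stalls new ones until the resume callback below releases + * the lock.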
*/ + rw_enter(&drmach_cpr_rwlock, RW_WRITER); + } else { + rw_exit(&drmach_cpr_rwlock); + } + + return (B_TRUE); +} + +static int +drmach_init(void) +{ + DRMACH_HANDLE hdl; + drmachid_t id; + uint_t bnum; + + if (MAX_BOARDS > SHRT_MAX) { + cmn_err(CE_WARN, "!drmach_init: system has too many (%d) " + "hotplug-capable boards.", MAX_BOARDS); + return (ENXIO); + } else if (MAX_CMP_UNITS_PER_BOARD > 1) { + cmn_err(CE_WARN, "!drmach_init: DR doesn't support multiple " + "(%d) physical processors on one board.", + MAX_CMP_UNITS_PER_BOARD); + return (ENXIO); + } else if (MAX_CORES_PER_CMP & (MAX_CORES_PER_CMP - 1)) { + cmn_err(CE_WARN, "!drmach_init: number of logical CPUs (%d) in " + "a physical processor is not a power of 2.", + MAX_CORES_PER_CMP); + return (ENXIO); + } else if (MAX_CPU_UNITS_PER_BOARD > DEVSET_CPU_NUMBER || + MAX_MEM_UNITS_PER_BOARD > DEVSET_MEM_NUMBER || + MAX_IO_UNITS_PER_BOARD > DEVSET_IO_NUMBER) { + cmn_err(CE_WARN, "!drmach_init: system has more CPU/memory/IO " + "units than the DR driver can handle."); + return (ENXIO); + } + + rw_init(&drmach_cpr_rwlock, NULL, RW_DEFAULT, NULL); + drmach_cpr_cid = callb_add(drmach_cpr_callb, NULL, + CB_CL_CPR_PM, "drmach"); + + rw_init(&drmach_boards_rwlock, NULL, RW_DEFAULT, NULL); + drmach_boards = drmach_array_new(0, MAX_BOARDS - 1); + drmach_domain.allow_dr = acpidev_dr_capable(); + + for (bnum = 0; bnum < MAX_BOARDS; bnum++) { + hdl = NULL; + if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl)) || + hdl == NULL) { + cmn_err(CE_WARN, "!drmach_init: failed to lookup ACPI " + "handle for board %d.", bnum); + continue; + } + if (drmach_array_get(drmach_boards, bnum, &id) == -1) { + DRMACH_PR("!drmach_init: failed to get handle " + "for board %d.", bnum); + ASSERT(0); + goto error; + } else if (id == NULL) { + (void) drmach_board_new(bnum, 1); + } + } + + /* + * Walk descendants of the devinfo root node and hold + * all devinfo branches of interest.
*/ + drmach_hold_devtree(); + + return (0); + +error: + drmach_array_dispose(drmach_boards, drmach_board_dispose); + rw_destroy(&drmach_boards_rwlock); + rw_destroy(&drmach_cpr_rwlock); + return (ENXIO); +} + +static void +drmach_fini(void) +{ + rw_enter(&drmach_boards_rwlock, RW_WRITER); + if (drmach_boards != NULL) { + drmach_array_dispose(drmach_boards, drmach_board_dispose); + drmach_boards = NULL; + } + rw_exit(&drmach_boards_rwlock); + + /* + * Walk descendants of the root devinfo node and + * release holds acquired on branches in drmach_init(). + */ + drmach_release_devtree(); + + (void) callb_delete(drmach_cpr_cid); + rw_destroy(&drmach_cpr_rwlock); + rw_destroy(&drmach_boards_rwlock); +} + +sbd_error_t * +drmach_io_new(drmach_device_t *proto, drmachid_t *idp) +{ + static void drmach_io_dispose(drmachid_t); + static sbd_error_t *drmach_io_release(drmachid_t); + static sbd_error_t *drmach_io_status(drmachid_t, drmach_status_t *); + + drmach_io_t *ip; + int portid; + + portid = proto->portid; + ASSERT(portid != -1); + proto->unum = portid; + + ip = kmem_zalloc(sizeof (drmach_io_t), KM_SLEEP); + bcopy(proto, &ip->dev, sizeof (ip->dev)); + ip->dev.node = drmach_node_dup(proto->node); + ip->dev.cm.isa = (void *)drmach_io_new; + ip->dev.cm.dispose = drmach_io_dispose; + ip->dev.cm.release = drmach_io_release; + ip->dev.cm.status = drmach_io_status; + (void) snprintf(ip->dev.cm.name, sizeof (ip->dev.cm.name), "%s%d", + ip->dev.type, ip->dev.unum); + + *idp = (drmachid_t)ip; + + return (NULL); +} + +static void +drmach_io_dispose(drmachid_t id) +{ + drmach_io_t *self; + + ASSERT(DRMACH_IS_IO_ID(id)); + + self = id; + if (self->dev.node) + drmach_node_dispose(self->dev.node); + + kmem_free(self, sizeof (*self)); +} + +static sbd_error_t * +drmach_io_release(drmachid_t id) +{ + if (!DRMACH_IS_IO_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +static sbd_error_t * +drmach_io_status(drmachid_t id, drmach_status_t *stat) +{ + drmach_device_t *dp; + sbd_error_t *err; + int configured; + + ASSERT(DRMACH_IS_IO_ID(id)); + dp = id; + + err = drmach_io_is_attached(id, &configured); + if (err) + return (err); + + stat->assigned = dp->bp->assigned; + stat->powered = dp->bp->powered; + stat->configured = (configured != 0); + stat->busy = dp->busy; + (void) strlcpy(stat->type, dp->type, sizeof (stat->type)); + stat->info[0] = '\0'; + + return (NULL); +} + +sbd_error_t * +drmach_cpu_new(drmach_device_t *proto, drmachid_t *idp) +{ + static void drmach_cpu_dispose(drmachid_t); + static sbd_error_t *drmach_cpu_release(drmachid_t); + static sbd_error_t *drmach_cpu_status(drmachid_t, drmach_status_t *); + + int portid; + processorid_t cpuid; + drmach_cpu_t *cp = NULL; + + /* the portid is the APIC ID of the node */ + portid = proto->portid; + ASSERT(portid != -1); + + /* + * Assume all CPUs are homogeneous and have the same number of + * cores/threads.
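+ * + * For example (hypothetical numbers): with MAX_CPU_UNITS_PER_BOARD + * equal to 32, a CPU whose APIC ID (portid) is 37 becomes unit 5 + * (37 % 32) of its board.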
+ */ + proto->unum = portid % MAX_CPU_UNITS_PER_BOARD; + + cp = kmem_zalloc(sizeof (drmach_cpu_t), KM_SLEEP); + bcopy(proto, &cp->dev, sizeof (cp->dev)); + cp->dev.node = drmach_node_dup(proto->node); + cp->dev.cm.isa = (void *)drmach_cpu_new; + cp->dev.cm.dispose = drmach_cpu_dispose; + cp->dev.cm.release = drmach_cpu_release; + cp->dev.cm.status = drmach_cpu_status; + (void) snprintf(cp->dev.cm.name, sizeof (cp->dev.cm.name), "%s%d", + cp->dev.type, cp->dev.unum); + + cp->apicid = portid; + if (ACPI_SUCCESS(acpica_get_cpu_id_by_object( + drmach_node_get_dnode(proto->node), &cpuid))) { + cp->cpuid = cpuid; + } else { + cp->cpuid = -1; + } + + /* Mark CPU0 as busy, many other components have dependency on it. */ + if (cp->cpuid == 0) { + cp->dev.busy = 1; + } + + *idp = (drmachid_t)cp; + + return (NULL); +} + +static void +drmach_cpu_dispose(drmachid_t id) +{ + drmach_cpu_t *self; + + ASSERT(DRMACH_IS_CPU_ID(id)); + + self = id; + if (self->dev.node) + drmach_node_dispose(self->dev.node); + + kmem_free(self, sizeof (*self)); +} + +static sbd_error_t * +drmach_cpu_release(drmachid_t id) +{ + if (!DRMACH_IS_CPU_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +static sbd_error_t * +drmach_cpu_status(drmachid_t id, drmach_status_t *stat) +{ + drmach_cpu_t *cp; + drmach_device_t *dp; + + ASSERT(DRMACH_IS_CPU_ID(id)); + cp = (drmach_cpu_t *)id; + dp = &cp->dev; + + stat->assigned = dp->bp->assigned; + stat->powered = dp->bp->powered; + mutex_enter(&cpu_lock); + stat->configured = (cpu_get(cp->cpuid) != NULL); + mutex_exit(&cpu_lock); + stat->busy = dp->busy; + (void) strlcpy(stat->type, dp->type, sizeof (stat->type)); + stat->info[0] = '\0'; + + return (NULL); +} + +static int +drmach_setup_mc_info(DRMACH_HANDLE hdl, drmach_mem_t *mp) +{ + uint_t i, j, count; + struct memlist *ml = NULL, *ml2 = NULL; + acpidev_regspec_t *regp; + uint64_t align, addr_min, addr_max, total_size, skipped_size; + + if (hdl == NULL) { + return (-1); + } else if (ACPI_FAILURE(acpidev_dr_get_mem_alignment(hdl, &align))) { + return (-1); + } else { + ASSERT((align & (align - 1)) == 0); + mp->mem_alignment = align; + } + + addr_min = UINT64_MAX; + addr_max = 0; + total_size = 0; + skipped_size = 0; + /* + * There's a memory hole just below 4G on x86, which needs special + * handling. All other addresses assigned to a specific memory device + * should be contiguous. 
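+ * + * For example (hypothetical layout): an 8GB memory board based at 2GB + * may be described by two regspecs, [2GB, 4GB) and [4GB, 10GB), because + * of the hole; the loop below then yields slice_base = 2GB, + * slice_top = 10GB and slice_size (total_size) = 8GB.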
*/ + if (ACPI_FAILURE(acpidev_dr_device_get_regspec(hdl, TRUE, &regp, + &count))) { + return (-1); + } + for (i = 0, j = 0; i < count; i++) { + uint64_t addr, size; + + addr = (uint64_t)regp[i].phys_mid << 32; + addr |= (uint64_t)regp[i].phys_low; + size = (uint64_t)regp[i].size_hi << 32; + size |= (uint64_t)regp[i].size_low; + if (size == 0) + continue; + else + j++; + + total_size += size; + if (addr < addr_min) + addr_min = addr; + if (addr + size > addr_max) + addr_max = addr + size; + if (mp->dev.bp->boot_board || + j <= acpidev_dr_max_segments_per_mem_device()) { + ml = memlist_add_span(ml, addr, size); + } else { + skipped_size += size; + } + } + acpidev_dr_device_free_regspec(regp, count); + + if (skipped_size != 0) { + cmn_err(CE_WARN, "!drmach: too many (%d) segments on memory " + "device, max (%d) segments supported, 0x%" PRIx64 " bytes " + "of memory skipped.", + j, acpidev_dr_max_segments_per_mem_device(), skipped_size); + } + + mp->slice_base = addr_min; + mp->slice_top = addr_max; + mp->slice_size = total_size; + + if (mp->dev.bp->boot_board) { + uint64_t endpa = _ptob64(physmax + 1); + + /* + * we intersect phys_install to get base_pa. + * This only works at boot-up time. + */ + memlist_read_lock(); + ml2 = memlist_dup(phys_install); + memlist_read_unlock(); + + ml2 = memlist_del_span(ml2, 0ull, mp->slice_base); + if (ml2 && endpa > addr_max) { + ml2 = memlist_del_span(ml2, addr_max, endpa - addr_max); + } + } + + /* + * Create a memlist for the memory board. + * The created memlist only contains configured memory if there's + * configured memory on the board, otherwise it contains all memory + * on the board. + */ + if (ml2) { + uint64_t nbytes = 0; + struct memlist *p; + + for (p = ml2; p; p = p->ml_next) { + nbytes += p->ml_size; + } + if (nbytes == 0) { + memlist_delete(ml2); + ml2 = NULL; + } else { + /* Node has configured memory at boot time. */ + mp->base_pa = ml2->ml_address; + mp->nbytes = nbytes; + mp->memlist = ml2; + if (ml) + memlist_delete(ml); + } + } + if (ml2 == NULL) { + /* Not configured at boot time.
*/ + mp->base_pa = UINT64_MAX; + mp->nbytes = 0; + mp->memlist = ml; + } + + return (0); +} + +sbd_error_t * +drmach_mem_new(drmach_device_t *proto, drmachid_t *idp) +{ + static void drmach_mem_dispose(drmachid_t); + static sbd_error_t *drmach_mem_release(drmachid_t); + static sbd_error_t *drmach_mem_status(drmachid_t, drmach_status_t *); + + DRMACH_HANDLE hdl; + drmach_mem_t *mp; + int portid; + + mp = kmem_zalloc(sizeof (drmach_mem_t), KM_SLEEP); + portid = proto->portid; + ASSERT(portid != -1); + proto->unum = portid; + + bcopy(proto, &mp->dev, sizeof (mp->dev)); + mp->dev.node = drmach_node_dup(proto->node); + mp->dev.cm.isa = (void *)drmach_mem_new; + mp->dev.cm.dispose = drmach_mem_dispose; + mp->dev.cm.release = drmach_mem_release; + mp->dev.cm.status = drmach_mem_status; + + (void) snprintf(mp->dev.cm.name, sizeof (mp->dev.cm.name), "%s%d", + mp->dev.type, proto->unum); + hdl = mp->dev.node->get_dnode(mp->dev.node); + ASSERT(hdl != NULL); + if (drmach_setup_mc_info(hdl, mp) != 0) { + kmem_free(mp, sizeof (drmach_mem_t)); + *idp = (drmachid_t)NULL; + return (drerr_new(1, EX86_MC_SETUP, NULL)); + } + + /* make sure we do not create memoryless nodes */ + if (mp->nbytes == 0 && mp->slice_size == 0) { + kmem_free(mp, sizeof (drmach_mem_t)); + *idp = (drmachid_t)NULL; + } else + *idp = (drmachid_t)mp; + + return (NULL); +} + +static void +drmach_mem_dispose(drmachid_t id) +{ + drmach_mem_t *mp; + + ASSERT(DRMACH_IS_MEM_ID(id)); + + mp = id; + + if (mp->dev.node) + drmach_node_dispose(mp->dev.node); + + if (mp->memlist) { + memlist_delete(mp->memlist); + mp->memlist = NULL; + } + + kmem_free(mp, sizeof (*mp)); +} + +static sbd_error_t * +drmach_mem_release(drmachid_t id) +{ + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +static sbd_error_t * +drmach_mem_status(drmachid_t id, drmach_status_t *stat) +{ + uint64_t pa; + drmach_mem_t *dp; + struct memlist *ml = NULL; + + ASSERT(DRMACH_IS_MEM_ID(id)); + dp = id; + + /* get starting physical address of target memory */ + pa = dp->base_pa; + /* round down to slice boundary */ + pa &= ~(dp->mem_alignment - 1); + + /* stop at first span that is in slice */ + memlist_read_lock(); + for (ml = phys_install; ml; ml = ml->ml_next) + if (ml->ml_address >= pa && ml->ml_address < dp->slice_top) + break; + memlist_read_unlock(); + + stat->assigned = dp->dev.bp->assigned; + stat->powered = dp->dev.bp->powered; + stat->configured = (ml != NULL); + stat->busy = dp->dev.busy; + (void) strlcpy(stat->type, dp->dev.type, sizeof (stat->type)); + stat->info[0] = '\0'; + + return (NULL); +} + +/* + * Public interfaces exported to support platform independent dr driver. 
*/ +uint_t +drmach_max_boards(void) +{ + return (acpidev_dr_max_boards()); +} + +uint_t +drmach_max_io_units_per_board(void) +{ + return (acpidev_dr_max_io_units_per_board()); +} + +uint_t +drmach_max_cmp_units_per_board(void) +{ + return (acpidev_dr_max_cmp_units_per_board()); +} + +uint_t +drmach_max_mem_units_per_board(void) +{ + return (acpidev_dr_max_mem_units_per_board()); +} + +uint_t +drmach_max_core_per_cmp(void) +{ + return (acpidev_dr_max_cpu_units_per_cmp()); +} + +sbd_error_t * +drmach_pre_op(int cmd, drmachid_t id, drmach_opts_t *opts, void *argp) +{ + drmach_board_t *bp = (drmach_board_t *)id; + sbd_error_t *err = NULL; + + /* allow status and ncm operations to always succeed */ + if ((cmd == SBD_CMD_STATUS) || (cmd == SBD_CMD_GETNCM)) { + return (NULL); + } + + switch (cmd) { + case SBD_CMD_POWERON: + case SBD_CMD_POWEROFF: + /* + * Disable fast reboot if a CPU/MEM/IOH hotplug event happens. + * Note: this is a temporary solution and will be revised when + * fast reboot can support CPU/MEM/IOH DR operations in the + * future. + * + * The ACPI BIOS generates some static ACPI tables, such as + * MADT, SRAT and SLIT, to describe the system hardware + * configuration on power-on. When a CPU/MEM/IOH hotplug event + * happens, those static tables won't be updated and will + * become stale. + * + * If we reset the system by fast reboot, BIOS will have no + * chance to regenerate those stale static tables. Fast reboot + * can't tolerate such inconsistency between stale ACPI tables + * and the real hardware configuration yet. + * + * A temporary solution is introduced to disable fast reboot if + * a CPU/MEM/IOH hotplug event happens. This solution should be + * revised when fast reboot is enhanced to support CPU/MEM/IOH + * DR operations. + */ + fastreboot_disable(FBNS_HOTPLUG); + /*FALLTHROUGH*/ + + default: + /* Block out the CPR thread. */ + rw_enter(&drmach_cpr_rwlock, RW_READER); + break; + } + + /* check all other commands for the required option string */ + if ((opts->size > 0) && (opts->copts != NULL)) { + if (strstr(opts->copts, ACPIDEV_CMD_OST_PREFIX) == NULL) { + err = drerr_new(1, EX86_SUPPORT, NULL); + } + } else { + err = drerr_new(1, EX86_SUPPORT, NULL); + } + + if (!err && id && DRMACH_IS_BOARD_ID(id)) { + switch (cmd) { + case SBD_CMD_TEST: + break; + case SBD_CMD_CONNECT: + if (bp->connected) + err = drerr_new(0, ESBD_STATE, NULL); + else if (!drmach_domain.allow_dr) + err = drerr_new(1, EX86_SUPPORT, NULL); + break; + case SBD_CMD_DISCONNECT: + if (!bp->connected) + err = drerr_new(0, ESBD_STATE, NULL); + else if (!drmach_domain.allow_dr) + err = drerr_new(1, EX86_SUPPORT, NULL); + break; + default: + if (!drmach_domain.allow_dr) + err = drerr_new(1, EX86_SUPPORT, NULL); + break; + + } + } + + /* + * CPU/memory/IO DR operations will be supported in stages on x86. + * In early versions, some operations must be blocked here. This + * temporary hook will be removed when all CPU/memory/IO DR + * operations are supported on x86 systems. + * + * We only need to filter unsupported device types for + * SBD_CMD_CONFIGURE/SBD_CMD_UNCONFIGURE commands; all other + * commands are supported by all device types.
+ */ + if (!err && (cmd == SBD_CMD_CONFIGURE || cmd == SBD_CMD_UNCONFIGURE)) { + int i; + dr_devset_t *devsetp = (dr_devset_t *)argp; + dr_devset_t devset = *devsetp; + + switch (cmd) { + case SBD_CMD_CONFIGURE: + if (!plat_dr_support_cpu()) { + DEVSET_DEL(devset, SBD_COMP_CPU, + DEVSET_ANYUNIT); + } else { + for (i = MAX_CPU_UNITS_PER_BOARD; + i < DEVSET_CPU_NUMBER; i++) { + DEVSET_DEL(devset, SBD_COMP_CPU, i); + } + } + + if (!plat_dr_support_memory()) { + DEVSET_DEL(devset, SBD_COMP_MEM, + DEVSET_ANYUNIT); + } else { + for (i = MAX_MEM_UNITS_PER_BOARD; + i < DEVSET_MEM_NUMBER; i++) { + DEVSET_DEL(devset, SBD_COMP_MEM, i); + } + } + + /* No support of configuring IOH devices yet. */ + DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT); + break; + + case SBD_CMD_UNCONFIGURE: + if (!plat_dr_support_cpu()) { + DEVSET_DEL(devset, SBD_COMP_CPU, + DEVSET_ANYUNIT); + } else { + for (i = MAX_CPU_UNITS_PER_BOARD; + i < DEVSET_CPU_NUMBER; i++) { + DEVSET_DEL(devset, SBD_COMP_CPU, i); + } + } + + /* No support of unconfiguring MEM/IOH devices yet. */ + DEVSET_DEL(devset, SBD_COMP_MEM, DEVSET_ANYUNIT); + DEVSET_DEL(devset, SBD_COMP_IO, DEVSET_ANYUNIT); + break; + } + + *devsetp = devset; + if (DEVSET_IS_NULL(devset)) { + err = drerr_new(1, EX86_SUPPORT, NULL); + } + } + + return (err); +} + +sbd_error_t * +drmach_post_op(int cmd, drmachid_t id, drmach_opts_t *opts, int rv) +{ + _NOTE(ARGUNUSED(id, opts, rv)); + + switch (cmd) { + case SBD_CMD_STATUS: + case SBD_CMD_GETNCM: + break; + + default: + rw_exit(&drmach_cpr_rwlock); + break; + } + + return (NULL); +} + +sbd_error_t * +drmach_configure(drmachid_t id, int flags) +{ + _NOTE(ARGUNUSED(flags)); + + drmach_device_t *dp; + sbd_error_t *err = NULL; + dev_info_t *rdip; + dev_info_t *fdip = NULL; + + if (!DRMACH_IS_DEVICE_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + dp = id; + + rdip = dp->node->getdip(dp->node); + ASSERT(rdip); + ASSERT(e_ddi_branch_held(rdip)); + + /* allocate cpu id for the CPU device. */ + if (DRMACH_IS_CPU_ID(id)) { + DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node); + ASSERT(hdl != NULL); + if (ACPI_FAILURE(acpidev_dr_allocate_cpuid(hdl, NULL))) { + err = drerr_new(1, EX86_ALLOC_CPUID, NULL); + } + return (err); + } + + if (DRMACH_IS_MEM_ID(id)) { + err = drmach_mem_update_lgrp(id); + if (err) + return (err); + } + + if (e_ddi_branch_configure(rdip, &fdip, 0) != 0) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + dev_info_t *dip = (fdip != NULL) ? fdip : rdip; + + (void) ddi_pathname(dip, path); + err = drerr_new(1, EX86_DRVFAIL, path); + kmem_free(path, MAXPATHLEN); + + /* If non-NULL, fdip is returned held and must be released */ + if (fdip != NULL) + ddi_release_devi(fdip); + } + + return (err); +} + +sbd_error_t * +drmach_unconfigure(drmachid_t id, int flags) +{ + _NOTE(ARGUNUSED(flags)); + + drmach_device_t *dp; + sbd_error_t *err = NULL; + dev_info_t *rdip, *fdip = NULL; + + if (!DRMACH_IS_DEVICE_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + dp = id; + + rdip = dp->node->getdip(dp->node); + ASSERT(rdip); + ASSERT(e_ddi_branch_held(rdip)); + + if (DRMACH_IS_CPU_ID(id)) { + DRMACH_HANDLE hdl = drmach_node_get_dnode(dp->node); + ASSERT(hdl != NULL); + if (ACPI_FAILURE(acpidev_dr_free_cpuid(hdl))) { + err = drerr_new(1, EX86_FREE_CPUID, NULL); + } + return (err); + } + + /* + * Note: FORCE flag is no longer necessary under devfs + */ + if (e_ddi_branch_unconfigure(rdip, &fdip, 0)) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * If non-NULL, fdip is returned held and must be released. 
+ */ + if (fdip != NULL) { + (void) ddi_pathname(fdip, path); + ndi_rele_devi(fdip); + } else { + (void) ddi_pathname(rdip, path); + } + + err = drerr_new(1, EX86_DRVFAIL, path); + + kmem_free(path, MAXPATHLEN); + } + + return (err); +} + +sbd_error_t * +drmach_get_dip(drmachid_t id, dev_info_t **dip) +{ + drmach_device_t *dp; + + if (!DRMACH_IS_DEVICE_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + dp = id; + + *dip = dp->node->getdip(dp->node); + + return (NULL); +} + +sbd_error_t * +drmach_release(drmachid_t id) +{ + drmach_common_t *cp; + + if (!DRMACH_IS_DEVICE_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + cp = id; + + return (cp->release(id)); +} + +sbd_error_t * +drmach_status(drmachid_t id, drmach_status_t *stat) +{ + drmach_common_t *cp; + sbd_error_t *err; + + rw_enter(&drmach_boards_rwlock, RW_READER); + if (!DRMACH_IS_ID(id)) { + rw_exit(&drmach_boards_rwlock); + return (drerr_new(0, EX86_NOTID, NULL)); + } + cp = (drmach_common_t *)id; + err = cp->status(id, stat); + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +static sbd_error_t * +drmach_update_acpi_status(drmachid_t id, drmach_opts_t *opts) +{ + char *copts; + drmach_board_t *bp; + DRMACH_HANDLE hdl; + int event, code; + boolean_t inprogress = B_FALSE; + + if (DRMACH_NULL_ID(id) || !DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = (drmach_board_t *)id; + hdl = drmach_node_get_dnode(bp->tree); + ASSERT(hdl != NULL); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + /* Get the status code. */ + copts = opts->copts; + if (strncmp(copts, ACPIDEV_CMD_OST_INPROGRESS, + strlen(ACPIDEV_CMD_OST_INPROGRESS)) == 0) { + inprogress = B_TRUE; + code = ACPI_OST_STA_INSERT_IN_PROGRESS; + copts += strlen(ACPIDEV_CMD_OST_INPROGRESS); + } else if (strncmp(copts, ACPIDEV_CMD_OST_SUCCESS, + strlen(ACPIDEV_CMD_OST_SUCCESS)) == 0) { + code = ACPI_OST_STA_SUCCESS; + copts += strlen(ACPIDEV_CMD_OST_SUCCESS); + } else if (strncmp(copts, ACPIDEV_CMD_OST_FAILURE, + strlen(ACPIDEV_CMD_OST_FAILURE)) == 0) { + code = ACPI_OST_STA_FAILURE; + copts += strlen(ACPIDEV_CMD_OST_FAILURE); + } else if (strncmp(copts, ACPIDEV_CMD_OST_NOOP, + strlen(ACPIDEV_CMD_OST_NOOP)) == 0) { + return (NULL); + } else { + return (drerr_new(0, EX86_UNKPTCMD, opts->copts)); + } + + /* Get the event type. 
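(assuming the acpidev string macros expand as their names suggest, a complete copts string would look something like "acpi-update-status=success" followed by "acpi-event-type=bus_check"; the exact spellings are defined by ACPIDEV_CMD_OST_* and ACPIDEV_EVENT_TYPE_*.)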
*/ + copts = strstr(copts, ACPIDEV_EVENT_TYPE_ATTR_NAME); + if (copts == NULL) { + return (drerr_new(0, EX86_UNKPTCMD, opts->copts)); + } + copts += strlen(ACPIDEV_EVENT_TYPE_ATTR_NAME); + if (copts[0] != '=') { + return (drerr_new(0, EX86_UNKPTCMD, opts->copts)); + } + copts += strlen("="); + if (strncmp(copts, ACPIDEV_EVENT_TYPE_BUS_CHECK, + strlen(ACPIDEV_EVENT_TYPE_BUS_CHECK)) == 0) { + event = ACPI_NOTIFY_BUS_CHECK; + } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK, + strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK)) == 0) { + event = ACPI_NOTIFY_DEVICE_CHECK; + } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT, + strlen(ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT)) == 0) { + event = ACPI_NOTIFY_DEVICE_CHECK_LIGHT; + } else if (strncmp(copts, ACPIDEV_EVENT_TYPE_EJECT_REQUEST, + strlen(ACPIDEV_EVENT_TYPE_EJECT_REQUEST)) == 0) { + event = ACPI_NOTIFY_EJECT_REQUEST; + if (inprogress) { + code = ACPI_OST_STA_EJECT_IN_PROGRESS; + } + } else { + return (drerr_new(0, EX86_UNKPTCMD, opts->copts)); + } + + (void) acpidev_eval_ost(hdl, event, code, NULL, 0); + + return (NULL); +} + +static struct { + const char *name; + sbd_error_t *(*handler)(drmachid_t id, drmach_opts_t *opts); +} drmach_pt_arr[] = { + { ACPIDEV_CMD_OST_PREFIX, &drmach_update_acpi_status }, + /* the following line must always be last */ + { NULL, NULL } +}; + +sbd_error_t * +drmach_passthru(drmachid_t id, drmach_opts_t *opts) +{ + int i; + sbd_error_t *err; + + i = 0; + while (drmach_pt_arr[i].name != NULL) { + int len = strlen(drmach_pt_arr[i].name); + + if (strncmp(drmach_pt_arr[i].name, opts->copts, len) == 0) + break; + + i += 1; + } + + if (drmach_pt_arr[i].name == NULL) + err = drerr_new(0, EX86_UNKPTCMD, opts->copts); + else + err = (*drmach_pt_arr[i].handler)(id, opts); + + return (err); +} + +/* + * Board specific interfaces to support dr driver + */ +static int +drmach_get_portid(drmach_node_t *np) +{ + uint32_t portid; + + if (np->getprop(np, ACPIDEV_DR_PROP_PORTID, + &portid, sizeof (portid)) == 0) { + /* + * acpidev returns portid as a uint32_t; validate it. + */ + if (portid > INT_MAX) { + return (-1); + } else { + return (portid); + } + } + + return (-1); +} + +/* + * This is a helper function to determine if a given + * node should be considered for a dr operation according + * to predefined dr type nodes and the node's name. + * Formal Parameter: The name of a device node. + * Return Value: -1, name does not map to a valid dr type. + * A value greater than or equal to 0, name is a valid dr type. + */ +static int +drmach_name2type_idx(char *name) +{ + int index, ntypes; + + if (name == NULL) + return (-1); + + /* + * Determine how many possible types are currently supported + * for dr. + */ + ntypes = sizeof (drmach_name2type) / sizeof (drmach_name2type[0]); + + /* Determine if the node's name corresponds to a predefined type. */ + for (index = 0; index < ntypes; index++) { + if (strcmp(drmach_name2type[index].name, name) == 0) + /* The node is an allowed type for dr. */ + return (index); + } + + /* + * If the name of the node does not map to any of the + * types in the array drmach_name2type, then the node is not of + * interest to dr.
+ */ + return (-1); +} + +static int +drmach_board_find_devices_cb(drmach_node_walk_args_t *args) +{ + drmach_node_t *node = args->node; + drmach_board_cb_data_t *data = args->data; + drmach_board_t *obj = data->obj; + + int rv, portid; + uint32_t bnum; + drmachid_t id; + drmach_device_t *device; + char name[OBP_MAXDRVNAME]; + + portid = drmach_get_portid(node); + rv = node->getprop(node, ACPIDEV_DR_PROP_DEVNAME, + name, OBP_MAXDRVNAME); + if (rv) + return (0); + + rv = node->getprop(node, ACPIDEV_DR_PROP_BOARDNUM, + &bnum, sizeof (bnum)); + if (rv) { + return (0); + } + if (bnum > INT_MAX) { + return (0); + } + + if (bnum != obj->bnum) + return (0); + + if (drmach_name2type_idx(name) < 0) { + return (0); + } + + /* + * Create a device data structure from this node data. + * The call may yield nothing if the node is not of interest + * to drmach. + */ + data->err = drmach_device_new(node, obj, portid, &id); + if (data->err) + return (-1); + else if (!id) { + /* + * drmach_device_new examined the node we passed in + * and determined that it was one not of interest to + * drmach. So, it is skipped. + */ + return (0); + } + + rv = drmach_array_set(obj->devices, data->ndevs++, id); + if (rv) { + data->err = DRMACH_INTERNAL_ERROR(); + return (-1); + } + device = id; + + data->err = (*data->found)(data->a, device->type, device->unum, id); + + return (data->err == NULL ? 0 : -1); +} + +sbd_error_t * +drmach_board_find_devices(drmachid_t id, void *a, + sbd_error_t *(*found)(void *a, const char *, int, drmachid_t)) +{ + drmach_board_t *bp = (drmach_board_t *)id; + sbd_error_t *err; + int max_devices; + int rv; + drmach_board_cb_data_t data; + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + max_devices = MAX_CPU_UNITS_PER_BOARD; + max_devices += MAX_MEM_UNITS_PER_BOARD; + max_devices += MAX_IO_UNITS_PER_BOARD; + + if (bp->devices == NULL) + bp->devices = drmach_array_new(0, max_devices); + ASSERT(bp->tree != NULL); + + data.obj = bp; + data.ndevs = 0; + data.found = found; + data.a = a; + data.err = NULL; + + acpidev_dr_lock_all(); + rv = drmach_node_walk(bp->tree, &data, drmach_board_find_devices_cb); + acpidev_dr_unlock_all(); + if (rv == 0) { + err = NULL; + } else { + drmach_array_dispose(bp->devices, drmach_device_dispose); + bp->devices = NULL; + + if (data.err) + err = data.err; + else + err = DRMACH_INTERNAL_ERROR(); + } + + return (err); +} + +int +drmach_board_lookup(int bnum, drmachid_t *id) +{ + int rv = 0; + + if (bnum < 0) { + *id = 0; + return (-1); + } + + rw_enter(&drmach_boards_rwlock, RW_READER); + if (drmach_array_get(drmach_boards, (uint_t)bnum, id)) { + *id = 0; + rv = -1; + } + rw_exit(&drmach_boards_rwlock); + + return (rv); +} + +sbd_error_t * +drmach_board_name(int bnum, char *buf, int buflen) +{ + ACPI_HANDLE hdl; + sbd_error_t *err = NULL; + + if (bnum < 0) { + return (drerr_new(1, EX86_BNUM, "%d", bnum)); + } + + acpidev_dr_lock_all(); + if (ACPI_FAILURE(acpidev_dr_get_board_handle(bnum, &hdl))) { + DRMACH_PR("!drmach_board_name: failed to lookup ACPI handle " + "for board %d.", bnum); + err = drerr_new(1, EX86_BNUM, "%d", bnum); + } else if (ACPI_FAILURE(acpidev_dr_get_board_name(hdl, buf, buflen))) { + DRMACH_PR("!drmach_board_name: failed to generate board name " + "for board %d.", bnum); + err = drerr_new(0, EX86_INVALID_ARG, + ": buffer is too small for board name."); + } + acpidev_dr_unlock_all(); + + return (err); +} + +int +drmach_board_is_floating(drmachid_t id) +{ + drmach_board_t *bp; + + if (!DRMACH_IS_BOARD_ID(id)) + return 
(0); + + bp = (drmach_board_t *)id; + + return ((drmach_domain.floating & (1ULL << bp->bnum)) ? 1 : 0); +} + +static ACPI_STATUS +drmach_board_check_dependent_cb(ACPI_HANDLE hdl, UINT32 lvl, void *ctx, + void **retval) +{ + uint32_t bdnum; + drmach_board_t *bp; + ACPI_STATUS rc = AE_OK; + int cmd = (int)(intptr_t)ctx; + + ASSERT(hdl != NULL); + ASSERT(lvl == UINT32_MAX); + ASSERT(retval != NULL); + + /* Skip non-board devices. */ + if (!acpidev_dr_device_is_board(hdl)) { + return (AE_OK); + } else if (ACPI_FAILURE(acpidev_dr_get_board_number(hdl, &bdnum))) { + DRMACH_PR("!drmach_board_check_dependent_cb: failed to get " + "board number for object %p.\n", hdl); + return (AE_ERROR); + } else if (bdnum > MAX_BOARDS) { + DRMACH_PR("!drmach_board_check_dependent_cb: board number %u " + "is too big, max %u.", bdnum, MAX_BOARDS); + return (AE_ERROR); + } + + bp = drmach_get_board_by_bnum(bdnum); + switch (cmd) { + case SBD_CMD_CONNECT: + /* + * Its parent board should be present, assigned, powered and + * connected when connecting the child board. + */ + if (bp == NULL) { + *retval = hdl; + rc = AE_ERROR; + } else { + bp->powered = acpidev_dr_device_is_powered(hdl); + if (!bp->connected || !bp->powered || !bp->assigned) { + *retval = hdl; + rc = AE_ERROR; + } + } + break; + + case SBD_CMD_POWERON: + /* + * Its parent board should be present, assigned and powered when + * powering on the child board. + */ + if (bp == NULL) { + *retval = hdl; + rc = AE_ERROR; + } else { + bp->powered = acpidev_dr_device_is_powered(hdl); + if (!bp->powered || !bp->assigned) { + *retval = hdl; + rc = AE_ERROR; + } + } + break; + + case SBD_CMD_ASSIGN: + /* + * Its parent board should be present and assigned when + * assigning the child board. + */ + if (bp == NULL) { + *retval = hdl; + rc = AE_ERROR; + } else if (!bp->assigned) { + *retval = hdl; + rc = AE_ERROR; + } + break; + + case SBD_CMD_DISCONNECT: + /* + * The child board should be disconnected if present when + * disconnecting its parent board. + */ + if (bp != NULL && bp->connected) { + *retval = hdl; + rc = AE_ERROR; + } + break; + + case SBD_CMD_POWEROFF: + /* + * The child board should be disconnected and powered off if + * present when powering off its parent board. + */ + if (bp != NULL) { + bp->powered = acpidev_dr_device_is_powered(hdl); + if (bp->connected || bp->powered) { + *retval = hdl; + rc = AE_ERROR; + } + } + break; + + case SBD_CMD_UNASSIGN: + /* + * The child board should be disconnected, powered off and + * unassigned if present when unassigning its parent board. + */ + if (bp != NULL) { + bp->powered = acpidev_dr_device_is_powered(hdl); + if (bp->connected || bp->powered || bp->assigned) { + *retval = hdl; + rc = AE_ERROR; + } + } + break; + + default: + /* Return success for all other commands. 
*/ + break; + } + + return (rc); +} + +sbd_error_t * +drmach_board_check_dependent(int cmd, drmach_board_t *bp) +{ + int reverse; + char *name; + sbd_error_t *err = NULL; + DRMACH_HANDLE hdl; + DRMACH_HANDLE dp = NULL; + + ASSERT(bp != NULL); + ASSERT(DRMACH_IS_BOARD_ID(bp)); + ASSERT(RW_LOCK_HELD(&drmach_boards_rwlock)); + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + switch (cmd) { + case SBD_CMD_ASSIGN: + case SBD_CMD_POWERON: + case SBD_CMD_CONNECT: + if (ACPI_SUCCESS(acpidev_dr_device_walk_ejd(hdl, + &drmach_board_check_dependent_cb, + (void *)(intptr_t)cmd, &dp))) { + return (NULL); + } + reverse = 0; + break; + + case SBD_CMD_UNASSIGN: + case SBD_CMD_POWEROFF: + case SBD_CMD_DISCONNECT: + if (ACPI_SUCCESS(acpidev_dr_device_walk_edl(hdl, + &drmach_board_check_dependent_cb, + (void *)(intptr_t)cmd, &dp))) { + return (NULL); + } + reverse = 1; + break; + + default: + return (drerr_new(0, EX86_INAPPROP, NULL)); + } + + if (dp == NULL) { + return (drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name)); + } + name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + if (ACPI_FAILURE(acpidev_dr_get_board_name(dp, name, MAXPATHLEN))) { + err = drerr_new(1, EX86_WALK_DEPENDENCY, "%s", bp->cm.name); + } else if (reverse == 0) { + err = drerr_new(1, EX86_WALK_DEPENDENCY, + "%s, depends on board %s", bp->cm.name, name); + } else { + err = drerr_new(1, EX86_WALK_DEPENDENCY, + "board %s depends on %s", name, bp->cm.name); + } + kmem_free(name, MAXPATHLEN); + + return (err); +} + +sbd_error_t * +drmach_board_assign(int bnum, drmachid_t *id) +{ + sbd_error_t *err = NULL; + + if (bnum < 0) { + return (drerr_new(1, EX86_BNUM, "%d", bnum)); + } + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + + if (drmach_array_get(drmach_boards, bnum, id) == -1) { + err = drerr_new(1, EX86_BNUM, "%d", bnum); + } else { + drmach_board_t *bp; + + /* + * Board has already been created, downgrade to reader. 
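+ * + * The write lock is only needed when a new board object may be + * inserted into drmach_boards; for an existing board the lock is + * downgraded so that concurrent readers are not blocked for the rest + * of the call.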
+ */ + if (*id) + rw_downgrade(&drmach_boards_rwlock); + + bp = *id; + if (!(*id)) + bp = *id = + (drmachid_t)drmach_board_new(bnum, 0); + + if (bp == NULL) { + DRMACH_PR("!drmach_board_assign: failed to create " + "object for board %d.", bnum); + err = drerr_new(1, EX86_BNUM, "%d", bnum); + } else { + err = drmach_board_check_dependent(SBD_CMD_ASSIGN, bp); + if (err == NULL) + bp->assigned = 1; + } + } + + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_unassign(drmachid_t id) +{ + drmach_board_t *bp; + sbd_error_t *err; + drmach_status_t stat; + + if (DRMACH_NULL_ID(id)) + return (NULL); + + if (!DRMACH_IS_BOARD_ID(id)) { + return (drerr_new(0, EX86_INAPPROP, NULL)); + } + bp = id; + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + + err = drmach_board_status(id, &stat); + if (err) { + rw_exit(&drmach_boards_rwlock); + return (err); + } + + if (stat.configured || stat.busy) { + err = drerr_new(0, EX86_CONFIGBUSY, bp->cm.name); + } else if (bp->connected) { + err = drerr_new(0, EX86_CONNECTBUSY, bp->cm.name); + } else if (stat.powered) { + err = drerr_new(0, EX86_POWERBUSY, bp->cm.name); + } else { + err = drmach_board_check_dependent(SBD_CMD_UNASSIGN, bp); + if (err == NULL) { + if (drmach_array_set(drmach_boards, bp->bnum, 0) != 0) + err = DRMACH_INTERNAL_ERROR(); + else + drmach_board_dispose(bp); + } + } + + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_poweron(drmachid_t id) +{ + drmach_board_t *bp; + sbd_error_t *err = NULL; + DRMACH_HANDLE hdl; + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = id; + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + bp->powered = drmach_board_check_power(bp); + if (bp->powered) { + return (NULL); + } + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + err = drmach_board_check_dependent(SBD_CMD_POWERON, bp); + if (err == NULL) { + acpidev_dr_lock_all(); + if (ACPI_FAILURE(acpidev_dr_device_poweron(hdl))) + err = drerr_new(0, EX86_POWERON, NULL); + acpidev_dr_unlock_all(); + + /* Check whether the board is powered on. */ + bp->powered = drmach_board_check_power(bp); + if (err == NULL && bp->powered == 0) + err = drerr_new(0, EX86_POWERON, NULL); + } + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_poweroff(drmachid_t id) +{ + sbd_error_t *err = NULL; + drmach_board_t *bp; + drmach_status_t stat; + DRMACH_HANDLE hdl; + + if (DRMACH_NULL_ID(id)) + return (NULL); + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = id; + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + /* Check whether the board is busy, configured or connected. 
*/ + err = drmach_board_status(id, &stat); + if (err != NULL) + return (err); + if (stat.configured || stat.busy) { + return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name)); + } else if (bp->connected) { + return (drerr_new(0, EX86_CONNECTBUSY, bp->cm.name)); + } + + bp->powered = drmach_board_check_power(bp); + if (bp->powered == 0) { + return (NULL); + } + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + err = drmach_board_check_dependent(SBD_CMD_POWEROFF, bp); + if (err == NULL) { + acpidev_dr_lock_all(); + if (ACPI_FAILURE(acpidev_dr_device_poweroff(hdl))) + err = drerr_new(0, EX86_POWEROFF, NULL); + acpidev_dr_unlock_all(); + + bp->powered = drmach_board_check_power(bp); + if (err == NULL && bp->powered != 0) + err = drerr_new(0, EX86_POWEROFF, NULL); + } + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_test(drmachid_t id, drmach_opts_t *opts, int force) +{ + _NOTE(ARGUNUSED(opts, force)); + + drmach_board_t *bp; + DRMACH_HANDLE hdl; + + if (DRMACH_NULL_ID(id)) + return (NULL); + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = id; + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + if (ACPI_FAILURE(acpidev_dr_device_check_status(hdl))) + return (drerr_new(0, EX86_IN_FAILURE, NULL)); + + return (NULL); +} + +sbd_error_t * +drmach_board_connect(drmachid_t id, drmach_opts_t *opts) +{ + _NOTE(ARGUNUSED(opts)); + + sbd_error_t *err = NULL; + drmach_board_t *bp = (drmach_board_t *)id; + DRMACH_HANDLE hdl; + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = (drmach_board_t *)id; + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + err = drmach_board_check_dependent(SBD_CMD_CONNECT, bp); + if (err == NULL) { + acpidev_dr_lock_all(); + if (ACPI_FAILURE(acpidev_dr_device_insert(hdl))) { + (void) acpidev_dr_device_remove(hdl); + err = drerr_new(1, EX86_PROBE, NULL); + } else { + bp->connected = 1; + } + acpidev_dr_unlock_all(); + } + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_disconnect(drmachid_t id, drmach_opts_t *opts) +{ + _NOTE(ARGUNUSED(opts)); + + DRMACH_HANDLE hdl; + drmach_board_t *bp; + drmach_status_t stat; + sbd_error_t *err = NULL; + + if (DRMACH_NULL_ID(id)) + return (NULL); + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = (drmach_board_t *)id; + + hdl = drmach_node_get_dnode(bp->tree); + if (hdl == NULL) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + /* Check whether the board is busy or configured. 
*/ + err = drmach_board_status(id, &stat); + if (err != NULL) + return (err); + if (stat.configured || stat.busy) + return (drerr_new(0, EX86_CONFIGBUSY, bp->cm.name)); + + rw_enter(&drmach_boards_rwlock, RW_WRITER); + err = drmach_board_check_dependent(SBD_CMD_DISCONNECT, bp); + if (err == NULL) { + acpidev_dr_lock_all(); + if (ACPI_SUCCESS(acpidev_dr_device_remove(hdl))) { + bp->connected = 0; + } else { + err = drerr_new(1, EX86_DEPROBE, bp->cm.name); + } + acpidev_dr_unlock_all(); + } + rw_exit(&drmach_boards_rwlock); + + return (err); +} + +sbd_error_t * +drmach_board_deprobe(drmachid_t id) +{ + drmach_board_t *bp; + + if (!DRMACH_IS_BOARD_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + bp = id; + + cmn_err(CE_CONT, "DR: detach board %d\n", bp->bnum); + + if (bp->devices) { + drmach_array_dispose(bp->devices, drmach_device_dispose); + bp->devices = NULL; + } + + bp->boot_board = 0; + + return (NULL); +} + +/* + * CPU specific interfaces to support dr driver + */ +sbd_error_t * +drmach_cpu_disconnect(drmachid_t id) +{ + if (!DRMACH_IS_CPU_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +sbd_error_t * +drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid) +{ + drmach_cpu_t *cpu; + + if (!DRMACH_IS_CPU_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + cpu = (drmach_cpu_t *)id; + + if (cpu->cpuid == -1) { + if (ACPI_SUCCESS(acpica_get_cpu_id_by_object( + drmach_node_get_dnode(cpu->dev.node), cpuid))) { + cpu->cpuid = *cpuid; + } else { + *cpuid = -1; + } + } else { + *cpuid = cpu->cpuid; + } + + return (NULL); +} + +sbd_error_t * +drmach_cpu_get_impl(drmachid_t id, int *ip) +{ + if (!DRMACH_IS_CPU_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + /* Assume all CPUs in the system are homogeneous. */ + *ip = X86_CPU_IMPL_UNKNOWN; + + kpreempt_disable(); + if (cpuid_getvendor(CPU) == X86_VENDOR_Intel) { + /* NHM-EX CPU */ + if (cpuid_getfamily(CPU) == 0x6 && + cpuid_getmodel(CPU) == 0x2e) { + *ip = X86_CPU_IMPL_NEHALEM_EX; + } + } + kpreempt_enable(); + + return (NULL); +} + +/* + * Memory specific interfaces to support dr driver + */ + +/* + * When drmach_mem_new() is called, the mp->base_pa field is set to the base + * address of configured memory if there's configured memory on the board, + * otherwise set to UINT64_MAX. For a hot-added memory board, there's no + * configured memory when drmach_mem_new() is called, so mp->base_pa is set + * to UINT64_MAX and we need to set a correct value for it after memory + * hot-add operations. + * A hot-added memory board may contain multiple memory segments; + * drmach_mem_add_span() will be called once for each segment, so we can't + * rely on the basepa argument. It's also possible that only part of a + * memory segment is added into the OS, so we need to intersect with the + * phys_install list to get the real base address of configured memory on + * the board. + */ +sbd_error_t * +drmach_mem_add_span(drmachid_t id, uint64_t basepa, uint64_t size) +{ + _NOTE(ARGUNUSED(basepa)); + + uint64_t nbytes = 0; + uint64_t endpa; + drmach_mem_t *mp; + struct memlist *ml2; + struct memlist *p; + + ASSERT(size != 0); + + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + mp = (drmach_mem_t *)id; + + /* Compute basepa and size of installed memory.
*/ + endpa = _ptob64(physmax + 1); + memlist_read_lock(); + ml2 = memlist_dup(phys_install); + memlist_read_unlock(); + ml2 = memlist_del_span(ml2, 0ull, mp->slice_base); + if (ml2 && endpa > mp->slice_top) { + ml2 = memlist_del_span(ml2, mp->slice_top, + endpa - mp->slice_top); + } + + ASSERT(ml2); + if (ml2) { + for (p = ml2; p; p = p->ml_next) { + nbytes += p->ml_size; + if (mp->base_pa > p->ml_address) + mp->base_pa = p->ml_address; + } + ASSERT(nbytes > 0); + mp->nbytes += nbytes; + memlist_delete(ml2); + } + + return (NULL); +} + +static sbd_error_t * +drmach_mem_update_lgrp(drmachid_t id) +{ + ACPI_STATUS rc; + DRMACH_HANDLE hdl; + void *hdlp; + drmach_mem_t *mp; + update_membounds_t umb; + + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + mp = (drmach_mem_t *)id; + /* No need to update lgrp if memory is already installed. */ + if (mp->nbytes != 0) + return (NULL); + /* No need to update lgrp if lgrp is disabled. */ + if (max_mem_nodes == 1) + return (NULL); + + /* Add memory to lgroup */ + hdl = mp->dev.node->get_dnode(mp->dev.node); + rc = acpidev_dr_device_get_memory_index(hdl, &umb.u_device_id); + ASSERT(ACPI_SUCCESS(rc)); + if (ACPI_FAILURE(rc)) { + cmn_err(CE_WARN, "drmach: failed to get device id of memory, " + "can't update lgrp information."); + return (drerr_new(0, EX86_INTERNAL, NULL)); + } + rc = acpidev_dr_get_mem_numa_info(hdl, mp->memlist, &hdlp, + &umb.u_domain, &umb.u_sli_cnt, &umb.u_sli_ptr); + ASSERT(ACPI_SUCCESS(rc)); + if (ACPI_FAILURE(rc)) { + cmn_err(CE_WARN, "drmach: failed to get lgrp info of memory, " + "can't update lgrp information."); + return (drerr_new(0, EX86_INTERNAL, NULL)); + } + umb.u_base = (uint64_t)mp->slice_base; + umb.u_length = (uint64_t)(mp->slice_top - mp->slice_base); + lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb); + acpidev_dr_free_mem_numa_info(hdlp); + + return (NULL); +} + +sbd_error_t * +drmach_mem_enable(drmachid_t id) +{ + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + else + return (NULL); +} + +sbd_error_t * +drmach_mem_get_info(drmachid_t id, drmach_mem_info_t *mem) +{ + drmach_mem_t *mp; + + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + mp = (drmach_mem_t *)id; + + /* + * This is only used by dr to round up/down the memory + * for copying. + */ + mem->mi_alignment_mask = mp->mem_alignment - 1; + mem->mi_basepa = mp->base_pa; + mem->mi_size = mp->nbytes; + mem->mi_slice_base = mp->slice_base; + mem->mi_slice_top = mp->slice_top; + mem->mi_slice_size = mp->slice_size; + + return (NULL); +} + +sbd_error_t * +drmach_mem_get_slice_info(drmachid_t id, + uint64_t *bp, uint64_t *ep, uint64_t *sp) +{ + drmach_mem_t *mp; + + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + mp = (drmach_mem_t *)id; + + if (bp) + *bp = mp->slice_base; + if (ep) + *ep = mp->slice_top; + if (sp) + *sp = mp->slice_size; + + return (NULL); +} + +sbd_error_t * +drmach_mem_get_memlist(drmachid_t id, struct memlist **ml) +{ +#ifdef DEBUG + int rv; +#endif + drmach_mem_t *mem; + struct memlist *mlist; + + if (!DRMACH_IS_MEM_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + mem = (drmach_mem_t *)id; + + mlist = memlist_dup(mem->memlist); + *ml = mlist; + +#ifdef DEBUG + /* + * Make sure the incoming memlist doesn't already + * intersect with what's present in the system (phys_install). 
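+ * Any overlap means the span is already configured, so the derived
+ * list is dumped for inspection, freed, and an internal error is
+ * returned instead.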
+ */ + memlist_read_lock(); + rv = memlist_intersect(phys_install, mlist); + memlist_read_unlock(); + if (rv) { + DRMACH_PR("Derived memlist intersects with phys_install\n"); + memlist_dump(mlist); + + DRMACH_PR("phys_install memlist:\n"); + memlist_dump(phys_install); + + memlist_delete(mlist); + return (DRMACH_INTERNAL_ERROR()); + } + + DRMACH_PR("Derived memlist:"); + memlist_dump(mlist); +#endif + + return (NULL); +} + +processorid_t +drmach_mem_cpu_affinity(drmachid_t id) +{ + _NOTE(ARGUNUSED(id)); + + return (CPU_CURRENT); +} + +int +drmach_copy_rename_need_suspend(drmachid_t id) +{ + _NOTE(ARGUNUSED(id)); + + return (0); +} + +/* + * IO specific interfaces to support dr driver + */ +sbd_error_t * +drmach_io_pre_release(drmachid_t id) +{ + if (!DRMACH_IS_IO_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +sbd_error_t * +drmach_io_unrelease(drmachid_t id) +{ + if (!DRMACH_IS_IO_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +sbd_error_t * +drmach_io_post_release(drmachid_t id) +{ + _NOTE(ARGUNUSED(id)); + + return (NULL); +} + +sbd_error_t * +drmach_io_post_attach(drmachid_t id) +{ + if (!DRMACH_IS_IO_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + + return (NULL); +} + +sbd_error_t * +drmach_io_is_attached(drmachid_t id, int *yes) +{ + drmach_device_t *dp; + dev_info_t *dip; + int state; + + if (!DRMACH_IS_IO_ID(id)) + return (drerr_new(0, EX86_INAPPROP, NULL)); + dp = id; + + dip = dp->node->getdip(dp->node); + if (dip == NULL) { + *yes = 0; + return (NULL); + } + + state = ddi_get_devstate(dip); + *yes = ((i_ddi_node_state(dip) >= DS_ATTACHED) || + (state == DDI_DEVSTATE_UP)); + + return (NULL); +} + +/* + * Miscellaneous interfaces to support dr driver + */ +int +drmach_verify_sr(dev_info_t *dip, int sflag) +{ + _NOTE(ARGUNUSED(dip, sflag)); + + return (0); +} + +void +drmach_suspend_last(void) +{ +} + +void +drmach_resume_first(void) +{ +} + +/* + * Log a DR sysevent. + * Return value: 0 success, non-zero failure. + */ +int +drmach_log_sysevent(int board, char *hint, int flag, int verbose) +{ + sysevent_t *ev = NULL; + sysevent_id_t eid; + int rv, km_flag; + sysevent_value_t evnt_val; + sysevent_attr_list_t *evnt_attr_list = NULL; + sbd_error_t *err; + char attach_pnt[MAXNAMELEN]; + + km_flag = (flag == SE_SLEEP) ? KM_SLEEP : KM_NOSLEEP; + attach_pnt[0] = '\0'; + err = drmach_board_name(board, attach_pnt, MAXNAMELEN); + if (err != NULL) { + sbd_err_clear(&err); + rv = -1; + goto logexit; + } + if (verbose) { + DRMACH_PR("drmach_log_sysevent: %s %s, flag: %d, verbose: %d\n", + attach_pnt, hint, flag, verbose); + } + + if ((ev = sysevent_alloc(EC_DR, ESC_DR_AP_STATE_CHANGE, + SUNW_KERN_PUB"dr", km_flag)) == NULL) { + rv = -2; + goto logexit; + } + evnt_val.value_type = SE_DATA_TYPE_STRING; + evnt_val.value.sv_string = attach_pnt; + if ((rv = sysevent_add_attr(&evnt_attr_list, DR_AP_ID, &evnt_val, + km_flag)) != 0) + goto logexit; + + evnt_val.value_type = SE_DATA_TYPE_STRING; + evnt_val.value.sv_string = hint; + if ((rv = sysevent_add_attr(&evnt_attr_list, DR_HINT, &evnt_val, + km_flag)) != 0) { + sysevent_free_attr(evnt_attr_list); + goto logexit; + } + + (void) sysevent_attach_attributes(ev, evnt_attr_list); + + /* + * Log the event but do not sleep waiting for its + * delivery. This provides insulation from syseventd. 
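+ * If the event cannot be queued, the failure is passed back through
+ * the return value; callers that cannot act on it simply ignore it.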
+ */ + rv = log_sysevent(ev, SE_NOSLEEP, &eid); + +logexit: + if (ev) + sysevent_free(ev); + if ((rv != 0) && verbose) + cmn_err(CE_WARN, "!drmach_log_sysevent failed (rv %d) for %s " + " %s\n", rv, attach_pnt, hint); + + return (rv); +} diff --git a/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h new file mode 100644 index 0000000000..abbc264ef0 --- /dev/null +++ b/usr/src/uts/i86pc/io/acpi/drmach_acpi/drmach_acpi.h @@ -0,0 +1,199 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +#ifndef _SYS_DRMACH_ACPI_H +#define _SYS_DRMACH_ACPI_H +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/param.h> +#include <sys/sunddi.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/acpidev.h> +#include <sys/drmach.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +/* Use ACPI handle as DRMACH handle on x86 systems. */ +#define DRMACH_HANDLE ACPI_HANDLE + +/* Macros to deal with object type. 
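+ * Every drmach object starts with a drmach_common_t whose isa field
+ * points at the constructor that created it, so an id is classified
+ * by comparing isa against the known constructors.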
*/ +#define DRMACH_OBJ(id) ((drmach_common_t *)id) + +#define DRMACH_NULL_ID(id) ((id) == 0) + +#define DRMACH_IS_BOARD_ID(id) \ + ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_board_new)) + +#define DRMACH_IS_CPU_ID(id) \ + ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new)) + +#define DRMACH_IS_MEM_ID(id) \ + ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_mem_new)) + +#define DRMACH_IS_IO_ID(id) \ + ((id != 0) && (DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) + +#define DRMACH_IS_DEVICE_ID(id) \ + ((id != 0) && \ + (DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \ + DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \ + DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) + +#define DRMACH_IS_ID(id) \ + ((id != 0) && \ + (DRMACH_OBJ(id)->isa == (void *)drmach_board_new || \ + DRMACH_OBJ(id)->isa == (void *)drmach_cpu_new || \ + DRMACH_OBJ(id)->isa == (void *)drmach_mem_new || \ + DRMACH_OBJ(id)->isa == (void *)drmach_io_new)) + +#define DRMACH_INTERNAL_ERROR() \ + drerr_new(1, EX86_INTERNAL, drmach_ie_fmt, __LINE__) + +#ifdef DEBUG +extern int drmach_debug; + +#define DRMACH_PR if (drmach_debug) printf +#else +#define DRMACH_PR _NOTE(CONSTANTCONDITION) if (0) printf +#endif /* DEBUG */ + +typedef struct { + struct drmach_node *node; + void *data; + void *func; +} drmach_node_walk_args_t; + +typedef struct drmach_node { + void *here; + + DRMACH_HANDLE (*get_dnode)(struct drmach_node *node); + dev_info_t *(*getdip)(struct drmach_node *node); + int (*getproplen)(struct drmach_node *node, char *name, + int *len); + int (*getprop)(struct drmach_node *node, char *name, + void *buf, int len); + int (*walk)(struct drmach_node *node, void *data, + int (*cb)(drmach_node_walk_args_t *args)); +} drmach_node_t; + +typedef struct { + int min_index; + int max_index; + int arr_sz; + drmachid_t *arr; +} drmach_array_t; + +typedef struct { + void *isa; + + void (*dispose)(drmachid_t); + sbd_error_t *(*release)(drmachid_t); + sbd_error_t *(*status)(drmachid_t, drmach_status_t *); + + char name[MAXNAMELEN]; +} drmach_common_t; + +typedef struct { + drmach_common_t cm; + uint_t bnum; + int assigned; + int powered; + int connected; + int cond; + drmach_node_t *tree; + drmach_array_t *devices; + int boot_board; /* if board exists on bootup */ +} drmach_board_t; + +typedef struct { + drmach_common_t cm; + drmach_board_t *bp; + int unum; + uint_t portid; + int busy; + int powered; + const char *type; + drmach_node_t *node; +} drmach_device_t; + +typedef struct drmach_cpu { + drmach_device_t dev; + processorid_t cpuid; + uint32_t apicid; +} drmach_cpu_t; + +typedef struct drmach_mem { + drmach_device_t dev; + uint64_t mem_alignment; + uint64_t slice_base; + uint64_t slice_top; + uint64_t slice_size; + uint64_t base_pa; /* lowest installed memory base */ + uint64_t nbytes; /* size of installed memory */ + struct memlist *memlist; +} drmach_mem_t; + +typedef struct drmach_io { + drmach_device_t dev; +} drmach_io_t; + +typedef struct drmach_domain_info { + uint64_t floating; + int allow_dr; +} drmach_domain_info_t; + +typedef struct { + drmach_board_t *obj; + int ndevs; + void *a; + sbd_error_t *(*found)(void *a, const char *, int, drmachid_t); + sbd_error_t *err; +} drmach_board_cb_data_t; + +extern drmach_domain_info_t drmach_domain; + +extern drmach_board_t *drmach_board_new(uint_t, int); +extern sbd_error_t *drmach_device_new(drmach_node_t *, + drmach_board_t *, int, drmachid_t *); +extern sbd_error_t *drmach_cpu_new(drmach_device_t *, drmachid_t *); +extern sbd_error_t 
*drmach_mem_new(drmach_device_t *, drmachid_t *); +extern sbd_error_t *drmach_io_new(drmach_device_t *, drmachid_t *); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DRMACH_ACPI_H */ diff --git a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c index 3a237509ba..cece3f77f2 100644 --- a/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c +++ b/usr/src/uts/i86pc/io/acpi_drv/acpi_drv.c @@ -919,7 +919,7 @@ acpi_drv_obj_copy(ACPI_OBJECT *op, char *bp, struct obj_desc *dp) ep->Type == dp->type) { ACPI_DRV_DBG(CE_NOTE, NULL, "\t%s: \"%s\"", dp->name, ep->String.Pointer); - (void) strncpy(fp, ep->String.Pointer, dp->size); + (void) strlcpy(fp, ep->String.Pointer, dp->size); } else if (dp->type == ACPI_TYPE_STRING && ep->Type == ACPI_TYPE_BUFFER) { #ifdef DEBUG @@ -1559,9 +1559,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p) if ((info->Valid & ACPI_VALID_HID) == 0) { ACPI_DRV_DBG(CE_WARN, NULL, "AcpiGetObjectInfo(): _HID not available"); - (void) strncpy(p->hid, "\0", ID_LEN); + p->hid[0] = 0; } else { - (void) strncpy(p->hid, info->HardwareId.String, ID_LEN); + (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN); } /* @@ -1572,9 +1572,9 @@ acpi_drv_obj_init(struct acpi_drv_dev *p) ACPI_DRV_DBG(CE_WARN, NULL, "AcpiGetObjectInfo(): _UID not available"); /* Use 0 as the default _UID */ - (void) strncpy(p->uid, "\0", ID_LEN); + p->uid[0] = 0; } else { - (void) strncpy(p->uid, info->UniqueId.String, ID_LEN); + (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN); } AcpiOsFree(info); @@ -2064,9 +2064,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p) if ((info->Valid & ACPI_VALID_HID) == 0) { ACPI_DRV_DBG(CE_WARN, NULL, "!AcpiGetObjectInfo(): _HID not available"); - (void) strncpy(p->hid, "\0", ID_LEN); + p->hid[0] = 0; } else { - (void) strncpy(p->hid, info->HardwareId.String, ID_LEN); + (void) strlcpy(p->hid, info->HardwareId.String, ID_LEN); } /* @@ -2077,9 +2077,9 @@ acpi_drv_dev_init(struct acpi_drv_dev *p) ACPI_DRV_DBG(CE_WARN, NULL, "!AcpiGetObjectInfo(): _UID not available"); /* Use 0 as the default _UID */ - (void) strncpy(p->uid, "\0", ID_LEN); + p->uid[0] = 0; } else { - (void) strncpy(p->uid, info->UniqueId.String, ID_LEN); + (void) strlcpy(p->uid, info->UniqueId.String, ID_LEN); } if (info->Valid & ACPI_VALID_ADR) { diff --git a/usr/src/uts/i86pc/io/cbe.c b/usr/src/uts/i86pc/io/cbe.c index d5e08c63bc..87ec248717 100644 --- a/usr/src/uts/i86pc/io/cbe.c +++ b/usr/src/uts/i86pc/io/cbe.c @@ -40,6 +40,7 @@ #include <sys/ddi_impldefs.h> #include <sys/ddi_intr.h> #include <sys/avintr.h> +#include <sys/note.h> static int cbe_vector; static int cbe_ticks = 0; @@ -207,6 +208,13 @@ cbe_configure(cpu_t *cpu) return (cpu); } +void +cbe_unconfigure(void *arg) +{ + _NOTE(ARGUNUSED(arg)); + ASSERT(!CPU_IN_SET(cbe_enabled, ((cpu_t *)arg)->cpu_id)); +} + #ifndef __xpv /* * declarations needed for time adjustment @@ -318,7 +326,7 @@ cbe_init(void) { cyc_backend_t cbe = { cbe_configure, /* cyb_configure */ - NULL, /* cyb_unconfigure */ + cbe_unconfigure, /* cyb_unconfigure */ cbe_enable, /* cyb_enable */ cbe_disable, /* cyb_disable */ cbe_reprogram, /* cyb_reprogram */ diff --git a/usr/src/uts/i86pc/io/dr/dr.c b/usr/src/uts/i86pc/io/dr/dr.c new file mode 100644 index 0000000000..90f3a80adf --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr.c @@ -0,0 +1,3329 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +/* + * PIM-DR layer of DR driver. Provides interface between user + * level applications and the PSM-DR layer. + */ + +#include <sys/note.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/cred.h> +#include <sys/dditypes.h> +#include <sys/devops.h> +#include <sys/modctl.h> +#include <sys/poll.h> +#include <sys/conf.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/stat.h> +#include <sys/kmem.h> +#include <sys/processor.h> +#include <sys/cpuvar.h> +#include <sys/mem_config.h> + +#include <sys/autoconf.h> +#include <sys/cmn_err.h> + +#include <sys/ddi_impldefs.h> +#include <sys/promif.h> +#include <sys/machsystm.h> + +#include <sys/dr.h> +#include <sys/drmach.h> +#include <sys/dr_util.h> + +extern int nulldev(); +extern int nodev(); +extern struct memlist *phys_install; + +#ifdef DEBUG +uint_t dr_debug = 0; /* dr.h for bit values */ +#endif /* DEBUG */ + +/* + * NOTE: state_str, nt_str and SBD_CMD_STR are only used in a debug + * kernel. They are, however, referenced during both debug and non-debug + * compiles. + */ + +static char *state_str[] = { + "EMPTY", "OCCUPIED", "CONNECTED", "UNCONFIGURED", + "PARTIAL", "CONFIGURED", "RELEASE", "UNREFERENCED", + "FATAL" +}; + +#define SBD_CMD_STR(c) \ + (((c) == SBD_CMD_ASSIGN) ? "ASSIGN" : \ + ((c) == SBD_CMD_UNASSIGN) ? "UNASSIGN" : \ + ((c) == SBD_CMD_POWERON) ? "POWERON" : \ + ((c) == SBD_CMD_POWEROFF) ? "POWEROFF" : \ + ((c) == SBD_CMD_TEST) ? "TEST" : \ + ((c) == SBD_CMD_CONNECT) ? "CONNECT" : \ + ((c) == SBD_CMD_DISCONNECT) ? "DISCONNECT" : \ + ((c) == SBD_CMD_CONFIGURE) ? "CONFIGURE" : \ + ((c) == SBD_CMD_UNCONFIGURE) ? "UNCONFIGURE" : \ + ((c) == SBD_CMD_GETNCM) ? "GETNCM" : \ + ((c) == SBD_CMD_PASSTHRU) ? "PASSTHRU" : \ + ((c) == SBD_CMD_STATUS) ? "STATUS" : "unknown") + +#define DR_GET_BOARD_DEVUNIT(sb, ut, un) (&((sb)->b_dev[DEVSET_NIX(ut)][un])) + +#define DR_MAKE_MINOR(i, b) (((i) << 16) | (b)) +#define DR_MINOR2INST(m) (((m) >> 16) & 0xffff) +#define DR_MINOR2BNUM(m) ((m) & 0xffff) + +/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. 
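+ * The format string records the line number at which an internal
+ * error was raised.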
*/ +static char *dr_ie_fmt = "dr.c %d"; + +/* struct for drmach device name to sbd_comp_type_t mapping */ +typedef struct { + char *s_devtype; + sbd_comp_type_t s_nodetype; +} dr_devname_t; + +/* struct to map starfire device attributes - name:sbd_comp_type_t */ +static dr_devname_t dr_devattr[] = { + { DRMACH_DEVTYPE_MEM, SBD_COMP_MEM }, + { DRMACH_DEVTYPE_CPU, SBD_COMP_CPU }, + { DRMACH_DEVTYPE_PCI, SBD_COMP_IO }, +#if defined(DRMACH_DEVTYPE_SBUS) + { DRMACH_DEVTYPE_SBUS, SBD_COMP_IO }, +#endif +#if defined(DRMACH_DEVTYPE_WCI) + { DRMACH_DEVTYPE_WCI, SBD_COMP_IO }, +#endif + /* last s_devtype must be NULL, s_nodetype must be SBD_COMP_UNKNOWN */ + { NULL, SBD_COMP_UNKNOWN } +}; + +/* + * Per instance soft-state structure. + */ +typedef struct dr_softstate { + dev_info_t *dip; + dr_board_t *boards; + kmutex_t i_lock; + int dr_initialized; +} dr_softstate_t; + +/* + * dr Global data elements + */ +struct dr_global { + dr_softstate_t *softsp; /* pointer to initialize soft state */ + kmutex_t lock; +} dr_g; + +dr_unsafe_devs_t dr_unsafe_devs; + +/* + * Table of known passthru commands. + */ +struct { + char *pt_name; + int (*pt_func)(dr_handle_t *); +} pt_arr[] = { + "quiesce", dr_pt_test_suspend, +}; + +int dr_modunload_okay = 0; /* set to non-zero to allow unload */ + +/* + * State transition table. States valid transitions for "board" state. + * Recall that non-zero return value terminates operation, however + * the herrno value is what really indicates an error , if any. + */ +static int +_cmd2index(int c) +{ + /* + * Translate DR CMD to index into dr_state_transition. + */ + switch (c) { + case SBD_CMD_CONNECT: return (0); + case SBD_CMD_DISCONNECT: return (1); + case SBD_CMD_CONFIGURE: return (2); + case SBD_CMD_UNCONFIGURE: return (3); + case SBD_CMD_ASSIGN: return (4); + case SBD_CMD_UNASSIGN: return (5); + case SBD_CMD_POWERON: return (6); + case SBD_CMD_POWEROFF: return (7); + case SBD_CMD_TEST: return (8); + default: return (-1); + } +} + +#define CMD2INDEX(c) _cmd2index(c) + +static struct dr_state_trans { + int x_cmd; + struct { + int x_rv; /* return value of pre_op */ + int x_err; /* error, if any */ + } x_op[DR_STATE_MAX]; +} dr_state_transition[] = { + { SBD_CMD_CONNECT, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_DISCONNECT, + { + { -1, ESBD_STATE }, /* empty */ + { 0, 0 }, /* occupied */ + { 0, 0 }, /* connected */ + { 0, 0 }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_CONFIGURE, + { + { -1, ESBD_STATE }, /* empty */ + { -1, ESBD_STATE }, /* occupied */ + { 0, 0 }, /* connected */ + { 0, 0 }, /* unconfigured */ + { 0, 0 }, /* partial */ + { 0, 0 }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_UNCONFIGURE, + { + { -1, ESBD_STATE }, /* empty */ + { -1, ESBD_STATE }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { 0, 0 }, /* partial */ + { 0, 0 }, /* configured */ + { 0, 0 }, /* release */ + { 0, 0 }, /* unreferenced */ + { -1, 
ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_ASSIGN, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_UNASSIGN, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_POWERON, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_POWEROFF, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, + { SBD_CMD_TEST, + { + { 0, 0 }, /* empty */ + { 0, 0 }, /* occupied */ + { -1, ESBD_STATE }, /* connected */ + { -1, ESBD_STATE }, /* unconfigured */ + { -1, ESBD_STATE }, /* partial */ + { -1, ESBD_STATE }, /* configured */ + { -1, ESBD_STATE }, /* release */ + { -1, ESBD_STATE }, /* unreferenced */ + { -1, ESBD_FATAL_STATE }, /* fatal */ + } + }, +}; + +/* + * Global R/W lock to synchronize access across + * multiple boards. Users wanting multi-board access + * must grab WRITE lock, others must grab READ lock. + */ +krwlock_t dr_grwlock; + +/* + * Head of the boardlist used as a reference point for + * locating board structs. + * TODO: eliminate dr_boardlist + */ +dr_board_t *dr_boardlist; + +/* + * DR support functions. + */ +static dr_devset_t dr_dev2devset(sbd_comp_id_t *cid); +static int dr_check_transition(dr_board_t *bp, + dr_devset_t *devsetp, + struct dr_state_trans *transp, + int cmd); +static int dr_check_unit_attached(dr_common_unit_t *dp); +static sbd_error_t *dr_init_devlists(dr_board_t *bp); +static void dr_board_discovery(dr_board_t *bp); +static int dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd); +static void dr_board_destroy(dr_board_t *bp); +static void dr_board_transition(dr_board_t *bp, dr_state_t st); + +/* + * DR driver (DDI) entry points. + */ +static int dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, + void *arg, void **result); +static int dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); +static int dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); +static int dr_probe(dev_info_t *dip); +static int dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *cred_p, int *rval_p); +static int dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p); +static int dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p); + +/* + * DR command processing operations. 
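+ * These handle the individual SBD_CMD_* requests; dr_exec_op()
+ * dispatches to them once dr_pre_op() has validated the requested
+ * state transition.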
+ */ +static int dr_copyin_iocmd(dr_handle_t *hp); +static int dr_copyout_iocmd(dr_handle_t *hp); +static int dr_copyout_errs(dr_handle_t *hp); +static int dr_pre_op(dr_handle_t *hp); +static int dr_post_op(dr_handle_t *hp, int rv); +static int dr_exec_op(dr_handle_t *hp); +static void dr_assign_board(dr_handle_t *hp); +static void dr_unassign_board(dr_handle_t *hp); +static void dr_connect(dr_handle_t *hp); +static int dr_disconnect(dr_handle_t *hp); +static void dr_dev_configure(dr_handle_t *hp); +static void dr_dev_release(dr_handle_t *hp); +static int dr_dev_unconfigure(dr_handle_t *hp); +static void dr_dev_cancel(dr_handle_t *hp); +static int dr_dev_status(dr_handle_t *hp); +static int dr_get_ncm(dr_handle_t *hp); +static int dr_pt_ioctl(dr_handle_t *hp); +static void dr_poweron_board(dr_handle_t *hp); +static void dr_poweroff_board(dr_handle_t *hp); +static void dr_test_board(dr_handle_t *hp); + +/* + * Autoconfiguration data structures + */ +struct cb_ops dr_cb_ops = { + dr_open, /* open */ + dr_close, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + dr_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* chpoll */ + ddi_prop_op, /* cb_prop_op */ + NULL, /* struct streamtab */ + D_NEW | D_MP | D_MTSAFE, /* compatibility flags */ + CB_REV, /* Rev */ + nodev, /* cb_aread */ + nodev /* cb_awrite */ +}; + +struct dev_ops dr_dev_ops = { + DEVO_REV, /* build version */ + 0, /* dev ref count */ + dr_getinfo, /* getinfo */ + nulldev, /* identify */ + dr_probe, /* probe */ + dr_attach, /* attach */ + dr_detach, /* detach */ + nodev, /* reset */ + &dr_cb_ops, /* cb_ops */ + (struct bus_ops *)NULL, /* bus ops */ + NULL, /* power */ + ddi_quiesce_not_needed, /* quiesce */ +}; + +extern struct mod_ops mod_driverops; + +static struct modldrv modldrv = { + &mod_driverops, + "Dynamic Reconfiguration", + &dr_dev_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, + (void *)&modldrv, + NULL +}; + +/* + * Driver entry points. + */ +int +_init(void) +{ + int err; + + /* + * If you need to support multiple nodes (instances), then + * whatever the maximum number of supported nodes is would + * need to passed as the third parameter to ddi_soft_state_init(). + * Alternative would be to dynamically fini and re-init the + * soft state structure each time a node is attached. + */ + err = ddi_soft_state_init((void **)&dr_g.softsp, + sizeof (dr_softstate_t), 1); + if (err) + return (err); + + mutex_init(&dr_g.lock, NULL, MUTEX_DRIVER, NULL); + rw_init(&dr_grwlock, NULL, RW_DEFAULT, NULL); + + return (mod_install(&modlinkage)); +} + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modlinkage)) != 0) + return (err); + + mutex_destroy(&dr_g.lock); + rw_destroy(&dr_grwlock); + + ddi_soft_state_fini((void **)&dr_g.softsp); + + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +/*ARGSUSED1*/ +static int +dr_open(dev_t *dev, int flag, int otyp, cred_t *cred_p) +{ + int instance; + dr_softstate_t *softsp; + dr_board_t *bp; + + /* + * Don't open unless we've attached. 
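+ * The minor number encodes both the driver instance and the board
+ * number (see DR_MAKE_MINOR): for example, instance 1, board 3 maps
+ * to minor (1 << 16) | 3 == 0x10003.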
+ */ + instance = DR_MINOR2INST(getminor(*dev)); + softsp = ddi_get_soft_state(dr_g.softsp, instance); + if (softsp == NULL) + return (ENXIO); + + mutex_enter(&softsp->i_lock); + if (!softsp->dr_initialized) { + int bd; + int rv = 0; + + bp = softsp->boards; + + /* initialize each array element */ + for (bd = 0; bd < MAX_BOARDS; bd++, bp++) { + rv = dr_board_init(bp, softsp->dip, bd); + if (rv) + break; + } + + if (rv == 0) { + softsp->dr_initialized = 1; + } else { + /* destroy elements initialized thus far */ + while (--bp >= softsp->boards) + dr_board_destroy(bp); + + /* TODO: should this be another errno val ? */ + mutex_exit(&softsp->i_lock); + return (ENXIO); + } + } + mutex_exit(&softsp->i_lock); + + bp = &softsp->boards[DR_MINOR2BNUM(getminor(*dev))]; + + /* + * prevent opening of a dyn-ap for a board + * that does not exist + */ + if (!bp->b_assigned) { + if (drmach_board_lookup(bp->b_num, &bp->b_id) != 0) + return (ENODEV); + } + + return (0); +} + +/*ARGSUSED*/ +static int +dr_close(dev_t dev, int flag, int otyp, cred_t *cred_p) +{ + return (0); +} + +/* + * Enable/disable DR features. + */ +int dr_enable = 1; + +/*ARGSUSED3*/ +static int +dr_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, + cred_t *cred_p, int *rval_p) +{ + static int dr_dev_type_to_nt(char *); + + int rv = 0; + int instance; + int bd; + dr_handle_t *hp; + dr_softstate_t *softsp; + static fn_t f = "dr_ioctl"; + + PR_ALL("%s...\n", f); + + instance = DR_MINOR2INST(getminor(dev)); + softsp = ddi_get_soft_state(dr_g.softsp, instance); + if (softsp == NULL) { + cmn_err(CE_WARN, "dr%d: module not yet attached", instance); + return (ENXIO); + } + + if (!dr_enable) { + switch (cmd) { + case SBD_CMD_STATUS: + case SBD_CMD_GETNCM: + case SBD_CMD_PASSTHRU: + break; + default: + return (ENOTSUP); + } + } + + bd = DR_MINOR2BNUM(getminor(dev)); + if (bd >= MAX_BOARDS) + return (ENXIO); + + /* get and initialize storage for new handle */ + hp = GETSTRUCT(dr_handle_t, 1); + hp->h_bd = &softsp->boards[bd]; + hp->h_err = NULL; + hp->h_dev = getminor(dev); + hp->h_cmd = cmd; + hp->h_mode = mode; + hp->h_iap = (sbd_ioctl_arg_t *)arg; + + /* copy sbd command into handle */ + rv = dr_copyin_iocmd(hp); + if (rv) { + FREESTRUCT(hp, dr_handle_t, 1); + return (EINVAL); + } + + /* translate canonical name to component type */ + if (hp->h_sbdcmd.cmd_cm.c_id.c_name[0] != '\0') { + hp->h_sbdcmd.cmd_cm.c_id.c_type = + dr_dev_type_to_nt(hp->h_sbdcmd.cmd_cm.c_id.c_name); + + PR_ALL("%s: c_name = %s, c_type = %d\n", + f, + hp->h_sbdcmd.cmd_cm.c_id.c_name, + hp->h_sbdcmd.cmd_cm.c_id.c_type); + } else { + /*EMPTY*/ + PR_ALL("%s: c_name is NULL\n", f); + } + + /* determine scope of operation */ + hp->h_devset = dr_dev2devset(&hp->h_sbdcmd.cmd_cm.c_id); + + switch (hp->h_cmd) { + case SBD_CMD_STATUS: + case SBD_CMD_GETNCM: + /* no locks needed for these commands */ + break; + + default: + rw_enter(&dr_grwlock, RW_WRITER); + mutex_enter(&hp->h_bd->b_lock); + + /* + * If we're dealing with memory at all, then we have + * to keep the "exclusive" global lock held. This is + * necessary since we will probably need to look at + * multiple board structs. Otherwise, we only have + * to deal with the board in question and so can drop + * the global lock to "shared". 
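+ * The devset is checked below; only when no memory unit is involved
+ * is the write lock downgraded to shared.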
+ */ + rv = DEVSET_IN_SET(hp->h_devset, SBD_COMP_MEM, DEVSET_ANYUNIT); + if (rv == 0) + rw_downgrade(&dr_grwlock); + break; + } + rv = 0; + + if (rv == 0) + rv = dr_pre_op(hp); + if (rv == 0) { + rv = dr_exec_op(hp); + rv = dr_post_op(hp, rv); + } + + if (rv == -1) + rv = EIO; + + if (hp->h_err != NULL) + if (!(rv = dr_copyout_errs(hp))) + rv = EIO; + + /* undo locking, if any, done before dr_pre_op */ + switch (hp->h_cmd) { + case SBD_CMD_STATUS: + case SBD_CMD_GETNCM: + break; + + case SBD_CMD_ASSIGN: + case SBD_CMD_UNASSIGN: + case SBD_CMD_POWERON: + case SBD_CMD_POWEROFF: + case SBD_CMD_CONNECT: + case SBD_CMD_CONFIGURE: + case SBD_CMD_UNCONFIGURE: + case SBD_CMD_DISCONNECT: + /* Board changed state. Log a sysevent. */ + if (rv == 0) + (void) drmach_log_sysevent(hp->h_bd->b_num, "", + SE_SLEEP, 0); + /* Fall through */ + + default: + mutex_exit(&hp->h_bd->b_lock); + rw_exit(&dr_grwlock); + } + + if (hp->h_opts.size != 0) + FREESTRUCT(hp->h_opts.copts, char, hp->h_opts.size); + + FREESTRUCT(hp, dr_handle_t, 1); + + return (rv); +} + +/*ARGSUSED*/ +static int +dr_probe(dev_info_t *dip) +{ + return (DDI_PROBE_SUCCESS); +} + +static int +dr_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int rv, rv2; + int bd; + int instance; + sbd_error_t *err; + dr_softstate_t *softsp; + + instance = ddi_get_instance(dip); + + switch (cmd) { + case DDI_ATTACH: + rw_enter(&dr_grwlock, RW_WRITER); + + rv = ddi_soft_state_zalloc(dr_g.softsp, instance); + if (rv != DDI_SUCCESS) { + cmn_err(CE_WARN, "dr%d: failed to alloc soft-state", + instance); + return (DDI_FAILURE); + } + + /* initialize softstate structure */ + softsp = ddi_get_soft_state(dr_g.softsp, instance); + softsp->dip = dip; + + mutex_init(&softsp->i_lock, NULL, MUTEX_DRIVER, NULL); + + /* allocate board array (aka boardlist) */ + softsp->boards = GETSTRUCT(dr_board_t, MAX_BOARDS); + + /* TODO: eliminate dr_boardlist */ + dr_boardlist = softsp->boards; + + /* initialize each array element */ + rv = DDI_SUCCESS; + for (bd = 0; bd < MAX_BOARDS; bd++) { + dr_board_t *bp = &softsp->boards[bd]; + char *p, *name; + int l, minor_num; + + /* + * initialized board attachment point path + * (relative to pseudo) in a form immediately + * reusable as an cfgadm command argument. + * TODO: clean this up + */ + p = bp->b_path; + l = sizeof (bp->b_path); + (void) snprintf(p, l, "dr@%d:", instance); + while (*p != '\0') { + l--; + p++; + } + + name = p; + err = drmach_board_name(bd, p, l); + if (err) { + sbd_err_clear(&err); + rv = DDI_FAILURE; + break; + } + + minor_num = DR_MAKE_MINOR(instance, bd); + rv = ddi_create_minor_node(dip, name, S_IFCHR, + minor_num, DDI_NT_SBD_ATTACHMENT_POINT, NULL); + if (rv != DDI_SUCCESS) + rv = DDI_FAILURE; + } + + if (rv == DDI_SUCCESS) { + /* + * Announce the node's presence. + */ + ddi_report_dev(dip); + } else { + ddi_remove_minor_node(dip, NULL); + } + /* + * Init registered unsafe devs. 
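+ * The optional "unsupported-io-drivers" property names I/O drivers
+ * that DR must treat as unsafe; if the property is absent the list
+ * is simply left empty.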
+ */ + dr_unsafe_devs.devnames = NULL; + rv2 = ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + "unsupported-io-drivers", &dr_unsafe_devs.devnames, + &dr_unsafe_devs.ndevs); + + if (rv2 != DDI_PROP_SUCCESS) + dr_unsafe_devs.ndevs = 0; + + rw_exit(&dr_grwlock); + return (rv); + + default: + return (DDI_FAILURE); + } + + /*NOTREACHED*/ +} + +static int +dr_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + int instance; + dr_softstate_t *softsp; + + switch (cmd) { + case DDI_DETACH: + if (!dr_modunload_okay) + return (DDI_FAILURE); + + rw_enter(&dr_grwlock, RW_WRITER); + + instance = ddi_get_instance(dip); + softsp = ddi_get_soft_state(dr_g.softsp, instance); + + /* TODO: eliminate dr_boardlist */ + ASSERT(softsp->boards == dr_boardlist); + + /* remove all minor nodes */ + ddi_remove_minor_node(dip, NULL); + + if (softsp->dr_initialized) { + int bd; + + for (bd = 0; bd < MAX_BOARDS; bd++) + dr_board_destroy(&softsp->boards[bd]); + } + + FREESTRUCT(softsp->boards, dr_board_t, MAX_BOARDS); + mutex_destroy(&softsp->i_lock); + ddi_soft_state_free(dr_g.softsp, instance); + + rw_exit(&dr_grwlock); + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); + } + /*NOTREACHED*/ +} + +static int +dr_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) +{ + _NOTE(ARGUNUSED(dip)) + + dev_t dev = (dev_t)arg; + int instance, error; + dr_softstate_t *softsp; + + *result = NULL; + error = DDI_SUCCESS; + instance = DR_MINOR2INST(getminor(dev)); + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: + softsp = ddi_get_soft_state(dr_g.softsp, instance); + if (softsp == NULL) + return (DDI_FAILURE); + *result = (void *)softsp->dip; + break; + + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)(uintptr_t)instance; + break; + + default: + error = DDI_FAILURE; + break; + } + + return (error); +} + +/* + * DR operations. 
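+ * dr_copyin_iocmd() and dr_copyout_iocmd() below translate the
+ * user-level sbd_cmd_t across data models before and after command
+ * execution.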
+ */ + +static int +dr_copyin_iocmd(dr_handle_t *hp) +{ + static fn_t f = "dr_copyin_iocmd"; + sbd_cmd_t *scp = &hp->h_sbdcmd; + + if (hp->h_iap == NULL) + return (EINVAL); + + bzero((caddr_t)scp, sizeof (sbd_cmd_t)); + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) { + sbd_cmd32_t scmd32; + + bzero((caddr_t)&scmd32, sizeof (sbd_cmd32_t)); + + if (ddi_copyin((void *)hp->h_iap, (void *)&scmd32, + sizeof (sbd_cmd32_t), hp->h_mode)) { + cmn_err(CE_WARN, + "%s: (32bit) failed to copyin " + "sbdcmd-struct", f); + return (EFAULT); + } + scp->cmd_cm.c_id.c_type = scmd32.cmd_cm.c_id.c_type; + scp->cmd_cm.c_id.c_unit = scmd32.cmd_cm.c_id.c_unit; + bcopy(&scmd32.cmd_cm.c_id.c_name[0], + &scp->cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME); + scp->cmd_cm.c_flags = scmd32.cmd_cm.c_flags; + scp->cmd_cm.c_len = scmd32.cmd_cm.c_len; + scp->cmd_cm.c_opts = (caddr_t)(uintptr_t)scmd32.cmd_cm.c_opts; + + switch (hp->h_cmd) { + case SBD_CMD_STATUS: + scp->cmd_stat.s_nbytes = scmd32.cmd_stat.s_nbytes; + scp->cmd_stat.s_statp = + (caddr_t)(uintptr_t)scmd32.cmd_stat.s_statp; + break; + default: + break; + + } + } else +#endif /* _MULTI_DATAMODEL */ + if (ddi_copyin((void *)hp->h_iap, (void *)scp, + sizeof (sbd_cmd_t), hp->h_mode) != 0) { + cmn_err(CE_WARN, + "%s: failed to copyin sbdcmd-struct", f); + return (EFAULT); + } + + if ((hp->h_opts.size = scp->cmd_cm.c_len) != 0) { + hp->h_opts.copts = GETSTRUCT(char, scp->cmd_cm.c_len + 1); + ++hp->h_opts.size; + if (ddi_copyin((void *)scp->cmd_cm.c_opts, + (void *)hp->h_opts.copts, + scp->cmd_cm.c_len, hp->h_mode) != 0) { + cmn_err(CE_WARN, "%s: failed to copyin options", f); + return (EFAULT); + } + } + + return (0); +} + +static int +dr_copyout_iocmd(dr_handle_t *hp) +{ + static fn_t f = "dr_copyout_iocmd"; + sbd_cmd_t *scp = &hp->h_sbdcmd; + + if (hp->h_iap == NULL) + return (EINVAL); + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) { + sbd_cmd32_t scmd32; + + scmd32.cmd_cm.c_id.c_type = scp->cmd_cm.c_id.c_type; + scmd32.cmd_cm.c_id.c_unit = scp->cmd_cm.c_id.c_unit; + bcopy(&scp->cmd_cm.c_id.c_name[0], + &scmd32.cmd_cm.c_id.c_name[0], OBP_MAXPROPNAME); + + scmd32.cmd_cm.c_flags = scp->cmd_cm.c_flags; + scmd32.cmd_cm.c_len = scp->cmd_cm.c_len; + scmd32.cmd_cm.c_opts = (caddr32_t)(uintptr_t)scp->cmd_cm.c_opts; + + switch (hp->h_cmd) { + case SBD_CMD_GETNCM: + scmd32.cmd_getncm.g_ncm = scp->cmd_getncm.g_ncm; + break; + default: + break; + } + + if (ddi_copyout((void *)&scmd32, (void *)hp->h_iap, + sizeof (sbd_cmd32_t), hp->h_mode)) { + cmn_err(CE_WARN, + "%s: (32bit) failed to copyout " + "sbdcmd-struct", f); + return (EFAULT); + } + } else +#endif /* _MULTI_DATAMODEL */ + if (ddi_copyout((void *)scp, (void *)hp->h_iap, + sizeof (sbd_cmd_t), hp->h_mode) != 0) { + cmn_err(CE_WARN, + "%s: failed to copyout sbdcmd-struct", f); + return (EFAULT); + } + + return (0); +} + +static int +dr_copyout_errs(dr_handle_t *hp) +{ + static fn_t f = "dr_copyout_errs"; + + if (hp->h_err == NULL) + return (0); + + if (hp->h_err->e_code) { + PR_ALL("%s: error %d %s", + f, hp->h_err->e_code, hp->h_err->e_rsc); + } + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(hp->h_mode & FMODELS) == DDI_MODEL_ILP32) { + sbd_error32_t *serr32p; + + serr32p = GETSTRUCT(sbd_error32_t, 1); + + serr32p->e_code = hp->h_err->e_code; + bcopy(&hp->h_err->e_rsc[0], &serr32p->e_rsc[0], + MAXPATHLEN); + if (ddi_copyout((void *)serr32p, + (void *)&((sbd_ioctl_arg32_t *)hp->h_iap)->i_err, + sizeof (sbd_error32_t), 
hp->h_mode)) { + cmn_err(CE_WARN, + "%s: (32bit) failed to copyout", f); + return (EFAULT); + } + FREESTRUCT(serr32p, sbd_error32_t, 1); + } else +#endif /* _MULTI_DATAMODEL */ + if (ddi_copyout((void *)hp->h_err, + (void *)&hp->h_iap->i_err, + sizeof (sbd_error_t), hp->h_mode)) { + cmn_err(CE_WARN, + "%s: failed to copyout", f); + return (EFAULT); + } + + sbd_err_clear(&hp->h_err); + + return (0); + +} + +/* + * pre-op entry point must sbd_err_set_c(), if needed. + * Return value of non-zero indicates failure. + */ +static int +dr_pre_op(dr_handle_t *hp) +{ + int rv = 0, t; + int cmd, serr = 0; + dr_devset_t devset; + dr_board_t *bp = hp->h_bd; + dr_handle_t *shp = hp; + static fn_t f = "dr_pre_op"; + + cmd = hp->h_cmd; + devset = shp->h_devset; + + PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd)); + + devset = DEVSET_AND(devset, DR_DEVS_PRESENT(bp)); + hp->h_err = drmach_pre_op(cmd, bp->b_id, &hp->h_opts, &devset); + if (hp->h_err != NULL) { + PR_ALL("drmach_pre_op failed for cmd %s(%d)\n", + SBD_CMD_STR(cmd), cmd); + return (-1); + } + + /* + * Check for valid state transitions. + */ + if ((t = CMD2INDEX(cmd)) != -1) { + struct dr_state_trans *transp; + int state_err; + + transp = &dr_state_transition[t]; + ASSERT(transp->x_cmd == cmd); + + state_err = dr_check_transition(bp, &devset, transp, cmd); + + if (state_err < 0) { + /* + * Invalidate device. + */ + dr_op_err(CE_IGNORE, hp, ESBD_INVAL, NULL); + serr = -1; + PR_ALL("%s: invalid devset (0x%x)\n", + f, (uint_t)devset); + } else if (state_err != 0) { + /* + * State transition is not a valid one. + */ + dr_op_err(CE_IGNORE, hp, + transp->x_op[state_err].x_err, NULL); + + serr = transp->x_op[state_err].x_rv; + + PR_ALL("%s: invalid state %s(%d) for cmd %s(%d)\n", + f, state_str[state_err], state_err, + SBD_CMD_STR(cmd), cmd); + } else { + shp->h_devset = devset; + } + } + + if (serr) { + rv = -1; + } + + return (rv); +} + +static int +dr_post_op(dr_handle_t *hp, int rv) +{ + int cmd; + sbd_error_t *err; + dr_board_t *bp = hp->h_bd; + static fn_t f = "dr_post_op"; + + cmd = hp->h_cmd; + + PR_ALL("%s (cmd = %s)...\n", f, SBD_CMD_STR(cmd)); + + err = drmach_post_op(cmd, bp->b_id, &hp->h_opts, rv); + if (err != NULL) { + PR_ALL("drmach_post_op failed for cmd %s(%d)\n", + SBD_CMD_STR(cmd), cmd); + if (rv == 0) { + ASSERT(hp->h_err == NULL); + hp->h_err = err; + rv = -1; + } else if (hp->h_err == NULL) { + hp->h_err = err; + } else { + sbd_err_clear(&err); + } + } + + return (rv); +} + +static int +dr_exec_op(dr_handle_t *hp) +{ + int rv = 0; + static fn_t f = "dr_exec_op"; + + /* errors should have been caught by now */ + ASSERT(hp->h_err == NULL); + + switch (hp->h_cmd) { + case SBD_CMD_ASSIGN: + dr_assign_board(hp); + break; + + case SBD_CMD_UNASSIGN: + dr_unassign_board(hp); + break; + + case SBD_CMD_POWEROFF: + dr_poweroff_board(hp); + break; + + case SBD_CMD_POWERON: + dr_poweron_board(hp); + break; + + case SBD_CMD_TEST: + dr_test_board(hp); + break; + + case SBD_CMD_CONNECT: + dr_connect(hp); + break; + + case SBD_CMD_CONFIGURE: + dr_dev_configure(hp); + break; + + case SBD_CMD_UNCONFIGURE: + dr_dev_release(hp); + if (hp->h_err == NULL) + rv = dr_dev_unconfigure(hp); + else + dr_dev_cancel(hp); + break; + + case SBD_CMD_DISCONNECT: + rv = dr_disconnect(hp); + break; + + case SBD_CMD_STATUS: + rv = dr_dev_status(hp); + break; + + case SBD_CMD_GETNCM: + hp->h_sbdcmd.cmd_getncm.g_ncm = dr_get_ncm(hp); + rv = dr_copyout_iocmd(hp); + break; + + case SBD_CMD_PASSTHRU: + rv = dr_pt_ioctl(hp); + break; + + default: + cmn_err(CE_WARN, + 
"%s: unknown command (%d)", + f, hp->h_cmd); + break; + } + + if (hp->h_err != NULL) { + rv = -1; + } + + return (rv); +} + +static void +dr_assign_board(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + + hp->h_err = drmach_board_assign(bp->b_num, &bp->b_id); + if (hp->h_err == NULL) { + bp->b_assigned = 1; + } +} + +static void +dr_unassign_board(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + + /* + * Block out status during unassign. + * Not doing cv_wait_sig here as starfire SSP software + * ignores unassign failure and removes board from + * domain mask causing system panic. + * TODO: Change cv_wait to cv_wait_sig when SSP software + * handles unassign failure. + */ + dr_lock_status(bp); + + hp->h_err = drmach_board_unassign(bp->b_id); + if (hp->h_err == NULL) { + /* + * clear drmachid_t handle; not valid after board unassign + */ + bp->b_id = 0; + bp->b_assigned = 0; + } + + dr_unlock_status(bp); +} + +static void +dr_poweron_board(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + + hp->h_err = drmach_board_poweron(bp->b_id); +} + +static void +dr_poweroff_board(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + + hp->h_err = drmach_board_poweroff(bp->b_id); +} + +static void +dr_test_board(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + hp->h_err = drmach_board_test(bp->b_id, &hp->h_opts, + dr_cmd_flags(hp) & SBD_FLAG_FORCE); +} + +/* + * Create and populate the component nodes for a board. Assumes that the + * devlists for the board have been initialized. + */ +static void +dr_make_comp_nodes(dr_board_t *bp) +{ + int i; + + /* + * Make nodes for the individual components on the board. + * First we need to initialize memory unit data structures of board + * structure. + */ + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + dr_mem_unit_t *mp; + + mp = dr_get_mem_unit(bp, i); + dr_init_mem_unit(mp); + } + + /* + * Initialize cpu unit data structures. + */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + dr_cpu_unit_t *cp; + + cp = dr_get_cpu_unit(bp, i); + dr_init_cpu_unit(cp); + } + + /* + * Initialize io unit data structures. + */ + for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + dr_io_unit_t *ip; + + ip = dr_get_io_unit(bp, i); + dr_init_io_unit(ip); + } + + dr_board_transition(bp, DR_STATE_CONNECTED); + + bp->b_rstate = SBD_STAT_CONNECTED; + bp->b_ostate = SBD_STAT_UNCONFIGURED; + bp->b_cond = SBD_COND_OK; + (void) drv_getparm(TIME, (void *)&bp->b_time); + +} + +/* + * Only do work if called to operate on an entire board + * which doesn't already have components present. + */ +static void +dr_connect(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + static fn_t f = "dr_connect"; + + PR_ALL("%s...\n", f); + + if (DR_DEVS_PRESENT(bp)) { + /* + * Board already has devices present. + */ + PR_ALL("%s: devices already present (" DEVSET_FMT_STR ")\n", + f, DEVSET_FMT_ARG(DR_DEVS_PRESENT(bp))); + return; + } + + hp->h_err = drmach_board_connect(bp->b_id, &hp->h_opts); + if (hp->h_err) + return; + + hp->h_err = dr_init_devlists(bp); + if (hp->h_err) + return; + else if (bp->b_ndev == 0) { + dr_op_err(CE_WARN, hp, ESBD_EMPTY_BD, bp->b_path); + return; + } else { + dr_make_comp_nodes(bp); + return; + } + /*NOTREACHED*/ +} + +static int +dr_disconnect(dr_handle_t *hp) +{ + int i; + dr_devset_t devset; + dr_board_t *bp = hp->h_bd; + static fn_t f = "dr_disconnect"; + + PR_ALL("%s...\n", f); + + /* + * Only devices which are present, but + * unattached can be disconnected. 
+ */ + devset = hp->h_devset & DR_DEVS_PRESENT(bp) & + DR_DEVS_UNATTACHED(bp); + + if ((devset == 0) && DR_DEVS_PRESENT(bp)) { + dr_op_err(CE_IGNORE, hp, ESBD_EMPTY_BD, bp->b_path); + return (0); + } + + /* + * Block out status during disconnect. + */ + mutex_enter(&bp->b_slock); + while (bp->b_sflags & DR_BSLOCK) { + if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) { + mutex_exit(&bp->b_slock); + return (EINTR); + } + } + bp->b_sflags |= DR_BSLOCK; + mutex_exit(&bp->b_slock); + + hp->h_err = drmach_board_disconnect(bp->b_id, &hp->h_opts); + if (hp->h_err && hp->h_err->e_code == EX86_WALK_DEPENDENCY) { + /* + * Other boards have dependency on this board. No device nodes + * have been destroyed so keep current board status. + */ + goto disconnect_done; + } + + DR_DEVS_DISCONNECT(bp, devset); + + ASSERT((DR_DEVS_ATTACHED(bp) & devset) == 0); + + /* + * Update per-device state transitions. + */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + dr_cpu_unit_t *cp; + + if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i)) + continue; + + cp = dr_get_cpu_unit(bp, i); + if (dr_disconnect_cpu(cp) == 0) + dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY); + else if (cp->sbc_cm.sbdev_error != NULL) + DRERR_SET_C(&hp->h_err, &cp->sbc_cm.sbdev_error); + + ASSERT(cp->sbc_cm.sbdev_error == NULL); + } + + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + dr_mem_unit_t *mp; + + if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i)) + continue; + + mp = dr_get_mem_unit(bp, i); + if (dr_disconnect_mem(mp) == 0) + dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY); + else if (mp->sbm_cm.sbdev_error != NULL) + DRERR_SET_C(&hp->h_err, &mp->sbm_cm.sbdev_error); + + ASSERT(mp->sbm_cm.sbdev_error == NULL); + } + + for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + dr_io_unit_t *ip; + + if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i)) + continue; + + ip = dr_get_io_unit(bp, i); + if (dr_disconnect_io(ip) == 0) + dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY); + else if (ip->sbi_cm.sbdev_error != NULL) + DRERR_SET_C(&hp->h_err, &ip->sbi_cm.sbdev_error); + + ASSERT(ip->sbi_cm.sbdev_error == NULL); + } + + if (hp->h_err) { + /* + * For certain errors, drmach_board_disconnect will mark + * the board as unusable; in these cases the devtree must + * be purged so that status calls will succeed. + * XXX + * This implementation checks for discrete error codes - + * someday, the i/f to drmach_board_disconnect should be + * changed to avoid the e_code testing. + */ + if (hp->h_err->e_code == EX86_DEPROBE) { + bp->b_ostate = SBD_STAT_UNCONFIGURED; + bp->b_busy = 0; + (void) drv_getparm(TIME, (void *)&bp->b_time); + + if (drmach_board_deprobe(bp->b_id)) + goto disconnect_done; + else + bp->b_ndev = 0; + } + } + + /* + * Once all the components on a board have been disconnect + * the board's state can transition to disconnected and + * we can allow the deprobe to take place. + */ + if (hp->h_err == NULL && DR_DEVS_PRESENT(bp) == 0) { + dr_board_transition(bp, DR_STATE_OCCUPIED); + bp->b_rstate = SBD_STAT_DISCONNECTED; + bp->b_ostate = SBD_STAT_UNCONFIGURED; + bp->b_busy = 0; + (void) drv_getparm(TIME, (void *)&bp->b_time); + + hp->h_err = drmach_board_deprobe(bp->b_id); + + if (hp->h_err == NULL) { + bp->b_ndev = 0; + dr_board_transition(bp, DR_STATE_EMPTY); + bp->b_rstate = SBD_STAT_EMPTY; + (void) drv_getparm(TIME, (void *)&bp->b_time); + } + } + +disconnect_done: + dr_unlock_status(bp); + + return (0); +} + +/* + * Check if a particular device is a valid target of the current + * operation. Return 1 if it is a valid target, and 0 otherwise. 
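+ * With present_only set, a target must be present but not attached
+ * (the configure direction); otherwise it must be both present and
+ * attached (the unconfigure direction).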
+ */ +static int +dr_dev_is_target(dr_dev_unit_t *dp, int present_only, uint_t uset) +{ + dr_common_unit_t *cp; + int is_present; + int is_attached; + + cp = &dp->du_common; + + /* check if the user requested this device */ + if ((uset & (1 << cp->sbdev_unum)) == 0) { + return (0); + } + + is_present = DR_DEV_IS_PRESENT(cp) ? 1 : 0; + is_attached = DR_DEV_IS_ATTACHED(cp) ? 1 : 0; + + /* + * If the present_only flag is set, a valid target + * must be present but not attached. Otherwise, it + * must be both present and attached. + */ + if (is_present && (present_only ^ is_attached)) { + /* sanity check */ + ASSERT(cp->sbdev_id != (drmachid_t)0); + + return (1); + } + + return (0); +} + +static void +dr_dev_make_list(dr_handle_t *hp, sbd_comp_type_t type, int present_only, + dr_common_unit_t ***devlist, int *devnum) +{ + dr_board_t *bp = hp->h_bd; + int unum; + int nunits; + uint_t uset; + int len; + dr_common_unit_t **list, **wp; + + switch (type) { + case SBD_COMP_CPU: + nunits = MAX_CPU_UNITS_PER_BOARD; + break; + case SBD_COMP_MEM: + nunits = MAX_MEM_UNITS_PER_BOARD; + break; + case SBD_COMP_IO: + nunits = MAX_IO_UNITS_PER_BOARD; + break; + default: + /* catch this in debug kernels */ + ASSERT(0); + break; + } + + /* allocate list storage. */ + len = sizeof (dr_common_unit_t *) * (nunits + 1); + list = kmem_zalloc(len, KM_SLEEP); + + /* record length of storage in first element */ + *list++ = (dr_common_unit_t *)(uintptr_t)len; + + /* get bit array signifying which units are to be involved */ + uset = DEVSET_GET_UNITSET(hp->h_devset, type); + + /* + * Adjust the loop count for CPU devices since all cores + * in a CMP will be examined in a single iteration. + */ + if (type == SBD_COMP_CPU) { + nunits = MAX_CMP_UNITS_PER_BOARD; + } + + /* populate list */ + for (wp = list, unum = 0; unum < nunits; unum++) { + dr_dev_unit_t *dp; + int core; + int cunum; + + dp = DR_GET_BOARD_DEVUNIT(bp, type, unum); + if (dr_dev_is_target(dp, present_only, uset)) { + *wp++ = &dp->du_common; + } + + /* further processing is only required for CPUs */ + if (type != SBD_COMP_CPU) { + continue; + } + + /* + * Add any additional cores from the current CPU + * device. This is to ensure that all the cores + * are grouped together in the device list, and + * consequently sequenced together during the actual + * operation. + */ + for (core = 1; core < MAX_CORES_PER_CMP; core++) { + cunum = DR_CMP_CORE_UNUM(unum, core); + dp = DR_GET_BOARD_DEVUNIT(bp, type, cunum); + + if (dr_dev_is_target(dp, present_only, uset)) { + *wp++ = &dp->du_common; + } + } + } + + /* calculate number of units in list, return result and list pointer */ + *devnum = wp - list; + *devlist = list; +} + +static void +dr_dev_clean_up(dr_handle_t *hp, dr_common_unit_t **list, int devnum) +{ + int len; + int n = 0; + dr_common_unit_t *cp, **rp = list; + + /* + * move first encountered unit error to handle if handle + * does not yet have a recorded error. 
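+ * The remaining unit errors are then cleared, so only the first
+ * failure encountered is reported to the caller.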
+ */ + if (hp->h_err == NULL) { + while (n++ < devnum) { + cp = *rp++; + if (cp->sbdev_error != NULL) { + hp->h_err = cp->sbdev_error; + cp->sbdev_error = NULL; + break; + } + } + } + + /* free remaining unit errors */ + while (n++ < devnum) { + cp = *rp++; + if (cp->sbdev_error != NULL) { + sbd_err_clear(&cp->sbdev_error); + cp->sbdev_error = NULL; + } + } + + /* free list */ + list -= 1; + len = (int)(uintptr_t)list[0]; + kmem_free(list, len); +} + +static int +dr_dev_walk(dr_handle_t *hp, sbd_comp_type_t type, int present_only, + int (*pre_op)(dr_handle_t *, dr_common_unit_t **, int), + void (*op)(dr_handle_t *, dr_common_unit_t *), + int (*post_op)(dr_handle_t *, dr_common_unit_t **, int), + void (*board_op)(dr_handle_t *, dr_common_unit_t **, int)) +{ + int devnum, rv; + dr_common_unit_t **devlist; + + dr_dev_make_list(hp, type, present_only, &devlist, &devnum); + + rv = 0; + if (devnum > 0) { + rv = (*pre_op)(hp, devlist, devnum); + if (rv == 0) { + int n; + + for (n = 0; n < devnum; n++) + (*op)(hp, devlist[n]); + + rv = (*post_op)(hp, devlist, devnum); + + (*board_op)(hp, devlist, devnum); + } + } + + dr_dev_clean_up(hp, devlist, devnum); + return (rv); +} + +/*ARGSUSED*/ +static int +dr_dev_noop(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + return (0); +} + +static void +dr_attach_update_state(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + dr_board_t *bp = hp->h_bd; + int i; + dr_devset_t devs_unattached, devs_present; + static fn_t f = "dr_attach_update_state"; + + for (i = 0; i < devnum; i++) { + dr_common_unit_t *cp = devlist[i]; + + if (dr_check_unit_attached(cp) == -1) { + PR_ALL("%s: ERROR %s not attached\n", + f, cp->sbdev_path); + continue; + } + + DR_DEV_SET_ATTACHED(cp); + + dr_device_transition(cp, DR_STATE_CONFIGURED); + cp->sbdev_cond = SBD_COND_OK; + } + + devs_present = DR_DEVS_PRESENT(bp); + devs_unattached = DR_DEVS_UNATTACHED(bp); + + switch (bp->b_state) { + case DR_STATE_CONNECTED: + case DR_STATE_UNCONFIGURED: + ASSERT(devs_present); + + if (devs_unattached == 0) { + /* + * All devices finally attached. + */ + dr_board_transition(bp, DR_STATE_CONFIGURED); + hp->h_bd->b_ostate = SBD_STAT_CONFIGURED; + hp->h_bd->b_rstate = SBD_STAT_CONNECTED; + hp->h_bd->b_cond = SBD_COND_OK; + hp->h_bd->b_busy = 0; + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } else if (devs_present != devs_unattached) { + /* + * Only some devices are fully attached. + */ + dr_board_transition(bp, DR_STATE_PARTIAL); + hp->h_bd->b_rstate = SBD_STAT_CONNECTED; + hp->h_bd->b_ostate = SBD_STAT_CONFIGURED; + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } + break; + + case DR_STATE_PARTIAL: + ASSERT(devs_present); + /* + * All devices finally attached. 
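+ * A PARTIAL board becomes CONFIGURED only once no unattached
+ * devices remain.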
+ */ + if (devs_unattached == 0) { + dr_board_transition(bp, DR_STATE_CONFIGURED); + hp->h_bd->b_rstate = SBD_STAT_CONNECTED; + hp->h_bd->b_ostate = SBD_STAT_CONFIGURED; + hp->h_bd->b_cond = SBD_COND_OK; + hp->h_bd->b_busy = 0; + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } + break; + + default: + break; + } +} + +static void +dr_dev_configure(dr_handle_t *hp) +{ + int rv; + + rv = dr_dev_walk(hp, SBD_COMP_CPU, 1, + dr_pre_attach_cpu, + dr_attach_cpu, + dr_post_attach_cpu, + dr_attach_update_state); + + if (rv >= 0) { + rv = dr_dev_walk(hp, SBD_COMP_MEM, 1, + dr_pre_attach_mem, + dr_attach_mem, + dr_post_attach_mem, + dr_attach_update_state); + } + + if (rv >= 0) { + (void) dr_dev_walk(hp, SBD_COMP_IO, 1, + dr_pre_attach_io, + dr_attach_io, + dr_post_attach_io, + dr_attach_update_state); + } +} + +static void +dr_release_update_state(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + _NOTE(ARGUNUSED(devlist)) + _NOTE(ARGUNUSED(devnum)) + + dr_board_t *bp = hp->h_bd; + + /* + * If the entire board was released and all components + * unreferenced then transfer it to the UNREFERENCED state. + */ + if ((bp->b_state != DR_STATE_RELEASE) && + (DR_DEVS_RELEASED(bp) == DR_DEVS_ATTACHED(bp))) { + dr_board_transition(bp, DR_STATE_RELEASE); + hp->h_bd->b_busy = 1; + } +} + +/* called by dr_release_done [below] and dr_release_mem_done [dr_mem.c] */ +int +dr_release_dev_done(dr_common_unit_t *cp) +{ + if (cp->sbdev_state == DR_STATE_RELEASE) { + ASSERT(DR_DEV_IS_RELEASED(cp)); + + DR_DEV_SET_UNREFERENCED(cp); + + dr_device_transition(cp, DR_STATE_UNREFERENCED); + + return (0); + } else { + return (-1); + } +} + +static void +dr_release_done(dr_handle_t *hp, dr_common_unit_t *cp) +{ + _NOTE(ARGUNUSED(hp)) + + dr_board_t *bp; + static fn_t f = "dr_release_done"; + + PR_ALL("%s...\n", f); + + /* get board pointer & sanity check */ + bp = cp->sbdev_bp; + ASSERT(bp == hp->h_bd); + + /* + * Transfer the device which just completed its release + * to the UNREFERENCED state. + */ + switch (cp->sbdev_type) { + case SBD_COMP_MEM: + dr_release_mem_done(cp); + break; + + default: + DR_DEV_SET_RELEASED(cp); + + dr_device_transition(cp, DR_STATE_RELEASE); + + (void) dr_release_dev_done(cp); + break; + } + + /* + * If we're not already in the RELEASE state for this + * board and we now have released all that were previously + * attached, then transfer the board to the RELEASE state. 
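+ * Once the board is already in the RELEASE state and every released
+ * device has become unreferenced, the board itself advances to the
+ * UNREFERENCED state.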
+ */ + if ((bp->b_state == DR_STATE_RELEASE) && + (DR_DEVS_RELEASED(bp) == DR_DEVS_UNREFERENCED(bp))) { + dr_board_transition(bp, DR_STATE_UNREFERENCED); + bp->b_busy = 1; + (void) drv_getparm(TIME, (void *)&bp->b_time); + } +} + +static void +dr_dev_release_mem(dr_handle_t *hp, dr_common_unit_t *dv) +{ + dr_release_mem(dv); + dr_release_done(hp, dv); +} + +static void +dr_dev_release(dr_handle_t *hp) +{ + int rv; + + hp->h_bd->b_busy = 1; + + rv = dr_dev_walk(hp, SBD_COMP_CPU, 0, + dr_pre_release_cpu, + dr_release_done, + dr_dev_noop, + dr_release_update_state); + + if (rv >= 0) { + rv = dr_dev_walk(hp, SBD_COMP_MEM, 0, + dr_pre_release_mem, + dr_dev_release_mem, + dr_dev_noop, + dr_release_update_state); + } + + if (rv >= 0) { + rv = dr_dev_walk(hp, SBD_COMP_IO, 0, + dr_pre_release_io, + dr_release_done, + dr_dev_noop, + dr_release_update_state); + + } + + if (rv < 0) + hp->h_bd->b_busy = 0; + /* else, b_busy will be cleared in dr_detach_update_state() */ +} + +static void +dr_detach_update_state(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + dr_board_t *bp = hp->h_bd; + int i; + dr_state_t bstate; + static fn_t f = "dr_detach_update_state"; + + for (i = 0; i < devnum; i++) { + dr_common_unit_t *cp = devlist[i]; + + if (dr_check_unit_attached(cp) >= 0) { + /* + * Device is still attached probably due + * to an error. Need to keep track of it. + */ + PR_ALL("%s: ERROR %s not detached\n", + f, cp->sbdev_path); + + continue; + } + + DR_DEV_CLR_ATTACHED(cp); + DR_DEV_CLR_RELEASED(cp); + DR_DEV_CLR_UNREFERENCED(cp); + dr_device_transition(cp, DR_STATE_UNCONFIGURED); + } + + bstate = bp->b_state; + if (bstate != DR_STATE_UNCONFIGURED) { + if (DR_DEVS_PRESENT(bp) == DR_DEVS_UNATTACHED(bp)) { + /* + * All devices are finally detached. + */ + dr_board_transition(bp, DR_STATE_UNCONFIGURED); + hp->h_bd->b_ostate = SBD_STAT_UNCONFIGURED; + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } else if ((bp->b_state != DR_STATE_PARTIAL) && + (DR_DEVS_ATTACHED(bp) != + DR_DEVS_PRESENT(bp))) { + /* + * Some devices remain attached. + */ + dr_board_transition(bp, DR_STATE_PARTIAL); + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } + + if ((hp->h_devset & DR_DEVS_UNATTACHED(bp)) == hp->h_devset) + hp->h_bd->b_busy = 0; + } +} + +static int +dr_dev_unconfigure(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + + /* + * Block out status during IO unconfig. + */ + mutex_enter(&bp->b_slock); + while (bp->b_sflags & DR_BSLOCK) { + if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) { + mutex_exit(&bp->b_slock); + return (EINTR); + } + } + bp->b_sflags |= DR_BSLOCK; + mutex_exit(&bp->b_slock); + + (void) dr_dev_walk(hp, SBD_COMP_IO, 0, + dr_pre_detach_io, + dr_detach_io, + dr_post_detach_io, + dr_detach_update_state); + + dr_unlock_status(bp); + + (void) dr_dev_walk(hp, SBD_COMP_CPU, 0, + dr_pre_detach_cpu, + dr_detach_cpu, + dr_post_detach_cpu, + dr_detach_update_state); + + (void) dr_dev_walk(hp, SBD_COMP_MEM, 0, + dr_pre_detach_mem, + dr_detach_mem, + dr_post_detach_mem, + dr_detach_update_state); + + return (0); +} + +static void +dr_dev_cancel(dr_handle_t *hp) +{ + int i; + dr_devset_t devset; + dr_board_t *bp = hp->h_bd; + static fn_t f = "dr_dev_cancel"; + + PR_ALL("%s...\n", f); + + /* + * Only devices which have been "released" are + * subject to cancellation. + */ + devset = hp->h_devset & DR_DEVS_RELEASED(bp); + + /* + * Nothing to do for CPUs or IO other than change back + * their state. 
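+ * (For CPUs, dr_cancel_cpu() below additionally powers on and + * re-onlines any cpu that was taken offline during the release; + * see dr_cancel_cpu() in dr_cpu.c.)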
+ */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + dr_cpu_unit_t *cp; + dr_state_t nstate; + + if (!DEVSET_IN_SET(devset, SBD_COMP_CPU, i)) + continue; + + cp = dr_get_cpu_unit(bp, i); + if (dr_cancel_cpu(cp) == 0) + nstate = DR_STATE_CONFIGURED; + else + nstate = DR_STATE_FATAL; + + dr_device_transition(&cp->sbc_cm, nstate); + } + + for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + dr_io_unit_t *ip; + + if (!DEVSET_IN_SET(devset, SBD_COMP_IO, i)) + continue; + ip = dr_get_io_unit(bp, i); + dr_device_transition(&ip->sbi_cm, DR_STATE_CONFIGURED); + } + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + dr_mem_unit_t *mp; + dr_state_t nstate; + + if (!DEVSET_IN_SET(devset, SBD_COMP_MEM, i)) + continue; + + mp = dr_get_mem_unit(bp, i); + if (dr_cancel_mem(mp) == 0) + nstate = DR_STATE_CONFIGURED; + else + nstate = DR_STATE_FATAL; + + dr_device_transition(&mp->sbm_cm, nstate); + } + + PR_ALL("%s: unreleasing devset (0x%x)\n", f, (uint_t)devset); + + DR_DEVS_CANCEL(bp, devset); + + if (DR_DEVS_RELEASED(bp) == 0) { + dr_state_t new_state; + /* + * If the board no longer has any released devices + * then transfer it back to the CONFIG/PARTIAL state. + */ + if (DR_DEVS_ATTACHED(bp) == DR_DEVS_PRESENT(bp)) + new_state = DR_STATE_CONFIGURED; + else + new_state = DR_STATE_PARTIAL; + if (bp->b_state != new_state) { + dr_board_transition(bp, new_state); + } + hp->h_bd->b_ostate = SBD_STAT_CONFIGURED; + hp->h_bd->b_busy = 0; + (void) drv_getparm(TIME, (void *)&hp->h_bd->b_time); + } +} + +static int +dr_dev_status(dr_handle_t *hp) +{ + int nstat, mode, ncm, sz, pbsz, pnstat; + dr_handle_t *shp; + dr_devset_t devset = 0; + sbd_stat_t *dstatp = NULL; + sbd_dev_stat_t *devstatp; + dr_board_t *bp; + drmach_status_t pstat; + int rv = 0; + +#ifdef _MULTI_DATAMODEL + int sz32 = 0; +#endif /* _MULTI_DATAMODEL */ + + static fn_t f = "dr_dev_status"; + + PR_ALL("%s...\n", f); + + mode = hp->h_mode; + shp = hp; + devset = shp->h_devset; + bp = hp->h_bd; + + /* + * Block out disconnect, unassign, IO unconfigure and + * devinfo branch creation during status. + */ + mutex_enter(&bp->b_slock); + while (bp->b_sflags & DR_BSLOCK) { + if (cv_wait_sig(&bp->b_scv, &bp->b_slock) == 0) { + mutex_exit(&bp->b_slock); + return (EINTR); + } + } + bp->b_sflags |= DR_BSLOCK; + mutex_exit(&bp->b_slock); + + ncm = 1; + if (hp->h_sbdcmd.cmd_cm.c_id.c_type == SBD_COMP_NONE) { + if (dr_cmd_flags(hp) & SBD_FLAG_ALLCMP) { + /* + * Calculate the maximum number of components possible + * for a board. This number will be used to size the + * status scratch buffer used by board and component + * status functions. + * This buffer may differ in size from what is provided + * by the plugin, since the known component set on the + * board may change between the plugin's GETNCM call and + * the status call. Sizing will be adjusted to the plugin's + * receptacle buffer at copyout time. + */ + ncm = MAX_CPU_UNITS_PER_BOARD + + MAX_MEM_UNITS_PER_BOARD + + MAX_IO_UNITS_PER_BOARD; + + } else { + /* + * In the case of c_type == SBD_COMP_NONE, and + * SBD_FLAG_ALLCMP not specified, only the board + * info is to be returned, no components.
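+ * For example, a status call naming SBD_COMP_NONE without + * SBD_FLAG_ALLCMP sizes the scratch buffer below for a bare + * sbd_stat_t, since ncm is forced to 0 and no extra + * sbd_dev_stat_t entries are needed.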
+ */ + ncm = 0; + devset = 0; + } + } + + sz = sizeof (sbd_stat_t); + if (ncm > 1) + sz += sizeof (sbd_dev_stat_t) * (ncm - 1); + + + pbsz = (int)hp->h_sbdcmd.cmd_stat.s_nbytes; + pnstat = (pbsz - sizeof (sbd_stat_t)) / sizeof (sbd_dev_stat_t); + + /* + * s_nbytes describes the size of the preallocated user + * buffer into which the application is expecting to + * receive the sbd_stat_t and sbd_dev_stat_t structures. + */ + +#ifdef _MULTI_DATAMODEL + + /* + * More buffer space is required for the 64bit to 32bit + * conversion of data structures. + */ + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + sz32 = sizeof (sbd_stat32_t); + if (ncm > 1) + sz32 += sizeof (sbd_dev_stat32_t) * (ncm - 1); + pnstat = (pbsz - sizeof (sbd_stat32_t))/ + sizeof (sbd_dev_stat32_t); + } + + sz += sz32; +#endif + /* + * Since one sbd_dev_stat_t is included in the sbd_stat_t, + * increment the plugin's nstat count. + */ + ++pnstat; + + if (bp->b_id == 0) { + bzero(&pstat, sizeof (pstat)); + } else { + sbd_error_t *err; + + err = drmach_status(bp->b_id, &pstat); + if (err) { + DRERR_SET_C(&hp->h_err, &err); + rv = EIO; + goto status_done; + } + } + + dstatp = (sbd_stat_t *)(void *)GETSTRUCT(char, sz); + + devstatp = &dstatp->s_stat[0]; + + dstatp->s_board = bp->b_num; + + /* + * Detect transitions between empty and disconnected. + */ + if (!pstat.empty && (bp->b_rstate == SBD_STAT_EMPTY)) + bp->b_rstate = SBD_STAT_DISCONNECTED; + else if (pstat.empty && (bp->b_rstate == SBD_STAT_DISCONNECTED)) + bp->b_rstate = SBD_STAT_EMPTY; + + dstatp->s_rstate = bp->b_rstate; + dstatp->s_ostate = bp->b_ostate; + dstatp->s_cond = bp->b_cond = pstat.cond; + dstatp->s_busy = bp->b_busy | pstat.busy; + dstatp->s_time = bp->b_time; + dstatp->s_power = pstat.powered; + dstatp->s_assigned = bp->b_assigned = pstat.assigned; + dstatp->s_nstat = nstat = 0; + bcopy(&pstat.type[0], &dstatp->s_type[0], SBD_TYPE_LEN); + bcopy(&pstat.info[0], &dstatp->s_info[0], SBD_MAX_INFO); + + devset &= DR_DEVS_PRESENT(bp); + if (devset == 0) { + /* + * No device chosen. + */ + PR_ALL("%s: no device present\n", f); + } + + if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT)) + if ((nstat = dr_cpu_status(hp, devset, devstatp)) > 0) { + dstatp->s_nstat += nstat; + devstatp += nstat; + } + + if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT)) + if ((nstat = dr_mem_status(hp, devset, devstatp)) > 0) { + dstatp->s_nstat += nstat; + devstatp += nstat; + } + + if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT)) + if ((nstat = dr_io_status(hp, devset, devstatp)) > 0) { + dstatp->s_nstat += nstat; + devstatp += nstat; + } + + /* + * Due to a possible change in number of components between + * the time of the plugin's GETNCM call and now, there may be + * more or fewer components than the plugin's buffer can + * hold. Adjust s_nstat accordingly. + */ + + dstatp->s_nstat = dstatp->s_nstat > pnstat ?
pnstat : dstatp->s_nstat; + +#ifdef _MULTI_DATAMODEL + if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + int i, j; + sbd_stat32_t *dstat32p; + + dstat32p = (sbd_stat32_t *)devstatp; + + /* Alignment Paranoia */ + if ((ulong_t)dstat32p & 0x1) { + PR_ALL("%s: alignment: sz=0x%lx dstat32p=0x%p\n", + f, sizeof (sbd_stat32_t), (void *)dstat32p); + DR_OP_INTERNAL_ERROR(hp); + rv = EINVAL; + goto status_done; + } + + /* paranoia: detect buffer overrun */ + if ((caddr_t)&dstat32p->s_stat[dstatp->s_nstat] > + ((caddr_t)dstatp) + sz) { + DR_OP_INTERNAL_ERROR(hp); + rv = EINVAL; + goto status_done; + } + + /* copy sbd_stat_t structure members */ +#define _SBD_STAT(t, m) dstat32p->m = (t)dstatp->m + _SBD_STAT(int32_t, s_board); + _SBD_STAT(int32_t, s_rstate); + _SBD_STAT(int32_t, s_ostate); + _SBD_STAT(int32_t, s_cond); + _SBD_STAT(int32_t, s_busy); + _SBD_STAT(time32_t, s_time); + _SBD_STAT(uint32_t, s_power); + _SBD_STAT(uint32_t, s_assigned); + _SBD_STAT(int32_t, s_nstat); + bcopy(&dstatp->s_type[0], &dstat32p->s_type[0], + SBD_TYPE_LEN); + bcopy(&dstatp->s_info[0], &dstat32p->s_info[0], + SBD_MAX_INFO); +#undef _SBD_STAT + + for (i = 0; i < dstatp->s_nstat; i++) { + sbd_dev_stat_t *dsp = &dstatp->s_stat[i]; + sbd_dev_stat32_t *ds32p = &dstat32p->s_stat[i]; +#define _SBD_DEV_STAT(t, m) ds32p->m = (t)dsp->m + + /* copy sbd_cm_stat_t structure members */ + _SBD_DEV_STAT(int32_t, ds_type); + _SBD_DEV_STAT(int32_t, ds_unit); + _SBD_DEV_STAT(int32_t, ds_ostate); + _SBD_DEV_STAT(int32_t, ds_cond); + _SBD_DEV_STAT(int32_t, ds_busy); + _SBD_DEV_STAT(int32_t, ds_suspend); + _SBD_DEV_STAT(time32_t, ds_time); + bcopy(&dsp->ds_name[0], &ds32p->ds_name[0], + OBP_MAXPROPNAME); + + switch (dsp->ds_type) { + case SBD_COMP_CPU: + /* copy sbd_cpu_stat_t structure members */ + _SBD_DEV_STAT(int32_t, d_cpu.cs_isbootproc); + _SBD_DEV_STAT(int32_t, d_cpu.cs_cpuid); + _SBD_DEV_STAT(int32_t, d_cpu.cs_speed); + _SBD_DEV_STAT(int32_t, d_cpu.cs_ecache); + break; + + case SBD_COMP_MEM: + /* copy sbd_mem_stat_t structure members */ + _SBD_DEV_STAT(int32_t, d_mem.ms_interleave); + _SBD_DEV_STAT(uint32_t, d_mem.ms_basepfn); + _SBD_DEV_STAT(uint32_t, d_mem.ms_totpages); + _SBD_DEV_STAT(uint32_t, d_mem.ms_detpages); + _SBD_DEV_STAT(int32_t, d_mem.ms_pageslost); + _SBD_DEV_STAT(uint32_t, d_mem.ms_managed_pages); + _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_pages); + _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_first); + _SBD_DEV_STAT(uint32_t, d_mem.ms_noreloc_last); + _SBD_DEV_STAT(int32_t, d_mem.ms_cage_enabled); + _SBD_DEV_STAT(int32_t, d_mem.ms_peer_is_target); + bcopy(&dsp->d_mem.ms_peer_ap_id[0], + &ds32p->d_mem.ms_peer_ap_id[0], + sizeof (ds32p->d_mem.ms_peer_ap_id)); + break; + + case SBD_COMP_IO: + /* copy sbd_io_stat_t structure members */ + _SBD_DEV_STAT(int32_t, d_io.is_referenced); + _SBD_DEV_STAT(int32_t, d_io.is_unsafe_count); + + for (j = 0; j < SBD_MAX_UNSAFE; j++) + _SBD_DEV_STAT(int32_t, + d_io.is_unsafe_list[j]); + + bcopy(&dsp->d_io.is_pathname[0], + &ds32p->d_io.is_pathname[0], MAXPATHLEN); + break; + + case SBD_COMP_CMP: + /* copy sbd_cmp_stat_t structure members */ + bcopy(&dsp->d_cmp.ps_cpuid[0], + &ds32p->d_cmp.ps_cpuid[0], + sizeof (ds32p->d_cmp.ps_cpuid)); + _SBD_DEV_STAT(int32_t, d_cmp.ps_ncores); + _SBD_DEV_STAT(int32_t, d_cmp.ps_speed); + _SBD_DEV_STAT(int32_t, d_cmp.ps_ecache); + break; + + default: + cmn_err(CE_WARN, "%s: unknown dev type (%d)", + f, (int)dsp->ds_type); + rv = EFAULT; + goto status_done; + } +#undef _SBD_DEV_STAT + } + + + if (ddi_copyout((void *)dstat32p, + 
hp->h_sbdcmd.cmd_stat.s_statp, pbsz, mode) != 0) { + cmn_err(CE_WARN, + "%s: failed to copyout status " + "for board %d", f, bp->b_num); + rv = EFAULT; + goto status_done; + } + } else +#endif /* _MULTI_DATAMODEL */ + + if (ddi_copyout((void *)dstatp, hp->h_sbdcmd.cmd_stat.s_statp, + pbsz, mode) != 0) { + cmn_err(CE_WARN, + "%s: failed to copyout status for board %d", + f, bp->b_num); + rv = EFAULT; + goto status_done; + } + +status_done: + if (dstatp != NULL) + FREESTRUCT(dstatp, char, sz); + + dr_unlock_status(bp); + + return (rv); +} + +static int +dr_get_ncm(dr_handle_t *hp) +{ + int i; + int ncm = 0; + dr_devset_t devset; + + devset = DR_DEVS_PRESENT(hp->h_bd); + if (hp->h_sbdcmd.cmd_cm.c_id.c_type != SBD_COMP_NONE) + devset &= DEVSET(hp->h_sbdcmd.cmd_cm.c_id.c_type, + DEVSET_ANYUNIT); + + /* + * Handle CPUs first to deal with possible CMP + * devices. If the CPU is a CMP, we need to only + * increment ncm once even if there are multiple + * cores for that CMP present in the devset. + */ + for (i = 0; i < MAX_CMP_UNITS_PER_BOARD; i++) { + if (devset & DEVSET(SBD_COMP_CMP, i)) { + ncm++; + } + } + + /* eliminate the CPU information from the devset */ + devset &= ~(DEVSET(SBD_COMP_CMP, DEVSET_ANYUNIT)); + + for (i = 0; i < (sizeof (dr_devset_t) * 8); i++) { + ncm += devset & 0x1; + devset >>= 1; + } + + return (ncm); +} + +/* used by dr_mem.c */ +/* TODO: eliminate dr_boardlist */ +dr_board_t * +dr_lookup_board(int board_num) +{ + dr_board_t *bp; + + ASSERT(board_num >= 0 && board_num < MAX_BOARDS); + + bp = &dr_boardlist[board_num]; + ASSERT(bp->b_num == board_num); + + return (bp); +} + +static dr_dev_unit_t * +dr_get_dev_unit(dr_board_t *bp, sbd_comp_type_t nt, int unit_num) +{ + dr_dev_unit_t *dp; + + dp = DR_GET_BOARD_DEVUNIT(bp, nt, unit_num); + ASSERT(dp->du_common.sbdev_bp == bp); + ASSERT(dp->du_common.sbdev_unum == unit_num); + ASSERT(dp->du_common.sbdev_type == nt); + + return (dp); +} + +dr_cpu_unit_t * +dr_get_cpu_unit(dr_board_t *bp, int unit_num) +{ + dr_dev_unit_t *dp; + + ASSERT(unit_num >= 0 && unit_num < MAX_CPU_UNITS_PER_BOARD); + + dp = dr_get_dev_unit(bp, SBD_COMP_CPU, unit_num); + return (&dp->du_cpu); +} + +dr_mem_unit_t * +dr_get_mem_unit(dr_board_t *bp, int unit_num) +{ + dr_dev_unit_t *dp; + + ASSERT(unit_num >= 0 && unit_num < MAX_MEM_UNITS_PER_BOARD); + + dp = dr_get_dev_unit(bp, SBD_COMP_MEM, unit_num); + return (&dp->du_mem); +} + +dr_io_unit_t * +dr_get_io_unit(dr_board_t *bp, int unit_num) +{ + dr_dev_unit_t *dp; + + ASSERT(unit_num >= 0 && unit_num < MAX_IO_UNITS_PER_BOARD); + + dp = dr_get_dev_unit(bp, SBD_COMP_IO, unit_num); + return (&dp->du_io); +} + +dr_common_unit_t * +dr_get_common_unit(dr_board_t *bp, sbd_comp_type_t nt, int unum) +{ + dr_dev_unit_t *dp; + + dp = dr_get_dev_unit(bp, nt, unum); + return (&dp->du_common); +} + +static dr_devset_t +dr_dev2devset(sbd_comp_id_t *cid) +{ + static fn_t f = "dr_dev2devset"; + + dr_devset_t devset; + int unit = cid->c_unit; + + switch (cid->c_type) { + case SBD_COMP_NONE: + devset = DEVSET(SBD_COMP_CPU, DEVSET_ANYUNIT); + devset |= DEVSET(SBD_COMP_MEM, DEVSET_ANYUNIT); + devset |= DEVSET(SBD_COMP_IO, DEVSET_ANYUNIT); + PR_ALL("%s: COMP_NONE devset = " DEVSET_FMT_STR "\n", + f, DEVSET_FMT_ARG(devset)); + break; + + case SBD_COMP_CPU: + if ((unit > MAX_CPU_UNITS_PER_BOARD) || (unit < 0)) { + cmn_err(CE_WARN, + "%s: invalid cpu unit# = %d", + f, unit); + devset = 0; + } else { + /* + * Generate a devset that includes all the + * cores of a CMP device. 
If this is not a + * CMP, the extra cores will be eliminated + * later since they are not present. This is + * also true for CMP devices that do not have + * all cores active. + */ + devset = DEVSET(SBD_COMP_CMP, unit); + } + + PR_ALL("%s: CPU devset = " DEVSET_FMT_STR "\n", + f, DEVSET_FMT_ARG(devset)); + break; + + case SBD_COMP_MEM: + if (unit == SBD_NULL_UNIT) { + unit = 0; + cid->c_unit = 0; + } + + if ((unit > MAX_MEM_UNITS_PER_BOARD) || (unit < 0)) { + cmn_err(CE_WARN, + "%s: invalid mem unit# = %d", + f, unit); + devset = 0; + } else + devset = DEVSET(cid->c_type, unit); + + PR_ALL("%s: MEM devset = " DEVSET_FMT_STR "\n", + f, DEVSET_FMT_ARG(devset)); + break; + + case SBD_COMP_IO: + if ((unit > MAX_IO_UNITS_PER_BOARD) || (unit < 0)) { + cmn_err(CE_WARN, + "%s: invalid io unit# = %d", + f, unit); + devset = 0; + } else + devset = DEVSET(cid->c_type, unit); + + PR_ALL("%s: IO devset = " DEVSET_FMT_STR "\n", + f, DEVSET_FMT_ARG(devset)); + break; + + default: + case SBD_COMP_UNKNOWN: + devset = 0; + break; + } + + return (devset); +} + +/* + * Converts a dynamic attachment point name to an SBD_COMP_* type. + * Returns SBD_COMP_UNKNOWN if the name is not recognized. + */ +static int +dr_dev_type_to_nt(char *type) +{ + int i; + + for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++) + if (strcmp(dr_devattr[i].s_devtype, type) == 0) + break; + + return (dr_devattr[i].s_nodetype); +} + +/* + * Converts an SBD_COMP_* type to a dynamic attachment point name. + * Returns NULL if the SBD_COMP_* type is not recognized. + */ +char * +dr_nt_to_dev_type(int nt) +{ + int i; + + for (i = 0; dr_devattr[i].s_nodetype != SBD_COMP_UNKNOWN; i++) + if (dr_devattr[i].s_nodetype == nt) + break; + + return (dr_devattr[i].s_devtype); +} + +/* + * The state transition policy is that if there is some component for which + * the state transition is valid, then let it through. The exception is + * SBD_CMD_DISCONNECT. On disconnect, the state transition must be valid + * for ALL components. + * Returns the state that is in error, if any.
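 * + * For instance, a configure request against a board whose cpus are + * already CONFIGURED but whose memory is still CONNECTED drops the + * cpus from the devset and proceeds on the memory alone; the same + * situation under SBD_CMD_DISCONNECT would instead fail with the + * offending state.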
+ */ +static int +dr_check_transition(dr_board_t *bp, dr_devset_t *devsetp, + struct dr_state_trans *transp, int cmd) +{ + int s, ut; + int state_err = 0; + dr_devset_t devset; + dr_common_unit_t *cp; + static fn_t f = "dr_check_transition"; + + devset = *devsetp; + + if (DEVSET_IN_SET(devset, SBD_COMP_CPU, DEVSET_ANYUNIT)) { + for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) { + if (DEVSET_IN_SET(devset, SBD_COMP_CPU, ut) == 0) + continue; + + cp = dr_get_common_unit(bp, SBD_COMP_CPU, ut); + s = (int)cp->sbdev_state; + if (!DR_DEV_IS_PRESENT(cp)) { + DEVSET_DEL(devset, SBD_COMP_CPU, ut); + } else { + if (transp->x_op[s].x_rv) { + if (!state_err) + state_err = s; + DEVSET_DEL(devset, SBD_COMP_CPU, ut); + } + } + } + } + if (DEVSET_IN_SET(devset, SBD_COMP_MEM, DEVSET_ANYUNIT)) { + for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) { + if (DEVSET_IN_SET(devset, SBD_COMP_MEM, ut) == 0) + continue; + + cp = dr_get_common_unit(bp, SBD_COMP_MEM, ut); + s = (int)cp->sbdev_state; + if (!DR_DEV_IS_PRESENT(cp)) { + DEVSET_DEL(devset, SBD_COMP_MEM, ut); + } else { + if (transp->x_op[s].x_rv) { + if (!state_err) + state_err = s; + DEVSET_DEL(devset, SBD_COMP_MEM, ut); + } + } + } + } + if (DEVSET_IN_SET(devset, SBD_COMP_IO, DEVSET_ANYUNIT)) { + for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) { + if (DEVSET_IN_SET(devset, SBD_COMP_IO, ut) == 0) + continue; + + cp = dr_get_common_unit(bp, SBD_COMP_IO, ut); + s = (int)cp->sbdev_state; + if (!DR_DEV_IS_PRESENT(cp)) { + DEVSET_DEL(devset, SBD_COMP_IO, ut); + } else { + if (transp->x_op[s].x_rv) { + if (!state_err) + state_err = s; + DEVSET_DEL(devset, SBD_COMP_IO, ut); + } + } + } + } + + PR_ALL("%s: requested devset = 0x%x, final devset = 0x%x\n", + f, (uint_t)*devsetp, (uint_t)devset); + + *devsetp = devset; + /* + * If there are some remaining components for which + * this state transition is valid, then allow them + * through, otherwise if none are left then return + * the state error. The exception is SBD_CMD_DISCONNECT. + * On disconnect, the state transition must be valid for ALL + * components. + */ + if (cmd == SBD_CMD_DISCONNECT) + return (state_err); + return (devset ? 0 : state_err); +} + +void +dr_device_transition(dr_common_unit_t *cp, dr_state_t st) +{ + PR_STATE("%s STATE %s(%d) -> %s(%d)\n", + cp->sbdev_path, + state_str[cp->sbdev_state], cp->sbdev_state, + state_str[st], st); + + cp->sbdev_state = st; + if (st == DR_STATE_CONFIGURED) { + cp->sbdev_ostate = SBD_STAT_CONFIGURED; + if (cp->sbdev_bp->b_ostate != SBD_STAT_CONFIGURED) { + cp->sbdev_bp->b_ostate = SBD_STAT_CONFIGURED; + (void) drv_getparm(TIME, + (void *) &cp->sbdev_bp->b_time); + } + } else + cp->sbdev_ostate = SBD_STAT_UNCONFIGURED; + + (void) drv_getparm(TIME, (void *) &cp->sbdev_time); +} + +static void +dr_board_transition(dr_board_t *bp, dr_state_t st) +{ + PR_STATE("BOARD %d STATE: %s(%d) -> %s(%d)\n", + bp->b_num, + state_str[bp->b_state], bp->b_state, + state_str[st], st); + + bp->b_state = st; +} + +void +dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...) +{ + sbd_error_t *err; + va_list args; + + va_start(args, fmt); + err = drerr_new_v(code, fmt, args); + va_end(args); + + if (ce != CE_IGNORE) + sbd_err_log(err, ce); + + DRERR_SET_C(&hp->h_err, &err); +} + +void +dr_dev_err(int ce, dr_common_unit_t *cp, int code) +{ + sbd_error_t *err; + + err = drerr_new(0, code, cp->sbdev_path, NULL); + + if (ce != CE_IGNORE) + sbd_err_log(err, ce); + + DRERR_SET_C(&cp->sbdev_error, &err); +} + +/* + * A callback routine. 
Called from the drmach layer as a result of a + * call to drmach_board_find_devices from dr_init_devlists. + */ +static sbd_error_t * +dr_dev_found(void *data, const char *name, int unum, drmachid_t id) +{ + dr_board_t *bp = data; + dr_dev_unit_t *dp; + int nt; + static fn_t f = "dr_dev_found"; + + PR_ALL("%s (board = %d, name = %s, unum = %d, id = %p)...\n", + f, bp->b_num, name, unum, id); + + nt = dr_dev_type_to_nt((char *)name); + if (nt == SBD_COMP_UNKNOWN) { + /* + * This should not happen. When it does, it indicates + * a mismatch in devices supported by the drmach layer + * vs devices supported by this layer. + */ + return (DR_INTERNAL_ERROR()); + } + + dp = DR_GET_BOARD_DEVUNIT(bp, nt, unum); + + /* sanity check */ + ASSERT(dp->du_common.sbdev_bp == bp); + ASSERT(dp->du_common.sbdev_unum == unum); + ASSERT(dp->du_common.sbdev_type == nt); + + /* render dynamic attachment point path of this unit */ + (void) snprintf(dp->du_common.sbdev_path, + sizeof (dp->du_common.sbdev_path), "%s::%s%d", + bp->b_path, name, DR_UNUM2SBD_UNUM(unum, nt)); + + dp->du_common.sbdev_id = id; + DR_DEV_SET_PRESENT(&dp->du_common); + + bp->b_ndev++; + + return (NULL); +} + +static sbd_error_t * +dr_init_devlists(dr_board_t *bp) +{ + int i; + sbd_error_t *err; + dr_dev_unit_t *dp; + static fn_t f = "dr_init_devlists"; + + PR_ALL("%s (%s)...\n", f, bp->b_path); + + /* sanity check */ + ASSERT(bp->b_ndev == 0); + + DR_DEVS_DISCONNECT(bp, (uint_t)-1); + + /* + * This routine builds the board's devlist and initializes + * the common portion of the unit data structures. + * Note: because the common portion is considered + * uninitialized, the dr_get_*_unit() routines cannot + * be used. + */ + + /* + * Clear out old entries, if any. + */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_CPU, i); + + bzero(dp, sizeof (*dp)); + dp->du_common.sbdev_bp = bp; + dp->du_common.sbdev_unum = i; + dp->du_common.sbdev_type = SBD_COMP_CPU; + } + + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_MEM, i); + + bzero(dp, sizeof (*dp)); + dp->du_common.sbdev_bp = bp; + dp->du_common.sbdev_unum = i; + dp->du_common.sbdev_type = SBD_COMP_MEM; + } + + for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + dp = DR_GET_BOARD_DEVUNIT(bp, SBD_COMP_IO, i); + + bzero(dp, sizeof (*dp)); + dp->du_common.sbdev_bp = bp; + dp->du_common.sbdev_unum = i; + dp->du_common.sbdev_type = SBD_COMP_IO; + } + + err = NULL; + if (bp->b_id) { + /* find devices on this board */ + err = drmach_board_find_devices( + bp->b_id, bp, dr_dev_found); + } + + return (err); +} + +/* + * Determine whether the unit corresponding to the given drmachid + * is attached. Returns 0 if attached, -1 if not (or on error). + */ +static int +dr_check_unit_attached(dr_common_unit_t *cp) +{ + int rv = 0; + processorid_t cpuid; + uint64_t basepa, endpa; + struct memlist *ml; + extern struct memlist *phys_install; + sbd_error_t *err; + int yes; + static fn_t f = "dr_check_unit_attached"; + + switch (cp->sbdev_type) { + case SBD_COMP_CPU: + err = drmach_cpu_get_id(cp->sbdev_id, &cpuid); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + rv = -1; + break; + } + mutex_enter(&cpu_lock); + if (cpu_get(cpuid) == NULL) + rv = -1; + mutex_exit(&cpu_lock); + break; + + case SBD_COMP_MEM: + err = drmach_mem_get_slice_info(cp->sbdev_id, + &basepa, &endpa, NULL); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + rv = -1; + break; + } + + /* + * Check if the base address is in phys_install.
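 * The loop below skips entries that do not overlap + * [basepa, endpa) and breaks at the first one that does; if it + * falls off the end of the list (ml == NULL), the slice is not + * installed and the unit is considered unattached.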
+ */ + memlist_read_lock(); + for (ml = phys_install; ml; ml = ml->ml_next) + if ((endpa <= ml->ml_address) || + (basepa >= (ml->ml_address + ml->ml_size))) + continue; + else + break; + memlist_read_unlock(); + if (ml == NULL) + rv = -1; + break; + + case SBD_COMP_IO: + err = drmach_io_is_attached(cp->sbdev_id, &yes); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + rv = -1; + break; + } else if (!yes) + rv = -1; + break; + + default: + PR_ALL("%s: unexpected nodetype(%d) for id 0x%p\n", + f, cp->sbdev_type, cp->sbdev_id); + rv = -1; + break; + } + + return (rv); +} + +/* + * See if drmach recognizes the passthru command. DRMACH expects the + * id to identify the thing to which the command is being applied. Using + * nonsense SBD terms, that information has been perversely encoded in the + * c_id member of the sbd_cmd_t structure. This logic reads those tea + * leaves, finds the associated drmach id, then calls drmach to process + * the passthru command. + */ +static int +dr_pt_try_drmach(dr_handle_t *hp) +{ + dr_board_t *bp = hp->h_bd; + sbd_comp_id_t *comp_id = &hp->h_sbdcmd.cmd_cm.c_id; + drmachid_t id; + + if (comp_id->c_type == SBD_COMP_NONE) { + id = bp->b_id; + } else { + sbd_comp_type_t nt; + + nt = dr_dev_type_to_nt(comp_id->c_name); + if (nt == SBD_COMP_UNKNOWN) { + dr_op_err(CE_IGNORE, hp, ESBD_INVAL, comp_id->c_name); + id = 0; + } else { + /* pt command applied to dynamic attachment point */ + dr_common_unit_t *cp; + cp = dr_get_common_unit(bp, nt, comp_id->c_unit); + id = cp->sbdev_id; + } + } + + if (hp->h_err == NULL) + hp->h_err = drmach_passthru(id, &hp->h_opts); + + return (hp->h_err == NULL ? 0 : -1); +} + +static int +dr_pt_ioctl(dr_handle_t *hp) +{ + int cmd, rv, len; + int32_t sz; + int found; + char *copts; + static fn_t f = "dr_pt_ioctl"; + + PR_ALL("%s...\n", f); + + sz = hp->h_opts.size; + copts = hp->h_opts.copts; + + if (sz == 0 || copts == (char *)NULL) { + cmn_err(CE_WARN, "%s: invalid passthru args", f); + return (EINVAL); + } + + found = 0; + for (cmd = 0; cmd < (sizeof (pt_arr) / sizeof (pt_arr[0])); cmd++) { + len = strlen(pt_arr[cmd].pt_name); + found = (strncmp(pt_arr[cmd].pt_name, copts, len) == 0); + if (found) + break; + } + + if (found) + rv = (*pt_arr[cmd].pt_func)(hp); + else + rv = dr_pt_try_drmach(hp); + + return (rv); +} + +/* + * Called at driver load time to determine the state and condition + * of an existing board in the system. + */ +static void +dr_board_discovery(dr_board_t *bp) +{ + int i; + dr_devset_t devs_lost, devs_attached = 0; + dr_cpu_unit_t *cp; + dr_mem_unit_t *mp; + dr_io_unit_t *ip; + static fn_t f = "dr_board_discovery"; + + if (DR_DEVS_PRESENT(bp) == 0) { + PR_ALL("%s: board %d has no devices present\n", + f, bp->b_num); + return; + } + + /* + * Check for existence of cpus. + */ + for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) { + cp = dr_get_cpu_unit(bp, i); + + if (!DR_DEV_IS_PRESENT(&cp->sbc_cm)) + continue; + + if (dr_check_unit_attached(&cp->sbc_cm) >= 0) { + DR_DEV_SET_ATTACHED(&cp->sbc_cm); + DEVSET_ADD(devs_attached, SBD_COMP_CPU, i); + PR_ALL("%s: board %d, cpu-unit %d - attached\n", + f, bp->b_num, i); + } + dr_init_cpu_unit(cp); + } + + /* + * Check for existence of memory. 
+ */ + for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) { + mp = dr_get_mem_unit(bp, i); + + if (!DR_DEV_IS_PRESENT(&mp->sbm_cm)) + continue; + + if (dr_check_unit_attached(&mp->sbm_cm) >= 0) { + DR_DEV_SET_ATTACHED(&mp->sbm_cm); + DEVSET_ADD(devs_attached, SBD_COMP_MEM, i); + PR_ALL("%s: board %d, mem-unit %d - attached\n", + f, bp->b_num, i); + } + dr_init_mem_unit(mp); + } + + /* + * Check for i/o state. + */ + for (i = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + ip = dr_get_io_unit(bp, i); + + if (!DR_DEV_IS_PRESENT(&ip->sbi_cm)) + continue; + + if (dr_check_unit_attached(&ip->sbi_cm) >= 0) { + /* + * Found it! + */ + DR_DEV_SET_ATTACHED(&ip->sbi_cm); + DEVSET_ADD(devs_attached, SBD_COMP_IO, i); + PR_ALL("%s: board %d, io-unit %d - attached\n", + f, bp->b_num, i); + } + dr_init_io_unit(ip); + } + + DR_DEVS_CONFIGURE(bp, devs_attached); + if (devs_attached && ((devs_lost = DR_DEVS_UNATTACHED(bp)) != 0)) { + int ut; + + /* + * It is not legal on board discovery to have a + * board that is only partially attached. A board + * is either all attached or all connected. If a + * board has at least one attached device, then + * the remaining devices, if any, must have + * been lost or disconnected. These devices can + * only be recovered by a full attach from scratch. + * Note that devices previously in the unreferenced + * state are subsequently lost until the next full + * attach. This is necessary since the driver unload + * that must have occurred would have wiped out the + * information necessary to re-configure the device + * back online, e.g. memlist. + */ + PR_ALL("%s: some devices LOST (" DEVSET_FMT_STR ")...\n", + f, DEVSET_FMT_ARG(devs_lost)); + + for (ut = 0; ut < MAX_CPU_UNITS_PER_BOARD; ut++) { + if (!DEVSET_IN_SET(devs_lost, SBD_COMP_CPU, ut)) + continue; + + cp = dr_get_cpu_unit(bp, ut); + dr_device_transition(&cp->sbc_cm, DR_STATE_EMPTY); + } + + for (ut = 0; ut < MAX_MEM_UNITS_PER_BOARD; ut++) { + if (!DEVSET_IN_SET(devs_lost, SBD_COMP_MEM, ut)) + continue; + + mp = dr_get_mem_unit(bp, ut); + dr_device_transition(&mp->sbm_cm, DR_STATE_EMPTY); + } + + for (ut = 0; ut < MAX_IO_UNITS_PER_BOARD; ut++) { + if (!DEVSET_IN_SET(devs_lost, SBD_COMP_IO, ut)) + continue; + + ip = dr_get_io_unit(bp, ut); + dr_device_transition(&ip->sbi_cm, DR_STATE_EMPTY); + } + + DR_DEVS_DISCONNECT(bp, devs_lost); + } +} + +static int +dr_board_init(dr_board_t *bp, dev_info_t *dip, int bd) +{ + sbd_error_t *err; + + mutex_init(&bp->b_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&bp->b_slock, NULL, MUTEX_DRIVER, NULL); + cv_init(&bp->b_scv, NULL, CV_DRIVER, NULL); + bp->b_rstate = SBD_STAT_EMPTY; + bp->b_ostate = SBD_STAT_UNCONFIGURED; + bp->b_cond = SBD_COND_UNKNOWN; + (void) drv_getparm(TIME, (void *)&bp->b_time); + + (void) drmach_board_lookup(bd, &bp->b_id); + bp->b_num = bd; + bp->b_dip = dip; + + bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = GETSTRUCT(dr_dev_unit_t, + MAX_CPU_UNITS_PER_BOARD); + + bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = GETSTRUCT(dr_dev_unit_t, + MAX_MEM_UNITS_PER_BOARD); + + bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = GETSTRUCT(dr_dev_unit_t, + MAX_IO_UNITS_PER_BOARD); + + /* + * Initialize the devlists + */ + err = dr_init_devlists(bp); + if (err) { + sbd_err_clear(&err); + dr_board_destroy(bp); + return (-1); + } else if (bp->b_ndev == 0) { + dr_board_transition(bp, DR_STATE_EMPTY); + } else { + /* + * Couldn't have made it down here without + * having found at least one device. + */ + ASSERT(DR_DEVS_PRESENT(bp) != 0); + /* + * Check the state of any possible devices on the + * board.
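+ * dr_board_discovery() above-defined classifies each present + * unit as attached or lost, and disconnects the lost ones.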
+ */ + dr_board_discovery(bp); + + bp->b_assigned = 1; + + if (DR_DEVS_UNATTACHED(bp) == 0) { + /* + * The board has no unattached devices, therefore + * by reason of insanity it must be configured! + */ + dr_board_transition(bp, DR_STATE_CONFIGURED); + bp->b_ostate = SBD_STAT_CONFIGURED; + bp->b_rstate = SBD_STAT_CONNECTED; + bp->b_cond = SBD_COND_OK; + (void) drv_getparm(TIME, (void *)&bp->b_time); + } else if (DR_DEVS_ATTACHED(bp)) { + dr_board_transition(bp, DR_STATE_PARTIAL); + bp->b_ostate = SBD_STAT_CONFIGURED; + bp->b_rstate = SBD_STAT_CONNECTED; + bp->b_cond = SBD_COND_OK; + (void) drv_getparm(TIME, (void *)&bp->b_time); + } else { + dr_board_transition(bp, DR_STATE_CONNECTED); + bp->b_rstate = SBD_STAT_CONNECTED; + (void) drv_getparm(TIME, (void *)&bp->b_time); + } + } + + return (0); +} + +static void +dr_board_destroy(dr_board_t *bp) +{ + PR_ALL("dr_board_destroy: num %d, path %s\n", + bp->b_num, bp->b_path); + + dr_board_transition(bp, DR_STATE_EMPTY); + bp->b_rstate = SBD_STAT_EMPTY; + (void) drv_getparm(TIME, (void *)&bp->b_time); + + /* + * Free up MEM unit structs. + */ + FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)], + dr_dev_unit_t, MAX_MEM_UNITS_PER_BOARD); + bp->b_dev[DEVSET_NIX(SBD_COMP_MEM)] = NULL; + /* + * Free up CPU unit structs. + */ + FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)], + dr_dev_unit_t, MAX_CPU_UNITS_PER_BOARD); + bp->b_dev[DEVSET_NIX(SBD_COMP_CPU)] = NULL; + /* + * Free up IO unit structs. + */ + FREESTRUCT(bp->b_dev[DEVSET_NIX(SBD_COMP_IO)], + dr_dev_unit_t, MAX_IO_UNITS_PER_BOARD); + bp->b_dev[DEVSET_NIX(SBD_COMP_IO)] = NULL; + + mutex_destroy(&bp->b_lock); + mutex_destroy(&bp->b_slock); + cv_destroy(&bp->b_scv); + + /* + * Reset the board structure to its initial state, otherwise it will + * cause trouble on the next call to dr_board_init() for the same board. + * dr_board_init() may be called multiple times for the same board + * if DR driver fails to initialize some boards. + */ + bzero(bp, sizeof (*bp)); +} + +void +dr_lock_status(dr_board_t *bp) +{ + mutex_enter(&bp->b_slock); + while (bp->b_sflags & DR_BSLOCK) + cv_wait(&bp->b_scv, &bp->b_slock); + bp->b_sflags |= DR_BSLOCK; + mutex_exit(&bp->b_slock); +} + +void +dr_unlock_status(dr_board_t *bp) +{ + mutex_enter(&bp->b_slock); + bp->b_sflags &= ~DR_BSLOCK; + cv_signal(&bp->b_scv); + mutex_exit(&bp->b_slock); +} + +/* + * Extract flags passed via ioctl. + */ +int +dr_cmd_flags(dr_handle_t *hp) +{ + return (hp->h_sbdcmd.cmd_cm.c_flags); +} diff --git a/usr/src/uts/i86pc/io/dr/dr.conf b/usr/src/uts/i86pc/io/dr/dr.conf new file mode 100644 index 0000000000..c694f5573f --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr.conf @@ -0,0 +1,27 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2009, Intel Corporation. 
+# All rights reserved. +# + +# Force attach driver to enable DR at boot time. +ddi-forceattach=1; diff --git a/usr/src/uts/i86pc/io/dr/dr_cpu.c b/usr/src/uts/i86pc/io/dr/dr_cpu.c new file mode 100644 index 0000000000..a893310977 --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr_cpu.c @@ -0,0 +1,885 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +/* + * CPU support routines for DR + */ + +#include <sys/note.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/dditypes.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ndi_impldefs.h> +#include <sys/kmem.h> +#include <sys/processor.h> +#include <sys/cpuvar.h> +#include <sys/promif.h> +#include <sys/sysmacros.h> +#include <sys/archsystm.h> +#include <sys/machsystm.h> +#include <sys/cpu_module.h> +#include <sys/cmn_err.h> + +#include <sys/dr.h> +#include <sys/dr_util.h> + +/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */ +static char *dr_ie_fmt = "dr_cpu.c %d"; + +int +dr_cpu_unit_is_sane(dr_board_t *bp, dr_cpu_unit_t *cp) +{ +#ifdef DEBUG + ASSERT(cp->sbc_cm.sbdev_bp == bp); + ASSERT(cp->sbc_cm.sbdev_type == SBD_COMP_CPU); +#else + _NOTE(ARGUNUSED(bp)) + _NOTE(ARGUNUSED(cp)) +#endif + + return (1); +} + +static int +dr_errno2ecode(int error) +{ + int rv; + + switch (error) { + case EBUSY: + rv = ESBD_BUSY; + break; + case EINVAL: + rv = ESBD_INVAL; + break; + case EALREADY: + rv = ESBD_ALREADY; + break; + case ENODEV: + rv = ESBD_NODEV; + break; + case ENOMEM: + rv = ESBD_NOMEM; + break; + default: + rv = ESBD_INVAL; + } + + return (rv); +} + +/* + * On x86, the "clock-frequency" and cache size device properties may be + * unavailable before the CPU starts. If they are unavailable, just set + * them to zero. + */ +static void +dr_cpu_set_prop(dr_cpu_unit_t *cp) +{ + sbd_error_t *err; + dev_info_t *dip; + uint64_t clock_freq; + int ecache_size = 0; + char *cache_str = NULL; + + err = drmach_get_dip(cp->sbc_cm.sbdev_id, &dip); + if (err) { + DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err); + return; + } + + if (dip == NULL) { + DR_DEV_INTERNAL_ERROR(&cp->sbc_cm); + return; + } + + /* read in the CPU speed */ + clock_freq = (unsigned int)ddi_prop_get_int(DDI_DEV_T_ANY, dip, + DDI_PROP_DONTPASS, "clock-frequency", 0); + + /* + * The ecache property string is not the same + * for all CPU implementations.
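+ * For example, Nehalem-EX publishes it as "l3-cache-size" (see + * the switch below); supporting a new implementation means + * adding its property name there.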
+ */ + switch (cp->sbc_cpu_impl) { + case X86_CPU_IMPL_NEHALEM_EX: + cache_str = "l3-cache-size"; + break; + default: + cmn_err(CE_WARN, "Unknown cpu implementation=0x%x", + cp->sbc_cpu_impl); + break; + } + + if (cache_str != NULL) { + /* read in the ecache size */ + /* + * If the property is not found in the CPU node, + * it has to be kept in the core or cmp node so + * we just keep looking. + */ + + ecache_size = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0, + cache_str, 0); + } + + /* convert to the proper units */ + cp->sbc_speed = (clock_freq + 500000) / 1000000; + cp->sbc_ecache = ecache_size / (1024 * 1024); +} + +void +dr_init_cpu_unit(dr_cpu_unit_t *cp) +{ + sbd_error_t *err; + dr_state_t new_state; + int cpuid; + int impl; + + if (DR_DEV_IS_ATTACHED(&cp->sbc_cm)) { + new_state = DR_STATE_CONFIGURED; + cp->sbc_cm.sbdev_cond = SBD_COND_OK; + } else if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) { + new_state = DR_STATE_CONNECTED; + cp->sbc_cm.sbdev_cond = SBD_COND_OK; + } else { + new_state = DR_STATE_EMPTY; + cp->sbc_cm.sbdev_cond = SBD_COND_UNKNOWN; + } + + if (DR_DEV_IS_PRESENT(&cp->sbc_cm)) { + err = drmach_cpu_get_id(cp->sbc_cm.sbdev_id, &cpuid); + if (err) { + DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err); + new_state = DR_STATE_FATAL; + goto done; + } + + err = drmach_cpu_get_impl(cp->sbc_cm.sbdev_id, &impl); + if (err) { + DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err); + new_state = DR_STATE_FATAL; + goto done; + } + } else { + cp->sbc_cpu_id = -1; + cp->sbc_cpu_impl = -1; + goto done; + } + + cp->sbc_cpu_id = cpuid; + cp->sbc_cpu_impl = impl; + + /* if true at init time, it must always be true */ + ASSERT(dr_cpu_unit_is_sane(cp->sbc_cm.sbdev_bp, cp)); + + mutex_enter(&cpu_lock); + if ((cpuid >= 0) && cpu[cpuid]) + cp->sbc_cpu_flags = cpu[cpuid]->cpu_flags; + else + cp->sbc_cpu_flags = P_OFFLINE | P_POWEROFF; + mutex_exit(&cpu_lock); + + dr_cpu_set_prop(cp); + +done: + /* delay transition until fully initialized */ + dr_device_transition(&cp->sbc_cm, new_state); +} + +int +dr_pre_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + int i; + static fn_t f = "dr_pre_attach_cpu"; + + PR_CPU("%s...\n", f); + + for (i = 0; i < devnum; i++) { + dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i]; + + ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up)); + + /* + * Print a console message for each attachment + * point. For CMP devices, this means that only + * one message should be printed, no matter how + * many cores are actually present. 
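+ * E.g., if MAX_CORES_PER_CMP were 4, only units 0, 4, 8, ... + * (the first core of each CMP) would print the message below.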
+ */ + if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) { + cmn_err(CE_CONT, "OS configure %s", + up->sbc_cm.sbdev_path); + } + } + + /* + * Block out status threads while creating + * devinfo tree branches + */ + dr_lock_status(hp->h_bd); + ndi_devi_enter(ddi_root_node(), (int *)(&hp->h_ndi)); + mutex_enter(&cpu_lock); + + return (0); +} + +/*ARGSUSED*/ +void +dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp) +{ + sbd_error_t *err; + processorid_t cpuid; + int rv; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + err = drmach_configure(cp->sbdev_id, 0); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + return; + } + + err = drmach_cpu_get_id(cp->sbdev_id, &cpuid); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + + err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY); + if (err) + sbd_err_clear(&err); + } else if ((rv = cpu_configure(cpuid)) != 0) { + dr_dev_err(CE_WARN, cp, dr_errno2ecode(rv)); + err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY); + if (err) + sbd_err_clear(&err); + } else { + dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp; + up->sbc_cpu_id = cpuid; + } +} + +/* + * dr_post_attach_cpu + * + * sbd error policy: Does not stop on error. Processes all units in list. + */ +int +dr_post_attach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + int i; + int errflag = 0; + static fn_t f = "dr_post_attach_cpu"; + + PR_CPU("%s...\n", f); + + /* Startup and online newly-attached CPUs */ + for (i = 0; i < devnum; i++) { + dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i]; + struct cpu *cp; + + ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up)); + + cp = cpu_get(up->sbc_cpu_id); + if (cp == NULL) { + cmn_err(CE_WARN, "%s: cpu_get failed for cpu %d", + f, up->sbc_cpu_id); + continue; + } + + if (cpu_is_poweredoff(cp)) { + if (cpu_poweron(cp) != 0) { + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTART); + errflag = 1; + } + PR_CPU("%s: cpu %d powered ON\n", f, up->sbc_cpu_id); + } + + if (cpu_is_offline(cp)) { + PR_CPU("%s: onlining cpu %d...\n", f, up->sbc_cpu_id); + + if (cpu_online(cp) != 0) { + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_ONLINE); + errflag = 1; + } + } + + } + + mutex_exit(&cpu_lock); + ndi_devi_exit(ddi_root_node(), hp->h_ndi); + dr_unlock_status(hp->h_bd); + + if (errflag) + return (-1); + else + return (0); +} + +/* + * dr_pre_release_cpu + * + * sbd error policy: Stops on first error. + */ +int +dr_pre_release_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + int c, cix, i, lastoffline = -1, rv = 0; + processorid_t cpuid; + struct cpu *cp; + dr_cpu_unit_t *up; + dr_devset_t devset; + sbd_dev_stat_t *ds; + static fn_t f = "dr_pre_release_cpu"; + int cpu_flags = 0; + + devset = DR_DEVS_PRESENT(hp->h_bd); + + /* allocate status struct storage. */ + ds = (sbd_dev_stat_t *) kmem_zalloc(sizeof (sbd_dev_stat_t) * + MAX_CPU_UNITS_PER_BOARD, KM_SLEEP); + + cix = dr_cpu_status(hp, devset, ds); + + mutex_enter(&cpu_lock); + + for (i = 0; i < devnum; i++) { + up = (dr_cpu_unit_t *)devlist[i]; + if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) { + continue; + } + ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up)); + + /* + * On x86 systems, some CPUs can't be unconfigured. + * For example, CPU0 can't be unconfigured because many other + * components have a dependency on it. + * This check determines if a CPU is currently in use and + * returns a "Device busy" error if so. 
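+ * The busy indication comes from the dr_cpu_status() snapshot + * taken above, whose cs_busy field reflects both the unit's + * sbdev_busy flag and the platform's drmach_status() report.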
+ */ + for (c = 0; c < cix; c++) { + if (ds[c].d_cpu.cs_unit == up->sbc_cm.sbdev_unum) { + if (ds[c].d_cpu.cs_busy) { + dr_dev_err(CE_WARN, &up->sbc_cm, + ESBD_BUSY); + rv = -1; + break; + } + } + } + if (c < cix) + break; + + cpuid = up->sbc_cpu_id; + if ((cp = cpu_get(cpuid)) == NULL) { + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE); + rv = -1; + break; + } + + /* used by dr_cancel_cpu during error flow */ + up->sbc_cpu_flags = cp->cpu_flags; + + if (CPU_ACTIVE(cp)) { + if (dr_cmd_flags(hp) & SBD_FLAG_FORCE) + cpu_flags = CPU_FORCED; + + PR_CPU("%s: offlining cpu %d\n", f, cpuid); + if (cpu_offline(cp, cpu_flags)) { + PR_CPU("%s: failed to offline cpu %d\n", f, + cpuid); + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE); + if (disp_bound_threads(cp, 0)) { + cmn_err(CE_WARN, "%s: thread(s) bound " + "to cpu %d", f, cp->cpu_id); + } + rv = -1; + break; + } else + lastoffline = i; + } + + if (!rv) { + sbd_error_t *err; + + err = drmach_release(up->sbc_cm.sbdev_id); + if (err) { + DRERR_SET_C(&up->sbc_cm.sbdev_error, &err); + rv = -1; + break; + } + } + } + + mutex_exit(&cpu_lock); + + if (rv) { + /* + * Need to unwind others since at this level (pre-release) + * the device state has not yet transitioned and failures + * will prevent us from reaching the "post" release + * function where states are normally transitioned. + */ + for (i = lastoffline; i >= 0; i--) { + up = (dr_cpu_unit_t *)devlist[i]; + (void) dr_cancel_cpu(up); + } + } + + kmem_free(ds, sizeof (sbd_dev_stat_t) * MAX_CPU_UNITS_PER_BOARD); + return (rv); +} + +/* + * dr_pre_detach_cpu + * + * sbd error policy: Stops on first error. + */ +int +dr_pre_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + _NOTE(ARGUNUSED(hp)) + + int i; + int cpu_flags = 0; + static fn_t f = "dr_pre_detach_cpu"; + + PR_CPU("%s...\n", f); + + /* + * Block out status threads while destroying devinfo tree + * branches + */ + dr_lock_status(hp->h_bd); + mutex_enter(&cpu_lock); + + for (i = 0; i < devnum; i++) { + dr_cpu_unit_t *up = (dr_cpu_unit_t *)devlist[i]; + struct cpu *cp; + + if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) { + continue; + } + + ASSERT(dr_cpu_unit_is_sane(hp->h_bd, up)); + + cp = cpu_get(up->sbc_cpu_id); + if (cp == NULL) + continue; + + /* + * Print a console message for each attachment + * point. For CMP devices, this means that only + * one message should be printed, no matter how + * many cores are actually present. + */ + if ((up->sbc_cm.sbdev_unum % MAX_CORES_PER_CMP) == 0) { + cmn_err(CE_CONT, "OS unconfigure %s\n", + up->sbc_cm.sbdev_path); + } + + /* + * CPUs were offlined during Release. + */ + if (cpu_is_poweredoff(cp)) { + PR_CPU("%s: cpu %d already powered OFF\n", + f, up->sbc_cpu_id); + continue; + } + + if (!cpu_is_offline(cp)) { + if (dr_cmd_flags(hp) & SBD_FLAG_FORCE) + cpu_flags = CPU_FORCED; + /* cpu was onlined after release. 
Offline it again */ + PR_CPU("%s: offlining cpu %d\n", f, up->sbc_cpu_id); + if (cpu_offline(cp, cpu_flags)) { + PR_CPU("%s: failed to offline cpu %d\n", + f, up->sbc_cpu_id); + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_OFFLINE); + if (disp_bound_threads(cp, 0)) { + cmn_err(CE_WARN, "%s: thread(s) bound " + "to cpu %d", f, cp->cpu_id); + } + goto err; + } + } + if (cpu_poweroff(cp) != 0) { + dr_dev_err(CE_WARN, &up->sbc_cm, ESBD_CPUSTOP); + goto err; + } else { + PR_CPU("%s: cpu %d powered OFF\n", f, up->sbc_cpu_id); + } + } + + return (0); + +err: + mutex_exit(&cpu_lock); + dr_unlock_status(hp->h_bd); + return (-1); +} + +/*ARGSUSED*/ +void +dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp) +{ + sbd_error_t *err; + processorid_t cpuid; + int rv; + dr_cpu_unit_t *up = (dr_cpu_unit_t *)cp; + + ASSERT(MUTEX_HELD(&cpu_lock)); + + if (!DR_DEV_IS_ATTACHED(&up->sbc_cm)) { + return; + } + + err = drmach_cpu_get_id(cp->sbdev_id, &cpuid); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + } else if ((rv = cpu_unconfigure(cpuid)) != 0) { + dr_dev_err(CE_IGNORE, cp, dr_errno2ecode(rv)); + } else { + err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + } else { + up->sbc_cpu_id = -1; + } + } +} + +/*ARGSUSED1*/ +int +dr_post_detach_cpu(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + static fn_t f = "dr_post_detach_cpu"; + + PR_CPU("%s...\n", f); + hp->h_ndi = 0; + + mutex_exit(&cpu_lock); + dr_unlock_status(hp->h_bd); + + return (0); +} + +static void +dr_fill_cpu_stat(dr_cpu_unit_t *cp, drmach_status_t *pstat, sbd_cpu_stat_t *csp) +{ + ASSERT(cp && pstat && csp); + + /* Fill in the common status information */ + bzero((caddr_t)csp, sizeof (*csp)); + csp->cs_type = cp->sbc_cm.sbdev_type; + csp->cs_unit = cp->sbc_cm.sbdev_unum; + (void) strlcpy(csp->cs_name, pstat->type, sizeof (csp->cs_name)); + csp->cs_cond = cp->sbc_cm.sbdev_cond; + csp->cs_busy = cp->sbc_cm.sbdev_busy | pstat->busy; + csp->cs_time = cp->sbc_cm.sbdev_time; + csp->cs_ostate = cp->sbc_cm.sbdev_ostate; + csp->cs_suspend = 0; + + /* CPU specific status data */ + csp->cs_cpuid = cp->sbc_cpu_id; + + /* + * If the speed and ecache properties have not been + * cached yet, read them in from the device tree. + */ + if ((cp->sbc_speed == 0) || (cp->sbc_ecache == 0)) + dr_cpu_set_prop(cp); + + /* use the cached speed and ecache values */ + csp->cs_speed = cp->sbc_speed; + csp->cs_ecache = cp->sbc_ecache; + + mutex_enter(&cpu_lock); + if (!cpu_get(csp->cs_cpuid)) { + /* ostate must be UNCONFIGURED */ + csp->cs_cm.c_ostate = SBD_STAT_UNCONFIGURED; + } + mutex_exit(&cpu_lock); +} + +/*ARGSUSED2*/ +static void +dr_fill_cmp_stat(sbd_cpu_stat_t *csp, int ncores, int impl, sbd_cmp_stat_t *psp) +{ + int core; + + ASSERT(csp && psp && (ncores >= 1)); + + bzero((caddr_t)psp, sizeof (*psp)); + + /* + * Fill in the common status information based + * on the data for the first core. + */ + psp->ps_type = SBD_COMP_CMP; + psp->ps_unit = DR_UNUM2SBD_UNUM(csp->cs_unit, SBD_COMP_CMP); + (void) strlcpy(psp->ps_name, csp->cs_name, sizeof (psp->ps_name)); + psp->ps_cond = csp->cs_cond; + psp->ps_busy = csp->cs_busy; + psp->ps_time = csp->cs_time; + psp->ps_ostate = csp->cs_ostate; + psp->ps_suspend = csp->cs_suspend; + + /* CMP specific status data */ + *psp->ps_cpuid = csp->cs_cpuid; + psp->ps_ncores = 1; + psp->ps_speed = csp->cs_speed; + psp->ps_ecache = csp->cs_ecache; + + /* + * Walk through the data for the remaining cores. 
+ * Make any adjustments to the common status data, + * or the shared CMP specific data if necessary. + */ + for (core = 1; core < ncores; core++) { + /* + * The following properties should be the same + * for all the cores of the CMP. + */ + ASSERT(psp->ps_unit == DR_UNUM2SBD_UNUM(csp[core].cs_unit, + SBD_COMP_CMP)); + + if (csp[core].cs_speed > psp->ps_speed) + psp->ps_speed = csp[core].cs_speed; + if (csp[core].cs_ecache > psp->ps_ecache) + psp->ps_ecache = csp[core].cs_ecache; + + psp->ps_cpuid[core] = csp[core].cs_cpuid; + psp->ps_ncores++; + + /* adjust time if necessary */ + if (csp[core].cs_time > psp->ps_time) { + psp->ps_time = csp[core].cs_time; + } + + psp->ps_busy |= csp[core].cs_busy; + + /* + * If any of the cores are configured, the + * entire CMP is marked as configured. + */ + if (csp[core].cs_ostate == SBD_STAT_CONFIGURED) { + psp->ps_ostate = csp[core].cs_ostate; + } + } +} + +int +dr_cpu_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) +{ + int cmp; + int core; + int ncpu; + dr_board_t *bp; + sbd_cpu_stat_t *cstat; + int impl; + + bp = hp->h_bd; + ncpu = 0; + + devset &= DR_DEVS_PRESENT(bp); + cstat = kmem_zalloc(sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP, + KM_SLEEP); + + /* + * Treat every CPU as a CMP. In the case where the + * device is not a CMP, treat it as a CMP with only + * one core. + */ + for (cmp = 0; cmp < MAX_CMP_UNITS_PER_BOARD; cmp++) { + int ncores; + dr_cpu_unit_t *cp; + drmach_status_t pstat; + sbd_error_t *err; + sbd_cmp_stat_t *psp; + + if ((devset & DEVSET(SBD_COMP_CMP, cmp)) == 0) { + continue; + } + + ncores = 0; + + for (core = 0; core < MAX_CORES_PER_CMP; core++) { + + cp = dr_get_cpu_unit(bp, DR_CMP_CORE_UNUM(cmp, core)); + + if (cp->sbc_cm.sbdev_state == DR_STATE_EMPTY) { + /* present, but not fully initialized */ + continue; + } + + ASSERT(dr_cpu_unit_is_sane(hp->h_bd, cp)); + + /* skip if not present */ + if (cp->sbc_cm.sbdev_id == (drmachid_t)0) { + continue; + } + + /* fetch platform status */ + err = drmach_status(cp->sbc_cm.sbdev_id, &pstat); + if (err) { + DRERR_SET_C(&cp->sbc_cm.sbdev_error, &err); + continue; + } + + dr_fill_cpu_stat(cp, &pstat, &cstat[ncores++]); + /* + * We should set impl here because the last core + * found might be EMPTY or not present. + */ + impl = cp->sbc_cpu_impl; + } + + if (ncores == 0) { + continue; + } + + /* + * Store the data to the outgoing array. If the + * device is a CMP, combine all the data for the + * cores into a single stat structure. + * + * The check for a CMP device uses the last core + * found, assuming that all cores will have the + * same implementation. + */ + if (CPU_IMPL_IS_CMP(impl)) { + psp = (sbd_cmp_stat_t *)dsp; + dr_fill_cmp_stat(cstat, ncores, impl, psp); + } else { + ASSERT(ncores == 1); + bcopy(cstat, dsp, sizeof (sbd_cpu_stat_t)); + } + + dsp++; + ncpu++; + } + + kmem_free(cstat, sizeof (sbd_cpu_stat_t) * MAX_CORES_PER_CMP); + + return (ncpu); +} + +/* + * Cancel a previous release operation for a cpu. + * For cpus this means simply bringing back online + * any cpus that were taken offline. Note that they + * had to have been online at the time they were + * released. + */ +int +dr_cancel_cpu(dr_cpu_unit_t *up) +{ + int rv = 0; + static fn_t f = "dr_cancel_cpu"; + + ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up)); + + if (cpu_flagged_active(up->sbc_cpu_flags)) { + struct cpu *cp; + + /* + * The CPU had been online; go ahead + * and bring it back online.
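+ * (sbc_cpu_flags was snapshotted by dr_pre_release_cpu() + * before the cpu was taken offline, so it reflects the + * cpu's pre-release state.)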
+ */ + PR_CPU("%s: bringing cpu %d back ONLINE\n", f, up->sbc_cpu_id); + + mutex_enter(&cpu_lock); + cp = cpu[up->sbc_cpu_id]; + + if (cpu_is_poweredoff(cp)) { + if (cpu_poweron(cp)) { + cmn_err(CE_WARN, "%s: failed to power-on " + "cpu %d", f, up->sbc_cpu_id); + rv = -1; + } + } + + if (rv == 0 && cpu_is_offline(cp)) { + if (cpu_online(cp)) { + cmn_err(CE_WARN, "%s: failed to online cpu %d", + f, up->sbc_cpu_id); + rv = -1; + } + } + + if (rv == 0 && cpu_is_online(cp)) { + if (cpu_flagged_nointr(up->sbc_cpu_flags)) { + if (cpu_intr_disable(cp) != 0) { + cmn_err(CE_WARN, "%s: failed to " + "disable interrupts on cpu %d", f, + up->sbc_cpu_id); + } + } + } + + mutex_exit(&cpu_lock); + } + + return (rv); +} + +int +dr_disconnect_cpu(dr_cpu_unit_t *up) +{ + sbd_error_t *err; + static fn_t f = "dr_disconnect_cpu"; + + PR_CPU("%s...\n", f); + + ASSERT((up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) || + (up->sbc_cm.sbdev_state == DR_STATE_UNCONFIGURED)); + + ASSERT(dr_cpu_unit_is_sane(up->sbc_cm.sbdev_bp, up)); + + if (up->sbc_cm.sbdev_state == DR_STATE_CONNECTED) { + /* + * Cpus were never brought in and so are still + * effectively disconnected, so nothing to do here. + */ + PR_CPU("%s: cpu %d never brought in\n", f, up->sbc_cpu_id); + return (0); + } + + err = drmach_cpu_disconnect(up->sbc_cm.sbdev_id); + if (err == NULL) + return (0); + else { + DRERR_SET_C(&up->sbc_cm.sbdev_error, &err); + return (-1); + } + /*NOTREACHED*/ +} diff --git a/usr/src/uts/i86pc/io/dr/dr_io.c b/usr/src/uts/i86pc/io/dr/dr_io.c new file mode 100644 index 0000000000..96e0e458cb --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr_io.c @@ -0,0 +1,389 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* + * I/O support routines for DR + */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/errno.h> +#include <sys/dditypes.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ndi_impldefs.h> +#include <sys/kmem.h> +#include <sys/promif.h> +#include <sys/sysmacros.h> +#include <sys/archsystm.h> +#include <sys/machsystm.h> + +#include <sys/dr.h> +#include <sys/dr_util.h> +#include <sys/drmach.h> + +void +dr_init_io_unit(dr_io_unit_t *ip) +{ + dr_state_t new_state; + + if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) { + new_state = DR_STATE_CONFIGURED; + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) { + new_state = DR_STATE_CONNECTED; + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } else { + new_state = DR_STATE_EMPTY; + } + dr_device_transition(&ip->sbi_cm, new_state); +} + +/*ARGSUSED*/ +void +dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp) +{ + sbd_error_t *err; + + dr_lock_status(hp->h_bd); + err = drmach_configure(cp->sbdev_id, 0); + dr_unlock_status(hp->h_bd); + + if (!err) + err = drmach_io_post_attach(cp->sbdev_id); + + if (err) + DRERR_SET_C(&cp->sbdev_error, &err); +} + +/* + * remove device nodes for the branch indicated by cp + */ +/*ARGSUSED*/ +void +dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp) +{ + sbd_error_t *err; + + err = drmach_unconfigure(cp->sbdev_id, 0); + + if (!err) + err = drmach_unconfigure(cp->sbdev_id, DEVI_BRANCH_DESTROY); + + if (!err) + err = drmach_io_post_release(cp->sbdev_id); + + if (err) { + dr_device_transition(cp, DR_STATE_CONFIGURED); + DRERR_SET_C(&cp->sbdev_error, &err); + } +} + +/*ARGSUSED*/ +int +dr_disconnect_io(dr_io_unit_t *ip) +{ + return (0); +} + +/*ARGSUSED*/ +int +dr_pre_attach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + int d; + + for (d = 0; d < devnum; d++) { + dr_common_unit_t *cp = devlist[d]; + + cmn_err(CE_CONT, "OS configure %s", cp->sbdev_path); + } + + return (0); +} + +/*ARGSUSED*/ +int +dr_post_attach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + return (0); +} + +static int +dr_check_io_refs(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + register int i, reftotal = 0; + static fn_t f = "dr_check_io_refs"; + + for (i = 0; i < devnum; i++) { + dr_io_unit_t *ip = (dr_io_unit_t *)devlist[i]; + dev_info_t *dip; + int ref; + int refcount_non_gldv3; + sbd_error_t *err; + + err = drmach_get_dip(ip->sbi_cm.sbdev_id, &dip); + if (err) + DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err); + else if (dip != NULL) { + ref = 0; + refcount_non_gldv3 = 0; + ASSERT(e_ddi_branch_held(dip)); + dr_check_devices(dip, &ref, hp, NULL, NULL, + 0, &refcount_non_gldv3); + ASSERT(refcount_non_gldv3 >= 0); + ASSERT(ref >= refcount_non_gldv3); + /* + * Ignore reference counts of non-gldv3 network devices + * as Crossbow creates reference counts for non-active + * (unplumbed) instances. Reference count check in + * detach() known to prevent device from detaching + * as necessary. 
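+ * Those counts are therefore subtracted from the total before + * the busy check below.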
+ */ + ref -= refcount_non_gldv3; + hp->h_err = NULL; + if (ref) { + dr_dev_err(CE_WARN, &ip->sbi_cm, ESBD_BUSY); + } + PR_IO("%s: dip(%s) ref = %d\n", + f, ddi_get_name(dip), ref); + reftotal += ref; + } else { + PR_IO("%s: NO dip for id (0x%x)\n", + f, (uint_t)(uintptr_t)ip->sbi_cm.sbdev_id); + } + } + + return (reftotal); +} + +int +dr_pre_release_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + static fn_t f = "dr_pre_release_io"; + int d; + + ASSERT(devnum > 0); + + /* fail if any I/O device pre-release fails */ + for (d = 0; d < devnum; d++) { + dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d]; + + if ((hp->h_err = drmach_io_pre_release( + ip->sbi_cm.sbdev_id)) != 0) { + return (-1); + } + } + + for (d = 0; d < devnum; d++) { + dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d]; + sbd_error_t *err; + + err = drmach_release(ip->sbi_cm.sbdev_id); + if (err) { + DRERR_SET_C(&ip->sbi_cm.sbdev_error, + &err); + return (-1); + } + } + + /* fail if any I/O devices are still referenced */ + if (dr_check_io_refs(hp, devlist, devnum) > 0) { + PR_IO("%s: failed - I/O devices ref'd\n", f); + + /* recover before return error */ + for (d = 0; d < devnum; d++) { + dr_io_unit_t *ip = (dr_io_unit_t *)devlist[d]; + sbd_error_t *err; + err = drmach_io_unrelease(ip->sbi_cm.sbdev_id); + if (err) { + DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err); + return (-1); + } + } + return (-1); + } + return (0); +} + +/*ARGSUSED*/ +int +dr_pre_detach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum) +{ + int d; + + ASSERT(devnum > 0); + + for (d = 0; d < devnum; d++) { + dr_common_unit_t *cp = devlist[d]; + + cmn_err(CE_CONT, "OS unconfigure %s", cp->sbdev_path); + } + + return (0); +} + +/*ARGSUSED*/ +int +dr_post_detach_io(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + register int i; + int rv = 0; + static fn_t f = "dr_post_detach_io"; + + ASSERT(devnum > 0); + for (i = 0; i < devnum; i++) { + dr_common_unit_t *cp = devlist[i]; + if (cp->sbdev_error != NULL) { + PR_IO("%s: Failed\n", f); + rv = -1; + break; + } + } + return (rv); +} + +static void +dr_get_comp_cond(dr_io_unit_t *ip, dev_info_t *dip) +{ + if (dip == NULL) { + ip->sbi_cm.sbdev_cond = SBD_COND_UNKNOWN; + return; + } + + if (DEVI(dip)->devi_flags & DEVI_RETIRED) { + ip->sbi_cm.sbdev_cond = SBD_COND_FAILED; + return; + } + + if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) { + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) { + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } +} + +int +dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) +{ + int i, ix; + dr_board_t *bp; + sbd_io_stat_t *isp; + dr_io_unit_t *ip; + + bp = hp->h_bd; + + /* + * Only look for requested devices that are actually present. 
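+ * (devset is intersected with the board's present-device mask + * before the units are walked).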
+ */ + devset &= DR_DEVS_PRESENT(bp); + + for (i = ix = 0; i < MAX_IO_UNITS_PER_BOARD; i++) { + drmachid_t id; + dev_info_t *dip; + sbd_error_t *err; + drmach_status_t pstat; + + if (DEVSET_IN_SET(devset, SBD_COMP_IO, i) == 0) + continue; + + ip = dr_get_io_unit(bp, i); + + if (ip->sbi_cm.sbdev_state == DR_STATE_EMPTY) { + /* present, but not fully initialized */ + continue; + } + + id = ip->sbi_cm.sbdev_id; + if (id == (drmachid_t)0) + continue; + + err = drmach_status(ip->sbi_cm.sbdev_id, &pstat); + if (err) { + DRERR_SET_C(&ip->sbi_cm.sbdev_error, &err); + return (-1); + } + + dip = NULL; + err = drmach_get_dip(id, &dip); + if (err) { + /* catch this in debug kernels */ + ASSERT(0); + + sbd_err_clear(&err); + continue; + } + + isp = &dsp->d_io; + bzero((caddr_t)isp, sizeof (*isp)); + + isp->is_cm.c_id.c_type = ip->sbi_cm.sbdev_type; + isp->is_cm.c_id.c_unit = ip->sbi_cm.sbdev_unum; + (void) strlcpy(isp->is_cm.c_id.c_name, pstat.type, + sizeof (isp->is_cm.c_id.c_name)); + + dr_get_comp_cond(ip, dip); + isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond; + isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy | pstat.busy; + isp->is_cm.c_time = ip->sbi_cm.sbdev_time; + isp->is_cm.c_ostate = ip->sbi_cm.sbdev_ostate; + isp->is_cm.c_sflags = 0; + + if (dip == NULL) { + isp->is_pathname[0] = '\0'; + isp->is_referenced = 0; + isp->is_unsafe_count = 0; + } else { + int refcount = 0, idx = 0; + uint64_t unsafe_devs[SBD_MAX_UNSAFE]; + + ASSERT(e_ddi_branch_held(dip)); + (void) ddi_pathname(dip, isp->is_pathname); + + /* check reference and unsafe counts on devices */ + isp->is_unsafe_count = 0; + dr_check_devices(dip, &refcount, hp, unsafe_devs, + &idx, SBD_MAX_UNSAFE, NULL); + while (idx > 0) { + isp->is_unsafe_list[idx-1] = unsafe_devs[idx-1]; + --idx; + } + + isp->is_referenced = (refcount == 0) ? 0 : 1; + + hp->h_err = NULL; + } + ix++; + dsp++; + } + + return (ix); +} diff --git a/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c new file mode 100644 index 0000000000..90ceb84044 --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr_mem_acpi.c @@ -0,0 +1,639 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +/* + * DR memory support routines. 
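+ * This is the ACPI (i86pc) flavor: the attach and status paths + * are implemented, while the detach, release, and cancel paths + * below are stubs that do nothing or simply fail.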
+ */ + +#include <sys/note.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/kobj.h> +#include <sys/conf.h> +#include <sys/dditypes.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ddi_impldefs.h> +#include <sys/ndi_impldefs.h> +#include <sys/sysmacros.h> +#include <sys/machsystm.h> +#include <sys/promif.h> +#include <sys/lgrp.h> +#include <sys/mem_config.h> +#include <vm/seg_kmem.h> +#include <vm/page.h> + +#include <sys/dr.h> +#include <sys/dr_util.h> +#include <sys/drmach.h> + +extern struct memlist *phys_install; + +/* TODO: push this reference below drmach line */ +extern int kcage_on; + +/* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */ +static char *dr_ie_fmt = "dr_mem_acpi.c %d"; + +static void dr_init_mem_unit_data(dr_mem_unit_t *mp); + +/* + * dr_mem_unit_t.sbm_flags + */ +#define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */ +#define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */ +#define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */ +#define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */ +#define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */ + +/* helper macros */ +#define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) +#define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) + +static struct memlist * +dr_get_memlist(dr_mem_unit_t *mp) +{ + struct memlist *mlist = NULL; + sbd_error_t *err; + static fn_t f = "dr_get_memlist"; + + PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path); + + /* + * Return cached memlist, if present. + * This memlist will be present following an + * unconfigure (a.k.a. detach) of this memunit. + * It should only be used in the case where a configure + * is bringing this memunit back in without going + * through the disconnect and connect states.
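+ * (The cached copy is left behind by dr_post_detach_mem and is + * destroyed by dr_post_attach_mem once it has been reused.)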
+ */ + if (mp->sbm_mlist) { + PR_MEM("%s: found cached memlist\n", f); + + mlist = memlist_dup(mp->sbm_mlist); + } else { + uint64_t basepa = _ptob64(mp->sbm_basepfn); + + /* attempt to construct a memlist using phys_install */ + + /* round down to slice base address */ + basepa &= ~mp->sbm_alignment_mask; + + /* get a copy of phys_install to edit */ + memlist_read_lock(); + mlist = memlist_dup(phys_install); + memlist_read_unlock(); + + /* trim lower irrelevant span */ + if (mlist) + mlist = memlist_del_span(mlist, 0ull, basepa); + + /* trim upper irrelevant span */ + if (mlist) { + uint64_t endpa, toppa; + + toppa = mp->sbm_slice_top; + endpa = _ptob64(physmax + 1); + if (endpa > toppa) + mlist = memlist_del_span( + mlist, toppa, + endpa - toppa); + } + + if (mlist) { + /* successfully built a memlist */ + PR_MEM("%s: derived memlist from phys_install\n", f); + } + + /* if no mlist yet, try platform layer */ + if (!mlist) { + err = drmach_mem_get_memlist( + mp->sbm_cm.sbdev_id, &mlist); + if (err) { + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + mlist = NULL; /* paranoia */ + } + } + } + + PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path); + PR_MEMLIST_DUMP(mlist); + + return (mlist); +} + +/*ARGSUSED*/ +void +dr_release_mem(dr_common_unit_t *cp) +{ +} + +void +dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp) +{ + dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; + struct memlist *ml, *mc; + sbd_error_t *err; + static fn_t f = "dr_attach_mem"; + uint64_t dr_physmax; + + PR_MEM("%s...\n", f); + + dr_lock_status(hp->h_bd); + err = drmach_configure(cp->sbdev_id, 0); + dr_unlock_status(hp->h_bd); + if (err) { + DRERR_SET_C(&cp->sbdev_error, &err); + return; + } + + ml = dr_get_memlist(mp); + + /* Skip memory with address above plat_dr_physmax or kpm_size */ + dr_physmax = plat_dr_physmax ? ptob(plat_dr_physmax) : UINT64_MAX; + if (kpm_size < dr_physmax) + dr_physmax = kpm_size; + ml = memlist_del_span(ml, dr_physmax, UINT64_MAX - dr_physmax); + + for (mc = ml; mc; mc = mc->ml_next) { + int rv; + sbd_error_t *err; + + rv = kphysm_add_memory_dynamic( + (pfn_t)btop(mc->ml_address), + (pgcnt_t)btop(mc->ml_size)); + if (rv != KPHYSM_OK) { + /* + * translate kphysm error and + * store in devlist error + */ + switch (rv) { + case KPHYSM_ERESOURCE: + rv = ESBD_NOMEM; + break; + + case KPHYSM_EFAULT: + rv = ESBD_FAULT; + break; + + default: + rv = ESBD_INTERNAL; + break; + } + + if (rv == ESBD_INTERNAL) { + DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); + } else + dr_dev_err(CE_WARN, &mp->sbm_cm, rv); + break; + } + + err = drmach_mem_add_span( + mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size); + if (err) { + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + break; + } + } + + memlist_delete(ml); + dr_init_mem_unit_data(mp); + + /* back out if configure failed */ + if (mp->sbm_cm.sbdev_error != NULL) { + dr_lock_status(hp->h_bd); + err = drmach_unconfigure(cp->sbdev_id, 0); + if (err) + sbd_err_clear(&err); + dr_unlock_status(hp->h_bd); + } +} + +/*ARGSUSED*/ +void +dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp) +{ +} + +/* + * This routine acts as a wrapper for kphysm_del_span_query in order to + * support potential memory holes in a board's physical address space. + * It calls kphysm_del_span_query for each node in a memlist and accumulates + * the results in *mp. 
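+ * In this ACPI implementation the per-node query is not wired + * up yet; the routine below only zeroes the result fields.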
+ */ +static int +dr_del_mlist_query(struct memlist *mlist, memquery_t *mp) +{ + int rv = 0; + + if (mlist == NULL) + cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n"); + + mp->phys_pages = 0; + mp->managed = 0; + mp->nonrelocatable = 0; + mp->first_nonrelocatable = 0; + mp->last_nonrelocatable = 0; + + return (rv); +} + +/* + * NOTE: This routine is only partially smart about multiple + * mem-units. Need to make mem-status structure smart + * about them also. + */ +int +dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) +{ + int m, mix; + memquery_t mq; + dr_board_t *bp; + dr_mem_unit_t *mp; + sbd_mem_stat_t *msp; + static fn_t f = "dr_mem_status"; + + bp = hp->h_bd; + devset &= DR_DEVS_PRESENT(bp); + + for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) { + int rv; + sbd_error_t *err; + drmach_status_t pstat; + dr_mem_unit_t *p_mp; + + if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0) + continue; + + mp = dr_get_mem_unit(bp, m); + + if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) { + /* present, but not fully initialized */ + continue; + } + + if (mp->sbm_cm.sbdev_id == (drmachid_t)0) + continue; + + /* fetch platform status */ + err = drmach_status(mp->sbm_cm.sbdev_id, &pstat); + if (err) { + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + continue; + } + + msp = &dsp->d_mem; + bzero((caddr_t)msp, sizeof (*msp)); + + (void) strlcpy(msp->ms_cm.c_id.c_name, pstat.type, + sizeof (msp->ms_cm.c_id.c_name)); + msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type; + msp->ms_cm.c_id.c_unit = mp->sbm_cm.sbdev_unum; + msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond; + msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy; + msp->ms_cm.c_time = mp->sbm_cm.sbdev_time; + msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate; + + msp->ms_totpages = mp->sbm_npages; + msp->ms_basepfn = mp->sbm_basepfn; + msp->ms_pageslost = mp->sbm_pageslost; + msp->ms_cage_enabled = kcage_on; + + if (mp->sbm_flags & DR_MFLAG_RESERVED) + p_mp = mp->sbm_peer; + else + p_mp = NULL; + + if (p_mp == NULL) { + msp->ms_peer_is_target = 0; + msp->ms_peer_ap_id[0] = '\0'; + } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + char *minor; + + /* + * b_dip doesn't have to be held for ddi_pathname() + * because the board struct (dr_board_t) will be + * destroyed before b_dip detaches. + */ + (void) ddi_pathname(bp->b_dip, path); + minor = strchr(p_mp->sbm_cm.sbdev_path, ':'); + + (void) snprintf(msp->ms_peer_ap_id, + sizeof (msp->ms_peer_ap_id), "%s%s", + path, (minor == NULL) ? "" : minor); + + kmem_free(path, MAXPATHLEN); + + if (p_mp->sbm_flags & DR_MFLAG_TARGET) + msp->ms_peer_is_target = 1; + } + + /* + * kphysm_del_span_query can report non-reloc pages = total + * pages for memory that is not yet configured + */ + if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) { + struct memlist *ml; + + ml = dr_get_memlist(mp); + rv = ml ? 
dr_del_mlist_query(ml, &mq) : -1; + memlist_delete(ml); + + if (rv == KPHYSM_OK) { + msp->ms_managed_pages = mq.managed; + msp->ms_noreloc_pages = mq.nonrelocatable; + msp->ms_noreloc_first = + mq.first_nonrelocatable; + msp->ms_noreloc_last = + mq.last_nonrelocatable; + msp->ms_cm.c_sflags = 0; + if (mq.nonrelocatable && + drmach_copy_rename_need_suspend( + mp->sbm_cm.sbdev_id)) { + SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE, + msp->ms_cm.c_sflags); + } + } else { + PR_MEM("%s: kphysm_del_span_query() = %d\n", + f, rv); + } + } + + /* + * Check source unit state during copy-rename + */ + if ((mp->sbm_flags & DR_MFLAG_SOURCE) && + (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED || + mp->sbm_cm.sbdev_state == DR_STATE_RELEASE)) + msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED; + + mix++; + dsp++; + } + + return (mix); +} + +/*ARGSUSED*/ +int +dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + int err_flag = 0; + int d; + sbd_error_t *err; + static fn_t f = "dr_pre_attach_mem"; + + PR_MEM("%s...\n", f); + + for (d = 0; d < devnum; d++) { + dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; + dr_state_t state; + + cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path); + + state = mp->sbm_cm.sbdev_state; + switch (state) { + case DR_STATE_UNCONFIGURED: + PR_MEM("%s: recovering from UNCONFIG for %s\n", + f, mp->sbm_cm.sbdev_path); + + /* use memlist cached by dr_post_detach_mem_unit */ + ASSERT(mp->sbm_mlist != NULL); + PR_MEM("%s: re-configuring cached memlist for %s:\n", + f, mp->sbm_cm.sbdev_path); + PR_MEMLIST_DUMP(mp->sbm_mlist); + + /* kphysm del handle should have been freed */ + ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); + + /*FALLTHROUGH*/ + + case DR_STATE_CONNECTED: + PR_MEM("%s: reprogramming mem hardware on %s\n", + f, mp->sbm_cm.sbdev_bp->b_path); + + PR_MEM("%s: enabling %s\n", + f, mp->sbm_cm.sbdev_path); + + err = drmach_mem_enable(mp->sbm_cm.sbdev_id); + if (err) { + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + err_flag = 1; + } + break; + + default: + dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE); + err_flag = 1; + break; + } + + /* exit the loop if an error was encountered */ + if (err_flag) + break; + } + + return (err_flag ? -1 : 0); +} + +/*ARGSUSED*/ +int +dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + int d; + static fn_t f = "dr_post_attach_mem"; + + PR_MEM("%s...\n", f); + + for (d = 0; d < devnum; d++) { + dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; + struct memlist *mlist, *ml; + + mlist = dr_get_memlist(mp); + + /* + * Verify the memory really did successfully attach + * by checking for its existence in phys_install. + */ + memlist_read_lock(); + if (memlist_intersect(phys_install, mlist) == 0) { + memlist_read_unlock(); + + DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); + + PR_MEM("%s: %s memlist not in phys_install", + f, mp->sbm_cm.sbdev_path); + + memlist_delete(mlist); + continue; + } + memlist_read_unlock(); + + for (ml = mlist; ml != NULL; ml = ml->ml_next) { + sbd_error_t *err; + + err = drmach_mem_add_span( + mp->sbm_cm.sbdev_id, + ml->ml_address, + ml->ml_size); + if (err) + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + } + + memlist_delete(mlist); + + /* + * Destroy cached memlist, if any. + * There will be a cached memlist in sbm_mlist if + * this board is being configured directly after + * an unconfigure. + * To support this transition, dr_post_detach_mem + * left a copy of the last known memlist in sbm_mlist.
+ * This memlist could differ from any derived from + * hardware if while this memunit was last configured + * the system detected and deleted bad pages from + * phys_install. The location of those bad pages + * will be reflected in the cached memlist. + */ + if (mp->sbm_mlist) { + memlist_delete(mp->sbm_mlist); + mp->sbm_mlist = NULL; + } + } + + return (0); +} + +/*ARGSUSED*/ +int +dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + return (-1); +} + +/*ARGSUSED*/ +int +dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + return (-1); +} + +/* + * Successful return from this function will have the memory + * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated + * and waiting. This routine's job is to select the memory that + * actually has to be released (detached) which may not necessarily + * be the same memory node that came in in devlist[], + * i.e. a copy-rename is needed. + */ +/*ARGSUSED*/ +int +dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) +{ + return (-1); +} + +/*ARGSUSED*/ +void +dr_release_mem_done(dr_common_unit_t *cp) +{ +} + +/*ARGSUSED*/ +int +dr_disconnect_mem(dr_mem_unit_t *mp) +{ + return (-1); +} + +/*ARGSUSED*/ +int +dr_cancel_mem(dr_mem_unit_t *s_mp) +{ + return (-1); +} + +void +dr_init_mem_unit(dr_mem_unit_t *mp) +{ + dr_state_t new_state; + + if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) { + new_state = DR_STATE_CONFIGURED; + mp->sbm_cm.sbdev_cond = SBD_COND_OK; + } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) { + new_state = DR_STATE_CONNECTED; + mp->sbm_cm.sbdev_cond = SBD_COND_OK; + } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) { + new_state = DR_STATE_OCCUPIED; + } else { + new_state = DR_STATE_EMPTY; + } + + if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) + dr_init_mem_unit_data(mp); + + /* delay transition until fully initialized */ + dr_device_transition(&mp->sbm_cm, new_state); +} + +static void +dr_init_mem_unit_data(dr_mem_unit_t *mp) +{ + drmachid_t id = mp->sbm_cm.sbdev_id; + drmach_mem_info_t minfo; + sbd_error_t *err; + static fn_t f = "dr_init_mem_unit_data"; + + PR_MEM("%s...\n", f); + + /* a little sanity checking */ + ASSERT(mp->sbm_peer == NULL); + ASSERT(mp->sbm_flags == 0); + + if (err = drmach_mem_get_info(id, &minfo)) { + DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); + return; + } + mp->sbm_basepfn = _b64top(minfo.mi_basepa); + mp->sbm_npages = _b64top(minfo.mi_size); + mp->sbm_alignment_mask = minfo.mi_alignment_mask; + mp->sbm_slice_base = minfo.mi_slice_base; + mp->sbm_slice_top = minfo.mi_slice_top; + mp->sbm_slice_size = minfo.mi_slice_size; + + PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n", + f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages); +} diff --git a/usr/src/uts/i86pc/io/dr/dr_quiesce.c b/usr/src/uts/i86pc/io/dr/dr_quiesce.c new file mode 100644 index 0000000000..f3467f6eb1 --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr_quiesce.c @@ -0,0 +1,1050 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * A CPR derivative specifically for starfire/starcat. + * X86 doesn't make use of the quiesce interfaces; it's kept for simplicity. + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/machparam.h> +#include <sys/machsystm.h> +#include <sys/ddi.h> +#define SUNDDI_IMPL +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/devctl.h> +#include <sys/time.h> +#include <sys/kmem.h> +#include <nfs/lm.h> +#include <sys/ddi_impldefs.h> +#include <sys/ndi_impldefs.h> +#include <sys/obpdefs.h> +#include <sys/cmn_err.h> +#include <sys/debug.h> +#include <sys/errno.h> +#include <sys/callb.h> +#include <sys/clock.h> +#include <sys/x_call.h> +#include <sys/cpuvar.h> +#include <sys/epm.h> +#include <sys/vfs.h> +#include <sys/promif.h> +#include <sys/conf.h> +#include <sys/cyclic.h> + +#include <sys/dr.h> +#include <sys/dr_util.h> + +extern void e_ddi_enter_driver_list(struct devnames *dnp, int *listcnt); +extern void e_ddi_exit_driver_list(struct devnames *dnp, int listcnt); +extern int is_pseudo_device(dev_info_t *dip); + +extern kmutex_t cpu_lock; +extern dr_unsafe_devs_t dr_unsafe_devs; + +static int dr_is_real_device(dev_info_t *dip); +static int dr_is_unsafe_major(major_t major); +static int dr_bypass_device(char *dname); +static int dr_check_dip(dev_info_t *dip, void *arg, uint_t ref); +static int dr_resolve_devname(dev_info_t *dip, char *buffer, + char *alias); +static sbd_error_t *drerr_int(int e_code, uint64_t *arr, int idx, + int majors); +static int dr_add_int(uint64_t *arr, int idx, int len, + uint64_t val); + +int dr_pt_test_suspend(dr_handle_t *hp); + +/* + * dr_quiesce.c interface + * NOTE: states used internally by dr_suspend and dr_resume + */ +typedef enum dr_suspend_state { + DR_SRSTATE_BEGIN = 0, + DR_SRSTATE_USER, + DR_SRSTATE_DRIVER, + DR_SRSTATE_FULL +} suspend_state_t; + +struct dr_sr_handle { + dr_handle_t *sr_dr_handlep; + dev_info_t *sr_failed_dip; + suspend_state_t sr_suspend_state; + uint_t sr_flags; + uint64_t sr_err_ints[DR_MAX_ERR_INT]; + int sr_err_idx; +}; + +#define SR_FLAG_WATCHDOG 0x1 + +/* + * XXX + * This hack will go away before RTI. Just for testing. + * List of drivers to bypass when performing a suspend. + */ +static char *dr_bypass_list[] = { + "" +}; + + +#define SKIP_SYNC /* bypass sync ops in dr_suspend */ + +/* + * dr_skip_user_threads is used to control if user threads should + * be suspended. If dr_skip_user_threads is true, the rest of the + * flags are not used; if it is false, dr_check_user_stop_result + * will be used to control whether or not we need to check suspend + * result, and dr_allow_blocked_threads will be used to control + * whether or not we allow suspend to continue if there are blocked + * threads. We allow all combinations of dr_check_user_stop_result + * and dr_allow_blocked_threads, even though it might not make much + * sense to not allow blocked threads when we don't even check stop + * result.
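+ * For example, the defaults below (skip = 0, check = 1, + * allow-blocked = 1) stop user threads, fail the suspend if a + * thread refuses to stop, but tolerate threads that are merely + * blocked in the kernel (see DR_VSTOPPED).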
+ */ +static int dr_skip_user_threads = 0; /* default to FALSE */ +static int dr_check_user_stop_result = 1; /* default to TRUE */ +static int dr_allow_blocked_threads = 1; /* default to TRUE */ + +#define DR_CPU_LOOP_MSEC 1000 + +static void +dr_stop_intr(void) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + kpreempt_disable(); + cyclic_suspend(); +} + +static void +dr_enable_intr(void) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + cyclic_resume(); + kpreempt_enable(); +} + +dr_sr_handle_t * +dr_get_sr_handle(dr_handle_t *hp) +{ + dr_sr_handle_t *srh; + + srh = GETSTRUCT(dr_sr_handle_t, 1); + srh->sr_dr_handlep = hp; + + return (srh); +} + +void +dr_release_sr_handle(dr_sr_handle_t *srh) +{ + ASSERT(srh->sr_failed_dip == NULL); + FREESTRUCT(srh, dr_sr_handle_t, 1); +} + +static int +dr_is_real_device(dev_info_t *dip) +{ + struct regspec *regbuf = NULL; + int length = 0; + int rc; + + if (ddi_get_driver(dip) == NULL) + return (0); + + if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR)) + return (1); + if (DEVI(dip)->devi_pm_flags & PMC_NO_SR) + return (0); + + /* + * now the general case + */ + rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg", + (caddr_t)&regbuf, &length); + ASSERT(rc != DDI_PROP_NO_MEMORY); + if (rc != DDI_PROP_SUCCESS) { + return (0); + } else { + if ((length > 0) && (regbuf != NULL)) + kmem_free(regbuf, length); + return (1); + } +} + +static int +dr_is_unsafe_major(major_t major) +{ + char *dname, **cpp; + int i, ndevs; + + if ((dname = ddi_major_to_name(major)) == NULL) { + PR_QR("dr_is_unsafe_major: invalid major # %d\n", major); + return (0); + } + + ndevs = dr_unsafe_devs.ndevs; + for (i = 0, cpp = dr_unsafe_devs.devnames; i < ndevs; i++) { + if (strcmp(dname, *cpp++) == 0) + return (1); + } + return (0); +} + +static int +dr_bypass_device(char *dname) +{ + int i; + char **lname; + /* check the bypass list */ + for (i = 0, lname = &dr_bypass_list[i]; **lname != '\0'; lname++) { + if (strcmp(dname, dr_bypass_list[i++]) == 0) + return (1); + } + return (0); +} + +static int +dr_resolve_devname(dev_info_t *dip, char *buffer, char *alias) +{ + major_t devmajor; + char *aka, *name; + + *buffer = *alias = 0; + + if (dip == NULL) + return (-1); + + if ((name = ddi_get_name(dip)) == NULL) + name = "<null name>"; + + aka = name; + + if ((devmajor = ddi_name_to_major(aka)) != DDI_MAJOR_T_NONE) + aka = ddi_major_to_name(devmajor); + + (void) strcpy(buffer, name); + + if (strcmp(name, aka)) + (void) strcpy(alias, aka); + else + *alias = 0; + + return (0); +} + +struct dr_ref { + int *refcount; + int *refcount_non_gldv3; + uint64_t *arr; + int *idx; + int len; +}; + +/* ARGSUSED */ +static int +dr_check_dip(dev_info_t *dip, void *arg, uint_t ref) +{ + major_t major; + char *dname; + struct dr_ref *rp = (struct dr_ref *)arg; + + if (dip == NULL) + return (DDI_WALK_CONTINUE); + + if (!dr_is_real_device(dip)) + return (DDI_WALK_CONTINUE); + + dname = ddi_binding_name(dip); + + if (dr_bypass_device(dname)) + return (DDI_WALK_CONTINUE); + + if (dname && ((major = ddi_name_to_major(dname)) != (major_t)-1)) { + if (ref && rp->refcount) { + *rp->refcount += ref; + PR_QR("\n %s (major# %d) is referenced(%u)\n", dname, + major, ref); + } + if (ref && rp->refcount_non_gldv3) { + if (NETWORK_PHYSDRV(major) && !GLDV3_DRV(major)) + *rp->refcount_non_gldv3 += ref; + } + if (dr_is_unsafe_major(major) && i_ddi_devi_attached(dip)) { + PR_QR("\n %s (major# %d) not hotpluggable\n", dname, + major); + if (rp->arr != NULL && rp->idx != NULL) + *rp->idx = dr_add_int(rp->arr, *rp->idx, +
rp->len, (uint64_t)major); + } + } + return (DDI_WALK_CONTINUE); +} + +static int +dr_check_unsafe_major(dev_info_t *dip, void *arg) +{ + return (dr_check_dip(dip, arg, 0)); +} + + +/*ARGSUSED*/ +void +dr_check_devices(dev_info_t *dip, int *refcount, dr_handle_t *handle, + uint64_t *arr, int *idx, int len, int *refcount_non_gldv3) +{ + struct dr_ref bref = {0}; + + if (dip == NULL) + return; + + bref.refcount = refcount; + bref.refcount_non_gldv3 = refcount_non_gldv3; + bref.arr = arr; + bref.idx = idx; + bref.len = len; + + ASSERT(e_ddi_branch_held(dip)); + (void) e_ddi_branch_referenced(dip, dr_check_dip, &bref); +} + +/* + * The "dip" argument's parent (if it exists) must be held busy. + */ +static int +dr_suspend_devices(dev_info_t *dip, dr_sr_handle_t *srh) +{ + dr_handle_t *handle; + major_t major; + char *dname; + int circ; + + /* + * If dip is the root node, it has no siblings and it is + * always held. If dip is not the root node, dr_suspend_devices() + * will be invoked with the parent held busy. + */ + for (; dip != NULL; dip = ddi_get_next_sibling(dip)) { + char d_name[40], d_alias[40], *d_info; + + ndi_devi_enter(dip, &circ); + if (dr_suspend_devices(ddi_get_child(dip), srh)) { + ndi_devi_exit(dip, circ); + return (ENXIO); + } + ndi_devi_exit(dip, circ); + + if (!dr_is_real_device(dip)) + continue; + + major = (major_t)-1; + if ((dname = ddi_binding_name(dip)) != NULL) + major = ddi_name_to_major(dname); + + if (dr_bypass_device(dname)) { + PR_QR(" bypassed suspend of %s (major# %d)\n", dname, + major); + continue; + } + + if (drmach_verify_sr(dip, 1)) { + PR_QR(" bypassed suspend of %s (major# %d)\n", dname, + major); + continue; + } + + if ((d_info = ddi_get_name_addr(dip)) == NULL) + d_info = "<null>"; + + d_name[0] = 0; + if (dr_resolve_devname(dip, d_name, d_alias) == 0) { + if (d_alias[0] != 0) { + prom_printf("\tsuspending %s@%s (aka %s)\n", + d_name, d_info, d_alias); + } else { + prom_printf("\tsuspending %s@%s\n", d_name, + d_info); + } + } else { + prom_printf("\tsuspending %s@%s\n", dname, d_info); + } + + if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) { + prom_printf("\tFAILED to suspend %s@%s\n", + d_name[0] ? d_name : dname, d_info); + + srh->sr_err_idx = dr_add_int(srh->sr_err_ints, + srh->sr_err_idx, DR_MAX_ERR_INT, (uint64_t)major); + + ndi_hold_devi(dip); + srh->sr_failed_dip = dip; + + handle = srh->sr_dr_handlep; + dr_op_err(CE_IGNORE, handle, ESBD_SUSPEND, "%s@%s", + d_name[0] ? 
d_name : dname, d_info); + + return (DDI_FAILURE); + } + } + + return (DDI_SUCCESS); +} + +static void +dr_resume_devices(dev_info_t *start, dr_sr_handle_t *srh) +{ + dr_handle_t *handle; + dev_info_t *dip, *next, *last = NULL; + major_t major; + char *bn; + int circ; + + major = (major_t)-1; + + /* attach in reverse device tree order */ + while (last != start) { + dip = start; + next = ddi_get_next_sibling(dip); + while (next != last && dip != srh->sr_failed_dip) { + dip = next; + next = ddi_get_next_sibling(dip); + } + if (dip == srh->sr_failed_dip) { + /* release hold acquired in dr_suspend_devices() */ + srh->sr_failed_dip = NULL; + ndi_rele_devi(dip); + } else if (dr_is_real_device(dip) && + srh->sr_failed_dip == NULL) { + + if ((bn = ddi_binding_name(dip)) != NULL) { + major = ddi_name_to_major(bn); + } else { + bn = "<null>"; + } + if (!dr_bypass_device(bn) && + !drmach_verify_sr(dip, 0)) { + char d_name[40], d_alias[40], *d_info; + + d_name[0] = 0; + d_info = ddi_get_name_addr(dip); + if (d_info == NULL) + d_info = "<null>"; + + if (!dr_resolve_devname(dip, d_name, d_alias)) { + if (d_alias[0] != 0) { + prom_printf("\tresuming " + "%s@%s (aka %s)\n", d_name, + d_info, d_alias); + } else { + prom_printf("\tresuming " + "%s@%s\n", d_name, d_info); + } + } else { + prom_printf("\tresuming %s@%s\n", bn, + d_info); + } + + if (devi_attach(dip, DDI_RESUME) != + DDI_SUCCESS) { + /* + * Print a console warning, + * set an e_code of ESBD_RESUME, + * and save the driver major + * number in the e_rsc. + */ + prom_printf("\tFAILED to resume %s@%s", + d_name[0] ? d_name : bn, d_info); + + srh->sr_err_idx = + dr_add_int(srh->sr_err_ints, + srh->sr_err_idx, DR_MAX_ERR_INT, + (uint64_t)major); + + handle = srh->sr_dr_handlep; + + dr_op_err(CE_IGNORE, handle, + ESBD_RESUME, "%s@%s", + d_name[0] ? d_name : bn, d_info); + } + } + } + + /* Hold parent busy while walking its children */ + ndi_devi_enter(dip, &circ); + dr_resume_devices(ddi_get_child(dip), srh); + ndi_devi_exit(dip, circ); + last = dip; + } +} + +/* + * True if thread is virtually stopped. Similar to CPR_VSTOPPED + * but from DR point of view. These user threads are waiting in + * the kernel. Once they complete in the kernel, they will process + * the stop signal and stop. + */ +#define DR_VSTOPPED(t) \ + ((t)->t_state == TS_SLEEP && \ + (t)->t_wchan != NULL && \ + (t)->t_astflag && \ + ((t)->t_proc_flag & TP_CHKPT)) + +/* ARGSUSED */ +static int +dr_stop_user_threads(dr_sr_handle_t *srh) +{ + int count; + int bailout; + dr_handle_t *handle = srh->sr_dr_handlep; + static fn_t f = "dr_stop_user_threads"; + kthread_id_t tp; + + extern void add_one_utstop(); + extern void utstop_timedwait(clock_t); + extern void utstop_init(void); + +#define DR_UTSTOP_RETRY 4 +#define DR_UTSTOP_WAIT hz + + if (dr_skip_user_threads) + return (DDI_SUCCESS); + + utstop_init(); + + /* we need to try a few times to get past fork, etc. 
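+ * Each retry waits progressively longer, + * count * count * DR_UTSTOP_WAIT ticks.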
*/ + srh->sr_err_idx = 0; + for (count = 0; count < DR_UTSTOP_RETRY; count++) { + /* walk the entire threadlist */ + mutex_enter(&pidlock); + for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) { + proc_t *p = ttoproc(tp); + + /* handle kernel threads separately */ + if (p->p_as == &kas || p->p_stat == SZOMB) + continue; + + mutex_enter(&p->p_lock); + thread_lock(tp); + + if (tp->t_state == TS_STOPPED) { + /* add another reason to stop this thread */ + tp->t_schedflag &= ~TS_RESUME; + } else { + tp->t_proc_flag |= TP_CHKPT; + + thread_unlock(tp); + mutex_exit(&p->p_lock); + add_one_utstop(); + mutex_enter(&p->p_lock); + thread_lock(tp); + + aston(tp); + + if (ISWAKEABLE(tp) || ISWAITING(tp)) { + setrun_locked(tp); + } + + } + + /* grab thread if needed */ + if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU) + poke_cpu(tp->t_cpu->cpu_id); + + + thread_unlock(tp); + mutex_exit(&p->p_lock); + } + mutex_exit(&pidlock); + + + /* let everything catch up */ + utstop_timedwait(count * count * DR_UTSTOP_WAIT); + + + /* now, walk the threadlist again to see if we are done */ + mutex_enter(&pidlock); + for (tp = curthread->t_next, bailout = 0; + tp != curthread; tp = tp->t_next) { + proc_t *p = ttoproc(tp); + + /* handle kernel threads separately */ + if (p->p_as == &kas || p->p_stat == SZOMB) + continue; + + /* + * If this thread didn't stop, and we don't allow + * unstopped blocked threads, bail. + */ + thread_lock(tp); + if (!CPR_ISTOPPED(tp) && + !(dr_allow_blocked_threads && + DR_VSTOPPED(tp))) { + bailout = 1; + if (count == DR_UTSTOP_RETRY - 1) { + /* + * save the pid for later reporting + */ + srh->sr_err_idx = + dr_add_int(srh->sr_err_ints, + srh->sr_err_idx, DR_MAX_ERR_INT, + (uint64_t)p->p_pid); + + cmn_err(CE_WARN, "%s: " + "failed to stop thread: " + "process=%s, pid=%d", + f, p->p_user.u_psargs, p->p_pid); + + PR_QR("%s: failed to stop thread: " + "process=%s, pid=%d, t_id=0x%p, " + "t_state=0x%x, t_proc_flag=0x%x, " + "t_schedflag=0x%x\n", + f, p->p_user.u_psargs, p->p_pid, + (void *)tp, tp->t_state, + tp->t_proc_flag, tp->t_schedflag); + } + + } + thread_unlock(tp); + } + mutex_exit(&pidlock); + + /* were all the threads stopped? */ + if (!bailout) + break; + } + + /* were we unable to stop all threads after a few tries? 
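+ * If so, fail the suspend with ESBD_UTHREAD and report the + * PIDs collected above.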
*/ + if (bailout) { + handle->h_err = drerr_int(ESBD_UTHREAD, srh->sr_err_ints, + srh->sr_err_idx, 0); + return (ESRCH); + } + + return (DDI_SUCCESS); +} + +static void +dr_start_user_threads(void) +{ + kthread_id_t tp; + + mutex_enter(&pidlock); + + /* walk all threads and release them */ + for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) { + proc_t *p = ttoproc(tp); + + /* skip kernel threads */ + if (ttoproc(tp)->p_as == &kas) + continue; + + mutex_enter(&p->p_lock); + tp->t_proc_flag &= ~TP_CHKPT; + mutex_exit(&p->p_lock); + + thread_lock(tp); + if (CPR_ISTOPPED(tp)) { + /* back on the runq */ + tp->t_schedflag |= TS_RESUME; + setrun_locked(tp); + } + thread_unlock(tp); + } + + mutex_exit(&pidlock); +} + +static void +dr_signal_user(int sig) +{ + struct proc *p; + + mutex_enter(&pidlock); + + for (p = practive; p != NULL; p = p->p_next) { + /* only user threads */ + if (p->p_exec == NULL || p->p_stat == SZOMB || + p == proc_init || p == ttoproc(curthread)) + continue; + + mutex_enter(&p->p_lock); + sigtoproc(p, NULL, sig); + mutex_exit(&p->p_lock); + } + + mutex_exit(&pidlock); + + /* add a bit of delay */ + delay(hz); +} + +void +dr_resume(dr_sr_handle_t *srh) +{ + dr_handle_t *handle; + + handle = srh->sr_dr_handlep; + + switch (srh->sr_suspend_state) { + case DR_SRSTATE_FULL: + + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* + * Prevent false alarm in tod_validate() due to tod + * value change between suspend and resume + */ + mutex_enter(&tod_lock); + tod_status_set(TOD_DR_RESUME_DONE); + mutex_exit(&tod_lock); + + dr_enable_intr(); /* enable intr & clock */ + + start_cpus(); + mutex_exit(&cpu_lock); + + /* + * This should only be called if drmach_suspend_last() + * was called and state transitioned to DR_SRSTATE_FULL + * to prevent resume attempts on device instances that + * were not previously suspended. 
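+ * The cases below intentionally fall through, so resume unwinds + * in the reverse order of dr_suspend().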
+ */ + drmach_resume_first(); + + /* FALLTHROUGH */ + + case DR_SRSTATE_DRIVER: + /* + * resume drivers + */ + srh->sr_err_idx = 0; + + /* no parent dip to hold busy */ + dr_resume_devices(ddi_root_node(), srh); + + if (srh->sr_err_idx && srh->sr_dr_handlep) { + (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_RESUME, + srh->sr_err_ints, srh->sr_err_idx, 1); + } + + /* + * resume the lock manager + */ + lm_cprresume(); + + /* FALLTHROUGH */ + + case DR_SRSTATE_USER: + /* + * finally, resume user threads + */ + if (!dr_skip_user_threads) { + prom_printf("DR: resuming user threads...\n"); + dr_start_user_threads(); + } + /* FALLTHROUGH */ + + case DR_SRSTATE_BEGIN: + default: + /* + * let those who care know that we've just resumed + */ + PR_QR("sending SIGTHAW...\n"); + dr_signal_user(SIGTHAW); + break; + } + + i_ndi_allow_device_tree_changes(handle->h_ndi); + + prom_printf("DR: resume COMPLETED\n"); +} + +int +dr_suspend(dr_sr_handle_t *srh) +{ + dr_handle_t *handle; + int force; + int dev_errs_idx; + uint64_t dev_errs[DR_MAX_ERR_INT]; + int rc = DDI_SUCCESS; + + handle = srh->sr_dr_handlep; + + force = dr_cmd_flags(handle) & SBD_FLAG_FORCE; + + i_ndi_block_device_tree_changes(&handle->h_ndi); + + prom_printf("\nDR: suspending user threads...\n"); + srh->sr_suspend_state = DR_SRSTATE_USER; + if (((rc = dr_stop_user_threads(srh)) != DDI_SUCCESS) && + dr_check_user_stop_result) { + dr_resume(srh); + return (rc); + } + + if (!force) { + struct dr_ref drc = {0}; + + prom_printf("\nDR: checking devices...\n"); + dev_errs_idx = 0; + + drc.arr = dev_errs; + drc.idx = &dev_errs_idx; + drc.len = DR_MAX_ERR_INT; + + /* + * Since the root node can never go away, it + * doesn't have to be held. + */ + ddi_walk_devs(ddi_root_node(), dr_check_unsafe_major, &drc); + if (dev_errs_idx) { + handle->h_err = drerr_int(ESBD_UNSAFE, dev_errs, + dev_errs_idx, 1); + dr_resume(srh); + return (DDI_FAILURE); + } + PR_QR("done\n"); + } else { + prom_printf("\nDR: dr_suspend invoked with force flag\n"); + } + +#ifndef SKIP_SYNC + /* + * This sync swaps out all user pages + */ + vfs_sync(SYNC_ALL); +#endif + + /* + * special treatment for lock manager + */ + lm_cprsuspend(); + +#ifndef SKIP_SYNC + /* + * sync the file system in case we never make it back + */ + sync(); +#endif + + /* + * now suspend drivers + */ + prom_printf("DR: suspending drivers...\n"); + srh->sr_suspend_state = DR_SRSTATE_DRIVER; + srh->sr_err_idx = 0; + /* No parent to hold busy */ + if ((rc = dr_suspend_devices(ddi_root_node(), srh)) != DDI_SUCCESS) { + if (srh->sr_err_idx && srh->sr_dr_handlep) { + (srh->sr_dr_handlep)->h_err = drerr_int(ESBD_SUSPEND, + srh->sr_err_ints, srh->sr_err_idx, 1); + } + dr_resume(srh); + return (rc); + } + + drmach_suspend_last(); + + /* + * finally, grab all cpus + */ + srh->sr_suspend_state = DR_SRSTATE_FULL; + + mutex_enter(&cpu_lock); + pause_cpus(NULL); + dr_stop_intr(); + + return (rc); +} + +int +dr_pt_test_suspend(dr_handle_t *hp) +{ + dr_sr_handle_t *srh; + int err; + uint_t psmerr; + static fn_t f = "dr_pt_test_suspend"; + + PR_QR("%s...\n", f); + + srh = dr_get_sr_handle(hp); + if ((err = dr_suspend(srh)) == DDI_SUCCESS) { + dr_resume(srh); + if ((hp->h_err) && ((psmerr = hp->h_err->e_code) != 0)) { + PR_QR("%s: error on dr_resume()", f); + switch (psmerr) { + case ESBD_RESUME: + PR_QR("Couldn't resume devices: %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_KTHREAD: + PR_ALL("psmerr is ESBD_KTHREAD\n"); + break; + default: + PR_ALL("Resume error unknown = %d\n", psmerr); + break; + } + } + } else { +
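/* dr_suspend() itself failed; decode the error code saved in h_err */ +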
PR_ALL("%s: dr_suspend() failed, err = 0x%x\n", f, err); + psmerr = hp->h_err ? hp->h_err->e_code : ESBD_NOERROR; + switch (psmerr) { + case ESBD_UNSAFE: + PR_ALL("Unsafe devices (major #): %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_RTTHREAD: + PR_ALL("RT threads (PIDs): %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_UTHREAD: + PR_ALL("User threads (PIDs): %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_SUSPEND: + PR_ALL("Non-suspendable devices (major #): %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_RESUME: + PR_ALL("Could not resume devices (major #): %s\n", + DR_GET_E_RSC(hp->h_err)); + break; + + case ESBD_KTHREAD: + PR_ALL("psmerr is ESBD_KTHREAD\n"); + break; + + case ESBD_NOERROR: + PR_ALL("sbd_error_t error code not set\n"); + break; + + default: + PR_ALL("Unknown error psmerr = %d\n", psmerr); + break; + } + } + dr_release_sr_handle(srh); + + return (0); +} + +/* + * Add a new integer value to the end of an array. Don't allow duplicates to + * appear in the array, and don't allow the array to overflow. Return the new + * total number of entries in the array. + */ +static int +dr_add_int(uint64_t *arr, int idx, int len, uint64_t val) +{ + int i; + + if (arr == NULL) + return (0); + + if (idx >= len) + return (idx); + + for (i = 0; i < idx; i++) { + if (arr[i] == val) + return (idx); + } + + arr[idx++] = val; + + return (idx); +} + +/* + * Construct an sbd_error_t featuring a string representation of an array of + * integers as its e_rsc. + */ +static sbd_error_t * +drerr_int(int e_code, uint64_t *arr, int idx, int majors) +{ + int i, n, buf_len, buf_idx, buf_avail; + char *dname; + char *buf; + sbd_error_t *new_sbd_err; + static char s_ellipsis[] = "..."; + + if (arr == NULL || idx <= 0) + return (NULL); + + /* MAXPATHLEN is the size of the e_rsc field in sbd_error_t. */ + buf = (char *)kmem_zalloc(MAXPATHLEN, KM_SLEEP); + + /* + * This is the total working area of the buffer. It must be computed + * as the size of 'buf', minus reserved space for the null terminator + * and the ellipsis string. + */ + buf_len = MAXPATHLEN - (strlen(s_ellipsis) + 1); + + /* Construct a string representation of the array values */ + for (buf_idx = 0, i = 0; i < idx; i++) { + buf_avail = buf_len - buf_idx; + if (majors) { + dname = ddi_major_to_name(arr[i]); + if (dname) { + n = snprintf(&buf[buf_idx], buf_avail, "%s, ", + dname); + } else { + n = snprintf(&buf[buf_idx], buf_avail, + "major %" PRIu64 ", ", arr[i]); + } + } else { + n = snprintf(&buf[buf_idx], buf_avail, "%" PRIu64 ", ", + arr[i]); + } + + /* An ellipsis gets appended when no more values fit */ + if (n >= buf_avail) { + (void) strcpy(&buf[buf_idx], s_ellipsis); + break; + } + + buf_idx += n; + } + + /* If all the contents fit, remove the trailing comma */ + if (n < buf_avail) { + buf[--buf_idx] = '\0'; + buf[--buf_idx] = '\0'; + } + + /* Return an sbd_error_t with the buffer and e_code */ + new_sbd_err = drerr_new(1, e_code, buf); + kmem_free(buf, MAXPATHLEN); + return (new_sbd_err); +} diff --git a/usr/src/uts/i86pc/io/dr/dr_util.c b/usr/src/uts/i86pc/io/dr/dr_util.c new file mode 100644 index 0000000000..d60dc7f7b8 --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/dr_util.c @@ -0,0 +1,457 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/param.h> /* for NULL */ +#include <sys/sbd_ioctl.h> +#include <sys/dr_util.h> +#include <sys/varargs.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> + +/* sbd_etab[] and sbd_etab_len provided by sbdgenerr.pl */ +extern sbd_etab_t sbd_etab[]; +extern int sbd_etab_len; + +sbd_error_t * +sbd_err_new(int e_code, char *fmt, va_list args) +{ + sbd_error_t *new; + + new = GETSTRUCT(sbd_error_t, 1); + new->e_code = e_code; + + if (fmt) + (void) vsnprintf(new->e_rsc, sizeof (new->e_rsc), fmt, args); + + return (new); +} + +void +sbd_err_log(sbd_error_t *ep, int ce) +{ + char buf[32]; + char *fmt; + char *txt; + int i; + sbd_etab_t *tp; + + if (!ep) + return; + + if (ep->e_rsc[0] == '\0') + fmt = "%s"; + else + fmt = "%s: %s"; + + for (tp = sbd_etab, i = 0; i < sbd_etab_len; i++, tp++) + if (ep->e_code >= tp->t_base && ep->e_code <= tp->t_bnd) + break; + + if (i < sbd_etab_len) + txt = tp->t_text[ep->e_code - tp->t_base]; + else { + (void) snprintf(buf, sizeof (buf), "error %d", ep->e_code); + txt = buf; + } + + cmn_err(ce, fmt, txt, ep->e_rsc); +} + +void +sbd_err_clear(sbd_error_t **ep) +{ + FREESTRUCT(*ep, sbd_error_t, 1); + *ep = NULL; +} + +void +sbd_err_set_c(sbd_error_t **ep, int ce, int e_code, char *fmt, ...) +{ + sbd_error_t *tmp; + va_list args; + + va_start(args, fmt); + + tmp = sbd_err_new(e_code, fmt, args); + + sbd_err_log(tmp, ce); + + if (*ep == NULL) + *ep = tmp; + else + sbd_err_clear(&tmp); + + va_end(args); +} + +void +sbd_err_set(sbd_error_t **ep, int ce, int e_code, char *fmt, ...) +{ + sbd_error_t *tmp; + va_list args; + + va_start(args, fmt); + + tmp = sbd_err_new(e_code, fmt, args); + + sbd_err_log(tmp, ce); + + *ep = tmp; + + va_end(args); +} + +sbd_error_t * +drerr_new_v(int e_code, char *fmt, va_list args) +{ + return (sbd_err_new(e_code, fmt, args)); +} + +sbd_error_t * +drerr_new(int log, int e_code, char *fmt, ...) +{ + sbd_error_t *ep; + va_list args; + + va_start(args, fmt); + ep = sbd_err_new(e_code, fmt, args); + va_end(args); + + if (log) + sbd_err_log(ep, CE_WARN); + + return (ep); +} + +void +drerr_set_c(int log, sbd_error_t **ep, int e_code, char *fmt, ...) +{ + sbd_error_t *err; + va_list args; + + va_start(args, fmt); + err = sbd_err_new(e_code, fmt, args); + va_end(args); + + if (log) + sbd_err_log(err, CE_WARN); + + if (*ep == NULL) + *ep = err; + else + sbd_err_clear(&err); +} + + +/* + * Memlist support. 
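+ * Entries are allocated and freed with GETSTRUCT/FREESTRUCT, and + * both the ml_next and ml_prev links are maintained.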
+ */ +void +dr_memlist_delete(struct memlist *mlist) +{ + register struct memlist *ml; + + for (ml = mlist; ml; ml = mlist) { + mlist = ml->ml_next; + FREESTRUCT(ml, struct memlist, 1); + } +} + +int +dr_memlist_intersect(struct memlist *al, struct memlist *bl) +{ + uint64_t astart, aend, bstart, bend; + + if ((al == NULL) || (bl == NULL)) + return (0); + + aend = al->ml_address + al->ml_size; + bstart = bl->ml_address; + bend = bl->ml_address + bl->ml_size; + + while (al && bl) { + while (al && (aend <= bstart)) + if ((al = al->ml_next) != NULL) + aend = al->ml_address + al->ml_size; + if (al == NULL) + return (0); + + if ((astart = al->ml_address) <= bstart) + return (1); + + while (bl && (bend <= astart)) + if ((bl = bl->ml_next) != NULL) + bend = bl->ml_address + bl->ml_size; + if (bl == NULL) + return (0); + + if ((bstart = bl->ml_address) <= astart) + return (1); + } + + return (0); +} + +void +dr_memlist_coalesce(struct memlist *mlist) +{ + uint64_t end, nend; + + if ((mlist == NULL) || (mlist->ml_next == NULL)) + return; + + while (mlist->ml_next) { + end = mlist->ml_address + mlist->ml_size; + if (mlist->ml_next->ml_address <= end) { + struct memlist *nl; + + nend = mlist->ml_next->ml_address + + mlist->ml_next->ml_size; + if (nend > end) + mlist->ml_size += (nend - end); + nl = mlist->ml_next; + mlist->ml_next = mlist->ml_next->ml_next; + if (nl) { + FREESTRUCT(nl, struct memlist, 1); + } + if (mlist->ml_next) + mlist->ml_next->ml_prev = mlist; + } else { + mlist = mlist->ml_next; + } + } +} + +#ifdef DEBUG +void +memlist_dump(struct memlist *mlist) +{ + register struct memlist *ml; + + if (mlist == NULL) + printf("memlist> EMPTY\n"); + else for (ml = mlist; ml; ml = ml->ml_next) + printf("memlist> 0x%" PRIx64 ", 0x%" PRIx64 "\n", + ml->ml_address, ml->ml_size); +} +#endif + +struct memlist * +dr_memlist_dup(struct memlist *mlist) +{ + struct memlist *hl = NULL, *tl, **mlp; + + if (mlist == NULL) + return (NULL); + + mlp = &hl; + tl = *mlp; + for (; mlist; mlist = mlist->ml_next) { + *mlp = GETSTRUCT(struct memlist, 1); + (*mlp)->ml_address = mlist->ml_address; + (*mlp)->ml_size = mlist->ml_size; + (*mlp)->ml_prev = tl; + tl = *mlp; + mlp = &((*mlp)->ml_next); + } + *mlp = NULL; + + return (hl); +} + +struct memlist * +dr_memlist_add_span(struct memlist *mlist, uint64_t base, uint64_t len) +{ + struct memlist *ml, *tl, *nl; + + if (len == 0ull) + return (NULL); + + if (mlist == NULL) { + mlist = GETSTRUCT(struct memlist, 1); + mlist->ml_address = base; + mlist->ml_size = len; + mlist->ml_next = mlist->ml_prev = NULL; + + return (mlist); + } + + for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) { + if (base < ml->ml_address) { + if ((base + len) < ml->ml_address) { + nl = GETSTRUCT(struct memlist, 1); + nl->ml_address = base; + nl->ml_size = len; + nl->ml_next = ml; + if ((nl->ml_prev = ml->ml_prev) != NULL) + nl->ml_prev->ml_next = nl; + ml->ml_prev = nl; + if (mlist == ml) + mlist = nl; + } else { + ml->ml_size = MAX((base + len), + (ml->ml_address + ml->ml_size)) - base; + ml->ml_address = base; + } + break; + + } else if (base <= (ml->ml_address + ml->ml_size)) { + ml->ml_size = MAX((base + len), + (ml->ml_address + ml->ml_size)) - + MIN(ml->ml_address, base); + ml->ml_address = MIN(ml->ml_address, base); + break; + } + } + if (ml == NULL) { + nl = GETSTRUCT(struct memlist, 1); + nl->ml_address = base; + nl->ml_size = len; + nl->ml_next = NULL; + nl->ml_prev = tl; + tl->ml_next = nl; + } + + dr_memlist_coalesce(mlist); + + return (mlist); +} + +struct memlist * 
+dr_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len) +{ + uint64_t end; + struct memlist *ml, *tl, *nlp; + + if (mlist == NULL) + return (NULL); + + end = base + len; + if ((end <= mlist->ml_address) || (base == end)) + return (mlist); + + for (tl = ml = mlist; ml; tl = ml, ml = nlp) { + uint64_t mend; + + nlp = ml->ml_next; + + if (end <= ml->ml_address) + break; + + mend = ml->ml_address + ml->ml_size; + if (base < mend) { + if (base <= ml->ml_address) { + ml->ml_address = end; + if (end >= mend) + ml->ml_size = 0ull; + else + ml->ml_size = mend - ml->ml_address; + } else { + ml->ml_size = base - ml->ml_address; + if (end < mend) { + struct memlist *nl; + /* + * splitting a memlist entry. + */ + nl = GETSTRUCT(struct memlist, 1); + nl->ml_address = end; + nl->ml_size = mend - nl->ml_address; + if ((nl->ml_next = nlp) != NULL) + nlp->ml_prev = nl; + nl->ml_prev = ml; + ml->ml_next = nl; + nlp = nl; + } + } + if (ml->ml_size == 0ull) { + if (ml == mlist) { + if ((mlist = nlp) != NULL) + nlp->ml_prev = NULL; + FREESTRUCT(ml, struct memlist, 1); + if (mlist == NULL) + break; + ml = nlp; + } else { + if ((tl->ml_next = nlp) != NULL) + nlp->ml_prev = tl; + FREESTRUCT(ml, struct memlist, 1); + ml = tl; + } + } + } + } + + return (mlist); +} + +/* + * add span without merging + */ +struct memlist * +dr_memlist_cat_span(struct memlist *mlist, uint64_t base, uint64_t len) +{ + struct memlist *ml, *tl, *nl; + + if (len == 0ull) + return (NULL); + + if (mlist == NULL) { + mlist = GETSTRUCT(struct memlist, 1); + mlist->ml_address = base; + mlist->ml_size = len; + mlist->ml_next = mlist->ml_prev = NULL; + + return (mlist); + } + + for (tl = ml = mlist; ml; tl = ml, ml = ml->ml_next) { + if (base < ml->ml_address) { + nl = GETSTRUCT(struct memlist, 1); + nl->ml_address = base; + nl->ml_size = len; + nl->ml_next = ml; + if ((nl->ml_prev = ml->ml_prev) != NULL) + nl->ml_prev->ml_next = nl; + ml->ml_prev = nl; + if (mlist == ml) + mlist = nl; + break; + } + } + + if (ml == NULL) { + nl = GETSTRUCT(struct memlist, 1); + nl->ml_address = base; + nl->ml_size = len; + nl->ml_next = NULL; + nl->ml_prev = tl; + tl->ml_next = nl; + } + + return (mlist); +} diff --git a/usr/src/uts/i86pc/io/dr/sbdgenerr.pl b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl new file mode 100644 index 0000000000..9980c96459 --- /dev/null +++ b/usr/src/uts/i86pc/io/dr/sbdgenerr.pl @@ -0,0 +1,90 @@ +#!/usr/bin/perl +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2000 by Sun Microsystems, Inc. +# All rights reserved.
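+# +# Reads "#define E..." error definitions from stdin and generates the +# sbd_etab[] tables referenced by sbd_err_log(). A typical invocation +# (assumed) would be: sbdgenerr.pl ESBD < sbd_ioctl.h > sbd_etab.c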
+# + +sub trim { + my ($line) = @_; + $line =~ s#/\*|\*/##g; + $line =~ s#^\s+|\s+$##g; + return $line; +} + +my $filter = 0; +my %prefix; +while ($#ARGV >= 0) { + $prefix{$ARGV[0]} = 0; + shift @ARGV; + $filter = 1; +} + +my $base; +my $bnd; +my @text; +my @sets; +while (<STDIN>) { + my $n = m@^#define\s(E\w\w\w)\w+\s+(\d+)(.*)@; + next unless ($n > 0); + next unless ($filter == 0 || defined $prefix{$1}); + my $txt = trim($3); + if (length($txt) == 0) { + my $l = <STDIN>; + $txt = trim($l); + } + + $base = $2 if (!defined $base); + if (defined $bnd && $2 != $bnd + 1) { + push(@sets, { base => $base, bnd => $bnd }); + $base = $2; + } + $bnd = $2; + push(@text, $txt); +} + +push(@sets, { base => $base, bnd => $bnd }); + +printf "#include <sys/sbd_ioctl.h>\n"; + +my $i = 0; +my $s = 0; +do { + my $set = $sets[$s]; + + printf "static char *sbd_t%d[] = {\n", $set->{base}; + my $n = $set->{bnd} - $set->{base} + 1; + while ($n--) { + printf "\t\"%s\",\n", $text[$i++]; + } + printf "};\n"; +} while (++$s <= $#sets); + +printf "sbd_etab_t sbd_etab[] = {\n"; +$s = 0; +do { + my $set = $sets[$s]; + printf "\t{ %d, %d, sbd_t%d },\n", + $set->{base}, $set->{bnd}, $set->{base}; +} while (++$s <= $#sets); +printf "};\n"; +printf "int sbd_etab_len = %d;\n", $s; diff --git a/usr/src/uts/i86pc/io/mp_platform_common.c b/usr/src/uts/i86pc/io/mp_platform_common.c index 319193bcc0..a3afe84d07 100644 --- a/usr/src/uts/i86pc/io/mp_platform_common.c +++ b/usr/src/uts/i86pc/io/mp_platform_common.c @@ -22,6 +22,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * PSMI 1.1 extensions are supported only in 2.6 and later versions. @@ -29,8 +33,9 @@ * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. * PSMI 1.5 extensions are supported in Solaris Nevada. * PSMI 1.6 extensions are supported in Solaris Nevada. + * PSMI 1.7 extensions are supported in Solaris Nevada. */ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/processor.h> #include <sys/time.h> @@ -93,6 +98,7 @@ static int apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp, int type); static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp); +static void apic_free_apic_cpus(void); static void apic_try_deferred_reprogram(int ipl, int vect); static void delete_defer_repro_ent(int which_irq); static void apic_ioapic_wait_pending_clear(int ioapicindex, @@ -194,6 +200,11 @@ int apic_redist_cpu_skip = 0; int apic_num_imbalance = 0; int apic_num_rebind = 0; +/* + * Maximum number of APIC CPUs in the system, -1 indicates that dynamic + * allocation of CPU ids is disabled. 
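+ *
+ * Editor's note (illustration, not part of the original change): when
+ * CPU DR is supported, acpi_probe() below sets apic_max_nproc to
+ * max_ncpus, so a platform booting with 2 of 8 possible CPUs ends up
+ * with apic_nproc == 2 and apic_max_nproc == 8; without CPU DR the
+ * value stays -1 and apic_nproc alone bounds the cpu array.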
+ */
+int apic_max_nproc = -1;
 int apic_nproc = 0;
 size_t apic_cpus_size = 0;
 int apic_defconf = 0;
@@ -597,6 +608,16 @@ apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
     }
 }

+static void
+apic_free_apic_cpus(void)
+{
+    if (apic_cpus != NULL) {
+        kmem_free(apic_cpus, apic_cpus_size);
+        apic_cpus = NULL;
+        apic_cpus_size = 0;
+    }
+}
+
 static int
 acpi_probe(char *modname)
 {
@@ -639,7 +660,6 @@ acpi_probe(char *modname)
     id = apic_reg_ops->apic_read(APIC_LID_REG);
     local_ids[0] = (uchar_t)(id >> 24);
     apic_nproc = index = 1;
-    CPUSET_ONLY(apic_cpumask, 0);
     apic_io_max = 0;

     ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1);
@@ -652,16 +672,12 @@ acpi_probe(char *modname)
             mpa = (ACPI_MADT_LOCAL_APIC *) ap;
             if (mpa->LapicFlags & ACPI_MADT_ENABLED) {
                 if (mpa->Id == local_ids[0]) {
+                    ASSERT(index == 1);
                     proc_ids[0] = mpa->ProcessorId;
-                    (void) acpica_map_cpu(0,
-                        mpa->ProcessorId);
                 } else if (apic_nproc < NCPU && use_mp &&
                     apic_nproc < boot_ncpus) {
                     local_ids[index] = mpa->Id;
                     proc_ids[index] = mpa->ProcessorId;
-                    CPUSET_ADD(apic_cpumask, index);
-                    (void) acpica_map_cpu(index,
-                        mpa->ProcessorId);
                     index++;
                     apic_nproc++;
                 } else if (apic_nproc == NCPU && !warned) {
@@ -770,9 +786,7 @@ acpi_probe(char *modname)
                 if (apic_nproc < NCPU && use_mp &&
                     apic_nproc < boot_ncpus) {
                     local_ids[index] = mpx2a->LocalApicId;
-                    CPUSET_ADD(apic_cpumask, index);
-                    (void) acpica_map_cpu(index,
-                        mpx2a->Uid);
+                    proc_ids[index] = mpx2a->Uid;
                     index++;
                     apic_nproc++;
                 } else if (apic_nproc == NCPU && !warned) {
@@ -813,7 +827,14 @@ acpi_probe(char *modname)
         ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length);
     }

-    apic_cpus_size = apic_nproc * sizeof (*apic_cpus);
+    /*
+     * Allocate enough space for possible hot-adding of CPUs.
+     * max_ncpus may be less than apic_nproc if it's set by the user.
+     */
+    if (plat_dr_support_cpu()) {
+        apic_max_nproc = max_ncpus;
+    }
+    apic_cpus_size = max(apic_nproc, max_ncpus) * sizeof (*apic_cpus);
     if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL)
         goto cleanup;

@@ -825,6 +846,47 @@ acpi_probe(char *modname)
     for (i = 0; i < apic_nproc; i++) {
         apic_cpus[i].aci_local_id = local_ids[i];
         apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
+        apic_cpus[i].aci_processor_id = proc_ids[i];
+        /* Only build mapping info for CPUs present at boot. */
+        if (i < boot_ncpus)
+            (void) acpica_map_cpu(i, proc_ids[i]);
+    }
+
+    /*
+     * To support CPU dynamic reconfiguration, the apic CPU info structure
+     * for each possible CPU will be pre-allocated at boot time.
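+     * (Editor's illustration of the rules below, example values only:
+     * with boot_ncpus = 2, apic_nproc = 4 and max_ncpus = 8, slots 0-1
+     * boot configured, slots 2-3 start as APIC_CPU_FREE | APIC_CPU_DIRTY,
+     * and slots 4-7 start as APIC_CPU_FREE.)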
+ * The state for each apic CPU info structure will be assigned according + * to the following rules: + * Rule 1: + * Slot index range: [0, min(apic_nproc, boot_ncpus)) + * State flags: 0 + * Note: cpu exists and will be configured/enabled at boot time + * Rule 2: + * Slot index range: [boot_ncpus, apic_nproc) + * State flags: APIC_CPU_FREE | APIC_CPU_DIRTY + * Note: cpu exists but won't be configured/enabled at boot time + * Rule 3: + * Slot index range: [apic_nproc, boot_ncpus) + * State flags: APIC_CPU_FREE + * Note: cpu doesn't exist at boot time + * Rule 4: + * Slot index range: [max(apic_nproc, boot_ncpus), max_ncpus) + * State flags: APIC_CPU_FREE + * Note: cpu doesn't exist at boot time + */ + CPUSET_ZERO(apic_cpumask); + for (i = 0; i < min(boot_ncpus, apic_nproc); i++) { + CPUSET_ADD(apic_cpumask, i); + apic_cpus[i].aci_status = 0; + } + for (i = boot_ncpus; i < apic_nproc; i++) { + apic_cpus[i].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY; + } + for (i = apic_nproc; i < boot_ncpus; i++) { + apic_cpus[i].aci_status = APIC_CPU_FREE; + } + for (i = max(boot_ncpus, apic_nproc); i < max_ncpus; i++) { + apic_cpus[i].aci_status = APIC_CPU_FREE; } for (i = 0; i < apic_io_max; i++) { @@ -926,10 +988,12 @@ acpi_probe(char *modname) /* if setting APIC mode failed above, we fall through to cleanup */ cleanup: + apic_free_apic_cpus(); if (apicadr != NULL) { mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN); apicadr = NULL; } + apic_max_nproc = -1; apic_nproc = 0; for (i = 0; i < apic_io_max; i++) { mapout_ioapic((caddr_t)apicioadr[i], APIC_IO_MEMLEN); @@ -954,6 +1018,11 @@ apic_handle_defconf() { uint_t lid; + /* Failed to probe ACPI MADT tables, disable CPU DR. */ + apic_max_nproc = -1; + apic_free_apic_cpus(); + plat_dr_disable_cpu(); + /*LINTED: pointer cast may result in improper alignment */ apicioadr[0] = mapin_ioapic(APIC_IO_ADDR, APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); @@ -998,8 +1067,7 @@ apic_handle_defconf() return (PSM_SUCCESS); apic_handle_defconf_fail: - if (apic_cpus) - kmem_free(apic_cpus, apic_cpus_size); + apic_free_apic_cpus(); if (apicadr) mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN); if (apicioadr[0]) @@ -1187,7 +1255,51 @@ apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) boolean_t apic_cpu_in_range(int cpu) { - return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); + cpu &= ~IRQ_USER_BOUND; + /* Check whether cpu id is in valid range. */ + if (cpu < 0 || cpu >= apic_nproc) { + return (B_FALSE); + } else if (apic_max_nproc != -1 && cpu >= apic_max_nproc) { + /* + * Check whether cpuid is in valid range if CPU DR is enabled. + */ + return (B_FALSE); + } else if (!CPU_IN_SET(apic_cpumask, cpu)) { + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Must be called with interrupts disabled and the apic_ioapic_lock held. + */ +processorid_t +apic_find_next_cpu_intr(void) +{ + int i, count; + processorid_t cpuid = 0; + + ASSERT(LOCK_HELD(&apic_ioapic_lock)); + + /* + * Find next CPU with INTR_ENABLE flag set. + * Assume that there is at least one CPU with interrupts enabled. 
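+ *
+ * Editor's sketch (illustrative): the loop below is a bounded
+ * round-robin scan. apic_next_bind_cpu wraps to 0 at apic_nproc, so
+ * each call probes every slot at most once; e.g. with
+ * apic_next_bind_cpu == 3 and apic_nproc == 4 it probes 3, 0, 1, 2
+ * and stops at the first in-range slot with APIC_CPU_INTR_ENABLE set.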
+ */ + for (count = 0; count < apic_nproc; count++) { + if (apic_next_bind_cpu >= apic_nproc) { + apic_next_bind_cpu = 0; + } + i = apic_next_bind_cpu++; + if (apic_cpu_in_range(i) && + (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { + cpuid = i; + break; + } + } + ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_INTR_ENABLE) != 0); + + return (cpuid); } uint16_t @@ -1770,7 +1882,7 @@ apic_delspl_common(int irqno, int ipl, int min_ipl, int max_ipl) bind_cpu = irqptr->airq_temp_cpu; if (((uint32_t)bind_cpu != IRQ_UNBOUND) && ((uint32_t)bind_cpu != IRQ_UNINIT)) { - ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); + ASSERT(apic_cpu_in_range(bind_cpu)); if (bind_cpu & IRQ_USER_BOUND) { /* If hardbound, temp_cpu == cpu */ bind_cpu &= ~IRQ_USER_BOUND; @@ -2392,6 +2504,7 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) major_t major; char *name, *drv_name, *prop_val, *cptr; char prop_name[32]; + ulong_t iflag; if (apic_intr_policy == INTR_LOWEST_PRIORITY) @@ -2475,26 +2588,28 @@ apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) i++; bind_cpu = stoi(&cptr); kmem_free(prop_val, prop_len); - /* if specific cpu is bogus, then default to cpu 0 */ - if (bind_cpu >= apic_nproc) { + /* if specific CPU is bogus, then default to next cpu */ + if (!apic_cpu_in_range(bind_cpu)) { cmn_err(CE_WARN, "%s: %s=%s: CPU %d not present", psm_name, prop_name, prop_val, bind_cpu); - bind_cpu = 0; + rc = DDI_PROP_NOT_FOUND; } else { /* indicate that we are bound at user request */ bind_cpu |= IRQ_USER_BOUND; } /* - * no need to check apic_cpus[].aci_status, if specific cpu is + * no need to check apic_cpus[].aci_status, if specific CPU is * not up, then post_cpu_start will handle it. */ - } else { - bind_cpu = apic_next_bind_cpu++; - if (bind_cpu >= apic_nproc) { - apic_next_bind_cpu = 1; - bind_cpu = 0; - } } + if (rc != DDI_PROP_SUCCESS) { + iflag = intr_clear(); + lock_set(&apic_ioapic_lock); + bind_cpu = apic_find_next_cpu_intr(); + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + } + if (drv_name != NULL) cmn_err(CE_CONT, "!%s: %s (%s) instance %d irq 0x%x " "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", @@ -2925,7 +3040,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, /* Mask off high bit so it can be used as array index */ airq_temp_cpu &= ~IRQ_USER_BOUND; - ASSERT(airq_temp_cpu < apic_nproc); + ASSERT(apic_cpu_in_range(airq_temp_cpu)); } /* @@ -2999,7 +3114,7 @@ apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, } else { cpu_infop->aci_temp_bound++; } - ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); + ASSERT(apic_cpu_in_range(bind_cpu)); if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) { apic_cpus[airq_temp_cpu].aci_temp_bound--; @@ -3477,7 +3592,8 @@ apic_intr_redistribute() * we are consistent. 
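 *
 * (Editor's note: with CPU DR the apic_cpus array may now contain
 * free or dirty slots, which is why apic_cpu_in_range() checks are
 * added to the redistribution loops below.)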
*/ for (i = 0; i < apic_nproc; i++) { - if (!(apic_redist_cpu_skip & (1 << i)) && + if (apic_cpu_in_range(i) && + !(apic_redist_cpu_skip & (1 << i)) && (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { cpu_infop = &apic_cpus[i]; @@ -3639,7 +3755,9 @@ apic_intr_redistribute() apic_redist_cpu_skip = 0; } for (i = 0; i < apic_nproc; i++) { - apic_cpus[i].aci_busy = 0; + if (apic_cpu_in_range(i)) { + apic_cpus[i].aci_busy = 0; + } } } @@ -3650,7 +3768,9 @@ apic_cleanup_busy() apic_irq_t *irq_ptr; for (i = 0; i < apic_nproc; i++) { - apic_cpus[i].aci_busy = 0; + if (apic_cpu_in_range(i)) { + apic_cpus[i].aci_busy = 0; + } } for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { diff --git a/usr/src/uts/i86pc/io/pcplusmp/apic.c b/usr/src/uts/i86pc/io/pcplusmp/apic.c index 939ac73dab..13ceaef674 100644 --- a/usr/src/uts/i86pc/io/pcplusmp/apic.c +++ b/usr/src/uts/i86pc/io/pcplusmp/apic.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * PSMI 1.1 extensions are supported only in 2.6 and later versions. @@ -30,8 +34,9 @@ * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. * PSMI 1.5 extensions are supported in Solaris Nevada. * PSMI 1.6 extensions are supported in Solaris Nevada. + * PSMI 1.7 extensions are supported in Solaris Nevada. */ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/processor.h> #include <sys/time.h> @@ -88,7 +93,11 @@ static hrtime_t apic_gettime(); static hrtime_t apic_gethrtime(); static void apic_init(); static void apic_picinit(void); -static int apic_cpu_start(processorid_t, caddr_t); +static int apic_cpu_start(processorid_t cpuid, caddr_t ctx); +static int apic_cpu_stop(processorid_t cpuid, caddr_t ctx); +static int apic_cpu_add(psm_cpu_request_t *reqp); +static int apic_cpu_remove(psm_cpu_request_t *reqp); +static int apic_cpu_ops(psm_cpu_request_t *reqp); static int apic_post_cpu_start(void); static void apic_send_ipi(int cpun, int ipl); static void apic_set_idlecpu(processorid_t cpun); @@ -96,6 +105,7 @@ static void apic_unset_idlecpu(processorid_t cpun); static int apic_intr_enter(int ipl, int *vect); static void apic_setspl(int ipl); static void x2apic_setspl(int ipl); +static void apic_switch_ipi_callback(boolean_t enter); static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl); static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl); static void apic_shutdown(int cmd, int fcn); @@ -137,6 +147,9 @@ int apic_error_display_delay = 100; int apic_cpcovf_vect; int apic_enable_cpcovf_intr = 1; +/* maximum loop count when sending Start IPIs. */ +int apic_sipi_max_loop_count = 0x1000; + /* vector at which CMCI interrupts come in */ int apic_cmci_vect; extern int cmi_enable_cmci; @@ -145,6 +158,10 @@ extern void cmi_cmci_trap(void); static kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */ static int cmci_cpu_setup_registered; +/* number of CPUs in power-on transition state */ +static int apic_poweron_cnt = 0; +static lock_t apic_mode_switch_lock; + /* * The following vector assignments influence the value of ipltopri and * vectortoipl. Note that vectors 0 - 0x1f are not used. 
We can program
@@ -209,6 +226,7 @@ int apic_verbose = 0;
 /* minimum number of timer ticks to program to */
 int apic_min_timer_ticks = 1;
+
 /*
  * Local static data
  */
@@ -252,11 +270,12 @@ static struct psm_ops apic_ops = {
     apic_preshutdown,
     apic_intr_ops, /* Advanced DDI Interrupt framework */
     apic_state, /* save, restore apic state for S3 */
+    apic_cpu_ops, /* CPU control interface. */
 };

 static struct psm_info apic_psm_info = {
-    PSM_INFO_VER01_6, /* version */
+    PSM_INFO_VER01_7, /* version */
     PSM_OWN_EXCLUSIVE, /* ownership */
     (struct psm_ops *)&apic_ops, /* operation */
     APIC_PCPLUSMP_NAME, /* machine name */
@@ -786,6 +805,7 @@ apic_picinit(void)
     LOCK_INIT_CLEAR(&apic_gethrtime_lock);
     LOCK_INIT_CLEAR(&apic_ioapic_lock);
     LOCK_INIT_CLEAR(&apic_error_lock);
+    LOCK_INIT_CLEAR(&apic_mode_switch_lock);

     picsetup(); /* initialise the 8259 */
@@ -814,29 +834,27 @@ apic_picinit(void)
     ioapic_init_intr(IOAPIC_MASK);
 }
-
-/*ARGSUSED1*/
-static int
-apic_cpu_start(processorid_t cpun, caddr_t arg)
+static void
+apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
 {
     int loop_count;
     uint32_t vector;
-    uint_t cpu_id;
+    uint_t apicid;
     ulong_t iflag;

-    cpu_id = apic_cpus[cpun].aci_local_id;
-
-    apic_cmos_ssb_set = 1;
+    apicid = apic_cpus[cpun].aci_local_id;

     /*
-     * Interrupts on BSP cpu will be disabled during these startup
+     * Interrupts on the current CPU will be disabled during these
      * steps in order to avoid unwanted side effects from
      * executing interrupt handlers on a problematic BIOS.
      */
-    iflag = intr_clear();
-    outb(CMOS_ADDR, SSB);
-    outb(CMOS_DATA, BIOS_SHUTDOWN);
+
+    iflag = intr_clear();
+
+    if (start) {
+        outb(CMOS_ADDR, SSB);
+        outb(CMOS_DATA, BIOS_SHUTDOWN);
+    }

     /*
      * According to X2APIC specification in section '2.3.5.1' of
@@ -860,10 +878,10 @@ apic_cpu_start(processorid_t cpun, caddr_t arg)
     /* for integrated - make sure there is one INIT IPI in buffer */
     /* for external - it will wake up the cpu */
-    apic_reg_ops->apic_write_int_cmd(cpu_id, AV_ASSERT | AV_RESET);
+    apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

     /* If only 1 CPU is installed, PENDING bit will not go low */
-    for (loop_count = 0x1000; loop_count; loop_count--) {
+    for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
         if (apic_mode == LOCAL_APIC &&
             apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
             apic_ret();
@@ -871,8 +889,7 @@
         break;
     }

-    apic_reg_ops->apic_write_int_cmd(cpu_id, AV_DEASSERT | AV_RESET);
-
+    apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
     drv_usecwait(20000); /* 20 milli sec */

     if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
@@ -882,18 +899,111 @@
         (APIC_VECTOR_MASK | APIC_IPL_MASK);

         /* to offset the INIT IPI queue up in the buffer */
-        apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
+        apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
         drv_usecwait(200); /* 20 micro sec */

-        apic_reg_ops->apic_write_int_cmd(cpu_id, vector | AV_STARTUP);
-
-        drv_usecwait(200); /* 20 micro sec */
+        /*
+         * Send the second SIPI (Startup IPI) as recommended by the
+         * Intel Software Developer's Manual.
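+         *
+         * Editor's note (background, not from the original change):
+         * together with the INIT IPIs issued above, this forms the
+         * canonical INIT-SIPI-SIPI wakeup sequence -- INIT, the 20ms
+         * wait above, SIPI, a 200us wait, SIPI -- where the SIPI
+         * vector encodes the real-mode startup page.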
+ */ + apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); + drv_usecwait(200); /* 20 micro sec */ } + intr_restore(iflag); +} + +/*ARGSUSED1*/ +static int +apic_cpu_start(processorid_t cpun, caddr_t arg) +{ + ASSERT(MUTEX_HELD(&cpu_lock)); + + if (!apic_cpu_in_range(cpun)) { + return (EINVAL); + } + + /* + * Switch to apic_common_send_ipi for safety during starting other CPUs. + */ + if (apic_mode == LOCAL_X2APIC) { + apic_switch_ipi_callback(B_TRUE); + } + + apic_cmos_ssb_set = 1; + apic_cpu_send_SIPI(cpun, B_TRUE); + return (0); } +/* + * Put CPU into halted state with interrupts disabled. + */ +/*ARGSUSED1*/ +static int +apic_cpu_stop(processorid_t cpun, caddr_t arg) +{ + int rc; + cpu_t *cp; + extern cpuset_t cpu_ready_set; + extern void cpu_idle_intercept_cpu(cpu_t *cp); + + ASSERT(MUTEX_HELD(&cpu_lock)); + + if (!apic_cpu_in_range(cpun)) { + return (EINVAL); + } + if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) { + return (ENOTSUP); + } + + cp = cpu_get(cpun); + ASSERT(cp != NULL); + ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0); + ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0); + ASSERT((cp->cpu_flags & CPU_ENABLE) == 0); + + /* Clear CPU_READY flag to disable cross calls. */ + cp->cpu_flags &= ~CPU_READY; + CPUSET_ATOMIC_DEL(cpu_ready_set, cpun); + rc = xc_flush_cpu(cp); + if (rc != 0) { + CPUSET_ATOMIC_ADD(cpu_ready_set, cpun); + cp->cpu_flags |= CPU_READY; + return (rc); + } + + /* Intercept target CPU at a safe point before powering it off. */ + cpu_idle_intercept_cpu(cp); + + apic_cpu_send_SIPI(cpun, B_FALSE); + cp->cpu_flags &= ~CPU_RUNNING; + + return (0); +} + +static int +apic_cpu_ops(psm_cpu_request_t *reqp) +{ + if (reqp == NULL) { + return (EINVAL); + } + + switch (reqp->pcr_cmd) { + case PSM_CPU_ADD: + return (apic_cpu_add(reqp)); + + case PSM_CPU_REMOVE: + return (apic_cpu_remove(reqp)); + + case PSM_CPU_STOP: + return (apic_cpu_stop(reqp->req.cpu_stop.cpuid, + reqp->req.cpu_stop.ctx)); + + default: + return (ENOTSUP); + } +} #ifdef DEBUG int apic_break_on_cpu = 9; @@ -1377,7 +1487,6 @@ apic_post_cpu_start() { int cpun; static int cpus_started = 1; - struct psm_ops *pops = &apic_ops; /* We know this CPU + BSP started successfully. */ cpus_started++; @@ -1395,23 +1504,11 @@ apic_post_cpu_start() } /* - * We change psm_send_ipi and send_dirintf only if Solaris - * is booted in kmdb & the current CPU is the last CPU being - * brought up. We don't need to do anything if Solaris is running - * in MMIO mode (xAPIC). + * Switch back to x2apic IPI sending method for performance when target + * CPU has entered x2apic mode. */ - if ((boothowto & RB_DEBUG) && - (cpus_started == boot_ncpus || cpus_started == apic_nproc) && - apic_mode == LOCAL_X2APIC) { - /* - * We no longer need help from apic_common_send_ipi() - * since we will not start any more CPUs. - * - * We will need to revisit this if we start supporting - * hot-plugging of CPUs. 
- */ - pops->psm_send_ipi = x2apic_send_ipi; - send_dirintf = pops->psm_send_ipi; + if (apic_mode == LOCAL_X2APIC) { + apic_switch_ipi_callback(B_FALSE); } splx(ipltospl(LOCK_LEVEL)); @@ -1452,13 +1549,230 @@ apic_get_next_processorid(processorid_t cpu_id) return ((processorid_t)0); for (i = cpu_id + 1; i < NCPU; i++) { - if (CPU_IN_SET(apic_cpumask, i)) + if (apic_cpu_in_range(i)) return (i); } return ((processorid_t)-1); } +static int +apic_cpu_add(psm_cpu_request_t *reqp) +{ + int i, rv = 0; + ulong_t iflag; + boolean_t first = B_TRUE; + uchar_t localver; + uint32_t localid, procid; + processorid_t cpuid = (processorid_t)-1; + mach_cpu_add_arg_t *ap; + + ASSERT(reqp != NULL); + reqp->req.cpu_add.cpuid = (processorid_t)-1; + + /* Check whether CPU hotplug is supported. */ + if (!plat_dr_support_cpu() || apic_max_nproc == -1) { + return (ENOTSUP); + } + + ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp; + switch (ap->type) { + case MACH_CPU_ARG_LOCAL_APIC: + localid = ap->arg.apic.apic_id; + procid = ap->arg.apic.proc_id; + if (localid >= 255 || procid > 255) { + cmn_err(CE_WARN, + "!apic: apicid(%u) or procid(%u) is invalid.", + localid, procid); + return (EINVAL); + } + break; + + case MACH_CPU_ARG_LOCAL_X2APIC: + localid = ap->arg.apic.apic_id; + procid = ap->arg.apic.proc_id; + if (localid >= UINT32_MAX) { + cmn_err(CE_WARN, + "!apic: x2apicid(%u) is invalid.", localid); + return (EINVAL); + } else if (localid >= 255 && apic_mode == LOCAL_APIC) { + cmn_err(CE_WARN, "!apic: system is in APIC mode, " + "can't support x2APIC processor."); + return (ENOTSUP); + } + break; + + default: + cmn_err(CE_WARN, + "!apic: unknown argument type %d to apic_cpu_add().", + ap->type); + return (EINVAL); + } + + /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */ + iflag = intr_clear(); + lock_set(&apic_ioapic_lock); + + /* Check whether local APIC id already exists. */ + for (i = 0; i < apic_nproc; i++) { + if (!CPU_IN_SET(apic_cpumask, i)) + continue; + if (apic_cpus[i].aci_local_id == localid) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + cmn_err(CE_WARN, + "!apic: local apic id %u already exists.", + localid); + return (EEXIST); + } else if (apic_cpus[i].aci_processor_id == procid) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + cmn_err(CE_WARN, + "!apic: processor id %u already exists.", + (int)procid); + return (EEXIST); + } + + /* + * There's no local APIC version number available in MADT table, + * so assume that all CPUs are homogeneous and use local APIC + * version number of the first existing CPU. + */ + if (first) { + first = B_FALSE; + localver = apic_cpus[i].aci_local_ver; + } + } + ASSERT(first == B_FALSE); + + /* + * Try to assign the same cpuid if APIC id exists in the dirty cache. + */ + for (i = 0; i < apic_max_nproc; i++) { + if (CPU_IN_SET(apic_cpumask, i)) { + ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0); + continue; + } + ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE); + if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) && + apic_cpus[i].aci_local_id == localid && + apic_cpus[i].aci_processor_id == procid) { + cpuid = i; + break; + } + } + + /* Avoid the dirty cache and allocate fresh slot if possible. */ + if (cpuid == (processorid_t)-1) { + for (i = 0; i < apic_max_nproc; i++) { + if ((apic_cpus[i].aci_status & APIC_CPU_FREE) && + (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) { + cpuid = i; + break; + } + } + } + + /* Try to find any free slot as last resort. 
*/ + if (cpuid == (processorid_t)-1) { + for (i = 0; i < apic_max_nproc; i++) { + if (apic_cpus[i].aci_status & APIC_CPU_FREE) { + cpuid = i; + break; + } + } + } + + if (cpuid == (processorid_t)-1) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + cmn_err(CE_NOTE, + "!apic: failed to allocate cpu id for processor %u.", + procid); + rv = EAGAIN; + } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + cmn_err(CE_NOTE, + "!apic: failed to build mapping for processor %u.", + procid); + rv = EBUSY; + } else { + ASSERT(cpuid >= 0 && cpuid < NCPU); + ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus); + bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0])); + apic_cpus[cpuid].aci_processor_id = procid; + apic_cpus[cpuid].aci_local_id = localid; + apic_cpus[cpuid].aci_local_ver = localver; + CPUSET_ATOMIC_ADD(apic_cpumask, cpuid); + if (cpuid >= apic_nproc) { + apic_nproc = cpuid + 1; + } + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + reqp->req.cpu_add.cpuid = cpuid; + } + + return (rv); +} + +static int +apic_cpu_remove(psm_cpu_request_t *reqp) +{ + int i; + ulong_t iflag; + processorid_t cpuid; + + /* Check whether CPU hotplug is supported. */ + if (!plat_dr_support_cpu() || apic_max_nproc == -1) { + return (ENOTSUP); + } + + cpuid = reqp->req.cpu_remove.cpuid; + + /* Use apic_ioapic_lock to sync with apic_find_next_cpu_intr. */ + iflag = intr_clear(); + lock_set(&apic_ioapic_lock); + + if (!apic_cpu_in_range(cpuid)) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + cmn_err(CE_WARN, + "!apic: cpuid %d doesn't exist in apic_cpus array.", + cpuid); + return (ENODEV); + } + ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0); + + if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) { + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + return (ENOENT); + } + + if (cpuid == apic_nproc - 1) { + /* + * We are removing the highest numbered cpuid so we need to + * find the next highest cpuid as the new value for apic_nproc. + */ + for (i = apic_nproc; i > 0; i--) { + if (CPU_IN_SET(apic_cpumask, i - 1)) { + apic_nproc = i; + break; + } + } + /* at least one CPU left */ + ASSERT(i > 0); + } + CPUSET_ATOMIC_DEL(apic_cpumask, cpuid); + /* mark slot as free and keep it in the dirty cache */ + apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY; + + lock_clear(&apic_ioapic_lock); + intr_restore(iflag); + + return (0); +} /* * type == -1 indicates it is an internal request. 
Do not change
@@ -1898,8 +2212,7 @@ apic_disable_intr(processorid_t cpun)
     if ((irq_ptr = apic_irq_table[i]) != NULL) {
         ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) ||
             (irq_ptr->airq_temp_cpu == IRQ_UNINIT) ||
-            ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) <
-            apic_nproc));
+            (apic_cpu_in_range(irq_ptr->airq_temp_cpu)));

         if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) {
             hardbound = 1;
@@ -1908,12 +2221,7 @@
         if (irq_ptr->airq_temp_cpu == cpun) {
             do {
-                bind_cpu = apic_next_bind_cpu++;
-                if (bind_cpu >= apic_nproc) {
-                    apic_next_bind_cpu = 1;
-                    bind_cpu = 0;
-
-                }
+                bind_cpu = apic_find_next_cpu_intr();
             } while (apic_rebind_all(irq_ptr, bind_cpu));
         }
     }
@@ -2141,6 +2449,8 @@ apic_redistribute_compute(void)
     }
     max_busy = 0;
     for (i = 0; i < apic_nproc; i++) {
+        if (!apic_cpu_in_range(i))
+            continue;

         /*
          * Check if curipl is non zero & if ISR is in
@@ -2481,24 +2791,6 @@ ioapic_write_eoi(int ioapic_ix, uint32_t value)
     ioapic[APIC_IO_EOI] = value;
 }

-static processorid_t
-apic_find_cpu(int flag)
-{
-    processorid_t acid = 0;
-    int i;
-
-    /* Find the first CPU with the passed-in flag set */
-    for (i = 0; i < apic_nproc; i++) {
-        if (apic_cpus[i].aci_status & flag) {
-            acid = i;
-            break;
-        }
-    }
-
-    ASSERT((apic_cpus[acid].aci_status & flag) != 0);
-    return (acid);
-}
-
 /*
  * Call rebind to do the actual programming.
  * Must be called with interrupts disabled and apic_ioapic_lock held
@@ -2508,8 +2800,8 @@ apic_find_cpu(int flag)
  * p is of the type 'apic_irq_t *'.
  *
  * apic_ioapic_lock must be held across this call, as it protects apic_rebind
- * and it protects apic_find_cpu() from a race in which a CPU can be taken
- * offline after a cpu is selected, but before apic_rebind is called to
+ * and it protects apic_find_next_cpu_intr() from a race in which a CPU can be
+ * taken offline after a cpu is selected, but before apic_rebind is called to
  * bind interrupts to it.
  */
 int
@@ -2534,8 +2826,7 @@ apic_setup_io_intr(void *p, int irq, boolean_t deferred)
          * CPU is not up or interrupts are disabled. Fall back to
          * the first available CPU
          */
-        rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
-            drep);
+        rv = apic_rebind(irqptr, apic_find_next_cpu_intr(), drep);
     }

     return (rv);
@@ -2555,6 +2846,45 @@ apic_get_apic_type()
     return (apic_psm_info.p_mach_idstring);
 }

+/*
+ * Switch between the safe and x2APIC IPI sending methods.
+ * CPU may power on in xapic mode or x2apic mode. If a CPU needs to send
+ * IPIs to other CPUs before entering x2APIC mode, it still needs to use
+ * the xAPIC method.
+ * Before sending a Startup IPI to the target CPU, psm_send_ipi will be
+ * changed to apic_common_send_ipi, which detects the current local APIC
+ * mode and uses the right method to send the IPI. If some CPUs fail to
+ * start up, apic_poweron_cnt won't return to zero, so
+ * apic_common_send_ipi will always be used.
+ * psm_send_ipi can't be simply changed back to x2apic_send_ipi if some
+ * CPUs failed to start up because those failed CPUs may recover
+ * themselves later at an
+ */ +static void +apic_switch_ipi_callback(boolean_t enter) +{ + ulong_t iflag; + struct psm_ops *pops = &apic_ops; + + iflag = intr_clear(); + lock_set(&apic_mode_switch_lock); + if (enter) { + ASSERT(apic_poweron_cnt >= 0); + if (apic_poweron_cnt == 0) { + pops->psm_send_ipi = apic_common_send_ipi; + send_dirintf = pops->psm_send_ipi; + } + apic_poweron_cnt++; + } else { + ASSERT(apic_poweron_cnt > 0); + apic_poweron_cnt--; + if (apic_poweron_cnt == 0) { + pops->psm_send_ipi = x2apic_send_ipi; + send_dirintf = pops->psm_send_ipi; + } + } + lock_clear(&apic_mode_switch_lock); + intr_restore(iflag); +} + void x2apic_update_psm() { @@ -2562,28 +2892,9 @@ x2apic_update_psm() ASSERT(pops != NULL); - /* - * We don't need to do any magic if one of the following - * conditions is true : - * - Not being run under kernel debugger. - * - MP is not set. - * - Booted with one CPU only. - * - One CPU configured. - * - * We set apic_common_send_ipi() since kernel debuggers - * attempt to send IPIs to other slave CPUs during - * entry (exit) from (to) debugger. - */ - if (!(boothowto & RB_DEBUG) || use_mp == 0 || - apic_nproc == 1 || boot_ncpus == 1) { - pops->psm_send_ipi = x2apic_send_ipi; - } else { - pops->psm_send_ipi = apic_common_send_ipi; - } - pops->psm_intr_exit = x2apic_intr_exit; pops->psm_setspl = x2apic_setspl; - + pops->psm_send_ipi = x2apic_send_ipi; send_dirintf = pops->psm_send_ipi; apic_mode = LOCAL_X2APIC; diff --git a/usr/src/uts/i86pc/io/psm/uppc.c b/usr/src/uts/i86pc/io/psm/uppc.c index 0b6faee267..ad4d378f76 100644 --- a/usr/src/uts/i86pc/io/psm/uppc.c +++ b/usr/src/uts/i86pc/io/psm/uppc.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/mutex.h> #include <sys/types.h> @@ -173,12 +173,13 @@ static struct psm_ops uppc_ops = { (int (*)(dev_info_t *, ddi_intr_handle_impl_t *, psm_intr_op_t, int *))NULL, /* psm_intr_ops */ - uppc_state /* psm_state */ + uppc_state, /* psm_state */ + (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */ }; static struct psm_info uppc_info = { - PSM_INFO_VER01_6, /* version */ + PSM_INFO_VER01_7, /* version */ PSM_OWN_SYS_DEFAULT, /* ownership */ (struct psm_ops *)&uppc_ops, /* operation */ "uppc", /* machine name */ diff --git a/usr/src/uts/i86pc/ml/mpcore.s b/usr/src/uts/i86pc/ml/mpcore.s index 721761f01d..25a943870e 100644 --- a/usr/src/uts/i86pc/ml/mpcore.s +++ b/usr/src/uts/i86pc/ml/mpcore.s @@ -22,9 +22,11 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/asm_linkage.h> #include <sys/asm_misc.h> #include <sys/regset.h> @@ -44,6 +46,7 @@ * - The GDT, IDT, ktss and page directory has been built for us * * Our actions: + * Start CPU: * - We start using our GDT by loading correct values in the * selector registers (cs=KCS_SEL, ds=es=ss=KDS_SEL, fs=KFS_SEL, * gs=KGS_SEL). @@ -51,20 +54,30 @@ * - We load the default LDT into the hardware LDT register. * - We load the default TSS into the hardware task register. 
* - call mp_startup(void) indirectly through the T_PC + * Stop CPU: + * - Put CPU into halted state with interrupts disabled * */ #if defined(__lint) void -real_mode_start(void) +real_mode_start_cpu(void) +{} + +void +real_mode_stop_cpu_stage1(void) +{} + +void +real_mode_stop_cpu_stage2(void) {} #else /* __lint */ #if defined(__amd64) - ENTRY_NP(real_mode_start) + ENTRY_NP(real_mode_start_cpu) #if !defined(__GNUC_AS__) @@ -229,8 +242,8 @@ long_mode_64: pushq $KCS_SEL pushq $kernel_cs_code lretq - .globl real_mode_end -real_mode_end: + .globl real_mode_start_cpu_end +real_mode_start_cpu_end: nop kernel_cs_code: @@ -484,8 +497,8 @@ long_mode_64: pushq $KCS_SEL pushq $kernel_cs_code lretq - .globl real_mode_end -real_mode_end: + .globl real_mode_start_cpu_end +real_mode_start_cpu_end: nop kernel_cs_code: @@ -575,11 +588,11 @@ kernel_cs_code: int $20 /* whoops, returned somehow! */ #endif /* !__GNUC_AS__ */ - SET_SIZE(real_mode_start) + SET_SIZE(real_mode_start_cpu) #elif defined(__i386) - ENTRY_NP(real_mode_start) + ENTRY_NP(real_mode_start_cpu) #if !defined(__GNUC_AS__) @@ -621,8 +634,8 @@ pestart: D16 pushl $KCS_SEL D16 pushl $kernel_cs_code D16 lret - .globl real_mode_end -real_mode_end: + .globl real_mode_start_cpu_end +real_mode_start_cpu_end: nop .globl kernel_cs_code @@ -714,8 +727,8 @@ pestart: D16 pushl $KCS_SEL D16 pushl $kernel_cs_code D16 lret - .globl real_mode_end -real_mode_end: + .globl real_mode_start_cpu_end +real_mode_start_cpu_end: nop .globl kernel_cs_code kernel_cs_code: @@ -767,7 +780,120 @@ kernel_cs_code: int $20 /* whoops, returned somehow! */ #endif - SET_SIZE(real_mode_start) + SET_SIZE(real_mode_start_cpu) #endif /* __amd64 */ + +#if defined(__amd64) + + ENTRY_NP(real_mode_stop_cpu_stage1) + +#if !defined(__GNUC_AS__) + + /* + * For vulcan as we need to do a .code32 and mentally invert the + * meaning of the addr16 and data16 prefixes to get 32-bit access when + * generating code to be executed in 16-bit mode (sigh...) + */ + .code32 + cli + movw %cs, %ax + movw %ax, %ds /* load cs into ds */ + movw %ax, %ss /* and into ss */ + + /* + * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code + */ + movw $CPUHALTCODEOFF, %ax + .byte 0xff, 0xe0 /* jmp *%ax */ + +#else /* __GNUC_AS__ */ + + /* + * NOTE: The GNU assembler automatically does the right thing to + * generate data size operand prefixes based on the code size + * generation mode (e.g. .code16, .code32, .code64) and as such + * prefixes need not be used on instructions EXCEPT in the case + * of address prefixes for code for which the reference is not + * automatically of the default operand size. 
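+ *
+ * Editor's note (illustrative): this is why the GNU as variant below
+ * can write the 16-bit indirect jump as a plain "jmp *%ax", while the
+ * Sun assembler variant above has to spell the same instruction as
+ * raw bytes (.byte 0xff, 0xe0).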
+ */ + .code16 + cli + movw %cs, %ax + movw %ax, %ds /* load cs into ds */ + movw %ax, %ss /* and into ss */ + + /* + * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code + */ + movw $CPUHALTCODEOFF, %ax + jmp *%ax + +#endif /* !__GNUC_AS__ */ + + .globl real_mode_stop_cpu_stage1_end +real_mode_stop_cpu_stage1_end: + nop + + SET_SIZE(real_mode_stop_cpu_stage1) + +#elif defined(__i386) + + ENTRY_NP(real_mode_stop_cpu_stage1) + +#if !defined(__GNUC_AS__) + + cli + D16 movw %cs, %eax + movw %eax, %ds /* load cs into ds */ + movw %eax, %ss /* and into ss */ + + /* + * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code + */ + movw $CPUHALTCODEOFF, %ax + .byte 0xff, 0xe0 /* jmp *%ax */ + +#else /* __GNUC_AS__ */ + + cli + mov %cs, %ax + mov %eax, %ds /* load cs into ds */ + mov %eax, %ss /* and into ss */ + + /* + * Jump to the stage 2 code in the rm_platter_va->rm_cpu_halt_code + */ + movw $CPUHALTCODEOFF, %ax + jmp *%ax + +#endif /* !__GNUC_AS__ */ + + .globl real_mode_stop_cpu_stage1_end +real_mode_stop_cpu_stage1_end: + nop + + SET_SIZE(real_mode_stop_cpu_stage1) + +#endif /* __amd64 */ + + ENTRY_NP(real_mode_stop_cpu_stage2) + + movw $0xdead, %ax + movw %ax, CPUHALTEDOFF + +real_mode_stop_cpu_loop: + /* + * Put CPU into halted state. + * Only INIT, SMI, NMI could break the loop. + */ + hlt + jmp real_mode_stop_cpu_loop + + .globl real_mode_stop_cpu_stage2_end +real_mode_stop_cpu_stage2_end: + nop + + SET_SIZE(real_mode_stop_cpu_stage2) + #endif /* __lint */ diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in index f857617beb..029193b35e 100644 --- a/usr/src/uts/i86pc/ml/offsets.in +++ b/usr/src/uts/i86pc/ml/offsets.in @@ -295,6 +295,8 @@ rm_platter rm_cpu CPUNOFF rm_cr4 CR4OFF rm_x86feature X86FEATURE + rm_cpu_halt_code CPUHALTCODEOFF + rm_cpu_halted CPUHALTEDOFF ddi_acc_impl ahi_acc_attr ACC_ATTR diff --git a/usr/src/uts/i86pc/os/acpi_fw.h b/usr/src/uts/i86pc/os/acpi_fw.h index 032e231f3f..54a34ef591 100644 --- a/usr/src/uts/i86pc/os/acpi_fw.h +++ b/usr/src/uts/i86pc/os/acpi_fw.h @@ -22,6 +22,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #ifndef _ACPI_FW_H #define _ACPI_FW_H @@ -161,6 +165,28 @@ struct slit { */ extern struct slit *slit_ptr; +struct msct_proximity_domain { + uint8_t revision; + uint8_t length; + uint32_t domain_min; + uint32_t domain_max; + uint32_t processor_max; + uint64_t memory_max; +}; + +struct msct { + struct table_header hdr; + uint32_t proximity_domain_offset; + uint32_t maximum_proximity_domains; + uint32_t maximum_power_domains; + uint64_t maximum_physical_address; +}; + +/* + * Pointer to Maximum System Capability Table (MSCT) + */ +extern struct msct *msct_ptr; + struct cfg_base_addr_alloc { uint64_t base_addr; uint16_t segment; diff --git a/usr/src/uts/i86pc/os/cmi.c b/usr/src/uts/i86pc/os/cmi.c index f9d722f031..4d27eb61d3 100644 --- a/usr/src/uts/i86pc/os/cmi.c +++ b/usr/src/uts/i86pc/os/cmi.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -100,6 +100,8 @@ typedef struct cmi { } cmi_t; static cmi_t *cmi_list; +static const cmi_mc_ops_t *cmi_mc_global_ops; +static void *cmi_mc_global_data; static kmutex_t cmi_load_lock; /* @@ -107,6 +109,7 @@ static kmutex_t cmi_load_lock; * interface. 
*/ extern cmi_hdl_t cmi_hdl_create(enum cmi_hdl_class, uint_t, uint_t, uint_t); +extern void cmi_hdl_destroy(cmi_hdl_t ophdl); extern void cmi_hdl_setcmi(cmi_hdl_t, void *, void *); extern void *cmi_hdl_getcmi(cmi_hdl_t); extern void cmi_hdl_setmc(cmi_hdl_t, const struct cmi_mc_ops *, void *); @@ -484,9 +487,8 @@ cmi_init(enum cmi_hdl_class class, uint_t chipid, uint_t coreid, } /* - * cmi_fini is not called at the moment. It is intended to be called - * on DR deconfigure of a cpu resource. It should not be called at - * simple offline of a cpu. + * cmi_fini is called on DR deconfigure of a cpu resource. + * It should not be called at simple offline of a cpu. */ void cmi_fini(cmi_hdl_t hdl) @@ -499,7 +501,7 @@ cmi_fini(cmi_hdl_t hdl) if (CMI_OP_PRESENT(cmi, cmi_fini)) CMI_OPS(cmi)->cmi_fini(hdl); - cmi_hdl_rele(hdl); /* release hold obtained in cmi_hdl_create */ + cmi_hdl_destroy(hdl); } /* @@ -806,6 +808,21 @@ cmi_mc_register(cmi_hdl_t hdl, const cmi_mc_ops_t *mcops, void *mcdata) cmi_hdl_setmc(hdl, mcops, mcdata); } +cmi_errno_t +cmi_mc_register_global(const cmi_mc_ops_t *mcops, void *mcdata) +{ + if (!cmi_no_mca_init) { + if (cmi_mc_global_ops != NULL || cmi_mc_global_data != NULL || + mcops == NULL || mcops->cmi_mc_patounum == NULL || + mcops->cmi_mc_unumtopa == NULL) { + return (CMIERR_UNKNOWN); + } + cmi_mc_global_data = mcdata; + cmi_mc_global_ops = mcops; + } + return (CMI_SUCCESS); +} + void cmi_mc_sw_memscrub_disable(void) { @@ -820,8 +837,17 @@ cmi_mc_patounum(uint64_t pa, uint8_t valid_hi, uint8_t valid_lo, uint32_t synd, cmi_hdl_t hdl; cmi_errno_t rv; - if (cmi_no_mca_init || - (hdl = cmi_hdl_any()) == NULL) /* short-term hold */ + if (cmi_no_mca_init) + return (CMIERR_MC_ABSENT); + + if (cmi_mc_global_ops != NULL) { + if (cmi_mc_global_ops->cmi_mc_patounum == NULL) + return (CMIERR_MC_NOTSUP); + return (cmi_mc_global_ops->cmi_mc_patounum(cmi_mc_global_data, + pa, valid_hi, valid_lo, synd, syndtype, up)); + } + + if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */ return (CMIERR_MC_ABSENT); if ((mcops = cmi_hdl_getmcops(hdl)) == NULL || @@ -849,8 +875,17 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap) if (up != NULL && nvl != NULL) return (CMIERR_API); /* convert from just one form */ - if (cmi_no_mca_init || - (hdl = cmi_hdl_any()) == NULL) /* short-term hold */ + if (cmi_no_mca_init) + return (CMIERR_MC_ABSENT); + + if (cmi_mc_global_ops != NULL) { + if (cmi_mc_global_ops->cmi_mc_unumtopa == NULL) + return (CMIERR_MC_NOTSUP); + return (cmi_mc_global_ops->cmi_mc_unumtopa(cmi_mc_global_data, + up, nvl, pap)); + } + + if ((hdl = cmi_hdl_any()) == NULL) /* short-term hold */ return (CMIERR_MC_ABSENT); if ((mcops = cmi_hdl_getmcops(hdl)) == NULL || @@ -865,7 +900,7 @@ cmi_mc_unumtopa(mc_unum_t *up, nvlist_t *nvl, uint64_t *pap) pap) == 0)) { return (CMIERR_MC_PARTIALUNUMTOPA); } else { - return (mcops && mcops->cmi_mc_unumtopa ? + return (mcops && mcops->cmi_mc_unumtopa == NULL ? 
CMIERR_MC_NOTSUP : CMIERR_MC_ABSENT); } } @@ -882,10 +917,15 @@ cmi_mc_logout(cmi_hdl_t hdl, boolean_t ismc, boolean_t sync) { const struct cmi_mc_ops *mcops; - if (cmi_no_mca_init || (mcops = cmi_hdl_getmcops(hdl)) == NULL) + if (cmi_no_mca_init) return; - if (mcops->cmi_mc_logout != NULL) + if (cmi_mc_global_ops != NULL) + mcops = cmi_mc_global_ops; + else + mcops = cmi_hdl_getmcops(hdl); + + if (mcops != NULL && mcops->cmi_mc_logout != NULL) mcops->cmi_mc_logout(hdl, ismc, sync); } diff --git a/usr/src/uts/i86pc/os/cmi_hw.c b/usr/src/uts/i86pc/os/cmi_hw.c index 1c2dbeaad6..8e3ae0f0af 100644 --- a/usr/src/uts/i86pc/os/cmi_hw.c +++ b/usr/src/uts/i86pc/os/cmi_hw.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * CPU Module Interface - hardware abstraction. @@ -94,6 +98,7 @@ typedef struct cmi_hdl_impl { #define HDLOPS(hdl) ((hdl)->cmih_ops) #define CMIH_F_INJACTV 0x1ULL +#define CMIH_F_DEAD 0x2ULL /* * Ops structure for handle operations. @@ -847,9 +852,18 @@ ntv_int(cmi_hdl_impl_t *hdl, int int_no) static int ntv_online(cmi_hdl_impl_t *hdl, int new_status, int *old_status) { + int rc; processorid_t cpuid = HDLPRIV(hdl)->cpu_id; - return (p_online_internal(cpuid, new_status, old_status)); + while (mutex_tryenter(&cpu_lock) == 0) { + if (hdl->cmih_flags & CMIH_F_DEAD) + return (EBUSY); + delay(1); + } + rc = p_online_internal_locked(cpuid, new_status, old_status); + mutex_exit(&cpu_lock); + + return (rc); } #else /* __xpv */ @@ -1387,15 +1401,32 @@ void cmi_hdl_rele(cmi_hdl_t ophdl) { cmi_hdl_impl_t *hdl = IMPLHDL(ophdl); - cmi_hdl_ent_t *ent; ASSERT(*hdl->cmih_refcntp > 0); + (void) atomic_dec_32_nv(hdl->cmih_refcntp); +} - if (atomic_dec_32_nv(hdl->cmih_refcntp) > 0) - return; +void +cmi_hdl_destroy(cmi_hdl_t ophdl) +{ + cmi_hdl_impl_t *hdl = IMPLHDL(ophdl); + cmi_hdl_ent_t *ent; + + /* Release the reference count held by cmi_hdl_create(). */ + ASSERT(*hdl->cmih_refcntp > 0); + (void) atomic_dec_32_nv(hdl->cmih_refcntp); + hdl->cmih_flags |= CMIH_F_DEAD; ent = cmi_hdl_ent_lookup(hdl->cmih_chipid, hdl->cmih_coreid, hdl->cmih_strandid); + /* + * Use busy polling instead of condition variable here because + * cmi_hdl_rele() may be called from #MC handler. + */ + while (cmi_hdl_canref(ent)) { + cmi_hdl_rele(ophdl); + delay(1); + } ent->cmae_hdlp = NULL; kmem_free(hdl, sizeof (*hdl)); @@ -1628,6 +1659,33 @@ cmi_ntv_hwstrandid(cpu_t *cp) return (cpuid_get_clogid(cp) % strands_per_core); } + +static void +cmi_ntv_hwdisable_mce_xc(void) +{ + ulong_t cr4; + + cr4 = getcr4(); + cr4 = cr4 & (~CR4_MCE); + setcr4(cr4); +} + +void +cmi_ntv_hwdisable_mce(cmi_hdl_t hdl) +{ + cpuset_t set; + cmi_hdl_impl_t *thdl = IMPLHDL(hdl); + cpu_t *cp = HDLPRIV(thdl); + + if (CPU->cpu_id == cp->cpu_id) { + cmi_ntv_hwdisable_mce_xc(); + } else { + CPUSET_ONLY(set, cp->cpu_id); + xc_call(NULL, NULL, NULL, CPUSET2BV(set), + (xc_func_t)cmi_ntv_hwdisable_mce_xc); + } +} + #endif /* __xpv */ void diff --git a/usr/src/uts/i86pc/os/cms.c b/usr/src/uts/i86pc/os/cms.c index 18d3b046f6..1fe03d3f30 100644 --- a/usr/src/uts/i86pc/os/cms.c +++ b/usr/src/uts/i86pc/os/cms.c @@ -23,6 +23,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ #include <sys/types.h> #include <sys/cpu_module_ms_impl.h> @@ -421,9 +425,19 @@ void cms_fini(cmi_hdl_t hdl) { cms_t *cms = HDL2CMS(hdl); + struct cms_ctl *cdp; if (CMS_OP_PRESENT(cms, cms_fini)) CMS_OPS(cms)->cms_fini(hdl); + + mutex_enter(&cms_load_lock); + cdp = (struct cms_ctl *)cmi_hdl_getspecific(hdl); + if (cdp != NULL) { + if (cdp->cs_cms != NULL) + cms_rele(cdp->cs_cms); + kmem_free(cdp, sizeof (*cdp)); + } + mutex_exit(&cms_load_lock); } boolean_t diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c index 2599d3530a..103955a097 100644 --- a/usr/src/uts/i86pc/os/cpr_impl.c +++ b/usr/src/uts/i86pc/os/cpr_impl.c @@ -337,7 +337,7 @@ i_cpr_pre_resume_cpus() CPUSET_ONLY(cpu_ready_set, boot_cpuid); CPUSET_ONLY(procset, boot_cpuid); - for (who = 0; who < ncpus; who++) { + for (who = 0; who < max_ncpus; who++) { wc_cpu_t *cpup = wc_other_cpus + who; wc_desctbr_t gdt; @@ -363,7 +363,10 @@ i_cpr_pre_resume_cpus() init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt); - if ((err = mach_cpuid_start(who, rm_platter_va)) != 0) { + mutex_enter(&cpu_lock); + err = mach_cpuid_start(who, rm_platter_va); + mutex_exit(&cpu_lock); + if (err != 0) { cmn_err(CE_WARN, "cpu%d: failed to start during " "suspend/resume error %d", who, err); continue; @@ -481,10 +484,13 @@ prt_other_cpus() return; } - for (who = 0; who < ncpus; who++) { + for (who = 0; who < max_ncpus; who++) { wc_cpu_t *cpup = wc_other_cpus + who; + if (!CPU_IN_SET(mp_cpus, who)) + continue; + PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, " "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase=" AFMT ", sp=%lx\n", who, @@ -1009,7 +1015,7 @@ i_cpr_alloc_cpus(void) */ if (wc_other_cpus == NULL) { - wc_other_cpus = kmem_zalloc(ncpus * sizeof (wc_cpu_t), + wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t), KM_SLEEP); } @@ -1022,7 +1028,7 @@ i_cpr_free_cpus(void) wc_cpu_t *wc_cpu; if (wc_other_cpus != NULL) { - for (index = 0; index < ncpus; index++) { + for (index = 0; index < max_ncpus; index++) { wc_cpu = wc_other_cpus + index; if (wc_cpu->wc_saved_stack != NULL) { kmem_free(wc_cpu->wc_saved_stack, @@ -1030,7 +1036,8 @@ i_cpr_free_cpus(void) } } - kmem_free((void *) wc_other_cpus, ncpus * sizeof (wc_cpu_t)); + kmem_free((void *) wc_other_cpus, + max_ncpus * sizeof (wc_cpu_t)); wc_other_cpus = NULL; } } diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 8a14fccaa8..d47d680939 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -430,6 +430,9 @@ uint32_t cpuid_feature_ecx_exclude; uint32_t cpuid_feature_edx_include; uint32_t cpuid_feature_edx_exclude; +/* + * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs. + */ void cpuid_alloc_space(cpu_t *cpu) { @@ -439,6 +442,7 @@ cpuid_alloc_space(cpu_t *cpu) * their cpuid_info struct allocated here. 
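 *
 * (Editor's note: the BSP instead uses the statically allocated
 * cpuid_info0 -- see cpuid_pass1() below -- which is why the
 * assertions here require cpu_id != 0 and a NULL mcpu_cpi slot.)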
*/ ASSERT(cpu->cpu_id != 0); + ASSERT(cpu->cpu_m.mcpu_cpi == NULL); cpu->cpu_m.mcpu_cpi = kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP); } @@ -449,7 +453,8 @@ cpuid_free_space(cpu_t *cpu) struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; int i; - ASSERT(cpu->cpu_id != 0); + ASSERT(cpi != NULL); + ASSERT(cpi != &cpuid_info0); /* * Free up any function 4 related dynamic storage @@ -460,7 +465,8 @@ cpuid_free_space(cpu_t *cpu) kmem_free(cpi->cpi_std_4, cpi->cpi_std_4_size * sizeof (struct cpuid_regs *)); - kmem_free(cpu->cpu_m.mcpu_cpi, sizeof (*cpu->cpu_m.mcpu_cpi)); + kmem_free(cpi, sizeof (*cpi)); + cpu->cpu_m.mcpu_cpi = NULL; } #if !defined(__xpv) @@ -721,9 +727,9 @@ cpuid_pass1(cpu_t *cpu) determine_platform(); #endif /* - * Space statically allocated for cpu0, ensure pointer is set + * Space statically allocated for BSP, ensure pointer is set */ - if (cpu->cpu_id == 0) + if (cpu->cpu_id == 0 && cpu->cpu_m.mcpu_cpi == NULL) cpu->cpu_m.mcpu_cpi = &cpuid_info0; cpi = cpu->cpu_m.mcpu_cpi; ASSERT(cpi != NULL); @@ -3977,9 +3983,9 @@ cpuid_mwait_alloc(cpu_t *cpu) uint32_t *ret; size_t mwait_size; - ASSERT(cpuid_checkpass(cpu, 2)); + ASSERT(cpuid_checkpass(CPU, 2)); - mwait_size = cpu->cpu_m.mcpu_cpi->cpi_mwait.mon_max; + mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max; if (mwait_size == 0) return (NULL); @@ -4016,7 +4022,9 @@ cpuid_mwait_alloc(cpu_t *cpu) void cpuid_mwait_free(cpu_t *cpu) { - ASSERT(cpuid_checkpass(cpu, 2)); + if (cpu->cpu_m.mcpu_cpi == NULL) { + return; + } if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL && cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) { diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c index a092046071..896b110e0e 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c +++ b/usr/src/uts/i86pc/os/cpupm/cpu_acpi.c @@ -733,7 +733,8 @@ cpu_acpi_cache_cst(cpu_acpi_handle_t handle) CPU_ACPI_CSTATES_COUNT(handle) = (uint32_t)cnt; alloc_size = CPU_ACPI_CSTATES_SIZE(cnt); - CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP); + if (CPU_ACPI_CSTATES(handle) == NULL) + CPU_ACPI_CSTATES(handle) = kmem_zalloc(alloc_size, KM_SLEEP); cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); p = cstate; diff --git a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c index 3dd75f00a5..483e8865c7 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpu_idle.c +++ b/usr/src/uts/i86pc/os/cpupm/cpu_idle.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/
@@ -47,6 +47,8 @@
 #define CSTATE_USING_HPET 1
 #define CSTATE_USING_LAT 2
+#define CPU_IDLE_STOP_TIMEOUT 1000
+
 extern void cpu_idle_adaptive(void);
 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
     cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
@@ -677,7 +679,7 @@ cpu_idle_init(cpu_t *cp)
     /*
      * Allocate, initialize and install cstate kstat
      */
-    cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
+    cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
         name, "misc",
         KSTAT_TYPE_NAMED,
         sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
@@ -692,8 +694,8 @@ cpu_idle_init(cpu_t *cp)
         cstate->cs_ksp->ks_data_size += MAXNAMELEN;
         cstate->cs_ksp->ks_private = cstate;
         kstat_install(cstate->cs_ksp);
-        cstate++;
     }
+    cstate++;
 }

 cpupm_alloc_domains(cp, CPUPM_C_STATES);
@@ -771,6 +773,19 @@ cpu_idle_fini(cpu_t *cp)
     mutex_exit(&cpu_idle_callb_mutex);
 }

+/*
+ * This function is introduced here to solve a race condition
+ * between the master and the slave touching the c-state data
+ * structures. After the slave calls this idle function and switches
+ * to the non-deep idle function, the master can go on to reclaim
+ * the resources.
+ */
+static void
+cpu_idle_stop_sync(void)
+{
+    /* switch to the non-deep idle function */
+    CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
+}
+
 static void
 cpu_idle_stop(cpu_t *cp)
 {
@@ -778,15 +793,28 @@
     (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
     cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
     cpu_acpi_cstate_t *cstate;
-    uint_t cpu_max_cstates, i;
+    uint_t cpu_max_cstates, i = 0;

-    /*
-     * place the CPUs in a safe place so that we can disable
-     * deep c-state on them.
-     */
-    pause_cpus(NULL);
-    cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
-    start_cpus();
+    mutex_enter(&cpu_idle_callb_mutex);
+    if (idle_cpu == cpu_idle_adaptive) {
+        /*
+         * invoke the slave to call the synchronous idle function.
+         */
+        cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
+        poke_cpu(cp->cpu_id);
+
+        /*
+         * wait until the slave switches to the non-deep idle function,
+         * so that the master is safe to go on to reclaim the resource.
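+         *
+         * Editor's sketch of the handshake (illustrative): the master
+         * stores cpu_idle_stop_sync into the slave's mcpu_idle_cpu,
+         * pokes the slave out of its current idle loop, then polls
+         * below until it observes mcpu_idle_cpu == non_deep_idle_cpu
+         * -- a simple store/poll rendezvous that warns every
+         * CPU_IDLE_STOP_TIMEOUT iterations.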
+ */ + while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) { + drv_usecwait(10); + if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0) + cmn_err(CE_NOTE, "!cpu_idle_stop: the slave" + " idle stop timeout"); + } + } + mutex_exit(&cpu_idle_callb_mutex); cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); if (cstate) { @@ -937,7 +965,6 @@ cpuidle_cstate_instance(cpu_t *cp) cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state" " object Instance: %d", cpu_id); } - mutex_enter(&cpu_lock); mcpu = &(cp->cpu_m); mcpu->max_cstates = cpu_acpi_get_max_cstates(handle); if (mcpu->max_cstates > CPU_ACPI_C1) { @@ -950,7 +977,6 @@ cpuidle_cstate_instance(cpu_t *cp) cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; (void) cstate_timer_callback(CST_EVENT_ONE_CSTATE); } - mutex_exit(&cpu_lock); CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result); } while (result < 0); diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c index 04113a9485..66b2acc7bf 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c +++ b/usr/src/uts/i86pc/os/cpupm/cpupm_intel.c @@ -93,8 +93,8 @@ cpupm_intel_init(cpu_t *cp) if (x86_vendor != X86_VENDOR_Intel) return (B_FALSE); - family = cpuid_getfamily(CPU); - model = cpuid_getmodel(CPU); + family = cpuid_getfamily(cp); + model = cpuid_getmodel(cp); cpupm_intel_pdccap = CPUPM_INTEL_PDC_MP; diff --git a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c index 78ad028890..72302fcf07 100644 --- a/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c +++ b/usr/src/uts/i86pc/os/cpupm/cpupm_mach.c @@ -240,7 +240,7 @@ cpupm_init(cpu_t *cp) if (mcpu->max_cstates > CPU_ACPI_C1) { (void) cstate_timer_callback( CST_EVENT_MULTIPLE_CSTATES); - CPU->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; + cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; mcpu->mcpu_idle_type = CPU_ACPI_C1; disp_enq_thread = cstate_wakeup; } else { @@ -439,9 +439,13 @@ cpupm_alloc_domains(cpu_t *cp, int state) domain = CPU_ACPI_PSD(handle).sd_domain; type = CPU_ACPI_PSD(handle).sd_type; } else { - mutex_enter(&cpu_lock); - domain = cpuid_get_chipid(cp); - mutex_exit(&cpu_lock); + if (MUTEX_HELD(&cpu_lock)) { + domain = cpuid_get_chipid(cp); + } else { + mutex_enter(&cpu_lock); + domain = cpuid_get_chipid(cp); + mutex_exit(&cpu_lock); + } type = CPU_ACPI_HW_ALL; } dom_ptr = &cpupm_pstate_domains; @@ -452,9 +456,13 @@ cpupm_alloc_domains(cpu_t *cp, int state) domain = CPU_ACPI_TSD(handle).sd_domain; type = CPU_ACPI_TSD(handle).sd_type; } else { - mutex_enter(&cpu_lock); - domain = cpuid_get_chipid(cp); - mutex_exit(&cpu_lock); + if (MUTEX_HELD(&cpu_lock)) { + domain = cpuid_get_chipid(cp); + } else { + mutex_enter(&cpu_lock); + domain = cpuid_get_chipid(cp); + mutex_exit(&cpu_lock); + } type = CPU_ACPI_HW_ALL; } dom_ptr = &cpupm_tstate_domains; @@ -465,9 +473,13 @@ cpupm_alloc_domains(cpu_t *cp, int state) domain = CPU_ACPI_CSD(handle).sd_domain; type = CPU_ACPI_CSD(handle).sd_type; } else { - mutex_enter(&cpu_lock); - domain = cpuid_get_coreid(cp); - mutex_exit(&cpu_lock); + if (MUTEX_HELD(&cpu_lock)) { + domain = cpuid_get_coreid(cp); + } else { + mutex_enter(&cpu_lock); + domain = cpuid_get_coreid(cp); + mutex_exit(&cpu_lock); + } type = CPU_ACPI_HW_ALL; } dom_ptr = &cpupm_cstate_domains; diff --git a/usr/src/uts/i86pc/os/fakebop.c b/usr/src/uts/i86pc/os/fakebop.c index 1c23b7643c..d38bcb046f 100644 --- a/usr/src/uts/i86pc/os/fakebop.c +++ b/usr/src/uts/i86pc/os/fakebop.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */

 /*
  * This file contains the functionality that mimics the boot operations
@@ -40,6 +44,7 @@
 #include <sys/varargs.h>
 #include <sys/param.h>
 #include <sys/machparam.h>
+#include <sys/machsystm.h>
 #include <sys/archsystm.h>
 #include <sys/boot_console.h>
 #include <sys/cmn_err.h>
@@ -151,12 +156,13 @@
 char fastreboot_onpanic_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
 static const char fastreboot_onpanic_args[] = " -B fastreboot_onpanic=0";

 /*
- * Pointers to where System Resource Affinity Table (SRAT) and
- * System Locality Information Table (SLIT) are mapped into virtual memory
+ * Pointers to where System Resource Affinity Table (SRAT), System Locality
+ * Information Table (SLIT) and Maximum System Capability Table (MSCT)
+ * are mapped into virtual memory
  */
 struct srat *srat_ptr = NULL;
 struct slit *slit_ptr = NULL;
-
+struct msct *msct_ptr = NULL;

 /*
  * Allocate aligned physical memory at boot time. This allocator allocates
@@ -2077,6 +2083,7 @@ process_madt(struct madt *tp)
 {
     struct madt_processor *cpu, *end;
     uint32_t cpu_count = 0;
+    uint32_t cpu_possible_count = 0;
     uint8_t cpu_apicid_array[UINT8_MAX + 1];

     if (tp != NULL) {
@@ -2094,6 +2101,7 @@ process_madt(struct madt *tp)
                 cpu->apic_id;
                 cpu_count++;
             }
+            cpu_possible_count++;
         }

         cpu = (struct madt_processor *)
@@ -2109,17 +2117,44 @@
     }

     /*
+     * Check whether the plat-max-ncpus property is already set.
+     */
+    if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
+        /*
+         * Set plat-max-ncpus to the maximum number of possible CPUs
+         * given in the MADT if it hasn't been set.
+         * There's no formal way to detect the maximum number of
+         * possible CPUs supported by a platform according to ACPI
+         * spec 3.0b. So the current CPU hotplug implementation
+         * expects that all possible CPUs will have an entry in the
+         * MADT, and sets plat-max-ncpus to the number of entries in
+         * the MADT.
+         * With the introduction of ACPI 4.0, the Maximum System
+         * Capability Table (MSCT) provides the maximum number of CPUs
+         * supported by the platform. If the MSCT is unavailable, fall
+         * back to the old way.
+         */
+        if (tp != NULL)
+            bsetpropsi(PLAT_MAX_NCPUS_NAME, cpu_possible_count);
+    }
+
+    /*
+     * Set boot property boot-max-ncpus to the number of CPUs existing
+     * at boot time. boot-max-ncpus is mainly used for optimization.
+     */
+    if (tp != NULL)
+        bsetpropsi(BOOT_MAX_NCPUS_NAME, cpu_count);
+
+    /*
      * User-set boot-ncpus overrides firmware count
      */
-    if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
+    if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
         return;

     /*
-     * Set boot property for boot-ncpus to number of CPUs given in MADT
-     * if user hasn't set the property already
+     * Set boot property boot-ncpus to the number of active CPUs given
+     * in the MADT if it hasn't been set yet.
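+     *
+     * Editor's illustration (example values only): on a system whose
+     * MADT lists 8 possible CPUs of which 2 are enabled, booted with
+     * boot-ncpus=1 on the command line, the properties end up as
+     * plat-max-ncpus=8, boot-max-ncpus=2 and boot-ncpus=1.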
*/ if (tp != NULL) - bsetpropsi("boot-ncpus", cpu_count); + bsetpropsi(BOOT_NCPUS_NAME, cpu_count); } static void @@ -2146,6 +2181,7 @@ process_srat(struct srat *tp) } memory; #pragma pack() char prop_name[30]; + uint64_t maxmem = 0; if (tp == NULL) return; @@ -2181,6 +2217,10 @@ process_srat(struct srat *tp) mem_num); bsetprop(prop_name, strlen(prop_name), &memory, sizeof (memory)); + if ((item->i.m.flags & SRAT_HOT_PLUG) && + (memory.addr + memory.length > maxmem)) { + maxmem = memory.addr + memory.length; + } mem_num++; break; case SRAT_X2APIC: @@ -2199,6 +2239,14 @@ process_srat(struct srat *tp) item = (struct srat_item *) (item->len + (caddr_t)item); } + + /* + * The maximum physical address calculated from the SRAT table is more + * accurate than that calculated from the MSCT table. + */ + if (maxmem != 0) { + plat_dr_physmax = btop(maxmem); + } } static void @@ -2224,6 +2272,99 @@ process_slit(struct slit *tp) tp->number * tp->number); } +static struct msct * +process_msct(struct msct *tp) +{ + int last_seen = 0; + int proc_num = 0; + struct msct_proximity_domain *item, *end; + extern uint64_t plat_dr_options; + + ASSERT(tp != NULL); + + end = (void *)(tp->hdr.len + (uintptr_t)tp); + for (item = (void *)((uintptr_t)tp + tp->proximity_domain_offset); + item < end; + item = (void *)(item->length + (uintptr_t)item)) { + /* + * Sanity check according to section 5.2.19.1 of ACPI 4.0. + * Revision 1 + * Length 22 + */ + if (item->revision != 1 || item->length != 22) { + cmn_err(CE_CONT, + "?boot: unknown proximity domain structure in MSCT " + "with rev(%d), len(%d).\n", + (int)item->revision, (int)item->length); + return (NULL); + } else if (item->domain_min > item->domain_max) { + cmn_err(CE_CONT, + "?boot: invalid proximity domain structure in MSCT " + "with domain_min(%u), domain_max(%u).\n", + item->domain_min, item->domain_max); + return (NULL); + } else if (item->domain_min != last_seen) { + /* + * Items must be organized in ascending order of the + * proximity domain enumerations. + */ + cmn_err(CE_CONT, + "?boot: invalid proximity domain structure in MSCT," + " items are not organized in ascending order.\n"); + return (NULL); + } + + /* + * If processor_max is 0 then there would be no CPUs in this + * domain. + */ + if (item->processor_max != 0) { + proc_num += (item->domain_max - item->domain_min + 1) * + item->processor_max; + } + + last_seen = item->domain_max - item->domain_min + 1; + /* + * Break out if all proximity domains have been processed. + * Some BIOSes may have unused items at the end of the MSCT + * table. + */ + if (last_seen > tp->maximum_proximity_domains) { + break; + } + } + if (last_seen != tp->maximum_proximity_domains + 1) { + cmn_err(CE_CONT, + "?boot: invalid proximity domain structure in MSCT, " + "proximity domain count doesn't match.\n"); + return (NULL); + } + + /* + * Set plat-max-ncpus property if it hasn't been set yet. + */ + if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) { + if (proc_num != 0) { + bsetpropsi(PLAT_MAX_NCPUS_NAME, proc_num); + } + } + + /* + * Use Maximum Physical Address from the MSCT table as the upper limit + * for memory hot-adding by default. It may be overridden by the value + * from the SRAT table or the "plat-dr-physmax" boot option. + */ + plat_dr_physmax = btop(tp->maximum_physical_address + 1); + + /* + * Existence of an MSCT implies CPU/memory hotplug capability for the + * platform.
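The revision/length sanity check in process_msct() above guards a fixed 22-byte record. Assuming the field names from the patch's struct msct_proximity_domain and the offsets given in section 5.2.19.1 of ACPI 4.0, the layout works out as follows (a sketch, not the shipped header):

#pragma pack(1)
struct msct_proximity_domain_sketch {
	uint8_t		revision;	/* offset 0: must be 1 */
	uint8_t		length;		/* offset 1: must be 22 */
	uint32_t	domain_min;	/* offset 2: proximity domain range low */
	uint32_t	domain_max;	/* offset 6: proximity domain range high */
	uint32_t	processor_max;	/* offset 10: max processor capacity */
	uint64_t	memory_max;	/* offset 14: max memory capacity, bytes */
};					/* total: 22 bytes */
#pragma pack()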
+ */ + plat_dr_options |= PLAT_DR_FEATURE_CPU; + plat_dr_options |= PLAT_DR_FEATURE_MEMORY; + + return (tp); +} + #else /* __xpv */ static void enumerate_xen_cpus() @@ -2233,7 +2374,7 @@ enumerate_xen_cpus() /* * User-set boot-ncpus overrides enumeration */ - if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0) + if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0) return; /* @@ -2246,7 +2387,7 @@ enumerate_xen_cpus() if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0) max_id = id; - bsetpropsi("boot-ncpus", max_id+1); + bsetpropsi(BOOT_NCPUS_NAME, max_id+1); } #endif /* __xpv */ @@ -2257,6 +2398,9 @@ build_firmware_properties(void) struct table_header *tp = NULL; #ifndef __xpv + if ((msct_ptr = (struct msct *)find_fw_table("MSCT")) != NULL) + msct_ptr = process_msct(msct_ptr); + if ((tp = find_fw_table("APIC")) != NULL) process_madt((struct madt *)tp); diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c index 1525b7859d..02596478c0 100644 --- a/usr/src/uts/i86pc/os/lgrpplat.c +++ b/usr/src/uts/i86pc/os/lgrpplat.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * LOCALITY GROUP (LGROUP) PLATFORM SUPPORT FOR X86/AMD64 PLATFORMS @@ -88,14 +92,18 @@ * - lgrp_plat_lat_stats.latencies[][] Table of latencies between same and * different nodes indexed by node ID * - * - lgrp_plat_node_cnt Number of NUMA nodes in system + * - lgrp_plat_node_cnt Number of NUMA nodes in the system on + * non-DR-capable systems, + * maximum possible number of NUMA nodes + * in the system on DR-capable systems. * * - lgrp_plat_node_domain[] Node ID to proximity domain ID mapping * table indexed by node ID (only used * for SRAT) * - * - lgrp_plat_node_memory[] Table with physical address range for - * each node indexed by node ID + * - lgrp_plat_memnode_info[] Table with physical address range for + * each memory node indexed by memory node + * ID * * The code is implemented to make the following always be true: * @@ -113,26 +121,41 @@ * equivalent node ID since we want to keep the node IDs numbered from 0 to * <number of nodes - 1> to minimize cost of searching and potentially space. * - * The code below really tries to do the above. However, the virtual memory - * system expects the memnodes which describe the physical address range for - * each NUMA node to be arranged in ascending order by physical address. (:-( - * Otherwise, the kernel will panic in different semi-random places in the VM - * system. + * With the introduction of support for memory DR operations on x86 platforms, + * things get a little complicated. The addresses of hot-added memory may not + * be contiguous with other memory connected to the same lgrp node. In other + * words, memory addresses may get interleaved among lgrp nodes after memory + * DR operations. To work around this limitation, we have extended the + * relationship between lgrp node and memory node from a 1:1 to a 1:N mapping, + * which means there may be multiple memory nodes associated with an lgrp node + * after memory DR operations. * - * Consequently, this module has to try to sort the nodes in ascending order by - * each node's starting physical address to try to meet this "constraint" in - * the VM system (see lgrp_plat_node_sort()).
Also, the lowest numbered - * proximity domain ID in the system is deteremined and used to make the lowest - * numbered proximity domain map to node 0 in hopes that the proximity domains - * are sorted in ascending order by physical address already even if their IDs - * don't start at 0 (see NODE_DOMAIN_HASH() and lgrp_plat_srat_domains()). - * Finally, it is important to note that these workarounds may not be - * sufficient if/when memory hotplugging is supported and the VM system may - * ultimately need to be fixed to handle this.... + * To minimize the code changes to support memory DR operations, the + * following policies have been adopted. + * 1) On non-DR-capable systems, the relationship among lgroup platform handle, + * node ID and memnode ID is still kept as: + * lgroup platform handle == node ID == memnode ID + * 2) For memory present at boot time on DR-capable platforms, the relationship + * is still kept as is: + * lgroup platform handle == node ID == memnode ID + * 3) For hot-added memory, the relationship between lgrp ID and memnode ID has + * been changed from a 1:1 to a 1:N mapping. Memnode IDs [0, lgrp_plat_node_cnt) + * are reserved for memory present at boot time, and memnode IDs + * [lgrp_plat_node_cnt, max_mem_nodes) are used to dynamically allocate + * memnode IDs for hot-added memory. + * 4) All boot code that assumes "node ID == memnode ID" can stay as is, + * because node ID is always equal to memnode ID at boot time. + * 5) The logic related to lgrp_plat_memnode_info_update(), + * plat_pfn_to_mem_node() and lgrp_plat_mem_size() has been enhanced to deal + * with the 1:N mapping. + * 6) The latency probing logic, which assumes "node ID == memnode ID" and may + * be called at run time, is disabled if memory DR is enabled. */ #include <sys/archsystm.h> /* for {in,out}{b,w,l}() */ +#include <sys/atomic.h> #include <sys/bootconf.h> #include <sys/cmn_err.h> #include <sys/controlregs.h> @@ -143,6 +166,7 @@ #include <sys/memlist.h> #include <sys/memnode.h> #include <sys/mman.h> +#include <sys/note.h> #include <sys/pci_cfgspace.h> #include <sys/pci_impl.h> #include <sys/param.h> @@ -158,7 +182,8 @@ #include <vm/seg_kmem.h> #include <vm/vm_dep.h> -#include "acpi_fw.h" /* for SRAT and SLIT */ +#include <sys/acpidev.h> +#include "acpi_fw.h" /* for SRAT, SLIT and MSCT */ #define MAX_NODES 8 @@ -187,7 +212,6 @@ ((lgrp_plat_prox_domain_min == UINT32_MAX) ?
(domain) % node_cnt : \ ((domain) - lgrp_plat_prox_domain_min) % node_cnt) - /* * CPU to node ID mapping structure (only used with SRAT) */ @@ -239,14 +263,16 @@ typedef struct node_domain_map { } node_domain_map_t; /* - * Node ID and starting and ending page for physical memory in node + * Node ID and starting and ending page for physical memory in memory node */ -typedef struct node_phys_addr_map { +typedef struct memnode_phys_addr_map { pfn_t start; pfn_t end; int exists; uint32_t prox_domain; -} node_phys_addr_map_t; + uint32_t device_id; + uint_t lgrphand; +} memnode_phys_addr_map_t; /* * Number of CPUs for which we got APIC IDs @@ -278,7 +304,7 @@ static node_domain_map_t lgrp_plat_node_domain[MAX_NODES]; /* * Physical address range for memory in each node */ -static node_phys_addr_map_t lgrp_plat_node_memory[MAX_NODES]; +static memnode_phys_addr_map_t lgrp_plat_memnode_info[MAX_MEM_NODES]; /* * Statistics gotten from probing @@ -306,6 +332,17 @@ static int lgrp_plat_srat_error = 0; static int lgrp_plat_slit_error = 0; /* + * Whether lgrp topology has been flattened to 2 levels. + */ +static int lgrp_plat_topo_flatten = 0; + + +/* + * Maximum memory node ID in use. + */ +static uint_t lgrp_plat_max_mem_node; + +/* * Allocate lgroup array statically */ static lgrp_t lgrp_space[NLGRP]; @@ -318,7 +355,7 @@ static int nlgrps_alloc; int lgrp_plat_domain_min_enable = 1; /* - * Number of nodes in system + * Maximum possible number of nodes in system */ uint_t lgrp_plat_node_cnt = 1; @@ -343,11 +380,12 @@ int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES; int lgrp_plat_probe_nreads = LGRP_PLAT_PROBE_NREADS; /* - * Enable use of ACPI System Resource Affinity Table (SRAT) and System - * Locality Information Table (SLIT) + * Enable use of ACPI System Resource Affinity Table (SRAT), System + * Locality Information Table (SLIT) and Maximum System Capability Table (MSCT) */ int lgrp_plat_srat_enable = 1; int lgrp_plat_slit_enable = 1; +int lgrp_plat_msct_enable = 1; /* * mnode_xwa: set to non-zero value to initiate workaround if large pages are @@ -370,12 +408,10 @@ struct lgrp_stats lgrp_stats[NLGRP]; */ void plat_build_mem_nodes(struct memlist *list); -int plat_lgrphand_to_mem_node(lgrp_handle_t hand); +int plat_mnode_xcheck(pfn_t pfncnt); lgrp_handle_t plat_mem_node_to_lgrphand(int mnode); -int plat_mnode_xcheck(pfn_t pfncnt); - int plat_pfn_to_mem_node(pfn_t pfn); /* @@ -420,11 +456,11 @@ static int lgrp_plat_domain_to_node(node_domain_map_t *node_domain, static void lgrp_plat_get_numa_config(void); -static void lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory, +static void lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats, lgrp_plat_probe_stats_t *probe_stats); -static int lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory, +static int lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats); static void lgrp_plat_main_init(void); @@ -434,13 +470,13 @@ static pgcnt_t lgrp_plat_mem_size_default(lgrp_handle_t, lgrp_mem_query_t); static int lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt, uint32_t domain); -static int lgrp_plat_node_memory_update(node_domain_map_t *node_domain, - int node_cnt, node_phys_addr_map_t *node_memory, uint64_t start, - uint64_t end, uint32_t domain); +static int lgrp_plat_memnode_info_update(node_domain_map_t *node_domain, + int node_cnt, memnode_phys_addr_map_t *memnode_info, int memnode_cnt, + uint64_t start, uint64_t end, 
uint32_t domain, uint32_t device_id); static void lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, cpu_node_map_t *cpu_node, int cpu_count, - node_phys_addr_map_t *node_memory); + memnode_phys_addr_map_t *memnode_info); static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node, int cpu_node_nentries, lgrp_plat_probe_mem_config_t *probe_mem_config, @@ -448,24 +484,32 @@ static hrtime_t lgrp_plat_probe_time(int to, cpu_node_map_t *cpu_node, static int lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node); -static int lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, - node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats); +static int lgrp_plat_process_slit(struct slit *tp, + node_domain_map_t *node_domain, uint_t node_cnt, + memnode_phys_addr_map_t *memnode_info, + lgrp_plat_latency_stats_t *lat_stats); + +static int lgrp_plat_process_sli(uint32_t domain, uchar_t *sli_info, + uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt, + lgrp_plat_latency_stats_t *lat_stats); -static int lgrp_plat_process_srat(struct srat *tp, +static int lgrp_plat_process_srat(struct srat *tp, struct msct *mp, uint32_t *prox_domain_min, node_domain_map_t *node_domain, cpu_node_map_t *cpu_node, int cpu_count, - node_phys_addr_map_t *node_memory); + memnode_phys_addr_map_t *memnode_info); static void lgrp_plat_release_bootstrap(void); static int lgrp_plat_srat_domains(struct srat *tp, uint32_t *prox_domain_min); -static void lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory, - lgrp_plat_latency_stats_t *lat_stats); +static int lgrp_plat_msct_domains(struct msct *tp, + uint32_t *prox_domain_min); + +static void lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats); static void opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv, - node_phys_addr_map_t *node_memory); + memnode_phys_addr_map_t *memnode_info); static hrtime_t opt_probe_vendor(int dest_node, int nreads); @@ -529,10 +573,10 @@ plat_build_mem_nodes(struct memlist *list) * This shouldn't happen and rest of code can't deal * with this if it does. */ - if (node < 0 || node >= lgrp_plat_node_cnt || - !lgrp_plat_node_memory[node].exists || - cur_start < lgrp_plat_node_memory[node].start || - cur_start > lgrp_plat_node_memory[node].end) { + if (node < 0 || node >= lgrp_plat_max_mem_node || + !lgrp_plat_memnode_info[node].exists || + cur_start < lgrp_plat_memnode_info[node].start || + cur_start > lgrp_plat_memnode_info[node].end) { cmn_err(CE_PANIC, "Don't know which memnode " "to add installed memory address 0x%lx\n", cur_start); @@ -543,9 +587,9 @@ plat_build_mem_nodes(struct memlist *list) */ cur_end = end; endcnt = 0; - if (lgrp_plat_node_memory[node].exists && - cur_end > lgrp_plat_node_memory[node].end) { - cur_end = lgrp_plat_node_memory[node].end; + if (lgrp_plat_memnode_info[node].exists && + cur_end > lgrp_plat_memnode_info[node].end) { + cur_end = lgrp_plat_memnode_info[node].end; if (mnode_xwa > 1) { /* * sacrifice the last page in each @@ -572,16 +616,6 @@ plat_build_mem_nodes(struct memlist *list) } -int -plat_lgrphand_to_mem_node(lgrp_handle_t hand) -{ - if (max_mem_nodes == 1) - return (0); - - return ((int)hand); -} - - /* * plat_mnode_xcheck: checks the node memory ranges to see if there is a pfncnt * range of pages aligned on pfncnt that crosses an node boundary. 
Returns 1 if @@ -593,9 +627,9 @@ plat_mnode_xcheck(pfn_t pfncnt) int node, prevnode = -1, basenode; pfn_t ea, sa; - for (node = 0; node < lgrp_plat_node_cnt; node++) { + for (node = 0; node < lgrp_plat_max_mem_node; node++) { - if (lgrp_plat_node_memory[node].exists == 0) + if (lgrp_plat_memnode_info[node].exists == 0) continue; if (prevnode == -1) { @@ -605,23 +639,23 @@ plat_mnode_xcheck(pfn_t pfncnt) } /* assume x86 node pfn ranges are in increasing order */ - ASSERT(lgrp_plat_node_memory[node].start > - lgrp_plat_node_memory[prevnode].end); + ASSERT(lgrp_plat_memnode_info[node].start > + lgrp_plat_memnode_info[prevnode].end); /* * continue if the starting address of node is not contiguous * with the previous node. */ - if (lgrp_plat_node_memory[node].start != - (lgrp_plat_node_memory[prevnode].end + 1)) { + if (lgrp_plat_memnode_info[node].start != + (lgrp_plat_memnode_info[prevnode].end + 1)) { basenode = node; prevnode = node; continue; } /* check if the starting address of node is pfncnt aligned */ - if ((lgrp_plat_node_memory[node].start & (pfncnt - 1)) != 0) { + if ((lgrp_plat_memnode_info[node].start & (pfncnt - 1)) != 0) { /* * at this point, node starts at an unaligned boundary @@ -630,14 +664,14 @@ plat_mnode_xcheck(pfn_t pfncnt) * range of length pfncnt that crosses this boundary. */ - sa = P2ALIGN(lgrp_plat_node_memory[prevnode].end, + sa = P2ALIGN(lgrp_plat_memnode_info[prevnode].end, pfncnt); - ea = P2ROUNDUP((lgrp_plat_node_memory[node].start), + ea = P2ROUNDUP((lgrp_plat_memnode_info[node].start), pfncnt); ASSERT((ea - sa) == pfncnt); - if (sa >= lgrp_plat_node_memory[basenode].start && - ea <= (lgrp_plat_node_memory[node].end + 1)) { + if (sa >= lgrp_plat_memnode_info[basenode].start && + ea <= (lgrp_plat_memnode_info[node].end + 1)) { /* * large page found to cross mnode boundary. * Return Failure if workaround not enabled. @@ -659,9 +693,10 @@ plat_mem_node_to_lgrphand(int mnode) if (max_mem_nodes == 1) return (LGRP_DEFAULT_HANDLE); - return ((lgrp_handle_t)mnode); -} + ASSERT(0 <= mnode && mnode < lgrp_plat_max_mem_node); + return ((lgrp_handle_t)(lgrp_plat_memnode_info[mnode].lgrphand)); +} int plat_pfn_to_mem_node(pfn_t pfn) @@ -671,22 +706,23 @@ plat_pfn_to_mem_node(pfn_t pfn) if (max_mem_nodes == 1) return (0); - for (node = 0; node < lgrp_plat_node_cnt; node++) { + for (node = 0; node < lgrp_plat_max_mem_node; node++) { /* * Skip nodes with no memory */ - if (!lgrp_plat_node_memory[node].exists) + if (!lgrp_plat_memnode_info[node].exists) continue; - if (pfn >= lgrp_plat_node_memory[node].start && - pfn <= lgrp_plat_node_memory[node].end) + membar_consumer(); + if (pfn >= lgrp_plat_memnode_info[node].start && + pfn <= lgrp_plat_memnode_info[node].end) return (node); } /* * Didn't find memnode where this PFN lives which should never happen */ - ASSERT(node < lgrp_plat_node_cnt); + ASSERT(node < lgrp_plat_max_mem_node); return (-1); } @@ -698,7 +734,6 @@ plat_pfn_to_mem_node(pfn_t pfn) /* * Allocate additional space for an lgroup. */ -/* ARGSUSED */ lgrp_t * lgrp_plat_alloc(lgrp_id_t lgrpid) { @@ -713,23 +748,198 @@ lgrp_plat_alloc(lgrp_id_t lgrpid) /* * Platform handling for (re)configuration changes + * + * Mechanism to protect lgrp_plat_cpu_node[] at CPU hotplug: + * 1) Use cpu_lock to synchronize between lgrp_plat_config() and + * lgrp_plat_cpu_to_hand(). + * 2) Disable latency probing logic by making sure that the flag + * LGRP_PLAT_PROBE_ENABLE is cleared. 
+ * + * Mechanism to protect lgrp_plat_memnode_info[] at memory hotplug: + * 1) Only insertions into lgrp_plat_memnode_info at memory hotplug, no + * removal. + * 2) Only expansion of existing entries, no shrinking. + * 3) On the writing side, the DR framework ensures that lgrp_plat_config() is + * called in single-threaded context, and membar_producer() is used to ensure + * that all changes are visible to other CPUs before setting the "exists" + * flag. + * 4) On the reading side, membar_consumer() after checking the "exists" flag + * ensures that the right values are retrieved. + * + * Mechanism to protect lgrp_plat_node_domain[] at hotplug: + * 1) Only insertions into lgrp_plat_node_domain at hotplug, no removal. + * 2) On the writing side, it's single-threaded and membar_producer() is used + * to ensure all changes are visible to other CPUs before setting the "exists" + * flag. + * 3) On the reading side, membar_consumer() after checking the "exists" flag + * ensures that the right values are retrieved. */ -/* ARGSUSED */ void lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg) { +#ifdef __xpv + _NOTE(ARGUNUSED(flag, arg)); +#else + int rc, node; + cpu_t *cp; + void *hdl = NULL; + uchar_t *sliptr = NULL; + uint32_t domain, apicid, slicnt = 0; + update_membounds_t *mp; + + extern int acpidev_dr_get_cpu_numa_info(cpu_t *, void **, uint32_t *, + uint32_t *, uint32_t *, uchar_t **); + extern void acpidev_dr_free_cpu_numa_info(void *); + + /* + * This interface is used to support CPU/memory DR operations. + * Don't bother here if the system is still booting or only one lgrp + * node is supported. + */ + if (!lgrp_topo_initialized || lgrp_plat_node_cnt == 1) + return; + + switch (flag) { + case LGRP_CONFIG_CPU_ADD: + cp = (cpu_t *)arg; + ASSERT(cp != NULL); + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* Check whether CPU already exists. */ + ASSERT(!lgrp_plat_cpu_node[cp->cpu_id].exists); + if (lgrp_plat_cpu_node[cp->cpu_id].exists) { + cmn_err(CE_WARN, + "!lgrp: CPU(%d) already exists in cpu_node map.", + cp->cpu_id); + break; + } + + /* Query CPU lgrp information. */ + rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain, + &slicnt, &sliptr); + ASSERT(rc == 0); + if (rc != 0) { + cmn_err(CE_WARN, + "!lgrp: failed to query lgrp info for CPU(%d).", + cp->cpu_id); + break; + } + + /* Update node to proximity domain mapping */ + node = lgrp_plat_domain_to_node(lgrp_plat_node_domain, + lgrp_plat_node_cnt, domain); + if (node == -1) { + node = lgrp_plat_node_domain_update( + lgrp_plat_node_domain, lgrp_plat_node_cnt, domain); + ASSERT(node != -1); + if (node == -1) { + acpidev_dr_free_cpu_numa_info(hdl); + cmn_err(CE_WARN, "!lgrp: failed to update " + "node_domain map for domain(%u).", domain); + break; + } + } + + /* Update latency information among lgrps. */ + if (slicnt != 0 && sliptr != NULL) { + if (lgrp_plat_process_sli(domain, sliptr, slicnt, + lgrp_plat_node_domain, lgrp_plat_node_cnt, + &lgrp_plat_lat_stats) != 0) { + cmn_err(CE_WARN, "!lgrp: failed to update " + "latency information for domain (%u).", + domain); + } + } + + /* Update CPU to node mapping. */ + lgrp_plat_cpu_node[cp->cpu_id].prox_domain = domain; + lgrp_plat_cpu_node[cp->cpu_id].node = node; + lgrp_plat_cpu_node[cp->cpu_id].apicid = apicid; + lgrp_plat_cpu_node[cp->cpu_id].exists = 1; + lgrp_plat_apic_ncpus++; + + acpidev_dr_free_cpu_numa_info(hdl); + break; + + case LGRP_CONFIG_CPU_DEL: + cp = (cpu_t *)arg; + ASSERT(cp != NULL); + ASSERT(MUTEX_HELD(&cpu_lock)); + + /* Check whether CPU exists.
*/ + ASSERT(lgrp_plat_cpu_node[cp->cpu_id].exists); + if (!lgrp_plat_cpu_node[cp->cpu_id].exists) { + cmn_err(CE_WARN, + "!lgrp: CPU(%d) doesn't exist in cpu_node map.", + cp->cpu_id); + break; + } + + /* Query CPU lgrp information. */ + rc = acpidev_dr_get_cpu_numa_info(cp, &hdl, &apicid, &domain, + NULL, NULL); + ASSERT(rc == 0); + if (rc != 0) { + cmn_err(CE_WARN, + "!lgrp: failed to query lgrp info for CPU(%d).", + cp->cpu_id); + break; + } + + /* Update map. */ + ASSERT(lgrp_plat_cpu_node[cp->cpu_id].apicid == apicid); + ASSERT(lgrp_plat_cpu_node[cp->cpu_id].prox_domain == domain); + lgrp_plat_cpu_node[cp->cpu_id].exists = 0; + lgrp_plat_cpu_node[cp->cpu_id].apicid = UINT32_MAX; + lgrp_plat_cpu_node[cp->cpu_id].prox_domain = UINT32_MAX; + lgrp_plat_cpu_node[cp->cpu_id].node = UINT_MAX; + lgrp_plat_apic_ncpus--; + + acpidev_dr_free_cpu_numa_info(hdl); + break; + + case LGRP_CONFIG_MEM_ADD: + mp = (update_membounds_t *)arg; + ASSERT(mp != NULL); + + /* Update latency information among lgrps. */ + if (mp->u_sli_cnt != 0 && mp->u_sli_ptr != NULL) { + if (lgrp_plat_process_sli(mp->u_domain, + mp->u_sli_ptr, mp->u_sli_cnt, + lgrp_plat_node_domain, lgrp_plat_node_cnt, + &lgrp_plat_lat_stats) != 0) { + cmn_err(CE_WARN, "!lgrp: failed to update " + "latency information for domain (%u).", + domain); + } + } + + if (lgrp_plat_memnode_info_update(lgrp_plat_node_domain, + lgrp_plat_node_cnt, lgrp_plat_memnode_info, max_mem_nodes, + mp->u_base, mp->u_base + mp->u_length, + mp->u_domain, mp->u_device_id) < 0) { + cmn_err(CE_WARN, + "!lgrp: failed to update latency information for " + "memory (0x%" PRIx64 " - 0x%" PRIx64 ").", + mp->u_base, mp->u_base + mp->u_length); + } + break; + + default: + break; + } +#endif /* __xpv */ } /* * Return the platform handle for the lgroup containing the given CPU */ -/* ARGSUSED */ lgrp_handle_t lgrp_plat_cpu_to_hand(processorid_t id) { lgrp_handle_t hand; + ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock)); + if (lgrp_plat_node_cnt == 1) return (LGRP_DEFAULT_HANDLE); @@ -763,6 +973,7 @@ lgrp_plat_init(lgrp_init_stages_t stage) */ lgrp_plat_node_cnt = max_mem_nodes = 1; #else /* __xpv */ + /* * Get boot property for lgroup topology height limit */ @@ -782,14 +993,32 @@ lgrp_plat_init(lgrp_init_stages_t stage) lgrp_plat_slit_enable = (int)value; /* + * Get boot property for enabling/disabling MSCT + */ + if (bootprop_getval(BP_LGRP_MSCT_ENABLE, &value) == 0) + lgrp_plat_msct_enable = (int)value; + + /* * Initialize as a UMA machine */ if (lgrp_topo_ht_limit() == 1) { lgrp_plat_node_cnt = max_mem_nodes = 1; + lgrp_plat_max_mem_node = 1; return; } lgrp_plat_get_numa_config(); + + /* + * Each lgrp node needs MAX_MEM_NODES_PER_LGROUP memnodes + * to support memory DR operations if memory DR is enabled. + */ + lgrp_plat_max_mem_node = lgrp_plat_node_cnt; + if (plat_dr_support_memory() && lgrp_plat_node_cnt != 1) { + max_mem_nodes = MAX_MEM_NODES_PER_LGROUP * + lgrp_plat_node_cnt; + ASSERT(max_mem_nodes <= MAX_MEM_NODES); + } #endif /* __xpv */ break; @@ -817,7 +1046,6 @@ lgrp_plat_init(lgrp_init_stages_t stage) * specific, so platform gets to decide its value. It would be nice if the * number was at least proportional to make comparisons more meaningful though. 
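The DR-aware fallback added to lgrp_plat_latency() below boils down to one decision; a distilled sketch with the bookkeeping and the node == src check stripped out:

/*
 * If the local latency is still unknown: with DR enabled it should
 * have been filled in by lgrp_plat_config(), so flatten the topology
 * for safety; without DR, probe from the current CPU as before.
 */
static void
latency_fixup(int src, lgrp_plat_latency_stats_t *lat_stats)
{
	if (lat_stats->latencies[src][src] != 0)
		return;
	if (plat_dr_support_cpu() || plat_dr_support_memory())
		lgrp_plat_2level_setup(lat_stats);
	else
		lgrp_plat_probe();
}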
*/ -/* ARGSUSED */ int lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to) { @@ -844,13 +1072,29 @@ lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to) /* * Probe from current CPU if its lgroup latencies haven't been set yet - * and we are trying to get latency from current CPU to some node + * and we are trying to get latency from current CPU to some node. + * Avoid probing if CPU/memory DR is enabled. */ - node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node, - lgrp_plat_cpu_node_nentries); - ASSERT(node >= 0 && node < lgrp_plat_node_cnt); - if (lgrp_plat_lat_stats.latencies[src][src] == 0 && node == src) - lgrp_plat_probe(); + if (lgrp_plat_lat_stats.latencies[src][src] == 0) { + /* + * Latency information should be updated by lgrp_plat_config() + * for DR operations. Something is wrong if we reach here. + * For safety, flatten lgrp topology to two levels. + */ + if (plat_dr_support_cpu() || plat_dr_support_memory()) { + ASSERT(lgrp_plat_lat_stats.latencies[src][src]); + cmn_err(CE_WARN, + "lgrp: failed to get latency information, " + "fall back to two-level topology."); + lgrp_plat_2level_setup(&lgrp_plat_lat_stats); + } else { + node = lgrp_plat_cpu_to_node(CPU, lgrp_plat_cpu_node, + lgrp_plat_cpu_node_nentries); + ASSERT(node >= 0 && node < lgrp_plat_node_cnt); + if (node == src) + lgrp_plat_probe(); + } + } return (lgrp_plat_lat_stats.latencies[src][dest]); } @@ -859,21 +1103,45 @@ /* * Return the maximum number of lgrps supported by the platform. * Before lgrp topology is known it returns an estimate based on the number of - * nodes. Once topology is known it returns the actual maximim number of lgrps - * created. Since x86/x64 doesn't support Dynamic Reconfiguration (DR) and - * dynamic addition of new nodes, this number may not grow during system - * lifetime (yet). + * nodes. Once topology is known it returns: + * 1) the actual maximum number of lgrps created if CPU/memory DR operations + * are not supported. + * 2) the maximum possible number of lgrps if CPU/memory DR operations are + * supported. */ int lgrp_plat_max_lgrps(void) { - return (lgrp_topo_initialized ? - lgrp_alloc_max + 1 : - lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1); + if (!lgrp_topo_initialized || plat_dr_support_cpu() || + plat_dr_support_memory()) { + return (lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1); + } else { + return (lgrp_alloc_max + 1); + } } /* + * Count number of memory pages (_t) based on memory node ID (_n) and query + * type (_q). + */ +#define _LGRP_PLAT_MEM_SIZE(_n, _q, _t) \ + if (mem_node_config[_n].exists) { \ + switch (_q) { \ + case LGRP_MEM_SIZE_FREE: \ + _t += MNODE_PGCNT(_n); \ + break; \ + case LGRP_MEM_SIZE_AVAIL: \ + _t += mem_node_memlist_pages(_n, phys_avail); \ + break; \ + case LGRP_MEM_SIZE_INSTALL: \ + _t += mem_node_memlist_pages(_n, phys_install); \ + break; \ + default: \ + break; \ + } \ + } + +/* * Return the number of free pages in an lgroup.
* * For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize @@ -896,25 +1164,19 @@ lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query) return (lgrp_plat_mem_size_default(plathand, query)); if (plathand != LGRP_NULL_HANDLE) { - mnode = plat_lgrphand_to_mem_node(plathand); - if (mnode >= 0 && mem_node_config[mnode].exists) { - switch (query) { - case LGRP_MEM_SIZE_FREE: - npgs = MNODE_PGCNT(mnode); - break; - case LGRP_MEM_SIZE_AVAIL: - npgs = mem_node_memlist_pages(mnode, - phys_avail); - break; - case LGRP_MEM_SIZE_INSTALL: - npgs = mem_node_memlist_pages(mnode, - phys_install); - break; - default: - break; - } + /* Count memory node present at boot. */ + mnode = (int)plathand; + ASSERT(mnode < lgrp_plat_node_cnt); + _LGRP_PLAT_MEM_SIZE(mnode, query, npgs); + + /* Count possible hot-added memory nodes. */ + for (mnode = lgrp_plat_node_cnt; + mnode < lgrp_plat_max_mem_node; mnode++) { + if (lgrp_plat_memnode_info[mnode].lgrphand == plathand) + _LGRP_PLAT_MEM_SIZE(mnode, query, npgs); } } + return (npgs); } @@ -923,7 +1185,6 @@ * Return the platform handle of the lgroup that contains the physical memory * corresponding to the given page frame number */ -/* ARGSUSED */ lgrp_handle_t lgrp_plat_pfn_to_hand(pfn_t pfn) { @@ -979,6 +1240,10 @@ lgrp_plat_probe(void) max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2) return; + /* SRAT and SLIT should be enabled if DR operations are enabled. */ + if (plat_dr_support_cpu() || plat_dr_support_memory()) + return; + /* * Determine ID of node containing current CPU */ @@ -1051,14 +1316,13 @@ * - Fallback to just optimizing for local and remote if * latencies didn't look right */ - lgrp_plat_latency_adjust(lgrp_plat_node_memory, &lgrp_plat_lat_stats, + lgrp_plat_latency_adjust(lgrp_plat_memnode_info, &lgrp_plat_lat_stats, &lgrp_plat_probe_stats); lgrp_plat_probe_stats.probe_error_code = - lgrp_plat_latency_verify(lgrp_plat_node_memory, + lgrp_plat_latency_verify(lgrp_plat_memnode_info, &lgrp_plat_lat_stats); if (lgrp_plat_probe_stats.probe_error_code) - lgrp_plat_2level_setup(lgrp_plat_node_memory, - &lgrp_plat_lat_stats); + lgrp_plat_2level_setup(&lgrp_plat_lat_stats); } @@ -1078,8 +1342,11 @@ /* - * Update CPU to node mapping for given CPU and proximity domain (and returns - * negative numbers for errors and positive ones for success) + * Update CPU to node mapping for given CPU and proximity domain. + * Return values: + * - zero for success + * - positive numbers for warnings + * - negative numbers for errors */ static int lgrp_plat_cpu_node_update(node_domain_map_t *node_domain, int node_cnt, @@ -1119,6 +1386,13 @@ return (1); /* + * It's invalid to have more than one entry with the same + * local APIC ID in the SRAT table. + */ + if (cpu_node[i].node != UINT_MAX) + return (-2); + + /* * Fill in node and proximity domain IDs */ cpu_node[i].prox_domain = domain; @@ -1128,9 +1402,11 @@ } /* - * Return error when entry for APIC ID wasn't found in table + * It's possible that an apicid doesn't exist in the cpu_node map + * because the user limited the number of CPUs powered on at boot by + * specifying the boot_ncpus kernel option.
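That relaxed convention matters to the SRAT walk: only rc == 0 counts as a mapped processor entry, positive warnings are skipped, and negative values abort. A caller-side sketch of the pattern used by lgrp_plat_process_srat():

static int
count_srat_cpu(node_domain_map_t *node_domain, int node_cnt,
    cpu_node_map_t *cpu_node, int cpu_count, uint32_t apic_id,
    uint32_t domain, int *proc_entry_count)
{
	int rc;

	rc = lgrp_plat_cpu_node_update(node_domain, node_cnt, cpu_node,
	    cpu_count, apic_id, domain);
	if (rc < 0)
		return (-3);		/* malformed SRAT, abort the walk */
	if (rc == 0)
		(*proc_entry_count)++;	/* a CPU entry was mapped */
	return (0);			/* rc > 0: benign skip */
}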
*/ - return (-2); + return (2); } @@ -1189,9 +1465,11 @@ */ node = start = NODE_DOMAIN_HASH(domain, node_cnt); do { - if (node_domain[node].prox_domain == domain && - node_domain[node].exists) - return (node); + if (node_domain[node].exists) { + membar_consumer(); + if (node_domain[node].prox_domain == domain) + return (node); + } node = (node + 1) % node_cnt; } while (node != start); return (-1); @@ -1219,19 +1497,26 @@ lgrp_plat_get_numa_config(void) if (lgrp_plat_apic_ncpus > 0) { int retval; + /* Reserve enough resources if CPU DR is enabled. */ + if (plat_dr_support_cpu() && max_ncpus > lgrp_plat_apic_ncpus) + lgrp_plat_cpu_node_nentries = max_ncpus; + else + lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus; + /* * Temporarily allocate boot memory to use for CPU to node * mapping since kernel memory allocator isn't alive yet */ lgrp_plat_cpu_node = (cpu_node_map_t *)BOP_ALLOC(bootops, - NULL, lgrp_plat_apic_ncpus * sizeof (cpu_node_map_t), + NULL, lgrp_plat_cpu_node_nentries * sizeof (cpu_node_map_t), sizeof (int)); ASSERT(lgrp_plat_cpu_node != NULL); if (lgrp_plat_cpu_node) { - lgrp_plat_cpu_node_nentries = lgrp_plat_apic_ncpus; bzero(lgrp_plat_cpu_node, lgrp_plat_cpu_node_nentries * sizeof (cpu_node_map_t)); + } else { + lgrp_plat_cpu_node_nentries = 0; } /* @@ -1240,10 +1525,10 @@ */ (void) lgrp_plat_process_cpu_apicids(lgrp_plat_cpu_node); - retval = lgrp_plat_process_srat(srat_ptr, + retval = lgrp_plat_process_srat(srat_ptr, msct_ptr, &lgrp_plat_prox_domain_min, lgrp_plat_node_domain, lgrp_plat_cpu_node, - lgrp_plat_apic_ncpus, lgrp_plat_node_memory); + lgrp_plat_apic_ncpus, lgrp_plat_memnode_info); if (retval <= 0) { lgrp_plat_srat_error = retval; lgrp_plat_node_cnt = 1; @@ -1260,15 +1545,12 @@ if ((lgrp_plat_apic_ncpus <= 0 || lgrp_plat_srat_error != 0) && is_opteron()) opt_get_numa_config(&lgrp_plat_node_cnt, &lgrp_plat_mem_intrlv, - lgrp_plat_node_memory); + lgrp_plat_memnode_info); /* * Don't bother to setup system for multiple lgroups and only use one * memory node when memory is interleaved between any nodes or there is * only one NUMA node - * - * NOTE: May need to change this for Dynamic Reconfiguration (DR) - * when and if it happens for x86/x64 */ if (lgrp_plat_mem_intrlv || lgrp_plat_node_cnt == 1) { lgrp_plat_node_cnt = max_mem_nodes = 1; @@ -1287,7 +1569,7 @@ /* * There should be one memnode (physical page free list(s)) for - * each node + * each node if memory DR is disabled. */ max_mem_nodes = lgrp_plat_node_cnt; @@ -1303,8 +1585,35 @@ * exists */ lgrp_plat_slit_error = lgrp_plat_process_slit(slit_ptr, - lgrp_plat_node_cnt, lgrp_plat_node_memory, + lgrp_plat_node_domain, lgrp_plat_node_cnt, lgrp_plat_memnode_info, &lgrp_plat_lat_stats); + + /* + * Disable support of CPU/memory DR operations if multiple locality + * domains exist in the system and either of the following is true: + * 1) Failed to process the SLIT table. + * 2) Latency probing is enabled by the user.
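The membar_consumer() added to lgrp_plat_domain_to_node() above is the reader half of the publish discipline these tables share: writers fill in all fields, issue membar_producer(), then set "exists"; readers check "exists", issue membar_consumer(), then load the fields. A self-contained sketch of the pairing:

typedef struct {
	volatile int	exists;
	uint32_t	prox_domain;
} entry_t;

static void
publish(entry_t *e, uint32_t domain)
{
	e->prox_domain = domain;
	membar_producer();	/* fields visible before the flag */
	e->exists = 1;
}

static int
matches(entry_t *e, uint32_t domain)
{
	if (!e->exists)
		return (0);
	membar_consumer();	/* flag read ordered before field reads */
	return (e->prox_domain == domain);
}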
+ */ + if (lgrp_plat_node_cnt > 1 && + (plat_dr_support_cpu() || plat_dr_support_memory())) { + if (!lgrp_plat_slit_enable || lgrp_plat_slit_error != 0 || + !lgrp_plat_srat_enable || lgrp_plat_srat_error != 0 || + lgrp_plat_apic_ncpus <= 0) { + cmn_err(CE_CONT, + "?lgrp: failed to process ACPI SRAT/SLIT table, " + "disable support of CPU/memory DR operations."); + plat_dr_disable_cpu(); + plat_dr_disable_memory(); + } else if (lgrp_plat_probe_flags & LGRP_PLAT_PROBE_ENABLE) { + cmn_err(CE_CONT, + "?lgrp: latency probing enabled by user, " + "disable support of CPU/memory DR operations."); + plat_dr_disable_cpu(); + plat_dr_disable_memory(); + } + } + + /* Done if the SLIT table was processed successfully. */ if (lgrp_plat_slit_error == 0) return; @@ -1366,7 +1675,7 @@ int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT; * latencies be same */ static void -lgrp_plat_latency_adjust(node_phys_addr_map_t *node_memory, +lgrp_plat_latency_adjust(memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats, lgrp_plat_probe_stats_t *probe_stats) { int i; @@ -1387,7 +1696,7 @@ if (max_mem_nodes == 1) return; - ASSERT(node_memory != NULL && lat_stats != NULL && + ASSERT(memnode_info != NULL && lat_stats != NULL && probe_stats != NULL); /* @@ -1395,11 +1704,11 @@ * (ie. latency(node0, node1) == latency(node1, node0)) */ for (i = 0; i < lgrp_plat_node_cnt; i++) { - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; for (j = 0; j < lgrp_plat_node_cnt; j++) { - if (!node_memory[j].exists) + if (!memnode_info[j].exists) continue; t1 = lat_stats->latencies[i][j]; @@ -1452,7 +1761,7 @@ */ for (i = 0; i < lgrp_plat_node_cnt; i++) { for (j = 0; j < lgrp_plat_node_cnt; j++) { - if (!node_memory[j].exists) + if (!memnode_info[j].exists) continue; /* * Pick one pair of nodes (i, j) @@ -1469,7 +1778,7 @@ for (k = 0; k < lgrp_plat_node_cnt; k++) { for (l = 0; l < lgrp_plat_node_cnt; l++) { - if (!node_memory[l].exists) + if (!memnode_info[l].exists) continue; /* * Pick another pair of nodes (k, l) @@ -1545,7 +1854,7 @@ min = -1; max = 0; for (i = 0; i < lgrp_plat_node_cnt; i++) { - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; t = lat_stats->latencies[i][i]; if (t == 0) @@ -1559,7 +1868,7 @@ for (i = 0; i < lgrp_plat_node_cnt; i++) { int local; - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; local = lat_stats->latencies[i][i]; @@ -1590,7 +1899,7 @@ lat_stats->latency_max = 0; for (i = 0; i < lgrp_plat_node_cnt; i++) { for (j = 0; j < lgrp_plat_node_cnt; j++) { - if (!node_memory[j].exists) + if (!memnode_info[j].exists) continue; t = lat_stats->latencies[i][j]; if (t > lat_stats->latency_max) @@ -1616,7 +1925,7 @@ * -3 Local >= remote */ static int -lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory, +lgrp_plat_latency_verify(memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats) { int i; @@ -1624,7 +1933,7 @@ u_longlong_t t1; u_longlong_t t2; - ASSERT(node_memory != NULL &&
lat_stats != NULL); + ASSERT(memnode_info != NULL && lat_stats != NULL); /* * Nothing to do when this is an UMA machine, lgroup topology is @@ -1639,10 +1948,10 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory, * (ie. latency(node0, node1) == latency(node1, node0)) */ for (i = 0; i < lgrp_plat_node_cnt; i++) { - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; for (j = 0; j < lgrp_plat_node_cnt; j++) { - if (!node_memory[j].exists) + if (!memnode_info[j].exists) continue; t1 = lat_stats->latencies[i][j]; t2 = lat_stats->latencies[j][i]; @@ -1659,7 +1968,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory, */ t1 = lat_stats->latencies[0][0]; for (i = 1; i < lgrp_plat_node_cnt; i++) { - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; t2 = lat_stats->latencies[i][i]; @@ -1681,7 +1990,7 @@ lgrp_plat_latency_verify(node_phys_addr_map_t *node_memory, if (t1) { for (i = 0; i < lgrp_plat_node_cnt; i++) { for (j = 0; j < lgrp_plat_node_cnt; j++) { - if (!node_memory[j].exists) + if (!memnode_info[j].exists) continue; t2 = lat_stats->latencies[i][j]; if (i == j || t2 == 0) @@ -1729,8 +2038,7 @@ lgrp_plat_main_init(void) */ if (ht_limit == 2 && lgrp_plat_lat_stats.latency_min == -1 && lgrp_plat_lat_stats.latency_max == 0) - lgrp_plat_2level_setup(lgrp_plat_node_memory, - &lgrp_plat_lat_stats); + lgrp_plat_2level_setup(&lgrp_plat_lat_stats); return; } @@ -1769,7 +2077,7 @@ lgrp_plat_main_init(void) * Skip this node and leave its probe page NULL * if it doesn't have any memory */ - mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i); + mnode = i; if (!mem_node_config[mnode].exists) { lgrp_plat_probe_mem_config.probe_va[i] = NULL; continue; @@ -1815,10 +2123,11 @@ lgrp_plat_main_init(void) * This is a copy of the MAX_MEM_NODES == 1 version of the routine * used when MPO is disabled (i.e. single lgroup) or this is the root lgroup */ -/* ARGSUSED */ static pgcnt_t lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query) { + _NOTE(ARGUNUSED(lgrphand)); + struct memlist *mlist; pgcnt_t npgs = 0; extern struct memlist *phys_avail; @@ -1866,8 +2175,9 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt, * domain and return node ID which is index into mapping table. */ if (!node_domain[node].exists) { - node_domain[node].exists = 1; node_domain[node].prox_domain = domain; + membar_producer(); + node_domain[node].exists = 1; return (node); } @@ -1887,18 +2197,17 @@ lgrp_plat_node_domain_update(node_domain_map_t *node_domain, int node_cnt, return (-1); } - /* * Update node memory information for given proximity domain with specified * starting and ending physical address range (and return positive numbers for * success and negative ones for errors) */ static int -lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt, - node_phys_addr_map_t *node_memory, uint64_t start, uint64_t end, - uint32_t domain) +lgrp_plat_memnode_info_update(node_domain_map_t *node_domain, int node_cnt, + memnode_phys_addr_map_t *memnode_info, int memnode_cnt, uint64_t start, + uint64_t end, uint32_t domain, uint32_t device_id) { - int node; + int node, mnode; /* * Get node number for proximity domain @@ -1912,13 +2221,53 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt, } /* + * This function is called during boot if device_id is + * ACPI_MEMNODE_DEVID_BOOT, otherwise it's called at runtime for + * memory DR operations. 
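The runtime branch below allocates from a split memnode ID space; a sketch of that layout and allocation (the policy is stated in the file's opening comment, and lgrp_plat_max_mem_node is the patch's high-water mark):

/*
 *   [0 .............. lgrp_plat_node_cnt)  boot memnodes, node == memnode
 *   [lgrp_plat_node_cnt .. max_mem_nodes)  hot-added memnodes, 1:N per lgrp
 *
 * Allocation is single-threaded by the DR framework, so a plain
 * increment of the high-water mark suffices.
 */
static int
alloc_hotadd_memnode(int memnode_cnt)
{
	if (lgrp_plat_max_mem_node >= memnode_cnt)
		return (-1);			/* table full */
	return ((int)lgrp_plat_max_mem_node++);
}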
+ */ + if (device_id != ACPI_MEMNODE_DEVID_BOOT) { + ASSERT(lgrp_plat_max_mem_node <= memnode_cnt); + + for (mnode = lgrp_plat_node_cnt; + mnode < lgrp_plat_max_mem_node; mnode++) { + if (memnode_info[mnode].exists && + memnode_info[mnode].prox_domain == domain && + memnode_info[mnode].device_id == device_id) { + if (btop(start) < memnode_info[mnode].start) + memnode_info[mnode].start = btop(start); + if (btop(end) > memnode_info[mnode].end) + memnode_info[mnode].end = btop(end); + return (1); + } + } + + if (lgrp_plat_max_mem_node >= memnode_cnt) { + return (-3); + } else { + lgrp_plat_max_mem_node++; + memnode_info[mnode].start = btop(start); + memnode_info[mnode].end = btop(end); + memnode_info[mnode].prox_domain = domain; + memnode_info[mnode].device_id = device_id; + memnode_info[mnode].lgrphand = node; + membar_producer(); + memnode_info[mnode].exists = 1; + return (0); + } + } + + /* * Create entry in table for node if it doesn't exist */ - if (!node_memory[node].exists) { - node_memory[node].exists = 1; - node_memory[node].start = btop(start); - node_memory[node].end = btop(end); - node_memory[node].prox_domain = domain; + ASSERT(node < memnode_cnt); + if (!memnode_info[node].exists) { + memnode_info[node].start = btop(start); + memnode_info[node].end = btop(end); + memnode_info[node].prox_domain = domain; + memnode_info[node].device_id = device_id; + memnode_info[node].lgrphand = node; + membar_producer(); + memnode_info[node].exists = 1; return (0); } @@ -1928,11 +2277,11 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt, * There may be more than one SRAT memory entry for a domain, so we may * need to update existing start or end address for the node. */ - if (node_memory[node].prox_domain == domain) { - if (btop(start) < node_memory[node].start) - node_memory[node].start = btop(start); - if (btop(end) > node_memory[node].end) - node_memory[node].end = btop(end); + if (memnode_info[node].prox_domain == domain) { + if (btop(start) < memnode_info[node].start) + memnode_info[node].start = btop(start); + if (btop(end) > memnode_info[node].end) + memnode_info[node].end = btop(end); return (1); } return (-2); @@ -1940,16 +2289,14 @@ lgrp_plat_node_memory_update(node_domain_map_t *node_domain, int node_cnt, /* - * Have to sort node by starting physical address because VM system (physical - * page free list management) assumes and expects memnodes to be sorted in - * ascending order by physical address. If not, the kernel will panic in - * potentially a number of different places. (:-( - * NOTE: This workaround will not be sufficient if/when hotplugging memory is - * supported on x86/x64. + * Have to sort nodes by starting physical address because plat_mnode_xcheck() + * assumes and expects memnodes to be sorted in ascending order by physical + * address. 
*/ static void lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, - cpu_node_map_t *cpu_node, int cpu_count, node_phys_addr_map_t *node_memory) + cpu_node_map_t *cpu_node, int cpu_count, + memnode_phys_addr_map_t *memnode_info) { boolean_t found; int i; @@ -1959,7 +2306,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, boolean_t swapped; if (!lgrp_plat_node_sort_enable || node_cnt <= 1 || - node_domain == NULL || node_memory == NULL) + node_domain == NULL || memnode_info == NULL) return; /* @@ -1970,7 +2317,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, /* * Skip entries that don't exist */ - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; /* @@ -1978,7 +2325,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, */ found = B_FALSE; for (j = i + 1; j < node_cnt; j++) { - if (node_memory[j].exists) { + if (memnode_info[j].exists) { found = B_TRUE; break; } @@ -1994,7 +2341,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, * Not sorted if starting address of current entry is bigger * than starting address of next existing entry */ - if (node_memory[i].start > node_memory[j].start) { + if (memnode_info[i].start > memnode_info[j].start) { sorted = B_FALSE; break; } @@ -2017,7 +2364,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, /* * Skip entries that don't exist */ - if (!node_memory[i].exists) + if (!memnode_info[i].exists) continue; /* @@ -2025,7 +2372,7 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, */ found = B_FALSE; for (j = i + 1; j <= n; j++) { - if (node_memory[j].exists) { + if (memnode_info[j].exists) { found = B_TRUE; break; } @@ -2037,8 +2384,8 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, if (found == B_FALSE) break; - if (node_memory[i].start > node_memory[j].start) { - node_phys_addr_map_t save_addr; + if (memnode_info[i].start > memnode_info[j].start) { + memnode_phys_addr_map_t save_addr; node_domain_map_t save_node; /* @@ -2054,12 +2401,12 @@ lgrp_plat_node_sort(node_domain_map_t *node_domain, int node_cnt, /* * Swap node to physical memory assignments */ - bcopy(&node_memory[i], &save_addr, - sizeof (node_phys_addr_map_t)); - bcopy(&node_memory[j], &node_memory[i], - sizeof (node_phys_addr_map_t)); - bcopy(&save_addr, &node_memory[j], - sizeof (node_phys_addr_map_t)); + bcopy(&memnode_info[i], &save_addr, + sizeof (memnode_phys_addr_map_t)); + bcopy(&memnode_info[j], &memnode_info[i], + sizeof (memnode_phys_addr_map_t)); + bcopy(&save_addr, &memnode_info[j], + sizeof (memnode_phys_addr_map_t)); swapped = B_TRUE; } } @@ -2226,11 +2573,12 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node) return (-1); /* - * Calculate number of entries in array and return when there's just - * one CPU since that's not very interesting for NUMA + * Calculate number of entries in array and return when the system is + * not very interesting for NUMA. It's not interesting for NUMA if + * system has only one CPU and doesn't support CPU hotplug. 
*/ n = boot_prop_len / sizeof (uint8_t); - if (n == 1) + if (n == 1 && !plat_dr_support_cpu()) return (-2); /* @@ -2243,21 +2591,31 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node) * Just return number of CPU APIC IDs if CPU to node mapping table is * NULL */ - if (cpu_node == NULL) - return (n); + if (cpu_node == NULL) { + if (plat_dr_support_cpu() && n >= boot_ncpus) { + return (boot_ncpus); + } else { + return (n); + } + } /* * Fill in CPU to node ID mapping table with APIC ID for each CPU */ for (i = 0; i < n; i++) { + /* Only add boot CPUs into the map if CPU DR is enabled. */ + if (plat_dr_support_cpu() && i >= boot_ncpus) + break; cpu_node[i].exists = 1; cpu_node[i].apicid = cpu_apicid_array[i]; + cpu_node[i].prox_domain = UINT32_MAX; + cpu_node[i].node = UINT_MAX; } /* * Return number of CPUs based on number of APIC IDs */ - return (n); + return (i); } @@ -2266,11 +2624,14 @@ lgrp_plat_process_cpu_apicids(cpu_node_map_t *cpu_node) * NUMA node is from each other */ static int -lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, - node_phys_addr_map_t *node_memory, lgrp_plat_latency_stats_t *lat_stats) +lgrp_plat_process_slit(struct slit *tp, + node_domain_map_t *node_domain, uint_t node_cnt, + memnode_phys_addr_map_t *memnode_info, lgrp_plat_latency_stats_t *lat_stats) { int i; int j; + int src; + int dst; int localities; hrtime_t max; hrtime_t min; @@ -2284,8 +2645,6 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, return (2); localities = tp->number; - if (localities != node_cnt) - return (3); min = lat_stats->latency_min; max = lat_stats->latency_max; @@ -2295,11 +2654,21 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, */ slit_entries = tp->entry; for (i = 0; i < localities; i++) { + src = lgrp_plat_domain_to_node(node_domain, + node_cnt, i); + if (src == -1) + continue; + for (j = 0; j < localities; j++) { uint8_t latency; + dst = lgrp_plat_domain_to_node(node_domain, + node_cnt, j); + if (dst == -1) + continue; + latency = slit_entries[(i * localities) + j]; - lat_stats->latencies[i][j] = latency; + lat_stats->latencies[src][dst] = latency; if (latency < min || min == -1) min = latency; if (latency > max) @@ -2310,7 +2679,7 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, /* * Verify that latencies/distances given in SLIT look reasonable */ - retval = lgrp_plat_latency_verify(node_memory, lat_stats); + retval = lgrp_plat_latency_verify(memnode_info, lat_stats); if (retval) { /* @@ -2334,20 +2703,110 @@ lgrp_plat_process_slit(struct slit *tp, uint_t node_cnt, /* + * Update lgrp latencies according to information returned by ACPI _SLI method. + */ +static int +lgrp_plat_process_sli(uint32_t domain_id, uchar_t *sli_info, + uint32_t sli_cnt, node_domain_map_t *node_domain, uint_t node_cnt, + lgrp_plat_latency_stats_t *lat_stats) +{ + int i; + int src, dst; + uint8_t latency; + hrtime_t max, min; + + if (lat_stats == NULL || sli_info == NULL || + sli_cnt == 0 || domain_id >= sli_cnt) + return (-1); + + src = lgrp_plat_domain_to_node(node_domain, node_cnt, domain_id); + if (src == -1) { + src = lgrp_plat_node_domain_update(node_domain, node_cnt, + domain_id); + if (src == -1) + return (-1); + } + + /* + * Don't update latency info if topology has been flattened to 2 levels. + */ + if (lgrp_plat_topo_flatten != 0) { + return (0); + } + + /* + * Latency information for proximity domain is ready. + * TODO: support adjusting latency information at runtime. 
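The checks below assume the _SLI buffer holds 2 * sli_cnt bytes: a row of latencies from this domain to every domain, then a column of latencies from every domain back. Self latency must equal ACPI_SLIT_SELF_LATENCY (10, per the SLIT convention) and remote entries must be larger and symmetric. A sketch of the per-entry predicate:

static boolean_t
sli_entry_ok(const uchar_t *sli_info, uint32_t sli_cnt, uint32_t self,
    uint32_t i)
{
	uchar_t row = sli_info[i];		/* this domain -> domain i */
	uchar_t col = sli_info[sli_cnt + i];	/* domain i -> this domain */

	if (i == self)
		return (row == ACPI_SLIT_SELF_LATENCY &&
		    col == ACPI_SLIT_SELF_LATENCY);
	return (row > ACPI_SLIT_SELF_LATENCY &&
	    col > ACPI_SLIT_SELF_LATENCY && row == col);
}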
+ */ + if (lat_stats->latencies[src][src] != 0) { + return (0); + } + + /* Validate latency information. */ + for (i = 0; i < sli_cnt; i++) { + if (i == domain_id) { + if (sli_info[i] != ACPI_SLIT_SELF_LATENCY || + sli_info[sli_cnt + i] != ACPI_SLIT_SELF_LATENCY) { + return (-1); + } + } else { + if (sli_info[i] <= ACPI_SLIT_SELF_LATENCY || + sli_info[sli_cnt + i] <= ACPI_SLIT_SELF_LATENCY || + sli_info[i] != sli_info[sli_cnt + i]) { + return (-1); + } + } + } + + min = lat_stats->latency_min; + max = lat_stats->latency_max; + for (i = 0; i < sli_cnt; i++) { + dst = lgrp_plat_domain_to_node(node_domain, node_cnt, i); + if (dst == -1) + continue; + + ASSERT(sli_info[i] == sli_info[sli_cnt + i]); + + /* Update row in latencies matrix. */ + latency = sli_info[i]; + lat_stats->latencies[src][dst] = latency; + if (latency < min || min == -1) + min = latency; + if (latency > max) + max = latency; + + /* Update column in latencies matrix. */ + latency = sli_info[sli_cnt + i]; + lat_stats->latencies[dst][src] = latency; + if (latency < min || min == -1) + min = latency; + if (latency > max) + max = latency; + } + lat_stats->latency_min = min; + lat_stats->latency_max = max; + + return (0); +} + + +/* * Read ACPI System Resource Affinity Table (SRAT) to determine which CPUs * and memory are local to each other in the same NUMA node and return number * of nodes */ static int -lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min, - node_domain_map_t *node_domain, cpu_node_map_t *cpu_node, int cpu_count, - node_phys_addr_map_t *node_memory) +lgrp_plat_process_srat(struct srat *tp, struct msct *mp, + uint32_t *prox_domain_min, node_domain_map_t *node_domain, + cpu_node_map_t *cpu_node, int cpu_count, + memnode_phys_addr_map_t *memnode_info) { struct srat_item *srat_end; int i; struct srat_item *item; int node_cnt; int proc_entry_count; + int rc; /* * Nothing to do when no SRAT or disabled @@ -2356,11 +2815,21 @@ return (-1); /* - * Determine number of nodes by counting number of proximity domains in - * SRAT and return if number of nodes is 1 or less since don't need to - * read SRAT then + * Try to get domain information from the MSCT. + * Per ACPI 4.0, OSPM will use information provided by the MSCT only + * when the System Resource Affinity Table (SRAT) exists. + */ + node_cnt = lgrp_plat_msct_domains(mp, prox_domain_min); + if (node_cnt <= 0) { + /* + * Determine the number of nodes by counting the number of + * proximity domains in the SRAT. + */ + node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min); + } + /* + * Return if the number of nodes is 1 or less, since the SRAT doesn't + * need to be read then.
*/ - node_cnt = lgrp_plat_srat_domains(tp, prox_domain_min); if (node_cnt == 1) return (1); else if (node_cnt <= 0) @@ -2397,16 +2866,17 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min, } apic_id = item->i.p.apic_id; - if (lgrp_plat_cpu_node_update(node_domain, node_cnt, - cpu_node, cpu_count, apic_id, domain) < 0) + rc = lgrp_plat_cpu_node_update(node_domain, node_cnt, + cpu_node, cpu_count, apic_id, domain); + if (rc < 0) return (-3); - - proc_entry_count++; + else if (rc == 0) + proc_entry_count++; break; case SRAT_MEMORY: /* memory entry */ if (!(item->i.m.flags & SRAT_ENABLED) || - node_memory == NULL) + memnode_info == NULL) break; /* @@ -2418,10 +2888,52 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min, length = item->i.m.len; end = start + length - 1; - if (lgrp_plat_node_memory_update(node_domain, node_cnt, - node_memory, start, end, domain) < 0) + /* + * According to ACPI 4.0, both ENABLE and HOTPLUG flags + * may be set for memory address range entries in SRAT + * table which are reserved for memory hot plug. + * We intersect memory address ranges in SRAT table + * with memory ranges in physinstalled to filter out + * memory address ranges reserved for hot plug. + */ + if (item->i.m.flags & SRAT_HOT_PLUG) { + uint64_t rstart = UINT64_MAX; + uint64_t rend = 0; + struct memlist *ml; + extern struct bootops *bootops; + + memlist_read_lock(); + for (ml = bootops->boot_mem->physinstalled; + ml; ml = ml->ml_next) { + uint64_t tstart = ml->ml_address; + uint64_t tend; + + tend = ml->ml_address + ml->ml_size; + if (tstart > end || tend < start) + continue; + if (start > tstart) + tstart = start; + if (rstart > tstart) + rstart = tstart; + if (end < tend) + tend = end; + if (rend < tend) + rend = tend; + } + memlist_read_unlock(); + start = rstart; + end = rend; + /* Skip this entry if no memory installed. */ + if (start > end) + break; + } + + if (lgrp_plat_memnode_info_update(node_domain, + node_cnt, memnode_info, node_cnt, + start, end, domain, ACPI_MEMNODE_DEVID_BOOT) < 0) return (-4); break; + case SRAT_X2APIC: /* x2apic CPU entry */ if (!(item->i.xp.flags & SRAT_ENABLED) || cpu_node == NULL) @@ -2434,11 +2946,12 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min, domain = item->i.xp.domain; apic_id = item->i.xp.x2apic_id; - if (lgrp_plat_cpu_node_update(node_domain, node_cnt, - cpu_node, cpu_count, apic_id, domain) < 0) + rc = lgrp_plat_cpu_node_update(node_domain, node_cnt, + cpu_node, cpu_count, apic_id, domain); + if (rc < 0) return (-3); - - proc_entry_count++; + else if (rc == 0) + proc_entry_count++; break; default: @@ -2460,7 +2973,7 @@ lgrp_plat_process_srat(struct srat *tp, uint32_t *prox_domain_min, * physical address */ lgrp_plat_node_sort(node_domain, node_cnt, cpu_node, cpu_count, - node_memory); + memnode_info); return (node_cnt); } @@ -2665,27 +3178,70 @@ lgrp_plat_srat_domains(struct srat *tp, uint32_t *prox_domain_min) /* + * Parse domain information in ACPI Maximum System Capability Table (MSCT). + * MSCT table has been verified in function process_msct() in fakebop.c. 
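The SRAT_HOT_PLUG filtering above is an interval intersection against installed memory; the same computation in isolation, using a half-open [*start, *end) convention rather than the patch's inclusive end (a sketch):

static boolean_t
clip_to_installed(const struct memlist *ml, uint64_t *start, uint64_t *end)
{
	uint64_t rstart = UINT64_MAX, rend = 0;

	for (; ml != NULL; ml = ml->ml_next) {
		uint64_t tstart = ml->ml_address;
		uint64_t tend = ml->ml_address + ml->ml_size;

		if (tstart >= *end || tend <= *start)
			continue;		/* no overlap */
		if (tstart < *start)
			tstart = *start;
		if (tend > *end)
			tend = *end;
		if (tstart < rstart)
			rstart = tstart;
		if (tend > rend)
			rend = tend;
	}
	if (rstart >= rend)
		return (B_FALSE);	/* range reserved purely for hot plug */
	*start = rstart;
	*end = rend;
	return (B_TRUE);
}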
+ */
+static int
+lgrp_plat_msct_domains(struct msct *tp, uint32_t *prox_domain_min)
+{
+	int last_seen = 0;
+	uint32_t proxmin = UINT32_MAX;
+	struct msct_proximity_domain *item, *end;
+
+	if (tp == NULL || lgrp_plat_msct_enable == 0)
+		return (-1);
+
+	if (tp->maximum_proximity_domains >= MAX_NODES) {
+		cmn_err(CE_CONT,
+		    "?lgrp: too many proximity domains (%d), max %d "
+		    "supported, disabling support for CPU/memory DR "
+		    "operations.",
+		    tp->maximum_proximity_domains + 1, MAX_NODES);
+		plat_dr_disable_cpu();
+		plat_dr_disable_memory();
+		return (-1);
+	}
+
+	if (prox_domain_min != NULL) {
+		end = (void *)(tp->hdr.len + (uintptr_t)tp);
+		for (item = (void *)((uintptr_t)tp +
+		    tp->proximity_domain_offset); item < end;
+		    item = (void *)(item->length + (uintptr_t)item)) {
+			if (item->domain_min < proxmin) {
+				proxmin = item->domain_min;
+			}
+
+			last_seen = item->domain_max - item->domain_min + 1;
+			/*
+			 * Break out if all proximity domains have been
+			 * processed. Some BIOSes may have unused items
+			 * at the end of the MSCT table.
+			 */
+			if (last_seen > tp->maximum_proximity_domains) {
+				break;
+			}
+		}
+		*prox_domain_min = proxmin;
+	}
+
+	return (tp->maximum_proximity_domains + 1);
+}
+
+
 /*
 * Set lgroup latencies for 2 level lgroup topology
 */
static void
-lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
-    lgrp_plat_latency_stats_t *lat_stats)
+lgrp_plat_2level_setup(lgrp_plat_latency_stats_t *lat_stats)
{
-	int i;
+	int i, j;

-	ASSERT(node_memory != NULL && lat_stats != NULL);
+	ASSERT(lat_stats != NULL);

	if (lgrp_plat_node_cnt >= 4)
		cmn_err(CE_NOTE,
		    "MPO only optimizing for local and remote\n");
	for (i = 0; i < lgrp_plat_node_cnt; i++) {
-		int j;
-
-		if (!node_memory[i].exists)
-			continue;
		for (j = 0; j < lgrp_plat_node_cnt; j++) {
-			if (!node_memory[j].exists)
-				continue;
			if (i == j)
				lat_stats->latencies[i][j] = 2;
			else
@@ -2694,7 +3250,9 @@ lgrp_plat_2level_setup(node_phys_addr_map_t *node_memory,
	}
	lat_stats->latency_min = 2;
	lat_stats->latency_max = 3;
+	/* TODO: check it.
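+	 * A hedged reading of the call below, inferred from the lgrp
+	 * framework's naming and not verified here: the LGRP_CONFIG_FLATTEN
+	 * event asks the framework to collapse the lgroup topology to at
+	 * most two levels (root plus leaves), matching the two-level
+	 * latency table just built, and lgrp_plat_topo_flatten records
+	 * that the flattening happened.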
*/ lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0); + lgrp_plat_topo_flatten = 1; } @@ -2884,7 +3442,7 @@ is_opteron(void) */ static void opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv, - node_phys_addr_map_t *node_memory) + memnode_phys_addr_map_t *memnode_info) { uint_t bus; uint_t dev; @@ -2999,10 +3557,10 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv, (base_lo & OPT_DRAMBASE_LO_MASK_WE) == 0) { /* * Mark node memory as non-existent and set start and - * end addresses to be same in node_memory[] + * end addresses to be same in memnode_info[] */ - node_memory[node].exists = 0; - node_memory[node].start = node_memory[node].end = + memnode_info[node].exists = 0; + memnode_info[node].start = memnode_info[node].end = (pfn_t)-1; continue; } @@ -3011,11 +3569,11 @@ opt_get_numa_config(uint_t *node_cnt, int *mem_intrlv, * Mark node memory as existing and remember physical address * range of each node for use later */ - node_memory[node].exists = 1; + memnode_info[node].exists = 1; - node_memory[node].start = btop(OPT_DRAMADDR(base_hi, base_lo)); + memnode_info[node].start = btop(OPT_DRAMADDR(base_hi, base_lo)); - node_memory[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) | + memnode_info[node].end = btop(OPT_DRAMADDR(limit_hi, limit_lo) | OPT_DRAMADDR_LO_MASK_OFF); } diff --git a/usr/src/uts/i86pc/os/machdep.c b/usr/src/uts/i86pc/os/machdep.c index c9f97fc046..e9d08a9fa7 100644 --- a/usr/src/uts/i86pc/os/machdep.c +++ b/usr/src/uts/i86pc/os/machdep.c @@ -23,6 +23,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/t_lock.h> @@ -147,6 +151,16 @@ int force_shutdown_method = AD_UNKNOWN; char panicbuf[PANICBUFSIZE]; /* + * Flags to control Dynamic Reconfiguration features. + */ +uint64_t plat_dr_options; + +/* + * Maximum physical address for memory DR operations. + */ +uint64_t plat_dr_physmax; + +/* * maxphys - used during physio * klustsize - used for klustering by swapfs and specfs */ @@ -1390,3 +1404,33 @@ lbolt_softint_post(void) { (*setsoftint)(CBE_LOCK_PIL, lbolt_softint_hdl.ih_pending); } + +boolean_t +plat_dr_check_capability(uint64_t features) +{ + return ((plat_dr_options & features) == features); +} + +boolean_t +plat_dr_support_cpu(void) +{ + return (plat_dr_options & PLAT_DR_FEATURE_CPU); +} + +boolean_t +plat_dr_support_memory(void) +{ + return (plat_dr_options & PLAT_DR_FEATURE_MEMORY); +} + +void +plat_dr_enable_capability(uint64_t features) +{ + atomic_or_64(&plat_dr_options, features); +} + +void +plat_dr_disable_capability(uint64_t features) +{ + atomic_and_64(&plat_dr_options, ~features); +} diff --git a/usr/src/uts/i86pc/os/mem_config_arch.c b/usr/src/uts/i86pc/os/mem_config_arch.c new file mode 100644 index 0000000000..191e8cb641 --- /dev/null +++ b/usr/src/uts/i86pc/os/mem_config_arch.c @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1997-1998 by Sun Microsystems, Inc. + * All rights reserved. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ + +#include <sys/types.h> +#include <sys/errno.h> +#include <sys/debug.h> +#include <vm/page.h> +#include <sys/mem_config.h> + +/*ARGSUSED*/ +int +arch_kphysm_del_span_ok(pfn_t base, pgcnt_t npgs) +{ + ASSERT(npgs != 0); + return (0); +} + +/*ARGSUSED*/ +int +arch_kphysm_relocate(pfn_t base, pgcnt_t npgs) +{ + ASSERT(npgs != 0); + return (ENOTSUP); +} + +int +arch_kphysm_del_supported(void) +{ + return (0); +} diff --git a/usr/src/uts/i86pc/os/memnode.c b/usr/src/uts/i86pc/os/memnode.c index 908b2e8cd2..35185bcd84 100644 --- a/usr/src/uts/i86pc/os/memnode.c +++ b/usr/src/uts/i86pc/os/memnode.c @@ -84,7 +84,7 @@ mem_node_add_slice(pfn_t start, pfn_t end) } mnode = PFN_2_MEM_NODE(start); - ASSERT(mnode < max_mem_nodes); + ASSERT(mnode >= 0 && mnode < max_mem_nodes); if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) { /* @@ -130,7 +130,7 @@ mem_node_del_slice(pfn_t start, pfn_t end) } mnode = PFN_2_MEM_NODE(start); - ASSERT(mnode < max_mem_nodes); + ASSERT(mnode >= 0 && mnode < max_mem_nodes); ASSERT(mem_node_config[mnode].exists == 1); delta_pgcnt = end - start; diff --git a/usr/src/uts/i86pc/os/microcode.c b/usr/src/uts/i86pc/os/microcode.c index 6f32a86c71..90d9585640 100644 --- a/usr/src/uts/i86pc/os/microcode.c +++ b/usr/src/uts/i86pc/os/microcode.c @@ -141,6 +141,7 @@ void ucode_alloc_space(cpu_t *cp) { ASSERT(cp->cpu_id != 0); + ASSERT(cp->cpu_m.mcpu_ucode_info == NULL); cp->cpu_m.mcpu_ucode_info = kmem_zalloc(sizeof (*cp->cpu_m.mcpu_ucode_info), KM_SLEEP); } @@ -148,9 +149,11 @@ ucode_alloc_space(cpu_t *cp) void ucode_free_space(cpu_t *cp) { - ASSERT(cp->cpu_id != 0); + ASSERT(cp->cpu_m.mcpu_ucode_info != NULL); + ASSERT(cp->cpu_m.mcpu_ucode_info != &cpu_ucode_info0); kmem_free(cp->cpu_m.mcpu_ucode_info, sizeof (*cp->cpu_m.mcpu_ucode_info)); + cp->cpu_m.mcpu_ucode_info = NULL; } /* @@ -1126,7 +1129,10 @@ ucode_check(cpu_t *cp) uint32_t new_rev = 0; ASSERT(cp); - if (cp->cpu_id == 0) + /* + * Space statically allocated for BSP, ensure pointer is set + */ + if (cp->cpu_id == 0 && cp->cpu_m.mcpu_ucode_info == NULL) cp->cpu_m.mcpu_ucode_info = &cpu_ucode_info0; uinfop = cp->cpu_m.mcpu_ucode_info; diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c index 3f8752638f..bf57dd3c50 100644 --- a/usr/src/uts/i86pc/os/mlsetup.c +++ b/usr/src/uts/i86pc/os/mlsetup.c @@ -22,6 +22,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/sysmacros.h> @@ -107,6 +111,7 @@ mlsetup(struct regs *rp) extern char t0stack[]; extern int post_fastreboot; extern int console; + extern uint64_t plat_dr_options; ASSERT_STACK_ALIGNED(); @@ -354,15 +359,70 @@ mlsetup(struct regs *rp) prom_init("kernel", (void *)NULL); - if (bootprop_getval("boot-ncpus", &prop_value) != 0) + /* User-set option overrides firmware value. 
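+	 * For example, assuming PLAT_DR_OPTIONS_NAME names a boot property
+	 * called "plat-dr-options" (an illustrative name, not confirmed
+	 * here), CPU and memory DR could be requested from the boot loader
+	 * with an option such as
+	 *	-B plat-dr-options=0x3
+	 * where the bits correspond to the PLAT_DR_FEATURE_* flags masked
+	 * below; see eeprom(1M) for setting boot properties persistently.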
*/
+	if (bootprop_getval(PLAT_DR_OPTIONS_NAME, &prop_value) == 0) {
+		plat_dr_options = (uint64_t)prop_value;
+	}
+#if defined(__xpv)
+	/* No support for DR operations on xpv */
+	plat_dr_options = 0;
+#else	/* __xpv */
+	/* Flag PLAT_DR_FEATURE_ENABLED should only be set by DR driver. */
+	plat_dr_options &= ~PLAT_DR_FEATURE_ENABLED;
+#ifndef	__amd64
+	/* Only enable CPU/memory DR on 64-bit kernels. */
+	plat_dr_options &= ~PLAT_DR_FEATURE_MEMORY;
+	plat_dr_options &= ~PLAT_DR_FEATURE_CPU;
+#endif	/* __amd64 */
+#endif	/* __xpv */
+
+	/*
+	 * Get value of "plat_dr_physmax" boot option.
+	 * It overrides values calculated from MSCT or SRAT table.
+	 */
+	if (bootprop_getval(PLAT_DR_PHYSMAX_NAME, &prop_value) == 0) {
+		plat_dr_physmax = ((uint64_t)prop_value) >> PAGESHIFT;
+	}
+
+	/* Get value of boot_ncpus. */
+	if (bootprop_getval(BOOT_NCPUS_NAME, &prop_value) != 0) {
		boot_ncpus = NCPU;
-	else {
+	} else {
		boot_ncpus = (int)prop_value;
		if (boot_ncpus <= 0 || boot_ncpus > NCPU)
			boot_ncpus = NCPU;
	}

-	max_ncpus = boot_max_ncpus = boot_ncpus;
+	/*
+	 * Set max_ncpus and boot_max_ncpus to boot_ncpus if platform doesn't
+	 * support CPU DR operations.
+	 */
+	if (plat_dr_support_cpu() == 0) {
+		max_ncpus = boot_max_ncpus = boot_ncpus;
+	} else {
+		if (bootprop_getval(PLAT_MAX_NCPUS_NAME, &prop_value) != 0) {
+			max_ncpus = NCPU;
+		} else {
+			max_ncpus = (int)prop_value;
+			if (max_ncpus <= 0 || max_ncpus > NCPU) {
+				max_ncpus = NCPU;
+			}
+			if (boot_ncpus > max_ncpus) {
+				boot_ncpus = max_ncpus;
+			}
+		}
+
+		if (bootprop_getval(BOOT_MAX_NCPUS_NAME, &prop_value) != 0) {
+			boot_max_ncpus = boot_ncpus;
+		} else {
+			boot_max_ncpus = (int)prop_value;
+			if (boot_max_ncpus <= 0 || boot_max_ncpus > NCPU) {
+				boot_max_ncpus = boot_ncpus;
+			} else if (boot_max_ncpus > max_ncpus) {
+				boot_max_ncpus = max_ncpus;
+			}
+		}
+	}

	/*
	 * Initialize the lgrp framework
diff --git a/usr/src/uts/i86pc/os/mp_implfuncs.c b/usr/src/uts/i86pc/os/mp_implfuncs.c
index b5742b2c69..3bded8f124 100644
--- a/usr/src/uts/i86pc/os/mp_implfuncs.c
+++ b/usr/src/uts/i86pc/os/mp_implfuncs.c
@@ -22,12 +22,12 @@
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
+#define	PSMI_1_7

#include <sys/vmem.h>
#include <vm/hat.h>
#include <sys/modctl.h>
#include <vm/seg_kmem.h>
-#define	PSMI_1_6
#include <sys/psm.h>
#include <sys/psm_modctl.h>
#include <sys/smp_impldefs.h>
diff --git a/usr/src/uts/i86pc/os/mp_machdep.c b/usr/src/uts/i86pc/os/mp_machdep.c
index 80e371850b..dc4023007c 100644
--- a/usr/src/uts/i86pc/os/mp_machdep.c
+++ b/usr/src/uts/i86pc/os/mp_machdep.c
@@ -24,11 +24,11 @@
 * Use is subject to license terms.
 */
/*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
 * All rights reserved.
*/ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/smp_impldefs.h> #include <sys/psm.h> #include <sys/psm_modctl.h> @@ -157,6 +157,7 @@ void (*notify_error)(int, char *) = (void (*)(int, char *))return_instr; void (*hrtime_tick)(void) = return_instr; int (*psm_cpu_create_devinfo)(cpu_t *, dev_info_t **) = mach_cpu_create_devinfo; +int (*psm_cpu_get_devinfo)(cpu_t *, dev_info_t **) = NULL; /* * True if the generic TSC code is our source of hrtime, rather than whatever @@ -864,9 +865,17 @@ mach_get_platform(int owner) total_ops = OFFSETOF(struct psm_ops, psm_preshutdown) / sizeof (void (*)(void)); else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_4) - /* no psm_preshutdown function */ + /* no psm_intr_ops function */ total_ops = OFFSETOF(struct psm_ops, psm_intr_ops) / sizeof (void (*)(void)); + else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_5) + /* no psm_state function */ + total_ops = OFFSETOF(struct psm_ops, psm_state) / + sizeof (void (*)(void)); + else if (mach_ver[owner] == (ushort_t)PSM_INFO_VER01_6) + /* no psm_cpu_ops function */ + total_ops = OFFSETOF(struct psm_ops, psm_cpu_ops) / + sizeof (void (*)(void)); else total_ops = sizeof (struct psm_ops) / sizeof (void (*)(void)); @@ -1042,7 +1051,11 @@ mach_smpinit(void) cpu_id = -1; cpu_id = (*pops->psm_get_next_processorid)(cpu_id); - for (cnt = 0; cpu_id != -1; cnt++) { + /* + * Only add boot_ncpus CPUs to mp_cpus. Other CPUs will be handled + * by CPU DR driver at runtime. + */ + for (cnt = 0; cpu_id != -1 && cnt < boot_ncpus; cnt++) { CPUSET_ADD(cpumask, cpu_id); cpu_id = (*pops->psm_get_next_processorid)(cpu_id); } @@ -1092,7 +1105,7 @@ mach_smpinit(void) psm_enable_intr = pops->psm_enable_intr; /* check for multiple CPUs */ - if (cnt < 2) + if (cnt < 2 && plat_dr_support_cpu() == B_FALSE) return; /* check for MP platforms */ @@ -1480,6 +1493,63 @@ mach_cpuid_start(processorid_t id, void *ctx) return ((*pops->psm_cpu_start)(id, ctx)); } +int +mach_cpu_stop(cpu_t *cp, void *ctx) +{ + struct psm_ops *pops = mach_set[0]; + psm_cpu_request_t request; + + if (pops->psm_cpu_ops == NULL) { + return (ENOTSUP); + } + + ASSERT(cp->cpu_id != -1); + request.pcr_cmd = PSM_CPU_STOP; + request.req.cpu_stop.cpuid = cp->cpu_id; + request.req.cpu_stop.ctx = ctx; + + return ((*pops->psm_cpu_ops)(&request)); +} + +int +mach_cpu_add(mach_cpu_add_arg_t *argp, processorid_t *cpuidp) +{ + int rc; + struct psm_ops *pops = mach_set[0]; + psm_cpu_request_t request; + + if (pops->psm_cpu_ops == NULL) { + return (ENOTSUP); + } + + request.pcr_cmd = PSM_CPU_ADD; + request.req.cpu_add.argp = argp; + request.req.cpu_add.cpuid = -1; + rc = (*pops->psm_cpu_ops)(&request); + if (rc == 0) { + ASSERT(request.req.cpu_add.cpuid != -1); + *cpuidp = request.req.cpu_add.cpuid; + } + + return (rc); +} + +int +mach_cpu_remove(processorid_t cpuid) +{ + struct psm_ops *pops = mach_set[0]; + psm_cpu_request_t request; + + if (pops->psm_cpu_ops == NULL) { + return (ENOTSUP); + } + + request.pcr_cmd = PSM_CPU_REMOVE; + request.req.cpu_remove.cpuid = cpuid; + + return ((*pops->psm_cpu_ops)(&request)); +} + /* * Default handler to create device node for CPU. * One reference count will be held on created device node. 
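 *
 * A caller-side sketch of the psm_cpu_ops plumbing added above (hedged:
 * mach_cpu_add_arg_t's fields are PSM-defined and omitted here, and the
 * DR driver is the intended caller):
 *
 *	processorid_t cpuid;
 *
 *	if (mach_cpu_add(argp, &cpuid) == 0) {
 *		mutex_enter(&cpu_lock);
 *		(void) cpu_configure(cpuid);
 *		mutex_exit(&cpu_lock);
 *	}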
@@ -1569,6 +1639,24 @@ mach_cpu_create_device_node(struct cpu *cp, dev_info_t **dipp)
	return (rv);
}

+/*
+ * The dipp contains one of the following values on return:
+ * - NULL if no device node found
+ * - pointer to device node if found
+ */
+int
+mach_cpu_get_device_node(struct cpu *cp, dev_info_t **dipp)
+{
+	*dipp = NULL;
+	if (psm_cpu_get_devinfo != NULL) {
+		if (psm_cpu_get_devinfo(cp, dipp) == PSM_SUCCESS) {
+			return (PSM_SUCCESS);
+		}
+	}
+
+	return (PSM_FAILURE);
+}
+
 /*ARGSUSED*/
static int
mach_translate_irq(dev_info_t *dip, int irqno)
diff --git a/usr/src/uts/i86pc/os/mp_pc.c b/usr/src/uts/i86pc/os/mp_pc.c
index 6fc571b445..bc21812187 100644
--- a/usr/src/uts/i86pc/os/mp_pc.c
+++ b/usr/src/uts/i86pc/os/mp_pc.c
@@ -22,6 +22,10 @@
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */

/*
 * Welcome to the world of the "real mode platter".
@@ -31,6 +35,7 @@
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
+#include <sys/cpu_module.h>
#include <sys/kmem.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
@@ -41,10 +46,21 @@
#include <sys/mach_mmu.h>
#include <sys/promif.h>
#include <sys/cpu.h>
+#include <sys/cpu_event.h>
+#include <sys/sunndi.h>
+#include <sys/fs/dv_node.h>
#include <vm/hat_i86.h>
+#include <vm/as.h>
+
+extern cpuset_t cpu_ready_set;

-extern void real_mode_start(void);
-extern void real_mode_end(void);
+extern int mp_start_cpu_common(cpu_t *cp, boolean_t boot);
+extern void real_mode_start_cpu(void);
+extern void real_mode_start_cpu_end(void);
+extern void real_mode_stop_cpu_stage1(void);
+extern void real_mode_stop_cpu_stage1_end(void);
+extern void real_mode_stop_cpu_stage2(void);
+extern void real_mode_stop_cpu_stage2_end(void);
extern void *(*cpu_pause_func)(void *);

void rmp_gdt_init(rm_platter_t *);
@@ -61,22 +77,36 @@ int
mach_cpucontext_init(void)
{
	ushort_t *vec;
+	ulong_t addr;
+	struct rm_platter *rm = (struct rm_platter *)rm_platter_va;

	if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (vec), PROT_READ | PROT_WRITE)))
		return (-1);
+
	/*
	 * setup secondary cpu bios boot up vector
+	 * Write page offset to 0x467 and page frame number to 0x469.
	 */
-	*vec = (ushort_t)((caddr_t)
-	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
-	    + ((ulong_t)rm_platter_va & 0xf));
-	vec[1] = (ushort_t)(rm_platter_pa >> 4);
+	addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
+	vec[0] = (ushort_t)(addr & PAGEOFFSET);
+	vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
	warm_reset_vector = vec;

-	bcopy((caddr_t)real_mode_start,
-	    (caddr_t)((rm_platter_t *)rm_platter_va)->rm_code,
-	    (size_t)real_mode_end - (size_t)real_mode_start);
+	/* Map real mode platter into kas so kernel can access it. */
+	hat_devload(kas.a_hat,
+	    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
+	    btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
+	    HAT_LOAD_NOCONSIST);
+
+	/* Copy CPU startup code to rm_platter if the system is still booting.
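+	 * When CPU DR is enabled, the copy is instead deferred to
+	 * mach_cpucontext_xalloc(), which re-copies the startup code on
+	 * every hot-add operation because rm_code is shared with the CPU
+	 * stop path and may have been overwritten since boot.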
*/
+	if (!plat_dr_enabled()) {
+		ASSERT((size_t)real_mode_start_cpu_end -
+		    (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
+		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
+		    (size_t)real_mode_start_cpu_end -
+		    (size_t)real_mode_start_cpu);
+	}

	return (0);
}
@@ -95,12 +125,53 @@ mach_cpucontext_fini(void)
extern void *long_mode_64(void);
#endif	/* __amd64 */

-void *
-mach_cpucontext_alloc(struct cpu *cp)
+/*ARGSUSED*/
+void
+rmp_gdt_init(rm_platter_t *rm)
+{
+
+#if defined(__amd64)
+	/* Use the kas address space for the CPU startup thread. */
+	if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
+		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
+		    "located above 4G in physical memory (@ 0x%lx)",
+		    MAKECR3(kas.a_hat->hat_htable->ht_pfn));
+
+	/*
+	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
+	 * by code in real_mode_start_cpu():
+	 *
+	 * GDT[0]:  NULL selector
+	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
+	 *
+	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
+	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
+	 * a course of action as any other, though it may cause the entire
+	 * platform to reset in some cases...
+	 */
+	rm->rm_temp_gdt[0] = 0;
+	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
+
+	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
+	rm->rm_temp_gdt_base = rm_platter_pa +
+	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
+	rm->rm_temp_idt_lim = 0;
+	rm->rm_temp_idt_base = 0;
+
+	/*
+	 * Since the CPU needs to jump to protected mode using an identity
+	 * mapped address, we need to calculate it here.
+	 */
+	rm->rm_longmode64_addr = rm_platter_pa +
+	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start_cpu);
+#endif	/* __amd64 */
+}
+
+static void *
+mach_cpucontext_alloc_tables(struct cpu *cp)
+{
-	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
-	struct cpu_tables *ct;
	struct tss *ntss;
+	struct cpu_tables *ct;

	/*
	 * Allocate space for stack, tss, gdt and idt. We round the size
@@ -109,7 +180,8 @@ mach_cpucontext_alloc(struct cpu *cp)
	 */
	ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
	if ((uintptr_t)ct & PAGEOFFSET)
-		panic("mp_startup_init: cpu%d misaligned tables", cp->cpu_id);
+		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
+		    cp->cpu_id);

	ntss = cp->cpu_tss = &ct->ct_tss;

@@ -149,93 +221,174 @@ mach_cpucontext_alloc(struct cpu *cp)
	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
	    sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);

+	return (ct);
+}
+
+void *
+mach_cpucontext_xalloc(struct cpu *cp, int optype)
+{
+	size_t len;
+	struct cpu_tables *ct;
+	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+	static int cpu_halt_code_ready;
+
+	if (optype == MACH_CPUCONTEXT_OP_STOP) {
+		ASSERT(plat_dr_enabled());
+
+		/*
+		 * The WARM_RESET_VECTOR has a limitation that the physical
+		 * address written to it must be page-aligned. To work around
+		 * this limitation, the CPU stop code has been split into
+		 * two stages.
+		 * The stage 2 code, which implements the real logic to halt
+		 * CPUs, is copied to the rm_cpu_halt_code field in the real
+		 * mode platter. The stage 1 code, which simply jumps to the
+		 * stage 2 code in the rm_cpu_halt_code field, is copied to
+		 * rm_code field in the real mode platter and it may be
+		 * overwritten after the CPU has been stopped.
+		 */
+		if (!cpu_halt_code_ready) {
+			/*
+			 * The rm_cpu_halt_code field in the real mode platter
+			 * is used by the CPU stop code only.
So only copy the + * CPU stop stage 2 code into the rm_cpu_halt_code + * field on the first call. + */ + len = (size_t)real_mode_stop_cpu_stage2_end - + (size_t)real_mode_stop_cpu_stage2; + ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE); + bcopy((caddr_t)real_mode_stop_cpu_stage2, + (caddr_t)rm->rm_cpu_halt_code, len); + cpu_halt_code_ready = 1; + } + + /* + * The rm_code field in the real mode platter is shared by + * the CPU start, CPU stop, CPR and fast reboot code. So copy + * the CPU stop stage 1 code into the rm_code field every time. + */ + len = (size_t)real_mode_stop_cpu_stage1_end - + (size_t)real_mode_stop_cpu_stage1; + ASSERT(len <= RM_PLATTER_CODE_SIZE); + bcopy((caddr_t)real_mode_stop_cpu_stage1, + (caddr_t)rm->rm_code, len); + rm->rm_cpu_halted = 0; + + return (cp->cpu_m.mcpu_mach_ctx_ptr); + } else if (optype != MACH_CPUCONTEXT_OP_START) { + return (NULL); + } + + /* + * Only need to allocate tables when starting CPU. + * Tables allocated when starting CPU will be reused when stopping CPU. + */ + ct = mach_cpucontext_alloc_tables(cp); + if (ct == NULL) { + return (NULL); + } + + /* Copy CPU startup code to rm_platter for CPU hot-add operations. */ + if (plat_dr_enabled()) { + bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code, + (size_t)real_mode_start_cpu_end - + (size_t)real_mode_start_cpu); + } + /* * Now copy all that we've set up onto the real mode platter * for the real mode code to digest as part of starting the cpu. */ - rm->rm_idt_base = cp->cpu_idt; rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1; rm->rm_gdt_base = cp->cpu_gdt; rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1; - rm->rm_pdbr = getcr3(); + /* + * CPU needs to access kernel address space after powering on. + * When hot-adding CPU at runtime, directly use top level page table + * of kas other than the return value of getcr3(). getcr3() returns + * current process's top level page table, which may be different from + * the one of kas. + */ + rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn); rm->rm_cpu = cp->cpu_id; rm->rm_x86feature = x86_feature; + + /* + * For hot-adding CPU at runtime, Machine Check and Performance Counter + * should be disabled. They will be enabled on demand after CPU powers + * on successfully + */ rm->rm_cr4 = getcr4(); + rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE); rmp_gdt_init(rm); return (ct); } -/*ARGSUSED*/ void -rmp_gdt_init(rm_platter_t *rm) +mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype) { + struct cpu_tables *ct = arg; -#if defined(__amd64) - - if (getcr3() > 0xffffffffUL) - panic("Cannot initialize CPUs; kernel's 64-bit page tables\n" - "located above 4G in physical memory (@ 0x%lx)", getcr3()); - - /* - * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY - * by code in real_mode_start(): - * - * GDT[0]: NULL selector - * GDT[1]: 64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1 - * - * Clear the IDT as interrupts will be off and a limit of 0 will cause - * the CPU to triple fault and reset on an NMI, seemingly as reasonable - * a course of action as any other, though it may cause the entire - * platform to reset in some cases... 
-	 */
-	rm->rm_temp_gdt[0] = 0;
-	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
-
-	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
-	rm->rm_temp_gdt_base = rm_platter_pa +
-	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
-	rm->rm_temp_idt_lim = 0;
-	rm->rm_temp_idt_base = 0;
+	ASSERT(&ct->ct_tss == cp->cpu_tss);
+	if (optype == MACH_CPUCONTEXT_OP_START) {
+		switch (err) {
+		case 0:
+			/*
+			 * Save pointer for reuse when stopping CPU.
+			 */
+			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+			break;
+		case ETIMEDOUT:
+			/*
+			 * The processor was poked, but failed to start before
+			 * we gave up waiting for it. In case it starts later,
+			 * don't free anything.
+			 */
+			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
+			break;
+		default:
+			/*
+			 * Some other, passive, error occurred.
+			 */
+			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+			cp->cpu_tss = NULL;
+			break;
+		}
+	} else if (optype == MACH_CPUCONTEXT_OP_STOP) {
+		switch (err) {
+		case 0:
+			/*
+			 * Free resources allocated when starting CPU.
+			 */
+			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
+			cp->cpu_tss = NULL;
+			cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
+			break;
+		default:
+			/*
+			 * Don't touch table pointer in case of failure.
+			 */
+			break;
+		}
+	} else {
+		ASSERT(0);
+	}
+}

-	/*
-	 * Since the CPU needs to jump to protected mode using an identity
-	 * mapped address, we need to calculate it here.
-	 */
-	rm->rm_longmode64_addr = rm_platter_pa +
-	    ((uint32_t)long_mode_64 - (uint32_t)real_mode_start);
-#endif	/* __amd64 */
+void *
+mach_cpucontext_alloc(struct cpu *cp)
+{
+	return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}

-/*ARGSUSED*/
void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
-	struct cpu_tables *ct = arg;
-
-	ASSERT(&ct->ct_tss == cp->cpu_tss);
-
-	switch (err) {
-	case 0:
-		break;
-	case ETIMEDOUT:
-		/*
-		 * The processor was poked, but failed to start before
-		 * we gave up waiting for it. In case it starts later,
-		 * don't free anything.
-		 */
-		break;
-	default:
-		/*
-		 * Some other, passive, error occurred.
-		 */
-		kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
-		cp->cpu_tss = NULL;
-		break;
-	}
+	mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}

/*
@@ -276,25 +429,221 @@ mach_cpu_pause(volatile char *safe)
}

/*
- * Power on CPU.
+ * Power on the target CPU.
 */
-/*ARGSUSED*/
int
mp_cpu_poweron(struct cpu *cp)
{
-	ASSERT(MUTEX_HELD(&cpu_lock));
-	return (ENOTSUP);		/* not supported */
+	int error;
+	cpuset_t tempset;
+	processorid_t cpuid;
+
+	ASSERT(cp != NULL);
+	cpuid = cp->cpu_id;
+	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+		return (ENOTSUP);
+	} else if (cpuid < 0 || cpuid >= max_ncpus) {
+		return (EINVAL);
+	}
+
+	/*
+	 * The current x86 implementation of mp_cpu_configure() and
+	 * mp_cpu_poweron() has a limitation: mp_cpu_poweron() may only be
+	 * called once after calling mp_cpu_configure() for a specific CPU,
+	 * because mp_cpu_poweron() destroys data structures created
+	 * by mp_cpu_configure(). So reject the request if the CPU has already
+	 * been powered on once after calling mp_cpu_configure().
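+	 * (Concretely: after a CPU has been powered off with
+	 * p_online(cpuid, P_POWEROFF), a second power-on attempt via
+	 * p_online(cpuid, P_ONLINE) fails with ENOTSUP until the CPU has
+	 * been unconfigured and configured again.)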
+	 * This limitation only affects the p_online syscall; the DR driver
+	 * is unaffected because it always invokes the public CPU management
+	 * interfaces in the predefined order:
+	 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
+	 */
+	if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Check if there's at least a Mbyte of kmem available
+	 * before attempting to start the cpu.
+	 */
+	if (kmem_avail() < 1024 * 1024) {
+		/*
+		 * Kick off a reap in case that helps us with
+		 * later attempts ..
+		 */
+		kmem_reap();
+		return (ENOMEM);
+	}
+
+	affinity_set(CPU->cpu_id);
+
+	/*
+	 * Start the target CPU. No need to call mach_cpucontext_fini()
+	 * if mach_cpucontext_init() fails.
+	 */
+	if ((error = mach_cpucontext_init()) == 0) {
+		error = mp_start_cpu_common(cp, B_FALSE);
+		mach_cpucontext_fini();
+	}
+	if (error != 0) {
+		affinity_clear();
+		return (error);
+	}
+
+	/* Wait for the target cpu to reach READY state. */
+	tempset = cpu_ready_set;
+	while (!CPU_IN_SET(tempset, cpuid)) {
+		delay(1);
+		tempset = *((volatile cpuset_t *)&cpu_ready_set);
+	}
+
+	/* Mark the target CPU as available for mp operation. */
+	CPUSET_ATOMIC_ADD(mp_cpus, cpuid);
+
+	/* Free the space allocated to hold the microcode file */
+	ucode_cleanup();
+
+	affinity_clear();
+
+	return (0);
+}
+
+#define	MP_CPU_DETACH_MAX_TRIES		5
+#define	MP_CPU_DETACH_DELAY		100
+
+static int
+mp_cpu_detach_driver(dev_info_t *dip)
+{
+	int i;
+	int rv = EBUSY;
+	dev_info_t *pdip;
+
+	pdip = ddi_get_parent(dip);
+	ASSERT(pdip != NULL);
+	/*
+	 * Check if caller holds pdip busy - can cause deadlocks in
+	 * e_ddi_branch_unconfigure(), which calls devfs_clean().
+	 */
+	if (DEVI_BUSY_OWNED(pdip)) {
+		return (EDEADLOCK);
+	}
+
+	for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
+		if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
+			rv = 0;
+			break;
+		}
+		DELAY(MP_CPU_DETACH_DELAY);
+	}
+
+	return (rv);
}

/*
- * Power off CPU.
+ * Power off the target CPU.
+ * Note: cpu_lock will be released and then reacquired.
 */
-/*ARGSUSED*/
int
mp_cpu_poweroff(struct cpu *cp)
{
-	ASSERT(MUTEX_HELD(&cpu_lock));
-	return (ENOTSUP);		/* not supported */
+	int rv = 0;
+	void *ctx;
+	dev_info_t *dip = NULL;
+	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
+	extern void cpupm_start(cpu_t *);
+	extern void cpupm_stop(cpu_t *);
+
+	ASSERT(cp != NULL);
+	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
+	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
+
+	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
+		return (ENOTSUP);
+	}
+	/*
+	 * There is no support for powering off cpu0 yet.
+	 * There are many pieces of code which have a hard dependency on cpu0.
+	 */
+	if (cp->cpu_id == 0) {
+		return (ENOTSUP);
+	}
+
+	if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
+		return (ENXIO);
+	}
+	ASSERT(dip != NULL);
+	if (mp_cpu_detach_driver(dip) != 0) {
+		rv = EBUSY;
+		goto out_online;
+	}
+
+	/* Allocate CPU context for stopping */
+	if (mach_cpucontext_init() != 0) {
+		rv = ENXIO;
+		goto out_online;
+	}
+	ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
+	if (ctx == NULL) {
+		rv = ENXIO;
+		goto out_context_fini;
+	}
+
+	cpupm_stop(cp);
+	cpu_event_fini_cpu(cp);
+
+	if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
+		cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
+		cp->cpu_m.mcpu_cmi_hdl = NULL;
+	}
+
+	rv = mach_cpu_stop(cp, ctx);
+	if (rv != 0) {
+		goto out_enable_cmi;
+	}
+
+	/* Wait until the target CPU has been halted.
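+	 * The stage 2 halt code in the real mode platter is expected to
+	 * store 0xdead in rm_cpu_halted just before parking the CPU, so
+	 * spinning on that value is what confirms the halt; the 0xffff
+	 * written afterwards re-arms the field for the next stop request.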
*/ + while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) { + delay(1); + } + rm->rm_cpu_halted = 0xffff; + + /* CPU_READY has been cleared by mach_cpu_stop. */ + ASSERT((cp->cpu_flags & CPU_READY) == 0); + ASSERT((cp->cpu_flags & CPU_RUNNING) == 0); + cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF; + CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id); + + mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP); + mach_cpucontext_fini(); + + return (0); + +out_enable_cmi: + { + cmi_hdl_t hdl; + + if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), + cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) { + if (x86_feature & X86_MCA) + cmi_mca_init(hdl); + cp->cpu_m.mcpu_cmi_hdl = hdl; + } + } + cpu_event_init_cpu(cp); + cpupm_start(cp); + mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP); + +out_context_fini: + mach_cpucontext_fini(); + +out_online: + (void) e_ddi_branch_configure(dip, NULL, 0); + + if (rv != EAGAIN && rv != ETIME) { + rv = ENXIO; + } + + return (rv); } /* diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c index 0ba377f67c..146f879a66 100644 --- a/usr/src/uts/i86pc/os/mp_startup.c +++ b/usr/src/uts/i86pc/os/mp_startup.c @@ -23,10 +23,15 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/thread.h> #include <sys/cpuvar.h> +#include <sys/cpu.h> #include <sys/t_lock.h> #include <sys/param.h> #include <sys/proc.h> @@ -34,6 +39,7 @@ #include <sys/class.h> #include <sys/cmn_err.h> #include <sys/debug.h> +#include <sys/note.h> #include <sys/asm_linkage.h> #include <sys/x_call.h> #include <sys/systm.h> @@ -60,6 +66,7 @@ #include <sys/reboot.h> #include <sys/kdi_machimpl.h> #include <vm/hat_i86.h> +#include <vm/vm_dep.h> #include <sys/memnode.h> #include <sys/pci_cfgspace.h> #include <sys/mach_mmu.h> @@ -71,8 +78,11 @@ struct cpu cpus[1]; /* CPU data */ struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */ +struct cpu *cpu_free_list; /* list for released CPUs */ cpu_core_t cpu_core[NCPU]; /* cpu_core structures */ +#define cpu_next_free cpu_prev + /* * Useful for disabling MP bring-up on a MP capable system. */ @@ -94,7 +104,8 @@ int flushes_require_xcalls; cpuset_t cpu_ready_set; /* initialized in startup() */ -static void mp_startup(void); +static void mp_startup_boot(void); +static void mp_startup_hotplug(void); static void cpu_sep_enable(void); static void cpu_sep_disable(void); @@ -108,7 +119,6 @@ void init_cpu_info(struct cpu *cp) { processor_info_t *pi = &cp->cpu_type_info; - char buf[CPU_IDSTRLEN]; /* * Get clock-frequency property for the CPU. @@ -131,14 +141,18 @@ init_cpu_info(struct cpu *cp) if (fpu_exists) (void) strcpy(pi->pi_fputypes, "i387 compatible"); - (void) cpuid_getidstr(cp, buf, sizeof (buf)); + cp->cpu_idstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP); + cp->cpu_brandstr = kmem_zalloc(CPU_IDSTRLEN, KM_SLEEP); - cp->cpu_idstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP); - (void) strcpy(cp->cpu_idstr, buf); - - (void) cpuid_getbrandstr(cp, buf, sizeof (buf)); - cp->cpu_brandstr = kmem_alloc(strlen(buf) + 1, KM_SLEEP); - (void) strcpy(cp->cpu_brandstr, buf); + /* + * If called for the BSP, cp is equal to current CPU. + * For non-BSPs, cpuid info of cp is not ready yet, so use cpuid info + * of current CPU as default values for cpu_idstr and cpu_brandstr. 
+ * They will be corrected in mp_startup_common() after cpuid_pass1() + * has been invoked on target CPU. + */ + (void) cpuid_getidstr(CPU, cp->cpu_idstr, CPU_IDSTRLEN); + (void) cpuid_getbrandstr(CPU, cp->cpu_brandstr, CPU_IDSTRLEN); } /* @@ -228,9 +242,11 @@ init_cpu_syscall(struct cpu *cp) * * Allocate and initialize the cpu structure, TRAPTRACE buffer, and the * startup and idle threads for the specified CPU. + * Parameter boot is true for boot time operations and is false for CPU + * DR operations. */ -struct cpu * -mp_startup_init(int cpun) +static struct cpu * +mp_cpu_configure_common(int cpun, boolean_t boot) { struct cpu *cp; kthread_id_t tp; @@ -247,27 +263,25 @@ mp_startup_init(int cpun) trap_trace_ctl_t *ttc = &trap_trace_ctl[cpun]; #endif + ASSERT(MUTEX_HELD(&cpu_lock)); ASSERT(cpun < NCPU && cpu[cpun] == NULL); - cp = kmem_zalloc(sizeof (*cp), KM_SLEEP); -#if !defined(__xpv) - if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) { - cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(CPU); - cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait; - } else -#endif - cp->cpu_m.mcpu_idle_cpu = cpu_idle; + if (cpu_free_list == NULL) { + cp = kmem_zalloc(sizeof (*cp), KM_SLEEP); + } else { + cp = cpu_free_list; + cpu_free_list = cp->cpu_next_free; + } cp->cpu_m.mcpu_istamp = cpun << 16; - procp = curthread->t_procp; + /* Create per CPU specific threads in the process p0. */ + procp = &p0; - mutex_enter(&cpu_lock); /* * Initialize the dispatcher first. */ disp_cpu_init(cp); - mutex_exit(&cpu_lock); cpu_vm_data_init(cp); @@ -294,14 +308,21 @@ mp_startup_init(int cpun) tp->t_disp_queue = cp->cpu_disp; /* - * Setup thread to start in mp_startup. + * Setup thread to start in mp_startup_common. */ sp = tp->t_stk; - tp->t_pc = (uintptr_t)mp_startup; tp->t_sp = (uintptr_t)(sp - MINFRAME); #if defined(__amd64) tp->t_sp -= STACK_ENTRY_ALIGN; /* fake a call */ #endif + /* + * Setup thread start entry point for boot or hotplug. + */ + if (boot) { + tp->t_pc = (uintptr_t)mp_startup_boot; + } else { + tp->t_pc = (uintptr_t)mp_startup_hotplug; + } cp->cpu_id = cpun; cp->cpu_self = cp; @@ -312,12 +333,13 @@ mp_startup_init(int cpun) /* * cpu_base_spl must be set explicitly here to prevent any blocking - * operations in mp_startup from causing the spl of the cpu to drop - * to 0 (allowing device interrupts before we're ready) in resume(). + * operations in mp_startup_common from causing the spl of the cpu + * to drop to 0 (allowing device interrupts before we're ready) in + * resume(). * cpu_base_spl MUST remain at LOCK_LEVEL until the cpu is CPU_READY. * As an extra bit of security on DEBUG kernels, this is enforced with - * an assertion in mp_startup() -- before cpu_base_spl is set to its - * proper value. + * an assertion in mp_startup_common() -- before cpu_base_spl is set + * to its proper value. */ cp->cpu_base_spl = ipltospl(LOCK_LEVEL); @@ -392,6 +414,15 @@ mp_startup_init(int cpun) * alloc space for cpuid info */ cpuid_alloc_space(cp); +#if !defined(__xpv) + if ((x86_feature & X86_MWAIT) && idle_cpu_prefer_mwait) { + cp->cpu_m.mcpu_mwait = cpuid_mwait_alloc(cp); + cp->cpu_m.mcpu_idle_cpu = cpu_idle_mwait; + } else +#endif + cp->cpu_m.mcpu_idle_cpu = cpu_idle; + + init_cpu_info(cp); /* * alloc space for ucode_info @@ -408,31 +439,34 @@ mp_startup_init(int cpun) ttc->ttc_next = ttc->ttc_first; ttc->ttc_limit = ttc->ttc_first + trap_trace_bufsize; #endif + /* * Record that we have another CPU. 
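	 * cpu_lock is not taken here: callers of mp_cpu_configure_common()
	 * are required to hold it across the whole operation, as checked by
	 * the ASSERT(MUTEX_HELD(&cpu_lock)) at the top of this function.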
*/ - mutex_enter(&cpu_lock); /* * Initialize the interrupt threads for this CPU */ cpu_intr_alloc(cp, NINTR_THREADS); + + cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF; + cpu_set_state(cp); + /* * Add CPU to list of available CPUs. It'll be on the active list - * after mp_startup(). + * after mp_startup_common(). */ cpu_add_unit(cp); - mutex_exit(&cpu_lock); return (cp); } /* - * Undo what was done in mp_startup_init + * Undo what was done in mp_cpu_configure_common */ static void -mp_startup_fini(struct cpu *cp, int error) +mp_cpu_unconfigure_common(struct cpu *cp, int error) { - mutex_enter(&cpu_lock); + ASSERT(MUTEX_HELD(&cpu_lock)); /* * Remove the CPU from the list of available CPUs. @@ -451,7 +485,6 @@ mp_startup_fini(struct cpu *cp, int error) * leave the fundamental data structures intact. */ cp->cpu_flags = 0; - mutex_exit(&cpu_lock); return; } @@ -468,8 +501,7 @@ mp_startup_fini(struct cpu *cp, int error) */ segkp_release(segkp, cp->cpu_intr_stack - (INTR_STACK_SIZE - SA(MINFRAME))); - - mutex_exit(&cpu_lock); + cp->cpu_intr_stack = NULL; #ifdef TRAPTRACE /* @@ -485,10 +517,26 @@ mp_startup_fini(struct cpu *cp, int error) hat_cpu_offline(cp); - cpuid_free_space(cp); - ucode_free_space(cp); + /* Free CPU ID string and brand string. */ + if (cp->cpu_idstr) { + kmem_free(cp->cpu_idstr, CPU_IDSTRLEN); + cp->cpu_idstr = NULL; + } + if (cp->cpu_brandstr) { + kmem_free(cp->cpu_brandstr, CPU_IDSTRLEN); + cp->cpu_brandstr = NULL; + } + +#if !defined(__xpv) + if (cp->cpu_m.mcpu_mwait != NULL) { + cpuid_mwait_free(cp); + cp->cpu_m.mcpu_mwait = NULL; + } +#endif + cpuid_free_space(cp); + if (cp->cpu_idt != CPU->cpu_idt) kmem_free(cp->cpu_idt, PAGESIZE); cp->cpu_idt = NULL; @@ -496,6 +544,12 @@ mp_startup_fini(struct cpu *cp, int error) kmem_free(cp->cpu_gdt, PAGESIZE); cp->cpu_gdt = NULL; + if (cp->cpu_supp_freqs != NULL) { + size_t len = strlen(cp->cpu_supp_freqs) + 1; + kmem_free(cp->cpu_supp_freqs, len); + cp->cpu_supp_freqs = NULL; + } + teardown_vaddr_for_ppcopy(cp); kcpc_hw_fini(cp); @@ -505,15 +559,13 @@ mp_startup_fini(struct cpu *cp, int error) cpu_vm_data_destroy(cp); - mutex_enter(&cpu_lock); + xc_fini_cpu(cp); disp_cpu_fini(cp); - mutex_exit(&cpu_lock); -#if !defined(__xpv) - if (cp->cpu_m.mcpu_mwait != NULL) - cpuid_mwait_free(cp); -#endif - kmem_free(cp, sizeof (*cp)); + ASSERT(cp != CPU0); + bzero(cp, sizeof (*cp)); + cp->cpu_next_free = cpu_free_list; + cpu_free_list = cp; } /* @@ -529,8 +581,8 @@ mp_startup_fini(struct cpu *cp, int error) * system. * * workaround_errata is invoked early in mlsetup() for CPU 0, and in - * mp_startup() for all slave CPUs. Slaves process workaround_errata prior - * to acknowledging their readiness to the master, so this routine will + * mp_startup_common() for all slave CPUs. Slaves process workaround_errata + * prior to acknowledging their readiness to the master, so this routine will * never be executed by multiple CPUs in parallel, thus making updates to * global data safe. * @@ -1214,77 +1266,82 @@ workaround_errata_end() #endif } -static cpuset_t procset; - /* - * Start a single cpu, assuming that the kernel context is available - * to successfully start another cpu. - * - * (For example, real mode code is mapped into the right place - * in memory and is ready to be run.) + * The procset_slave and procset_master are used to synchronize + * between the control CPU and the target CPU when starting CPUs. 
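+ * The handshake is symmetric: the signaling side sets the CPU's bit in
+ * the set and spins until the peer clears it, while the waiting side
+ * spins until the bit appears and then clears it. For a CPU start the
+ * rendezvous runs roughly as follows (a sketch of the code below):
+ *
+ *	target:  mp_startup_signal(&procset_slave, id)	(I'm alive)
+ *	control: consumes the bit, runs tsc_sync_master()
+ *	target:  cpuid passes 2-4, then
+ *		 mp_startup_signal(&procset_slave, id)	(cpuid done)
+ *	control: mp_startup_wait(&procset_slave, id), then
+ *		 cpupm_init()/pg_cpu_init() on the target's behalf
+ *	control: mp_startup_signal(&procset_master, id)
+ *	target:  mp_startup_wait(&procset_master, id) and continues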
*/ +static cpuset_t procset_slave, procset_master; + +static void +mp_startup_wait(cpuset_t *sp, processorid_t cpuid) +{ + cpuset_t tempset; + + for (tempset = *sp; !CPU_IN_SET(tempset, cpuid); + tempset = *(volatile cpuset_t *)sp) { + SMT_PAUSE(); + } + CPUSET_ATOMIC_DEL(*(cpuset_t *)sp, cpuid); +} + +static void +mp_startup_signal(cpuset_t *sp, processorid_t cpuid) +{ + cpuset_t tempset; + + CPUSET_ATOMIC_ADD(*(cpuset_t *)sp, cpuid); + for (tempset = *sp; CPU_IN_SET(tempset, cpuid); + tempset = *(volatile cpuset_t *)sp) { + SMT_PAUSE(); + } +} + int -start_cpu(processorid_t who) +mp_start_cpu_common(cpu_t *cp, boolean_t boot) { + _NOTE(ARGUNUSED(boot)); + void *ctx; - cpu_t *cp; int delays; int error = 0; + cpuset_t tempset; + processorid_t cpuid; +#ifndef __xpv + extern void cpupm_init(cpu_t *); +#endif - ASSERT(who != 0); - - /* - * Check if there's at least a Mbyte of kmem available - * before attempting to start the cpu. - */ - if (kmem_avail() < 1024 * 1024) { - /* - * Kick off a reap in case that helps us with - * later attempts .. - */ - kmem_reap(); - return (ENOMEM); + ASSERT(cp != NULL); + cpuid = cp->cpu_id; + ctx = mach_cpucontext_alloc(cp); + if (ctx == NULL) { + cmn_err(CE_WARN, + "cpu%d: failed to allocate context", cp->cpu_id); + return (EAGAIN); } - - cp = mp_startup_init(who); - if ((ctx = mach_cpucontext_alloc(cp)) == NULL || - (error = mach_cpu_start(cp, ctx)) != 0) { - - /* - * Something went wrong before we even started it - */ - if (ctx) - cmn_err(CE_WARN, - "cpu%d: failed to start error %d", - cp->cpu_id, error); - else - cmn_err(CE_WARN, - "cpu%d: failed to allocate context", cp->cpu_id); - - if (ctx) - mach_cpucontext_free(cp, ctx, error); - else - error = EAGAIN; /* hmm. */ - mp_startup_fini(cp, error); + error = mach_cpu_start(cp, ctx); + if (error != 0) { + cmn_err(CE_WARN, + "cpu%d: failed to start, error %d", cp->cpu_id, error); + mach_cpucontext_free(cp, ctx, error); return (error); } - for (delays = 0; !CPU_IN_SET(procset, who); delays++) { + for (delays = 0, tempset = procset_slave; !CPU_IN_SET(tempset, cpuid); + delays++) { if (delays == 500) { /* * After five seconds, things are probably looking * a bit bleak - explain the hang. */ cmn_err(CE_NOTE, "cpu%d: started, " - "but not running in the kernel yet", who); + "but not running in the kernel yet", cpuid); } else if (delays > 2000) { /* * We waited at least 20 seconds, bail .. */ error = ETIMEDOUT; - cmn_err(CE_WARN, "cpu%d: timed out", who); + cmn_err(CE_WARN, "cpu%d: timed out", cpuid); mach_cpucontext_free(cp, ctx, error); - mp_startup_fini(cp, error); return (error); } @@ -1292,37 +1349,105 @@ start_cpu(processorid_t who) * wait at least 10ms, then check again.. */ delay(USEC_TO_TICK_ROUNDUP(10000)); + tempset = *((volatile cpuset_t *)&procset_slave); } + CPUSET_ATOMIC_DEL(procset_slave, cpuid); mach_cpucontext_free(cp, ctx, 0); #ifndef __xpv if (tsc_gethrtime_enable) - tsc_sync_master(who); + tsc_sync_master(cpuid); #endif if (dtrace_cpu_init != NULL) { + (*dtrace_cpu_init)(cpuid); + } + + /* + * During CPU DR operations, the cpu_lock is held by current + * (the control) thread. We can't release the cpu_lock here + * because that will break the CPU DR logic. + * On the other hand, CPUPM and processor group initialization + * routines need to access the cpu_lock. So we invoke those + * routines here on behalf of mp_startup_common(). + * + * CPUPM and processor group initialization routines depend + * on the cpuid probing results. 
Wait for mp_startup_common()
+	 * to signal that cpuid probing is done.
+	 */
+	mp_startup_wait(&procset_slave, cpuid);
+#ifndef __xpv
+	cpupm_init(cp);
+#endif
+	(void) pg_cpu_init(cp, B_FALSE);
+	cpu_set_state(cp);
+	mp_startup_signal(&procset_master, cpuid);
+
+	return (0);
+}
+
+/*
+ * Start a single cpu, assuming that the kernel context is available
+ * to successfully start another cpu.
+ *
+ * (For example, real mode code is mapped into the right place
+ * in memory and is ready to be run.)
+ */
+int
+start_cpu(processorid_t who)
+{
+	cpu_t *cp;
+	int error = 0;
+	cpuset_t tempset;
+
+	ASSERT(who != 0);
+
+	/*
+	 * Check if there's at least a Mbyte of kmem available
+	 * before attempting to start the cpu.
+	 */
+	if (kmem_avail() < 1024 * 1024) {
		/*
-		 * DTrace CPU initialization expects cpu_lock to be held.
+		 * Kick off a reap in case that helps us with
+		 * later attempts ..
		 */
-		mutex_enter(&cpu_lock);
-		(*dtrace_cpu_init)(who);
-		mutex_exit(&cpu_lock);
+		kmem_reap();
+		return (ENOMEM);
+	}
+
+	/*
+	 * First configure cpu.
+	 */
+	cp = mp_cpu_configure_common(who, B_TRUE);
+	ASSERT(cp != NULL);
+
+	/*
+	 * Then start cpu.
+	 */
+	error = mp_start_cpu_common(cp, B_TRUE);
+	if (error != 0) {
+		mp_cpu_unconfigure_common(cp, error);
+		return (error);
	}

-	while (!CPU_IN_SET(cpu_ready_set, who))
-		delay(1);
+	mutex_exit(&cpu_lock);
+	tempset = cpu_ready_set;
+	while (!CPU_IN_SET(tempset, who)) {
+		drv_usecwait(1);
+		tempset = *((volatile cpuset_t *)&cpu_ready_set);
+	}
+	mutex_enter(&cpu_lock);

	return (0);
}

-
-/*ARGSUSED*/
void
start_other_cpus(int cprboot)
{
+	_NOTE(ARGUNUSED(cprboot));
+
	uint_t who;
-	uint_t skipped = 0;
	uint_t bootcpuid = 0;

	/*
@@ -1347,9 +1472,12 @@ start_other_cpus(int cprboot)
	CPUSET_ADD(cpu_ready_set, bootcpuid);

	/*
-	 * if only 1 cpu or not using MP, skip the rest of this
+	 * skip the rest of this if
+	 * . only 1 cpu detected and system isn't hotplug-capable
+	 * . not using MP
	 */
-	if (CPUSET_ISNULL(mp_cpus) || use_mp == 0) {
+	if ((CPUSET_ISNULL(mp_cpus) && plat_dr_support_cpu() == 0) ||
+	    use_mp == 0) {
		if (use_mp == 0)
			cmn_err(CE_CONT, "?***** Not in MP mode\n");
		goto done;
@@ -1375,18 +1503,13 @@ start_other_cpus(int cprboot)
	affinity_set(CPU_CURRENT);

	for (who = 0; who < NCPU; who++) {
-
		if (!CPU_IN_SET(mp_cpus, who))
			continue;
		ASSERT(who != bootcpuid);
-		if (ncpus >= max_ncpus) {
-			skipped = who;
-			continue;
-		}
-		if (start_cpu(who) != 0)
-			CPUSET_DEL(mp_cpus, who);

		mutex_enter(&cpu_lock);
+		if (start_cpu(who) != 0)
+			CPUSET_DEL(mp_cpus, who);
		cpu_state_change_notify(who, CPU_SETUP);
		mutex_exit(&cpu_lock);
	}

	affinity_clear();

-	if (skipped) {
+	mach_cpucontext_fini();
+
+done:
+	if (get_hwenv() == HW_NATIVE)
+		workaround_errata_end();
+	cmi_post_mpstartup();
+
+	if (use_mp && ncpus != boot_max_ncpus) {
		cmn_err(CE_NOTE,
		    "System detected %d cpus, but "
		    "only %d cpu(s) were enabled during boot.",
-		    skipped + 1, ncpus);
+		    boot_max_ncpus, ncpus);
		cmn_err(CE_NOTE,
		    "Use \"boot-ncpus\" parameter to enable more CPU(s). "
		    "See eeprom(1M).");
	}
-
-done:
-	if (get_hwenv() == HW_NATIVE)
-		workaround_errata_end();
-	mach_cpucontext_fini();
-
-	cmi_post_mpstartup();
}

-/*
- * Dummy functions - no i86pc platforms support dynamic cpu allocation.
- */ -/*ARGSUSED*/ int mp_cpu_configure(int cpuid) { - return (ENOTSUP); /* not supported */ + cpu_t *cp; + + if (use_mp == 0 || plat_dr_support_cpu() == 0) { + return (ENOTSUP); + } + + cp = cpu_get(cpuid); + if (cp != NULL) { + return (EALREADY); + } + + /* + * Check if there's at least a Mbyte of kmem available + * before attempting to start the cpu. + */ + if (kmem_avail() < 1024 * 1024) { + /* + * Kick off a reap in case that helps us with + * later attempts .. + */ + kmem_reap(); + return (ENOMEM); + } + + cp = mp_cpu_configure_common(cpuid, B_FALSE); + ASSERT(cp != NULL && cpu_get(cpuid) == cp); + + return (cp != NULL ? 0 : EAGAIN); } -/*ARGSUSED*/ int mp_cpu_unconfigure(int cpuid) { - return (ENOTSUP); /* not supported */ + cpu_t *cp; + + if (use_mp == 0 || plat_dr_support_cpu() == 0) { + return (ENOTSUP); + } else if (cpuid < 0 || cpuid >= max_ncpus) { + return (EINVAL); + } + + cp = cpu_get(cpuid); + if (cp == NULL) { + return (ENODEV); + } + mp_cpu_unconfigure_common(cp, 0); + + return (0); } /* * Startup function for 'other' CPUs (besides boot cpu). * Called from real_mode_start. * - * WARNING: until CPU_READY is set, mp_startup and routines called by - * mp_startup should not call routines (e.g. kmem_free) that could call + * WARNING: until CPU_READY is set, mp_startup_common and routines called by + * mp_startup_common should not call routines (e.g. kmem_free) that could call * hat_unload which requires CPU_READY to be set. */ -void -mp_startup(void) +static void +mp_startup_common(boolean_t boot) { - struct cpu *cp = CPU; + cpu_t *cp = CPU; uint_t new_x86_feature; - extern void cpu_event_init_cpu(cpu_t *); -#ifndef __xpv - extern void cpupm_init(cpu_t *); -#endif const char *fmt = "?cpu%d: %b\n"; + extern void cpu_event_init_cpu(cpu_t *); /* * We need to get TSC on this proc synced (i.e., any delta @@ -1457,8 +1613,8 @@ mp_startup(void) * interrupts, cmn_err, etc. */ - /* Let cpu0 continue into tsc_sync_master() */ - CPUSET_ATOMIC_ADD(procset, cp->cpu_id); + /* Let the control CPU continue into tsc_sync_master() */ + mp_startup_signal(&procset_slave, cp->cpu_id); #ifndef __xpv if (tsc_gethrtime_enable) @@ -1537,39 +1693,42 @@ mp_startup(void) if (workaround_errata(cp) != 0) panic("critical workaround(s) missing for cpu%d", cp->cpu_id); + /* + * We can touch cpu_flags here without acquiring the cpu_lock here + * because the cpu_lock is held by the control CPU which is running + * mp_start_cpu_common(). + * Need to clear CPU_QUIESCED flag before calling any function which + * may cause thread context switching, such as kmem_alloc() etc. + * The idle thread checks for CPU_QUIESCED flag and loops for ever if + * it's set. So the startup thread may have no chance to switch back + * again if it's switched away with CPU_QUIESCED set. + */ + cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED); + cpuid_pass2(cp); cpuid_pass3(cp); (void) cpuid_pass4(cp); - init_cpu_info(cp); - - mutex_enter(&cpu_lock); - - cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS; - - cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr); - cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr); - - if (dtrace_cpu_init != NULL) { - (*dtrace_cpu_init)(cp->cpu_id); - } - /* - * Fill out cpu_ucode_info. Update microcode if necessary. + * Correct cpu_idstr and cpu_brandstr on target CPU after + * cpuid_pass1() is done. 
*/ - ucode_check(cp); + (void) cpuid_getidstr(cp, cp->cpu_idstr, CPU_IDSTRLEN); + (void) cpuid_getbrandstr(cp, cp->cpu_brandstr, CPU_IDSTRLEN); - mutex_exit(&cpu_lock); + cp->cpu_flags |= CPU_RUNNING | CPU_READY | CPU_EXISTS; post_startup_cpu_fixups(); + cpu_event_init_cpu(cp); + /* * Enable preemption here so that contention for any locks acquired - * later in mp_startup may be preempted if the thread owning those - * locks is continuously executing on other CPUs (for example, this - * CPU must be preemptible to allow other CPUs to pause it during their - * startup phases). It's safe to enable preemption here because the - * CPU state is pretty-much fully constructed. + * later in mp_startup_common may be preempted if the thread owning + * those locks is continuously executing on other CPUs (for example, + * this CPU must be preemptible to allow other CPUs to pause it during + * their startup phases). It's safe to enable preemption here because + * the CPU state is pretty-much fully constructed. */ curthread->t_preempt = 0; @@ -1577,29 +1736,31 @@ mp_startup(void) ASSERT(cp->cpu_base_spl == ipltospl(LOCK_LEVEL)); set_base_spl(); /* Restore the spl to its proper value */ - cpu_event_init_cpu(cp); -#ifndef __xpv - cpupm_init(cp); -#endif - + pghw_physid_create(cp); /* - * Processor group initialization for this CPU is dependent on the - * cpuid probing, which must be done in the context of the current - * CPU, as well as the CPU's device node initialization (for ACPI). + * Delegate initialization tasks, which need to access the cpu_lock, + * to mp_start_cpu_common() because we can't acquire the cpu_lock here + * during CPU DR operations. */ - mutex_enter(&cpu_lock); - pghw_physid_create(cp); - (void) pg_cpu_init(cp, B_FALSE); + mp_startup_signal(&procset_slave, cp->cpu_id); + mp_startup_wait(&procset_master, cp->cpu_id); pg_cmt_cpu_startup(cp); - mutex_exit(&cpu_lock); + + if (boot) { + mutex_enter(&cpu_lock); + cp->cpu_flags &= ~CPU_OFFLINE; + cpu_enable_intr(cp); + cpu_add_active(cp); + mutex_exit(&cpu_lock); + } /* Enable interrupts */ (void) spl0(); - mutex_enter(&cpu_lock); - cpu_enable_intr(cp); - cpu_add_active(cp); - mutex_exit(&cpu_lock); + /* + * Fill out cpu_ucode_info. Update microcode if necessary. + */ + ucode_check(cp); #ifndef __xpv { @@ -1616,6 +1777,7 @@ mp_startup(void) cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL) { if (x86_feature & X86_MCA) cmi_mca_init(hdl); + cp->cpu_m.mcpu_cmi_hdl = hdl; } } #endif /* __xpv */ @@ -1630,16 +1792,13 @@ mp_startup(void) */ CPUSET_ATOMIC_ADD(cpu_ready_set, cp->cpu_id); - /* - * Because mp_startup() gets fired off after init() starts, we - * can't use the '?' trick to do 'boot -v' printing - so we - * always direct the 'cpu .. online' messages to the log. - */ - cmn_err(CE_CONT, "!cpu%d initialization complete - online\n", - cp->cpu_id); - (void) mach_cpu_create_device_node(cp, NULL); + cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_idstr); + cmn_err(CE_CONT, "?cpu%d: %s\n", cp->cpu_id, cp->cpu_brandstr); + cmn_err(CE_CONT, "?cpu%d initialization complete - online\n", + cp->cpu_id); + /* * Now we are done with the startup thread, so free it up. */ @@ -1648,6 +1807,23 @@ mp_startup(void) /*NOTREACHED*/ } +/* + * Startup function for 'other' CPUs at boot time (besides boot cpu). + */ +static void +mp_startup_boot(void) +{ + mp_startup_common(B_TRUE); +} + +/* + * Startup function for hotplug CPUs at runtime. + */ +void +mp_startup_hotplug(void) +{ + mp_startup_common(B_FALSE); +} /* * Start CPU on user request. 
@@ -1663,7 +1839,6 @@ mp_cpu_start(struct cpu *cp) /* * Stop CPU on user request. */ -/* ARGSUSED */ int mp_cpu_stop(struct cpu *cp) { @@ -1713,31 +1888,42 @@ cpu_enable_intr(struct cpu *cp) psm_enable_intr(cp->cpu_id); } - -/*ARGSUSED*/ void mp_cpu_faulted_enter(struct cpu *cp) { -#ifndef __xpv - cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), - cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); +#ifdef __xpv + _NOTE(ARGUNUSED(cp)); +#else + cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl; if (hdl != NULL) { + cmi_hdl_hold(hdl); + } else { + hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), + cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); + } + if (hdl != NULL) { cmi_faulted_enter(hdl); cmi_hdl_rele(hdl); } #endif } -/*ARGSUSED*/ void mp_cpu_faulted_exit(struct cpu *cp) { -#ifndef __xpv - cmi_hdl_t hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), - cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); +#ifdef __xpv + _NOTE(ARGUNUSED(cp)); +#else + cmi_hdl_t hdl = cp->cpu_m.mcpu_cmi_hdl; if (hdl != NULL) { + cmi_hdl_hold(hdl); + } else { + hdl = cmi_hdl_lookup(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp), + cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp)); + } + if (hdl != NULL) { cmi_faulted_exit(hdl); cmi_hdl_rele(hdl); } diff --git a/usr/src/uts/i86pc/os/startup.c b/usr/src/uts/i86pc/os/startup.c index b4b235cf5c..c0e9c2404b 100644 --- a/usr/src/uts/i86pc/os/startup.c +++ b/usr/src/uts/i86pc/os/startup.c @@ -22,6 +22,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/t_lock.h> @@ -63,6 +67,7 @@ #include <sys/dumphdr.h> #include <sys/bootconf.h> +#include <sys/memlist_plat.h> #include <sys/varargs.h> #include <sys/promif.h> #include <sys/modctl.h> @@ -116,7 +121,7 @@ #include <sys/systeminfo.h> #include <sys/multiboot.h> -#ifdef __xpv +#ifdef __xpv #include <sys/hypervisor.h> #include <sys/xen_mmu.h> @@ -130,6 +135,10 @@ extern void xen_late_startup(void); struct xen_evt_data cpu0_evt_data; +#else /* __xpv */ +#include <sys/memlist_impl.h> + +extern void mem_config_init(void); #endif /* __xpv */ extern void progressbar_init(void); @@ -315,6 +324,17 @@ pgcnt_t segkpsize = 0; pgcnt_t segziosize = 0; /* size of zio segment in pages */ /* + * A static DR page_t VA map is reserved that can map the page structures + * for a domain's entire RA space. The pages that back this space are + * dynamically allocated and need not be physically contiguous. The DR + * map size is derived from KPM size. + * This mechanism isn't used by x86 yet, so just stubs here. + */ +int ppvm_enable = 0; /* Static virtual map for page structs */ +page_t *ppvm_base = NULL; /* Base of page struct map */ +pgcnt_t ppvm_size = 0; /* Size of page struct map */ + +/* * VA range available to the debugger */ const caddr_t kdi_segdebugbase = (const caddr_t)SEGDEBUGBASE; @@ -965,20 +985,38 @@ startup_memlist(void) if (prom_debug) print_memlist("boot physinstalled", bootops->boot_mem->physinstalled); - installed_top_size(bootops->boot_mem->physinstalled, &physmax, + installed_top_size_ex(bootops->boot_mem->physinstalled, &physmax, &physinstalled, &memblocks); PRM_DEBUG(physmax); PRM_DEBUG(physinstalled); PRM_DEBUG(memblocks); /* + * Compute maximum physical address for memory DR operations. + * Memory DR operations are unsupported on xpv or 32bit OSes. 
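+	 * As a worked example: a CPU whose cpuid reports 46 physical
+	 * address bits yields plat_dr_physmax = btop(1ULL << 46), the
+	 * page frame number at the 64 TB boundary, which is then clamped
+	 * to PHYSMEM_MAX64 below.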
+ */ +#ifdef __amd64 + if (plat_dr_support_memory()) { + if (plat_dr_physmax == 0) { + uint_t pabits = UINT_MAX; + + cpuid_get_addrsize(CPU, &pabits, NULL); + plat_dr_physmax = btop(1ULL << pabits); + } + if (plat_dr_physmax > PHYSMEM_MAX64) + plat_dr_physmax = PHYSMEM_MAX64; + } else +#endif + plat_dr_physmax = 0; + + /* * Examine the bios reserved memory to find out: * - the number of discontiguous segments of memory. */ if (prom_debug) print_memlist("boot reserved mem", bootops->boot_mem->rsvdmem); - installed_top_size(bootops->boot_mem->rsvdmem, &rsvd_high_pfn, + installed_top_size_ex(bootops->boot_mem->rsvdmem, &rsvd_high_pfn, &rsvd_pgcnt, &rsvdmemblocks); PRM_DEBUG(rsvd_high_pfn); PRM_DEBUG(rsvd_pgcnt); @@ -1137,9 +1175,14 @@ startup_memlist(void) * memory is at addresses above 1 TB. When adjusted, segkpm_base must * be aligned on KERNEL_REDZONE_SIZE boundary (span of top level pte). */ - if (physmax + 1 > mmu_btop(TERABYTE)) { + if (physmax + 1 > mmu_btop(TERABYTE) || + plat_dr_physmax > mmu_btop(TERABYTE)) { uint64_t kpm_resv_amount = mmu_ptob(physmax + 1); + if (kpm_resv_amount < mmu_ptob(plat_dr_physmax)) { + kpm_resv_amount = mmu_ptob(plat_dr_physmax); + } + segkpm_base = -(P2ROUNDUP((2 * kpm_resv_amount), KERNEL_REDZONE_SIZE)); /* down from top VA */ @@ -1186,6 +1229,16 @@ startup_memlist(void) panic("physavail was too big!"); if (prom_debug) print_memlist("phys_avail", phys_avail); +#ifndef __xpv + /* + * Free unused memlist items, which may be used by memory DR driver + * at runtime. + */ + if ((caddr_t)current < (caddr_t)memlist + memlist_sz) { + memlist_free_block((caddr_t)current, + (caddr_t)memlist + memlist_sz - (caddr_t)current); + } +#endif /* * Build bios reserved memspace @@ -1196,6 +1249,16 @@ startup_memlist(void) panic("bios_rsvd was too big!"); if (prom_debug) print_memlist("bios_rsvd", bios_rsvd); +#ifndef __xpv + /* + * Free unused memlist items, which may be used by memory DR driver + * at runtime. + */ + if ((caddr_t)current < (caddr_t)bios_rsvd + rsvdmemlist_sz) { + memlist_free_block((caddr_t)current, + (caddr_t)bios_rsvd + rsvdmemlist_sz - (caddr_t)current); + } +#endif /* * setup page coloring @@ -1391,7 +1454,11 @@ startup_kmem(void) } #endif -#ifdef __xpv +#ifndef __xpv + if (plat_dr_support_memory()) { + mem_config_init(); + } +#else /* __xpv */ /* * Some of the xen start information has to be relocated up * into the kernel's permanent address space. 
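plat_dr_physmax above is derived from the CPU's physical-address width (via cpuid_get_addrsize()) and clamped to PHYSMEM_MAX64, so KPM sizing can cover memory that may be hot-added later. The same arithmetic as a standalone sketch; the page shift and the 64 TB cap are illustrative stand-ins for the kernel's mmu constants:

#include <stdint.h>

#define PAGESHIFT_SKETCH	12
#define BTOP_SKETCH(x)		((uint64_t)(x) >> PAGESHIFT_SKETCH)
#define PHYSMEM_CAP_SKETCH	BTOP_SKETCH(64ULL << 40)	/* assume 64 TB */

static uint64_t
dr_physmax_sketch(unsigned pabits)	/* physical address bits, e.g. 46 */
{
	uint64_t maxpfn = BTOP_SKETCH(1ULL << pabits);

	return (maxpfn > PHYSMEM_CAP_SKETCH ? PHYSMEM_CAP_SKETCH : maxpfn);
}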
@@ -1406,7 +1473,7 @@ startup_kmem(void) */ CPU->cpu_m.mcpu_vcpu_info = &HYPERVISOR_shared_info->vcpu_info[CPU->cpu_id]; -#endif +#endif /* __xpv */ PRM_POINT("startup_kmem() done"); } @@ -1591,8 +1658,10 @@ startup_modules(void) if ((get_hwenv() != HW_XEN_HVM) && (hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(CPU), cmi_ntv_hwcoreid(CPU), cmi_ntv_hwstrandid(CPU))) != NULL && - (x86_feature & X86_MCA)) + (x86_feature & X86_MCA)) { cmi_mca_init(hdl); + CPU->cpu_m.mcpu_cmi_hdl = hdl; + } #endif /* __xpv */ /* @@ -1701,7 +1770,11 @@ layout_kernel_va(void) #if defined(__amd64) kpm_vbase = (caddr_t)segkpm_base; - kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1)); + if (physmax + 1 < plat_dr_physmax) { + kpm_size = ROUND_UP_LPAGE(mmu_ptob(plat_dr_physmax)); + } else { + kpm_size = ROUND_UP_LPAGE(mmu_ptob(physmax + 1)); + } if ((uintptr_t)kpm_vbase + kpm_size > (uintptr_t)valloc_base) panic("not enough room for kpm!"); PRM_DEBUG(kpm_size); @@ -2342,10 +2415,6 @@ release_bootstrap(void) rm_platter_va = i86devmap(pfn, 1, PROT_READ | PROT_WRITE | PROT_EXEC); rm_platter_pa = ptob(pfn); - hat_devload(kas.a_hat, - (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE, - pfn, PROT_READ | PROT_WRITE | PROT_EXEC, - HAT_LOAD_NOCONSIST); break; } if (pfn == btop(1*1024*1024) && use_mp) diff --git a/usr/src/uts/i86pc/os/x_call.c b/usr/src/uts/i86pc/os/x_call.c index 33b75c1a5a..17d5123508 100644 --- a/usr/src/uts/i86pc/os/x_call.c +++ b/usr/src/uts/i86pc/os/x_call.c @@ -22,6 +22,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #include <sys/types.h> #include <sys/param.h> @@ -186,11 +190,11 @@ xc_extract(xc_msg_t **queue) return (old_head); } - /* * Initialize the machcpu fields used for cross calls */ static uint_t xc_initialized = 0; + void xc_init_cpu(struct cpu *cpup) { @@ -198,34 +202,100 @@ xc_init_cpu(struct cpu *cpup) int c; /* - * add a new msg to each existing CPU's free list, as well as one for - * my list for each of them. ncpus has an inconsistent value when this - * function is called, so use cpup->cpu_id. + * Allocate message buffers for the new CPU. */ - for (c = 0; c < cpup->cpu_id; ++c) { - if (cpu[c] == NULL) - continue; - msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); - msg->xc_command = XC_MSG_FREE; - msg->xc_master = c; - xc_insert(&cpu[c]->cpu_m.xc_free, msg); + for (c = 0; c < max_ncpus; ++c) { + if (plat_dr_support_cpu()) { + /* + * Allocate a message buffer for every CPU possible + * in system, including our own, and add them to our xc + * message queue. + */ + msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); + msg->xc_command = XC_MSG_FREE; + msg->xc_master = cpup->cpu_id; + xc_insert(&cpup->cpu_m.xc_free, msg); + } else if (cpu[c] != NULL && cpu[c] != cpup) { + /* + * Add a new message buffer to each existing CPU's free + * list, as well as one for my list for each of them. + * Note: cpu0 is statically inserted into cpu[] array, + * so need to check cpu[c] isn't cpup itself to avoid + * allocating extra message buffers for cpu0. + */ + msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); + msg->xc_command = XC_MSG_FREE; + msg->xc_master = c; + xc_insert(&cpu[c]->cpu_m.xc_free, msg); + + msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); + msg->xc_command = XC_MSG_FREE; + msg->xc_master = cpup->cpu_id; + xc_insert(&cpup->cpu_m.xc_free, msg); + } + } + if (!plat_dr_support_cpu()) { + /* + * Add one for self messages if CPU hotplug is disabled. 
+ */ msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); msg->xc_command = XC_MSG_FREE; msg->xc_master = cpup->cpu_id; xc_insert(&cpup->cpu_m.xc_free, msg); } + if (!xc_initialized) + xc_initialized = 1; +} + +void +xc_fini_cpu(struct cpu *cpup) +{ + xc_msg_t *msg; + + ASSERT((cpup->cpu_flags & CPU_READY) == 0); + ASSERT(cpup->cpu_m.xc_msgbox == NULL); + ASSERT(cpup->cpu_m.xc_work_cnt == 0); + + while ((msg = xc_extract(&cpup->cpu_m.xc_free)) != NULL) { + kmem_free(msg, sizeof (*msg)); + } +} + +#define XC_FLUSH_MAX_WAITS 1000 + +/* Flush inflight message buffers. */ +int +xc_flush_cpu(struct cpu *cpup) +{ + int i; + + ASSERT((cpup->cpu_flags & CPU_READY) == 0); + /* - * Add one for self messages + * Pause all working CPUs, which ensures that there's no CPU in + * function xc_common(). + * This is used to work around a race condition window in xc_common() + * between checking CPU_READY flag and increasing working item count. */ - msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); - msg->xc_command = XC_MSG_FREE; - msg->xc_master = cpup->cpu_id; - xc_insert(&cpup->cpu_m.xc_free, msg); + pause_cpus(cpup); + start_cpus(); - if (!xc_initialized) - xc_initialized = 1; + for (i = 0; i < XC_FLUSH_MAX_WAITS; i++) { + if (cpup->cpu_m.xc_work_cnt == 0) { + break; + } + DELAY(1); + } + for (; i < XC_FLUSH_MAX_WAITS; i++) { + if (!BT_TEST(xc_priority_set, cpup->cpu_id)) { + break; + } + DELAY(1); + } + + return (i >= XC_FLUSH_MAX_WAITS ? ETIME : 0); } /* @@ -416,7 +486,7 @@ xc_common( * Post messages to all CPUs involved that are CPU_READY */ CPU->cpu_m.xc_wait_cnt = 0; - for (c = 0; c < ncpus; ++c) { + for (c = 0; c < max_ncpus; ++c) { if (!BT_TEST(set, c)) continue; cpup = cpu[c]; @@ -487,7 +557,7 @@ xc_priority_common( /* * Wait briefly for any previous xc_priority to have finished. */ - for (c = 0; c < ncpus; ++c) { + for (c = 0; c < max_ncpus; ++c) { cpup = cpu[c]; if (cpup == NULL || !(cpup->cpu_flags & CPU_READY)) continue; @@ -527,7 +597,7 @@ xc_priority_common( * Post messages to all CPUs involved that are CPU_READY * We'll always IPI, plus bang on the xc_msgbox for i86_mwait() */ - for (c = 0; c < ncpus; ++c) { + for (c = 0; c < max_ncpus; ++c) { if (!BT_TEST(set, c)) continue; cpup = cpu[c]; diff --git a/usr/src/uts/i86pc/sys/Makefile b/usr/src/uts/i86pc/sys/Makefile index 11ebb58187..7fa9f8f6ef 100644 --- a/usr/src/uts/i86pc/sys/Makefile +++ b/usr/src/uts/i86pc/sys/Makefile @@ -60,6 +60,7 @@ HDRS= \ psm_types.h \ rm_platter.h \ smp_impldefs.h \ + sbd_ioctl.h \ vm_machparam.h \ x_call.h \ xc_levels.h \ diff --git a/usr/src/uts/i86pc/sys/acpidev.h b/usr/src/uts/i86pc/sys/acpidev.h index e0fbaa4999..6d11277aaf 100644 --- a/usr/src/uts/i86pc/sys/acpidev.h +++ b/usr/src/uts/i86pc/sys/acpidev.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. */ @@ -28,8 +28,10 @@ #include <sys/types.h> #include <sys/obpdefs.h> #include <sys/sunddi.h> +#ifdef _KERNEL #include <sys/acpi/acpi.h> #include <sys/acpica.h> +#endif #ifdef __cplusplus extern "C" { @@ -55,23 +57,19 @@ extern "C" { #define ACPIDEV_HID_CPU "ACPI0007" #define ACPIDEV_HID_PCI_HOSTBRIDGE "PNP0A03" #define ACPIDEV_HID_PCIE_HOSTBRIDGE "PNP0A08" +#define ACPIDEV_HID_PCIEX_HOSTBRIDGE "PNP0A08" #define ACPIDEV_HID_MEMORY "PNP0C80" -/* Common ACPI object names. */ -#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS -#define ACPIDEV_OBJECT_NAME_PR "_PR_" - -/* Common ACPI method names. 
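xc_flush_cpu() in the x_call.c hunk above drains in-flight cross-call messages with a bounded poll: up to XC_FLUSH_MAX_WAITS one-microsecond delays, returning ETIME rather than hanging a CPU-remove operation. A simplified user-level analogue of that wait loop (the atomic counter stands in for xc_work_cnt):

#include <errno.h>
#include <stdatomic.h>
#include <unistd.h>

#define FLUSH_MAX_WAITS	1000

static int
drain_sketch(_Atomic int *work_cnt)
{
	int i;

	for (i = 0; i < FLUSH_MAX_WAITS; i++) {
		if (atomic_load(work_cnt) == 0)
			return (0);
		(void) usleep(1);	/* DELAY(1) in the kernel */
	}
	return (ETIME);	/* caller decides whether to retry or fail the DR op */
}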
*/ -#define ACPIDEV_METHOD_NAME_MAT "_MAT" - /* Device names for ACPI objects. */ #define ACPIDEV_NODE_NAME_ROOT "fw" +#define ACPIDEV_NODE_NAME_ACPIDR "acpidr" #define ACPIDEV_NODE_NAME_CONTAINER "container" #define ACPIDEV_NODE_NAME_MODULE_SBD "sb" #define ACPIDEV_NODE_NAME_MODULE_CPU "socket" #define ACPIDEV_NODE_NAME_CPU "cpu" #define ACPIDEV_NODE_NAME_PROCESSOR "cpus" #define ACPIDEV_NODE_NAME_MEMORY "mem" +#define ACPIDEV_NODE_NAME_PCI "pci" /* Device types for ACPI objects. */ #define ACPIDEV_TYPE_ROOTNEX "acpirootnex" @@ -81,15 +79,31 @@ extern "C" { #define ACPIDEV_TYPE_CONTAINER "acpicontainer" #define ACPIDEV_TYPE_CPU "acpicpu" #define ACPIDEV_TYPE_MEMORY "acpimemory" +#define ACPIDEV_TYPE_PCI "pci" +#define ACPIDEV_TYPE_PCIEX "pciex" /* Device property names for ACPI objects. */ #define ACPIDEV_PROP_NAME_UNIT_ADDR "unit-address" #define ACPIDEV_PROP_NAME_ACPI_UID "acpi-uid" #define ACPIDEV_PROP_NAME_PROCESSOR_ID "acpi-processor-id" #define ACPIDEV_PROP_NAME_LOCALAPIC_ID "apic-id" +#define ACPIDEV_PROP_NAME_PROXIMITY_ID "proximity-id" #define ACPIDEV_PROP_NAME_UID_FORMAT "acpidev-uid-format" +/* Miscellaneous strings. */ +#define ACPIDEV_CMD_OST_PREFIX "acpi-update-status" +#define ACPIDEV_CMD_OST_INPROGRESS "acpi-update-status=inprogress" +#define ACPIDEV_CMD_OST_SUCCESS "acpi-update-status=success" +#define ACPIDEV_CMD_OST_FAILURE "acpi-update-status=failure" +#define ACPIDEV_CMD_OST_NOOP "acpi-update-status=noop" + +#define ACPIDEV_EVENT_TYPE_ATTR_NAME "acpi-event-type" +#define ACPIDEV_EVENT_TYPE_BUS_CHECK "bus_check" +#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK "device_check" +#define ACPIDEV_EVENT_TYPE_DEVICE_CHECK_LIGHT "device_check_light" +#define ACPIDEV_EVENT_TYPE_EJECT_REQUEST "eject_request" + /* ACPI device class Id. */ typedef enum acpidev_class_id { ACPIDEV_CLASS_ID_INVALID = 0, @@ -99,6 +113,8 @@ typedef enum acpidev_class_id { ACPIDEV_CLASS_ID_CONTAINER = 4, ACPIDEV_CLASS_ID_CPU = 5, ACPIDEV_CLASS_ID_MEMORY = 6, + ACPIDEV_CLASS_ID_PCI = 7, + ACPIDEV_CLASS_ID_PCIEX = 8, ACPIDEV_CLASS_ID_MAX } acpidev_class_id_t; @@ -106,9 +122,88 @@ typedef enum acpidev_class_id { #define ACPIDEV_OUSER_NO_CPU 0x1 #define ACPIDEV_OUSER_NO_MEM 0x2 #define ACPIDEV_OUSER_NO_CONTAINER 0x4 +#define ACPIDEV_OUSER_NO_PCI 0x8 +#define ACPIDEV_OUSER_NO_CACHE 0x10000 #ifdef _KERNEL +/* Common ACPI object names. */ +#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS +#define ACPIDEV_OBJECT_NAME_PR "_PR_" + +/* Common ACPI method names. */ +#define ACPIDEV_METHOD_NAME_MAT "_MAT" +#define ACPIDEV_METHOD_NAME_EJ0 "_EJ0" +#define ACPIDEV_METHOD_NAME_EDL "_EDL" +#define ACPIDEV_METHOD_NAME_EJD "_EJD" +#define ACPIDEV_METHOD_NAME_OST "_OST" +#define ACPIDEV_METHOD_NAME_PXM "_PXM" +#define ACPIDEV_METHOD_NAME_SLI "_SLI" + +/* Source event code for _OST. */ +#define ACPI_OST_EVENT_EJECTING 0x103 +#define ACPI_OST_EVENT_INSERTING 0x200 + +/* Status code for _OST. */ +#define ACPI_OST_STA_SUCCESS 0x0 + +/* Non-specific failure. */ +#define ACPI_OST_STA_FAILURE 0x1 + +/* Unrecognized Notify Code. */ +#define ACPI_OST_STA_NOT_SUPPORT 0x2 + +/* Device ejection not supported by OSPM. */ +#define ACPI_OST_STA_EJECT_NOT_SUPPORT 0x80 + +/* Device in use by application. */ +#define ACPI_OST_STA_EJECT_IN_USE 0x81 + +/* Device Busy. */ +#define ACPI_OST_STA_EJECT_BUSY 0x82 + +/* Ejection dependency is busy or not supported for ejection by OSPM. */ +#define ACPI_OST_STA_EJECT_DEPENDENCY 0x83 + +/* Ejection is in progress (pending). 
*/ +#define ACPI_OST_STA_EJECT_IN_PROGRESS 0x84 + +/* Device insertion in progress (pending). */ +#define ACPI_OST_STA_INSERT_IN_PROGRESS 0x80 + +/* Device driver load failure. */ +#define ACPI_OST_STA_INSERT_DRIVER 0x81 + +/* Device insertion not supported by OSPM. */ +#define ACPI_OST_STA_INSERT_NOT_SUPPORT 0x82 + +/* + * Insertion failure + * Resources Unavailable as described by the following bit encodings: + * Bit[3] Bus Numbers + * Bit[2] Interrupts + * Bit[1] I/O + * Bit[0] Memory + */ +#define ACPI_OST_STA_INSERT_NO_RESOURCE 0x90 +#define ACPI_OST_STA_INSERT_NO_BUS 0x8 +#define ACPI_OST_STA_INSERT_NO_INTR 0x4 +#define ACPI_OST_STA_INSERT_NO_IO 0x2 +#define ACPI_OST_STA_INSERT_NO_MEM 0x1 + +/* + * According to the ACPI specification, self latency (entry[n][n]) in the + * SLIT table should be 10. + */ +#define ACPI_SLIT_SELF_LATENCY 10 + +/* + * The DR driver assigns a unique device id for each hot-added memory device. + * ACPI_MEMNODE_DEVID_BOOT is assigned to memory devices present at boot, + * which is distinguished from device ids assigned by the DR driver. + */ +#define ACPI_MEMNODE_DEVID_BOOT UINT32_MAX + /* Forward declaration */ typedef struct acpidev_data_impl *acpidev_data_handle_t; typedef struct acpidev_walk_info acpidev_walk_info_t; @@ -197,7 +292,7 @@ typedef ACPI_STATUS (* acpidev_probe_t)(acpidev_walk_info_t *); typedef acpidev_filter_result_t (* acpidev_filter_t)(acpidev_walk_info_t *, char *, int); typedef ACPI_STATUS (* acpidev_init_t)(acpidev_walk_info_t *); -typedef ACPI_STATUS (* acpidev_fini_t)(ACPI_HANDLE, dev_info_t *, +typedef void (* acpidev_fini_t)(ACPI_HANDLE, acpidev_data_handle_t, acpidev_class_t *); /* Device class driver interface. */ @@ -237,6 +332,7 @@ extern acpidev_class_t acpidev_class_device; extern acpidev_class_t acpidev_class_container; extern acpidev_class_t acpidev_class_cpu; extern acpidev_class_t acpidev_class_memory; +extern acpidev_class_t acpidev_class_pci; /* * Class driver lists. @@ -309,6 +405,7 @@ extern ACPI_STATUS acpidev_process_object(acpidev_walk_info_t *infop, #define ACPIDEV_PROCESS_FLAG_OFFLINE 0x400 /* Put device into offline. */ #define ACPIDEV_PROCESS_FLAG_NOTAG 0x800 /* Skip tag dip with object. */ #define ACPIDEV_PROCESS_FLAG_SYNCSTATUS 0x1000 /* Sync object status. */ +#define ACPIDEV_PROCESS_FLAG_HOLDBRANCH 0x10000 /* Hold device branch. */ /* * Filter ACPI objects according to filter rules, generate devname if needed. @@ -346,10 +443,16 @@ extern void acpidev_data_destroy_handle(ACPI_HANDLE hdl); extern ACPI_HANDLE acpidev_data_get_object(acpidev_data_handle_t hdl); extern dev_info_t *acpidev_data_get_devinfo(acpidev_data_handle_t hdl); extern int acpidev_data_get_status(acpidev_data_handle_t hdl); +extern boolean_t acpidev_data_dr_capable(acpidev_data_handle_t hdl); +extern boolean_t acpidev_data_dr_ready(acpidev_data_handle_t hdl); +extern boolean_t acpidev_data_dr_failed(acpidev_data_handle_t hdl); extern void acpidev_data_set_flag(acpidev_data_handle_t hdl, uint32_t flag); extern void acpidev_data_clear_flag(acpidev_data_handle_t hdl, uint32_t flag); extern uint32_t acpidev_data_get_flag(acpidev_data_handle_t hdl, uint32_t flag); +/* ACPI system event handler has been registered. */ +#define ACPIDEV_DATA_HANDLER_READY 0x1 + /* * Try to generate meaningful device unit address from uid. * Return buf on success and NULL on failure. 
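On a failed hot-insert, the 0x90 "resources unavailable" status above is ORed with the bits naming the exhausted resources before being reported to firmware through acpidev_eval_ost(), declared in the next hunk. A small sketch composing such a status; the macro names are local copies of the values defined above:

#define OST_INSERT_NO_RESOURCE	0x90	/* ACPI_OST_STA_INSERT_NO_RESOURCE */
#define OST_INSERT_NO_MEM	0x1	/* ACPI_OST_STA_INSERT_NO_MEM */
#define OST_INSERT_NO_INTR	0x4	/* ACPI_OST_STA_INSERT_NO_INTR */

static unsigned
ost_insert_failure_sketch(int no_mem, int no_intr)
{
	unsigned status = OST_INSERT_NO_RESOURCE;

	if (no_mem)
		status |= OST_INSERT_NO_MEM;
	if (no_intr)
		status |= OST_INSERT_NO_INTR;
	/* e.g. both exhausted: 0x90 | 0x1 | 0x4 == 0x95 */
	return (status);
}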
@@ -430,6 +533,26 @@ typedef ACPI_STATUS (* acpidev_apic_walker_t)(ACPI_SUBTABLE_HEADER *, void *); extern ACPI_STATUS acpidev_walk_apic(ACPI_BUFFER *bufp, ACPI_HANDLE hdl, char *method, acpidev_apic_walker_t func, void *context); +/* + * Evaluate _OST method under object, which is used to support hotplug events. + * hdl: object handle + * code: _OST source event code + * status: _OST result status code + * bufp and len: optional third parameter for _OST. + */ +extern ACPI_STATUS acpidev_eval_ost(ACPI_HANDLE hdl, uint32_t code, + uint32_t status, char *bufp, size_t len); + +/* + * Evaluate _EJ0 method under object. + */ +extern ACPI_STATUS acpidev_eval_ej0(ACPI_HANDLE hdl); + +/* + * Evaluate _PXM method under object. + */ +extern ACPI_STATUS acpidev_eval_pxm(ACPI_HANDLE hdl, uint32_t *idp); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/i86pc/sys/acpidev_dr.h b/usr/src/uts/i86pc/sys/acpidev_dr.h new file mode 100644 index 0000000000..6219f98bf1 --- /dev/null +++ b/usr/src/uts/i86pc/sys/acpidev_dr.h @@ -0,0 +1,252 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ +/* + * Interfaces to support System Board Dynamic Reconfiguration. + */ + +#ifndef _SYS_ACPIDEV_DR_H +#define _SYS_ACPIDEV_DR_H +#include <sys/types.h> +#include <sys/obpdefs.h> +#include <sys/cpuvar.h> +#include <sys/memlist.h> +#include <sys/sunddi.h> +#include <sys/acpi/acpi.h> +#include <sys/acpica.h> +#include <sys/acpidev.h> +#include <sys/acpidev_rsc.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +/* Maximum number of DR capable system boards supported. */ +#define ACPIDEV_DR_MAX_BOARDS 0x40 +#define ACPIDEV_DR_SEGS_PER_MEM_DEV 0x10 +#define ACPIDEV_DR_MEMLISTS_PER_SEG 0x10 +#define ACPIDEV_DR_MAX_MEMLIST_ENTRIES 0x10000 + +#define ACPIDEV_DR_PROP_PORTID "portid" +#define ACPIDEV_DR_PROP_BOARDNUM OBP_BOARDNUM +#define ACPIDEV_DR_PROP_DEVNAME OBP_NAME + +/* + * Format strings for DR capable system boards. + * They will be used as attachment point names. + */ +#define ACPIDEV_DR_CPU_BD_FMT "CPU%u" +#define ACPIDEV_DR_MEMORY_BD_FMT "MEM%u" +#define ACPIDEV_DR_IO_BD_FMT "IO%u" +#define ACPIDEV_DR_SYSTEM_BD_FMT "SB%u" + +typedef enum { + ACPIDEV_INVALID_BOARD = 0, + ACPIDEV_CPU_BOARD, + ACPIDEV_MEMORY_BOARD, + ACPIDEV_IO_BOARD, + ACPIDEV_SYSTEM_BOARD +} acpidev_board_type_t; + +/* Check whether the system is DR capable.
*/ +extern int acpidev_dr_capable(void); + +extern uint32_t acpidev_dr_max_boards(void); +extern uint32_t acpidev_dr_max_mem_units_per_board(void); +extern uint32_t acpidev_dr_max_io_units_per_board(void); +extern uint32_t acpidev_dr_max_cmp_units_per_board(void); +extern uint32_t acpidev_dr_max_cpu_units_per_cmp(void); +extern uint32_t acpidev_dr_max_segments_per_mem_device(void); +extern uint32_t acpidev_dr_max_memlists_per_segment(void); +extern ACPI_STATUS acpidev_dr_get_mem_alignment(ACPI_HANDLE hdl, uint64_t *ap); + +/* Initialize support of DR operations. */ +extern void acpidev_dr_init(void); + +/* Scan for DR capable boards and set up the environment for DR operations. */ +extern void acpidev_dr_check(acpidev_walk_info_t *infop); + +/* + * Initialize DR interfaces to enable DR operations. + */ +extern ACPI_STATUS acpidev_dr_initialize(dev_info_t *pdip); + +/* Get ACPI handle of the DR capable board. */ +extern ACPI_STATUS acpidev_dr_get_board_handle(uint_t board, + ACPI_HANDLE *hdlp); + +/* Get board type of the DR capable board. */ +extern acpidev_board_type_t acpidev_dr_get_board_type(ACPI_HANDLE hdl); + +/* Get board number of the DR capable board. */ +extern ACPI_STATUS acpidev_dr_get_board_number(ACPI_HANDLE hdl, + uint32_t *bnump); + +/* Get board name of the DR capable board. */ +extern ACPI_STATUS acpidev_dr_get_board_name(ACPI_HANDLE hdl, + char *buf, size_t len); + +/* Get attachment point of the DR capable board. */ +extern ACPI_STATUS acpidev_dr_get_attachment_point(ACPI_HANDLE hdl, + char *buf, size_t len); + +/* + * Figure out device type of the object/device. + * It only supports device types which may be involved in DR operations. + */ +extern acpidev_class_id_t acpidev_dr_device_get_class(ACPI_HANDLE hdl); + +/* Get memory device index/id. */ +extern ACPI_STATUS acpidev_dr_device_get_memory_index(ACPI_HANDLE hdl, + uint32_t *idxp); + +/* Check whether the device is a DR capable board or not. */ +extern int acpidev_dr_device_is_board(ACPI_HANDLE hdl); + +/* Check whether the device is present or not. */ +extern int acpidev_dr_device_is_present(ACPI_HANDLE hdl); + +/* Check whether the device is powered-on or not. */ +extern int acpidev_dr_device_is_powered(ACPI_HANDLE hdl); + +/* Check whether the device is DR capable. */ +extern int acpidev_dr_device_hotplug_capable(ACPI_HANDLE hdl); + +/* Check whether the device has an eject device list. */ +extern int acpidev_dr_device_has_edl(ACPI_HANDLE hdl); + +/* + * Simulate OBP property interfaces to support the drmach driver, + * so we can keep drmach consistent with the SPARC version. + * Return size of data copied to buf if it's big enough, + * otherwise return size of buffer needed. + */ +extern int acpidev_dr_device_getprop(ACPI_HANDLE hdl, char *name, + caddr_t buf, size_t len); + +/* + * Get "reg" or "assigned-address" property of the device. + * Return "assigned-address" property if assigned is non-zero, + * otherwise return "reg" property. + * Caller needs to release returned resources by calling + * acpidev_dr_device_free_regspec(). + */ +extern ACPI_STATUS acpidev_dr_device_get_regspec(ACPI_HANDLE hdl, + boolean_t assigned, acpidev_regspec_t **regpp, uint_t *cntp); + +/* Free resources returned by acpidev_dr_device_get_regspec(). */ +extern void acpidev_dr_device_free_regspec(acpidev_regspec_t *regp, + uint_t count); + +/* Walk devices in eject device list (ACPI _EDL method).
*/ +extern ACPI_STATUS acpidev_dr_device_walk_edl(ACPI_HANDLE hdl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval); + +/* Walk devices in eject dependency list (ACPI _EJD method). */ +extern ACPI_STATUS acpidev_dr_device_walk_ejd(ACPI_HANDLE hdl, + ACPI_WALK_CALLBACK cb, void *arg, void **retval); + +/* + * Walk child and dependent devices which may be involved in DR operations. + * PCI host bridges embedded in physical processors may be presented in eject + * device list instead of as children of processors. + */ +extern ACPI_STATUS acpidev_dr_device_walk_device(ACPI_HANDLE hdl, + uint_t max_lvl, ACPI_WALK_CALLBACK cb, void *arg, void **retval); + +/* Check whether the device is in working state without any error. */ +extern ACPI_STATUS acpidev_dr_device_check_status(ACPI_HANDLE hdl); + +/* Power on the device. */ +extern ACPI_STATUS acpidev_dr_device_poweron(ACPI_HANDLE hdl); + +/* Power off the device. */ +extern ACPI_STATUS acpidev_dr_device_poweroff(ACPI_HANDLE hdl); + +/* + * Create device nodes for hot-added devices under hdl. + * Return: + * AE_OK: on success + * AE_SUPPORT: if it's not capable of DR operation. + * AE_ERROR: for other errors + */ +extern ACPI_STATUS acpidev_dr_device_insert(ACPI_HANDLE hdl); + +/* + * Destroy device nodes to be removed under hdl. + * AE_OK: on success + * AE_SUPPORT: if it's not capable of DR operation. + * AE_ERROR: for other errors + */ +extern ACPI_STATUS acpidev_dr_device_remove(ACPI_HANDLE hdl); + +/* Block dynamic reconfiguration operations. */ +extern void acpidev_dr_lock_all(void); + +/* Unblock dynamic reconfiguration operations. */ +extern void acpidev_dr_unlock_all(void); + +extern ACPI_STATUS acpidev_dr_allocate_cpuid(ACPI_HANDLE hdl, + processorid_t *idp); +extern ACPI_STATUS acpidev_dr_free_cpuid(ACPI_HANDLE hdl); + +/* + * Query NUMA relative information for the CPU device. + * It returns APIC id, Proximity id and latency information of the CPU device. + * Latency information is retrieved from the ACPI _SLI method or the ACPI SLIT + * table. + */ +extern int acpidev_dr_get_cpu_numa_info(cpu_t *cp, void **hdlpp, + uint32_t *apicidp, uint32_t *pxmidp, uint32_t *slicntp, uchar_t **slipp); + +/* + * Release resources allocated by acpidev_dr_get_cpu_numa_info(). + */ +extern void acpidev_dr_free_cpu_numa_info(void *hdlp); + +/* + * Query NUMA relative information for a memory device. + * It returns proximity id and latency information of the memory device. + * Latency information is obtained from the ACPI _SLI method or the ACPI + * SLIT table. + */ +extern ACPI_STATUS acpidev_dr_get_mem_numa_info(ACPI_HANDLE hdl, + struct memlist *ml, void **hdlpp, uint32_t *pxmidp, + uint32_t *slicntp, uchar_t **slipp); + +/* + * Release resources allocated by acpidev_dr_get_mem_numa_info(). + */ +extern void acpidev_dr_free_mem_numa_info(void *hdlp); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ACPIDEV_DR_H */ diff --git a/usr/src/uts/i86pc/sys/acpidev_impl.h b/usr/src/uts/i86pc/sys/acpidev_impl.h index 666ab5578d..c01ea9f5cb 100644 --- a/usr/src/uts/i86pc/sys/acpidev_impl.h +++ b/usr/src/uts/i86pc/sys/acpidev_impl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
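acpidev_dr_device_walk_edl(), declared above, takes the standard ACPICA ACPI_WALK_CALLBACK. A sketch of counting the devices on a board's eject device list, assuming the ACPICA types from sys/acpi/acpi.h are in scope; the counting context is illustrative and error handling is elided:

static ACPI_STATUS
count_edl_dev_sketch(ACPI_HANDLE hdl, UINT32 lvl, void *ctx, void **retp)
{
	(void) hdl;
	(void) lvl;
	(void) retp;		/* unused in this sketch */
	(*(int *)ctx)++;	/* one more device on the _EDL list */
	return (AE_OK);
}

static int
count_edl_sketch(ACPI_HANDLE board)
{
	int cnt = 0;

	(void) acpidev_dr_device_walk_edl(board, count_edl_dev_sketch,
	    &cnt, NULL);
	return (cnt);
}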
*/ @@ -27,10 +27,13 @@ #define _SYS_ACPIDEV_IMPL_H #include <sys/types.h> #include <sys/cmn_err.h> +#include <sys/bitmap.h> +#include <sys/synch.h> #include <sys/sunddi.h> #include <sys/acpi/acpi.h> #include <sys/acpica.h> #include <sys/acpidev.h> +#include <sys/acpidev_dr.h> #ifdef __cplusplus extern "C" { @@ -54,12 +57,52 @@ struct acpidev_data_impl { dev_info_t *aod_dip; acpidev_class_t *aod_class; acpidev_class_list_t **aod_class_list; + acpidev_board_type_t aod_bdtype; /* Type of board. */ + uint32_t aod_bdnum; /* Board # for DR. */ + uint32_t aod_portid; /* Port id for DR. */ + uint32_t aod_bdidx; /* Index # of AP */ + volatile uint32_t aod_chidx; /* Index # of child */ + uint32_t aod_memidx; /* Index # of memory */ + acpidev_class_id_t aod_class_id; /* Dev type for DR. */ }; #define ACPIDEV_ODF_STATUS_VALID 0x1 #define ACPIDEV_ODF_DEVINFO_CREATED 0x2 #define ACPIDEV_ODF_DEVINFO_TAGGED 0x4 -#define ACPIDEV_ODF_DEVINFO_OFFLINE 0x8 +#define ACPIDEV_ODF_HOTPLUG_CAPABLE 0x100 +#define ACPIDEV_ODF_HOTPLUG_READY 0x200 +#define ACPIDEV_ODF_HOTPLUG_FAILED 0x400 + +#define ACPIDEV_DR_IS_BOARD(hdl) \ + ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_CAPABLE) + +#define ACPIDEV_DR_SET_BOARD(hdl) \ + (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_CAPABLE + +#define ACPIDEV_DR_IS_READY(hdl) \ + ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_READY) + +#define ACPIDEV_DR_SET_READY(hdl) \ + (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_READY + +#define ACPIDEV_DR_IS_FAILED(hdl) \ + ((hdl)->aod_iflag & ACPIDEV_ODF_HOTPLUG_FAILED) + +#define ACPIDEV_DR_SET_FAILED(hdl) \ + (hdl)->aod_iflag |= ACPIDEV_ODF_HOTPLUG_FAILED + +#define ACPIDEV_DR_IS_WORKING(hdl) \ + (((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \ + ACPIDEV_ODF_HOTPLUG_FAILED)) == ACPIDEV_ODF_HOTPLUG_READY) + +#define ACPIDEV_DR_IS_PROCESSED(hdl) \ + ((hdl)->aod_iflag & (ACPIDEV_ODF_HOTPLUG_READY | \ + ACPIDEV_ODF_HOTPLUG_FAILED | ACPIDEV_ODF_HOTPLUG_CAPABLE)) + +#define ACPIDEV_DR_BOARD_READY(hdl) \ + (((hdl)->aod_iflag & \ + (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE)) == \ + (ACPIDEV_ODF_HOTPLUG_READY | ACPIDEV_ODF_HOTPLUG_CAPABLE)) /* * List of registered device class drivers. @@ -83,6 +126,23 @@ typedef struct acpidev_pseudo_uid_head { acpidev_pseudo_uid_t *apuh_first; } acpidev_pseudo_uid_head_t; +typedef struct acpidev_dr_capacity { + uint_t cpu_vendor; + uint_t cpu_family; + uint_t cpu_model_min; + uint_t cpu_model_max; + uint_t cpu_step_min; + uint_t cpu_step_max; + boolean_t hotplug_supported; + uint64_t memory_alignment; +} acpidev_dr_capacity_t; + +extern int acpidev_dr_enable; +extern krwlock_t acpidev_class_lock; +extern ulong_t acpidev_object_type_mask[BT_BITOUL(ACPI_TYPE_NS_NODE_MAX + 1)]; +extern ACPI_TABLE_SRAT *acpidev_srat_tbl_ptr; +extern ACPI_TABLE_SLIT *acpidev_slit_tbl_ptr; + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/i86pc/sys/acpinex.h b/usr/src/uts/i86pc/sys/acpinex.h index be21c8e0e2..4aa6f19636 100644 --- a/usr/src/uts/i86pc/sys/acpinex.h +++ b/usr/src/uts/i86pc/sys/acpinex.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. 
*/ @@ -62,6 +62,10 @@ typedef struct { char ans_path[MAXPATHLEN]; } acpinex_softstate_t; +extern void acpinex_event_init(void); +extern void acpinex_event_fini(void); +extern int acpinex_event_scan(acpinex_softstate_t *, boolean_t); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h index d0aea5d877..48500c4396 100644 --- a/usr/src/uts/i86pc/sys/apic.h +++ b/usr/src/uts/i86pc/sys/apic.h @@ -502,7 +502,7 @@ typedef struct apic_irq { * adjust APIC_PADSZ as we add/modify any member of apic_cpus_info. We also * don't want the compiler to optimize apic_cpus_info. */ -#define APIC_PADSZ 19 +#define APIC_PADSZ 15 #pragma pack(1) typedef struct apic_cpus_info { @@ -517,6 +517,7 @@ typedef struct apic_cpus_info { uchar_t aci_current[MAXIPL]; /* Current IRQ at each IPL */ uint32_t aci_bound; /* # of user requested binds ? */ uint32_t aci_temp_bound; /* # of non user IRQ binds */ + uint32_t aci_processor_id; /* Only used in ACPI mode. */ uchar_t aci_idle; /* The CPU is idle */ /* * Fill to make sure each struct is in separate 64-byte cache line. @@ -527,6 +528,8 @@ typedef struct apic_cpus_info { #define APIC_CPU_ONLINE 1 #define APIC_CPU_INTR_ENABLE 2 +#define APIC_CPU_FREE 4 /* APIC CPU slot is free */ +#define APIC_CPU_DIRTY 8 /* Slot was once used */ /* * APIC ops to support various flavors of APIC like APIC and x2APIC. @@ -784,6 +787,7 @@ extern int apic_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp, psm_intr_op_t intr_op, int *result); extern int apic_state(psm_state_request_t *); extern boolean_t apic_cpu_in_range(int cpu); +extern processorid_t apic_find_next_cpu_intr(void); extern int apic_check_msi_support(); extern apic_irq_t *apic_find_irq(dev_info_t *dip, struct intrspec *ispec, int type); @@ -849,6 +853,7 @@ extern int apic_diff_for_redistribution; extern int apic_poweroff_method; extern int apic_enable_acpi; extern int apic_nproc; +extern int apic_max_nproc; extern int apic_next_bind_cpu; extern int apic_redistribute_sample_interval; extern int apic_multi_msi_enable; diff --git a/usr/src/uts/i86pc/sys/dr.h b/usr/src/uts/i86pc/sys/dr.h new file mode 100644 index 0000000000..492eaeaae9 --- /dev/null +++ b/usr/src/uts/i86pc/sys/dr.h @@ -0,0 +1,500 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2010 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef _SYS_DR_H +#define _SYS_DR_H +#include <sys/types.h> +#include <sys/note.h> +#include <sys/processor.h> +#include <sys/obpdefs.h> +#include <sys/memlist.h> +#include <sys/mem_config.h> +#include <sys/param.h> /* for MAXPATHLEN */ +#include <sys/varargs.h> +#include <sys/sbd_ioctl.h> +#include <sys/dr_util.h> +#include <sys/drmach.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * helper macros for constructing and reporting internal error messages. + * NOTE: each module which uses one or more of these macros is expected + * to supply a char *dr_ie_fmt string containing the SCCS filename + * expansion macro (percent M percent) and a sprintf %d to render the + * line number argument. + */ +#define DR_INTERNAL_ERROR(hp) \ + drerr_new(1, ESBD_INTERNAL, dr_ie_fmt, __LINE__) + +#define DR_OP_INTERNAL_ERROR(hp) \ + drerr_set_c(CE_WARN, &(hp)->h_err, \ + ESBD_INTERNAL, dr_ie_fmt, __LINE__) + +#define DR_DEV_INTERNAL_ERROR(cp) \ + drerr_set_c(CE_WARN, &(cp)->sbdev_error, \ + ESBD_INTERNAL, dr_ie_fmt, __LINE__) + +/* + * Macros for keeping an error code and an associated list of integers. + */ +#define DR_MAX_ERR_INT (32) +#define DR_GET_E_CODE(sep) ((sep)->e_code) +#define DR_SET_E_CODE(sep, en) ((sep)->e_code = (en)) +#define DR_GET_E_RSC(sep) ((sep)->e_rsc) + +/* Number of device node types. */ +#define DR_MAXNUM_NT 3 + +/* used to map sbd_comp_type_t to array index */ +#define DEVSET_NIX(t) \ + (((t) == SBD_COMP_CPU) ? 0 : \ + ((t) == SBD_COMP_MEM) ? 1 : \ + ((t) == SBD_COMP_IO) ? 2 : \ + ((t) == SBD_COMP_CMP) ? 0 : DR_MAXNUM_NT) + +/* + * Format of dr_devset_t bit masks: + * + * 64 56 48 40 32 24 16 8 0 + * |....|IIII|IIII|IIII|IIII|MMMM|MMMM|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC|CCCC| + * + * 1 = indicates respective component present/attached. + * I = I/O, M = Memory, C = CPU. + */ +#define DEVSET_CPU_OFFSET 0 +#define DEVSET_CPU_NUMBER 32 +#define DEVSET_MEM_OFFSET (DEVSET_CPU_OFFSET + DEVSET_CPU_NUMBER) +#define DEVSET_MEM_NUMBER 8 +#define DEVSET_IO_OFFSET (DEVSET_MEM_OFFSET + DEVSET_MEM_NUMBER) +#define DEVSET_IO_NUMBER 16 +#define DEVSET_MAX_BITS (DEVSET_IO_OFFSET + DEVSET_IO_NUMBER) + +#define DEVSET_BIX(t) \ + (((t) == SBD_COMP_CPU) ? DEVSET_CPU_OFFSET : \ + ((t) == SBD_COMP_MEM) ? DEVSET_MEM_OFFSET : \ + ((t) == SBD_COMP_IO) ? DEVSET_IO_OFFSET : \ + ((t) == SBD_COMP_CMP) ? DEVSET_CPU_OFFSET : 0) + +#define DEVSET_NT2DEVPOS(t, u) (((t) == SBD_COMP_CMP) ?\ + (DEVSET_BIX(t) + (u) * MAX_CORES_PER_CMP) : DEVSET_BIX(t) + (u)) + +#if (DEVSET_MAX_BITS <= 64) typedef uint64_t dr_devset_t; + +#define DEVSET_ONEUNIT ((dr_devset_t)1) +#define DEVSET_ANYUNIT ((dr_devset_t)(-1)) +#define DEVSET_CPU_NMASK ((dr_devset_t)((1ULL << DEVSET_CPU_NUMBER) - 1)) +#define DEVSET_MEM_NMASK ((dr_devset_t)((1ULL << DEVSET_MEM_NUMBER) - 1)) +#define DEVSET_IO_NMASK ((dr_devset_t)((1ULL << DEVSET_IO_NUMBER) - 1)) +#define DEVSET_CMP_NMASK ((dr_devset_t)((1ULL << MAX_CORES_PER_CMP) - 1)) + +#define DEVSET_NMASK(t) \ + (((t) == SBD_COMP_CPU) ? DEVSET_CPU_NMASK : \ + ((t) == SBD_COMP_MEM) ? DEVSET_MEM_NMASK : \ + ((t) == SBD_COMP_IO) ? DEVSET_IO_NMASK : \ + ((t) == SBD_COMP_CMP) ? DEVSET_CPU_NMASK : 0) + +#define DEVSET_MASK \ + ((DEVSET_CPU_NMASK << DEVSET_CPU_OFFSET) | \ + (DEVSET_MEM_NMASK << DEVSET_MEM_OFFSET) | \ + (DEVSET_IO_NMASK << DEVSET_IO_OFFSET)) + +#define DEVSET(t, u) \ + (((u) == DEVSET_ANYUNIT) ? \ + ((DEVSET_NMASK(t) << DEVSET_NT2DEVPOS((t), 0)) & \ + DEVSET_MASK) : \ + ((t) == SBD_COMP_CMP) ?
\ + (DEVSET_CMP_NMASK << DEVSET_NT2DEVPOS((t), (u))) : \ + (DEVSET_ONEUNIT << DEVSET_NT2DEVPOS((t), (u)))) + +#define DEVSET_IS_NULL(ds) ((ds) == 0) +#define DEVSET_IN_SET(ds, t, u) (((ds) & DEVSET((t), (u))) != 0) +#define DEVSET_ADD(ds, t, u) ((ds) |= DEVSET((t), (u))) +#define DEVSET_DEL(ds, t, u) ((ds) &= ~DEVSET((t), (u))) +#define DEVSET_AND(ds1, ds2) ((ds1) & (ds2)) +#define DEVSET_OR(ds1, ds2) ((ds1) | (ds2)) +#define DEVSET_NAND(ds1, ds2) ((ds1) & ~(ds2)) +#define DEVSET_GET_UNITSET(ds, t) \ + (((ds) & DEVSET((t), DEVSET_ANYUNIT)) >> DEVSET_NT2DEVPOS((t), 0)) +#define DEVSET_FMT_STR "0x%" PRIx64 "" +#define DEVSET_FMT_ARG(ds) (ds) +#else /* DEVSET_MAX_BITS <= 64 */ +#error please implement devset with bitmap to support more than 64 devices +#endif /* DEVSET_MAX_BITS <= 64 */ + +/* + * Ops for dr_board_t.b_dev_* + */ +#define DR_DEV_IS(ds, cp) DEVSET_IN_SET( \ + (cp)->sbdev_bp->b_dev_##ds, \ + (cp)->sbdev_type, \ + (cp)->sbdev_unum) + +#define DR_DEV_ADD(ds, cp) DEVSET_ADD( \ + (cp)->sbdev_bp->b_dev_##ds, \ + (cp)->sbdev_type, \ + (cp)->sbdev_unum) + +#define DR_DEV_DEL(ds, cp) DEVSET_DEL( \ + (cp)->sbdev_bp->b_dev_##ds, \ + (cp)->sbdev_type, \ + (cp)->sbdev_unum) + +/* + * Ops for dr_board_t.b_dev_present + */ +#define DR_DEV_IS_PRESENT(cp) DR_DEV_IS(present, cp) +#define DR_DEV_SET_PRESENT(cp) DR_DEV_ADD(present, cp) +#define DR_DEV_CLR_PRESENT(cp) DR_DEV_DEL(present, cp) + +/* + * Ops for dr_board_t.b_dev_attached + */ +#define DR_DEV_IS_ATTACHED(cp) DR_DEV_IS(attached, cp) +#define DR_DEV_SET_ATTACHED(cp) DR_DEV_ADD(attached, cp) +#define DR_DEV_CLR_ATTACHED(cp) DR_DEV_DEL(attached, cp) + +/* + * Ops for dr_board_t.b_dev_released + */ +#define DR_DEV_IS_RELEASED(cp) DR_DEV_IS(released, cp) +#define DR_DEV_SET_RELEASED(cp) DR_DEV_ADD(released, cp) +#define DR_DEV_CLR_RELEASED(cp) DR_DEV_DEL(released, cp) + +/* + * Ops for dr_board_t.b_dev_unreferenced + */ +#define DR_DEV_IS_UNREFERENCED(cp) DR_DEV_IS(unreferenced, cp) +#define DR_DEV_SET_UNREFERENCED(cp) DR_DEV_ADD(unreferenced, cp) +#define DR_DEV_CLR_UNREFERENCED(cp) DR_DEV_DEL(unreferenced, cp) + +#define DR_DEVS_PRESENT(bp) \ + ((bp)->b_dev_present) +#define DR_DEVS_ATTACHED(bp) \ + ((bp)->b_dev_attached) +#define DR_DEVS_RELEASED(bp) \ + ((bp)->b_dev_released) +#define DR_DEVS_UNREFERENCED(bp) \ + ((bp)->b_dev_unreferenced) +#define DR_DEVS_UNATTACHED(bp) \ + ((bp)->b_dev_present & ~(bp)->b_dev_attached) +#define DR_DEVS_CONFIGURE(bp, devs) \ + ((bp)->b_dev_attached = (devs)) +#define DR_DEVS_DISCONNECT(bp, devs) \ + ((bp)->b_dev_present &= ~(devs)) +#define DR_DEVS_CANCEL(bp, devs) \ + ((bp)->b_dev_released &= ~(devs), \ + (bp)->b_dev_unreferenced &= ~(devs)) + +/* + * CMP Specific Helpers + */ +#define DR_CMP_CORE_UNUM(cmp, core) ((cmp) * MAX_CORES_PER_CMP + (core)) + +/* + * For CPU and CMP devices, DR_UNUM2SBD_UNUM is used to extract the physical + * CPU/CMP id from the device id. + */ +#define DR_UNUM2SBD_UNUM(n, d) \ + ((d) == SBD_COMP_CPU ? ((n) / MAX_CORES_PER_CMP) : \ + (d) == SBD_COMP_CMP ? ((n) / MAX_CORES_PER_CMP) : (n)) + +/* + * Some stuff to assist in debug.
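A worked example of the dr_devset_t layout defined above: CPUs occupy bits 0-31, memory units bits 32-39, and I/O units bits 40-55, so marking CPU unit 3 and memory unit 1 present sets bits 3 and 33. A standalone check, assuming a non-CMP configuration where DEVSET_NT2DEVPOS reduces to offset + unit:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t devset = 0;

	devset |= UINT64_C(1) << (0 + 3);	/* DEVSET(SBD_COMP_CPU, 3) */
	devset |= UINT64_C(1) << (32 + 1);	/* DEVSET(SBD_COMP_MEM, 1) */
	(void) printf("0x%" PRIx64 "\n", devset);	/* prints 0x200000008 */
	return (0);
}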
+ */ +#ifdef DEBUG +#define DRDBG_STATE 0x00000001 +#define DRDBG_QR 0x00000002 +#define DRDBG_CPU 0x00000004 +#define DRDBG_MEM 0x00000008 +#define DRDBG_IO 0x00000010 + +#define PR_ALL if (dr_debug) printf +#define PR_STATE if (dr_debug & DRDBG_STATE) printf +#define PR_QR if (dr_debug & DRDBG_QR) prom_printf +#define PR_CPU if (dr_debug & DRDBG_CPU) printf +#define PR_MEM if (dr_debug & DRDBG_MEM) printf +#define PR_IO if (dr_debug & DRDBG_IO) printf +#define PR_MEMLIST_DUMP if (dr_debug & DRDBG_MEM) MEMLIST_DUMP + +extern uint_t dr_debug; +#else /* DEBUG */ +#define PR_ALL _NOTE(CONSTANTCONDITION) if (0) printf +#define PR_STATE PR_ALL +#define PR_QR PR_ALL +#define PR_CPU PR_ALL +#define PR_MEM PR_ALL +#define PR_IO PR_ALL +#define PR_MEMLIST_DUMP _NOTE(CONSTANTCONDITION) if (0) MEMLIST_DUMP + +#endif /* DEBUG */ + +/* + * dr_board_t b_sflags. + */ +#define DR_BSLOCK 0x01 /* for blocking status (protected by b_slock) */ + +typedef const char *fn_t; + +/* + * Unsafe devices based on dr.conf prop "unsupported-io-drivers" + */ +typedef struct { + char **devnames; + uint_t ndevs; +} dr_unsafe_devs_t; + +/* + * Device states. + * PARTIAL state is really only relevant for board state. + */ +typedef enum { + DR_STATE_EMPTY = 0, + DR_STATE_OCCUPIED, + DR_STATE_CONNECTED, + DR_STATE_UNCONFIGURED, + DR_STATE_PARTIAL, /* part connected, part configured */ + DR_STATE_CONFIGURED, + DR_STATE_RELEASE, + DR_STATE_UNREFERENCED, + DR_STATE_FATAL, + DR_STATE_MAX +} dr_state_t; + +typedef struct dr_handle { + struct dr_board *h_bd; + sbd_error_t *h_err; + int h_op_intr; /* nz if op interrupted */ + dev_t h_dev; /* dev_t of opened device */ + int h_cmd; /* PIM ioctl argument */ + int h_mode; /* device open mode */ + sbd_cmd_t h_sbdcmd; /* copied-in ioctl cmd struct */ + sbd_ioctl_arg_t *h_iap; /* ptr to caller-space cmd struct */ + dr_devset_t h_devset; /* based on h_dev */ + uint_t h_ndi; + drmach_opts_t h_opts; /* command-line platform options */ +} dr_handle_t; + +typedef struct dr_common_unit { + dr_state_t sbdev_state; + sbd_state_t sbdev_ostate; + sbd_cond_t sbdev_cond; + time_t sbdev_time; + int sbdev_busy; + struct dr_board *sbdev_bp; + int sbdev_unum; + sbd_comp_type_t sbdev_type; + drmachid_t sbdev_id; + char sbdev_path[MAXNAMELEN]; + sbd_error_t *sbdev_error; +} dr_common_unit_t; + +typedef struct dr_mem_unit { + dr_common_unit_t sbm_cm; /* mem-unit state */ + uint_t sbm_flags; + pfn_t sbm_basepfn; + pgcnt_t sbm_npages; + pgcnt_t sbm_pageslost; + struct memlist *sbm_dyn_segs; /* kphysm_add_dynamic segs */ + /* + * The following fields are used during + * the memory detach process only. sbm_mlist + * will be used to store the board memlist + * following a detach. The memlist will be + * used to re-attach the board when configuring + * the unit directly after an unconfigure. + */ + struct dr_mem_unit *sbm_peer; + struct memlist *sbm_mlist; + struct memlist *sbm_del_mlist; + memhandle_t sbm_memhandle; + uint64_t sbm_alignment_mask; + uint64_t sbm_slice_base; + uint64_t sbm_slice_top; + uint64_t sbm_slice_size; +} dr_mem_unit_t; + +/* + * Currently only maintain state information for individual + * components. 
+ */ +typedef struct dr_cpu_unit { + dr_common_unit_t sbc_cm; /* cpu-unit state */ + processorid_t sbc_cpu_id; + cpu_flag_t sbc_cpu_flags; /* snapshot of CPU flags */ + ushort_t sbc_pad1; /* padded for compatibility */ + int sbc_speed; + int sbc_ecache; + int sbc_cpu_impl; +} dr_cpu_unit_t; + +typedef struct dr_io_unit { + dr_common_unit_t sbi_cm; /* io-unit state */ +} dr_io_unit_t; + +typedef union { + dr_common_unit_t du_common; + dr_mem_unit_t du_mem; + dr_cpu_unit_t du_cpu; + dr_io_unit_t du_io; +} dr_dev_unit_t; + +typedef struct dr_board { + kmutex_t b_lock; /* lock for this board struct */ + kmutex_t b_slock; /* lock for status on the board */ + kcondvar_t b_scv; /* condvar for status on the board */ + int b_sflags; /* for serializing status */ + sbd_state_t b_rstate; /* board's cfgadm receptacle state */ + sbd_state_t b_ostate; /* board's cfgadm occupant state */ + sbd_cond_t b_cond; /* cfgadm condition */ + int b_busy; + int b_assigned; + time_t b_time; /* time of last board operation */ + char b_type[MAXNAMELEN]; + drmachid_t b_id; + int b_num; /* board number */ + int b_ndev; /* # of devices on board */ + dev_info_t *b_dip; /* dip for make-nodes */ + dr_state_t b_state; /* board DR state */ + dr_devset_t b_dev_present; /* present mask */ + dr_devset_t b_dev_attached; /* attached mask */ + dr_devset_t b_dev_released; /* released mask */ + dr_devset_t b_dev_unreferenced; /* unreferenced mask */ + char b_path[MAXNAMELEN]; + dr_dev_unit_t *b_dev[DR_MAXNUM_NT]; +} dr_board_t; + +/* + * dr_quiesce.c interfaces + */ +struct dr_sr_handle; +typedef struct dr_sr_handle dr_sr_handle_t; + +extern dr_sr_handle_t *dr_get_sr_handle(dr_handle_t *handle); +extern void dr_release_sr_handle(dr_sr_handle_t *srh); +extern int dr_suspend(dr_sr_handle_t *srh); +extern void dr_resume(dr_sr_handle_t *srh); +extern void dr_check_devices(dev_info_t *dip, int *refcount, + dr_handle_t *handle, uint64_t *arr, int *idx, + int len, int *refcount_non_gldv3); +extern int dr_pt_test_suspend(dr_handle_t *hp); + +/* + * dr_cpu.c interface + */ +extern void dr_init_cpu_unit(dr_cpu_unit_t *cp); +extern int dr_pre_attach_cpu(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_attach_cpu(dr_handle_t *hp, dr_common_unit_t *cp); +extern int dr_post_attach_cpu(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_pre_release_cpu(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_pre_detach_cpu(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_detach_cpu(dr_handle_t *hp, dr_common_unit_t *cp); +extern int dr_post_detach_cpu(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_cpu_status(dr_handle_t *hp, dr_devset_t devset, + sbd_dev_stat_t *dsp); +extern int dr_cancel_cpu(dr_cpu_unit_t *cp); +extern int dr_disconnect_cpu(dr_cpu_unit_t *cp); + + +/* + * dr_mem.c interface + */ +extern void dr_init_mem_unit(dr_mem_unit_t *mp); +extern int dr_pre_attach_mem(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp); +extern int dr_post_attach_mem(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_pre_release_mem(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_release_mem(dr_common_unit_t *cp); +extern void dr_release_mem_done(dr_common_unit_t *cp); +extern int dr_pre_detach_mem(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_detach_mem(dr_handle_t *, 
dr_common_unit_t *); +extern int dr_post_detach_mem(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_mem_status(dr_handle_t *hp, dr_devset_t devset, + sbd_dev_stat_t *dsp); +extern int dr_cancel_mem(dr_mem_unit_t *mp); +extern int dr_disconnect_mem(dr_mem_unit_t *mp); + +/* + * dr_io.c interface + */ +extern void dr_init_io_unit(dr_io_unit_t *io); +extern int dr_pre_attach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_attach_io(dr_handle_t *hp, dr_common_unit_t *cp); +extern int dr_post_attach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_pre_release_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_pre_detach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern void dr_detach_io(dr_handle_t *hp, dr_common_unit_t *cp); +extern int dr_post_detach_io(dr_handle_t *hp, + dr_common_unit_t **devlist, int devnum); +extern int dr_io_status(dr_handle_t *hp, dr_devset_t devset, + sbd_dev_stat_t *dsp); +extern int dr_disconnect_io(dr_io_unit_t *ip); + + +/* + * dr.c interface + */ +extern void dr_op_err(int ce, dr_handle_t *hp, int code, char *fmt, ...); +extern void dr_dev_err(int ce, dr_common_unit_t *cp, int code); + +extern dr_cpu_unit_t *dr_get_cpu_unit(dr_board_t *bp, int unit_num); +extern dr_mem_unit_t *dr_get_mem_unit(dr_board_t *bp, int unit_num); +extern dr_io_unit_t *dr_get_io_unit(dr_board_t *bp, int unit_num); + +extern dr_board_t *dr_lookup_board(int board_num); +extern int dr_release_dev_done(dr_common_unit_t *cp); +extern char *dr_nt_to_dev_type(int type); +extern void dr_device_transition(dr_common_unit_t *cp, + dr_state_t new_state); +extern void dr_lock_status(dr_board_t *bp); +extern void dr_unlock_status(dr_board_t *bp); +extern int dr_cmd_flags(dr_handle_t *hp); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DR_H */ diff --git a/usr/src/uts/i86pc/sys/dr_util.h b/usr/src/uts/i86pc/sys/dr_util.h new file mode 100644 index 0000000000..46a66c759b --- /dev/null +++ b/usr/src/uts/i86pc/sys/dr_util.h @@ -0,0 +1,99 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_DR_UTIL_H_ +#define _SYS_DR_UTIL_H_ +#include <sys/types.h> +#include <sys/kmem.h> +#include <sys/memlist.h> +#include <sys/varargs.h> +#include <sys/sbd_ioctl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define GETSTRUCT(t, n) \ + ((t *)kmem_zalloc((size_t)(n) * sizeof (t), KM_SLEEP)) +#define FREESTRUCT(p, t, n) \ + (kmem_free((caddr_t)(p), sizeof (t) * (size_t)(n))) + +#define DRERR_SET_C(epps, eppn) \ + if (*(epps) == NULL) \ + *(epps) = *(eppn); \ + else \ + sbd_err_clear(eppn) + +#ifdef DEBUG +#define MEMLIST_DUMP(ml) memlist_dump(ml) +#else +#define MEMLIST_DUMP(ml) +#endif + +extern sbd_error_t *sbd_err_new(int e_code, char *fmt, va_list args); +extern void sbd_err_log(sbd_error_t *ep, int ce); +extern void sbd_err_clear(sbd_error_t **ep); +extern void sbd_err_set_c(sbd_error_t **ep, int ce, + int e_code, char *fmt, ...); +extern void sbd_err_set(sbd_error_t **ep, int ce, + int e_code, char *fmt, ...); + +extern sbd_error_t *drerr_new(int log, int e_code, char *fmt, ...); +extern sbd_error_t *drerr_new_v(int e_code, char *fmt, va_list args); +extern void drerr_set_c(int log, sbd_error_t **ep, + int e_code, char *fmt, ...); + +extern void dr_memlist_delete(struct memlist *mlist); +extern void memlist_dump(struct memlist *mlist); +extern int dr_memlist_intersect(struct memlist *al, + struct memlist *bl); +extern void dr_memlist_coalesce(struct memlist *mlist); +extern struct memlist *dr_memlist_dup(struct memlist *mlist); +extern struct memlist *dr_memlist_add_span(struct memlist *mlist, + uint64_t base, uint64_t len); +extern struct memlist *dr_memlist_del_span(struct memlist *mlist, + uint64_t base, uint64_t len); +extern struct memlist *dr_memlist_cat_span(struct memlist *mlist, + uint64_t base, uint64_t len); + +/* + * These are all utilities internal for DR. There are + * similar functions in common/os which have similar names. + * We rename them to make sure there is no name space + * conflict. + */ +#define memlist_delete dr_memlist_delete +#define memlist_intersect dr_memlist_intersect +#define memlist_coalesce dr_memlist_coalesce +#define memlist_dup dr_memlist_dup +#define memlist_add_span dr_memlist_add_span +#define memlist_del_span dr_memlist_del_span +#define memlist_cat_span dr_memlist_cat_span + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DR_UTIL_H_ */ diff --git a/usr/src/uts/i86pc/sys/drmach.h b/usr/src/uts/i86pc/sys/drmach.h new file mode 100644 index 0000000000..2d2a2cff7b --- /dev/null +++ b/usr/src/uts/i86pc/sys/drmach.h @@ -0,0 +1,177 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. 
+ * All rights reserved. + */ + +#ifndef _SYS_DRMACH_H_ +#define _SYS_DRMACH_H_ +#include <sys/types.h> +#include <sys/memlist.h> +#include <sys/ddi.h> +#include <sys/ddi_impldefs.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/sysevent.h> +#include <sys/x86_archext.h> +#include <sys/sbd_ioctl.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_BOARDS drmach_max_boards() +#define MAX_MEM_UNITS_PER_BOARD drmach_max_mem_units_per_board() +#define MAX_IO_UNITS_PER_BOARD drmach_max_io_units_per_board() +#define MAX_CMP_UNITS_PER_BOARD drmach_max_cmp_units_per_board() + +/* DR uses MAX_CORES_PER_CMP as number of logical CPUs within a CMP. */ +#define MAX_CORES_PER_CMP drmach_max_core_per_cmp() + +/* Maximum possible logical CPUs per board. */ +#define MAX_CPU_UNITS_PER_BOARD (MAX_CMP_UNITS_PER_BOARD * MAX_CORES_PER_CMP) + +/* Check whether CPU is CMP. True if chip has more than one core/thread. */ +#define CPU_IMPL_IS_CMP(impl) (MAX_CORES_PER_CMP > 1) + +/* CPU implementation ID for Intel Nehalem CPU. */ +#define X86_CPU_IMPL_NEHALEM_EX 0x062E0000 +#define X86_CPU_IMPL_UNKNOWN 0x00000000 + +/* returned with drmach_board_find_devices callback */ +#define DRMACH_DEVTYPE_CPU "cpu" +#define DRMACH_DEVTYPE_MEM "memory" +#define DRMACH_DEVTYPE_PCI "pci" + +/* + * x86 platform specific routines currently only defined + * in drmach_acpi.c and referenced by DR. + */ + +typedef void *drmachid_t; + +typedef struct { + boolean_t assigned; + boolean_t powered; + boolean_t configured; + boolean_t busy; + boolean_t empty; + sbd_cond_t cond; + char type[SBD_TYPE_LEN]; + char info[SBD_MAX_INFO]; +} drmach_status_t; + +typedef struct { + int size; + char *copts; +} drmach_opts_t; + +typedef struct { + uint64_t mi_basepa; + uint64_t mi_size; + uint64_t mi_slice_base; + uint64_t mi_slice_top; + uint64_t mi_slice_size; + uint64_t mi_alignment_mask; +} drmach_mem_info_t; + +extern uint_t drmach_max_boards(void); +extern uint_t drmach_max_io_units_per_board(void); +extern uint_t drmach_max_cmp_units_per_board(void); +extern uint_t drmach_max_mem_units_per_board(void); +extern uint_t drmach_max_core_per_cmp(void); + +extern sbd_error_t *drmach_get_dip(drmachid_t id, dev_info_t **dip); +extern sbd_error_t *drmach_release(drmachid_t id); +extern sbd_error_t *drmach_pre_op(int cmd, drmachid_t id, + drmach_opts_t *opts, void *devsetp); +extern sbd_error_t *drmach_post_op(int cmd, drmachid_t id, + drmach_opts_t *opts, int rv); +extern sbd_error_t *drmach_configure(drmachid_t id, int flags); +extern sbd_error_t *drmach_unconfigure(drmachid_t id, int flags); +extern sbd_error_t *drmach_status(drmachid_t id, drmach_status_t *stat); +extern sbd_error_t *drmach_passthru(drmachid_t id, + drmach_opts_t *opts); + +extern sbd_error_t *drmach_board_find_devices(drmachid_t id, void *a, + sbd_error_t *(*found)(void *a, const char *, int, drmachid_t)); +extern int drmach_board_lookup(int bnum, drmachid_t *id); +extern sbd_error_t *drmach_board_name(int bnum, char *buf, int buflen); +extern sbd_error_t *drmach_board_assign(int bnum, drmachid_t *id); +extern sbd_error_t *drmach_board_unassign(drmachid_t id); +extern sbd_error_t *drmach_board_poweroff(drmachid_t id); +extern sbd_error_t *drmach_board_poweron(drmachid_t id); +extern sbd_error_t *drmach_board_test(drmachid_t id, drmach_opts_t *opts, + int force); +extern sbd_error_t *drmach_board_connect(drmachid_t id, + drmach_opts_t *opts); +extern sbd_error_t *drmach_board_disconnect(drmachid_t id, + drmach_opts_t *opts); +extern sbd_error_t 
*drmach_board_deprobe(drmachid_t id); +extern int drmach_board_is_floating(drmachid_t); + +extern sbd_error_t *drmach_cpu_get_id(drmachid_t id, processorid_t *cpuid); +extern sbd_error_t *drmach_cpu_get_impl(drmachid_t id, int *ip); +extern sbd_error_t *drmach_cpu_disconnect(drmachid_t id); + +extern sbd_error_t *drmach_io_is_attached(drmachid_t id, int *yes); +extern sbd_error_t *drmach_io_post_attach(drmachid_t id); +extern sbd_error_t *drmach_io_pre_release(drmachid_t id); +extern sbd_error_t *drmach_io_unrelease(drmachid_t id); +extern sbd_error_t *drmach_io_post_release(drmachid_t id); + +extern sbd_error_t *drmach_mem_get_slice_info(drmachid_t id, + uint64_t *basepa, uint64_t *endpa, + uint64_t *sizep); +extern sbd_error_t *drmach_mem_get_memlist(drmachid_t id, + struct memlist **ml); +extern sbd_error_t *drmach_mem_get_info(drmachid_t, drmach_mem_info_t *); +extern sbd_error_t *drmach_mem_enable(drmachid_t id); +extern sbd_error_t *drmach_mem_disable(drmachid_t id); +extern sbd_error_t *drmach_mem_add_span(drmachid_t id, + uint64_t basepa, uint64_t size); +extern sbd_error_t *drmach_mem_del_span(drmachid_t id, + uint64_t basepa, uint64_t size); +extern sbd_error_t *drmach_copy_rename_init( + drmachid_t dst_id, drmachid_t src_id, + struct memlist *src_copy_ml, + drmachid_t *pgm_id); +extern sbd_error_t *drmach_copy_rename_fini(drmachid_t id); +extern void drmach_copy_rename(drmachid_t id); +extern int drmach_copy_rename_need_suspend(drmachid_t id); + +extern int drmach_log_sysevent(int board, char *hint, int flag, + int verbose); + +extern int drmach_verify_sr(dev_info_t *dip, int sflag); +extern void drmach_suspend_last(); +extern void drmach_resume_first(); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_DRMACH_H_ */ diff --git a/usr/src/uts/i86pc/sys/fastboot_msg.h b/usr/src/uts/i86pc/sys/fastboot_msg.h index a35fcf2f67..4c7ee7f8b5 100644 --- a/usr/src/uts/i86pc/sys/fastboot_msg.h +++ b/usr/src/uts/i86pc/sys/fastboot_msg.h @@ -45,6 +45,7 @@ fastboot_nosup_msg(FBNS_DEFAULT, "") fastboot_nosup_msg(FBNS_SUSPEND, " after suspend/resume") fastboot_nosup_msg(FBNS_FMAHWERR, " due to FMA recovery from hardware error") +fastboot_nosup_msg(FBNS_HOTPLUG, " after DR operations") /* * Should ALWAYS be the last one. diff --git a/usr/src/uts/i86pc/sys/machcpuvar.h b/usr/src/uts/i86pc/sys/machcpuvar.h index b652331715..67b9a07439 100644 --- a/usr/src/uts/i86pc/sys/machcpuvar.h +++ b/usr/src/uts/i86pc/sys/machcpuvar.h @@ -63,6 +63,7 @@ typedef void *cpu_pri_lev_t; struct cpuid_info; struct cpu_ucode_info; +struct cmi_hdl; /* * A note about the hypervisor affinity bits: a one bit in the affinity mask @@ -135,7 +136,9 @@ struct machcpu { struct cpu_ucode_info *mcpu_ucode_info; - void *mcpu_pm_mach_state; + void *mcpu_pm_mach_state; + struct cmi_hdl *mcpu_cmi_hdl; + void *mcpu_mach_ctx_ptr; /* * A stamp that is unique per processor and changes diff --git a/usr/src/uts/i86pc/sys/machsystm.h b/usr/src/uts/i86pc/sys/machsystm.h index b98a48326e..8c79243a29 100644 --- a/usr/src/uts/i86pc/sys/machsystm.h +++ b/usr/src/uts/i86pc/sys/machsystm.h @@ -23,6 +23,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ #ifndef _SYS_MACHSYSTM_H #define _SYS_MACHSYSTM_H @@ -51,11 +55,30 @@ extern "C" { #ifdef _KERNEL +typedef enum mach_cpu_add_arg_type { + MACH_CPU_ARG_LOCAL_APIC, + MACH_CPU_ARG_LOCAL_X2APIC, +} mach_cpu_add_arg_type_t; + +typedef struct mach_cpu_add_arg { + mach_cpu_add_arg_type_t type; + union { + struct { + uint32_t apic_id; + uint32_t proc_id; + } apic; + } arg; +} mach_cpu_add_arg_t; + extern void mach_cpu_idle(void); extern void mach_cpu_halt(char *); extern int mach_cpu_start(cpu_t *, void *); extern int mach_cpuid_start(processorid_t, void *); +extern int mach_cpu_stop(cpu_t *, void *); +extern int mach_cpu_add(mach_cpu_add_arg_t *, processorid_t *); +extern int mach_cpu_remove(processorid_t); extern int mach_cpu_create_device_node(cpu_t *, dev_info_t **); +extern int mach_cpu_get_device_node(cpu_t *, dev_info_t **); extern int Cpudelay; extern void setcpudelay(void); @@ -120,10 +143,16 @@ extern int use_mp; extern struct cpu cpus[]; /* pointer to other cpus */ extern struct cpu *cpu[]; /* pointer to all cpus */ +/* Operation types for extended mach_cpucontext interfaces */ +#define MACH_CPUCONTEXT_OP_START 0 +#define MACH_CPUCONTEXT_OP_STOP 1 + extern int mach_cpucontext_init(void); extern void mach_cpucontext_fini(void); extern void *mach_cpucontext_alloc(struct cpu *); extern void mach_cpucontext_free(struct cpu *, void *, int); +extern void *mach_cpucontext_xalloc(struct cpu *, int); +extern void mach_cpucontext_xfree(struct cpu *, void *, int, int); extern void rmp_gdt_init(rm_platter_t *); extern uintptr_t hole_start, hole_end; @@ -145,6 +174,52 @@ extern int dtrace_linear_pc(struct regs *rp, proc_t *p, caddr_t *linearp); extern int force_shutdown_method; +/* Dynamic Reconfiguration capability interface. */ +#define PLAT_DR_OPTIONS_NAME "plat-dr-options" +#define PLAT_DR_PHYSMAX_NAME "plat-dr-physmax" +#define PLAT_MAX_NCPUS_NAME "plat-max-ncpus" +#define BOOT_MAX_NCPUS_NAME "boot-max-ncpus" +#define BOOT_NCPUS_NAME "boot-ncpus" + +#define PLAT_DR_FEATURE_CPU 0x1 +#define PLAT_DR_FEATURE_MEMORY 0x2 +#define PLAT_DR_FEATURE_ENABLED 0x1000000 + +#define plat_dr_enabled() \ + plat_dr_check_capability(PLAT_DR_FEATURE_ENABLED) + +#define plat_dr_enable() \ + plat_dr_enable_capability(PLAT_DR_FEATURE_ENABLED) + +#define plat_dr_disable_cpu() \ + plat_dr_disable_capability(PLAT_DR_FEATURE_CPU) +#define plat_dr_disable_memory() \ + plat_dr_disable_capability(PLAT_DR_FEATURE_MEMORY) + +extern boolean_t plat_dr_support_cpu(void); +extern boolean_t plat_dr_support_memory(void); +extern boolean_t plat_dr_check_capability(uint64_t features); +extern void plat_dr_enable_capability(uint64_t features); +extern void plat_dr_disable_capability(uint64_t features); + +#pragma weak plat_dr_support_cpu +#pragma weak plat_dr_support_memory + +/* + * Used to communicate DR updates to platform lgroup framework + */ +typedef struct { + uint64_t u_base; + uint64_t u_length; + uint32_t u_domain; + uint32_t u_device_id; + uint32_t u_sli_cnt; + uchar_t *u_sli_ptr; +} update_membounds_t; + +/* Maximum physical page number (PFN) for memory DR operations. */ +extern uint64_t plat_dr_physmax; + #ifdef __xpv #include <sys/xen_mmu.h> extern page_t *page_get_high_mfn(mfn_t); diff --git a/usr/src/uts/i86pc/sys/memnode.h b/usr/src/uts/i86pc/sys/memnode.h index dde3931627..2bcb623617 100644 --- a/usr/src/uts/i86pc/sys/memnode.h +++ b/usr/src/uts/i86pc/sys/memnode.h @@ -41,8 +41,9 @@ extern "C" { * memory slice ID and the high-order bits are the SSM nodeid. 
*/ +#define MAX_MEM_NODES_PER_LGROUP 3 #ifndef MAX_MEM_NODES -#define MAX_MEM_NODES (8) +#define MAX_MEM_NODES (8 * MAX_MEM_NODES_PER_LGROUP) #endif /* MAX_MEM_NODES */ #define PFN_2_MEM_NODE(pfn) \ @@ -57,14 +58,12 @@ extern "C" { */ extern int plat_pfn_to_mem_node(pfn_t); -extern int plat_lgrphand_to_mem_node(lgrp_handle_t); extern void plat_assign_lgrphand_to_mem_node(lgrp_handle_t, int); extern lgrp_handle_t plat_mem_node_to_lgrphand(int); extern void plat_slice_add(pfn_t, pfn_t); extern void plat_slice_del(pfn_t, pfn_t); #pragma weak plat_pfn_to_mem_node -#pragma weak plat_lgrphand_to_mem_node #pragma weak plat_mem_node_to_lgrphand #pragma weak plat_slice_add #pragma weak plat_slice_del diff --git a/usr/src/uts/i86pc/sys/psm_types.h b/usr/src/uts/i86pc/sys/psm_types.h index 17e9db17b1..24ad7d700f 100644 --- a/usr/src/uts/i86pc/sys/psm_types.h +++ b/usr/src/uts/i86pc/sys/psm_types.h @@ -23,12 +23,14 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #ifndef _SYS_PSM_TYPES_H #define _SYS_PSM_TYPES_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Platform Specific Module Types */ @@ -88,6 +90,31 @@ typedef struct psm_state_req { } req; } psm_state_request_t; +typedef enum psm_cpu_op_e { + PSM_CPU_ADD = 1, + PSM_CPU_REMOVE, + PSM_CPU_STOP +} psm_cpu_op_t; + +typedef struct psm_cpu_request { + psm_cpu_op_t pcr_cmd; + union { + struct { + processorid_t cpuid; + void *argp; + } cpu_add; + + struct { + processorid_t cpuid; + } cpu_remove; + + struct { + processorid_t cpuid; + void *ctx; + } cpu_stop; + } req; +} psm_cpu_request_t; + struct psm_ops { int (*psm_probe)(void); @@ -106,7 +133,7 @@ struct psm_ops { void (*psm_unset_idlecpu)(processorid_t cpun); #if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \ - defined(PSMI_1_6) + defined(PSMI_1_6) || defined(PSMI_1_7) int (*psm_clkinit)(int hertz); #else void (*psm_clkinit)(int hertz); @@ -117,14 +144,14 @@ struct psm_ops { hrtime_t (*psm_gethrtime)(void); processorid_t (*psm_get_next_processorid)(processorid_t cpu_id); -#if defined(PSMI_1_5) || defined(PSMI_1_6) +#if defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7) int (*psm_cpu_start)(processorid_t cpun, caddr_t ctxt); #else void (*psm_cpu_start)(processorid_t cpun, caddr_t rm_code); #endif int (*psm_post_cpu_start)(void); #if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \ - defined(PSMI_1_5) || defined(PSMI_1_6) + defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7) void (*psm_shutdown)(int cmd, int fcn); #else void (*psm_shutdown)(void); @@ -140,26 +167,30 @@ struct psm_ops { #endif void (*psm_notify_error)(int level, char *errmsg); #if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4) || \ - defined(PSMI_1_5) || defined(PSMI_1_6) + defined(PSMI_1_5) || defined(PSMI_1_6) || defined(PSMI_1_7) void (*psm_notify_func)(int msg); #endif #if defined(PSMI_1_3) || defined(PSMI_1_4) || defined(PSMI_1_5) || \ - defined(PSMI_1_6) + defined(PSMI_1_6) || defined(PSMI_1_7) void (*psm_timer_reprogram)(hrtime_t time); void (*psm_timer_enable)(void); void (*psm_timer_disable)(void); void (*psm_post_cyclic_setup)(void *arg); #endif -#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6) +#if defined(PSMI_1_4) || defined(PSMI_1_5) || defined(PSMI_1_6) || \ + defined(PSMI_1_7) void (*psm_preshutdown)(int cmd, int fcn); #endif -#if defined(PSMI_1_5) || defined(PSMI_1_6) +#if defined(PSMI_1_5) || defined(PSMI_1_6) || 
defined(PSMI_1_7) int (*psm_intr_ops)(dev_info_t *dip, ddi_intr_handle_impl_t *handle, psm_intr_op_t op, int *result); #endif -#if defined(PSMI_1_6) +#if defined(PSMI_1_6) || defined(PSMI_1_7) int (*psm_state)(psm_state_request_t *request); #endif +#if defined(PSMI_1_7) + int (*psm_cpu_ops)(psm_cpu_request_t *reqp); +#endif }; @@ -183,7 +214,8 @@ struct psm_info { #define PSM_INFO_VER01_3 0x8604 #define PSM_INFO_VER01_4 0x8605 #define PSM_INFO_VER01_5 0x8606 -#define PSM_INFO_VER01_6 0x8706 +#define PSM_INFO_VER01_6 0x8607 +#define PSM_INFO_VER01_7 0x8608 #define PSM_INFO_VER01_X (PSM_INFO_VER01_1 & 0xFFF0) /* ver 1.X */ /* diff --git a/usr/src/uts/i86pc/sys/rm_platter.h b/usr/src/uts/i86pc/sys/rm_platter.h index b21c54dd63..48e141126b 100644 --- a/usr/src/uts/i86pc/sys/rm_platter.h +++ b/usr/src/uts/i86pc/sys/rm_platter.h @@ -22,12 +22,14 @@ * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ #ifndef _SYS_RM_PLATTER_H #define _SYS_RM_PLATTER_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/tss.h> #include <sys/segments.h> @@ -36,8 +38,12 @@ extern "C" { #endif +#define RM_PLATTER_CODE_SIZE 0x400 +#define RM_PLATTER_CPU_HALT_CODE_SIZE 0x100 + typedef struct rm_platter { - char rm_code[1024]; + char rm_code[RM_PLATTER_CODE_SIZE]; + char rm_cpu_halt_code[RM_PLATTER_CPU_HALT_CODE_SIZE]; #if defined(__amd64) /* * The compiler will want to 64-bit align the 64-bit rm_gdt_base @@ -59,7 +65,7 @@ typedef struct rm_platter { */ uint32_t rm_idt_pad; #endif /* __amd64 */ - ushort_t rm_filler2; /* till I am sure that pragma works */ + ushort_t rm_cpu_halted; /* non-zero if CPU has been halted */ ushort_t rm_idt_lim; /* stuff for lidt */ gate_desc_t *rm_idt_base; uint_t rm_pdbr; /* cr3 value */ diff --git a/usr/src/uts/i86pc/sys/sbd_ioctl.h b/usr/src/uts/i86pc/sys/sbd_ioctl.h new file mode 100644 index 0000000000..1eab543aa3 --- /dev/null +++ b/usr/src/uts/i86pc/sys/sbd_ioctl.h @@ -0,0 +1,655 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ + +#ifndef _SBD_IOCTL_H +#define _SBD_IOCTL_H + +#ifndef _ASM +#include <sys/types.h> +#include <sys/obpdefs.h> +#include <sys/processor.h> +#include <sys/param.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ASM +typedef enum { + SBD_COMP_NONE, + SBD_COMP_CPU, + SBD_COMP_MEM, + SBD_COMP_IO, + SBD_COMP_CMP, + SBD_COMP_UNKNOWN +} sbd_comp_type_t; + +typedef enum { + SBD_STAT_NONE = 0, + SBD_STAT_EMPTY, + SBD_STAT_DISCONNECTED, + SBD_STAT_CONNECTED, + SBD_STAT_UNCONFIGURED, + SBD_STAT_CONFIGURED +} sbd_state_t; + +typedef enum { + SBD_COND_UNKNOWN = 0, + SBD_COND_OK, + SBD_COND_FAILING, + SBD_COND_FAILED, + SBD_COND_UNUSABLE +} sbd_cond_t; + +typedef int sbd_busy_t; + +#define SBD_MAX_UNSAFE 16 +#define SBD_TYPE_LEN 12 +#define SBD_NULL_UNIT -1 + +typedef struct { + sbd_comp_type_t c_type; + int c_unit; + char c_name[OBP_MAXPROPNAME]; +} sbd_comp_id_t; + +typedef struct { + sbd_comp_id_t c_id; + sbd_state_t c_ostate; + sbd_cond_t c_cond; + sbd_busy_t c_busy; + uint_t c_sflags; + time_t c_time; +} sbd_cm_stat_t; + +#define ci_type c_id.c_type +#define ci_unit c_id.c_unit +#define ci_name c_id.c_name + +typedef struct { + sbd_cm_stat_t cs_cm; + int cs_isbootproc; + processorid_t cs_cpuid; + int cs_speed; + int cs_ecache; +} sbd_cpu_stat_t; + +#define cs_type cs_cm.ci_type +#define cs_unit cs_cm.ci_unit +#define cs_name cs_cm.ci_name +#define cs_ostate cs_cm.c_ostate +#define cs_cond cs_cm.c_cond +#define cs_busy cs_cm.c_busy +#define cs_suspend cs_cm.c_sflags +#define cs_time cs_cm.c_time + +typedef struct { + sbd_cm_stat_t ms_cm; + int ms_interleave; + pfn_t ms_basepfn; + pgcnt_t ms_totpages; + pgcnt_t ms_detpages; + pgcnt_t ms_pageslost; + pgcnt_t ms_managed_pages; + pgcnt_t ms_noreloc_pages; + pgcnt_t ms_noreloc_first; + pgcnt_t ms_noreloc_last; + int ms_cage_enabled; + int ms_peer_is_target; /* else peer is source */ + char ms_peer_ap_id[MAXPATHLEN]; /* board's AP name */ +} sbd_mem_stat_t; + +#define ms_type ms_cm.ci_type +#define ms_unit ms_cm.ci_unit +#define ms_name ms_cm.ci_name +#define ms_ostate ms_cm.c_ostate +#define ms_cond ms_cm.c_cond +#define ms_busy ms_cm.c_busy +#define ms_suspend ms_cm.c_sflags +#define ms_time ms_cm.c_time + +typedef struct { + sbd_cm_stat_t is_cm; + int is_referenced; + int is_unsafe_count; + int is_unsafe_list[SBD_MAX_UNSAFE]; + char is_pathname[MAXPATHLEN]; +} sbd_io_stat_t; + +#define is_type is_cm.ci_type +#define is_unit is_cm.ci_unit +#define is_name is_cm.ci_name +#define is_ostate is_cm.c_ostate +#define is_cond is_cm.c_cond +#define is_busy is_cm.c_busy +#define is_suspend is_cm.c_sflags +#define is_time is_cm.c_time + +/* This constant must be the max of the max cores on all platforms */ +#define SBD_MAX_CORES_PER_CMP 64 + +typedef struct { + sbd_cm_stat_t ps_cm; + processorid_t ps_cpuid[SBD_MAX_CORES_PER_CMP]; + int ps_ncores; + int ps_speed; + int ps_ecache; +} sbd_cmp_stat_t; + +#define ps_type ps_cm.ci_type +#define ps_unit ps_cm.ci_unit +#define ps_name ps_cm.ci_name +#define ps_ostate ps_cm.c_ostate +#define ps_cond ps_cm.c_cond +#define ps_busy ps_cm.c_busy +#define ps_suspend ps_cm.c_sflags +#define ps_time ps_cm.c_time + +typedef union { + sbd_cm_stat_t d_cm; + sbd_cpu_stat_t d_cpu; + sbd_mem_stat_t d_mem; + sbd_io_stat_t d_io; + sbd_cmp_stat_t d_cmp; +} sbd_dev_stat_t; + +#define ds_type d_cm.ci_type +#define ds_unit d_cm.ci_unit +#define ds_name d_cm.ci_name +#define ds_ostate d_cm.c_ostate +#define ds_cond d_cm.c_cond +#define ds_busy d_cm.c_busy +#define ds_suspend d_cm.c_sflags +#define ds_time d_cm.c_time + 
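+/*
+ * Usage sketch (hypothetical caller code, not part of the interface):
+ * the ds_* aliases above let a consumer inspect the common fields of a
+ * sbd_dev_stat_t before selecting the type-specific view, e.g.
+ *
+ *	sbd_dev_stat_t dst;
+ *
+ *	if (dst.ds_type == SBD_COMP_CPU)
+ *		cpuid = dst.d_cpu.cs_cpuid;
+ *	else if (dst.ds_type == SBD_COMP_MEM)
+ *		npages = dst.d_mem.ms_totpages;
+ */
+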
+#define SBD_MAX_INFO 256 + +typedef struct { + int s_board; + char s_type[SBD_TYPE_LEN]; + char s_info[SBD_MAX_INFO]; + sbd_state_t s_rstate; + sbd_state_t s_ostate; + sbd_cond_t s_cond; + sbd_busy_t s_busy; + time_t s_time; + uint_t s_power:1; + uint_t s_assigned:1; + uint_t s_platopts; + int s_nstat; + sbd_dev_stat_t s_stat[1]; +} sbd_stat_t; + +typedef struct { + sbd_comp_id_t c_id; + uint_t c_flags; + int c_len; + caddr_t c_opts; +} sbd_cm_cmd_t; + +typedef struct { + sbd_cm_cmd_t g_cm; + int g_ncm; +} sbd_getncm_cmd_t; + +typedef struct { + sbd_cm_cmd_t s_cm; + int s_nbytes; + caddr_t s_statp; +} sbd_stat_cmd_t; + +typedef union { + sbd_cm_cmd_t cmd_cm; + sbd_getncm_cmd_t cmd_getncm; + sbd_stat_cmd_t cmd_stat; +} sbd_cmd_t; + +typedef struct { + int e_code; + char e_rsc[MAXPATHLEN]; +} sbd_error_t; + +typedef struct { + sbd_cmd_t i_cmd; + sbd_error_t i_err; +} sbd_ioctl_arg_t; + +typedef struct { + int t_base; + int t_bnd; + char **t_text; +} sbd_etab_t; + +#define i_flags i_cmd.cmd_cm.c_flags +#define i_len i_cmd.cmd_cm.c_len +#define i_opts i_cmd.cmd_cm.c_opts +#define ic_type i_cmd.cmd_cm.ci_type +#define ic_name i_cmd.cmd_cm.ci_name +#define ic_unit i_cmd.cmd_cm.ci_unit +#define ie_code i_err.e_code +#define ie_rsc i_err.e_rsc + +#define _SBD_IOC (('D' << 16) | ('R' << 8)) + +#define SBD_CMD_ASSIGN (_SBD_IOC | 0x01) +#define SBD_CMD_UNASSIGN (_SBD_IOC | 0x02) +#define SBD_CMD_POWERON (_SBD_IOC | 0x03) +#define SBD_CMD_POWEROFF (_SBD_IOC | 0x04) +#define SBD_CMD_TEST (_SBD_IOC | 0x05) +#define SBD_CMD_CONNECT (_SBD_IOC | 0x06) +#define SBD_CMD_CONFIGURE (_SBD_IOC | 0x07) +#define SBD_CMD_UNCONFIGURE (_SBD_IOC | 0x08) +#define SBD_CMD_DISCONNECT (_SBD_IOC | 0x09) +#define SBD_CMD_STATUS (_SBD_IOC | 0x0a) +#define SBD_CMD_GETNCM (_SBD_IOC | 0x0b) +#define SBD_CMD_PASSTHRU (_SBD_IOC | 0x0c) + +#define SBD_CHECK_SUSPEND(cmd, c_sflags) \ + (((c_sflags) >> (((cmd) & 0xf) - 1)) & 0x01) + +#define SBD_SET_SUSPEND(cmd, c_sflags) \ + ((c_sflags) |= (0x01 << (((cmd) & 0xf) - 1))) + +#define SBD_CHECK_PLATOPTS(cmd, c_platopts) \ + (((c_platopts) >> (((cmd) & 0xf) - 1)) & 0x01) + +#define SBD_SET_PLATOPTS(cmd, c_platopts) \ + ((c_platopts) &= ~(0x01 << (((cmd) & 0xf) - 1))) + +#define SBD_FLAG_FORCE 0x1 +#define SBD_FLAG_ALLCMP 0x2 +#define SBD_FLAG_QUIESCE_OKAY 0x4 + +#if defined(_SYSCALL32) + +typedef struct { + int32_t c_type; + int32_t c_unit; + char c_name[OBP_MAXPROPNAME]; +} sbd_comp_id32_t; + +typedef struct { + sbd_comp_id32_t c_id; + int32_t c_ostate; + int32_t c_cond; + int32_t c_busy; + uint32_t c_sflags; + time32_t c_time; +} sbd_cm_stat32_t; + +typedef struct { + sbd_cm_stat32_t cs_cm; + int32_t cs_isbootproc; + int32_t cs_cpuid; + int32_t cs_speed; + int32_t cs_ecache; +} sbd_cpu_stat32_t; + +typedef struct { + sbd_cm_stat32_t ms_cm; + int32_t ms_interleave; + uint32_t ms_basepfn; + uint32_t ms_totpages; + uint32_t ms_detpages; + int32_t ms_pageslost; + uint32_t ms_managed_pages; + uint32_t ms_noreloc_pages; + uint32_t ms_noreloc_first; + uint32_t ms_noreloc_last; + int32_t ms_cage_enabled; + int32_t ms_peer_is_target; + char ms_peer_ap_id[MAXPATHLEN]; +} sbd_mem_stat32_t; + +typedef struct { + sbd_cm_stat32_t is_cm; + int32_t is_referenced; + int32_t is_unsafe_count; + int32_t is_unsafe_list[SBD_MAX_UNSAFE]; + char is_pathname[MAXPATHLEN]; +} sbd_io_stat32_t; + +typedef struct { + sbd_cm_stat32_t ps_cm; + int32_t ps_cpuid[SBD_MAX_CORES_PER_CMP]; + int32_t ps_ncores; + int32_t ps_speed; + int32_t ps_ecache; +} sbd_cmp_stat32_t; + +typedef union { + sbd_cm_stat32_t d_cm; + 
sbd_cpu_stat32_t d_cpu; + sbd_mem_stat32_t d_mem; + sbd_io_stat32_t d_io; + sbd_cmp_stat32_t d_cmp; +} sbd_dev_stat32_t; + +typedef struct { + int32_t s_board; + char s_type[SBD_TYPE_LEN]; + char s_info[SBD_MAX_INFO]; + int32_t s_rstate; + int32_t s_ostate; + int32_t s_cond; + int32_t s_busy; + time32_t s_time; + uint32_t s_power:1; + uint32_t s_assigned:1; + uint32_t s_platopts; + int32_t s_nstat; + sbd_dev_stat32_t s_stat[1]; +} sbd_stat32_t; + +typedef struct { + int32_t e_code; + char e_rsc[MAXPATHLEN]; +} sbd_error32_t; + +typedef struct { + sbd_comp_id32_t c_id; + uint32_t c_flags; + int32_t c_len; + caddr32_t c_opts; +} sbd_cm_cmd32_t; + +typedef struct { + sbd_cm_cmd32_t g_cm; + int32_t g_ncm; +} sbd_getncm_cmd32_t; + +typedef struct { + sbd_cm_cmd32_t s_cm; + int32_t s_nbytes; + caddr32_t s_statp; +} sbd_stat_cmd32_t; + +typedef union { + sbd_cm_cmd32_t cmd_cm; + sbd_getncm_cmd32_t cmd_getncm; + sbd_stat_cmd32_t cmd_stat; +} sbd_cmd32_t; + +typedef struct { + sbd_cmd32_t i_cmd; + sbd_error32_t i_err; +} sbd_ioctl_arg32_t; + +typedef struct { + int32_t t_base; + int32_t t_bnd; + char **t_text; +} sbd_etab32_t; + +#endif /* _SYSCALL32 */ +#endif /* _ASM */ + +/* Common error codes */ + +#define ESBD_NOERROR 0 /* no error */ +#define ESBD_INTERNAL 1 /* Internal error */ +#define ESBD_NOMEM 2 /* Insufficient memory */ +#define ESBD_PROTO 3 /* Protocol error */ +#define ESBD_BUSY 4 /* Device busy */ +#define ESBD_NODEV 5 /* No such device */ +#define ESBD_ALREADY 6 /* Operation already in progress */ +#define ESBD_IO 7 /* I/O error */ +#define ESBD_FAULT 8 /* Bad address */ +#define ESBD_EMPTY_BD 9 /* No device(s) on board */ +#define ESBD_INVAL 10 /* Invalid argument */ +#define ESBD_STATE 11 /* Invalid state transition */ +#define ESBD_FATAL_STATE 12 /* Device in fatal state */ +#define ESBD_OUTSTANDING 13 /* Outstanding error */ +#define ESBD_SUSPEND 14 /* Device failed to suspend */ +#define ESBD_RESUME 15 /* Device failed to resume */ +#define ESBD_UTHREAD 16 /* Cannot stop user thread */ +#define ESBD_RTTHREAD 17 /* Cannot quiesce realtime thread */ +#define ESBD_KTHREAD 18 /* Cannot stop kernel thread */ +#define ESBD_OFFLINE 19 /* Failed to off-line */ +#define ESBD_ONLINE 20 /* Failed to on-line */ +#define ESBD_CPUSTART 21 /* Failed to start CPU */ +#define ESBD_CPUSTOP 22 /* Failed to stop CPU */ +#define ESBD_INVAL_COMP 23 /* Invalid component type */ +#define ESBD_KCAGE_OFF 24 /* Kernel cage is disabled */ +#define ESBD_NO_TARGET 25 /* No available memory target */ +#define ESBD_HW_PROGRAM 26 /* Hardware programming error */ +#define ESBD_MEM_NOTVIABLE 27 /* VM viability test failed */ +#define ESBD_MEM_REFUSED 28 /* Memory operation refused */ +#define ESBD_MEM_NONRELOC 29 /* Non-relocatable pages in span */ +#define ESBD_MEM_CANCELLED 30 /* Memory operation cancelled */ +#define ESBD_MEMFAIL 31 /* Memory operation failed */ +#define ESBD_MEMONLINE 32 /* Can't unconfig cpu if mem online */ +#define ESBD_QUIESCE_REQD 33 + /* Operator confirmation for quiesce is required */ +#define ESBD_MEMINTLV 34 + /* Memory is interleaved across boards */ +#define ESBD_CPUONLINE 35 + /* Can't config memory if not all cpus are online */ +#define ESBD_UNSAFE 36 /* Unsafe driver present */ +#define ESBD_INVAL_OPT 37 /* option invalid */ + +/* Starcat error codes */ + +#define ESTC_NONE 1000 /* No error */ +#define ESTC_GETPROP 1001 /* Cannot read property value */ +#define ESTC_BNUM 1002 /* Invalid board number */ +#define ESTC_CONFIGBUSY 1003 + /* Cannot proceed; Board is configured or 
busy */ +#define ESTC_PROBE 1004 /* Solaris failed to probe */ +#define ESTC_DEPROBE 1005 /* Solaris failed to deprobe */ +#define ESTC_MOVESIGB 1006 /* Firmware move-cpu0 failed */ +#define ESTC_SUPPORT 1007 /* Operation not supported */ +#define ESTC_DRVFAIL 1008 /* Device driver failure */ +#define ESTC_UNKPTCMD 1012 /* Unrecognized platform command */ +#define ESTC_NOTID 1013 + /* drmach parameter is not a valid ID */ +#define ESTC_INAPPROP 1014 + /* drmach parameter is inappropriate for operation */ +#define ESTC_INTERNAL 1015 /* Unexpected internal condition */ +#define ESTC_MBXRQST 1016 + /* Mailbox framework failure: outgoing */ +#define ESTC_MBXRPLY 1017 + /* Mailbox framework failure: incoming */ +#define ESTC_NOACL 1018 /* Board is not in domain ACL */ +#define ESTC_NOT_ASSIGNED 1019 /* Board is not assigned to domain */ +#define ESTC_NOT_ACTIVE 1020 /* Board is not active */ +#define ESTC_EMPTY_SLOT 1021 /* Slot is empty */ +#define ESTC_POWER_OFF 1022 /* Board is powered off */ +#define ESTC_TEST_IN_PROGRESS 1023 /* Board is already being tested */ +#define ESTC_TESTING_BUSY 1024 + /* Wait: All SC test resources are in use */ +#define ESTC_TEST_REQUIRED 1025 /* Board requires test prior to use */ +#define ESTC_TEST_ABORTED 1026 /* Board test has been aborted */ +#define ESTC_MBOX_UNKNOWN 1027 + /* Unknown error type received from SC */ +#define ESTC_TEST_STATUS_UNKNOWN 1028 + /* Test completed with unknown status */ +#define ESTC_TEST_RESULT_UNKNOWN 1029 + /* Unknown test result returned by SC */ +#define ESTC_TEST_FAILED 1030 + /* SMS hpost reported error, see POST log for details */ +#define ESTC_UNAVAILABLE 1031 /* Slot is unavailable to the domain */ +#define ESTC_NZ_LPA 1032 /* Nonzero LPA not yet supported */ +#define ESTC_IOSWITCH 1033 + /* Cannot unconfigure I/O board: tunnel switch failed */ +#define ESTC_IOCAGE_NO_CPU_AVAIL 1034 + /* No CPU available for I/O cage test. 
*/ +#define ESTC_SMS_ERR_RECOVERABLE 1035 + /* SMS reported recoverable error: check SMS status and Retry */ +#define ESTC_SMS_ERR_UNRECOVERABLE 1036 + /* SMS reported unrecoverable error: Board is Unusable */ +#define ESTC_NWSWITCH 1037 + /* Cannot unconfigure I/O board: network switch failed */ + +/* Starfire error codes */ + +#define ESTF_NONE 2000 /* No error */ +#define ESTF_GETPROP 2001 /* Cannot read property value */ +#define ESTF_GETPROPLEN 2002 /* Cannot determine property length */ +#define ESTF_BNUM 2003 /* Invalid board number */ +#define ESTF_CONFIGBUSY 2004 + /* Cannot proceed; Board is configured or busy */ +#define ESTF_NOCPUID 2005 /* No CPU specified for connect */ +#define ESTF_PROBE 2006 /* Firmware probe failed */ +#define ESTF_DEPROBE 2007 /* Firmware deprobe failed */ +#define ESTF_MOVESIGB 2008 /* Firmware move-cpu0 failed */ +#define ESTF_JUGGLE 2009 /* Cannot move SIGB assignment */ +#define ESTF_HASSIGB 2010 + /* Cannot disconnect CPU; SIGB is currently assigned */ +#define ESTF_SUPPORT 2011 /* Operation not supported */ +#define ESTF_DRVFAIL 2012 /* Device driver failure */ +#define ESTF_SETCPUVAL 2013 + /* Must specify a CPU on the given board */ +#define ESTF_NODEV 2014 /* No such device */ +#define ESTF_INTERBOARD 2015 + /* Memory configured with inter-board interleaving */ +#define ESTF_UNKPTCMD 2016 /* Unrecognized platform command */ +#define ESTF_NOTID 2017 /* drmach parameter is not a valid ID */ +#define ESTF_INAPPROP 2018 + /* drmach parameter is inappropriate for operation */ +#define ESTF_INTERNAL 2019 /* Unexpected internal condition */ + +/* Daktari error codes */ + +#define EDAK_NONE 3000 /* no error */ +#define EDAK_INTERNAL 3001 /* Internal error */ +#define EDAK_NOFRUINFO 3002 /* Didn't receive fru info */ +#define EDAK_NONDR_BOARD 3003 + /* DR is not supported on this board type */ +#define EDAK_POWERON 3004 /* Power on request failed */ +#define EDAK_POWEROK 3005 /* Failed to power on */ +#define EDAK_INTERRUPTED 3006 /* Operation interrupted */ +#define EDAK_BOARDINIT 3007 /* Board initialization failed */ +#define EDAK_CPUINIT 3008 /* CPU initialization failed */ +#define EDAK_MEMFAIL 3009 /* Memory operation failed */ + +/* Serengeti error codes */ + +#define ESGT_NONE 4000 /* no error */ +#define ESGT_INTERNAL 4001 /* Internal error */ +#define ESGT_INVAL 4002 /* Invalid argument */ +#define ESGT_MEMFAIL 4003 /* Memory operation failed */ +#define ESGT_PROBE 4004 /* Board probe failed */ +#define ESGT_DEPROBE 4005 /* Board deprobe failed */ +#define ESGT_JUGGLE_BOOTPROC 4006 /* Failed to juggle bootproc */ +#define ESGT_NOT_CPUTYPE 4007 /* Not a cpu device */ +#define ESGT_NO_DEV_TYPE 4008 /* Cannot find device type */ +#define ESGT_BAD_PORTID 4009 /* Bad port id */ +#define ESGT_RESUME 4010 /* Failed to resume device */ +#define ESGT_SUSPEND 4011 /* Failed to suspend device */ +#define ESGT_KTHREAD 4012 /* failed to stop kernel thd */ +#define ESGT_UNSAFE 4013 /* unsafe */ +#define ESGT_RTTHREAD 4014 /* real time threads */ +#define ESGT_UTHREAD 4015 /* failed to stop user thd */ +#define ESGT_PROM_ATTACH 4016 /* prom failed attach board */ +#define ESGT_PROM_DETACH 4017 /* prom failed detach board */ +#define ESGT_SC_ERR 4018 /* SC returned a failure */ +#define ESGT_GET_BOARD_STAT 4019 /* Failed to obtain board information */ +#define ESGT_WAKEUPCPU 4020 /* Failed to wake up cpu */ +#define ESGT_STOPCPU 4021 /* Failed to stop cpu */ +/* Serengeti SC return codes */ +#define ESGT_HW_FAIL 4022 /* Hardware Failure */ +#define ESGT_BD_ACCESS 4023 /* Board access denied */ +#define ESGT_STALE_CMP 4024 /* Stale components */ +#define ESGT_STALE_OBJ 4025 /* Stale objects */ +#define ESGT_NO_SEPROM_SPACE 4026 /* No SEPROM space */ +#define ESGT_NOT_SUPP 4027 /* Operation not supported */ +#define ESGT_NO_MEM 4028 /* No Memory */ + +/* OPL error codes */ + +#define EOPL_GETPROP 5001 /* Cannot read property value */ +#define EOPL_BNUM 5002 /* Invalid board number */ +#define EOPL_CONFIGBUSY 5003 + /* Cannot proceed; Board is configured or busy */ +#define EOPL_PROBE 5004 /* Firmware probe failed */ +#define EOPL_DEPROBE 5005 /* Firmware deprobe failed */ +#define EOPL_SUPPORT 5006 /* Operation not supported */ +#define EOPL_DRVFAIL 5007 /* Device driver failure */ +#define EOPL_UNKPTCMD 5008 /* Unrecognized platform command */ +#define EOPL_NOTID 5009 /* drmach parameter is not a valid ID */ +#define EOPL_INAPPROP 5010 + /* drmach parameter is inappropriate for operation */ +#define EOPL_INTERNAL 5011 /* Unexpected internal condition */ +#define EOPL_FINDDEVICE 5012 /* Firmware cannot find node. */ +#define EOPL_MC_SETUP 5013 /* Cannot setup memory node */ +#define EOPL_CPU_STATE 5014 /* Invalid CPU/core state */ +#define EOPL_MC_OPL 5015 /* Cannot find mc-opl interface */ +#define EOPL_SCF_FMEM 5016 /* Cannot find scf_fmem interface */ +#define EOPL_FMEM_SETUP 5017 /* Error setting up FMEM buffer */ +#define EOPL_SCF_FMEM_START 5018 /* scf_fmem_start error */ +#define EOPL_FMEM_ERROR 5019 /* FMEM error */ +#define EOPL_SCF_FMEM_CANCEL 5020 /* scf_fmem_cancel error */ +#define EOPL_FMEM_XC_TIMEOUT 5021 /* xcall timeout */ +#define EOPL_FMEM_COPY_TIMEOUT 5022 /* DR parallel copy timeout */ +#define EOPL_FMEM_SCF_BUSY 5023 /* SCF busy */ +#define EOPL_FMEM_RETRY_OUT 5024 /* SCF IO Retry Error */ +#define EOPL_FMEM_TIMEOUT 5025 /* FMEM command timeout */ +#define EOPL_FMEM_HW_ERROR 5026 /* Hardware error */ +#define EOPL_FMEM_TERMINATE 5027 /* FMEM operation Terminated */ +#define EOPL_FMEM_COPY_ERROR 5028 /* Memory copy error */ +#define EOPL_FMEM_SCF_ERR 5029 /* SCF error */ +#define EOPL_MIXED_CPU 5030 + /* Cannot add SPARC64-VI to domain booted with all SPARC64-VII CPUs */ +#define EOPL_FMEM_SCF_OFFLINE 5031 /* SCF OFFLINE */ + +/* X86 error codes */ + +#define EX86_GETPROP 10001 /* Cannot read property value */ +#define EX86_BNUM 10002 /* Invalid board number */ +#define EX86_NOTID 10003 /* drmach parameter is not a valid ID */ +#define EX86_INAPPROP 10004 + /* drmach parameter is inappropriate for operation */ +#define EX86_PROBE 10005 /* Firmware probe failed */ +#define EX86_DEPROBE 10006 /* Firmware deprobe failed */ +#define EX86_SUPPORT 10007 /* Operation not supported */ +#define EX86_INTERNAL 10008 /* Unexpected internal condition */ +#define EX86_CONFIGBUSY 10009 + /* Cannot proceed, board is configured or busy */ +#define EX86_POWERBUSY 10010 /* Cannot proceed, board is powered */ +#define EX86_CONNECTBUSY 10011 /* Cannot proceed, board is connected */ +#define EX86_INVALID_ARG 10012 /* Invalid argument */ +#define EX86_DRVFAIL 10013 /* Device driver failure */ +#define EX86_UNKPTCMD 10014 /* Unrecognized platform command */ +#define EX86_ALLOC_CPUID 10015 /* Failed to allocate processor id */ +#define EX86_FREE_CPUID 10016 /* Failed to release processor id */ +#define EX86_POWERON 10017 /* Failed to power on board */ +#define EX86_POWEROFF 10018 /* Failed to power off board */ +#define EX86_MC_SETUP 10019 /* Cannot setup memory node */ +#define EX86_ACPIWALK 10020 /* Cannot walk ACPI namespace */ +#define 
EX86_WALK_DEPENDENCY 10021 + /* Failed to check dependency for board */ +#define EX86_IN_FAILURE 10022 /* Board is in failure state */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SBD_IOCTL_H */ diff --git a/usr/src/uts/i86pc/sys/x_call.h b/usr/src/uts/i86pc/sys/x_call.h index ca778a4c21..65b5813b95 100644 --- a/usr/src/uts/i86pc/sys/x_call.h +++ b/usr/src/uts/i86pc/sys/x_call.h @@ -68,6 +68,8 @@ typedef struct xc_msg { #if defined(_KERNEL) extern void xc_init_cpu(struct cpu *); +extern void xc_fini_cpu(struct cpu *); +extern int xc_flush_cpu(struct cpu *); extern uint_t xc_serv(caddr_t, caddr_t); #define CPUSET2BV(set) ((ulong_t *)(void *)&(set)) diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c index ade54caab2..6ff1216d76 100644 --- a/usr/src/uts/i86pc/vm/hat_i86.c +++ b/usr/src/uts/i86pc/vm/hat_i86.c @@ -22,6 +22,10 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* @@ -188,6 +192,10 @@ int pt_kern; extern void atomic_orb(uchar_t *addr, uchar_t val); extern void atomic_andb(uchar_t *addr, uchar_t val); +#ifndef __xpv +extern pfn_t memseg_get_start(struct memseg *); +#endif + #define PP_GETRM(pp, rmmask) (pp->p_nrm & rmmask) #define PP_ISMOD(pp) PP_GETRM(pp, P_MOD) #define PP_ISREF(pp) PP_GETRM(pp, P_REF) @@ -4050,8 +4058,8 @@ hat_cpu_offline(struct cpu *cpup) ASSERT(cpup != CPU); CPUSET_ATOMIC_DEL(khat_cpuset, cpup->cpu_id); - x86pte_cpu_fini(cpup); hat_vlp_teardown(cpup); + x86pte_cpu_fini(cpup); } /* @@ -4363,7 +4371,96 @@ void hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp) {} -#ifdef __xpv +#ifndef __xpv +void +hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs, + offset_t kpm_pages_off) +{ + _NOTE(ARGUNUSED(nkpmpgs, kpm_pages_off)); + pfn_t base, end; + + /* + * kphysm_add_memory_dynamic() does not set nkpmpgs + * when page_t memory is externally allocated. That + * code must properly calculate nkpmpgs in all cases + * if nkpmpgs needs to be used at some point. + */ + + /* + * The meta (page_t) pages for dynamically added memory are allocated + * either from the incoming memory itself or from existing memory. + * In the former case the base of the incoming pages will be different + * than the base of the dynamic segment so call memseg_get_start() to + * get the actual base of the incoming memory for each case. + */ + + base = memseg_get_start(msp); + end = msp->pages_end; + + hat_devload(kas.a_hat, kpm_vbase + mmu_ptob(base), + mmu_ptob(end - base), base, PROT_READ | PROT_WRITE, + HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); +} + +void +hat_kpm_addmem_mseg_insert(struct memseg *msp) +{ + _NOTE(ARGUNUSED(msp)); +} + +void +hat_kpm_addmem_memsegs_update(struct memseg *msp) +{ + _NOTE(ARGUNUSED(msp)); +} + +/* + * Return end of metadata for an already setup memseg. + * X86 platforms don't need per-page meta data to support kpm. + */ +caddr_t +hat_kpm_mseg_reuse(struct memseg *msp) +{ + return ((caddr_t)msp->epages); +} + +void +hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp) +{ + _NOTE(ARGUNUSED(msp, mspp)); + ASSERT(0); +} + +void +hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp, + struct memseg *lo, struct memseg *mid, struct memseg *hi) +{ + _NOTE(ARGUNUSED(msp, mspp, lo, mid, hi)); + ASSERT(0); +} + +/* + * Walk the memsegs chain, applying func to each memseg span. 
+ */ +void +hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg) +{ + pfn_t pbase, pend; + void *base; + size_t size; + struct memseg *msp; + + for (msp = memsegs; msp; msp = msp->next) { + pbase = msp->pages_base; + pend = msp->pages_end; + base = ptob(pbase) + kpm_vbase; + size = ptob(pend - pbase); + func(arg, base, size); + } +} + +#else /* __xpv */ + /* * There are specific Hypervisor calls to establish and remove mappings * to grant table references and the privcmd driver. We have to ensure @@ -4406,5 +4503,5 @@ hat_release_mapping(hat_t *hat, caddr_t addr) htable_release(ht); htable_release(ht); XPV_ALLOW_MIGRATE(); - } -#endif +} +#endif /* __xpv */ diff --git a/usr/src/uts/i86pc/vm/vm_dep.h b/usr/src/uts/i86pc/vm/vm_dep.h index ae2b6c12c1..753aa9d146 100644 --- a/usr/src/uts/i86pc/vm/vm_dep.h +++ b/usr/src/uts/i86pc/vm/vm_dep.h @@ -22,6 +22,10 @@ * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. + */ /* * UNIX machine dependent virtual memory support. @@ -71,7 +75,14 @@ extern void plcnt_inc_dec(page_t *, int, int, long, int); */ #define PLCNT_XFER_NORELOC(pp) -#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, (pgcnt_t)cnt) +/* + * macro to modify the page list max counts when memory is added to + * the page lists during startup (add_physmem) or during a DR operation + * when memory is added (kphysm_add_memory_dynamic) or deleted + * (kphysm_del_cleanup). + */ +#define PLCNT_MODIFY_MAX(pfn, cnt) mtype_modify_max(pfn, cnt) + extern int memrange_num(pfn_t); extern int pfn_2_mtype(pfn_t); extern int mtype_func(int, int, uint_t); @@ -82,7 +93,7 @@ extern int mnode_range_cnt(int); /* * candidate counters in vm_pagelist.c are indexed by color and range */ -#define NUM_MEM_RANGES 4 /* memory range types */ +#define NUM_MEM_RANGES 4 /* memory range types */ #define MAX_MNODE_MRANGES NUM_MEM_RANGES #define MNODE_RANGE_CNT(mnode) mnode_range_cnt(mnode) #define MNODE_MAX_MRANGE(mnode) memrange_num(mem_node_config[mnode].physbase) @@ -161,6 +172,25 @@ extern page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); rw_exit(&page_ctrs_rwlock[(mnode)]); \ } +/* + * macro to call page_ctrs_adjust() when memory is added + * during a DR operation. + */ +#define PAGE_CTRS_ADJUST(pfn, cnt, rv) { \ + spgcnt_t _cnt = (spgcnt_t)(cnt); \ + int _mn; \ + pgcnt_t _np; \ + pfn_t _pfn = (pfn); \ + pfn_t _endpfn = _pfn + _cnt; \ + while (_pfn < _endpfn) { \ + _mn = PFN_2_MEM_NODE(_pfn); \ + _np = MIN(_endpfn, mem_node_config[_mn].physmax + 1) - _pfn; \ + _pfn += _np; \ + if ((rv = page_ctrs_adjust(_mn)) != 0) \ + break; \ + } \ +} + #define PAGE_GET_COLOR_SHIFT(szc, nszc) \ (hw_page_array[(nszc)].hp_shift - hw_page_array[(szc)].hp_shift) @@ -524,6 +554,7 @@ extern page_t *page_freelist_split(uchar_t, uint_t, int, int, pfn_t, pfn_t, page_list_walker_t *); extern page_t *page_freelist_coalesce(int, uchar_t, uint_t, uint_t, int, pfn_t); +extern void page_freelist_coalesce_all(int); extern uint_t page_get_pagecolors(uint_t); extern void pfnzero(pfn_t, uint_t, uint_t); diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c index 63c8129ddf..79c0ee073e 100644 --- a/usr/src/uts/i86pc/vm/vm_machdep.c +++ b/usr/src/uts/i86pc/vm/vm_machdep.c @@ -22,6 +22,10 @@ * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2010, Intel Corporation. + * All rights reserved. 
+ */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ @@ -127,7 +131,6 @@ int largepagesupport = 0; extern uint_t page_create_new; extern uint_t page_create_exists; extern uint_t page_create_putbacks; -extern uint_t page_create_putbacks; /* * Allow users to disable the kernel's use of SSE. */ @@ -142,6 +145,8 @@ typedef struct { pfn_t mnr_pfnhi; int mnr_mnode; int mnr_memrange; /* index into memranges[] */ + int mnr_next; /* next lower PA mnoderange */ + int mnr_exists; /* maintain page list stats */ pgcnt_t mnr_mt_clpgcnt; /* cache list cnt */ pgcnt_t mnr_mt_flpgcnt[MMU_PAGE_SIZES]; /* free list cnt per szc */ @@ -175,6 +180,11 @@ typedef struct { */ #define PFN_4GIG 0x100000 #define PFN_16MEG 0x1000 +/* Indices into the memory range (arch_memranges) array. */ +#define MRI_4G 0 +#define MRI_2G 1 +#define MRI_16M 2 +#define MRI_0 3 static pfn_t arch_memranges[NUM_MEM_RANGES] = { PFN_4GIG, /* pfn range for 4G and above */ 0x80000, /* pfn range for 2G-4G */ @@ -191,6 +201,8 @@ int nranges = NUM_MEM_RANGES; mnoderange_t *mnoderanges; int mnoderangecnt; int mtype4g; +int mtype16m; +int mtypetop; /* index of highest pfn'ed mnoderange */ /* * 4g memory management variables for systems with more than 4g of memory: @@ -220,7 +232,6 @@ int mtype4g; * for requests that don't specifically require it. */ -#define LOTSFREE4G (maxmem4g >> lotsfree4gshift) #define DESFREE4G (maxmem4g >> desfree4gshift) #define RESTRICT4G_ALLOC \ @@ -230,7 +241,6 @@ static pgcnt_t maxmem4g; static pgcnt_t freemem4g; static int physmax4g; static int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */ -static int lotsfree4gshift = 3; /* * 16m memory management: @@ -246,7 +256,7 @@ static int lotsfree4gshift = 3; * are not restricted. */ -#define FREEMEM16M MTYPE_FREEMEM(0) +#define FREEMEM16M MTYPE_FREEMEM(mtype16m) #define DESFREE16M desfree16m #define RESTRICT16M_ALLOC(freemem, pgcnt, flags) \ ((freemem != 0) && ((flags & PG_PANIC) == 0) && \ @@ -337,6 +347,9 @@ hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1]; kmutex_t *fpc_mutex[NPC_MUTEX]; kmutex_t *cpc_mutex[NPC_MUTEX]; +/* Lock to protect mnoderanges array for memory DR operations. */ +static kmutex_t mnoderange_lock; + /* * Only let one thread at a time try to coalesce large pages, to * prevent them from working against each other. 
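With these changes mnoderanges is no longer a densely packed, sorted array: each entry now records whether it is populated (mnr_exists) and the index of the next-lower range (mnr_next), forming a chain ordered by descending base PFN and headed by mtypetop. A minimal traversal sketch (illustrative only, built from the fields and globals declared above):

	int mt;

	/* Visit each populated mnoderange, highest physical range first. */
	for (mt = mtypetop; mt != -1; mt = mnoderanges[mt].mnr_next) {
		ASSERT(mnoderanges[mt].mnr_exists);
		/* PFNs in [mnr_pfnlo, mnr_pfnhi] belong to mtype mt. */
	}

The hunks below rewrite the existing descending scans (pfn_2_mtype(), mtype_func(), mtype_modify_max() and page_get_anylist()) in terms of this chain.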
@@ -951,7 +964,8 @@ pfn_2_mtype(pfn_t pfn) #else int n; - for (n = mnoderangecnt - 1; n >= 0; n--) { + /* Always start from highest pfn and work our way down */ + for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) { if (pfn >= mnoderanges[n].mnr_pfnlo) { break; } @@ -1063,7 +1077,7 @@ retry: * verify that pages being returned from allocator have correct DMA attribute */ #ifndef DEBUG -#define check_dma(a, b, c) (0) +#define check_dma(a, b, c) (void)(0) #else static void check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt) @@ -1253,7 +1267,12 @@ mnode_range_cnt(int mnode) void mnode_range_setup(mnoderange_t *mnoderanges) { + mnoderange_t *mp = mnoderanges; int mnode, mri; + int mindex = 0; /* current index into mnoderanges array */ + int i, j; + pfn_t hipfn; + int last, hi; for (mnode = 0; mnode < max_mem_nodes; mnode++) { if (mem_node_config[mnode].exists == 0) @@ -1272,26 +1291,168 @@ mnode_range_setup(mnoderange_t *mnoderanges) mem_node_config[mnode].physmax); mnoderanges->mnr_mnode = mnode; mnoderanges->mnr_memrange = mri; + mnoderanges->mnr_exists = 1; mnoderanges++; + mindex++; if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) mri--; else break; } } + + /* + * For now do a simple sort of the mnoderanges array to fill in + * the mnr_next fields. Since mindex is expected to be relatively + * small, using a simple O(N^2) algorithm. + */ + for (i = 0; i < mindex; i++) { + if (mp[i].mnr_pfnlo == 0) /* find lowest */ + break; + } + ASSERT(i < mindex); + last = i; + mtype16m = last; + mp[last].mnr_next = -1; + for (i = 0; i < mindex - 1; i++) { + hipfn = (pfn_t)(-1); + hi = -1; + /* find next highest mnode range */ + for (j = 0; j < mindex; j++) { + if (mp[j].mnr_pfnlo > mp[last].mnr_pfnlo && + mp[j].mnr_pfnlo < hipfn) { + hipfn = mp[j].mnr_pfnlo; + hi = j; + } + } + mp[hi].mnr_next = last; + last = hi; + } + mtypetop = last; } +#ifndef __xpv +/* + * Update mnoderanges for memory hot-add DR operations. + */ +static void +mnode_range_add(int mnode) +{ + int *prev; + int n, mri; + pfn_t start, end; + extern void membar_sync(void); + + ASSERT(0 <= mnode && mnode < max_mem_nodes); + ASSERT(mem_node_config[mnode].exists); + start = mem_node_config[mnode].physbase; + end = mem_node_config[mnode].physmax; + ASSERT(start <= end); + mutex_enter(&mnoderange_lock); + +#ifdef DEBUG + /* Check whether it interleaves with other memory nodes. */ + for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) { + ASSERT(mnoderanges[n].mnr_exists); + if (mnoderanges[n].mnr_mnode == mnode) + continue; + ASSERT(start > mnoderanges[n].mnr_pfnhi || + end < mnoderanges[n].mnr_pfnlo); + } +#endif /* DEBUG */ + + mri = nranges - 1; + while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) + mri--; + while (mri >= 0 && mem_node_config[mnode].physmax >= MEMRANGELO(mri)) { + /* Check whether mtype already exists. */ + for (n = mtypetop; n != -1; n = mnoderanges[n].mnr_next) { + if (mnoderanges[n].mnr_mnode == mnode && + mnoderanges[n].mnr_memrange == mri) { + mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri), + start); + mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri), + end); + break; + } + } + + /* Add a new entry if it doesn't exist yet. */ + if (n == -1) { + /* Try to find an unused entry in mnoderanges array. 
*/ + for (n = 0; n < mnoderangecnt; n++) { + if (mnoderanges[n].mnr_exists == 0) + break; + } + ASSERT(n < mnoderangecnt); + mnoderanges[n].mnr_pfnlo = MAX(MEMRANGELO(mri), start); + mnoderanges[n].mnr_pfnhi = MIN(MEMRANGEHI(mri), end); + mnoderanges[n].mnr_mnode = mnode; + mnoderanges[n].mnr_memrange = mri; + mnoderanges[n].mnr_exists = 1; + /* Page 0 should always be present. */ + for (prev = &mtypetop; + mnoderanges[*prev].mnr_pfnlo > start; + prev = &mnoderanges[*prev].mnr_next) { + ASSERT(mnoderanges[*prev].mnr_next >= 0); + ASSERT(mnoderanges[*prev].mnr_pfnlo > end); + } + mnoderanges[n].mnr_next = *prev; + membar_sync(); + *prev = n; + } + + if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) + mri--; + else + break; + } + + mutex_exit(&mnoderange_lock); +} + +/* + * Update mnoderanges for memory hot-removal DR operations. + */ +static void +mnode_range_del(int mnode) +{ + _NOTE(ARGUNUSED(mnode)); + ASSERT(0 <= mnode && mnode < max_mem_nodes); + /* TODO: support deletion operation. */ + ASSERT(0); +} + +void +plat_slice_add(pfn_t start, pfn_t end) +{ + mem_node_add_slice(start, end); + if (plat_dr_enabled()) { + mnode_range_add(PFN_2_MEM_NODE(start)); + } +} + +void +plat_slice_del(pfn_t start, pfn_t end) +{ + ASSERT(PFN_2_MEM_NODE(start) == PFN_2_MEM_NODE(end)); + ASSERT(plat_dr_enabled()); + mnode_range_del(PFN_2_MEM_NODE(start)); + mem_node_del_slice(start, end); +} +#endif /* __xpv */ + /*ARGSUSED*/ int mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz) { - int mtype = mnoderangecnt - 1; + int mtype = mtypetop; #if !defined(__xpv) #if defined(__i386) /* * set the mtype range - * - kmem requests needs to be below 4g if restricted_kmemalloc is set. + * - kmem requests need to be below 4g if restricted_kmemalloc is set. * - for non kmem requests, set range to above 4g if memory below 4g * runs low. */ @@ -1334,8 +1495,8 @@ mtype_init(vnode_t *vp, caddr_t vaddr, uint_t *flags, size_t pgsz) int mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt) { - int mtype = mnoderangecnt - 1; -#if !defined(__ixpv) + int mtype = mtypetop; +#if !defined(__xpv) if (RESTRICT16M_ALLOC(freemem, pgcnt, *flags)) { *flags |= PGI_MT_RANGE16M; } else { @@ -1349,7 +1510,7 @@ mtype_pgr_init(int *flags, page_t *pp, int mnode, pgcnt_t pgcnt) /* * Determine if the mnode range specified in mtype contains memory belonging * to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains - * the range of indices from high pfn to 0, 16m or 4g. + * the range from high pfn to 0, 16m or 4g. * * Return first mnode range type index found otherwise return -1 if none found. 
*/ @@ -1357,18 +1518,20 @@ int mtype_func(int mnode, int mtype, uint_t flags) { if (flags & PGI_MT_RANGE) { - int mtlim = 0; + int mnr_lim = MRI_0; - if (flags & PGI_MT_NEXT) - mtype--; + if (flags & PGI_MT_NEXT) { + mtype = mnoderanges[mtype].mnr_next; + } if (flags & PGI_MT_RANGE4G) - mtlim = mtype4g + 1; /* exclude 0-4g range */ + mnr_lim = MRI_4G; /* exclude 0-4g range */ else if (flags & PGI_MT_RANGE16M) - mtlim = 1; /* exclude 0-16m range */ - while (mtype >= mtlim) { + mnr_lim = MRI_16M; /* exclude 0-16m range */ + while (mtype != -1 && + mnoderanges[mtype].mnr_memrange <= mnr_lim) { if (mnoderanges[mtype].mnr_mnode == mnode) return (mtype); - mtype--; + mtype = mnoderanges[mtype].mnr_next; } } else if (mnoderanges[mtype].mnr_mnode == mnode) { return (mtype); @@ -1378,34 +1541,40 @@ mtype_func(int mnode, int mtype, uint_t flags) /* * Update the page list max counts with the pfn range specified by the - * input parameters. Called from add_physmem() when physical memory with - * page_t's are initially added to the page lists. + * input parameters. */ void mtype_modify_max(pfn_t startpfn, long cnt) { - int mtype = 0; - pfn_t endpfn = startpfn + cnt, pfn; - pgcnt_t inc; - - ASSERT(cnt > 0); + int mtype; + pgcnt_t inc; + spgcnt_t scnt = (spgcnt_t)(cnt); + pgcnt_t acnt = ABS(scnt); + pfn_t endpfn = startpfn + acnt; + pfn_t pfn, lo; if (!physmax4g) return; - for (pfn = startpfn; pfn < endpfn; ) { - if (pfn <= mnoderanges[mtype].mnr_pfnhi) { - if (endpfn < mnoderanges[mtype].mnr_pfnhi) { - inc = endpfn - pfn; + mtype = mtypetop; + for (pfn = endpfn; pfn > startpfn; ) { + ASSERT(mtype != -1); + lo = mnoderanges[mtype].mnr_pfnlo; + if (pfn > lo) { + if (startpfn >= lo) { + inc = pfn - startpfn; } else { - inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1; + inc = pfn - lo; + } + if (mnoderanges[mtype].mnr_memrange != MRI_4G) { + if (scnt > 0) + maxmem4g += inc; + else + maxmem4g -= inc; } - if (mtype <= mtype4g) - maxmem4g += inc; - pfn += inc; + pfn -= inc; } - mtype++; - ASSERT(mtype < mnoderangecnt || pfn >= endpfn); + mtype = mnoderanges[mtype].mnr_next; } } @@ -1418,6 +1587,7 @@ mtype_2_mrange(int mtype) void mnodetype_2_pfn(int mnode, int mtype, pfn_t *pfnlo, pfn_t *pfnhi) { + _NOTE(ARGUNUSED(mnode)); ASSERT(mnoderanges[mtype].mnr_mnode == mnode); *pfnlo = mnoderanges[mtype].mnr_pfnlo; *pfnhi = mnoderanges[mtype].mnr_pfnhi; @@ -1462,6 +1632,7 @@ plcnt_init(caddr_t addr) void plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags) { + _NOTE(ARGUNUSED(pp)); #ifdef DEBUG int bin = PP_2_BIN(pp); @@ -1470,7 +1641,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags) cnt); #endif ASSERT(mtype == PP_2_MTYPE(pp)); - if (physmax4g && mtype <= mtype4g) + if (physmax4g && mnoderanges[mtype].mnr_memrange != MRI_4G) atomic_add_long(&freemem4g, cnt); if (flags & PG_CACHE_LIST) atomic_add_long(&mnoderanges[mtype].mnr_mt_clpgcnt, cnt); @@ -1485,7 +1656,7 @@ plcnt_inc_dec(page_t *pp, int mtype, int szc, long cnt, int flags) int mnode_pgcnt(int mnode) { - int mtype = mnoderangecnt - 1; + int mtype = mtypetop; int flags = PGI_MT_RANGE0; pgcnt_t pgcnt = 0; @@ -1505,6 +1676,7 @@ mnode_pgcnt(int mnode) size_t page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) { + _NOTE(ARGUNUSED(l2_linesz)); size_t colorsz = 0; int i; int colors; @@ -1519,14 +1691,19 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) /* * Reduce the memory ranges lists if we don't have large amounts * of memory. This avoids searching known empty free lists. 
+ * To support memory DR operations, we need to keep memory ranges + * for possible memory hot-add operations. */ - i = memrange_num(physmax); + if (plat_dr_physmax > physmax) + i = memrange_num(plat_dr_physmax); + else + i = memrange_num(physmax); #if defined(__i386) - if (i > 0) + if (i > MRI_4G) restricted_kmemalloc = 0; #endif /* physmax greater than 4g */ - if (i == 0) + if (i == MRI_4G) physmax4g = 1; #endif /* !__xpv */ memranges += i; @@ -1619,6 +1796,16 @@ page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) /* size for mnoderanges */ for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++) mnoderangecnt += mnode_range_cnt(i); + if (plat_dr_support_memory()) { + /* + * Reserve enough space for memory DR operations. + * Two extra mnoderanges for possible fragmentation, + * one for the 2G boundary and the other for the 4G boundary. + * We don't expect a memory board crossing the 16M boundary + * for memory hot-add operations on x86 platforms. + */ + mnoderangecnt += 2 + max_mem_nodes - lgrp_plat_node_cnt; + } colorsz = mnoderangecnt * sizeof (mnoderange_t); /* size for fpc_mutex and cpc_mutex */ @@ -3122,8 +3309,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, * ordering. */ if (dma_attr == NULL) { - n = 0; - m = mnoderangecnt - 1; + n = mtype16m; + m = mtypetop; fullrange = 1; VM_STAT_ADD(pga_vmstats.pga_nulldmaattr); } else { @@ -3136,6 +3323,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, if (dma_attr->dma_attr_align > MMU_PAGESIZE) return (NULL); + /* Sanity check the dma_attr */ + if (pfnlo > pfnhi) + return (NULL); + n = pfn_2_mtype(pfnlo); m = pfn_2_mtype(pfnhi); @@ -3144,13 +3335,10 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, } VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange); - if (n > m) - return (NULL); - szc = 0; - /* cylcing thru mtype handled by RANGE0 if n == 0 */ - if (n == 0) { + /* cycling thru mtype handled by RANGE0 if n == mtype16m */ + if (n == mtype16m) { flags |= PGI_MT_RANGE0; n = m; } @@ -3164,7 +3352,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, /* * allocate pages from high pfn to low. */ - for (mtype = m; mtype >= n; mtype--) { + mtype = m; + do { if (fullrange != 0) { pp = page_get_mnode_freelist(mnode, bin, mtype, szc, flags); @@ -3181,7 +3370,8 @@ page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, check_dma(dma_attr, pp, 1); return (pp); } - } + } while (mtype != n && + (mtype = mnoderanges[mtype].mnr_next) != -1); if (!local_failed_stat) { lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1); local_failed_stat = 1; diff --git a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c index f0a257d1fd..8bc46f8e3e 100644 --- a/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c +++ b/usr/src/uts/i86xpv/cpu/generic_cpu/gcpu_poll_xpv.c @@ -161,6 +161,12 @@ gcpu_mca_poll_init(cmi_hdl_t hdl) } } +/* Undo the setup done by gcpu_mca_poll_init(). */ +void +gcpu_mca_poll_fini(cmi_hdl_t hdl) +{ +} + void gcpu_mca_poll_start(cmi_hdl_t hdl) { diff --git a/usr/src/uts/i86xpv/io/psm/xpv_psm.c b/usr/src/uts/i86xpv/io/psm/xpv_psm.c index 07db030ed5..c59d1015a0 100644 --- a/usr/src/uts/i86xpv/io/psm/xpv_psm.c +++ b/usr/src/uts/i86xpv/io/psm/xpv_psm.c @@ -24,7 +24,7 @@ * Use is subject to license terms. 
*/ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/mutex.h> #include <sys/types.h> @@ -1672,7 +1672,8 @@ static struct psm_ops xen_psm_ops = { (void (*)(void *arg))NULL, /* psm_post_cyclic_setup */ (void (*)(int, int))NULL, /* psm_preshutdown */ xen_intr_ops, /* Advanced DDI Interrupt framework */ - (int (*)(psm_state_request_t *))NULL /* psm_state */ + (int (*)(psm_state_request_t *))NULL, /* psm_state */ + (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */ }; static struct psm_info xen_psm_info = { diff --git a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c index 9427ead955..06049ed9e0 100644 --- a/usr/src/uts/i86xpv/io/psm/xpv_uppc.c +++ b/usr/src/uts/i86xpv/io/psm/xpv_uppc.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ -#define PSMI_1_6 +#define PSMI_1_7 #include <sys/mutex.h> #include <sys/types.h> @@ -891,7 +891,8 @@ static struct psm_ops xen_uppc_ops = { (int (*)(dev_info_t *, ddi_intr_handle_impl_t *, psm_intr_op_t, int *))NULL, /* psm_intr_ops */ - (int (*)(psm_state_request_t *))NULL /* psm_state */ + (int (*)(psm_state_request_t *))NULL, /* psm_state */ + (int (*)(psm_cpu_request_t *))NULL /* psm_cpu_ops */ }; static struct psm_info xen_uppc_info = { diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 87e1323772..807cb8f790 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -1359,6 +1359,17 @@ fcnname/**/_info: \ END_MODULE(acpica); #endif +/* + * Stubs for acpidev + */ +#ifndef ACPIDEV_MODULE + MODULE(acpidev,misc); + NO_UNLOAD_STUB(acpidev, acpidev_dr_get_cpu_numa_info, nomod_minus_one) ; + NO_UNLOAD_STUB(acpidev, acpidev_dr_free_cpu_numa_info, + nomod_minus_one) ; + END_MODULE(acpidev); +#endif + #ifndef IPNET_MODULE MODULE(ipnet,drv); STUB(ipnet, ipnet_if_getdev, nomod_zero); diff --git a/usr/src/uts/intel/ia32/os/sundep.c b/usr/src/uts/intel/ia32/os/sundep.c index 9aca9f4ea1..13f04658aa 100644 --- a/usr/src/uts/intel/ia32/os/sundep.c +++ b/usr/src/uts/intel/ia32/os/sundep.c @@ -115,9 +115,10 @@ check_boot_version(int boots_version) * 1) the pfn of the highest installed physical page, * 2) the number of pages installed * 3) the number of distinct contiguous regions these pages fall into. + * 4) the number of contiguous memory ranges */ void -installed_top_size( +installed_top_size_ex( struct memlist *list, /* pointer to start of installed list */ pfn_t *high_pfn, /* return ptr for top value */ pgcnt_t *pgcnt, /* return ptr for sum of installed pages */ @@ -141,6 +142,21 @@ installed_top_size( *ranges = cnt; } +void +installed_top_size( + struct memlist *list, /* pointer to start of installed list */ + pfn_t *high_pfn, /* return ptr for top value */ + pgcnt_t *pgcnt) /* return ptr for sum of installed pages */ +{ + int ranges; + + installed_top_size_ex(list, high_pfn, pgcnt, &ranges); +} + +void +phys_install_has_changed(void) +{} + /* * Copy in a memory list from boot to kernel, with a filter function * to remove pages. 
The filter function can increase the address and/or diff --git a/usr/src/uts/intel/io/acpica/acpica.c b/usr/src/uts/intel/io/acpica/acpica.c index 088b1f0eb4..30a8d79be1 100644 --- a/usr/src/uts/intel/io/acpica/acpica.c +++ b/usr/src/uts/intel/io/acpica/acpica.c @@ -36,6 +36,7 @@ #include <sys/modctl.h> #include <sys/open.h> #include <sys/stat.h> +#include <sys/spl.h> #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/esunddi.h> @@ -121,8 +122,11 @@ _init(void) int error = EBUSY; int status; extern int (*acpi_fp_setwake)(); + extern kmutex_t cpu_map_lock; mutex_init(&acpica_module_lock, NULL, MUTEX_DRIVER, NULL); + mutex_init(&cpu_map_lock, NULL, MUTEX_SPIN, + (ddi_iblock_cookie_t)ipltospl(DISP_LEVEL)); if ((error = mod_install(&modlinkage)) != 0) { mutex_destroy(&acpica_module_lock); diff --git a/usr/src/uts/intel/io/acpica/osl.c b/usr/src/uts/intel/io/acpica/osl.c index 7593bde61a..6cb08ee27e 100644 --- a/usr/src/uts/intel/io/acpica/osl.c +++ b/usr/src/uts/intel/io/acpica/osl.c @@ -24,7 +24,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2009, Intel Corporation. + * Copyright (c) 2009-2010, Intel Corporation. * All rights reserved. */ /* @@ -98,7 +98,7 @@ static int scanning_d2a_map = 0; static int d2a_done = 0; /* features supported by ACPICA and ACPI device configuration. */ -uint64_t acpica_core_features = 0; +uint64_t acpica_core_features = ACPI_FEATURE_OSI_MODULE; static uint64_t acpica_devcfg_features = 0; /* set by acpi_poweroff() in PSMs and appm_ioctl() in acpippm for S3 */ @@ -112,7 +112,7 @@ struct cpu_map_item { ACPI_HANDLE obj; }; -static kmutex_t cpu_map_lock; +kmutex_t cpu_map_lock; static struct cpu_map_item **cpu_map = NULL; static int cpu_map_count_max = 0; static int cpu_map_count = 0; @@ -1552,7 +1552,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh) } } if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) { - *rh = cpu_map[cpu_id]->obj; + *rh = cpu_map[i]->obj; mutex_exit(&cpu_map_lock); return (AE_OK); } @@ -1567,7 +1567,7 @@ acpica_get_handle_cpu(int cpu_id, ACPI_HANDLE *rh) } } if (i < cpu_map_count && (cpu_map[i]->obj != NULL)) { - *rh = cpu_map[cpu_id]->obj; + *rh = cpu_map[i]->obj; mutex_exit(&cpu_map_lock); return (AE_OK); } @@ -2209,6 +2209,82 @@ acpica_get_cpu_object_by_apicid(UINT32 apicid, ACPI_HANDLE *hdlp) return (rc); } +ACPI_STATUS +acpica_get_cpu_id_by_object(ACPI_HANDLE hdl, processorid_t *cpuidp) +{ + int i; + ACPI_STATUS rc = AE_NOT_EXIST; + + ASSERT(cpuidp != NULL); + if (hdl == NULL || cpuidp == NULL) { + return (rc); + } + + *cpuidp = -1; + mutex_enter(&cpu_map_lock); + for (i = 0; i < cpu_map_count; i++) { + if (cpu_map[i]->obj == hdl && cpu_map[i]->cpu_id != -1) { + *cpuidp = cpu_map[i]->cpu_id; + rc = AE_OK; + break; + } + } + mutex_exit(&cpu_map_lock); + + return (rc); +} + +ACPI_STATUS +acpica_get_apicid_by_object(ACPI_HANDLE hdl, UINT32 *rp) +{ + int i; + ACPI_STATUS rc = AE_NOT_EXIST; + + ASSERT(rp != NULL); + if (hdl == NULL || rp == NULL) { + return (rc); + } + + *rp = UINT32_MAX; + mutex_enter(&cpu_map_lock); + for (i = 0; i < cpu_map_count; i++) { + if (cpu_map[i]->obj == hdl && + cpu_map[i]->apic_id != UINT32_MAX) { + *rp = cpu_map[i]->apic_id; + rc = AE_OK; + break; + } + } + mutex_exit(&cpu_map_lock); + + return (rc); +} + +ACPI_STATUS +acpica_get_procid_by_object(ACPI_HANDLE hdl, UINT32 *rp) +{ + int i; + ACPI_STATUS rc = AE_NOT_EXIST; + + ASSERT(rp != NULL); + if (hdl == NULL || rp == NULL) { + return (rc); + } + + *rp = UINT32_MAX; + mutex_enter(&cpu_map_lock); + for (i = 0; i < cpu_map_count; i++) { 
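+		/*
+		 * Unlike the cpu_id and apic_id lookups above, any entry
+		 * matching the handle is accepted here: proc_id has no
+		 * reserved "invalid" sentinel to screen out.
+		 */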
+		if (cpu_map[i]->obj == hdl) {
+			*rp = cpu_map[i]->proc_id;
+			rc = AE_OK;
+			break;
+		}
+	}
+	mutex_exit(&cpu_map_lock);
+
+	return (rc);
+}
+
 void
 acpica_set_core_feature(uint64_t features)
 {
diff --git a/usr/src/uts/intel/sys/acpica.h b/usr/src/uts/intel/sys/acpica.h
index 5f74655418..ff55dcb302 100644
--- a/usr/src/uts/intel/sys/acpica.h
+++ b/usr/src/uts/intel/sys/acpica.h
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2009-2010, Intel Corporation.
  * All rights reserved.
  */
 
@@ -133,6 +133,7 @@ typedef struct iflag {
 #define	ACPI_DEVCFG_CPU		0x1
 #define	ACPI_DEVCFG_MEMORY	0x2
 #define	ACPI_DEVCFG_CONTAINER	0x4
+#define	ACPI_DEVCFG_PCI		0x8
 
 /*
  * master_ops.c
@@ -187,6 +188,9 @@ extern ACPI_STATUS acpica_unmap_cpu(processorid_t);
 extern ACPI_STATUS acpica_get_cpu_object_by_cpuid(processorid_t, ACPI_HANDLE *);
 extern ACPI_STATUS acpica_get_cpu_object_by_procid(UINT32, ACPI_HANDLE *);
 extern ACPI_STATUS acpica_get_cpu_object_by_apicid(UINT32, ACPI_HANDLE *);
+extern ACPI_STATUS acpica_get_cpu_id_by_object(ACPI_HANDLE, processorid_t *);
+extern ACPI_STATUS acpica_get_apicid_by_object(ACPI_HANDLE, UINT32 *);
+extern ACPI_STATUS acpica_get_procid_by_object(ACPI_HANDLE, UINT32 *);
 
 extern uint64_t acpica_get_core_feature(uint64_t);
 extern void acpica_set_core_feature(uint64_t);
diff --git a/usr/src/uts/intel/sys/bootconf.h b/usr/src/uts/intel/sys/bootconf.h
index f626fbbb79..4f6c48c395 100644
--- a/usr/src/uts/intel/sys/bootconf.h
+++ b/usr/src/uts/intel/sys/bootconf.h
@@ -50,6 +50,7 @@ extern "C" {
 #define	BP_CPU_APICID_ARRAY	"cpu_apicid_array"
 #define	BP_LGRP_SLIT_ENABLE	"lgrp_slit_enable"
 #define	BP_LGRP_SRAT_ENABLE	"lgrp_srat_enable"
+#define	BP_LGRP_MSCT_ENABLE	"lgrp_msct_enable"
 #define	BP_LGRP_TOPO_LEVELS	"lgrp_topo_levels"
 
 /*
@@ -213,18 +214,6 @@ extern int octet_to_hexascii(const void *, uint_t, char *, uint_t *);
 extern int dhcpinit(void);
 
-/*
- * XXX	The memlist stuff belongs in a header of its own
- */
-extern int check_boot_version(int);
-extern void size_physavail(struct memlist *, pgcnt_t *, int *, pfn_t);
-extern int copy_physavail(struct memlist *, struct memlist **,
-    uint_t, uint_t);
-extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *, int *);
-extern int check_memexp(struct memlist *, uint_t);
-extern void copy_memlist_filter(struct memlist *, struct memlist **,
-    void (*filter)(uint64_t *, uint64_t *));
-
 extern struct bootops *bootops;
 extern int netboot;
 extern int swaploaded;
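For context, a minimal user-level sketch of the reverse-lookup pattern the new acpica_get_*_by_object() routines share, and of the indexing fix in acpica_get_handle_cpu() above: the match must be read back through the loop index, not through the searched key, because the map is not indexed by cpu_id. Types and names here (cpu_map_item, cpu_map_lookup) are simplified stand-ins, not the kernel code.

#include <stdio.h>
#include <stddef.h>

typedef void *acpi_handle_t;	/* stand-in for ACPI_HANDLE */

struct cpu_map_item {
	int		cpu_id;
	unsigned	proc_id;
	unsigned	apic_id;
	acpi_handle_t	obj;
};

/*
 * Return 0 and store the cpu_id on success, -1 if the handle is not
 * mapped.  The result is taken from map[i] -- the slot where the match
 * was found -- which is exactly what the hunk above corrects.
 */
static int
cpu_map_lookup(struct cpu_map_item *map, int count, acpi_handle_t hdl,
    int *cpuidp)
{
	int i;

	if (hdl == NULL || cpuidp == NULL)
		return (-1);
	for (i = 0; i < count; i++) {
		if (map[i].obj == hdl && map[i].cpu_id != -1) {
			*cpuidp = map[i].cpu_id;
			return (0);
		}
	}
	return (-1);
}

int
main(void)
{
	int dummy0, dummy1, id;
	struct cpu_map_item map[] = {
		{ 1, 0x10, 0x20, &dummy1 },	/* deliberately unsorted */
		{ 0, 0x00, 0x00, &dummy0 },
	};

	if (cpu_map_lookup(map, 2, &dummy1, &id) == 0)
		printf("handle maps to cpu %d\n", id);	/* prints 1 */
	return (0);
}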
diff --git a/usr/src/uts/intel/sys/cpu_module.h b/usr/src/uts/intel/sys/cpu_module.h
index 9b64b8527a..8d801b07f5 100644
--- a/usr/src/uts/intel/sys/cpu_module.h
+++ b/usr/src/uts/intel/sys/cpu_module.h
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
@@ -143,7 +143,6 @@ extern cmi_errno_t cmi_hdl_rdmsr(cmi_hdl_t, uint_t, uint64_t *);
 extern cmi_errno_t cmi_hdl_wrmsr(cmi_hdl_t, uint_t, uint64_t);
 
 extern void cmi_hdl_enable_mce(cmi_hdl_t);
-
 extern uint_t cmi_hdl_vendor(cmi_hdl_t);
 extern const char *cmi_hdl_vendorstr(cmi_hdl_t);
 extern uint_t cmi_hdl_family(cmi_hdl_t);
@@ -172,6 +171,7 @@ extern uint_t cmi_ntv_hwchipid(cpu_t *);
 extern uint_t cmi_ntv_hwprocnodeid(cpu_t *);
 extern uint_t cmi_ntv_hwcoreid(cpu_t *);
 extern uint_t cmi_ntv_hwstrandid(cpu_t *);
+extern void cmi_ntv_hwdisable_mce(cmi_hdl_t);
 #endif	/* __xpv */
 
 typedef struct cmi_mca_regs {
@@ -210,6 +210,7 @@ extern void cmi_mca_trap(struct regs *);
 extern boolean_t cmi_panic_on_ue(void);
 
 extern void cmi_mc_register(cmi_hdl_t, const struct cmi_mc_ops *, void *);
+extern cmi_errno_t cmi_mc_register_global(const struct cmi_mc_ops *, void *);
 extern void cmi_mc_sw_memscrub_disable(void);
 extern cmi_errno_t cmi_mc_patounum(uint64_t, uint8_t, uint8_t, uint32_t, int,
     mc_unum_t *);
diff --git a/usr/src/uts/intel/sys/memlist_plat.h b/usr/src/uts/intel/sys/memlist_plat.h
new file mode 100644
index 0000000000..065f8ed37a
--- /dev/null
+++ b/usr/src/uts/intel/sys/memlist_plat.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * All rights reserved.
+ */
+
+#ifndef	_SYS_MEMLIST_PLAT_H
+#define	_SYS_MEMLIST_PLAT_H
+
+/*
+ * Boot time configuration information objects
+ */
+
+#include <sys/types.h>
+#include <sys/memlist.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int check_boot_version(int);
+extern void copy_memlist_filter(struct memlist *, struct memlist **,
+    void (*filter)(uint64_t *, uint64_t *));
+extern void installed_top_size(struct memlist *, pfn_t *, pgcnt_t *);
+extern void installed_top_size_ex(struct memlist *, pfn_t *, pgcnt_t *, int *);
+extern void phys_install_has_changed(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _SYS_MEMLIST_PLAT_H */
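The copy_memlist_filter() prototype moved into this new header takes a filter callback that may adjust each span's address and size while the list is copied. A minimal user-level sketch of that shape (simplified memlist, illustrative names; the kernel version allocates differently and links both directions):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct memlist {			/* simplified; the real one also links prev */
	uint64_t	ml_address;
	uint64_t	ml_size;
	struct memlist	*ml_next;
};

/*
 * Copy src to a freshly allocated list, letting the filter adjust each
 * span; spans filtered down to zero size are dropped.
 */
static struct memlist *
memlist_copy_filter(const struct memlist *src,
    void (*filter)(uint64_t *, uint64_t *))
{
	struct memlist *head = NULL, **tailp = &head;

	for (; src != NULL; src = src->ml_next) {
		uint64_t addr = src->ml_address, size = src->ml_size;

		if (filter != NULL)
			filter(&addr, &size);
		if (size == 0)
			continue;
		struct memlist *m = malloc(sizeof (*m));
		m->ml_address = addr;
		m->ml_size = size;
		m->ml_next = NULL;
		*tailp = m;
		tailp = &m->ml_next;
	}
	return (head);
}

/* Example filter: trim everything below 1 MiB from each span. */
static void
trim_low(uint64_t *addrp, uint64_t *sizep)
{
	const uint64_t low = 1ULL << 20;

	if (*addrp < low) {
		uint64_t cut = low - *addrp;
		*sizep = (*sizep > cut) ? *sizep - cut : 0;
		*addrp = low;
	}
}

int
main(void)
{
	struct memlist a = { 0x0, 0x200000, NULL };	/* 0..2 MiB */
	struct memlist *c = memlist_copy_filter(&a, trim_low);

	printf("%#llx..%#llx\n", (unsigned long long)c->ml_address,
	    (unsigned long long)(c->ml_address + c->ml_size));
	return (0);
}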
diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c
index d01eabae19..f3efc433c5 100644
--- a/usr/src/uts/sun4/os/ddi_impl.c
+++ b/usr/src/uts/sun4/os/ddi_impl.c
@@ -48,12 +48,8 @@
 #include <sys/systeminfo.h>
 #include <sys/fpu/fpusystm.h>
 #include <sys/vm.h>
-#include <sys/fs/dv_node.h>
-#include <sys/fs/snode.h>
 #include <sys/ddi_isa.h>
-#include <sys/modhash.h>
 #include <sys/modctl.h>
-#include <sys/sunldi_impl.h>
 
 dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *,
     ddi_intr_handle_impl_t *);
@@ -1954,937 +1950,3 @@ peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
 
 	return (err);
 }
-
-/*
- * Platform independent DR routines
- */
-
-static int
-ndi2errno(int n)
-{
-	int err = 0;
-
-	switch (n) {
-	case NDI_NOMEM:
-		err = ENOMEM;
-		break;
-	case NDI_BUSY:
-		err = EBUSY;
-		break;
-	case NDI_FAULT:
-		err = EFAULT;
-		break;
-	case NDI_FAILURE:
-		err = EIO;
-		break;
-	case NDI_SUCCESS:
-		break;
-	case NDI_BADHANDLE:
-	default:
-		err = EINVAL;
-		break;
-	}
-	return (err);
-}
-
-/*
- * Prom tree node list
- */
-struct ptnode {
-	pnode_t		nodeid;
-	struct ptnode	*next;
-};
-
-/*
- * Prom tree walk arg
- */
-struct pta {
-	dev_info_t	*pdip;
-	devi_branch_t	*bp;
-	uint_t		flags;
-	dev_info_t	*fdip;
-	struct ptnode	*head;
-};
-
-static void
-visit_node(pnode_t nodeid, struct pta *ap)
-{
-	struct ptnode	**nextp;
-	int		(*select)(pnode_t, void *, uint_t);
-
-	ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);
-
-	select = ap->bp->create.prom_branch_select;
-
-	ASSERT(select);
-
-	if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {
-
-		for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
-			;
-
-		*nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);
-
-		(*nextp)->nodeid = nodeid;
-	}
-
-	if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
-		return;
-
-	nodeid = prom_childnode(nodeid);
-	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
-		visit_node(nodeid, ap);
-		nodeid = prom_nextnode(nodeid);
-	}
-}
-
-/*
- * NOTE: The caller of this function must check for device contracts
- * or LDI callbacks against this dip before setting the dip offline.
- */
-static int
-set_infant_dip_offline(dev_info_t *dip, void *arg)
-{
-	char	*path = (char *)arg;
-
-	ASSERT(dip);
-	ASSERT(arg);
-
-	if (i_ddi_node_state(dip) >= DS_ATTACHED) {
-		(void) ddi_pathname(dip, path);
-		cmn_err(CE_WARN, "Attempt to set offline flag on attached "
-		    "node: %s", path);
-		return (DDI_FAILURE);
-	}
-
-	mutex_enter(&(DEVI(dip)->devi_lock));
-	if (!DEVI_IS_DEVICE_OFFLINE(dip))
-		DEVI_SET_DEVICE_OFFLINE(dip);
-	mutex_exit(&(DEVI(dip)->devi_lock));
-
-	return (DDI_SUCCESS);
-}
-
-typedef struct result {
-	char	*path;
-	int	result;
-} result_t;
-
-static int
-dip_set_offline(dev_info_t *dip, void *arg)
-{
-	int end;
-	result_t *resp = (result_t *)arg;
-
-	ASSERT(dip);
-	ASSERT(resp);
-
-	/*
-	 * We stop the walk if e_ddi_offline_notify() returns
-	 * failure, because this implies that one or more consumers
-	 * (either LDI or contract based) has blocked the offline.
-	 * So there is no point in conitnuing the walk
-	 */
-	if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
-		resp->result = DDI_FAILURE;
-		return (DDI_WALK_TERMINATE);
-	}
-
-	/*
-	 * If set_infant_dip_offline() returns failure, it implies
-	 * that we failed to set a particular dip offline. This
-	 * does not imply that the offline as a whole should fail.
-	 * We want to do the best we can, so we continue the walk.
-	 */
-	if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
-		end = DDI_SUCCESS;
-	else
-		end = DDI_FAILURE;
-
-	e_ddi_offline_finalize(dip, end);
-
-	return (DDI_WALK_CONTINUE);
-}
-
-/*
- * The call to e_ddi_offline_notify() exists for the
- * unlikely error case that a branch we are trying to
- * create already exists and has device contracts or LDI
- * event callbacks against it.
- *
- * We allow create to succeed for such branches only if
- * no constraints block the offline.
- */
-static int
-branch_set_offline(dev_info_t *dip, char *path)
-{
-	int		circ;
-	int		end;
-	result_t	res;
-
-
-	if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
-		return (DDI_FAILURE);
-	}
-
-	if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
-		end = DDI_SUCCESS;
-	else
-		end = DDI_FAILURE;
-
-	e_ddi_offline_finalize(dip, end);
-
-	if (end == DDI_FAILURE)
-		return (DDI_FAILURE);
-
-	res.result = DDI_SUCCESS;
-	res.path = path;
-
-	ndi_devi_enter(dip, &circ);
-	ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
-	ndi_devi_exit(dip, circ);
-
-	return (res.result);
-}
-
-/*ARGSUSED*/
-static int
-create_prom_branch(void *arg, int has_changed)
-{
-	int		circ;
-	int		exists, rv;
-	pnode_t		nodeid;
-	struct ptnode	*tnp;
-	dev_info_t	*dip;
-	struct pta	*ap = arg;
-	devi_branch_t	*bp;
-	char		*path;
-
-	ASSERT(ap);
-	ASSERT(ap->fdip == NULL);
-	ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));
-
-	bp = ap->bp;
-
-	nodeid = ddi_get_nodeid(ap->pdip);
-	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
-		cmn_err(CE_WARN, "create_prom_branch: invalid "
-		    "nodeid: 0x%x", nodeid);
-		return (EINVAL);
-	}
-
-	ap->head = NULL;
-
-	nodeid = prom_childnode(nodeid);
-	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
-		visit_node(nodeid, ap);
-		nodeid = prom_nextnode(nodeid);
-	}
-
-	if (ap->head == NULL)
-		return (ENODEV);
-
-	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-	rv = 0;
-	while ((tnp = ap->head) != NULL) {
-		ap->head = tnp->next;
-
-		ndi_devi_enter(ap->pdip, &circ);
-
-		/*
-		 * Check if the branch already exists.
-		 */
-		exists = 0;
-		dip = e_ddi_nodeid_to_dip(tnp->nodeid);
-		if (dip != NULL) {
-			exists = 1;
-
-			/* Parent is held busy, so release hold */
-			ndi_rele_devi(dip);
-#ifdef DEBUG
-			cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
-			    " for nodeid 0x%x", (void *)dip, tnp->nodeid);
-#endif
-		} else {
-			dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
-		}
-
-		kmem_free(tnp, sizeof (struct ptnode));
-
-		/*
-		 * Hold the branch if it is not already held
-		 */
-		if (dip && !exists) {
-			e_ddi_branch_hold(dip);
-		}
-
-		ASSERT(dip == NULL || e_ddi_branch_held(dip));
-
-		/*
-		 * Set all dips in the newly created branch offline so that
-		 * only a "configure" operation can attach
-		 * the branch
-		 */
-		if (dip == NULL || branch_set_offline(dip, path)
-		    == DDI_FAILURE) {
-			ndi_devi_exit(ap->pdip, circ);
-			rv = EIO;
-			continue;
-		}
-
-		ASSERT(ddi_get_parent(dip) == ap->pdip);
-
-		ndi_devi_exit(ap->pdip, circ);
-
-		if (ap->flags & DEVI_BRANCH_CONFIGURE) {
-			int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
-			if (error && rv == 0)
-				rv = error;
-		}
-
-		/*
-		 * Invoke devi_branch_callback() (if it exists) only for
-		 * newly created branches
-		 */
-		if (bp->devi_branch_callback && !exists)
-			bp->devi_branch_callback(dip, bp->arg, 0);
-	}
-
-	kmem_free(path, MAXPATHLEN);
-
-	return (rv);
-}
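The sid_node_create() routine removed below drives node creation with the DDI_WALK return-code protocol: CONTINUE descends, PRUNECHILD skips children, PRUNESIB stops sibling creation, TERMINATE stops both. A self-contained sketch of that control flow (names and the demo callback are illustrative; the real code creates dev_info nodes rather than printing):

#include <stdio.h>

enum walk { WALK_CONTINUE, WALK_PRUNESIB, WALK_PRUNECHILD,
	WALK_TERMINATE, WALK_ERROR };

static int nodes_left = 5;	/* demo state so the recursion terminates */

static enum walk
demo_cb(int depth)
{
	if (nodes_left-- <= 0)
		return (WALK_TERMINATE);
	printf("created node at depth %d\n", depth);
	return (depth < 2 ? WALK_CONTINUE : WALK_PRUNECHILD);
}

/* Recursive creation in the style of sid_node_create(). */
static enum walk
build(int depth, enum walk (*cb)(int))
{
	enum walk rv, child;

	rv = cb(depth);
	switch (rv) {
	case WALK_CONTINUE:
	case WALK_PRUNESIB:
		/* create children until one prunes its siblings or errors */
		child = WALK_CONTINUE;
		while (child == WALK_CONTINUE)
			child = build(depth + 1, cb);
		if (child == WALK_ERROR)
			rv = child;
		break;
	case WALK_TERMINATE:
		/* no children here, and no further siblings of this node */
		rv = WALK_PRUNESIB;
		break;
	case WALK_PRUNECHILD:
		/* skip children, but let the parent continue with siblings */
		rv = WALK_CONTINUE;
		break;
	default:
		break;
	}
	return (rv);
}

int
main(void)
{
	(void) build(0, demo_cb);
	return (0);
}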
-
-static int
-sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
-{
-	int		rv, circ, len;
-	int		i, flags, ret;
-	dev_info_t	*dip;
-	char		*nbuf;
-	char		*path;
-	static const char	*noname = "<none>";
-
-	ASSERT(pdip);
-	ASSERT(DEVI_BUSY_OWNED(pdip));
-
-	flags = 0;
-
-	/*
-	 * Creating the root of a branch ?
-	 */
-	if (rdipp) {
-		*rdipp = NULL;
-		flags = DEVI_BRANCH_ROOT;
-	}
-
-	ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
-	rv = bp->create.sid_branch_create(dip, bp->arg, flags);
-
-	nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);
-
-	if (rv == DDI_WALK_ERROR) {
-		cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
-		    " properties on devinfo node %p", (void *)dip);
-		goto fail;
-	}
-
-	len = OBP_MAXDRVNAME;
-	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
-	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
-	    != DDI_PROP_SUCCESS) {
-		cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
-		    "no name property", (void *)dip);
-		goto fail;
-	}
-
-	ASSERT(i_ddi_node_state(dip) == DS_PROTO);
-	if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
-		cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
-		    " for devinfo node %p", nbuf, (void *)dip);
-		goto fail;
-	}
-
-	kmem_free(nbuf, OBP_MAXDRVNAME);
-
-	/*
-	 * Ignore bind failures just like boot does
-	 */
-	(void) ndi_devi_bind_driver(dip, 0);
-
-	switch (rv) {
-	case DDI_WALK_CONTINUE:
-	case DDI_WALK_PRUNESIB:
-		ndi_devi_enter(dip, &circ);
-
-		i = DDI_WALK_CONTINUE;
-		for (; i == DDI_WALK_CONTINUE; ) {
-			i = sid_node_create(dip, bp, NULL);
-		}
-
-		ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
-		if (i == DDI_WALK_ERROR)
-			rv = i;
-		/*
-		 * If PRUNESIB stop creating siblings
-		 * of dip's child. Subsequent walk behavior
-		 * is determined by rv returned by dip.
-		 */
-
-		ndi_devi_exit(dip, circ);
-		break;
-	case DDI_WALK_TERMINATE:
-		/*
-		 * Don't create children and ask our parent
-		 * to not create siblings either.
-		 */
-		rv = DDI_WALK_PRUNESIB;
-		break;
-	case DDI_WALK_PRUNECHILD:
-		/*
-		 * Don't create children, but ask parent to continue
-		 * with siblings.
-		 */
-		rv = DDI_WALK_CONTINUE;
-		break;
-	default:
-		ASSERT(0);
-		break;
-	}
-
-	if (rdipp)
-		*rdipp = dip;
-
-	/*
-	 * Set device offline - only the "configure" op should cause an attach.
-	 * Note that it is safe to set the dip offline without checking
-	 * for either device contract or layered driver (LDI) based constraints
-	 * since there cannot be any contracts or LDI opens of this device.
-	 * This is because this node is a newly created dip with the parent busy
-	 * held, so no other thread can come in and attach this dip. A dip that
-	 * has never been attached cannot have contracts since by definition
-	 * a device contract (an agreement between a process and a device minor
-	 * node) can only be created against a device that has minor nodes
-	 * i.e is attached. Similarly an LDI open will only succeed if the
-	 * dip is attached. We assert below that the dip is not attached.
-	 */
-	ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
-	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-	ret = set_infant_dip_offline(dip, path);
-	ASSERT(ret == DDI_SUCCESS);
-	kmem_free(path, MAXPATHLEN);
-
-	return (rv);
-fail:
-	(void) ndi_devi_free(dip);
-	kmem_free(nbuf, OBP_MAXDRVNAME);
-	return (DDI_WALK_ERROR);
-}
-
-static int
-create_sid_branch(
-	dev_info_t	*pdip,
-	devi_branch_t	*bp,
-	dev_info_t	**dipp,
-	uint_t		flags)
-{
-	int		rv = 0, state = DDI_WALK_CONTINUE;
-	dev_info_t	*rdip;
-
-	while (state == DDI_WALK_CONTINUE) {
-		int	circ;
-
-		ndi_devi_enter(pdip, &circ);
-
-		state = sid_node_create(pdip, bp, &rdip);
-		if (rdip == NULL) {
-			ndi_devi_exit(pdip, circ);
-			ASSERT(state == DDI_WALK_ERROR);
-			break;
-		}
-
-		e_ddi_branch_hold(rdip);
-
-		ndi_devi_exit(pdip, circ);
-
-		if (flags & DEVI_BRANCH_CONFIGURE) {
-			int error = e_ddi_branch_configure(rdip, dipp, 0);
-			if (error && rv == 0)
-				rv = error;
-		}
-
-		/*
-		 * devi_branch_callback() is optional
-		 */
-		if (bp->devi_branch_callback)
-			bp->devi_branch_callback(rdip, bp->arg, 0);
-	}
-
-	ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);
-
-	return (state == DDI_WALK_ERROR ? EIO : rv);
-}
-
-int
-e_ddi_branch_create(
-	dev_info_t	*pdip,
-	devi_branch_t	*bp,
-	dev_info_t	**dipp,
-	uint_t		flags)
-{
-	int prom_devi, sid_devi, error;
-
-	if (pdip == NULL || bp == NULL || bp->type == 0)
-		return (EINVAL);
-
-	prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
-	sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;
-
-	if (prom_devi && bp->create.prom_branch_select == NULL)
-		return (EINVAL);
-	else if (sid_devi && bp->create.sid_branch_create == NULL)
-		return (EINVAL);
-	else if (!prom_devi && !sid_devi)
-		return (EINVAL);
-
-	if (flags & DEVI_BRANCH_EVENT)
-		return (EINVAL);
-
-	if (prom_devi) {
-		struct pta pta = {0};
-
-		pta.pdip = pdip;
-		pta.bp = bp;
-		pta.flags = flags;
-
-		error = prom_tree_access(create_prom_branch, &pta, NULL);
-
-		if (dipp)
-			*dipp = pta.fdip;
-		else if (pta.fdip)
-			ndi_rele_devi(pta.fdip);
-	} else {
-		error = create_sid_branch(pdip, bp, dipp, flags);
-	}
-
-	return (error);
-}
-
-int
-e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
-{
-	int		rv;
-	char		*devnm;
-	dev_info_t	*pdip;
-
-	if (dipp)
-		*dipp = NULL;
-
-	if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
-		return (EINVAL);
-
-	pdip = ddi_get_parent(rdip);
-
-	ndi_hold_devi(pdip);
-
-	if (!e_ddi_branch_held(rdip)) {
-		ndi_rele_devi(pdip);
-		cmn_err(CE_WARN, "e_ddi_branch_configure: "
-		    "dip(%p) not held", (void *)rdip);
-		return (EINVAL);
-	}
-
-	if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
-		/*
-		 * First attempt to bind a driver. If we fail, return
-		 * success (On some platforms, dips for some device
-		 * types (CPUs) may not have a driver)
-		 */
-		if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
-			ndi_rele_devi(pdip);
-			return (0);
-		}
-
-		if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
-			rv = NDI_FAILURE;
-			goto out;
-		}
-	}
-
-	ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
-
-	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
-	(void) ddi_deviname(rdip, devnm);
-
-	if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
-	    NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
-		/* release hold from ndi_devi_config_one() */
-		ndi_rele_devi(rdip);
-	}
-
-	kmem_free(devnm, MAXNAMELEN + 1);
-out:
-	if (rv != NDI_SUCCESS && dipp && rdip) {
-		ndi_hold_devi(rdip);
-		*dipp = rdip;
-	}
-	ndi_rele_devi(pdip);
-	return (ndi2errno(rv));
-}
-
-void
-e_ddi_branch_hold(dev_info_t *rdip)
-{
-	if (e_ddi_branch_held(rdip)) {
-		cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
-		return;
-	}
-
-	mutex_enter(&DEVI(rdip)->devi_lock);
-	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
-		DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
-		DEVI(rdip)->devi_ref++;
-	}
-	ASSERT(DEVI(rdip)->devi_ref > 0);
-	mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_held(dev_info_t *rdip)
-{
-	int rv = 0;
-
-	mutex_enter(&DEVI(rdip)->devi_lock);
-	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
-	    DEVI(rdip)->devi_ref > 0) {
-		rv = 1;
-	}
-	mutex_exit(&DEVI(rdip)->devi_lock);
-
-	return (rv);
-}
-void
-e_ddi_branch_rele(dev_info_t *rdip)
-{
-	mutex_enter(&DEVI(rdip)->devi_lock);
-	DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
-	DEVI(rdip)->devi_ref--;
-	mutex_exit(&DEVI(rdip)->devi_lock);
-}
-
-int
-e_ddi_branch_unconfigure(
-	dev_info_t *rdip,
-	dev_info_t **dipp,
-	uint_t flags)
-{
-	int	circ, rv;
-	int	destroy;
-	char	*devnm;
-	uint_t	nflags;
-	dev_info_t *pdip;
-
-	if (dipp)
-		*dipp = NULL;
-
-	if (rdip == NULL)
-		return (EINVAL);
-
-	pdip = ddi_get_parent(rdip);
-
-	ASSERT(pdip);
-
-	/*
-	 * Check if caller holds pdip busy - can cause deadlocks during
-	 * devfs_clean()
-	 */
-	if (DEVI_BUSY_OWNED(pdip)) {
-		cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
-		    " devinfo node(%p) is busy held", (void *)pdip);
-		return (EINVAL);
-	}
-
-	destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
-
-	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
-
-	ndi_devi_enter(pdip, &circ);
-	(void) ddi_deviname(rdip, devnm);
-	ndi_devi_exit(pdip, circ);
-
-	/*
-	 * ddi_deviname() returns a component name with / prepended.
-	 */
-	(void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
-
-	ndi_devi_enter(pdip, &circ);
-
-	/*
-	 * Recreate device name as it may have changed state (init/uninit)
-	 * when parent busy lock was dropped for devfs_clean()
-	 */
-	(void) ddi_deviname(rdip, devnm);
-
-	if (!e_ddi_branch_held(rdip)) {
-		kmem_free(devnm, MAXNAMELEN + 1);
-		ndi_devi_exit(pdip, circ);
-		cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
-		    destroy ? "destroy" : "unconfigure", (void *)rdip);
-		return (EINVAL);
-	}
-
-	/*
-	 * Release hold on the branch. This is ok since we are holding the
-	 * parent busy. If rdip is not removed, we must do a hold on the
-	 * branch before returning.
-	 */
-	e_ddi_branch_rele(rdip);
-
-	nflags = NDI_DEVI_OFFLINE;
-	if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
-		nflags |= NDI_DEVI_REMOVE;
-		destroy = 1;
-	} else {
-		nflags |= NDI_UNCONFIG;		/* uninit but don't remove */
-	}
-
-	if (flags & DEVI_BRANCH_EVENT)
-		nflags |= NDI_POST_EVENT;
-
-	if (i_ddi_devi_attached(pdip) &&
-	    (i_ddi_node_state(rdip) >= DS_INITIALIZED)) {
-		rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
-	} else {
-		rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
-		if (rv == NDI_SUCCESS) {
-			ASSERT(!destroy || ddi_get_child(rdip) == NULL);
-			rv = ndi_devi_offline(rdip, nflags);
-		}
-	}
-
-	if (!destroy || rv != NDI_SUCCESS) {
-		/* The dip still exists, so do a hold */
-		e_ddi_branch_hold(rdip);
-	}
-out:
-	kmem_free(devnm, MAXNAMELEN + 1);
-	ndi_devi_exit(pdip, circ);
-	return (ndi2errno(rv));
-}
-
-int
-e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
-{
-	return (e_ddi_branch_unconfigure(rdip, dipp,
-	    flag|DEVI_BRANCH_DESTROY));
-}
-
-/*
- * Number of chains for hash table
- */
-#define	NUMCHAINS	17
-
-/*
- * Devinfo busy arg
- */
-struct devi_busy {
-	int dv_total;
-	int s_total;
-	mod_hash_t *dv_hash;
-	mod_hash_t *s_hash;
-	int (*callback)(dev_info_t *, void *, uint_t);
-	void *arg;
-};
-
-static int
-visit_dip(dev_info_t *dip, void *arg)
-{
-	uintptr_t sbusy, dvbusy, ref;
-	struct devi_busy *bsp = arg;
-
-	ASSERT(bsp->callback);
-
-	/*
-	 * A dip cannot be busy if its reference count is 0
-	 */
-	if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
-		return (bsp->callback(dip, bsp->arg, 0));
-	}
-
-	if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
-		dvbusy = 0;
-
-	/*
-	 * To catch device opens currently maintained on specfs common snodes.
-	 */
-	if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
-		sbusy = 0;
-
-#ifdef	DEBUG
-	if (ref < sbusy || ref < dvbusy) {
-		cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
-		    "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
-	}
-#endif
-
-	dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;
-
-	return (bsp->callback(dip, bsp->arg, dvbusy));
-}
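The visit_snode()/visit_dvnode() walkers below accumulate per-device open counts by looking up the current total, adding the new opens, and storing the result back. A user-level sketch of that accumulation idiom, with a tiny fixed table standing in for the kernel's mod_hash (all names illustrative):

#include <stdio.h>
#include <stddef.h>

#define	NBUCKETS	8

struct bucket {
	const void	*key;
	unsigned long	val;
};

static struct bucket tab[NBUCKETS];

static void
count_add(const void *key, unsigned long count)
{
	int i;

	for (i = 0; i < NBUCKETS; i++) {
		if (tab[i].key == key) {	/* existing entry: accumulate */
			tab[i].val += count;
			return;
		}
		if (tab[i].key == NULL) {	/* free slot: first sighting */
			tab[i].key = key;
			tab[i].val = count;
			return;
		}
	}
	fprintf(stderr, "table full\n");	/* a real hash would grow */
}

int
main(void)
{
	int devA, devB;

	count_add(&devA, 2);
	count_add(&devB, 1);
	count_add(&devA, 3);
	printf("devA=%lu devB=%lu\n", tab[0].val, tab[1].val);	/* 5 and 1 */
	return (0);
}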
-
-static int
-visit_snode(struct snode *sp, void *arg)
-{
-	uintptr_t sbusy;
-	dev_info_t *dip;
-	int count;
-	struct devi_busy *bsp = arg;
-
-	ASSERT(sp);
-
-	/*
-	 * The stable lock is held. This prevents
-	 * the snode and its associated dip from
-	 * going away.
-	 */
-	dip = NULL;
-	count = spec_devi_open_count(sp, &dip);
-
-	if (count <= 0)
-		return (DDI_WALK_CONTINUE);
-
-	ASSERT(dip);
-
-	if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
-		sbusy = count;
-	else
-		sbusy += count;
-
-	if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
-		cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
-		    "sbusy = %lu", "e_ddi_branch_referenced",
-		    (void *)dip, sbusy);
-	}
-
-	bsp->s_total += count;
-
-	return (DDI_WALK_CONTINUE);
-}
-
-static void
-visit_dvnode(struct dv_node *dv, void *arg)
-{
-	uintptr_t dvbusy;
-	uint_t count;
-	struct vnode *vp;
-	struct devi_busy *bsp = arg;
-
-	ASSERT(dv && dv->dv_devi);
-
-	vp = DVTOV(dv);
-
-	mutex_enter(&vp->v_lock);
-	count = vp->v_count;
-	mutex_exit(&vp->v_lock);
-
-	if (!count)
-		return;
-
-	if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
-	    (mod_hash_val_t *)&dvbusy))
-		dvbusy = count;
-	else
-		dvbusy += count;
-
-	if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
-	    (mod_hash_val_t)dvbusy)) {
-		cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
-		    "dvbusy=%lu", "e_ddi_branch_referenced",
-		    (void *)dv->dv_devi, dvbusy);
-	}
-
-	bsp->dv_total += count;
-}
-
-/*
- * Returns reference count on success or -1 on failure.
- */
-int
-e_ddi_branch_referenced(
-	dev_info_t *rdip,
-	int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
-	void *arg)
-{
-	int circ;
-	char *path;
-	dev_info_t *pdip;
-	struct devi_busy bsa = {0};
-
-	ASSERT(rdip);
-
-	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
-
-	ndi_hold_devi(rdip);
-
-	pdip = ddi_get_parent(rdip);
-
-	ASSERT(pdip);
-
-	/*
-	 * Check if caller holds pdip busy - can cause deadlocks during
-	 * devfs_walk()
-	 */
-	if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
-		cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
-		    "devinfo branch(%p) not held or parent busy held",
-		    (void *)rdip);
-		ndi_rele_devi(rdip);
-		kmem_free(path, MAXPATHLEN);
-		return (-1);
-	}
-
-	ndi_devi_enter(pdip, &circ);
-	(void) ddi_pathname(rdip, path);
-	ndi_devi_exit(pdip, circ);
-
-	bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
-	    mod_hash_null_valdtor, sizeof (struct dev_info));
-
-	bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
-	    mod_hash_null_valdtor, sizeof (struct snode));
-
-	if (devfs_walk(path, visit_dvnode, &bsa)) {
-		cmn_err(CE_WARN, "e_ddi_branch_referenced: "
-		    "devfs walk failed for: %s", path);
-		kmem_free(path, MAXPATHLEN);
-		bsa.s_total = bsa.dv_total = -1;
-		goto out;
-	}
-
-	kmem_free(path, MAXPATHLEN);
-
-	/*
-	 * Walk the snode table to detect device opens, which are currently
-	 * maintained on specfs common snodes.
-	 */
-	spec_snode_walk(visit_snode, &bsa);
-
-	if (callback == NULL)
-		goto out;
-
-	bsa.callback = callback;
-	bsa.arg = arg;
-
-	if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
-		ndi_devi_enter(rdip, &circ);
-		ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
-		ndi_devi_exit(rdip, circ);
-	}
-
-out:
-	ndi_rele_devi(rdip);
-	mod_hash_destroy_ptrhash(bsa.s_hash);
-	mod_hash_destroy_ptrhash(bsa.dv_hash);
-	return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
-}
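The e_ddi_branch_hold()/_held()/_rele() trio removed above implements a hold flag that piggybacks on a reference count, so a branch contributes at most one reference no matter how many times hold is attempted. A user-level sketch of that idiom, with pthreads standing in for kernel mutexes (names simplified):

#include <pthread.h>
#include <stdio.h>

#define	BRANCH_HELD	0x1

struct node {
	pthread_mutex_t	lock;
	unsigned	flags;
	int		ref;
};

static void
branch_hold(struct node *np)
{
	pthread_mutex_lock(&np->lock);
	if ((np->flags & BRANCH_HELD) == 0) {	/* idempotent */
		np->flags |= BRANCH_HELD;
		np->ref++;
	}
	pthread_mutex_unlock(&np->lock);
}

static int
branch_held(struct node *np)
{
	int rv;

	pthread_mutex_lock(&np->lock);
	rv = (np->flags & BRANCH_HELD) != 0 && np->ref > 0;
	pthread_mutex_unlock(&np->lock);
	return (rv);
}

static void
branch_rele(struct node *np)
{
	pthread_mutex_lock(&np->lock);
	np->flags &= ~BRANCH_HELD;
	np->ref--;
	pthread_mutex_unlock(&np->lock);
}

int
main(void)
{
	struct node n = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	branch_hold(&n);
	branch_hold(&n);			/* second hold is a no-op */
	printf("held=%d ref=%d\n", branch_held(&n), n.ref);	/* 1 1 */
	branch_rele(&n);
	printf("held=%d ref=%d\n", branch_held(&n), n.ref);	/* 0 0 */
	return (0);
}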
diff --git a/usr/src/uts/sun4u/sys/machsystm.h b/usr/src/uts/sun4u/sys/machsystm.h
index 76e96a56d6..ccacd922b4 100644
--- a/usr/src/uts/sun4u/sys/machsystm.h
+++ b/usr/src/uts/sun4u/sys/machsystm.h
@@ -26,8 +26,6 @@
 #ifndef _SYS_MACHSYSTM_H
 #define	_SYS_MACHSYSTM_H
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 /*
  * Numerous platform-dependent interfaces that don't seem to belong
  * in any other header file.
@@ -357,46 +355,6 @@ extern uint64_t cpu_pa[];
 extern void ptl1_init_cpu(struct cpu *);
 
 /*
- * Defines for DR interfaces
- */
-#define	DEVI_BRANCH_CHILD	0x01	/* Walk immediate children of root */
-#define	DEVI_BRANCH_CONFIGURE	0x02	/* Configure branch after create */
-#define	DEVI_BRANCH_DESTROY	0x04	/* Destroy branch after unconfigure */
-#define	DEVI_BRANCH_EVENT	0x08	/* Post NDI event */
-#define	DEVI_BRANCH_PROM	0x10	/* Branches derived from PROM nodes */
-#define	DEVI_BRANCH_SID		0x20	/* SID node branches */
-#define	DEVI_BRANCH_ROOT	0x40	/* Node is the root of a branch */
-
-typedef struct devi_branch {
-	void		*arg;
-	void		(*devi_branch_callback)(dev_info_t *, void *, uint_t);
-	int		type;
-	union {
-		int	(*prom_branch_select)(pnode_t, void *, uint_t);
-		int	(*sid_branch_create)(dev_info_t *, void *, uint_t);
-	} create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
-    dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
-    int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
  * Constants which define the "hole" in the 64-bit sfmmu address space.
  * These are set to specific values by the CPU module code.
  */
diff --git a/usr/src/uts/sun4v/sys/machsystm.h b/usr/src/uts/sun4v/sys/machsystm.h
index 832a634d0f..d9c795ecf9 100644
--- a/usr/src/uts/sun4v/sys/machsystm.h
+++ b/usr/src/uts/sun4v/sys/machsystm.h
@@ -341,46 +341,6 @@ extern uint64_t cpu_pa[];
 extern void ptl1_init_cpu(struct cpu *);
 
 /*
- * Defines for DR interfaces
- */
-#define	DEVI_BRANCH_CHILD	0x01	/* Walk immediate children of root */
-#define	DEVI_BRANCH_CONFIGURE	0x02	/* Configure branch after create */
-#define	DEVI_BRANCH_DESTROY	0x04	/* Destroy branch after unconfigure */
-#define	DEVI_BRANCH_EVENT	0x08	/* Post NDI event */
-#define	DEVI_BRANCH_PROM	0x10	/* Branches derived from PROM nodes */
-#define	DEVI_BRANCH_SID		0x20	/* SID node branches */
-#define	DEVI_BRANCH_ROOT	0x40	/* Node is the root of a branch */
-
-typedef struct devi_branch {
-	void		*arg;
-	void		(*devi_branch_callback)(dev_info_t *, void *, uint_t);
-	int		type;
-	union {
-		int	(*prom_branch_select)(pnode_t, void *, uint_t);
-		int	(*sid_branch_create)(dev_info_t *, void *, uint_t);
-	} create;
-} devi_branch_t;
-
-
-/*
- * Prototypes which really belongs to sunddi.c, and should be moved to
- * sunddi.c if there is another platform using these calls.
- */
-extern int e_ddi_branch_create(dev_info_t *pdip, devi_branch_t *bp,
-    dev_info_t **dipp, uint_t flags);
-extern int e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern int e_ddi_branch_unconfigure(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern int e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp,
-    uint_t flags);
-extern void e_ddi_branch_hold(dev_info_t *rdip);
-extern void e_ddi_branch_rele(dev_info_t *rdip);
-extern int e_ddi_branch_held(dev_info_t *rdip);
-extern int e_ddi_branch_referenced(dev_info_t *rdip,
-    int (*cb)(dev_info_t *dip, void *, uint_t), void *arg);
-
-/*
  * Constants which define the "hole" in the 64-bit sfmmu address space.
  * These are set to specific values by the CPU module code.
  */
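The devi_branch_t removed from both machsystm.h copies is a tagged union: the type field selects which callback member is valid, and e_ddi_branch_create() validates the pair before dispatching. A simplified, self-contained sketch of that shape (all types and names here are stand-ins, not the kernel interface):

#include <stdio.h>

#define	BRANCH_PROM	0x10
#define	BRANCH_SID	0x20

typedef struct branch {
	void	*arg;
	int	type;
	union {
		int	(*prom_select)(int nodeid, void *arg);
		int	(*sid_create)(void *dip, void *arg);
	} create;
} branch_t;

static int
branch_create(branch_t *bp)
{
	if (bp == NULL || bp->type == 0)
		return (-1);
	/* the tag must match a non-NULL callback before dispatch */
	if (bp->type == BRANCH_PROM && bp->create.prom_select == NULL)
		return (-1);
	if (bp->type == BRANCH_SID && bp->create.sid_create == NULL)
		return (-1);

	if (bp->type == BRANCH_PROM)
		return (bp->create.prom_select(0, bp->arg));
	else if (bp->type == BRANCH_SID)
		return (bp->create.sid_create(NULL, bp->arg));
	return (-1);
}

static int
pick_all(int nodeid, void *arg)
{
	(void) nodeid;
	(void) arg;
	printf("prom branch selected\n");
	return (0);
}

int
main(void)
{
	branch_t b = { NULL, BRANCH_PROM, { .prom_select = pick_all } };

	return (branch_create(&b));
}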