summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/devfsadm/Makefile.com7
-rw-r--r--usr/src/cmd/devfsadm/devfsadm.c21
-rw-r--r--usr/src/cmd/devfsadm/sensor_link.c79
-rw-r--r--usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c261
-rw-r--r--usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h34
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/Makefile14
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip.c16
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip.h7
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c8
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c4
-rw-r--r--usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c91
-rw-r--r--usr/src/man/man7d/Makefile6
-rw-r--r--usr/src/man/man7d/amdf17nbdf.7d53
-rw-r--r--usr/src/man/man7d/coretemp.7d49
-rw-r--r--usr/src/uts/common/sys/Makefile4
-rw-r--r--usr/src/uts/common/sys/sensors.h81
-rw-r--r--usr/src/uts/common/sys/sunddi.h14
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c48
-rw-r--r--usr/src/uts/intel/Makefile.files14
-rw-r--r--usr/src/uts/intel/Makefile.intel18
-rw-r--r--usr/src/uts/intel/Makefile.rules10
-rw-r--r--usr/src/uts/intel/amdf17nbdf/Makefile47
-rw-r--r--usr/src/uts/intel/coretemp/Makefile54
-rw-r--r--usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c1015
-rw-r--r--usr/src/uts/intel/io/coretemp/coretemp.c784
-rw-r--r--usr/src/uts/intel/io/coretemp/coretemp.conf16
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h116
27 files changed, 2819 insertions, 52 deletions
diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com
index b446b148ff..cec58108c8 100644
--- a/usr/src/cmd/devfsadm/Makefile.com
+++ b/usr/src/cmd/devfsadm/Makefile.com
@@ -21,7 +21,7 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# This target builds both a command (daemon) and various shared objects. This
# isn't a typical target, and the inclusion of both library and command
@@ -71,7 +71,8 @@ LINK_OBJS_CMN = \
dtrace_link.o \
vscan_link.o \
zfs_link.o \
- zut_link.o
+ zut_link.o \
+ sensor_link.o
LINK_OBJS = $(LINK_OBJS_CMN) \
$(LINK_OBJS_$(MACH))
@@ -164,7 +165,7 @@ install: all \
clean:
- $(RM) $(OBJS)
+ $(RM) $(OBJS)
lint: $(DEVFSADM_MOD).ln $(LINT_MODULES)
diff --git a/usr/src/cmd/devfsadm/devfsadm.c b/usr/src/cmd/devfsadm/devfsadm.c
index f81d5b5d67..52f4f4c0da 100644
--- a/usr/src/cmd/devfsadm/devfsadm.c
+++ b/usr/src/cmd/devfsadm/devfsadm.c
@@ -23,6 +23,7 @@
* Copyright 2016 Toomas Soome <tsoome@me.com>
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2019, Joyent, Inc.
*/
/*
@@ -2073,6 +2074,16 @@ class_ok(char *class)
return (DEVFSADM_SUCCESS);
}
+ /*
+ * Some create tabs operate on multiple classes of devices because the
+ * kernel doesn't have a good way for a driver to indicate that a
+ * particular minor's class is different from that of the dev_info_t
+ * it belongs to. As such, we'll always fail to match those here.
+ */
+ if (class == NULL) {
+ return (DEVFSADM_FAILURE);
+ }
+
for (i = 0; i < num_classes; i++) {
if (strcmp(class, classes[i]) == 0) {
return (DEVFSADM_SUCCESS);
@@ -3717,10 +3728,10 @@ do_inst_sync(char *filename, char *instfilename)
* safely, the database is flushed to a temporary file, then moved into place.
*
* The following files are used during this process:
- * /etc/path_to_inst: The path_to_inst file
- * /etc/path_to_inst.<pid>: Contains data flushed from the kernel
- * /etc/path_to_inst.old: The backup file
- * /etc/path_to_inst.old.<pid>: Temp file for creating backup
+ * /etc/path_to_inst: The path_to_inst file
+ * /etc/path_to_inst.<pid>: Contains data flushed from the kernel
+ * /etc/path_to_inst.old: The backup file
+ * /etc/path_to_inst.old.<pid>: Temp file for creating backup
*
*/
static void
@@ -7803,7 +7814,7 @@ add_verbose_id(char *mid)
* returns DEVFSADM_TRUE if contents is a minor node in /devices.
* If mn_root is not NULL, mn_root is set to:
* if contents is a /dev node, mn_root = contents
- * OR
+ * OR
* if contents is a /devices node, mn_root set to the '/'
* following /devices.
*/
diff --git a/usr/src/cmd/devfsadm/sensor_link.c b/usr/src/cmd/devfsadm/sensor_link.c
new file mode 100644
index 0000000000..7a2b48af75
--- /dev/null
+++ b/usr/src/cmd/devfsadm/sensor_link.c
@@ -0,0 +1,79 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+/*
+ * Create /dev links for various sensors. The sensor series of node types
+ * all begin with ddi_sensor, followed by a series of ':' delimited path
+ * components. Those components name the directory under /dev/sensors in
+ * which the links should ultimately be created.
+ *
+ * For example, ddi_sensor:temperature:cpu would cause us to place the named
+ * minor under /dev/sensors/temperature/cpu/. Currently it is up to drivers to
+ * not conflict in names or if there is a fear of conflicting, make sure their
+ * minor is unique.
+ */
+
+#include <devfsadm.h>
+#include <string.h>
+
+#define SENSORS_BASE "sensors"
+
+/*
+ * devfsadm create callback for sensor minors. The minor's node type is split
+ * on ':'; everything after the first ':' becomes a directory path beneath
+ * SENSORS_BASE and the minor's own name becomes the final link component.
+ * A link path that would not fit in PATH_MAX is silently skipped.
+ *
+ * Returns DEVFSADM_TERMINATE only when the node type cannot be duplicated;
+ * every other outcome returns DEVFSADM_CONTINUE.
+ */
+static int
+sensor_link(di_minor_t minor, di_node_t node)
+{
+	const char *nodetype, *minor_name;
+	const char *subdir = NULL;
+	char *copy, *sep;
+	char link[PATH_MAX];
+	size_t need;
+
+	nodetype = di_minor_nodetype(minor);
+	if (nodetype == NULL) {
+		return (DEVFSADM_CONTINUE);
+	}
+
+	minor_name = di_minor_name(minor);
+	if (minor_name == NULL) {
+		return (DEVFSADM_CONTINUE);
+	}
+
+	/* Work on a private copy; the separators are rewritten in place. */
+	copy = strdup(nodetype);
+	if (copy == NULL) {
+		return (DEVFSADM_TERMINATE);
+	}
+
+	/*
+	 * Turn every ':' into '/'. The text following the first ':' is where
+	 * the directory portion of the link begins.
+	 */
+	for (sep = strchr(copy, ':'); sep != NULL;
+	    sep = strchr(sep + 1, ':')) {
+		if (subdir == NULL) {
+			subdir = sep + 1;
+		}
+		*sep = '/';
+	}
+
+	if (subdir != NULL && *subdir != '\0') {
+		need = snprintf(link, sizeof (link), "%s/%s/%s", SENSORS_BASE,
+		    subdir, minor_name);
+	} else {
+		need = snprintf(link, sizeof (link), "%s/%s", SENSORS_BASE,
+		    minor_name);
+	}
+
+	/* Only create the link when the full path fit in the buffer. */
+	if (need < sizeof (link)) {
+		(void) devfsadm_mklink(link, node, minor, 0);
+	}
+
+	free(copy);
+	return (DEVFSADM_CONTINUE);
+}
+
+static devfsadm_create_t sensor_create_cbt[] = {
+ { NULL, "ddi_sensor", NULL, TYPE_PARTIAL, ILEVEL_0, sensor_link }
+};
+DEVFSADM_CREATE_INIT_V0(sensor_create_cbt);
diff --git a/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c
new file mode 100644
index 0000000000..28fcf3e314
--- /dev/null
+++ b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c
@@ -0,0 +1,261 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+/*
+ * This file provides routines to interact with the kernel sensor framework.
+ * Currently, modules that require interacting with a kernel sensor need to
+ * build this file as part of the module. This takes care of all the work of
+ * setting up and creating the temperature sensor, given a path to that sensor.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <libnvpair.h>
+#include <sys/sensors.h>
+#include <sys/fm/protocol.h>
+#include <fm/topo_mod.h>
+
+#define TOPO_METH_TOPO_SENSOR_TEMP "topo_sensor_temp_reading"
+#define TOPO_METH_TOPO_SENSOR_TEMP_DESC "Kernel Temperature Reading"
+#define TOPO_METH_TOPO_SENSOR_TEMP_VERSION 0
+
+/*
+ * Property method that produces the current temperature reading. The sensor
+ * device path is taken from the TOPO_IO_DEV_PATH member of the method
+ * arguments; the device is opened, queried with SENSOR_IOCTL_TEMPERATURE and
+ * closed again on every call. On success *out receives a newly allocated
+ * nvlist describing a TOPO_TYPE_DOUBLE property named TOPO_SENSOR_READING and
+ * 0 is returned. On failure the result of topo_mod_seterrno() is returned.
+ */
+static int
+topo_sensor_temp_read(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
+    nvlist_t *in, nvlist_t **out)
+{
+	sensor_ioctl_temperature_t reading;
+	nvlist_t *margs, *result;
+	char *devpath;
+	double value;
+	int sfd, rv;
+
+	if (vers != TOPO_METH_TOPO_SENSOR_TEMP_VERSION) {
+		return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW));
+	}
+
+	if (nvlist_lookup_nvlist(in, TOPO_PROP_ARGS, &margs) != 0 ||
+	    nvlist_lookup_string(margs, TOPO_IO_DEV_PATH, &devpath) != 0) {
+		topo_mod_dprintf(mod, "failed to lookup sensor path from "
+		    "property %s", TOPO_IO_DEV_PATH);
+		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
+	}
+
+	sfd = open(devpath, O_RDONLY);
+	if (sfd < 0) {
+		topo_mod_dprintf(mod, "failed to open sensor path %s: %s",
+		    devpath, strerror(errno));
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	/* From here on the descriptor is open; all exits go through done. */
+	(void) memset(&reading, '\0', sizeof (reading));
+	if (ioctl(sfd, SENSOR_IOCTL_TEMPERATURE, &reading) != 0) {
+		topo_mod_dprintf(mod, "failed to read temperature sensor "
+		    "%s: %s", devpath, strerror(errno));
+		rv = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+		goto done;
+	}
+
+	/*
+	 * Scale the raw reading into whole degrees. A granularity greater
+	 * than one means sit_temp counts fractions of a degree, with sit_gran
+	 * steps per degree, so we divide. A granularity less than negative
+	 * one means each step of sit_temp is worth |sit_gran| degrees, so we
+	 * multiply. Any other granularity leaves the reading untouched.
+	 */
+	value = (double)reading.sit_temp;
+	if (reading.sit_gran > 1) {
+		value /= (double)reading.sit_gran;
+	} else if (reading.sit_gran < -1) {
+		value *= (double)labs(reading.sit_gran);
+	}
+
+	if (topo_mod_nvalloc(mod, &result, NV_UNIQUE_NAME) != 0) {
+		topo_mod_dprintf(mod, "failed to allocate output temperature "
+		    "nvl");
+		rv = topo_mod_seterrno(mod, EMOD_NOMEM);
+		goto done;
+	}
+
+	if (nvlist_add_string(result, TOPO_PROP_VAL_NAME,
+	    TOPO_SENSOR_READING) != 0 ||
+	    nvlist_add_uint32(result, TOPO_PROP_VAL_TYPE,
+	    TOPO_TYPE_DOUBLE) != 0 ||
+	    nvlist_add_double(result, TOPO_PROP_VAL_VAL, value) != 0) {
+		topo_mod_dprintf(mod, "failed to add members to output "
+		    "temperature nvlist");
+		nvlist_free(result);
+		rv = topo_mod_seterrno(mod, EMOD_NOMEM);
+		goto done;
+	}
+
+	*out = result;
+	rv = 0;
+done:
+	(void) close(sfd);
+	return (rv);
+}
+
+static const topo_method_t topo_sensor_temp_fac_methods[] = {
+ { TOPO_METH_TOPO_SENSOR_TEMP, TOPO_METH_TOPO_SENSOR_TEMP_DESC,
+ TOPO_METH_TOPO_SENSOR_TEMP_VERSION, TOPO_STABILITY_INTERNAL,
+ topo_sensor_temp_read },
+ { NULL }
+};
+
+/*
+ * Translate the unit reported by the kernel sensor (SENSOR_UNIT_*) into the
+ * matching libtopo unit. Anything unrecognized maps to
+ * TOPO_SENSOR_UNITS_UNSPECIFIED.
+ */
+static topo_sensor_unit_t
+topo_sensor_units(const sensor_ioctl_temperature_t *temp)
+{
+	topo_sensor_unit_t unit = TOPO_SENSOR_UNITS_UNSPECIFIED;
+
+	switch (temp->sit_unit) {
+	case SENSOR_UNIT_CELSIUS:
+		unit = TOPO_SENSOR_UNITS_DEGREES_C;
+		break;
+	case SENSOR_UNIT_FAHRENHEIT:
+		unit = TOPO_SENSOR_UNITS_DEGREES_F;
+		break;
+	case SENSOR_UNIT_KELVIN:
+		unit = TOPO_SENSOR_UNITS_DEGREES_K;
+		break;
+	default:
+		break;
+	}
+
+	return (unit);
+}
+
+/*
+ * Create a temperature sensor facility node named fname beneath pnode, backed
+ * by the kernel sensor device found at path.
+ *
+ * The device is opened once up front to confirm that it really is a
+ * temperature sensor and to learn the units it reports in; it is closed again
+ * before the facility node is created. The node is then given its facility
+ * property group and a property method that re-reads the device, using path as
+ * the method's argument, whenever the reading is requested.
+ *
+ * Returns 0 on success. A path that does not exist (ENOENT) also returns 0,
+ * because not every system provides these sensors. Every other failure
+ * returns non-zero: either the result of topo_mod_seterrno(), or -1 where a
+ * libtopo routine failed (presumably having set the module errno itself).
+ * On failure any facility node created here is unbound again.
+ */
+int
+topo_sensor_create_temp_sensor(topo_mod_t *mod, tnode_t *pnode,
+    const char *path, const char *fname)
+{
+	int fd, ret, err;
+	sensor_ioctl_kind_t sik;
+	sensor_ioctl_temperature_t temp;
+	tnode_t *fnode = NULL;
+	topo_pgroup_info_t pgi;
+	nvlist_t *reader_arg = NULL;
+
+	topo_mod_dprintf(mod, "attempting to create sensor for %s at %s",
+	    topo_node_name(pnode), path);
+
+	(void) memset(&sik, '\0', sizeof (sik));
+	(void) memset(&temp, '\0', sizeof (temp));
+
+	if ((fd = open(path, O_RDONLY)) < 0) {
+		/*
+		 * Capture errno before logging. The logging path is free to
+		 * modify it and we still need to examine it below.
+		 */
+		int open_errno = errno;
+
+		topo_mod_dprintf(mod, "failed to open sensor path %s: %s",
+		    path, strerror(open_errno));
+
+		/*
+		 * We always try to create temperature sensors; however, they
+		 * may not exist or be supported on the system in question.
+		 * Therefore ENOENT is totally acceptable.
+		 */
+		if (open_errno == ENOENT) {
+			return (0);
+		}
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	if (ioctl(fd, SENSOR_IOCTL_TYPE, &sik) != 0) {
+		topo_mod_dprintf(mod, "failed to verify sensor kind for sensor "
+		    "%s: %s", path, strerror(errno));
+		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+		goto out;
+	}
+
+	if (sik.sik_kind != SENSOR_KIND_TEMPERATURE) {
+		topo_mod_dprintf(mod, "sensor kind for %s is not temperature, "
+		    "found 0x%x", path, sik.sik_kind);
+		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+		goto out;
+	}
+
+	/* Read once now so that we know which units the sensor reports. */
+	if (ioctl(fd, SENSOR_IOCTL_TEMPERATURE, &temp) != 0) {
+		topo_mod_dprintf(mod, "failed to read temperature sensor "
+		    "%s: %s", path, strerror(errno));
+		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+		goto out;
+	}
+
+	/* Mark the descriptor closed so the cleanup path skips it. */
+	(void) close(fd);
+	fd = -1;
+
+	if ((fnode = topo_node_facbind(mod, pnode, fname,
+	    TOPO_FAC_TYPE_SENSOR)) == NULL) {
+		topo_mod_dprintf(mod, "failed to bind temperature facility "
+		    "node to %s: %d", path, topo_mod_errno(mod));
+		ret = -1;
+		goto out;
+	}
+
+	pgi.tpi_name = TOPO_PGROUP_FACILITY;
+	pgi.tpi_namestab = TOPO_STABILITY_PRIVATE;
+	pgi.tpi_datastab = TOPO_STABILITY_PRIVATE;
+	pgi.tpi_version = 1;
+
+	if (topo_pgroup_create(fnode, &pgi, &err) != 0) {
+		topo_mod_dprintf(mod, "failed to create facility pgroup: %s",
+		    topo_strerror(err));
+		ret = topo_mod_seterrno(mod, err);
+		goto out;
+	}
+
+	if (topo_prop_set_string(fnode, TOPO_PGROUP_FACILITY,
+	    TOPO_SENSOR_CLASS, TOPO_PROP_IMMUTABLE,
+	    TOPO_SENSOR_CLASS_THRESHOLD, &err) != 0 ||
+	    topo_prop_set_uint32(fnode, TOPO_PGROUP_FACILITY,
+	    TOPO_FACILITY_TYPE, TOPO_PROP_IMMUTABLE, TOPO_SENSOR_TYPE_TEMP,
+	    &err) != 0 ||
+	    topo_prop_set_uint32(fnode, TOPO_PGROUP_FACILITY,
+	    TOPO_SENSOR_UNITS, TOPO_PROP_IMMUTABLE, topo_sensor_units(&temp),
+	    &err) != 0) {
+		topo_mod_dprintf(mod, "failed to set properties for sensor "
+		    "%s: %s", path, topo_strerror(err));
+		ret = topo_mod_seterrno(mod, err);
+		goto out;
+	}
+
+	if (topo_method_register(mod, fnode, topo_sensor_temp_fac_methods) < 0) {
+		topo_mod_dprintf(mod, "failed to register reading methods on "
+		    "%s", path);
+		ret = -1;
+		goto out;
+	}
+
+	if (topo_mod_nvalloc(mod, &reader_arg, NV_UNIQUE_NAME) != 0 ||
+	    nvlist_add_string(reader_arg, TOPO_IO_DEV_PATH, path) != 0) {
+		topo_mod_dprintf(mod, "Failed to set up reader argument nvl");
+		ret = topo_mod_seterrno(mod, EMOD_NOMEM);
+		goto out;
+	}
+
+	if (topo_prop_method_register(fnode, TOPO_PGROUP_FACILITY,
+	    TOPO_SENSOR_READING, TOPO_TYPE_DOUBLE, TOPO_METH_TOPO_SENSOR_TEMP,
+	    reader_arg, &err) != 0) {
+		topo_mod_dprintf(mod, "failed to set argument for sensor %s: "
+		    "%s", path, topo_strerror(err));
+		/* The failure must land in ret, which is what we return. */
+		ret = topo_mod_seterrno(mod, err);
+		goto out;
+	}
+
+	nvlist_free(reader_arg);
+	return (0);
+out:
+	if (fd >= 0) {
+		(void) close(fd);
+	}
+
+	topo_node_unbind(fnode);
+	nvlist_free(reader_arg);
+	return (ret);
+}
diff --git a/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h
new file mode 100644
index 0000000000..ff6e1ea92e
--- /dev/null
+++ b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h
@@ -0,0 +1,34 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#ifndef _TOPO_SENSOR_H
+#define _TOPO_SENSOR_H
+
+/*
+ * Routines to interact with the common kernel sensor framework.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int topo_sensor_create_temp_sensor(topo_mod_t *, tnode_t *, const char *,
+ const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TOPO_SENSOR_H */
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile b/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile
index 3da69e6ce2..f56686faf1 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile
@@ -22,16 +22,22 @@
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
MODULE = chip
ARCH = i86pc
CLASS = arch
+SHAREDDIR = ../../common/shared/
+
MODULESRCS = chip.c chip_label.c chip_subr.c chip_amd.c chip_intel.c\
-chip_serial.c chip_smbios.c
+chip_serial.c chip_smbios.c chip_temp.c
+MODULESRCS += topo_sensor.c
include ../../Makefile.plugin
LDLIBS += -lipmi -lfmd_agent -lumem -lsmbios -lkstat
-# not linted
-SMATCH=off
+CPPFLAGS += -I$(SHAREDDIR)
+
+%.o: $(SHAREDDIR)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c
index cdd799cc0b..c81f01c3e9 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019, Joyent, Inc.
*/
#include <unistd.h>
@@ -403,6 +403,13 @@ create_core(topo_mod_t *mod, tnode_t *pnode, nvlist_t *cpu,
if (topo_node_range_create(mod, core, STRAND_NODE_NAME,
0, 255) != 0)
return (-1);
+
+ /*
+ * Creating a temperature sensor may fail because the sensor
+ * doesn't exist or due to internal reasons. At the moment, we
+ * swallow any such errors that occur.
+ */
+ (void) chip_create_core_temp_sensor(mod, core);
}
if (!is_xpv()) {
@@ -644,6 +651,13 @@ create_chip(topo_mod_t *mod, tnode_t *pnode, topo_instance_t min,
}
create_mc = B_TRUE;
+
+ /*
+ * Creating a temperature sensor may fail because the sensor
+ * doesn't exist or due to internal reasons. At the moment, we
+ * swallow any such errors that occur.
+ */
+ (void) chip_create_chip_temp_sensor(mod, chip);
}
if (FM_AWARE_SMBIOS(mod)) {
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
index 8b5ad3b88f..b4fd850996 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019, Joyent, Inc.
*/
#ifndef _CHIP_H
@@ -221,6 +221,11 @@ extern const char *chip_rev_smbios_get(topo_mod_t *, id_t);
extern id_t memnode_to_smbiosid(topo_mod_t *, uint16_t, const char *,
uint64_t, void *);
+/*
+ * Prototypes for chip_temp.c
+ */
+extern int chip_create_chip_temp_sensor(topo_mod_t *, tnode_t *);
+extern int chip_create_core_temp_sensor(topo_mod_t *, tnode_t *);
#ifdef __cplusplus
}
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c
index 67c35058c3..4275bc46f6 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c
@@ -22,6 +22,8 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2019, Joyent, Inc.
*/
#include <stdio.h>
@@ -561,8 +563,8 @@ get_num_chips(topo_mod_t *mod)
nchip = -1;
break;
}
- if ((bitmap & (1 << chipid)) != 0) {
- bitmap |= (1 << chipid);
+ if ((bitmap & (1ULL << chipid)) != 0) {
+ bitmap |= (1ULL << chipid);
nchip++;
}
}
@@ -660,7 +662,7 @@ a4fplus_chip_label(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
*
* This function computes the DIMM slot number using the following formula:
*
- * slot = cs - (cs % 2) + channel + offset
+ * slot = cs - (cs % 2) + channel + offset
*/
/* ARGSUSED */
int
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
index a83f31dbb4..53fd7852ef 100644
--- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019, Joyent, Inc.
*/
/*
@@ -230,7 +230,7 @@ mkrsrc(topo_mod_t *mod, tnode_t *pnode, const char *name, int inst,
{
*nvl = topo_mod_hcfmri(mod, pnode, FM_HC_SCHEME_VERSION, name,
inst, NULL, auth, NULL, NULL, NULL);
- return (nvl != NULL ? 0 : -1); /* caller must free nvlist */
+ return (*nvl != NULL ? 0 : -1); /* caller must free nvlist */
}
/*
diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c
new file mode 100644
index 0000000000..89f8d57fb6
--- /dev/null
+++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c
@@ -0,0 +1,91 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <libnvpair.h>
+#include <sys/sensors.h>
+#include <sys/fm/protocol.h>
+#include <fm/topo_mod.h>
+#include <topo_sensor.h>
+
+#include "chip.h"
+
+static const char *chip_sensor_base = "/dev/sensors/temperature/cpu";
+
+/*
+ * Attach a temperature sensor facility to the core node pnode. The per-core
+ * device, named by chip and core instance, is preferred. When that device
+ * does not exist we fall back to the sensor for the core's procnode. Returns
+ * the result of topo_sensor_create_temp_sensor(), or the result of
+ * topo_mod_seterrno() if the sensor path could not be determined.
+ */
+int
+chip_create_core_temp_sensor(topo_mod_t *mod, tnode_t *pnode)
+{
+	char path[PATH_MAX];
+	struct stat sb;
+	int32_t chipid, coreid, procnodeid;
+	int perr;
+
+	coreid = topo_node_instance(pnode);
+	if (topo_prop_get_int32(pnode, PGNAME(CORE), CORE_CHIP_ID, &chipid,
+	    &perr) != 0) {
+		return (topo_mod_seterrno(mod, perr));
+	}
+
+	if (snprintf(path, sizeof (path), "%s/chip%d.core%d",
+	    chip_sensor_base, chipid, coreid) >= sizeof (path)) {
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	/* A per-core sensor exists; use it directly. */
+	if (stat(path, &sb) == 0) {
+		return (topo_sensor_create_temp_sensor(mod, pnode, path,
+		    "temp"));
+	}
+
+	/*
+	 * Only a missing file sends us on to the per-die (procnode) sensor;
+	 * any other stat failure is treated as an error.
+	 */
+	if (errno != ENOENT) {
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	if (topo_prop_get_int32(pnode, PGNAME(CORE), CORE_PROCNODE_ID,
+	    &procnodeid, &perr) != 0) {
+		return (topo_mod_seterrno(mod, perr));
+	}
+
+	if (snprintf(path, sizeof (path), "%s/procnode.%d", chip_sensor_base,
+	    procnodeid) >= sizeof (path)) {
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	return (topo_sensor_create_temp_sensor(mod, pnode, path, "temp"));
+}
+
+/*
+ * Attach a temperature sensor facility to the chip node pnode, using the
+ * sensor device named after the chip's instance number. Returns the result of
+ * topo_sensor_create_temp_sensor(), or the result of topo_mod_seterrno() when
+ * the device path does not fit in PATH_MAX.
+ */
+int
+chip_create_chip_temp_sensor(topo_mod_t *mod, tnode_t *pnode)
+{
+	char path[PATH_MAX];
+	int32_t chipid = topo_node_instance(pnode);
+
+	if (snprintf(path, sizeof (path), "%s/chip%d", chip_sensor_base,
+	    chipid) >= sizeof (path)) {
+		return (topo_mod_seterrno(mod, EMOD_UNKNOWN));
+	}
+
+	return (topo_sensor_create_temp_sensor(mod, pnode, path, "temp"));
+}
diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile
index c984ee588a..6ad3c6b28f 100644
--- a/usr/src/man/man7d/Makefile
+++ b/usr/src/man/man7d/Makefile
@@ -12,7 +12,7 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
-# Copyright (c) 2017, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
# Copyright 2018 Nexenta Systems, Inc.
# Copyright 2019 Peter Tribble
@@ -20,10 +20,11 @@
include $(SRC)/Makefile.master
-MANSECT= 7d
+MANSECT= 7d
_MANFILES= aac.7d \
afe.7d \
+ amdf17nbdf.7d \
audio.7d \
audio1575.7d \
audioens.7d \
@@ -38,6 +39,7 @@ _MANFILES= aac.7d \
bscv.7d \
chxge.7d \
console.7d \
+ coretemp.7d \
cpuid.7d \
dca.7d \
dcam1394.7d \
diff --git a/usr/src/man/man7d/amdf17nbdf.7d b/usr/src/man/man7d/amdf17nbdf.7d
new file mode 100644
index 0000000000..739eab6c82
--- /dev/null
+++ b/usr/src/man/man7d/amdf17nbdf.7d
@@ -0,0 +1,53 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2019, Joyent, Inc.
+.\"
+.Dd March 20, 2019
+.Dt AMDF17NBDF 7D
+.Os
+.Sh NAME
+.Nm amdf17nbdf
+.Nd AMD Family 17h Northbridge and Data Fabric Driver
+.Sh SYNOPSIS
+.Pa /dev/sensors/temperature/cpu/*
+.Sh DESCRIPTION
+The
+.Nm
+driver provides the system access to the Northbridge and Data Fabric
+devices on AMD Family 17h
+.Pq Zen
+processors allowing the operating system to communicate with the system
+management unit
+.Pq SMU .
+.Pp
+From this, the driver exposes temperature sensors.
+On Family 17h systems, temperature sensors exist for each Zeppelin die,
+of which there may be multiple in a single package.
+This means that each sensor covers more than one core.
+.Pp
+Temperature information is available to the system via the fault
+management architecture
+.Pq FMA .
+The file system location and programming interface to the
+.Nm
+driver are considered
+.Sy Volatile ,
+subject to change without notice, and should not be used directly.
+Raw temperature information can be dumped through the FMA developer
+utility fmtopo.
+.Sh SEE ALSO
+.Xr fmadm 1M
+.Rs
+.%A AMD
+.%B Open-Source Register Reference For AMD Family 17h Processors Models 00h-2Fh
+.%D July, 2018
+.Re
diff --git a/usr/src/man/man7d/coretemp.7d b/usr/src/man/man7d/coretemp.7d
new file mode 100644
index 0000000000..2ac1008e55
--- /dev/null
+++ b/usr/src/man/man7d/coretemp.7d
@@ -0,0 +1,49 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2019, Joyent, Inc.
+.\"
+.Dd March 20, 2019
+.Dt CORETEMP 7D
+.Os
+.Sh NAME
+.Nm coretemp
+.Nd Intel core-family temperature sensor driver
+.Sh SYNOPSIS
+.Pa /dev/sensors/temperature/cpu/*
+.Sh DESCRIPTION
+The
+.Nm
+driver provides the system with a means of reading the per-core and,
+when available, per-package digital temperature sensors on Intel CPUs.
+Currently, the
+.Nm
+driver supports Intel Core family processors after Penryn
+microarchitecture and Intel Atom processors starting with the Silvermont
+microarchitecture.
+.Pp
+Temperature information is available to the system via the fault
+management architecture
+.Pq FMA .
+The file system location and programming interface to the
+.Nm
+driver are considered
+.Sy Volatile ,
+subject to change without notice, and should not be used directly.
+Raw temperature information can be dumped through the FMA developer
+utility fmtopo.
+.Sh SEE ALSO
+.Xr fmadm 1M
+.Rs
+.%A Intel Corporation
+.%B Intel 64 and IA-32 Architectures Software Developer's Manual
+.%V Volume 3 (3A, 3B, 3C & 3D): System Programming Guide
+.Re
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 5a6d7a204c..909160f2db 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -21,9 +21,8 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2018, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
-# Copyright 2015, Joyent, Inc. All rights reserved.
# Copyright 2013 Saso Kiselkov. All rights reserved.
# Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com>
# Copyright 2017 Nexenta Systems, Inc.
@@ -518,6 +517,7 @@ CHKHDRS= \
sema_impl.h \
semaphore.h \
sendfile.h \
+ sensors.h \
ser_sync.h \
session.h \
sha1.h \
diff --git a/usr/src/uts/common/sys/sensors.h b/usr/src/uts/common/sys/sensors.h
new file mode 100644
index 0000000000..b9ca9f1f3f
--- /dev/null
+++ b/usr/src/uts/common/sys/sensors.h
@@ -0,0 +1,81 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+#ifndef _SYS_SENSORS_H
+#define _SYS_SENSORS_H
+
+/*
+ * Consolidated sensor ioctls for various parts of the operating system. These
+ * interfaces should not be relied on at all. They are evolving and will change
+ * as we add more to the system for this. This may eventually become a larger
+ * framework, though it's more likely we'll consolidate that in userland.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * List of different possible kinds of sensors.
+ */
+#define SENSOR_KIND_UNKNOWN 0x00
+#define SENSOR_KIND_TEMPERATURE 0x01
+
+/*
+ * Lists of units that sensors may have.
+ */
+#define SENSOR_UNIT_UNKNOWN 0x00
+#define SENSOR_UNIT_CELSIUS 0x01
+#define SENSOR_UNIT_FAHRENHEIT 0x02
+#define SENSOR_UNIT_KELVIN 0x03
+
+#define SENSOR_IOCTL (('s' << 24) | ('e' << 16) | ('n' << 8))
+
+/*
+ * Ask the sensor what kind of sensor it is.
+ */
+#define SENSOR_IOCTL_TYPE (SENSOR_IOCTL | 0x01)
+
+typedef struct sensor_ioctl_kind {
+ uint64_t sik_kind;	/* SENSOR_KIND_* value reported by the sensor */
+} sensor_ioctl_kind_t;
+
+/*
+ * Ask the sensor for a temperature measurement. The sensor is responsible for
+ * returning the units it's in. A temperature measurement is broken down into a
+ * signed value and a notion of its granularity. The sit_gran member indicates
+ * the granularity: the number of increments per degree in the temperature
+ * measurement (the sit_temp member). sit_gran is signed and the sign indicates
+ * whether one needs to multiply or divide the granularity. For example, a
+ * value that set sit_gran to 10 would mean that the value in sit_temp was in
+ * 10ths of a degree and that to get the actual value in degrees, one would
+ * divide by 10. On the other hand, a negative value means that we effectively
+ * have to multiply to get there. For example, a value of -2 would indicate that
+ * each value in sit_temp indicated two degrees and to get the temperature in
+ * degrees you would multiply sit_temp by two.
+ */
+#define SENSOR_IOCTL_TEMPERATURE (SENSOR_IOCTL | 0x02)
+
+typedef struct sensor_ioctl_temperature {
+ uint32_t sit_unit;	/* SENSOR_UNIT_* the reading is expressed in */
+ int32_t sit_gran;	/* signed granularity of sit_temp; see above */
+ int64_t sit_temp;	/* raw reading, to be scaled by sit_gran */
+} sensor_ioctl_temperature_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SENSORS_H */
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index b260971a89..3026dc961a 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -24,6 +24,7 @@
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019, Joyent, Inc.
*/
#ifndef _SYS_SUNDDI_H
@@ -202,13 +203,13 @@ extern "C" {
#define DDI_NT_KEYBOARD "ddi_keyboard" /* keyboard device */
-#define DDI_NT_PARALLEL "ddi_parallel" /* parallel port */
+#define DDI_NT_PARALLEL "ddi_parallel" /* parallel port */
#define DDI_NT_PRINTER "ddi_printer" /* printer device */
#define DDI_NT_UGEN "ddi_generic:usb" /* USB generic drv */
-#define DDI_NT_SMP "ddi_sas_smp" /* smp devcies */
+#define DDI_NT_SMP "ddi_sas_smp" /* smp devices */
#define DDI_NT_NEXUS "ddi_ctl:devctl" /* nexus drivers */
@@ -260,6 +261,11 @@ extern "C" {
#define DDI_NT_INTRCTL "ddi_tool_intr" /* tool intr access */
/*
+ * Various device types used for sensors.
+ */
+#define DDI_NT_SENSOR_TEMP_CPU "ddi_sensor:temperature:cpu"
+
+/*
* DDI event definitions
*/
#define EC_DEVFS "EC_devfs" /* Event class devfs */
@@ -839,7 +845,7 @@ ddi_prop_op_nblocks_blksize(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
* allocated by property provider via kmem_alloc. Requester
* is responsible for freeing returned property via kmem_free.
*
- * Arguments:
+ * Arguments:
*
* dev: Input: dev_t of property.
* dip: Input: dev_info_t pointer of child.
@@ -850,7 +856,7 @@ ddi_prop_op_nblocks_blksize(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
* valuep: Output: Addr of callers buffer pointer.
* lengthp:Output: *lengthp will contain prop length on exit.
*
- * Possible Returns:
+ * Possible Returns:
*
* DDI_PROP_SUCCESS: Prop found and returned.
* DDI_PROP_NOT_FOUND: Prop not found
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 3b0133ce07..1bf09bcf08 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -1037,7 +1037,9 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"fma4",
"tbm",
"avx512_vnni",
- "amd_pcec"
+ "amd_pcec",
+ "core_thermal",
+ "pkg_thermal"
};
boolean_t
@@ -2392,6 +2394,41 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)
}
}
+/*
+ * Read CPUID leaf 6 (thermal and power management) into the cached standard
+ * leaves whenever the processor reports that the leaf exists. Feature bits are
+ * only derived from it on Intel parts; AMD does not currently define any of
+ * the bits that we test for here.
+ */
+static void
+cpuid_pass1_thermal(cpu_t *cpu, uchar_t *featureset)
+{
+	struct cpuid_info *info = cpu->cpu_m.mcpu_cpi;
+	struct cpuid_regs *leaf;
+
+	if (info->cpi_maxeax >= 6) {
+		/* Populate the cached copy of leaf 6 for every vendor. */
+		leaf = &info->cpi_std[6];
+		leaf->cp_eax = 6;
+		leaf->cp_edx = 0;
+		leaf->cp_ecx = 0;
+		leaf->cp_ebx = 0;
+		(void) __cpuid_insn(leaf);
+		platform_cpuid_mangle(info->cpi_vendor, 6, leaf);
+
+		if (info->cpi_vendor == X86_VENDOR_Intel) {
+			/* Per-core digital thermal sensor. */
+			if ((leaf->cp_eax & CPUID_INTC_EAX_DTS) != 0) {
+				add_x86_feature(featureset,
+				    X86FSET_CORE_THERMAL);
+			}
+
+			/* Package-level thermal management. */
+			if ((leaf->cp_eax & CPUID_INTC_EAX_PTM) != 0) {
+				add_x86_feature(featureset,
+				    X86FSET_PKG_THERMAL);
+			}
+		}
+	}
+}
+
void
cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
{
@@ -3230,6 +3267,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
}
cpuid_pass1_topology(cpu, featureset);
+ cpuid_pass1_thermal(cpu, featureset);
/*
* Synthesize chip "revision" and socket type
@@ -3293,9 +3331,9 @@ cpuid_pass2(cpu_t *cpu)
cp->cp_eax = n;
/*
- * n == 7 was handled in pass 1
+ * leaves 6 and 7 were handled in pass 1
*/
- if (n == 7)
+ if (n == 6 || n == 7)
continue;
/*
@@ -6443,7 +6481,7 @@ cpuid_arat_supported(void)
if (cpi->cpi_maxeax >= 6) {
regs.cp_eax = 6;
(void) cpuid_insn(NULL, &regs);
- return (regs.cp_eax & CPUID_CSTATE_ARAT);
+ return (regs.cp_eax & CPUID_INTC_EAX_ARAT);
} else {
return (0);
}
@@ -6477,7 +6515,7 @@ cpuid_iepb_supported(struct cpu *cp)
regs.cp_eax = 0x6;
(void) cpuid_insn(NULL, &regs);
- return (regs.cp_ecx & CPUID_EPB_SUPPORT);
+ return (regs.cp_ecx & CPUID_INTC_ECX_PERFBIAS);
}
/*
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index bfcfe1dc52..b2ad69e8c1 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -21,7 +21,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2018, Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# Copyright 2018 Nexenta Systems, Inc.
#
@@ -101,7 +101,7 @@ GENUNIX_OBJS += \
#
CORE_OBJS += \
prmachdep.o
-
+
LX_CGROUP_OBJS += \
cgrps_node.o \
cgrps_vfsops.o \
@@ -423,3 +423,13 @@ VMXNET3S_OBJS = vmxnet3_main.o \
# VMware PVSCSI SCSI Controller
#
PVSCSI_OBJS = pvscsi.o
+
+#
+# Intel Temperature Module
+#
+CORETEMP_OBJS = coretemp.o
+
+#
+# AMD Family 17 northbridge driver
+#
+AMDF17NBDF_OBJS = amdf17nbdf.o
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 00785ef1be..e23797aeac 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -21,7 +21,7 @@
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
-# Copyright 2016 Joyent, Inc.
+# Copyright 2019, Joyent, Inc.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
# Copyright 2018 Nexenta Systems, Inc.
#
@@ -214,7 +214,7 @@ DRV_KMODS += audiopci
DRV_KMODS += audiosolo
DRV_KMODS += audiots
DRV_KMODS += audiovia823x
-DRV_KMODS += bl
+DRV_KMODS += bl
DRV_KMODS += blkdev
DRV_KMODS += bge
DRV_KMODS += bofi
@@ -360,7 +360,7 @@ DRV_KMODS += ural
DRV_KMODS += uath
DRV_KMODS += urtw
DRV_KMODS += vgatext
-DRV_KMODS += vmxnet
+DRV_KMODS += vmxnet
DRV_KMODS += vnd
DRV_KMODS += vnic
DRV_KMODS += vscan
@@ -506,9 +506,9 @@ DRV_KMODS += xhci
#
DRV_KMODS += usbgem
DRV_KMODS += axf
-DRV_KMODS += udmf
+DRV_KMODS += udmf
DRV_KMODS += upf
-DRV_KMODS += urf
+DRV_KMODS += urf
#
# 1394 modules
@@ -522,7 +522,7 @@ DRV_KMODS += dcam1394
# InfiniBand pseudo drivers
#
DRV_KMODS += ib ibp eibnx eoib rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs
-DRV_KMODS += sol_umad
+DRV_KMODS += sol_umad
#
# Brand modules
@@ -753,3 +753,9 @@ DACF_KMODS += net_dacf
# global cross check.
#
LINTFLAGS += -D_MACHDEP -I$(UTSBASE)/i86pc
+
+#
+# Sensor related drivers
+#
+DRV_KMODS += amdf17nbdf
+DRV_KMODS += coretemp
diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules
index 998fb97496..723cd2fd84 100644
--- a/usr/src/uts/intel/Makefile.rules
+++ b/usr/src/uts/intel/Makefile.rules
@@ -21,7 +21,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
-# Copyright 2019 Joyent, Inc. All rights reserved.
+# Copyright 2019, Joyent, Inc.
# Copyright 2017 Nexenta Systems, Inc.
#
@@ -154,10 +154,18 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amd8111s/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amdf17nbdf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/amr/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/coretemp/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/drm/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
diff --git a/usr/src/uts/intel/amdf17nbdf/Makefile b/usr/src/uts/intel/amdf17nbdf/Makefile
new file mode 100644
index 0000000000..a5543f176f
--- /dev/null
+++ b/usr/src/uts/intel/amdf17nbdf/Makefile
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019, Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = amdf17nbdf
+OBJECTS = $(AMDF17NBDF_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+# The driver sources live in intel/io/amdf17nbdf (see Makefile.rules).
+CONF_SRCDIR = $(UTSBASE)/intel/io/amdf17nbdf
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/coretemp/Makefile b/usr/src/uts/intel/coretemp/Makefile
new file mode 100644
index 0000000000..9ce4a8ab56
--- /dev/null
+++ b/usr/src/uts/intel/coretemp/Makefile
@@ -0,0 +1,54 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019, Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = coretemp
+OBJECTS = $(CORETEMP_OBJS:%=$(OBJS_DIR)/%)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/intel/io/coretemp
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Because we need to use cross calls directly, we must include the
+# definitions below. Once CMI rdmsr routines have been fixed, we can
+# remove this and move out of the platform specific driver world.
+#
+CPPFLAGS += -I$(UTSBASE)/i86pc/
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c
new file mode 100644
index 0000000000..11bddfa515
--- /dev/null
+++ b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c
@@ -0,0 +1,1015 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+/*
+ * AMD Family 17 Northbridge and Data Fabric Driver
+ *
+ * This driver attaches to the AMD Family 17h northbridge and data fabric bus.
+ * Each Zeppelin die ('processor node' in cpuid.c parlance) has its own
+ * northbridge and access to the data fabric bus. The northbridge and data
+ * fabric both provide access to various features such as:
+ *
+ * - The System Management Network (SMN)
+ * - Data Fabric via Fabric Indirect Config Access (FICAA)
+ *
+ * These are required to access things such as temperature sensors or memory
+ * controller configuration registers.
+ *
+ * In AMD Family 17h systems, the 'northbridge' is an ASIC that is part of the
+ * package that contains many I/O capabilities related to things like PCI
+ * express, etc. The 'data fabric' is the means by which different components
+ * both inside the socket and multiple sockets are connected together. Both the
+ * northbridge and the data fabric have dedicated PCI devices which the
+ * operating system can use to interact with them.
+ *
+ * ------------------------
+ * Mapping Devices Together
+ * ------------------------
+ *
+ * The operating system needs to expose things like temperature sensors and DRAM
+ * configuration registers in terms that are meaningful to the system such as
+ * logical CPUs, cores, etc. This driver attaches to the PCI IDs that represent
+ * the northbridge and data fabric; however, there are multiple PCI devices (one
+ * per die) that exist. This driver does manage to map all of these three things
+ * together; however, it requires some acrobatics. Unfortunately, there's no
+ * direct way to map a northbridge to its corresponding die. However, we can map
+ * a CPU die to a data fabric PCI device and a data fabric PCI device to a
+ * corresponding northbridge PCI device.
+ *
+ * In current Zen based products, there is a direct mapping between processor
+ * nodes and a data fabric PCI device. All of the devices are on PCI Bus 0 and
+ * start from Device 0x18. Device 0x18 maps to processor node 0, 0x19 to
+ * processor node 1, etc. This means that to map a logical CPU to a data fabric
+ * device, we take its processor node id, add it to 0x18, and find the PCI
+ * device on bus 0 whose device number is that sum. As each data fabric device
+ * is attached based on its PCI ID, we add it to the global list, amd_nbdf_dfs,
+ * that is in the amdf17nbdf_t structure.
+ *
+ * The northbridge PCI device has a defined device and function, but the PCI bus
+ * that it's on can vary. Each die has its own series of PCI buses that are
+ * assigned to it and the northbridge PCI device is on the first die-specific
+ * PCI bus for each die. This also means that the northbridge will not show up
+ * on PCI bus 0, which is the PCI bus that all of the data fabric devices are
+ * on. While conventionally the northbridge with the lowest PCI bus value
+ * would correspond to processor node zero, hardware does not guarantee that at
+ * all. Because we don't want to be at the mercy of firmware, we don't rely on
+ * this ordering, even though we have yet to find a system that deviates from
+ * this scheme.
+ *
+ * One of the registers in the data fabric device's function 0
+ * (AMDF17_DF_CFG_ADDR_CTL), happens to have the first PCI bus that is
+ * associated with the processor node. This means, that we can map a data fabric
+ * device to a northbridge by finding the northbridge whose PCI bus matches the
+ * value in the corresponding data fabric's AMDF17_DF_CFG_ADDR_CTL.
+ *
+ * This means that we can map a northbridge to a data fabric device and a data
+ * fabric device to a die. Because these are 1:1 mappings, there is a transitive
+ * relationship and therefore we know which northbridge is associated with which
+ * processor die. This is summarized in the following image:
+ *
+ * +-------+ +----------------------------+ +--------------+
+ * | Die 0 | ---> | Data Fabric PCI BDF 0/18/0 |-------> | Northbridge |
+ * +-------+ | AMDF17_DF_CFG_ADDR: bus 10 | | PCI 10/0/0 |
+ * ... +----------------------------+ +--------------+
+ * +-------+ +------------------------------+ +--------------+
+ * | Die n | ---> | Data Fabric PCI BDF 0/18+n/0 |-------> | Northbridge |
+ * +-------+ | AMDF17_DF_CFG_ADDR: bus 133 | | PCI 133/0/0 |
+ * +------------------------------+ +--------------+
+ *
+ * Note, the PCI buses used by the northbridges here are arbitrary. They do not
+ * reflect the actual values by hardware; however, the bus/device/function (BDF)
+ * of the data fabric accurately models hardware. All of the BDF values are in
+ * hex.
+ *
+ * -------------------------------
+ * Attach and Detach Complications
+ * -------------------------------
+ *
+ * Because we need to map different PCI devices together, this means that we
+ * have multiple dev_info_t structures that we need to manage. Each of these is
+ * independently attached and detached. While this is easily managed for attach,
+ * it is not for detach.
+ *
+ * Once a device has been detached it will only come back if we have an active
+ * minor node that will be accessed. While we have minor nodes associated with
+ * the northbridges, we don't with the data fabric devices. This means that if
+ * they are detached, nothing would ever cause them to be reattached. The system
+ * also doesn't provide us a way or any guarantees around making sure that we're
+ * attached to all such devices before we detach. As a result, unfortunately,
+ * it's easier to basically have detach always fail.
+ *
+ * To deal with both development and if issues arise in the field, there is a
+ * knob, amdf17nbdf_allow_detach, which if set to a non-zero value, will allow
+ * instances to detach.
+ *
+ * ---------------
+ * Exposed Devices
+ * ---------------
+ *
+ * Currently we expose a single set of character devices which represent
+ * temperature sensors for this family of processors. Because temperature
+ * sensors exist on a per-processor node basis, we create a single minor node
+ * for each one. Because our naming matches the cpuid naming, FMA can match that
+ * up to logical CPUs and take care of matching the sensors appropriately. We
+ * internally rate limit the sensor updates to 100ms, which is controlled by the
+ * global amdf17nbdf_cache_ms.
+ */
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/cmn_err.h>
+#include <sys/list.h>
+#include <sys/pci.h>
+#include <sys/stddef.h>
+#include <sys/stat.h>
+#include <sys/x86_archext.h>
+#include <sys/cpuvar.h>
+#include <sys/sensors.h>
+
+/*
+ * The range of minors that we'll allow.
+ */
+#define AMDF17_MINOR_LOW 1
+#define AMDF17_MINOR_HIGH INT32_MAX
+
+/*
+ * This is the value of the first PCI data fabric device that globally exists.
+ * It always maps to AMD's first nodeid (what we call cpi_procnodeid).
+ */
+#define AMDF17_DF_FIRST_DEVICE 0x18
+
+/*
+ * The data fabric devices are defined to always be on PCI bus zero.
+ */
+#define AMDF17_DF_BUSNO 0x00
+
+/*
+ * This register contains the BUS A of the processor node that corresponds
+ * to the data fabric device.
+ */
+#define AMDF17_DF_CFG_ADDR_CTL 0x84
+#define AMDF17_DF_CFG_ADDR_CTL_MASK 0xff
+
+/*
+ * Northbridge registers that are related to accessing the SMN. One writes to
+ * the SMN address register and then can read from the SMN data register.
+ */
+#define AMDF17_NB_SMN_ADDR 0x60
+#define AMDF17_NB_SMN_DATA 0x64
+
+/*
+ * The following are register offsets and the meaning of their bits related to
+ * temperature. These addresses are addresses in the System Management Network
+ * which is accessed through the northbridge. They are not addresses in PCI
+ * configuration space.
+ */
+#define AMDF17_SMU_THERMAL_CURTEMP 0x00059800
+#define AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(x) ((x) >> 21)
+#define AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL (1 << 19)
+
+#define AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ (-49)
+#define AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS 3
+#define AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK 0x7
+
+/*
+ * The temperature sensor in family 17 is measured in terms of 0.125 C steps.
+ */
+#define AMDF17_THERMAL_GRANULARITY 8
+
+struct amdf17nb;
+struct amdf17df;
+
+/*
+ * Per-northbridge state. One of these is allocated for each northbridge PCI
+ * device that attaches and it is kept on the global amd_nbdf_nbs list.
+ */
+typedef struct amdf17nb {
+ /* Linkage on the global amd_nbdf_nbs list. */
+ list_node_t amd_nb_link;
+ /* The dev_info_t of the attached northbridge instance. */
+ dev_info_t *amd_nb_dip;
+ /* PCI configuration space handle; used for SMN address/data access. */
+ ddi_acc_handle_t amd_nb_cfgspace;
+ /* PCI bus/device/function decoded from the first 'reg' entry. */
+ uint_t amd_nb_bus;
+ uint_t amd_nb_dev;
+ uint_t amd_nb_func;
+ /* Data fabric device this northbridge is mapped to, NULL until mapped. */
+ struct amdf17df *amd_nb_df;
+ /* Processor node id copied from the mapped DF; UINT_MAX until then. */
+ uint_t amd_nb_procnodeid;
+ /* Minor number of the temperature node; zero until one is allocated. */
+ id_t amd_nb_temp_minor;
+ /* gethrtime() value latched at the most recent read attempt. */
+ hrtime_t amd_nb_temp_last_read;
+ /* Whole-degree adjustment applied to the reading (Tctl to Tdie). */
+ int amd_nb_temp_off;
+ /* Raw AMDF17_SMU_THERMAL_CURTEMP register value from the last read. */
+ uint32_t amd_nb_temp_reg;
+ /* Values derived from the above */
+ /* Adjusted temperature in units of 1/AMDF17_THERMAL_GRANULARITY degree. */
+ int64_t amd_nb_temp;
+} amdf17nb_t;
+
+/*
+ * Per-data-fabric state. One of these is allocated for each data fabric
+ * function 0 PCI device that attaches and it is kept on the global
+ * amd_nbdf_dfs list.
+ */
+typedef struct amdf17df {
+ /* Linkage on the global amd_nbdf_dfs list. */
+ list_node_t amd_df_link;
+ /* The dev_info_t of the attached data fabric function 0 instance. */
+ dev_info_t *amd_df_f0_dip;
+ /* PCI configuration space handle for function 0. */
+ ddi_acc_handle_t amd_df_f0_cfgspace;
+ /* Processor node id: PCI device number minus AMDF17_DF_FIRST_DEVICE. */
+ uint_t amd_df_procnodeid;
+ /* Bus number read from AMDF17_DF_CFG_ADDR_CTL; matched to amd_nb_bus. */
+ uint_t amd_df_iobus;
+ /* Northbridge this data fabric is mapped to, NULL until mapped. */
+ amdf17nb_t *amd_df_nb;
+} amdf17df_t;
+
+/*
+ * Global driver state shared by every northbridge and data fabric instance.
+ */
+typedef struct amdf17nbdf {
+ /* Held while walking or changing the lists and while reading the SMN. */
+ kmutex_t amd_nbdf_lock;
+ /* ID space that temperature minor numbers are allocated from. */
+ id_space_t *amd_nbdf_minors;
+ /* All attached northbridges (amdf17nb_t). */
+ list_t amd_nbdf_nbs;
+ /* All attached data fabric devices (amdf17df_t). */
+ list_t amd_nbdf_dfs;
+} amdf17nbdf_t;
+
+typedef enum {
+ AMD_NBDF_TYPE_UNKNOWN,
+ AMD_NBDF_TYPE_NORTHBRIDGE,
+ AMD_NBDF_TYPE_DATA_FABRIC
+} amdf17nbdf_type_t;
+
+typedef struct {
+ uint16_t amd_nbdft_pci_did;
+ amdf17nbdf_type_t amd_nbdft_type;
+} amdf17nbdf_table_t;
+
+static const amdf17nbdf_table_t amdf17nbdf_dev_map[] = {
+ /* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
+ { 0x1450, AMD_NBDF_TYPE_NORTHBRIDGE },
+ { 0x1460, AMD_NBDF_TYPE_DATA_FABRIC },
+ { PCI_EINVAL16 }
+};
+
+typedef struct {
+ const char *amd_nbdfo_brand;
+ uint_t amd_nbdfo_family;
+ int amd_nbdfo_off;
+} amdf17nbdf_offset_t;
+
+/*
+ * AMD processors report a control temperature (called Tctl) which may be
+ * different from the junction temperature, which is the value that is actually
+ * measured from the die (sometimes called Tdie or Tjct). This is done so that
+ * socket-based environmental monitoring can be consistent from a platform
+ * perspective, but doesn't help us. Unfortunately, these values aren't in
+ * datasheets that we can find, but have been documented partially in a series
+ * of blog posts by AMD when discussing their 'Ryzen Master' monitoring software
+ * for Windows.
+ *
+ * The brand strings below may contain partial matches, such as in the
+ * Threadripper cases, so we can match the entire family of processors. The
+ * offset value is the quantity in degrees that we should adjust Tctl to reach
+ * Tdie.
+ */
+static const amdf17nbdf_offset_t amdf17nbdf_offsets[] = {
+ { "AMD Ryzen 5 1600X", 0x17, -20 },
+ { "AMD Ryzen 7 1700X", 0x17, -20 },
+ { "AMD Ryzen 7 1800X", 0x17, -20 },
+ { "AMD Ryzen 7 2700X", 0x17, -10 },
+ { "AMD Ryzen Threadripper 19", 0x17, -27 },
+ { "AMD Ryzen Threadripper 29", 0x17, -27 },
+ { NULL }
+};
+
+/*
+ * This indicates a number of milliseconds that we should wait between reads.
+ * This is somewhat arbitrary, but the goal is to reduce cross call activity
+ * and reflect that the sensor may not update all the time.
+ */
+uint_t amdf17nbdf_cache_ms = 100;
+
+/*
+ * This indicates whether detach is allowed. It is not by default. See the
+ * theory statement section 'Attach and Detach Complications' for more
+ * information.
+ */
+uint_t amdf17nbdf_allow_detach = 0;
+
+/*
+ * Global data that we keep regarding the device.
+ */
+amdf17nbdf_t *amdf17nbdf;
+
+/*
+ * Find the northbridge whose temperature minor number is 'minor'. The caller
+ * must hold amd_nbdf_lock. Returns NULL when the minor is outside of the
+ * allowed range or when no northbridge uses it.
+ */
+static amdf17nb_t *
+amdf17nbdf_lookup_nb(amdf17nbdf_t *nbdf, minor_t minor)
+{
+	amdf17nb_t *cur;
+
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	if (minor < AMDF17_MINOR_LOW || minor > AMDF17_MINOR_HIGH) {
+		return (NULL);
+	}
+
+	/* Walk until we either run off the list or hit the matching minor. */
+	cur = list_head(&nbdf->amd_nbdf_nbs);
+	while (cur != NULL && cur->amd_nb_temp_minor != (id_t)minor) {
+		cur = list_next(&nbdf->amd_nbdf_nbs, cur);
+	}
+
+	return (cur);
+}
+
+/*
+ * Release everything that a northbridge structure owns: its minor nodes, its
+ * minor number (when one was allocated), its PCI configuration space handle,
+ * and finally the structure itself. A NULL nb is ignored.
+ */
+static void
+amdf17nbdf_cleanup_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	if (nb != NULL) {
+		ddi_remove_minor_node(nb->amd_nb_dip, NULL);
+
+		if (nb->amd_nb_temp_minor > 0) {
+			id_free(nbdf->amd_nbdf_minors, nb->amd_nb_temp_minor);
+		}
+
+		if (nb->amd_nb_cfgspace != NULL) {
+			pci_config_teardown(&nb->amd_nb_cfgspace);
+		}
+
+		kmem_free(nb, sizeof (*nb));
+	}
+}
+
+/*
+ * Release a data fabric structure: tear down its function 0 configuration
+ * space handle if one is present and free the structure. A NULL df is ignored.
+ */
+static void
+amdf17nbdf_cleanup_df(amdf17df_t *df)
+{
+	if (df != NULL) {
+		if (df->amd_df_f0_cfgspace != NULL) {
+			pci_config_teardown(&df->amd_df_f0_cfgspace);
+		}
+
+		kmem_free(df, sizeof (*df));
+	}
+}
+
+/*
+ * Read a 32-bit register at System Management Network address 'addr' through
+ * the northbridge 'nb' and store it in *valp. The caller must hold
+ * amd_nbdf_lock; this is enforced with VERIFY. Currently this always returns
+ * zero.
+ */
+static int
+amdf17nbdf_smn_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb, uint32_t addr,
+ uint32_t *valp)
+{
+ VERIFY(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+ /*
+  * Select the SMN register by writing its address and then fetch the
+  * value from the data register. NOTE(review): presumably the lock is
+  * what keeps another thread from changing the address between the two
+  * accesses -- confirm.
+  */
+ pci_config_put32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_ADDR, addr);
+ *valp = pci_config_get32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_DATA);
+
+ return (0);
+}
+
+/*
+ * Read the current temperature register for northbridge 'nb' and refresh the
+ * cached values in it: the raw register (amd_nb_temp_reg), the time of the
+ * read attempt (amd_nb_temp_last_read), and the adjusted temperature
+ * (amd_nb_temp) expressed in 1/8ths of a degree. The caller must hold
+ * amd_nbdf_lock. Returns zero on success or the error from the SMN read.
+ *
+ * The whole-degree portion is computed with signed arithmetic. Both the range
+ * adjustment and the per-model Tctl offset are negative, so a cool processor
+ * can legitimately end up below zero; doing this in an unsigned 32-bit value
+ * would wrap and be reported as a temperature of several billion units.
+ */
+static int
+amdf17nbdf_temp_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	int ret;
+	uint32_t reg, raw;
+	int64_t degrees, eighths;
+
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	/*
+	 * Update the last read time first. Even if this fails, we want to make
+	 * sure that we latch the fact that we tried.
+	 */
+	nb->amd_nb_temp_last_read = gethrtime();
+	if ((ret = amdf17nbdf_smn_read(nbdf, nb, AMDF17_SMU_THERMAL_CURTEMP,
+	    &reg)) != 0) {
+		return (ret);
+	}
+
+	nb->amd_nb_temp_reg = reg;
+
+	/*
+	 * Take the primary temperature value and break apart its fractional
+	 * part (the low bits, in 1/8ths of a degree) from its whole degrees.
+	 */
+	raw = AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(reg);
+	eighths = raw & AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK;
+	degrees = raw >> AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS;
+
+	if ((reg & AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL) != 0) {
+		degrees += AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ;
+	}
+	degrees += nb->amd_nb_temp_off;
+
+	/*
+	 * Recombine. We multiply rather than shift because 'degrees' may be
+	 * negative and left shifting a negative value is undefined.
+	 */
+	nb->amd_nb_temp = degrees *
+	    ((int64_t)1 << AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS) + eighths;
+
+	return (0);
+}
+
+/*
+ * Pick the temperature offset for this northbridge by matching the current
+ * CPU's family and brand string prefix against amdf17nbdf_offsets, and then
+ * perform an initial temperature read. Returns EOVERFLOW if the brand string
+ * does not fit our buffer; otherwise returns the result of the read.
+ */
+static int
+amdf17nbdf_temp_init(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	const amdf17nbdf_offset_t *ent;
+	char brand[256];
+	uint_t fam;
+
+	if (cpuid_getbrandstr(CPU, brand, sizeof (brand)) >= sizeof (brand)) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to read processor "
+		    "brand string, brand larger than internal buffer");
+		return (EOVERFLOW);
+	}
+
+	fam = cpuid_getfamily(CPU);
+
+	/* The table is terminated by an entry with a NULL brand. */
+	for (ent = amdf17nbdf_offsets; ent->amd_nbdfo_brand != NULL; ent++) {
+		if (fam == ent->amd_nbdfo_family &&
+		    strncmp(brand, ent->amd_nbdfo_brand,
+		    strlen(ent->amd_nbdfo_brand)) == 0) {
+			nb->amd_nb_temp_off = ent->amd_nbdfo_off;
+			break;
+		}
+	}
+
+	return (amdf17nbdf_temp_read(nbdf, nb));
+}
+
+/*
+ * Translate a PCI device ID into the kind of device that it represents by
+ * consulting amdf17nbdf_dev_map. IDs that are not in the table are reported as
+ * AMD_NBDF_TYPE_UNKNOWN.
+ */
+static amdf17nbdf_type_t
+amdf17nbdf_dip_type(uint16_t dev)
+{
+	const amdf17nbdf_table_t *ent;
+
+	/* The table is terminated by an entry whose ID is PCI_EINVAL16. */
+	for (ent = amdf17nbdf_dev_map; ent->amd_nbdft_pci_did != PCI_EINVAL16;
+	    ent++) {
+		if (ent->amd_nbdft_pci_did == dev) {
+			return (ent->amd_nbdft_type);
+		}
+	}
+
+	return (AMD_NBDF_TYPE_UNKNOWN);
+}
+
+/*
+ * Map northbridge 'nb' and data fabric device 'df' to one another. This
+ * records the processor node id on the northbridge, allocates a minor number,
+ * initializes the temperature data, and creates the "procnode.<id>" minor
+ * node. The caller must hold amd_nbdf_lock.
+ *
+ * Returns B_TRUE on success, and also when the northbridge was already mapped
+ * (a warning is logged and nothing is changed). Returns B_FALSE on failure. On
+ * failure everything done here is undone: the minor number is returned to the
+ * ID space and the northbridge is put back in its unmapped state. Otherwise a
+ * northbridge that stays on the list would keep a stale minor number, and a
+ * later mapping attempt would allocate a second one and leak the first.
+ */
+static boolean_t
+amdf17nbdf_map(amdf17nbdf_t *nbdf, amdf17nb_t *nb, amdf17df_t *df)
+{
+	int ret;
+	char buf[128];
+
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	/*
+	 * This means that we encountered a duplicate. We're going to stop
+	 * processing, but we're not going to fail its attach at this point.
+	 */
+	if (nb->amd_nb_df != NULL) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!trying to map NB %u/%u/%u "
+		    "to DF procnode %u, but NB is already mapped to DF "
+		    "procnode %u!",
+		    nb->amd_nb_bus, nb->amd_nb_dev, nb->amd_nb_func,
+		    df->amd_df_procnodeid, nb->amd_nb_df->amd_df_procnodeid);
+		return (B_TRUE);
+	}
+
+	/*
+	 * Now that we have found a mapping, initialize our temperature
+	 * information and create the minor node.
+	 */
+	nb->amd_nb_procnodeid = df->amd_df_procnodeid;
+	nb->amd_nb_temp_minor = id_alloc(nbdf->amd_nbdf_minors);
+
+	if ((ret = amdf17nbdf_temp_init(nbdf, nb)) != 0) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to init SMN "
+		    "temperature data on node %u: %d", nb->amd_nb_procnodeid,
+		    ret);
+		goto fail;
+	}
+
+	if (snprintf(buf, sizeof (buf), "procnode.%u", nb->amd_nb_procnodeid) >=
+	    sizeof (buf)) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!unexpected buffer name "
+		    "overrun assembling temperature minor %u",
+		    nb->amd_nb_procnodeid);
+		goto fail;
+	}
+
+	if (ddi_create_minor_node(nb->amd_nb_dip, buf, S_IFCHR,
+	    nb->amd_nb_temp_minor, DDI_NT_SENSOR_TEMP_CPU, 0) != DDI_SUCCESS) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to create minor node "
+		    "%s", buf);
+		goto fail;
+	}
+
+	/*
+	 * Now that it's all done, note that they're mapped to each other.
+	 */
+	nb->amd_nb_df = df;
+	df->amd_df_nb = nb;
+
+	return (B_TRUE);
+
+fail:
+	/*
+	 * Undo the partial mapping. A minor of zero is what a freshly
+	 * allocated northbridge has and is treated as "none allocated" by the
+	 * cleanup code, so the ID cannot be freed twice. UINT_MAX is the
+	 * "not yet mapped" processor node id.
+	 */
+	id_free(nbdf->amd_nbdf_minors, nb->amd_nb_temp_minor);
+	nb->amd_nb_temp_minor = 0;
+	nb->amd_nb_procnodeid = UINT_MAX;
+
+	return (B_FALSE);
+}
+
+/*
+ * Add a newly attached northbridge to the global list and, if the data fabric
+ * device whose I/O bus matches this northbridge's PCI bus has already
+ * attached, map the two together.
+ *
+ * Returns B_TRUE if the northbridge was added, whether or not a matching data
+ * fabric device has been seen yet. Returns B_FALSE if the mapping failed. In
+ * that case the northbridge is taken back off of the list before returning,
+ * because the caller frees it on failure and must not leave a freed structure
+ * linked into the list.
+ */
+static boolean_t
+amdf17nbdf_add_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	amdf17df_t *df;
+	boolean_t ret = B_TRUE;
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	list_insert_tail(&nbdf->amd_nbdf_nbs, nb);
+	for (df = list_head(&nbdf->amd_nbdf_dfs); df != NULL;
+	    df = list_next(&nbdf->amd_nbdf_dfs, df)) {
+		if (nb->amd_nb_bus == df->amd_df_iobus) {
+			ret = amdf17nbdf_map(nbdf, nb, df);
+			break;
+		}
+	}
+
+	if (!ret) {
+		list_remove(&nbdf->amd_nbdf_nbs, nb);
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (ret);
+}
+
+/*
+ * Add a newly attached data fabric device to the global list and, if the
+ * northbridge that sits on this data fabric's I/O bus has already attached,
+ * map the two together.
+ *
+ * Returns B_TRUE if the device was added, whether or not a matching
+ * northbridge has been seen yet. Returns B_FALSE if the mapping failed. In
+ * that case the device is taken back off of the list before returning, because
+ * the caller frees it on failure and must not leave a freed structure linked
+ * into the list.
+ *
+ * Northbridges and data fabric devices map 1:1, so we stop at the first
+ * northbridge on the matching bus, just as amdf17nbdf_add_nb() stops at the
+ * first matching data fabric device. This also guarantees that when we fail,
+ * no northbridge is left pointing at the device that is about to be freed.
+ */
+static boolean_t
+amdf17nbdf_add_df(amdf17nbdf_t *nbdf, amdf17df_t *df)
+{
+	amdf17nb_t *nb;
+	boolean_t ret = B_TRUE;
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	list_insert_tail(&nbdf->amd_nbdf_dfs, df);
+	for (nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) {
+		if (nb->amd_nb_bus == df->amd_df_iobus) {
+			ret = amdf17nbdf_map(nbdf, nb, df);
+			break;
+		}
+	}
+
+	if (!ret) {
+		list_remove(&nbdf->amd_nbdf_dfs, df);
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (ret);
+}
+
+/*
+ * Allocate and fill in the state for a northbridge instance and add it to the
+ * global state. On success the new structure owns 'hdl'. If adding it fails,
+ * the structure (and with it the handle) is cleaned up and B_FALSE is
+ * returned.
+ */
+static boolean_t
+amdf17nbdf_attach_nb(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl,
+    uint_t bus, uint_t dev, uint_t func)
+{
+	amdf17nb_t *nb = kmem_zalloc(sizeof (*nb), KM_SLEEP);
+
+	nb->amd_nb_dip = dip;
+	nb->amd_nb_cfgspace = hdl;
+	nb->amd_nb_bus = bus;
+	nb->amd_nb_dev = dev;
+	nb->amd_nb_func = func;
+
+	/*
+	 * Until we are mapped to a data fabric device, use a node id that the
+	 * processor will never give us.
+	 */
+	nb->amd_nb_procnodeid = UINT_MAX;
+
+	if (amdf17nbdf_add_nb(nbdf, nb)) {
+		return (B_TRUE);
+	}
+
+	amdf17nbdf_cleanup_nb(nbdf, nb);
+	return (B_FALSE);
+}
+
+/*
+ * Validate a data fabric PCI device, allocate its state, and add it to the
+ * global state. The device must be function 0 of a device on bus
+ * AMDF17_DF_BUSNO whose device number is at least AMDF17_DF_FIRST_DEVICE.
+ *
+ * This function takes ownership of 'hdl' in every case: on success it is
+ * stored in the new structure, and on any failure it is torn down before
+ * returning B_FALSE. The caller does not release the handle after calling us,
+ * so the validation failures below must do so themselves or the handle would
+ * be leaked.
+ */
+static boolean_t
+amdf17nbdf_attach_df(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl,
+    uint_t bus, uint_t dev, uint_t func)
+{
+	amdf17df_t *df;
+
+	if (bus != AMDF17_DF_BUSNO) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "unexpected PCI bus assignment, found 0x%x, expected 0x%x",
+		    bus, AMDF17_DF_BUSNO);
+		goto invalid;
+	}
+
+	if (dev < AMDF17_DF_FIRST_DEVICE) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "PCI device assignment below the first minimum device "
+		    "(0x%x): 0x%x", AMDF17_DF_FIRST_DEVICE, dev);
+		goto invalid;
+	}
+
+	/*
+	 * At the moment we only care about function 0. However, we may care
+	 * about Function 4 in the future which has access to the FICAA.
+	 * However, only function zero should ever be attached, so this is just
+	 * an extra precaution.
+	 */
+	if (func != 0) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "unxpected PCI function assignment, found 0x%x, expected "
+		    "0x0", func);
+		goto invalid;
+	}
+
+	df = kmem_zalloc(sizeof (amdf17df_t), KM_SLEEP);
+	df->amd_df_f0_dip = dip;
+	df->amd_df_f0_cfgspace = hdl;
+	df->amd_df_procnodeid = dev - AMDF17_DF_FIRST_DEVICE;
+	/* The low bits hold the first PCI bus belonging to this node. */
+	df->amd_df_iobus = pci_config_get32(hdl, AMDF17_DF_CFG_ADDR_CTL) &
+	    AMDF17_DF_CFG_ADDR_CTL_MASK;
+
+	if (!amdf17nbdf_add_df(nbdf, df)) {
+		/* This also tears down the handle now owned by df. */
+		amdf17nbdf_cleanup_df(df);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+
+invalid:
+	pci_config_teardown(&hdl);
+	return (B_FALSE);
+}
+
+/*
+ * open(9E) entry point. The caller must be in the global zone and must pass
+ * the drv_priv() check (EPERM otherwise). Exclusive, non-blocking, and
+ * writable opens, as well as non-character opens, are rejected with EINVAL.
+ * ENXIO is returned if the minor does not name a known sensor.
+ */
+static int
+amdf17nbdf_open(dev_t *devp, int flags, int otype, cred_t *credp)
+{
+	amdf17nbdf_t *state = amdf17nbdf;
+	amdf17nb_t *nb;
+
+	if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp) != 0) {
+		return (EPERM);
+	}
+
+	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0 || otype != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	/*
+	 * Make sure that the minor refers to a northbridge we know about.
+	 */
+	mutex_enter(&state->amd_nbdf_lock);
+	nb = amdf17nbdf_lookup_nb(state, getminor(*devp));
+	mutex_exit(&state->amd_nbdf_lock);
+
+	return (nb != NULL ? 0 : ENXIO);
+}
+
+/*
+ * Handle SENSOR_IOCTL_TYPE: tell the caller that this is a temperature
+ * sensor. Returns EFAULT if the result cannot be copied out to 'arg'.
+ */
+static int
+amdf17nbdf_ioctl_kind(intptr_t arg, int mode)
+{
+	sensor_ioctl_kind_t reply;
+	int err = 0;
+
+	bzero(&reply, sizeof (reply));
+	reply.sik_kind = SENSOR_KIND_TEMPERATURE;
+
+	if (ddi_copyout(&reply, (void *)arg, sizeof (reply),
+	    mode & FKIOCTL) != 0) {
+		err = EFAULT;
+	}
+
+	return (err);
+}
+
+/*
+ * Handle SENSOR_IOCTL_TEMPERATURE for the sensor named by 'minor'. The cached
+ * reading is refreshed first if it is more than amdf17nbdf_cache_ms
+ * milliseconds old. Returns ENXIO for an unknown minor, the error from the
+ * hardware read if a refresh fails, or EFAULT if the copyout fails.
+ */
+static int
+amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode)
+{
+	sensor_ioctl_temperature_t out;
+	amdf17nb_t *nb;
+	hrtime_t age_ms;
+	int err = 0;
+
+	bzero(&out, sizeof (out));
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	if ((nb = amdf17nbdf_lookup_nb(nbdf, minor)) == NULL) {
+		mutex_exit(&nbdf->amd_nbdf_lock);
+		return (ENXIO);
+	}
+
+	/* Only go back to the hardware when the cached value is stale. */
+	age_ms = NSEC2MSEC(gethrtime() - nb->amd_nb_temp_last_read);
+	if (age_ms > 0 && age_ms > (hrtime_t)amdf17nbdf_cache_ms) {
+		err = amdf17nbdf_temp_read(nbdf, nb);
+	}
+
+	if (err == 0) {
+		out.sit_unit = SENSOR_UNIT_CELSIUS;
+		out.sit_temp = nb->amd_nb_temp;
+		out.sit_gran = AMDF17_THERMAL_GRANULARITY;
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	if (err != 0) {
+		return (err);
+	}
+
+	if (ddi_copyout(&out, (void *)arg, sizeof (out), mode & FKIOCTL) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl(9E) entry point. The device must have been opened for reading
+ * (EINVAL otherwise). The sensor type and temperature ioctls are dispatched
+ * to their handlers; anything else gets ENOTTY.
+ */
+static int
+amdf17nbdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	amdf17nbdf_t *state = amdf17nbdf;
+
+	if ((mode & FREAD) == 0) {
+		return (EINVAL);
+	}
+
+	if (cmd == SENSOR_IOCTL_TYPE) {
+		return (amdf17nbdf_ioctl_kind(arg, mode));
+	}
+
+	if (cmd == SENSOR_IOCTL_TEMPERATURE) {
+		return (amdf17nbdf_ioctl_temp(state, getminor(dev), arg,
+		    mode));
+	}
+
+	return (ENOTTY);
+}
+
+/*
+ * close(9E) entry point. We don't really do any state tracking on close, so
+ * for now, just allow it to always succeed. None of the arguments are used.
+ */
+static int
+amdf17nbdf_close(dev_t dev, int flags, int otype, cred_t *credp)
+{
+ return (0);
+}
+
+/*
+ * attach(9E) entry point. DDI_RESUME needs no work. For DDI_ATTACH we decode
+ * the PCI bus/device/function from the first 'reg' entry, set up access to
+ * configuration space, and then hand the instance to the northbridge or data
+ * fabric specific attach routine based on its PCI device ID. Those routines
+ * take over the configuration space handle; for an unrecognized device we
+ * tear it down ourselves.
+ */
+static int
+amdf17nbdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	amdf17nbdf_t *state = amdf17nbdf;
+	ddi_acc_handle_t cfg;
+	uint_t nregs, bus, dev, func;
+	int *regs;
+	int reg0;
+	boolean_t ok;
+
+	switch (cmd) {
+	case DDI_RESUME:
+		return (DDI_SUCCESS);
+	case DDI_ATTACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg",
+	    &regs, &nregs) != DDI_PROP_SUCCESS) {
+		dev_err(dip, CE_WARN, "!failed to find pci 'reg' property");
+		return (DDI_FAILURE);
+	}
+
+	if (nregs < 1) {
+		ddi_prop_free(regs);
+		return (DDI_FAILURE);
+	}
+
+	/* Only the first entry is needed; grab it and release the array. */
+	reg0 = regs[0];
+	ddi_prop_free(regs);
+
+	bus = PCI_REG_BUS_G(reg0);
+	dev = PCI_REG_DEV_G(reg0);
+	func = PCI_REG_FUNC_G(reg0);
+
+	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS) {
+		dev_err(dip, CE_WARN, "!failed to map pci devices");
+		return (DDI_FAILURE);
+	}
+
+	switch (amdf17nbdf_dip_type(pci_config_get16(cfg, PCI_CONF_DEVID))) {
+	case AMD_NBDF_TYPE_NORTHBRIDGE:
+		ok = amdf17nbdf_attach_nb(state, dip, cfg, bus, dev, func);
+		break;
+	case AMD_NBDF_TYPE_DATA_FABRIC:
+		ok = amdf17nbdf_attach_df(state, dip, cfg, bus, dev, func);
+		break;
+	default:
+		pci_config_teardown(&cfg);
+		ok = B_FALSE;
+		break;
+	}
+
+	return (ok ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+/*
+ * Unfortunately, it's hard for us to really support detach here. The problem is
+ * that we need both the data fabric devices and the northbridges to make sure
+ * that we map everything. However, only the northbridges actually create minor
+ * nodes that'll be opened and thus trigger them to reattach when accessed. What
+ * we should probably look at doing in the future is making this into a nexus
+ * driver that enumerates children like a temperature driver.
+ *
+ * detach(9E) entry point. DDI_SUSPEND is a no-op. DDI_DETACH is only honored
+ * when amdf17nbdf_allow_detach is non-zero; in that case the northbridge or
+ * data fabric entry that matches dip is unlinked from its peer, removed from
+ * its list and cleaned up. Returns DDI_FAILURE for any other command, when the
+ * global state is missing, or when dip matches no tracked device.
+ */
+static int
+amdf17nbdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	amdf17nbdf_t *nbdf = amdf17nbdf;
+
+	if (cmd == DDI_SUSPEND) {
+		return (DDI_SUCCESS);
+	}
+
+	/* Never tear a device down for a command we don't recognize. */
+	if (cmd != DDI_DETACH) {
+		return (DDI_FAILURE);
+	}
+
+	if (nbdf == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	if (amdf17nbdf_allow_detach == 0) {
+		return (DDI_FAILURE);
+	}
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	for (amdf17nb_t *nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) {
+		if (dip == nb->amd_nb_dip) {
+			list_remove(&nbdf->amd_nbdf_nbs, nb);
+			/* Break the back-pointer from the paired DF. */
+			if (nb->amd_nb_df != NULL) {
+				ASSERT3P(nb->amd_nb_df->amd_df_nb, ==, nb);
+				nb->amd_nb_df->amd_df_nb = NULL;
+			}
+			amdf17nbdf_cleanup_nb(nbdf, nb);
+			mutex_exit(&nbdf->amd_nbdf_lock);
+			return (DDI_SUCCESS);
+		}
+	}
+
+	/*
+	 * Walk the data fabric list with the data fabric list itself; using
+	 * the northbridge list here would apply the wrong link offset.
+	 */
+	for (amdf17df_t *df = list_head(&nbdf->amd_nbdf_dfs); df != NULL;
+	    df = list_next(&nbdf->amd_nbdf_dfs, df)) {
+		if (dip == df->amd_df_f0_dip) {
+			list_remove(&nbdf->amd_nbdf_dfs, df);
+			/* Break the back-pointer from the paired NB. */
+			if (df->amd_df_nb != NULL) {
+				ASSERT3P(df->amd_df_nb->amd_nb_df, ==, df);
+				df->amd_df_nb->amd_nb_df = NULL;
+			}
+			amdf17nbdf_cleanup_df(df);
+			mutex_exit(&nbdf->amd_nbdf_lock);
+			return (DDI_SUCCESS);
+		}
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * getinfo(9E) entry point. Maps the minor number in the dev_t passed as arg
+ * to the owning northbridge and returns either its dev_info_t or its
+ * instance number through resultp.
+ */
+static int
+amdf17nbdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
+    void **resultp)
+{
+	amdf17nbdf_t *state;
+	amdf17nb_t *owner;
+	minor_t m;
+
+	if (cmd != DDI_INFO_DEVT2DEVINFO && cmd != DDI_INFO_DEVT2INSTANCE) {
+		return (DDI_FAILURE);
+	}
+
+	m = getminor((dev_t)arg);
+	state = amdf17nbdf;
+
+	mutex_enter(&state->amd_nbdf_lock);
+	owner = amdf17nbdf_lookup_nb(state, (id_t)m);
+	if (owner == NULL) {
+		mutex_exit(&state->amd_nbdf_lock);
+		return (DDI_FAILURE);
+	}
+
+	if (cmd == DDI_INFO_DEVT2INSTANCE) {
+		int inst = ddi_get_instance(owner->amd_nb_dip);
+		*resultp = (void *)(uintptr_t)inst;
+	} else {
+		*resultp = owner->amd_nb_dip;
+	}
+	mutex_exit(&state->amd_nbdf_lock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Release everything hanging off the global driver state: every tracked
+ * northbridge and data fabric entry, both lists, the minor number id space
+ * (if it was created), the lock, and finally the structure itself.
+ */
+static void
+amdf17nbdf_destroy(amdf17nbdf_t *nbdf)
+{
+	amdf17nb_t *nb;
+	amdf17df_t *df;
+
+	for (nb = list_remove_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_remove_head(&nbdf->amd_nbdf_nbs)) {
+		amdf17nbdf_cleanup_nb(nbdf, nb);
+	}
+	list_destroy(&nbdf->amd_nbdf_nbs);
+
+	for (df = list_remove_head(&nbdf->amd_nbdf_dfs); df != NULL;
+	    df = list_remove_head(&nbdf->amd_nbdf_dfs)) {
+		amdf17nbdf_cleanup_df(df);
+	}
+	list_destroy(&nbdf->amd_nbdf_dfs);
+
+	/* The id space is absent if creation failed part way through. */
+	if (nbdf->amd_nbdf_minors != NULL) {
+		id_space_destroy(nbdf->amd_nbdf_minors);
+	}
+
+	mutex_destroy(&nbdf->amd_nbdf_lock);
+	kmem_free(nbdf, sizeof (amdf17nbdf_t));
+}
+
+/*
+ * Allocate and initialize the global driver state: the lock, the northbridge
+ * and data fabric lists, and the id space that minor numbers come from.
+ * Returns NULL, with everything released, if the id space can't be created.
+ */
+static amdf17nbdf_t *
+amdf17nbdf_create(void)
+{
+	amdf17nbdf_t *state = kmem_zalloc(sizeof (amdf17nbdf_t), KM_SLEEP);
+
+	mutex_init(&state->amd_nbdf_lock, NULL, MUTEX_DRIVER, NULL);
+	list_create(&state->amd_nbdf_nbs, sizeof (amdf17nb_t),
+	    offsetof(amdf17nb_t, amd_nb_link));
+	list_create(&state->amd_nbdf_dfs, sizeof (amdf17df_t),
+	    offsetof(amdf17df_t, amd_df_link));
+
+	state->amd_nbdf_minors = id_space_create("amdf17nbdf_minors",
+	    AMDF17_MINOR_LOW, AMDF17_MINOR_HIGH);
+	if (state->amd_nbdf_minors != NULL) {
+		return (state);
+	}
+
+	amdf17nbdf_destroy(state);
+	return (NULL);
+}
+
+/*
+ * Character device entry points. Only open, close and ioctl are implemented
+ * by this driver; the remaining slots use the stock nodev/nochpoll stubs.
+ */
+static struct cb_ops amdf17nbdf_cb_ops = {
+ .cb_open = amdf17nbdf_open,
+ .cb_close = amdf17nbdf_close,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = amdf17nbdf_ioctl,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_flag = D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
+/*
+ * Device operations. The driver supplies its own getinfo, attach and detach
+ * routines and points at amdf17nbdf_cb_ops for the character device entry
+ * points; everything else uses the generic DDI stubs.
+ */
+static struct dev_ops amdf17nbdf_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+ .devo_getinfo = amdf17nbdf_getinfo,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_attach = amdf17nbdf_attach,
+ .devo_detach = amdf17nbdf_detach,
+ .devo_reset = nodev,
+ .devo_power = ddi_power,
+ .devo_quiesce = ddi_quiesce_not_needed,
+ .devo_cb_ops = &amdf17nbdf_cb_ops
+};
+
+/* Loadable driver linkage: ties the module to amdf17nbdf_dev_ops. */
+static struct modldrv amdf17nbdf_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "AMD Family 17h Driver",
+ .drv_dev_ops = &amdf17nbdf_dev_ops
+};
+
+/* Module linkage passed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage amdf17nbdf_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &amdf17nbdf_modldrv, NULL }
+};
+
+/*
+ * Module load entry point. Creates the global driver state and installs the
+ * module. Returns ENOMEM if the state can't be created, otherwise the result
+ * of mod_install().
+ *
+ * The global pointer is published before mod_install() because the attach,
+ * detach and getinfo entry points read it. On failure the state we just built
+ * is destroyed and the global is cleared again.
+ */
+int
+_init(void)
+{
+	int ret;
+	amdf17nbdf_t *nbdf;
+
+	if ((nbdf = amdf17nbdf_create()) == NULL) {
+		return (ENOMEM);
+	}
+
+	amdf17nbdf = nbdf;
+
+	if ((ret = mod_install(&amdf17nbdf_modlinkage)) != 0) {
+		/* Destroy the state we created, not a stale/NULL global. */
+		amdf17nbdf = NULL;
+		amdf17nbdf_destroy(nbdf);
+		return (ret);
+	}
+
+	return (ret);
+}
+
+/* Module information entry point; defers entirely to mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	int ret = mod_info(&amdf17nbdf_modlinkage, modinfop);
+
+	return (ret);
+}
+
+/*
+ * Module unload entry point. The global state is only torn down once
+ * mod_remove() has succeeded; otherwise its error is passed straight back.
+ */
+int
+_fini(void)
+{
+	int ret = mod_remove(&amdf17nbdf_modlinkage);
+
+	if (ret == 0) {
+		amdf17nbdf_destroy(amdf17nbdf);
+		amdf17nbdf = NULL;
+	}
+
+	return (ret);
+}
diff --git a/usr/src/uts/intel/io/coretemp/coretemp.c b/usr/src/uts/intel/io/coretemp/coretemp.c
new file mode 100644
index 0000000000..e21d385991
--- /dev/null
+++ b/usr/src/uts/intel/io/coretemp/coretemp.c
@@ -0,0 +1,784 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2019, Joyent, Inc.
+ */
+
+/*
+ * Intel CPU Thermal sensor driver
+ *
+ * The MSRs used here were introduced with the 'Core' family processors
+ * and have since spread beyond there, even to the Atom line. Currently,
+ * temperature sensors exist on a per-core basis and optionally on a per-package
+ * basis. The temperature sensor exposes a reading that's relative to the
+ * processor's maximum junction temperature, often referred to as Tj Max. We
+ * currently only support models where we can determine that junction
+ * temperature programmatically. For older processors, we would need to track
+ * down the datasheet. Unfortunately, the values here are often on a per-brand
+ * string basis. That is, two CPUs with the same model and stepping that have
+ * been binned differently can have different junction temperatures.
+ *
+ * The temperature is exposed through /dev and uses a semi-standard sensor
+ * framework. We expose one minor node per CPU core and one minor node per CPU
+ * package, if that is supported. Reads are rate-limited in the driver at 100ms
+ * by default per the global variable coretemp_cache_ms.
+ */
+
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/list.h>
+#include <sys/stddef.h>
+#include <sys/cmn_err.h>
+#include <sys/id_space.h>
+#include <sys/x86_archext.h>
+#include <sys/cpu_module.h>
+#include <sys/ontrap.h>
+#include <sys/cpuvar.h>
+#include <sys/x_call.h>
+#include <sys/sensors.h>
+
+#define CORETEMP_MINOR_MIN 1
+#define CORETEMP_MINOR_MAX INT32_MAX
+
+/*
+ * Per-core sensor state. One of these is allocated by coretemp_init_core()
+ * for each strand-zero CMI handle and linked onto coretemp_cores.
+ */
+typedef struct coretemp_core {
+ /* Linkage on the coretemp_cores list. */
+ list_node_t ctc_link;
+ /* Minor number of the per-core node, allocated from coretemp_ids. */
+ id_t ctc_core_minor;
+ /*
+ * Minor number of the per-package node. Only allocated for core 0 when
+ * package sensors are supported; otherwise left at zero.
+ */
+ id_t ctc_pkg_minor;
+ /* CMI class/chip/core/strand used to look the handle back up. */
+ enum cmi_hdl_class ctc_class;
+ uint_t ctc_chip;
+ uint_t ctc_core;
+ uint_t ctc_strand;
+ /* Tj Max as extracted from MSR_TEMPERATURE_TARGET. */
+ uint_t ctc_tjmax;
+ /* gethrtime() value taken at the start of the last coretemp_read(). */
+ hrtime_t ctc_last_read;
+ /* Raw MSR contents captured by the last successful read. */
+ uint64_t ctc_core_status;
+ uint64_t ctc_core_intr;
+ uint64_t ctc_pkg_status;
+ uint64_t ctc_pkg_intr;
+ /* Count of reads that were flagged invalid or exceeded Tj Max. */
+ uint64_t ctc_invalid_reads;
+ /* The following fields are derived from above */
+ uint_t ctc_temperature;
+ uint_t ctc_resolution;
+ uint_t ctc_pkg_temperature;
+} coretemp_core_t;
+
+/*
+ * Driver-wide state; a single instance is created in coretemp_attach().
+ */
+typedef struct coretemp {
+ /* dev_info_t handed to attach; owner of the minor nodes. */
+ dev_info_t *coretemp_dip;
+ /* Id space that core and package minor numbers are allocated from. */
+ id_space_t *coretemp_ids;
+ /* Scratch cpuset used to aim the MSR-read cross call at one CPU. */
+ cpuset_t *coretemp_cpuset;
+ /* B_TRUE when the X86FSET_PKG_THERMAL feature is present. */
+ boolean_t coretemp_pkg;
+ /* Held while looking up cores and while reading MSRs. */
+ kmutex_t coretemp_mutex;
+ /* List of coretemp_core_t, linked through ctc_link. */
+ list_t coretemp_cores;
+} coretemp_t;
+
+/*
+ * The single driver instance. Set at the end of a successful attach and
+ * cleared again by detach; NULL whenever the driver is not attached.
+ */
+coretemp_t *coretemp;
+
+/*
+ * This indicates a number of milliseconds that we should wait between reads.
+ * This is somewhat arbitrary, but the goal is to reduce cross call activity
+ * and reflect that the sensor may not update all the time. The temperature
+ * ioctl only re-reads the MSRs once more than this many milliseconds have
+ * passed since the core's last read; otherwise it returns the cached values.
+ */
+uint_t coretemp_cache_ms = 100;
+
+/*
+ * Read a single MSR on whichever CPU this runs on. arg1 is the MSR number,
+ * arg2 points to the uint64_t that receives the value and arg3 points to the
+ * cmi_errno_t that receives the outcome. coretemp_rdmsr() either calls this
+ * directly or passes it to xc_call(). The return value is always 0; callers
+ * must look at *errp for the real status.
+ */
+static int
+coretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
+{
+ uint_t msr = (uint_t)arg1;
+ uint64_t *valp = (uint64_t *)arg2;
+ cmi_errno_t *errp = (cmi_errno_t *)arg3;
+
+ on_trap_data_t otd;
+
+ /*
+ * The read is done under on_trap() protection: a non-zero return from
+ * on_trap() is reported as CMIERR_MSRGPF, while a failing
+ * checked_rdmsr() is reported as CMIERR_NOTSUP.
+ */
+ if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
+ if (checked_rdmsr(msr, valp) == 0) {
+ *errp = CMI_SUCCESS;
+ } else {
+ *errp = CMIERR_NOTSUP;
+ }
+ } else {
+ *errp = CMIERR_MSRGPF;
+ }
+ /* Always drop the trap protection, whichever path was taken. */
+ no_trap();
+
+ return (0);
+}
+
+/*
+ * This really should just be a call to the CMI handle to provide us the MSR.
+ * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is
+ * fixed for use outside of a panic-like context.
+ */
+static int
+coretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp)
+{
+	int status = CMI_SUCCESS;
+	id_t target = cmi_hdl_logical_id(hdl);
+
+	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
+
+	/* Stay put while we decide whether a cross call is needed. */
+	kpreempt_disable();
+	if (CPU->cpu_id != target) {
+		/* Some other CPU owns the MSR: cross call to just that CPU. */
+		cpuset_only(ct->coretemp_cpuset, (uint_t)target);
+		xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&status,
+		    (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc);
+	} else {
+		/* Already on the right CPU, so read it in place. */
+		(void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp,
+		    (xc_arg_t)&status);
+	}
+	kpreempt_enable();
+
+	return (status);
+}
+
+/*
+ * Translate a CMI error into an errno: CMIERR_NOTSUP becomes ENOTSUP and
+ * every other value is reported as EIO.
+ */
+static int
+coretemp_cmi_errno(cmi_errno_t e)
+{
+	return (e == CMIERR_NOTSUP ? ENOTSUP : EIO);
+}
+
+/*
+ * Answer the question of whether or not the driver can support the CPU in
+ * question. Right now we have the following constraints for supporting the CPU:
+ *
+ * o The CPU is made by Intel
+ * o The CPU has the Digital Thermal Sensor
+ * o The CPU family is 6, which is usually implicit from the above
+ * o We can determine its junction temperature through an MSR
+ *
+ * If we can't determine the junction temperature programmatically, then we need
+ * to set up tables of CPUs to do so. This can be fleshed out and improved.
+ */
+static boolean_t
+coretemp_supported(void)
+{
+	uint_t model;
+
+	/* Intel, family 6, with the digital thermal sensor feature. */
+	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel ||
+	    !is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL) ||
+	    cpuid_getfamily(CPU) != 6) {
+		return (B_FALSE);
+	}
+
+	/* Penryn and everything numbered below it is excluded. */
+	model = cpuid_getmodel(CPU);
+	if (model <= INTC_MODEL_PENRYN) {
+		return (B_FALSE);
+	}
+
+	/* A handful of later models are excluded individually as well. */
+	switch (model) {
+	case INTC_MODEL_SILVERTHORNE:
+	case INTC_MODEL_LINCROFT:
+	case INTC_MODEL_PENWELL:
+	case INTC_MODEL_CLOVERVIEW:
+	case INTC_MODEL_CEDARVIEW:
+		return (B_FALSE);
+	default:
+		return (B_TRUE);
+	}
+}
+
+/*
+ * Find the core whose per-core or per-package minor number equals minor.
+ * Returns NULL when the minor is out of range or unknown. The caller must
+ * hold coretemp_mutex.
+ */
+static coretemp_core_t *
+coretemp_lookup_core(coretemp_t *ct, minor_t minor)
+{
+	coretemp_core_t *cur;
+	id_t want = (id_t)minor;
+
+	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
+
+	if (minor < CORETEMP_MINOR_MIN || minor > CORETEMP_MINOR_MAX) {
+		return (NULL);
+	}
+
+	cur = list_head(&ct->coretemp_cores);
+	while (cur != NULL) {
+		boolean_t is_core, is_pkg;
+
+		is_core = (cur->ctc_core_minor == want);
+		/* A package minor only counts if one was ever allocated. */
+		is_pkg = (cur->ctc_pkg_minor >= CORETEMP_MINOR_MIN &&
+		    cur->ctc_pkg_minor == want);
+		if (is_core || is_pkg) {
+			return (cur);
+		}
+		cur = list_next(&ct->coretemp_cores, cur);
+	}
+
+	return (NULL);
+}
+
+
+/*
+ * We need to determine the value of Tj Max as all temperature sensors are
+ * derived from this value. The ease of this depends on how old the processor in
+ * question is. The Core family processors after Penryn have support for an MSR
+ * that tells us what to go for. In the Atom family, processors starting with
+ * Silvermont have support for an MSR that documents this value. For older
+ * processors, one needs to track down the datasheet for a specific processor.
+ * Two processors in the same family/model may have different values of Tj Max.
+ * At the moment, we only support this on processors that have that MSR.
+ *
+ * On success ctc->ctc_tjmax is set and 0 is returned. If the MSR read itself
+ * fails, the CMI error is translated with coretemp_cmi_errno() and returned.
+ * If the read succeeds but the MSR is entirely zero, EINVAL is returned.
+ */
+static int
+coretemp_calculate_tjmax(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
+{
+	cmi_errno_t e;
+	uint64_t val = 0;
+
+	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val);
+
+	/*
+	 * Check for a failed read first. val starts out as zero, so testing
+	 * it before the error would report every read failure as EINVAL and
+	 * hide the real cause.
+	 */
+	if (e != CMI_SUCCESS) {
+		return (coretemp_cmi_errno(e));
+	}
+
+	/* An all-zero register carries no usable target temperature. */
+	if (val == 0) {
+		return (EINVAL);
+	}
+
+	ctc->ctc_tjmax = MSR_TEMPERATURE_TARGET_TARGET(val);
+	return (0);
+}
+
+/*
+ * Refresh the cached sensor data for one core: the raw thermal status and
+ * interrupt MSRs, the derived core temperature and resolution and, for core 0
+ * on systems with package sensors, the package MSRs and package temperature.
+ * Returns 0 on success or an errno from coretemp_cmi_errno() if any MSR read
+ * fails. A reading that is flagged invalid or is not below Tj Max is counted
+ * in ctc_invalid_reads and leaves the previous temperature in place; it is
+ * not treated as an error.
+ */
+static int
+coretemp_read(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
+{
+ cmi_errno_t e;
+ int err = 0;
+ uint64_t val = 0;
+
+ /*
+ * The timestamp is taken before any MSR is read, so it is updated even
+ * when one of the reads below fails.
+ */
+ ctc->ctc_last_read = gethrtime();
+
+ e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_STATUS, &val);
+ if (e == CMI_SUCCESS) {
+ ctc->ctc_core_status = val;
+ } else {
+ err = coretemp_cmi_errno(e);
+ dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
+ "thermal status on %u/%u: %d", ctc->ctc_chip, ctc->ctc_core,
+ err);
+ return (err);
+ }
+
+ e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_INTERRUPT, &val);
+ if (e == CMI_SUCCESS) {
+ ctc->ctc_core_intr = val;
+ } else {
+ err = coretemp_cmi_errno(e);
+ dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
+ "thermal interrupt on %u/%u: %d", ctc->ctc_chip,
+ ctc->ctc_core, err);
+ return (err);
+ }
+
+ /*
+ * If the last read wasn't valid, then we should keep the current state.
+ */
+ if ((ctc->ctc_core_status & IA32_THERM_STATUS_READ_VALID) != 0) {
+ uint_t diff;
+ diff = IA32_THERM_STATUS_READING(ctc->ctc_core_status);
+
+ /*
+ * The readout is subtracted from Tj Max, so a value at or above
+ * Tj Max cannot be turned into a temperature.
+ */
+ if (diff >= ctc->ctc_tjmax) {
+ dev_err(ct->coretemp_dip, CE_WARN, "!found invalid "
+ "core temperature on %u/%u: readout: %u, Tjmax: "
+ "%u, raw: 0x%" PRIx64, ctc->ctc_chip,
+ ctc->ctc_core, diff, ctc->ctc_tjmax,
+ ctc->ctc_core_status);
+ ctc->ctc_invalid_reads++;
+ } else {
+ ctc->ctc_temperature = ctc->ctc_tjmax - diff;
+ }
+ } else {
+ ctc->ctc_invalid_reads++;
+ }
+
+ ctc->ctc_resolution =
+ IA32_THERM_STATUS_RESOLUTION(ctc->ctc_core_status);
+
+ /*
+ * If we have package support and this is core zero, then update the
+ * package data.
+ */
+ if (ct->coretemp_pkg && ctc->ctc_core == 0) {
+ uint_t diff;
+
+ e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_STATUS,
+ &val);
+ if (e == CMI_SUCCESS) {
+ ctc->ctc_pkg_status = val;
+ } else {
+ err = coretemp_cmi_errno(e);
+ dev_err(ct->coretemp_dip, CE_WARN, "!failed to get "
+ "package thermal status on %u: %d", ctc->ctc_chip,
+ err);
+ return (err);
+ }
+
+ e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ &val);
+ if (e == CMI_SUCCESS) {
+ ctc->ctc_pkg_intr = val;
+ } else {
+ err = coretemp_cmi_errno(e);
+ dev_err(ct->coretemp_dip, CE_WARN, "!failed to get "
+ "package thermal interrupt on %u: %d",
+ ctc->ctc_chip, err);
+ return (err);
+ }
+
+ /*
+ * Unlike the core path, no valid bit is checked here; only the
+ * comparison against Tj Max guards the package temperature.
+ */
+ diff = IA32_PKG_THERM_STATUS_READING(ctc->ctc_pkg_status);
+ if (diff >= ctc->ctc_tjmax) {
+ dev_err(ct->coretemp_dip, CE_WARN, "!found invalid "
+ "package temperature on %u: readout: %u, tjmax: "
+ "%u, raw: 0x%" PRIx64, ctc->ctc_chip, diff,
+ ctc->ctc_tjmax, ctc->ctc_pkg_status);
+ ctc->ctc_invalid_reads++;
+
+ } else {
+ ctc->ctc_pkg_temperature = ctc->ctc_tjmax - diff;
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * open(9E) entry point. Access is limited to privileged callers in the
+ * global zone, opening a character device without exclusive, non-blocking or
+ * write access, on a minor number that maps to a known core or package.
+ */
+static int
+coretemp_open(dev_t *devp, int flags, int otype, cred_t *credp)
+{
+	coretemp_t *state = coretemp;
+	boolean_t known;
+
+	if (crgetzoneid(credp) != GLOBAL_ZONEID) {
+		return (EPERM);
+	}
+
+	if (drv_priv(credp) != 0) {
+		return (EPERM);
+	}
+
+	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0 || otype != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	/* Make sure the minor actually refers to one of our sensors. */
+	mutex_enter(&state->coretemp_mutex);
+	known = (coretemp_lookup_core(state, getminor(*devp)) != NULL);
+	mutex_exit(&state->coretemp_mutex);
+
+	return (known ? 0 : ENXIO);
+}
+
+/*
+ * Service SENSOR_IOCTL_TYPE: report that this is a temperature sensor.
+ * Returns EFAULT if the result can't be copied out to arg.
+ */
+static int
+coretemp_ioctl_kind(intptr_t arg, int mode)
+{
+	sensor_ioctl_kind_t out;
+	int rc;
+
+	bzero(&out, sizeof (out));
+	out.sik_kind = SENSOR_KIND_TEMPERATURE;
+
+	rc = ddi_copyout((void *)&out, (void *)arg, sizeof (out),
+	    mode & FKIOCTL);
+
+	return (rc != 0 ? EFAULT : 0);
+}
+
+/*
+ * Service SENSOR_IOCTL_TEMPERATURE for the core or package that owns minor.
+ * The cached reading is refreshed first if it is older than
+ * coretemp_cache_ms. Returns ENXIO for an unknown minor or a missing CMI
+ * handle, the read error if the refresh fails, and EFAULT if the copyout
+ * fails.
+ */
+static int
+coretemp_ioctl_temp(coretemp_t *ct, minor_t minor, intptr_t arg, int mode)
+{
+	sensor_ioctl_temperature_t out;
+	coretemp_core_t *core;
+	hrtime_t age_ms;
+
+	bzero(&out, sizeof (out));
+
+	mutex_enter(&ct->coretemp_mutex);
+	if ((core = coretemp_lookup_core(ct, minor)) == NULL) {
+		mutex_exit(&ct->coretemp_mutex);
+		return (ENXIO);
+	}
+
+	age_ms = NSEC2MSEC(gethrtime() - core->ctc_last_read);
+	if (age_ms > 0 && age_ms > (hrtime_t)coretemp_cache_ms) {
+		cmi_hdl_t hdl;
+		int ret;
+
+		hdl = cmi_hdl_lookup(core->ctc_class, core->ctc_chip,
+		    core->ctc_core, core->ctc_strand);
+		if (hdl == NULL) {
+			mutex_exit(&ct->coretemp_mutex);
+			return (ENXIO);
+		}
+
+		ret = coretemp_read(ct, core, hdl);
+		cmi_hdl_rele(hdl);
+		if (ret != 0) {
+			mutex_exit(&ct->coretemp_mutex);
+			return (ret);
+		}
+	}
+
+	/* The same core entry backs both the core and the package minor. */
+	out.sit_unit = SENSOR_UNIT_CELSIUS;
+	if ((id_t)minor != core->ctc_core_minor) {
+		out.sit_temp = core->ctc_pkg_temperature;
+	} else {
+		out.sit_temp = core->ctc_temperature;
+	}
+
+	/*
+	 * The resolution is expressed in whole degrees Celsius; anything
+	 * coarser than one degree is reported as a negative granularity.
+	 */
+	out.sit_gran = core->ctc_resolution;
+	if (core->ctc_resolution > 1) {
+		out.sit_gran *= -1;
+	}
+	mutex_exit(&ct->coretemp_mutex);
+
+	if (ddi_copyout(&out, (void *)arg, sizeof (out),
+	    mode & FKIOCTL) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * ioctl(9E) entry point. Requires the device to be open for reading and
+ * dispatches the sensor type and temperature commands; any other command
+ * gets ENOTTY.
+ */
+static int
+coretemp_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	coretemp_t *state = coretemp;
+
+	if ((mode & FREAD) == 0) {
+		return (EINVAL);
+	}
+
+	if (cmd == SENSOR_IOCTL_TYPE) {
+		return (coretemp_ioctl_kind(arg, mode));
+	}
+
+	if (cmd == SENSOR_IOCTL_TEMPERATURE) {
+		return (coretemp_ioctl_temp(state, getminor(dev), arg, mode));
+	}
+
+	return (ENOTTY);
+}
+
+/*
+ * Nothing is tracked per open, so there is no state to release on close and
+ * we unconditionally report success.
+ */
+static int
+coretemp_close(dev_t dev, int flags, int otype, cred_t *credp)
+{
+	return (0);
+}
+
+/*
+ * Release one core entry: hand back whichever minor numbers it was given
+ * (a value of zero or less means none was allocated) and free the structure.
+ */
+static void
+coretemp_fini_core(coretemp_t *ct, coretemp_core_t *ctc)
+{
+	id_t core_id = ctc->ctc_core_minor;
+	id_t pkg_id = ctc->ctc_pkg_minor;
+
+	if (core_id > 0) {
+		id_free(ct->coretemp_ids, core_id);
+	}
+
+	if (pkg_id > 0) {
+		id_free(ct->coretemp_ids, pkg_id);
+	}
+
+	kmem_free(ctc, sizeof (coretemp_core_t));
+}
+
+/*
+ * Tear down a coretemp_t: remove all minor nodes, release every core entry,
+ * then free the cpuset and id space (if they exist), the mutex and the
+ * structure itself.
+ */
+static void
+coretemp_destroy(coretemp_t *ct)
+{
+	coretemp_core_t *core;
+
+	/* A NULL name removes every minor node belonging to the dip. */
+	ddi_remove_minor_node(ct->coretemp_dip, NULL);
+
+	for (core = list_remove_head(&ct->coretemp_cores); core != NULL;
+	    core = list_remove_head(&ct->coretemp_cores)) {
+		coretemp_fini_core(ct, core);
+	}
+	list_destroy(&ct->coretemp_cores);
+
+	if (ct->coretemp_cpuset != NULL) {
+		cpuset_free(ct->coretemp_cpuset);
+	}
+
+	if (ct->coretemp_ids != NULL) {
+		id_space_destroy(ct->coretemp_ids);
+	}
+
+	mutex_destroy(&ct->coretemp_mutex);
+	kmem_free(ct, sizeof (coretemp_t));
+}
+
+/*
+ * cmi_hdl_walk() callback. arg1 is the coretemp_t being built and arg2 points
+ * to a boolean_t that is set to B_TRUE on failure. For each strand-zero
+ * handle this allocates a core entry, assigns its minor numbers, determines
+ * Tj Max, takes a first reading and appends the entry to the core list. The
+ * walk is stopped as soon as any core fails to initialize.
+ */
+static int
+coretemp_init_core(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3)
+{
+	coretemp_t *ct = arg1;
+	boolean_t *failedp = arg2;
+	uint_t chip = cmi_hdl_chipid(hdl);
+	uint_t core = cmi_hdl_coreid(hdl);
+	coretemp_core_t *ent;
+	int err;
+
+	/*
+	 * Sensors are per-core, not per-strand, so only the first strand of
+	 * each core gets an entry.
+	 */
+	if (cmi_hdl_strandid(hdl) != 0) {
+		return (CMI_HDL_WALK_NEXT);
+	}
+
+	ent = kmem_zalloc(sizeof (coretemp_core_t), KM_SLEEP);
+	ent->ctc_class = cmi_hdl_class(hdl);
+	ent->ctc_chip = chip;
+	ent->ctc_core = core;
+	ent->ctc_strand = 0;
+	ent->ctc_core_minor = id_alloc(ct->coretemp_ids);
+	/* Core 0 also carries the package sensor when one exists. */
+	if (ct->coretemp_pkg && ent->ctc_core == 0) {
+		ent->ctc_pkg_minor = id_alloc(ct->coretemp_ids);
+	}
+
+	err = coretemp_calculate_tjmax(ct, ent, hdl);
+	if (err != 0) {
+		dev_err(ct->coretemp_dip, CE_WARN,
+		    "failed to read Tj Max on %u/%u: %d", chip, core, err);
+		goto fail;
+	}
+
+	err = coretemp_read(ct, ent, hdl);
+	if (err != 0) {
+		dev_err(ct->coretemp_dip, CE_WARN,
+		    "failed to take initial temperature reading on %u/%u: %d",
+		    chip, core, err);
+		goto fail;
+	}
+
+	list_insert_tail(&ct->coretemp_cores, ent);
+	return (CMI_HDL_WALK_NEXT);
+
+fail:
+	*failedp = B_TRUE;
+	coretemp_fini_core(ct, ent);
+	return (CMI_HDL_WALK_DONE);
+}
+
+/*
+ * Create the /devices minor nodes for every core on the list: one
+ * "chip<N>.core<M>" node per core and, when package thermal sensors are
+ * supported, one "chip<N>" node for core 0 of each chip.
+ *
+ * Returns B_TRUE if every node was created and B_FALSE on the first failure.
+ * Nodes created before a failure are left in place; the caller removes them
+ * when it destroys the coretemp_t.
+ */
+static boolean_t
+coretemp_create_minors(coretemp_t *ct)
+{
+	coretemp_core_t *ctc;
+
+	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL;
+	    ctc = list_next(&ct->coretemp_cores, ctc)) {
+		int ret;
+		char buf[128];
+
+		if (snprintf(buf, sizeof (buf), "chip%u.core%u", ctc->ctc_chip,
+		    ctc->ctc_core) >= sizeof (buf)) {
+			return (B_FALSE);
+		}
+		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
+		    ctc->ctc_core_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
+		if (ret != DDI_SUCCESS) {
+			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
+			    "minor node %s", buf);
+			return (B_FALSE);
+		}
+
+		/*
+		 * A package minor number is only allocated for core 0 when
+		 * package sensors are supported. Without that check we would
+		 * create a "chip<N>" node with minor 0, which no lookup can
+		 * ever resolve.
+		 */
+		if (!ct->coretemp_pkg || ctc->ctc_core != 0) {
+			continue;
+		}
+
+		if (snprintf(buf, sizeof (buf), "chip%u", ctc->ctc_chip) >=
+		    sizeof (buf)) {
+			return (B_FALSE);
+		}
+
+		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
+		    ctc->ctc_pkg_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
+		if (ret != DDI_SUCCESS) {
+			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
+			    "minor node %s", buf);
+			return (B_FALSE);
+		}
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * attach(9E) entry point. Builds the driver state, walks the CMI handles to
+ * set up one entry per core, creates the minor nodes and finally publishes
+ * the state through the coretemp global. Any failure destroys whatever was
+ * built and returns DDI_FAILURE.
+ */
+static int
+coretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	coretemp_t *state = NULL;
+	boolean_t walk_failed = B_FALSE;
+
+	switch (cmd) {
+	case DDI_RESUME:
+		/* Suspend and resume are currently no-ops for this driver. */
+		return (DDI_SUCCESS);
+	case DDI_ATTACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	/* Refuse to attach a second time. */
+	if (coretemp != NULL) {
+		return (DDI_FAILURE);
+	}
+
+	state = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP);
+	state->coretemp_dip = dip;
+	state->coretemp_pkg = is_x86_feature(x86_featureset,
+	    X86FSET_PKG_THERMAL);
+	list_create(&state->coretemp_cores, sizeof (coretemp_core_t),
+	    offsetof(coretemp_core_t, ctc_link));
+	mutex_init(&state->coretemp_mutex, NULL, MUTEX_DRIVER, NULL);
+	state->coretemp_cpuset = cpuset_alloc(KM_SLEEP);
+	state->coretemp_ids = id_space_create("coretemp_minors", 1,
+	    INT32_MAX);
+	if (state->coretemp_ids == NULL) {
+		goto fail;
+	}
+
+	/* The per-core setup reads MSRs, which requires the mutex. */
+	mutex_enter(&state->coretemp_mutex);
+	cmi_hdl_walk(coretemp_init_core, state, &walk_failed, NULL);
+	if (walk_failed || !coretemp_create_minors(state)) {
+		mutex_exit(&state->coretemp_mutex);
+		goto fail;
+	}
+
+	coretemp = state;
+	mutex_exit(&state->coretemp_mutex);
+	return (DDI_SUCCESS);
+
+fail:
+	coretemp = NULL;
+	coretemp_destroy(state);
+	return (DDI_FAILURE);
+}
+
+/*
+ * getinfo(9E) entry point. The driver has a single instance, so
+ * DDI_INFO_DEVT2INSTANCE always yields instance 0 and
+ * DDI_INFO_DEVT2DEVINFO yields the dev_info_t saved at attach time.
+ *
+ * Returns DDI_FAILURE for unknown commands, and for DDI_INFO_DEVT2DEVINFO
+ * when the driver is not attached, since there is no state to consult then.
+ */
+static int
+coretemp_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
+    void **resultp)
+{
+	coretemp_t *ct = coretemp;
+	int ret;
+
+	switch (cmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		/* The global is NULL before attach and after detach. */
+		if (ct == NULL) {
+			ret = DDI_FAILURE;
+			break;
+		}
+		*resultp = ct->coretemp_dip;
+		ret = DDI_SUCCESS;
+		break;
+	case DDI_INFO_DEVT2INSTANCE:
+		*resultp = (void *)0;
+		ret = DDI_SUCCESS;
+		break;
+	default:
+		ret = DDI_FAILURE;
+		break;
+	}
+
+	return (ret);
+}
+
+/*
+ * detach(9E) entry point. Suspend is a no-op. A real detach clears the
+ * global before destroying the state it pointed to, and fails if the driver
+ * was never attached.
+ */
+static int
+coretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	coretemp_t *state;
+
+	switch (cmd) {
+	case DDI_SUSPEND:
+		return (DDI_SUCCESS);
+	case DDI_DETACH:
+		break;
+	default:
+		return (DDI_FAILURE);
+	}
+
+	state = coretemp;
+	if (state == NULL) {
+		return (DDI_FAILURE);
+	}
+
+	coretemp = NULL;
+	coretemp_destroy(state);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Character device entry points. Only open, close and ioctl are implemented
+ * by this driver; the remaining slots use the stock nodev/nochpoll stubs.
+ */
+static struct cb_ops coretemp_cb_ops = {
+ .cb_open = coretemp_open,
+ .cb_close = coretemp_close,
+ .cb_strategy = nodev,
+ .cb_print = nodev,
+ .cb_dump = nodev,
+ .cb_read = nodev,
+ .cb_write = nodev,
+ .cb_ioctl = coretemp_ioctl,
+ .cb_devmap = nodev,
+ .cb_mmap = nodev,
+ .cb_segmap = nodev,
+ .cb_chpoll = nochpoll,
+ .cb_prop_op = ddi_prop_op,
+ .cb_flag = D_MP,
+ .cb_rev = CB_REV,
+ .cb_aread = nodev,
+ .cb_awrite = nodev
+};
+
+/*
+ * Device operations. The driver supplies its own getinfo, attach and detach
+ * routines and points at coretemp_cb_ops for the character device entry
+ * points; everything else uses the generic DDI stubs.
+ */
+static struct dev_ops coretemp_dev_ops = {
+ .devo_rev = DEVO_REV,
+ .devo_refcnt = 0,
+ .devo_getinfo = coretemp_getinfo,
+ .devo_identify = nulldev,
+ .devo_probe = nulldev,
+ .devo_attach = coretemp_attach,
+ .devo_detach = coretemp_detach,
+ .devo_reset = nodev,
+ .devo_power = ddi_power,
+ .devo_quiesce = ddi_quiesce_not_needed,
+ .devo_cb_ops = &coretemp_cb_ops
+};
+
+/* Loadable driver linkage: ties the module to coretemp_dev_ops. */
+static struct modldrv coretemp_modldrv = {
+ .drv_modops = &mod_driverops,
+ .drv_linkinfo = "Intel CPU/Package thermal sensor",
+ .drv_dev_ops = &coretemp_dev_ops
+};
+
+/* Module linkage passed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage coretemp_modlinkage = {
+ .ml_rev = MODREV_1,
+ .ml_linkage = { &coretemp_modldrv, NULL }
+};
+
+/*
+ * Module load entry point. The module only installs on CPUs that
+ * coretemp_supported() accepts; everywhere else loading fails with ENOTSUP.
+ */
+int
+_init(void)
+{
+	if (coretemp_supported()) {
+		return (mod_install(&coretemp_modlinkage));
+	}
+
+	return (ENOTSUP);
+}
+
+/* Module information entry point; defers entirely to mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	int ret = mod_info(&coretemp_modlinkage, modinfop);
+
+	return (ret);
+}
+
+/* Module unload entry point; defers entirely to mod_remove(). */
+int
+_fini(void)
+{
+	int ret = mod_remove(&coretemp_modlinkage);
+
+	return (ret);
+}
diff --git a/usr/src/uts/intel/io/coretemp/coretemp.conf b/usr/src/uts/intel/io/coretemp/coretemp.conf
new file mode 100644
index 0000000000..1880a2fa16
--- /dev/null
+++ b/usr/src/uts/intel/io/coretemp/coretemp.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2019, Joyent, Inc.
+#
+
+name="coretemp" parent="pseudo" instance=0;
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 87fac33563..0545633682 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -235,6 +235,38 @@ extern "C" {
#define CPUID_INTC_ECX_AHF64 0x00100000 /* LAHF and SAHF in long mode */
/*
+ * Intel uses cpuid leaf 6 to cover various thermal and power control
+ * operations.
+ */
+#define CPUID_INTC_EAX_DTS 0x00000001 /* Digital Thermal Sensor */
+#define CPUID_INTC_EAX_TURBO 0x00000002 /* Turboboost */
+#define CPUID_INTC_EAX_ARAT 0x00000004 /* APIC-Timer-Always-Running */
+/* bit 3 is reserved */
+#define CPUID_INTC_EAX_PLN 0x00000010 /* Power limit notification */
+#define CPUID_INTC_EAX_ECMD 0x00000020 /* Clock mod. duty cycle */
+#define CPUID_INTC_EAX_PTM 0x00000040 /* Package thermal management */
+#define CPUID_INTC_EAX_HWP 0x00000080 /* HWP base registers */
+#define CPUID_INTC_EAX_HWP_NOT 0x00000100 /* HWP Notification */
+#define CPUID_INTC_EAX_HWP_ACT 0x00000200 /* HWP Activity Window */
+#define CPUID_INTC_EAX_HWP_EPR 0x00000400 /* HWP Energy Perf. Pref. */
+#define CPUID_INTC_EAX_HWP_PLR 0x00000800 /* HWP Package Level Request */
+/* bit 12 is reserved */
+#define CPUID_INTC_EAX_HDC 0x00002000 /* HDC */
+#define CPUID_INTC_EAX_TURBO3 0x00004000 /* Turbo Boost Max Tech 3.0 */
+#define CPUID_INTC_EAX_HWP_CAP 0x00008000 /* HWP Capabilities */
+#define CPUID_INTC_EAX_HWP_PECI 0x00010000 /* HWP PECI override */
+#define CPUID_INTC_EAX_HWP_FLEX 0x00020000 /* Flexible HWP */
+#define CPUID_INTC_EAX_HWP_FAST 0x00040000 /* Fast IA32_HWP_REQUEST */
+/* bit 19 is reserved */
+#define CPUID_INTC_EAX_HWP_IDLE 0x00100000 /* Ignore Idle Logical HWP */
+
+#define CPUID_INTC_EBX_DTS_NTRESH(x) ((x) & 0xf)
+
+#define CPUID_INTC_ECX_MAPERF 0x00000001 /* IA32_MPERF / IA32_APERF */
+/* bits 1-2 are reserved */
+#define CPUID_INTC_ECX_PERFBIAS 0x00000008 /* IA32_ENERGY_PERF_BIAS */
+
+/*
* Intel also uses cpuid leaf 7 to have additional instructions and features.
* Like some other leaves, but unlike the current ones we care about, it
* requires us to specify both a leaf in %eax and a sub-leaf in %ecx. To deal
@@ -481,6 +513,74 @@ extern "C" {
#define IA32_VMX_EPT_VPID_INVEPT_SINGLE (1UL << 25)
#define IA32_VMX_EPT_VPID_INVEPT_ALL (1UL << 26)
+/*
+ * Intel Thermal MSRs
+ */
+#define MSR_IA32_THERM_INTERRUPT 0x19b
+#define IA32_THERM_INTERRUPT_HIGH_IE 0x00000001
+#define IA32_THERM_INTERRUPT_LOW_IE 0x00000002
+#define IA32_THERM_INTERRUPT_PROCHOT_IE 0x00000004
+#define IA32_THERM_INTERRUPT_FORCEPR_IE 0x00000008
+#define IA32_THERM_INTERRUPT_CRIT_IE 0x00000010
+#define IA32_THERM_INTERRUPT_TR1_VAL(x) (((x) >> 8) & 0x7f)
+#define IA32_THERM_INTTERUPT_TR1_IE 0x00008000
+#define IA32_THERM_INTTERUPT_TR2_VAL(x) (((x) >> 16) & 0x7f)
+#define IA32_THERM_INTERRUPT_TR2_IE 0x00800000
+#define IA32_THERM_INTERRUPT_PL_NE 0x01000000
+
+#define MSR_IA32_THERM_STATUS 0x19c
+#define IA32_THERM_STATUS_STATUS 0x00000001
+#define IA32_THERM_STATUS_STATUS_LOG 0x00000002
+#define IA32_THERM_STATUS_PROCHOT 0x00000004
+#define IA32_THERM_STATUS_PROCHOT_LOG 0x00000008
+#define IA32_THERM_STATUS_CRIT_STATUS 0x00000010
+#define IA32_THERM_STATUS_CRIT_LOG 0x00000020
+#define IA32_THERM_STATUS_TR1_STATUS 0x00000040
+#define IA32_THERM_STATUS_TR1_LOG 0x00000080
+#define IA32_THERM_STATUS_TR2_STATUS 0x00000100
+#define IA32_THERM_STATUS_TR2_LOG 0x00000200
+#define IA32_THERM_STATUS_POWER_LIMIT_STATUS 0x00000400
+#define IA32_THERM_STATUS_POWER_LIMIT_LOG 0x00000800
+#define IA32_THERM_STATUS_CURRENT_STATUS 0x00001000
+#define IA32_THERM_STATUS_CURRENT_LOG 0x00002000
+#define IA32_THERM_STATUS_CROSS_DOMAIN_STATUS 0x00004000
+#define IA32_THERM_STATUS_CROSS_DOMAIN_LOG 0x00008000
+#define IA32_THERM_STATUS_READING(x) (((x) >> 16) & 0x7f)
+#define IA32_THERM_STATUS_RESOLUTION(x) (((x) >> 27) & 0x0f)
+#define IA32_THERM_STATUS_READ_VALID 0x80000000
+
+#define MSR_TEMPERATURE_TARGET 0x1a2
+#define MSR_TEMPERATURE_TARGET_TARGET(x) (((x) >> 16) & 0xff)
+/*
+ * Not all models support the offset. Refer to the Intel SDM Volume 4 for a list
+ * of which models have support for which bits.
+ */
+#define MSR_TEMPERATURE_TARGET_OFFSET(x) (((x) >> 24) & 0x0f)
+
+#define MSR_IA32_PACKAGE_THERM_STATUS 0x1b1
+#define IA32_PKG_THERM_STATUS_STATUS 0x00000001
+#define IA32_PKG_THERM_STATUS_STATUS_LOG 0x00000002
+#define IA32_PKG_THERM_STATUS_PROCHOT 0x00000004
+#define IA32_PKG_THERM_STATUS_PROCHOT_LOG 0x00000008
+#define IA32_PKG_THERM_STATUS_CRIT_STATUS 0x00000010
+#define IA32_PKG_THERM_STATUS_CRIT_LOG 0x00000020
+#define IA32_PKG_THERM_STATUS_TR1_STATUS 0x00000040
+#define IA32_PKG_THERM_STATUS_TR1_LOG 0x00000080
+#define IA32_PKG_THERM_STATUS_TR2_STATUS 0x00000100
+#define IA32_PKG_THERM_STATUS_TR2_LOG 0x00000200
+#define IA32_PKG_THERM_STATUS_READING(x) (((x) >> 16) & 0x7f)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x1b2
+#define IA32_PKG_THERM_INTERRUPT_HIGH_IE 0x00000001
+#define IA32_PKG_THERM_INTERRUPT_LOW_IE 0x00000002
+#define IA32_PKG_THERM_INTERRUPT_PROCHOT_IE 0x00000004
+#define IA32_PKG_THERM_INTERRUPT_OVERHEAT_IE 0x00000010
+#define IA32_PKG_THERM_INTERRUPT_TR1_VAL(x) (((x) >> 8) & 0x7f)
+#define IA32_PKG_THERM_INTTERUPT_TR1_IE 0x00008000
+#define IA32_PKG_THERM_INTTERUPT_TR2_VAL(x) (((x) >> 16) & 0x7f)
+#define IA32_PKG_THERM_INTERRUPT_TR2_IE 0x00800000
+#define IA32_PKG_THERM_INTERRUPT_PL_NE 0x01000000
+
#define MCI_CTL_VALUE 0xffffffff
#define MTRR_TYPE_UC 0
@@ -605,6 +705,8 @@ extern "C" {
#define X86FSET_TBM 90
#define X86FSET_AVX512VNNI 91
#define X86FSET_AMD_PCEC 92
+#define X86FSET_CORE_THERMAL 93
+#define X86FSET_PKG_THERMAL 94
/*
* Intel Deep C-State invariant TSC in leaf 0x80000007.
@@ -612,16 +714,6 @@ extern "C" {
#define CPUID_TSC_CSTATE_INVARIANCE (0x100)
/*
- * Intel Deep C-state always-running local APIC timer
- */
-#define CPUID_CSTATE_ARAT (0x4)
-
-/*
- * Intel ENERGY_PERF_BIAS MSR indicated by feature bit CPUID.6.ECX[3].
- */
-#define CPUID_EPB_SUPPORT (1 << 3)
-
-/*
* Intel TSC deadline timer
*/
#define CPUID_DEADLINE_TSC (1 << 24)
@@ -888,7 +980,9 @@ extern "C" {
* Definitions for Intel processor models. These are all for Family 6
* processors. This list and the Atom set below it are not exhuastive.
*/
+#define INTC_MODEL_YONAH 0x0e
#define INTC_MODEL_MEROM 0x0f
+#define INTC_MODEL_MEROM_L 0x16
#define INTC_MODEL_PENRYN 0x17
#define INTC_MODEL_DUNNINGTON 0x1d
@@ -974,7 +1068,7 @@ extern "C" {
#if defined(_KERNEL) || defined(_KMEMUSER)
-#define NUM_X86_FEATURES 93
+#define NUM_X86_FEATURES 95
extern uchar_t x86_featureset[];
extern void free_x86_featureset(void *featureset);