diff options
Diffstat (limited to 'usr/src')
28 files changed, 2860 insertions, 48 deletions
| diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com index b446b148ff..cec58108c8 100644 --- a/usr/src/cmd/devfsadm/Makefile.com +++ b/usr/src/cmd/devfsadm/Makefile.com @@ -21,7 +21,7 @@  # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.  # Use is subject to license terms.  # -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc.  # This target builds both a command (daemon) and various shared objects.  This  # isn't a typical target, and the inclusion of both library and command @@ -71,7 +71,8 @@ LINK_OBJS_CMN =			\  	dtrace_link.o		\  	vscan_link.o		\  	zfs_link.o		\ -	zut_link.o +	zut_link.o		\ +	sensor_link.o  LINK_OBJS =	$(LINK_OBJS_CMN) \  		$(LINK_OBJS_$(MACH)) @@ -164,7 +165,7 @@ install: all				\  clean: -	$(RM) $(OBJS)  +	$(RM) $(OBJS)  lint: $(DEVFSADM_MOD).ln $(LINT_MODULES) diff --git a/usr/src/cmd/devfsadm/devfsadm.c b/usr/src/cmd/devfsadm/devfsadm.c index f81d5b5d67..52f4f4c0da 100644 --- a/usr/src/cmd/devfsadm/devfsadm.c +++ b/usr/src/cmd/devfsadm/devfsadm.c @@ -23,6 +23,7 @@   * Copyright 2016 Toomas Soome <tsoome@me.com>   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2019, Joyent, Inc.   */  /* @@ -2073,6 +2074,16 @@ class_ok(char *class)  		return (DEVFSADM_SUCCESS);  	} +	/* +	 * Some create tabs operate on multiple classes of devices because the +	 * kernel doesn't have a good way for a driver to indicate that a +	 * particular minor's class is different from that of the dev_info_t +	 * it belongs to. As such, we'll always fail to match those here. +	 */ +	if (class == NULL) { +		return (DEVFSADM_FAILURE); +	} +  	for (i = 0; i < num_classes; i++) {  		if (strcmp(class, classes[i]) == 0) {  			return (DEVFSADM_SUCCESS); @@ -3717,10 +3728,10 @@ do_inst_sync(char *filename, char *instfilename)   * safely, the database is flushed to a temporary file, then moved into place.   
*   * The following files are used during this process: - * 	/etc/path_to_inst:	The path_to_inst file - * 	/etc/path_to_inst.<pid>: Contains data flushed from the kernel - * 	/etc/path_to_inst.old:  The backup file - * 	/etc/path_to_inst.old.<pid>: Temp file for creating backup + *	/etc/path_to_inst:	The path_to_inst file + *	/etc/path_to_inst.<pid>: Contains data flushed from the kernel + *	/etc/path_to_inst.old:  The backup file + *	/etc/path_to_inst.old.<pid>: Temp file for creating backup   *   */  static void @@ -7803,7 +7814,7 @@ add_verbose_id(char *mid)   * returns DEVFSADM_TRUE if contents is a minor node in /devices.   * If mn_root is not NULL, mn_root is set to:   *	if contents is a /dev node, mn_root = contents - * 			OR + *			OR   *	if contents is a /devices node, mn_root set to the '/'   *	following /devices.   */ diff --git a/usr/src/cmd/devfsadm/sensor_link.c b/usr/src/cmd/devfsadm/sensor_link.c new file mode 100644 index 0000000000..7a2b48af75 --- /dev/null +++ b/usr/src/cmd/devfsadm/sensor_link.c @@ -0,0 +1,79 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * Create /devices links for various sensors. The sensor series of node types + * all begin with ddi_sensor. After which, there is a series of : delineated + * paths in the node type. Those represent the directory under /dev/sensors that + * the nodes should ultimately be created. + * + * For example, ddi_sensor:temperature:cpu would cause us to place the named + * minor under /dev/sensors/temperature/cpu/. 
Currently it is up to drivers to + * not conflict in names or if there is a fear of conflicting, make sure their + * minor is unique. + */ + +#include <devfsadm.h> +#include <string.h> + +#define	SENSORS_BASE	"sensors" + +static int +sensor_link(di_minor_t minor, di_node_t node) +{ +	const char *t, *minor_name, *dir_path = NULL; +	char *type, *c; +	char buf[PATH_MAX]; +	size_t len; + +	if ((t = di_minor_nodetype(minor)) == NULL) { +		return (DEVFSADM_CONTINUE); +	} + +	if ((minor_name = di_minor_name(minor)) == NULL) { +		return (DEVFSADM_CONTINUE); +	} + +	if ((type = strdup(t)) == NULL) { +		return (DEVFSADM_TERMINATE); +	} + +	while ((c = strchr(type, ':')) != NULL) { +		if (dir_path == NULL) { +			dir_path = c + 1; +		} +		*c = '/'; +	} + +	if (dir_path == NULL || *dir_path == '\0') { +		len = snprintf(buf, sizeof (buf), "%s/%s", SENSORS_BASE, +		    minor_name); +	} else { +		len = snprintf(buf, sizeof (buf), "%s/%s/%s", SENSORS_BASE, +		    dir_path, minor_name); +	} + +	if (len < sizeof (buf)) { +		(void) devfsadm_mklink(buf, node, minor, 0); +	} + +	free(type); +	return (DEVFSADM_CONTINUE); +} + +static devfsadm_create_t sensor_create_cbt[] = { +	{ NULL, "ddi_sensor", NULL, TYPE_PARTIAL, ILEVEL_0, sensor_link } +}; +DEVFSADM_CREATE_INIT_V0(sensor_create_cbt); diff --git a/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c new file mode 100644 index 0000000000..c9e56e9e1f --- /dev/null +++ b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.c @@ -0,0 +1,262 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. 
+ */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * This file provides routines to interact with the kernel sensor framework. + * Currently, modules that require interacting with a kernel sensor need to + * build this file as part of the module. This takes care of all the work of + * setting up and creating the temperature sensor, given a path to that sensor. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <libnvpair.h> +#include <sys/sensors.h> +#include <sys/fm/protocol.h> +#include <fm/topo_mod.h> + +#define	TOPO_METH_TOPO_SENSOR_TEMP		"topo_sensor_temp_reading" +#define	TOPO_METH_TOPO_SENSOR_TEMP_DESC		"Kernel Temperature Reading" +#define	TOPO_METH_TOPO_SENSOR_TEMP_VERSION	0 + +static int +topo_sensor_temp_read(topo_mod_t *mod, tnode_t *node, topo_version_t vers, +    nvlist_t *in, nvlist_t **out) +{ +	int fd = -1, ret; +	nvlist_t *args, *nvl; +	char *path; +	sensor_ioctl_temperature_t temp; +	double degrees; + +	if (vers != TOPO_METH_TOPO_SENSOR_TEMP_VERSION) { +		return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW)); +	} + +	if (nvlist_lookup_nvlist(in, TOPO_PROP_ARGS, &args) != 0 || +	    nvlist_lookup_string(args, TOPO_IO_DEV_PATH, &path) != 0) { +		topo_mod_dprintf(mod, "failed to lookup sensor path from " +		    "property %s", TOPO_IO_DEV_PATH); +		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL)); +	} + +	if ((fd = open(path, O_RDONLY)) < 0) { +		topo_mod_dprintf(mod, "failed to open sensor path %s: %s", +		    path, strerror(errno)); +		return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +	} + +	(void) memset(&temp, '\0', sizeof (temp)); +	if (ioctl(fd, SENSOR_IOCTL_TEMPERATURE, &temp) != 0) { +		topo_mod_dprintf(mod, "failed to read temperature sensor " +		    "%s: %s", path, strerror(errno)); +		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); +		goto out; +	} + +	/* +	 * Check to see if we need to change the value to get it into an +	 * accurate reading. 
Positive values indicate that the temperature +	 * reading is in a fractional number of degrees and that each degree +	 * contains temp.sit_gran steps. A negative number means that the +	 * temperature reading represents temp.sit_gran degrees. +	 */ +	degrees = (double)temp.sit_temp; +	if (temp.sit_gran > 1) { +		degrees /= (double)temp.sit_gran; +	} else if (temp.sit_gran < -1) { +		degrees *= (double)labs(temp.sit_gran); +	} + +	if (topo_mod_nvalloc(mod, &nvl, NV_UNIQUE_NAME) != 0) { +		topo_mod_dprintf(mod, "failed to allocate output temperature " +		    "nvl"); +		ret = topo_mod_seterrno(mod, EMOD_NOMEM); +		goto out; +	} + +	if (nvlist_add_string(nvl, TOPO_PROP_VAL_NAME, TOPO_SENSOR_READING) != +	    0 || +	    nvlist_add_uint32(nvl, TOPO_PROP_VAL_TYPE, TOPO_TYPE_DOUBLE) != 0 || +	    nvlist_add_double(nvl, TOPO_PROP_VAL_VAL, degrees) != 0) { +		topo_mod_dprintf(mod, "failed to add members to output " +		    "temperature nvlist"); +		nvlist_free(nvl); +		ret = topo_mod_seterrno(mod, EMOD_NOMEM); +		goto out; +	} + +	*out = nvl; +	ret = 0; +out: +	if (fd >= 0) { +		(void) close(fd); +	} +	return (ret); +} + +static const topo_method_t topo_sensor_temp_fac_methods[] = { +	{ TOPO_METH_TOPO_SENSOR_TEMP, TOPO_METH_TOPO_SENSOR_TEMP_DESC, +		TOPO_METH_TOPO_SENSOR_TEMP_VERSION, TOPO_STABILITY_INTERNAL, +		topo_sensor_temp_read }, +	{ NULL } +}; + +static topo_sensor_unit_t +topo_sensor_units(const sensor_ioctl_temperature_t *temp) +{ +	switch (temp->sit_unit) { +	case SENSOR_UNIT_CELSIUS: +		return (TOPO_SENSOR_UNITS_DEGREES_C); +	case SENSOR_UNIT_FAHRENHEIT: +		return (TOPO_SENSOR_UNITS_DEGREES_F); +	case SENSOR_UNIT_KELVIN: +		return (TOPO_SENSOR_UNITS_DEGREES_K); +	default: +		return (TOPO_SENSOR_UNITS_UNSPECIFIED); +	} +} + +int +topo_sensor_create_temp_sensor(topo_mod_t *mod, tnode_t *pnode, +    const char *path, const char *fname) +{ +	int fd, ret, err; +	sensor_ioctl_kind_t sik; +	sensor_ioctl_temperature_t temp; +	tnode_t *fnode = NULL; +	topo_pgroup_info_t 
pgi; +	nvlist_t *reader_arg = NULL; + +	topo_mod_dprintf(mod, "attempting to create sensor for %s at %s", +	    topo_node_name(pnode), path); + +	(void) memset(&sik, '\0', sizeof (sik)); +	(void) memset(&temp, '\0', sizeof (temp)); + +	if ((fd = open(path, O_RDONLY)) < 0) { +		topo_mod_dprintf(mod, "failed to open sensor path %s: %s", +		    path, strerror(errno)); + +		/* +		 * We always try to create temperature sensors; however, they +		 * may not exist or be supported on the system in question. +		 * Therefore ENOENT is totally acceptable. +		 */ +		if (errno == ENOENT) { +			return (0); +		} +		return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +	} + +	if (ioctl(fd, SENSOR_IOCTL_TYPE, &sik) != 0) { +		topo_mod_dprintf(mod, "failed to verify sensor kind for sensor " +		    "%s: %s", path, strerror(errno)); +		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); +		goto out; +	} + +	if (sik.sik_kind != SENSOR_KIND_TEMPERATURE) { +		topo_mod_dprintf(mod, "sensor kind for %s is not temperature, " +		    "found 0x%x", path, sik.sik_kind); +		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); +		goto out; +	} + +	if (ioctl(fd, SENSOR_IOCTL_TEMPERATURE, &temp) != 0) { +		topo_mod_dprintf(mod, "failed to read temperature sensor " +		    "%s: %s", path, strerror(errno)); +		ret = topo_mod_seterrno(mod, EMOD_UNKNOWN); +		goto out; +	} + +	(void) close(fd); +	fd = -1; + +	if ((fnode = topo_node_facbind(mod, pnode, fname, +	    TOPO_FAC_TYPE_SENSOR)) == NULL) { +		topo_mod_dprintf(mod, "failed to bind temperature facility " +		    "node to %s: %d", path, topo_mod_errno(mod)); +		ret = -1; +		goto out; +	} + +	pgi.tpi_name = TOPO_PGROUP_FACILITY; +	pgi.tpi_namestab = TOPO_STABILITY_PRIVATE; +	pgi.tpi_datastab = TOPO_STABILITY_PRIVATE; +	pgi.tpi_version = 1; + +	if (topo_pgroup_create(fnode, &pgi, &err) != 0) { +		topo_mod_dprintf(mod, "failed to create facility pgroup: %s", +		    topo_strerror(err)); +		ret = topo_mod_seterrno(mod, err); +		goto out; +	} + +	if (topo_prop_set_string(fnode, 
TOPO_PGROUP_FACILITY, +	    TOPO_SENSOR_CLASS, TOPO_PROP_IMMUTABLE, +	    TOPO_SENSOR_CLASS_THRESHOLD, &err) != 0 || +	    topo_prop_set_uint32(fnode, TOPO_PGROUP_FACILITY, +	    TOPO_FACILITY_TYPE, TOPO_PROP_IMMUTABLE, TOPO_SENSOR_TYPE_TEMP, +	    &err) != 0 || +	    topo_prop_set_uint32(fnode, TOPO_PGROUP_FACILITY, +	    TOPO_SENSOR_UNITS, TOPO_PROP_IMMUTABLE, topo_sensor_units(&temp), +	    &err) != 0) { +		topo_mod_dprintf(mod, "failed to set properties for sensor " +		    "%s: %s", path, topo_strerror(err)); +		ret = topo_mod_seterrno(mod, err); +		goto out; + +	} + +	if (topo_method_register(mod, fnode, topo_sensor_temp_fac_methods) < +	    0) { +		topo_mod_dprintf(mod, "failed to register reading methods on " +		    "%s", path); +		ret = -1; +		goto out; +	} + +	if (topo_mod_nvalloc(mod, &reader_arg, NV_UNIQUE_NAME) != 0 || +	    nvlist_add_string(reader_arg, TOPO_IO_DEV_PATH, path) != 0) { +		topo_mod_dprintf(mod, "Failed to set up reader argument nvl"); +		ret = topo_mod_seterrno(mod, EMOD_NOMEM); +		goto out; +	} + +	if (topo_prop_method_register(fnode, TOPO_PGROUP_FACILITY, +	    TOPO_SENSOR_READING, TOPO_TYPE_DOUBLE, TOPO_METH_TOPO_SENSOR_TEMP, +	    reader_arg, &err) != 0) { +		topo_mod_dprintf(mod, "failed to set argument for sensor %s: " +		    "%s", path, topo_strerror(err)); +		err = topo_mod_seterrno(mod, err); +		goto out; +	} + +	nvlist_free(reader_arg); +	return (0); +out: +	if (fd >= 0) { +		(void) close(fd); +	} + +	topo_node_unbind(fnode); +	nvlist_free(reader_arg); +	return (ret); +} diff --git a/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h new file mode 100644 index 0000000000..ff6e1ea92e --- /dev/null +++ b/usr/src/lib/fm/topo/modules/common/shared/topo_sensor.h @@ -0,0 +1,34 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +#ifndef _TOPO_SENSOR_H +#define	_TOPO_SENSOR_H + +/* + * Routines to interact with the common kernel sensor framework. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int topo_sensor_create_temp_sensor(topo_mod_t *, tnode_t *, const char *, +    const char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _TOPO_SENSOR_H */ diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile b/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile index 3da69e6ce2..c6db9f09b6 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/Makefile @@ -22,16 +22,22 @@  # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.  # Use is subject to license terms.  # -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc.  MODULE = chip  ARCH = i86pc  CLASS = arch -MODULESRCS = chip.c chip_label.c chip_subr.c chip_amd.c chip_intel.c\ -chip_serial.c chip_smbios.c +SHAREDDIR = ../../common/shared/ + +MODULESRCS = chip.c chip_label.c chip_subr.c chip_amd.c chip_intel.c \ +chip_serial.c chip_smbios.c chip_temp.o +MODULESRCS += topo_sensor.c  include ../../Makefile.plugin  LDLIBS += -lipmi -lfmd_agent -lumem -lsmbios -lkstat -# not linted -SMATCH=off +CPPFLAGS += -I$(SHAREDDIR) + +%.o: $(SHAREDDIR)/%.c +	$(COMPILE.c) -o $@ $< +	$(CTFCONVERT_O) diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c index cdd799cc0b..c81f01c3e9 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.c @@ -22,7 +22,7 @@  /*   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.   
* Use is subject to license terms. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc.   */  #include <unistd.h> @@ -403,6 +403,13 @@ create_core(topo_mod_t *mod, tnode_t *pnode, nvlist_t *cpu,  		if (topo_node_range_create(mod, core, STRAND_NODE_NAME,  		    0, 255) != 0)  			return (-1); + +		/* +		 * Creating a temperature sensor may fail because the sensor +		 * doesn't exist or due to internal reasons. At the moment, we +		 * swallow any such errors that occur. +		 */ +		(void) chip_create_core_temp_sensor(mod, core);  	}  	if (!is_xpv()) { @@ -644,6 +651,13 @@ create_chip(topo_mod_t *mod, tnode_t *pnode, topo_instance_t min,  		}  		create_mc = B_TRUE; + +		/* +		 * Creating a temperature sensor may fail because the sensor +		 * doesn't exist or due to internal reasons. At the moment, we +		 * swallow any such errors that occur. +		 */ +		(void) chip_create_chip_temp_sensor(mod, chip);  	}  	if (FM_AWARE_SMBIOS(mod)) { diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h index 8b5ad3b88f..b4fd850996 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip.h @@ -20,7 +20,7 @@   */  /*   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc.   
*/  #ifndef _CHIP_H @@ -221,6 +221,11 @@ extern const char *chip_rev_smbios_get(topo_mod_t *, id_t);  extern id_t memnode_to_smbiosid(topo_mod_t *, uint16_t, const char *,      uint64_t, void *); +/* + * Prototypes for chip_temp.c + */ +extern int chip_create_chip_temp_sensor(topo_mod_t *, tnode_t *); +extern int chip_create_core_temp_sensor(topo_mod_t *, tnode_t *);  #ifdef __cplusplus  } diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c index 67c35058c3..4275bc46f6 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_label.c @@ -22,6 +22,8 @@  /*   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. + * + * Copyright 2019, Joyent, Inc.   */  #include <stdio.h> @@ -561,8 +563,8 @@ get_num_chips(topo_mod_t *mod)  			nchip = -1;  			break;  		} -		if ((bitmap & (1 << chipid)) != 0) { -			bitmap |= (1 << chipid); +		if ((bitmap & (1ULL << chipid)) != 0) { +			bitmap |= (1ULL << chipid);  			nchip++;  		}  	} @@ -660,7 +662,7 @@ a4fplus_chip_label(topo_mod_t *mod, tnode_t *node, topo_version_t vers,   *   * This function computes the DIMM slot number using the following formula:   * - * 	slot = cs - (cs % 2) + channel + offset + *	slot = cs - (cs % 2) + channel + offset   */  /* ARGSUSED */  int diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c index a83f31dbb4..53fd7852ef 100644 --- a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_subr.c @@ -22,7 +22,7 @@  /*   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.   * Use is subject to license terms. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019, Joyent, Inc.   
*/  /* @@ -230,7 +230,7 @@ mkrsrc(topo_mod_t *mod, tnode_t *pnode, const char *name, int inst,  {  	*nvl = topo_mod_hcfmri(mod, pnode, FM_HC_SCHEME_VERSION, name,  	    inst, NULL, auth, NULL, NULL, NULL); -	return (nvl != NULL ? 0 : -1);	/* caller must free nvlist */ +	return (*nvl != NULL ? 0 : -1);	/* caller must free nvlist */  }  /* diff --git a/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c new file mode 100644 index 0000000000..89f8d57fb6 --- /dev/null +++ b/usr/src/lib/fm/topo/modules/i86pc/chip/chip_temp.c @@ -0,0 +1,91 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <libnvpair.h> +#include <sys/sensors.h> +#include <sys/fm/protocol.h> +#include <fm/topo_mod.h> +#include <topo_sensor.h> + +#include "chip.h" + +static const char *chip_sensor_base = "/dev/sensors/temperature/cpu"; + +int +chip_create_core_temp_sensor(topo_mod_t *mod, tnode_t *pnode) +{ +	int err; +	int32_t chip, core; +	char buf[PATH_MAX]; +	struct stat st; + +	core = topo_node_instance(pnode); +	if (topo_prop_get_int32(pnode, PGNAME(CORE), CORE_CHIP_ID, &chip, +	    &err) != 0) { +		return (topo_mod_seterrno(mod, err)); +	} + +	if (snprintf(buf, sizeof (buf), "%s/chip%d.core%d", chip_sensor_base, +	    chip, core) >= sizeof (buf)) { +		return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +	} + +	/* +	 * Some systems have per-core sensors. 
Others have it on a per-die aka +	 * procnode basis. Check to see if the file exists before we attempt to +	 * do something. +	 */ +	if (stat(buf, &st) != 0) { +		int32_t procnode; + +		if (errno != ENOENT) { +			return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +		} + +		if (topo_prop_get_int32(pnode, PGNAME(CORE), CORE_PROCNODE_ID, +		    &procnode, &err) != 0) { +			return (topo_mod_seterrno(mod, err)); +		} + +		if (snprintf(buf, sizeof (buf), "%s/procnode.%d", +		    chip_sensor_base, procnode) >= sizeof (buf)) { +			return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +		} +	} + +	return (topo_sensor_create_temp_sensor(mod, pnode, buf, "temp")); +} + +int +chip_create_chip_temp_sensor(topo_mod_t *mod, tnode_t *pnode) +{ +	int32_t chip; +	char buf[PATH_MAX]; + +	chip = topo_node_instance(pnode); + +	if (snprintf(buf, sizeof (buf), "%s/chip%d", chip_sensor_base, +	    chip) >= sizeof (buf)) { +		return (topo_mod_seterrno(mod, EMOD_UNKNOWN)); +	} + +	return (topo_sensor_create_temp_sensor(mod, pnode, buf, "temp")); +} diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile index 6fa4022fa3..ceec9a8978 100644 --- a/usr/src/man/man7d/Makefile +++ b/usr/src/man/man7d/Makefile @@ -12,7 +12,7 @@  #  # Copyright 2011, Richard Lowe  # Copyright 2016 Garrett D'Amore <garrett@damore.org> -# Copyright (c) 2017, Joyent, Inc. +# Copyright 2019, Joyent, Inc.  # Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>  # Copyright 2018 Nexenta Systems, Inc.  
# Copyright 2019 Peter Tribble @@ -180,6 +180,7 @@ sparc_MANFILES=	audiocs.7d	\  i386_MANFILES=	ahci.7d		\  		amd8111s.7d	\ +		amdf17nbdf.7d	\  		amr.7d		\  		arcmsr.7d	\  		arn.7d		\ @@ -199,6 +200,7 @@ i386_MANFILES=	ahci.7d		\  		bcm_sata.7d	\  		bfe.7d		\  		cmdk.7d		\ +		coretemp.7d	\  		cpqary3.7d	\  		dnet.7d		\  		ecpp.7d		\ diff --git a/usr/src/man/man7d/amdf17nbdf.7d b/usr/src/man/man7d/amdf17nbdf.7d new file mode 100644 index 0000000000..739eab6c82 --- /dev/null +++ b/usr/src/man/man7d/amdf17nbdf.7d @@ -0,0 +1,53 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source.  A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2019, Joyent, Inc. +.\" +.Dd March 20, 2019 +.Dt AMDF17NBDF 7D +.Os +.Sh NAME +.Nm amdf17nbdf +.Nd AMD Family 17h Northbridge and Data Fabric Driver +.Sh SYNOPSIS +.Pa /dev/sensors/temperature/cpu/* +.Sh DESCRIPTION +The +.Nm +driver provides the system access to the Northbridge and Data Fabric +devices on AMD Family 17h +.Pq Zen +processors allowing the operating system to communicate with the system +management unit +.Pq SMU . +.Pp +From this, the driver exposes temperature sensors. +On Family 17h systems, temperature sensors exist for each Zeppelin die, +of which there may be multiple in a single package. +This means that each sensor covers more than one core. +.Pp +Temperature information is available to the system via the fault +management architecture +.Pq FMA . +The file system location and programming interface to the +.Nm +driver are considered +.Sy Volatile , +subject to change without notice, and should not be used directly. 
+Raw temperature information can be dumped through the FMA developer +utility fmtopo. +.Sh SEE ALSO +.Xr fmadm 1M +.Rs +.%A AMD +.%B Open-Source Register Reference For AMD Family 17h Processors Models 00h-2Fh +.%D July, 2018 +.Re diff --git a/usr/src/man/man7d/coretemp.7d b/usr/src/man/man7d/coretemp.7d new file mode 100644 index 0000000000..2ac1008e55 --- /dev/null +++ b/usr/src/man/man7d/coretemp.7d @@ -0,0 +1,49 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source.  A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2019, Joyent, Inc. +.\" +.Dd March 20, 2019 +.Dt CORETEMP 7D +.Os +.Sh NAME +.Nm coretemp +.Nd Intel core-family temperature sensor driver +.Sh SYNOPSIS +.Pa /dev/sensors/temperature/cpu/* +.Sh DESCRIPTION +The +.Nm +driver provides the system with a means of reading the per-core and, +when available, per-package digital temperature sensors on Intel CPUs. +Currently, the +.Nm +driver supports Intel Core family processors after Penryn +microarchitecture and Intel Atom processors starting with the Silvermont +microarchitecure. +.Pp +Temperature information is available to the system via the fault +management architecture +.Pq FMA . +The file system location and programming interface to the +.Nm +driver are considered +.Sy Volatile , +subject to change without notice, and should not be used directly. +Raw temperature information can be dumped through the FMA developer +utility fmtopo. 
+.Sh SEE ALSO +.Xr fmadm 1M +.Rs +.%A Intel Corporation +.%B Intel 64 and IA-32 Architectures Software Developer's Manual +.%V Volume 3 (3A, 3B, 3C & 3D): System Programming Guide +.Re diff --git a/usr/src/pkg/manifests/driver-cpu-sensor.mf b/usr/src/pkg/manifests/driver-cpu-sensor.mf new file mode 100644 index 0000000000..206456e092 --- /dev/null +++ b/usr/src/pkg/manifests/driver-cpu-sensor.mf @@ -0,0 +1,43 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source.  A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019 Joyent, Inc. +# + +<include global_zone_only_component> +set name=pkg.fmri value=pkg:/driver/cpu/sensor@$(PKGVERS) +set name=pkg.description value="CPU Sensor Drivers" +set name=pkg.summary value="CPU Sensor Drivers" +set name=info.classification \ +    value=org.opensolaris.category.2008:System/Hardware +set name=variant.arch value=i386 +dir path=kernel group=sys +dir path=kernel/drv group=sys +dir path=kernel/drv/$(ARCH64) group=sys +dir path=usr/include +dir path=usr/include/sys +dir path=usr/lib/devfsadm group=sys +dir path=usr/lib/devfsadm/linkmod group=sys +dir path=usr/share/man +dir path=usr/share/man/man7d +driver name=amdf17nbdf \ +    alias=pci1022,1450 \ +    alias=pci1022,1460 +driver name=coretemp +file path=kernel/drv/$(ARCH64)/amdf17nbdf group=sys +file path=kernel/drv/$(ARCH64)/coretemp group=sys +file path=kernel/drv/coretemp.conf group=sys +file path=usr/include/sys/sensors.h mode=0644 +file path=usr/lib/devfsadm/linkmod/SUNW_sensor_link.so group=sys +file path=usr/share/man/man7d/amdf17nbdf.7d +file path=usr/share/man/man7d/coretemp.7d +license lic_CDDL license=lic_CDDL diff --git 
a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index b4dd69badd..d95fc0525e 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -21,7 +21,7 @@  #  # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2018, Joyent, Inc. +# Copyright 2019, Joyent, Inc.  # Copyright 2013 Garrett D'Amore <garrett@damore.org>  # Copyright 2013 Saso Kiselkov. All rights reserved.  # Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com> @@ -505,6 +505,7 @@ CHKHDRS=			\  	sema_impl.h		\  	semaphore.h		\  	sendfile.h		\ +	sensors.h		\  	ser_sync.h		\  	session.h		\  	sha1.h			\ diff --git a/usr/src/uts/common/sys/sensors.h b/usr/src/uts/common/sys/sensors.h new file mode 100644 index 0000000000..b9ca9f1f3f --- /dev/null +++ b/usr/src/uts/common/sys/sensors.h @@ -0,0 +1,81 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +#ifndef _SYS_SENSORS_H +#define	_SYS_SENSORS_H + +/* + * Consolidated sensor ioctls for various parts of the operating system. These + * interfaces should not be relied on at all. They are evolving and will change + * as we add more to the system for this. This may eventually become a larger + * framework, though it's more likely we'll consolidate that in userland. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * List of different possible kinds of sensors. + */ +#define	SENSOR_KIND_UNKNOWN		0x00 +#define	SENSOR_KIND_TEMPERATURE		0x01 + +/* + * Lists of units that senors may have. 
+ */ +#define	SENSOR_UNIT_UNKNOWN		0x00 +#define	SENSOR_UNIT_CELSIUS		0x01 +#define	SENSOR_UNIT_FAHRENHEIT		0x02 +#define	SENSOR_UNIT_KELVIN		0x03 + +#define	SENSOR_IOCTL	(('s' << 24) | ('e' << 16) | ('n' << 8)) + +/* + * Ask the sensor what kind of sensor it is. + */ +#define	SENSOR_IOCTL_TYPE	(SENSOR_IOCTL | 0x01) + +typedef struct sensor_ioctl_kind { +	uint64_t	sik_kind; +} sensor_ioctl_kind_t; + +/* + * Ask the sensor for a temperature measurement. The sensor is responsible for + * returning the units it's in.  A temperature measurement is broken down into a + * signed value and a notion of its granularity. The sit_gran member indicates + * the granularity: the number of increments per degree in the temperature + * measurement (the sit_temp member). sit_gran is signed and the sign indicates + * whether one needs to multiply or divide the granularity. For example, a + * value that set sit_gran to 10 would mean that the value in sit_temp was in + * 10ths of a degree and that to get the actual value in degrees, one would + * divide by 10. On the other hand, a negative value means that we effectively + * have to multiply to get there. For example, a value of -2 would indicate that + * each value in sit_temp indicated two degrees and to get the temperature in + * degrees you would multiply sit_temp by two. + */ +#define	SENSOR_IOCTL_TEMPERATURE	(SENSOR_IOCTL | 0x02) + +typedef struct sensor_ioctl_temperature { +	uint32_t	sit_unit; +	int32_t		sit_gran; +	int64_t		sit_temp; +} sensor_ioctl_temperature_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SENSORS_H */ diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h index 1d94c8fd2c..5a98e6e625 100644 --- a/usr/src/uts/common/sys/sunddi.h +++ b/usr/src/uts/common/sys/sunddi.h @@ -24,6 +24,7 @@   * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.   * Copyright (c) 2012 by Delphix. All rights reserved.   * Copyright 2016 Nexenta Systems, Inc.  All rights reserved. 
+ * Copyright 2019, Joyent, Inc.   */  #ifndef	_SYS_SUNDDI_H @@ -202,13 +203,13 @@ extern "C" {  #define	DDI_NT_KEYBOARD	"ddi_keyboard"		/* keyboard device */ -#define	DDI_NT_PARALLEL "ddi_parallel"		/* parallel port */ +#define	DDI_NT_PARALLEL	"ddi_parallel"		/* parallel port */  #define	DDI_NT_PRINTER	"ddi_printer"		/* printer device */  #define	DDI_NT_UGEN	"ddi_generic:usb"	/* USB generic drv */ -#define	DDI_NT_SMP	"ddi_sas_smp" 		/* smp devcies */ +#define	DDI_NT_SMP	"ddi_sas_smp"		/* smp devcies */  #define	DDI_NT_NEXUS	"ddi_ctl:devctl"	/* nexus drivers */ @@ -260,6 +261,11 @@ extern "C" {  #define	DDI_NT_INTRCTL		"ddi_tool_intr"	/* tool intr access */  /* + * Various device types used for sensors. + */ +#define	DDI_NT_SENSOR_TEMP_CPU	"ddi_sensor:temperature:cpu" + +/*   * DDI event definitions   */  #define	EC_DEVFS	"EC_devfs"	/* Event class devfs */ @@ -839,7 +845,7 @@ ddi_prop_op_nblocks_blksize(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,   *		allocated by property provider via kmem_alloc. Requester   *		is responsible for freeing returned property via kmem_free.   * - * 	Arguments: + *	Arguments:   *   *	dev:	Input:	dev_t of property.   *	dip:	Input:	dev_info_t pointer of child. @@ -850,7 +856,7 @@ ddi_prop_op_nblocks_blksize(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,   *	valuep:	Output:	Addr of callers buffer pointer.   *	lengthp:Output:	*lengthp will contain prop length on exit.   * - * 	Possible Returns: + *	Possible Returns:   *   *		DDI_PROP_SUCCESS:	Prop found and returned.   
*		DDI_PROP_NOT_FOUND:	Prop not found diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c index 06690e8357..bdd582f9ec 100644 --- a/usr/src/uts/i86pc/os/cpuid.c +++ b/usr/src/uts/i86pc/os/cpuid.c @@ -1039,7 +1039,9 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {  	"avx512_vnni",  	"amd_pcec",  	"mb_clear", -	"mds_no" +	"mds_no", +	"core_thermal", +	"pkg_thermal"  };  boolean_t @@ -2502,6 +2504,41 @@ cpuid_pass1_topology(cpu_t *cpu, uchar_t *featureset)  	}  } +/* + * Gather relevant CPU features from leaf 6 which covers thermal information. We + * always gather leaf 6 if it's supported; however, we only look for features on + * Intel systems as AMD does not currently define any of the features we look + * for below. + */ +static void +cpuid_pass1_thermal(cpu_t *cpu, uchar_t *featureset) +{ +	struct cpuid_regs *cp; +	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi; + +	if (cpi->cpi_maxeax < 6) { +		return; +	} + +	cp = &cpi->cpi_std[6]; +	cp->cp_eax = 6; +	cp->cp_ebx = cp->cp_ecx = cp->cp_edx = 0; +	(void) __cpuid_insn(cp); +	platform_cpuid_mangle(cpi->cpi_vendor, 6, cp); + +	if (cpi->cpi_vendor != X86_VENDOR_Intel) { +		return; +	} + +	if ((cp->cp_eax & CPUID_INTC_EAX_DTS) != 0) { +		add_x86_feature(featureset, X86FSET_CORE_THERMAL); +	} + +	if ((cp->cp_eax & CPUID_INTC_EAX_PTM) != 0) { +		add_x86_feature(featureset, X86FSET_PKG_THERMAL); +	} +} +  void  cpuid_pass1(cpu_t *cpu, uchar_t *featureset)  { @@ -3340,6 +3377,7 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)  	}  	cpuid_pass1_topology(cpu, featureset); +	cpuid_pass1_thermal(cpu, featureset);  	/*  	 * Synthesize chip "revision" and socket type @@ -3403,9 +3441,9 @@ cpuid_pass2(cpu_t *cpu)  		cp->cp_eax = n;  		/* -		 * n == 7 was handled in pass 1 +		 * leaves 6 and 7 were handled in pass 1  		 */ -		if (n == 7) +		if (n == 6 || n == 7)  			continue;  		/* @@ -6548,7 +6586,7 @@ cpuid_arat_supported(void)  		if (cpi->cpi_maxeax >= 6) {  			regs.cp_eax = 6;  			(void) 
cpuid_insn(NULL, ®s); -			return (regs.cp_eax & CPUID_CSTATE_ARAT); +			return (regs.cp_eax & CPUID_INTC_EAX_ARAT);  		} else {  			return (0);  		} @@ -6582,7 +6620,7 @@ cpuid_iepb_supported(struct cpu *cp)  	regs.cp_eax = 0x6;  	(void) cpuid_insn(NULL, ®s); -	return (regs.cp_ecx & CPUID_EPB_SUPPORT); +	return (regs.cp_ecx & CPUID_INTC_ECX_PERFBIAS);  }  /* diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index aa395ea614..2e38cfcab8 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -21,7 +21,7 @@  #  # Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2013, Joyent, Inc. All rights reserved. +# Copyright 2019, Joyent, Inc.  # Copyright 2018 Nexenta Systems, Inc.  # @@ -327,3 +327,13 @@ VMXNET3S_OBJS =	vmxnet3_main.o \  # VMware PVSCSI SCSI Controller  #  PVSCSI_OBJS =	pvscsi.o + +# +# Intel Temperature Module +# +CORETEMP_OBJS =	coretemp.o + +# +# AMD Family 17 northbridge driver +# +AMDF17NBDF_OBJS = amdf17nbdf.o diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 2cdbd36046..8e691a9d66 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -21,7 +21,7 @@  #  # Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.  # Copyright (c) 2013 Andrew Stormont.  All rights reserved. -# Copyright 2016 Joyent, Inc. +# Copyright 2019, Joyent, Inc.  # Copyright 2016 Garrett D'Amore <garrett@damore.org>  # Copyright 2018 Nexenta Systems, Inc.  
# @@ -209,7 +209,7 @@ DRV_KMODS	+= audiopci  DRV_KMODS	+= audiosolo  DRV_KMODS	+= audiots  DRV_KMODS	+= audiovia823x -DRV_KMODS	+= bl  +DRV_KMODS	+= bl  DRV_KMODS	+= blkdev  DRV_KMODS	+= bge  DRV_KMODS	+= bofi @@ -492,9 +492,9 @@ DRV_KMODS	+= xhci  #  DRV_KMODS	+= usbgem  DRV_KMODS	+= axf -DRV_KMODS	+= udmf  +DRV_KMODS	+= udmf  DRV_KMODS	+= upf -DRV_KMODS	+= urf  +DRV_KMODS	+= urf  #  #	1394 modules @@ -508,7 +508,7 @@ DRV_KMODS	+= dcam1394  #	InfiniBand pseudo drivers  #  DRV_KMODS	+= ib ibp eibnx eoib rdsib sdp iser daplt hermon tavor sol_ucma sol_uverbs -DRV_KMODS	+= sol_umad  +DRV_KMODS	+= sol_umad  #  #	Brand modules @@ -728,3 +728,9 @@ DACF_KMODS	+= net_dacf  # global cross check.  #  LINTFLAGS	+= -D_MACHDEP -I$(UTSBASE)/i86pc + +# +#	Sensor related drivers +# +DRV_KMODS	+= amdf17nbdf +DRV_KMODS	+= coretemp diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules index 5e308c582d..2e193a40fb 100644 --- a/usr/src/uts/intel/Makefile.rules +++ b/usr/src/uts/intel/Makefile.rules @@ -146,10 +146,18 @@ $(OBJS_DIR)/%.o:		$(UTSBASE)/intel/io/amd8111s/%.c  	$(COMPILE.c) -o $@ $<  	$(CTFCONVERT_O) +$(OBJS_DIR)/%.o:		$(UTSBASE)/intel/io/amdf17nbdf/%.c +	$(COMPILE.c) -o $@ $< +	$(CTFCONVERT_O) +  $(OBJS_DIR)/%.o:		$(UTSBASE)/intel/io/amr/%.c  	$(COMPILE.c) -o $@ $<  	$(CTFCONVERT_O) +$(OBJS_DIR)/%.o:		$(UTSBASE)/intel/io/coretemp/%.c +	$(COMPILE.c) -o $@ $< +	$(CTFCONVERT_O) +  $(OBJS_DIR)/%.o:		$(UTSBASE)/intel/io/hotplug/pcicfg/%.c  	$(COMPILE.c) -o $@ $<  	$(CTFCONVERT_O) diff --git a/usr/src/uts/intel/amdf17nbdf/Makefile b/usr/src/uts/intel/amdf17nbdf/Makefile new file mode 100644 index 0000000000..a5543f176f --- /dev/null +++ b/usr/src/uts/intel/amdf17nbdf/Makefile @@ -0,0 +1,47 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source.  A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +UTSBASE = ../.. + +MODULE		= amdf17nbdf +OBJECTS		= $(AMDF17NBDF_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR	= $(UTSBASE)/intel/io/amdf17nb + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET	= $(BINARY) $(CONFMOD) +LINT_TARGET	= $(MODULE).lint +INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) + +.KEEP_STATE: + +def:		$(DEF_DEPS) + +all:		$(ALL_DEPS) + +clean:		$(CLEAN_DEPS) + +clobber:	$(CLOBBER_DEPS) + +lint:		$(LINT_DEPS) + +modlintlib:	$(MODLINTLIB_DEPS) + +clean.lint:	$(CLEAN_LINT_DEPS) + +install:	$(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/coretemp/Makefile b/usr/src/uts/intel/coretemp/Makefile new file mode 100644 index 0000000000..9ce4a8ab56 --- /dev/null +++ b/usr/src/uts/intel/coretemp/Makefile @@ -0,0 +1,54 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source.  A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +UTSBASE = ../.. + +MODULE		= coretemp +OBJECTS		= $(CORETEMP_OBJS:%=$(OBJS_DIR)/%) +ROOTMODULE	= $(ROOT_DRV_DIR)/$(MODULE) +CONF_SRCDIR	= $(UTSBASE)/intel/io/coretemp + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET	= $(BINARY) $(CONFMOD) +LINT_TARGET	= $(MODULE).lint +INSTALL_TARGET	= $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) + +# +# Because we need to use cross calls directly, we must include the +# definitions below. 
Once CMI rdmsr routines have been fixed, we can +# remove this and move out of the platform specific driver world. +# +CPPFLAGS	+= -I$(UTSBASE)/i86pc/ + +.KEEP_STATE: + +def:		$(DEF_DEPS) + +all:		$(ALL_DEPS) + +clean:		$(CLEAN_DEPS) + +clobber:	$(CLOBBER_DEPS) + +lint:		$(LINT_DEPS) + +modlintlib:	$(MODLINTLIB_DEPS) + +clean.lint:	$(CLEAN_LINT_DEPS) + +install:	$(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c new file mode 100644 index 0000000000..11bddfa515 --- /dev/null +++ b/usr/src/uts/intel/io/amdf17nbdf/amdf17nbdf.c @@ -0,0 +1,1015 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * AMD Family 17 Northbridge and Data Fabric Driver + * + * This driver attaches to the AMD Family 17h northbridge and data fabric bus. + * Each Zeppelin die ('processor node' in cpuid.c parlance) has its own + * northbridge and access to the data fabric bus. The northbridge and data + * fabric both provide access to various features such as: + * + *  - The System Management Network (SMN) + *  - Data Fabric via Fabric Indirect Config Access (FICAA) + * + * These are required to access things such as temperature sensors or memory + * controller configuration registers. + * + * In AMD Family 17h systems, the 'northbridge' is an ASIC that is part of the + * package that contains many I/O capabilities related to things like PCI + * express, etc. 
The 'data fabric' is the means by which different components + * both inside the socket and multiple sockets are connected together. Both the + * northbridge and the data fabric have dedicated PCI devices which the + * operating system can use to interact with them. + * + * ------------------------ + * Mapping Devices Together + * ------------------------ + * + * The operating system needs to expose things like temperature sensors and DRAM + * configuration registers in terms that are meaningful to the system such as + * logical CPUs, cores, etc. This driver attaches to the PCI IDs that represent + * the northbridge and data fabric; however, there are multiple PCI devices (one + * per die) that exist. This driver does manage to map all of these three things + * together; however, it requires some acrobatics. Unfortunately, there's no + * direct way to map a northbridge to its corresponding die. However, we can map + * a CPU die to a data fabric PCI device and a data fabric PCI device to a + * corresponding northbridge PCI device. + * + * In current Zen based products, there is a direct mapping between processor + * nodes and a data fabric PCI device. All of the devices are on PCI Bus 0 and + * start from Device 0x18. Device 0x18 maps to processor node 0, 0x19 to + * processor node 1, etc. This means that to map a logical CPU to a data fabric + * device, we take its processor node id, add it to 0x18 and find the PCI device + * on bus 0 with that device number. As each data fabric device is attached + * based on its PCI ID, we add it to the global list, amd_nbdf_dfs, that is in + * the amdf17nbdf_t structure. + * + * The northbridge PCI device has a defined device and function, but the PCI bus + * that it's on can vary. Each die has its own series of PCI buses that are + * assigned to it and the northbridge PCI device is on the first of the + * die-specific PCI buses for each die. 
This also means that the northbridge will not show up + * on PCI bus 0, which is the PCI bus that all of the data fabric devices are + * on. While conventionally the northbridge with the lowest PCI bus value + * would correspond to processor node zero, hardware does not guarantee that at + * all. Because we don't want to be at the mercy of firmware, we don't rely on + * this ordering, even though we have yet to find a system that deviates from + * this scheme. + * + * One of the registers in the data fabric device's function 0 + * (AMDF17_DF_CFG_ADDR_CTL), happens to have the first PCI bus that is + * associated with the processor node. This means, that we can map a data fabric + * device to a northbridge by finding the northbridge whose PCI bus matches the + * value in the corresponding data fabric's AMDF17_DF_CFG_ADDR_CTL. + * + * This means that we can map a northbridge to a data fabric device and a data + * fabric device to a die. Because these are 1:1 mappings, there is a transitive + * relationship and therefore we know which northbridge is associated with which + * processor die. This is summarized in the following image: + * + *  +-------+      +----------------------------+         +--------------+ + *  | Die 0 | ---> | Data Fabric PCI BDF 0/18/0 |-------> | Northbridge  | + *  +-------+      | AMDF17_DF_CFG_ADDR: bus 10 |         | PCI  10/0/0  | + *     ...         +----------------------------+         +--------------+ + *  +-------+      +------------------------------+         +--------------+ + *  | Die n | ---> | Data Fabric PCI BDF 0/18+n/0 |-------> | Northbridge  | + *  +-------+      | AMDF17_DF_CFG_ADDR: bus 133  |         | PCI 133/0/0  | + *                 +------------------------------+         +--------------+ + * + * Note, the PCI buses used by the northbridges here are arbitrary. They do not + * reflect the actual values by hardware; however, the bus/device/function (BDF) + * of the data fabric accurately models hardware. 
All of the BDF values are in + * hex. + * + * ------------------------------- + * Attach and Detach Complications + * ------------------------------- + * + * Because we need to map different PCI devices together, this means that we + * have multiple dev_info_t structures that we need to manage. Each of these is + * independently attached and detached. While this is easily managed for attach, + * it is not for detach. + * + * Once a device has been detached it will only come back if we have an active + * minor node that will be accessed. While we have minor nodes associated with + * the northbridges, we don't with the data fabric devices. This means that if + * they are detached, nothing would ever cause them to be reattached. The system + * also doesn't provide us a way or any guarantees around making sure that we're + * attached to all such devices before we detach. As a result, unfortunately, + * it's easier to basically have detach always fail. + * + * To deal with both development and if issues arise in the field, there is a + * knob, amdf17nbdf_allow_detach, which if set to a non-zero value, will allow + * instances to detach. + * + * --------------- + * Exposed Devices + * --------------- + * + * Currently we expose a single set of character devices which represent + * temperature sensors for this family of processors. Because temperature + * sensors exist on a per-processor node basis, we create a single minor node + * for each one. Because our naming matches the cpuid naming, FMA can match that + * up to logical CPUs and take care of matching the sensors appropriately. We + * internally rate limit the sensor updates to 100ms, which is controlled by the + * global amdf17nbdf_cache_ms. 
+ */ + +#include <sys/modctl.h> +#include <sys/conf.h> +#include <sys/devops.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/open.h> +#include <sys/cred.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/cmn_err.h> +#include <sys/list.h> +#include <sys/pci.h> +#include <sys/stddef.h> +#include <sys/stat.h> +#include <sys/x86_archext.h> +#include <sys/cpuvar.h> +#include <sys/sensors.h> + +/* + * The range of minors that we'll allow. + */ +#define	AMDF17_MINOR_LOW	1 +#define	AMDF17_MINOR_HIGH	INT32_MAX + +/* + * This is the value of the first PCI data fabric device that globally exists. + * It always maps to AMD's first nodeid (what we call cpi_procnodeid). + */ +#define	AMDF17_DF_FIRST_DEVICE	0x18 + +/* + * The data fabric devices are defined to always be on PCI bus zero. + */ +#define	AMDF17_DF_BUSNO		0x00 + +/* + * This register contains the BUS A of the processor node that corresponds + * to the data fabric device. + */ +#define	AMDF17_DF_CFG_ADDR_CTL		0x84 +#define	AMDF17_DF_CFG_ADDR_CTL_MASK	0xff + +/* + * Northbridge registers that are related to accessing the SMN. One writes to + * the SMN address register and then can read from the SMN data register. + */ +#define	AMDF17_NB_SMN_ADDR	0x60 +#define	AMDF17_NB_SMN_DATA	0x64 + +/* + * The following are register offsets and the meaning of their bits related to + * temperature. These addresses are addresses in the System Management Network + * which is accessed through the northbridge.  They are not addresses in PCI + * configuration space. + */ +#define	AMDF17_SMU_THERMAL_CURTEMP			0x00059800 +#define	AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(x)	((x) >> 21) +#define	AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL		(1 << 19) + +#define	AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ		(-49) +#define	AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS		3 +#define	AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK		0x7 + +/* + * The temperature sensor in family 17 is measured in terms of 0.125 C steps. 
+ */ +#define	AMDF17_THERMAL_GRANULARITY	8 + +struct amdf17nb; +struct amdf17df; + +typedef struct amdf17nb { +	list_node_t		amd_nb_link; +	dev_info_t		*amd_nb_dip; +	ddi_acc_handle_t	amd_nb_cfgspace; +	uint_t			amd_nb_bus; +	uint_t			amd_nb_dev; +	uint_t			amd_nb_func; +	struct amdf17df		*amd_nb_df; +	uint_t			amd_nb_procnodeid; +	id_t			amd_nb_temp_minor; +	hrtime_t		amd_nb_temp_last_read; +	int			amd_nb_temp_off; +	uint32_t		amd_nb_temp_reg; +	/* Values derived from the above */ +	int64_t			amd_nb_temp; +} amdf17nb_t; + +typedef struct amdf17df { +	list_node_t		amd_df_link; +	dev_info_t		*amd_df_f0_dip; +	ddi_acc_handle_t	amd_df_f0_cfgspace; +	uint_t			amd_df_procnodeid; +	uint_t			amd_df_iobus; +	amdf17nb_t		*amd_df_nb; +} amdf17df_t; + +typedef struct amdf17nbdf { +	kmutex_t	amd_nbdf_lock; +	id_space_t	*amd_nbdf_minors; +	list_t		amd_nbdf_nbs; +	list_t		amd_nbdf_dfs; +} amdf17nbdf_t; + +typedef enum { +	AMD_NBDF_TYPE_UNKNOWN, +	AMD_NBDF_TYPE_NORTHBRIDGE, +	AMD_NBDF_TYPE_DATA_FABRIC +} amdf17nbdf_type_t; + +typedef struct { +	uint16_t		amd_nbdft_pci_did; +	amdf17nbdf_type_t	amd_nbdft_type; +} amdf17nbdf_table_t; + +static const amdf17nbdf_table_t amdf17nbdf_dev_map[] = { +	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */ +	{ 0x1450, AMD_NBDF_TYPE_NORTHBRIDGE }, +	{ 0x1460, AMD_NBDF_TYPE_DATA_FABRIC }, +	{ PCI_EINVAL16 } +}; + +typedef struct { +	const char	*amd_nbdfo_brand; +	uint_t		amd_nbdfo_family; +	int		amd_nbdfo_off; +} amdf17nbdf_offset_t; + +/* + * AMD processors report a control temperature (called Tctl) which may be + * different from the junction temperature, which is the value that is actually + * measured from the die (sometimes called Tdie or Tjct). This is done so that + * socket-based environmental monitoring can be consistent from a platform + * perspective, but doesn't help us. 
Unfortunately, these values aren't in + * datasheets that we can find, but have been documented partially in a series + * of blog posts by AMD when discussing their 'Ryzen Master' monitoring software + * for Windows. + * + * The brand strings below may contain partial matches such in the Threadripper + * cases so we can match the entire family of processors. The offset value is + * the quantity in degrees that we should adjust Tctl to reach Tdie. + */ +static const amdf17nbdf_offset_t amdf17nbdf_offsets[] = { +	{ "AMD Ryzen 5 1600X", 0x17, -20 }, +	{ "AMD Ryzen 7 1700X", 0x17, -20 }, +	{ "AMD Ryzen 7 1800X", 0x17, -20 }, +	{ "AMD Ryzen 7 2700X", 0x17, -10 }, +	{ "AMD Ryzen Threadripper 19", 0x17, -27 }, +	{ "AMD Ryzen Threadripper 29", 0x17, -27 }, +	{ NULL } +}; + +/* + * This indicates a number of milliseconds that we should wait between reads. + * This is somewhat arbitrary, but the goal is to reduce cross call activity + * and reflect that the sensor may not update all the time. + */ +uint_t amdf17nbdf_cache_ms = 100; + +/* + * This indicates whether detach is allowed. It is not by default. See the + * theory statement section 'Attach and Detach Complications' for more + * information. + */ +uint_t amdf17nbdf_allow_detach = 0; + +/* + * Global data that we keep regarding the device. 
+ */
+amdf17nbdf_t *amdf17nbdf;
+
+/*
+ * Find the northbridge whose temperature minor is 'minor'. Returns NULL when
+ * the minor is outside of the allowed range or when no northbridge on the
+ * global list owns it. The caller must hold amd_nbdf_lock.
+ */
+static amdf17nb_t *
+amdf17nbdf_lookup_nb(amdf17nbdf_t *nbdf, minor_t minor)
+{
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	if (minor < AMDF17_MINOR_LOW || minor > AMDF17_MINOR_HIGH) {
+		return (NULL);
+	}
+
+	for (amdf17nb_t *nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) {
+		if ((id_t)minor == nb->amd_nb_temp_minor) {
+			return (nb);
+		}
+	}
+
+	return (NULL);
+}
+
+/*
+ * Release everything a northbridge structure holds: its minor node(s), its
+ * minor id if one was allocated, its config space handle, and finally the
+ * structure itself. A NULL nb is ignored. This does not unlink nb from
+ * amd_nbdf_nbs; the caller must have done that already.
+ */
+static void
+amdf17nbdf_cleanup_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	if (nb == NULL)
+		return;
+
+	ddi_remove_minor_node(nb->amd_nb_dip, NULL);
+	if (nb->amd_nb_temp_minor > 0) {
+		id_free(nbdf->amd_nbdf_minors, nb->amd_nb_temp_minor);
+	}
+	if (nb->amd_nb_cfgspace != NULL) {
+		pci_config_teardown(&nb->amd_nb_cfgspace);
+	}
+	kmem_free(nb, sizeof (amdf17nb_t));
+}
+
+/*
+ * Release a data fabric structure and its config space handle. A NULL df is
+ * ignored. This does not unlink df from amd_nbdf_dfs; the caller must have
+ * done that already.
+ */
+static void
+amdf17nbdf_cleanup_df(amdf17df_t *df)
+{
+	if (df == NULL)
+		return;
+
+	if (df->amd_df_f0_cfgspace != NULL) {
+		pci_config_teardown(&df->amd_df_f0_cfgspace);
+	}
+	kmem_free(df, sizeof (amdf17df_t));
+}
+
+/*
+ * Read a 32-bit SMN register through the northbridge: the SMN address is
+ * written to the address register in config space and the value is then read
+ * back from the data register. The lock must be held so that the two
+ * accesses are not interleaved with another reader. Currently always
+ * returns 0.
+ */
+static int
+amdf17nbdf_smn_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb, uint32_t addr,
+    uint32_t *valp)
+{
+	VERIFY(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	pci_config_put32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_ADDR, addr);
+	*valp = pci_config_get32(nb->amd_nb_cfgspace, AMDF17_NB_SMN_DATA);
+
+	return (0);
+}
+
+/*
+ * Read the current temperature register and cache both the raw register
+ * value and the derived temperature in nb. The derived value, amd_nb_temp,
+ * is kept in units of 1/8th of a degree (AMDF17_THERMAL_GRANULARITY).
+ * Returns 0 on success or the error from the SMN read. The caller must hold
+ * amd_nbdf_lock.
+ */
+static int
+amdf17nbdf_temp_read(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	int ret;
+	uint32_t reg, rawtemp, decimal;
+	int64_t degrees;
+
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	/*
+	 * Update the last read time first. Even if this fails, we want to make
+	 * sure that we latch the fact that we tried.
+	 */
+	nb->amd_nb_temp_last_read = gethrtime();
+	if ((ret = amdf17nbdf_smn_read(nbdf, nb, AMDF17_SMU_THERMAL_CURTEMP,
+	    &reg)) != 0) {
+		return (ret);
+	}
+
+	nb->amd_nb_temp_reg = reg;
+
+	/*
+	 * Take the primary temperature value and break apart its decimal value
+	 * from its main value.
+	 */
+	rawtemp = AMDF17_SMU_THERMAL_CURTEMP_TEMPERATURE(reg);
+	decimal = rawtemp & AMDF17_SMU_THERMAL_CURTEMP_BITS_MASK;
+	degrees = rawtemp >> AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS;
+
+	/*
+	 * Both the range adjustment and the per-brand Tctl offset are
+	 * negative, so the whole-degree value has to be tracked in a signed
+	 * type. Doing this in a uint32_t would wrap a below-zero result into a
+	 * huge positive value once it is stored in the int64_t amd_nb_temp. We
+	 * scale with a multiply rather than a shift so that a negative value is
+	 * never left-shifted.
+	 */
+	if ((reg & AMDF17_SMU_THERMAL_CURTEMP_RANGE_SEL) != 0) {
+		degrees += AMDF17_SMU_THERMAL_CURTEMP_RANGE_ADJ;
+	}
+	degrees += nb->amd_nb_temp_off;
+	nb->amd_nb_temp = degrees *
+	    (1 << AMDF17_SMU_THERMAL_CURTEMP_DECIMAL_BITS) + decimal;
+
+	return (0);
+}
+
+/*
+ * Determine the Tctl offset that applies to this processor by prefix-matching
+ * its brand string against amdf17nbdf_offsets (the offset stays at zero if
+ * nothing matches) and then take an initial temperature reading. Returns
+ * EOVERFLOW if the brand string does not fit in the local buffer, otherwise
+ * the result of the first read.
+ */
+static int
+amdf17nbdf_temp_init(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	uint_t i, family;
+	char buf[256];
+
+	if (cpuid_getbrandstr(CPU, buf, sizeof (buf)) >= sizeof (buf)) {
+		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to read processor "
+		    "brand string, brand larger than internal buffer");
+		return (EOVERFLOW);
+	}
+
+	family = cpuid_getfamily(CPU);
+
+	for (i = 0; amdf17nbdf_offsets[i].amd_nbdfo_brand != NULL; i++) {
+		if (family != amdf17nbdf_offsets[i].amd_nbdfo_family)
+			continue;
+		if (strncmp(buf, amdf17nbdf_offsets[i].amd_nbdfo_brand,
+		    strlen(amdf17nbdf_offsets[i].amd_nbdfo_brand)) == 0) {
+			nb->amd_nb_temp_off =
+			    amdf17nbdf_offsets[i].amd_nbdfo_off;
+			break;
+		}
+	}
+
+	return (amdf17nbdf_temp_read(nbdf, nb));
+}
+
+/*
+ * Translate a PCI device ID into the kind of device (northbridge or data
+ * fabric) that it represents, or AMD_NBDF_TYPE_UNKNOWN if it is not in
+ * amdf17nbdf_dev_map.
+ */
+static amdf17nbdf_type_t
+amdf17nbdf_dip_type(uint16_t dev)
+{
+	uint_t i;
+	const amdf17nbdf_table_t *tp = amdf17nbdf_dev_map;
+
+	for (i = 0; tp[i].amd_nbdft_pci_did != PCI_EINVAL16; i++) {
+		if (tp[i].amd_nbdft_pci_did == dev) {
+			return (tp[i].amd_nbdft_type);
+		}
+	}
+
+	return (AMD_NBDF_TYPE_UNKNOWN);
+}
+
+static boolean_t
+amdf17nbdf_map(amdf17nbdf_t *nbdf, amdf17nb_t *nb, amdf17df_t *df)
+{
+	int ret;
+	char buf[128];
+
+	ASSERT(MUTEX_HELD(&nbdf->amd_nbdf_lock));
+
+	/*
+	 * This means that we encountered a duplicate. We're going to stop
+	 * processing, but we're not going to fail its attach at this point.
+	 */ +	if (nb->amd_nb_df != NULL) { +		dev_err(nb->amd_nb_dip, CE_WARN, "!trying to map NB %u/%u/%u " +		    "to DF procnode %u, but NB is already mapped to DF " +		    "procnode %u!", +		    nb->amd_nb_bus, nb->amd_nb_dev, nb->amd_nb_func, +		    df->amd_df_procnodeid, nb->amd_nb_df->amd_df_procnodeid); +		return (B_TRUE); +	} + +	/* +	 * Now that we have found a mapping, initialize our temperature +	 * information and create the minor node. +	 */ +	nb->amd_nb_procnodeid = df->amd_df_procnodeid; +	nb->amd_nb_temp_minor = id_alloc(nbdf->amd_nbdf_minors); + +	if ((ret = amdf17nbdf_temp_init(nbdf, nb)) != 0) { +		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to init SMN " +		    "temperature data on node %u: %d", nb->amd_nb_procnodeid, +		    ret); +		return (B_FALSE); +	} + +	if (snprintf(buf, sizeof (buf), "procnode.%u", nb->amd_nb_procnodeid) >= +	    sizeof (buf)) { +		dev_err(nb->amd_nb_dip, CE_WARN, "!unexpected buffer name " +		    "overrun assembling temperature minor %u", +		    nb->amd_nb_procnodeid); +		return (B_FALSE); +	} + +	if (ddi_create_minor_node(nb->amd_nb_dip, buf, S_IFCHR, +	    nb->amd_nb_temp_minor, DDI_NT_SENSOR_TEMP_CPU, 0) != DDI_SUCCESS) { +		dev_err(nb->amd_nb_dip, CE_WARN, "!failed to create minor node " +		    "%s", buf); +		return (B_FALSE); +	} + +	/* +	 * Now that's it's all done, note that they're mapped to each other. 
+	 */
+	nb->amd_nb_df = df;
+	df->amd_df_nb = nb;
+
+	return (B_TRUE);
+}
+
+/*
+ * Add a newly attached northbridge to the global list and, if the data fabric
+ * device that names its PCI bus has already attached, map the two together.
+ * Returns B_FALSE if the mapping failed. In that case nb has been taken back
+ * off of the list so that the caller can safely free it.
+ */
+static boolean_t
+amdf17nbdf_add_nb(amdf17nbdf_t *nbdf, amdf17nb_t *nb)
+{
+	amdf17df_t *df;
+	boolean_t ret = B_TRUE;
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	list_insert_tail(&nbdf->amd_nbdf_nbs, nb);
+	for (df = list_head(&nbdf->amd_nbdf_dfs); df != NULL;
+	    df = list_next(&nbdf->amd_nbdf_dfs, df)) {
+		if (nb->amd_nb_bus == df->amd_df_iobus) {
+			ret = amdf17nbdf_map(nbdf, nb, df);
+			break;
+		}
+	}
+
+	/*
+	 * Our caller frees nb when we fail. Make sure that the global list
+	 * does not keep a pointer to it.
+	 */
+	if (!ret) {
+		list_remove(&nbdf->amd_nbdf_nbs, nb);
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (ret);
+}
+
+/*
+ * Add a newly attached data fabric device to the global list and map it to
+ * any already attached northbridge that is on the PCI bus it names. Returns
+ * B_FALSE if the mapping failed. In that case df has been taken back off of
+ * the list so that the caller can safely free it.
+ */
+static boolean_t
+amdf17nbdf_add_df(amdf17nbdf_t *nbdf, amdf17df_t *df)
+{
+	amdf17nb_t *nb;
+	boolean_t ret = B_TRUE;
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	list_insert_tail(&nbdf->amd_nbdf_dfs, df);
+	for (nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL;
+	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) {
+		if (nb->amd_nb_bus == df->amd_df_iobus) {
+			ret = amdf17nbdf_map(nbdf, nb, df);
+		}
+	}
+
+	/*
+	 * Our caller frees df when we fail. Make sure that the global list
+	 * does not keep a pointer to it.
+	 */
+	if (!ret) {
+		list_remove(&nbdf->amd_nbdf_dfs, df);
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (ret);
+}
+
+static boolean_t
+amdf17nbdf_attach_nb(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl,
+    uint_t bus, uint_t dev, uint_t func)
+{
+	amdf17nb_t *nb;
+
+	nb = kmem_zalloc(sizeof (amdf17nb_t), KM_SLEEP);
+	nb->amd_nb_dip = dip;
+	nb->amd_nb_cfgspace = hdl;
+	nb->amd_nb_bus = bus;
+	nb->amd_nb_dev = dev;
+	nb->amd_nb_func = func;
+	/*
+	 * Set this to a value we won't get from the processor.
+	 */
+	nb->amd_nb_procnodeid = UINT_MAX;
+
+	if (!amdf17nbdf_add_nb(nbdf, nb)) {
+		amdf17nbdf_cleanup_nb(nbdf, nb);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Validate and record a data fabric device. The processor node id is derived
+ * from the PCI device number and the first PCI bus of the node is read from
+ * AMDF17_DF_CFG_ADDR_CTL. This function takes ownership of hdl: on every
+ * failure path the handle is torn down here (directly, or through
+ * amdf17nbdf_cleanup_df()), so the caller must not use it again after a
+ * B_FALSE return.
+ */
+static boolean_t
+amdf17nbdf_attach_df(amdf17nbdf_t *nbdf, dev_info_t *dip, ddi_acc_handle_t hdl,
+    uint_t bus, uint_t dev, uint_t func)
+{
+	amdf17df_t *df;
+
+	if (bus != AMDF17_DF_BUSNO) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "unexpected PCI bus assignment, found 0x%x, expected 0x%x",
+		    bus, AMDF17_DF_BUSNO);
+		pci_config_teardown(&hdl);
+		return (B_FALSE);
+	}
+
+	if (dev < AMDF17_DF_FIRST_DEVICE) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "PCI device assignment below the first minimum device "
+		    "(0x%x): 0x%x", AMDF17_DF_FIRST_DEVICE, dev);
+		pci_config_teardown(&hdl);
+		return (B_FALSE);
+	}
+
+	/*
+	 * At the moment we only care about function 0. However, we may care
+	 * about Function 4 in the future which has access to the FICAA.
+	 * However, only function zero should ever be attached, so this is just
+	 * an extra precaution.
+	 */
+	if (func != 0) {
+		dev_err(dip, CE_WARN, "!encountered data fabric device with "
+		    "unxpected PCI function assignment, found 0x%x, expected "
+		    "0x0", func);
+		pci_config_teardown(&hdl);
+		return (B_FALSE);
+	}
+
+	df = kmem_zalloc(sizeof (amdf17df_t), KM_SLEEP);
+	df->amd_df_f0_dip = dip;
+	df->amd_df_f0_cfgspace = hdl;
+	df->amd_df_procnodeid = dev - AMDF17_DF_FIRST_DEVICE;
+	df->amd_df_iobus = pci_config_get32(hdl, AMDF17_DF_CFG_ADDR_CTL) &
+	    AMDF17_DF_CFG_ADDR_CTL_MASK;
+
+	if (!amdf17nbdf_add_df(nbdf, df)) {
+		amdf17nbdf_cleanup_df(df);
+		return (B_FALSE);
+	}
+
+	return (B_TRUE);
+}
+
+/*
+ * Open a temperature sensor minor. Only callers in the global zone that pass
+ * drv_priv() are allowed, the device is read-only, and the minor must belong
+ * to a known northbridge.
+ */
+static int
+amdf17nbdf_open(dev_t *devp, int flags, int otype, cred_t *credp)
+{
+	amdf17nbdf_t *nbdf = amdf17nbdf;
+	minor_t m;
+
+	if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp)) {
+		return (EPERM);
+	}
+
+	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) {
+		return (EINVAL);
+	}
+
+	if (otype != OTYP_CHR) {
+		return (EINVAL);
+	}
+
+	m = getminor(*devp);
+
+	/*
+	 * Sanity check the minor
+	 */
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	if (amdf17nbdf_lookup_nb(nbdf, m) == NULL) {
+		mutex_exit(&nbdf->amd_nbdf_lock);
+		return (ENXIO);
+	}
+	mutex_exit(&nbdf->amd_nbdf_lock);
+
+	return (0);
+}
+
+/*
+ * SENSOR_IOCTL_TYPE: report that this is a temperature sensor. Returns EFAULT
+ * if the result cannot be copied out.
+ */
+static int
+amdf17nbdf_ioctl_kind(intptr_t arg, int mode)
+{
+	sensor_ioctl_kind_t kind;
+
+	bzero(&kind, sizeof (sensor_ioctl_kind_t));
+	kind.sik_kind = SENSOR_KIND_TEMPERATURE;
+
+	if (ddi_copyout((void *)&kind, (void *)arg,
+	    sizeof (sensor_ioctl_kind_t), mode & FKIOCTL) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+static int
+amdf17nbdf_ioctl_temp(amdf17nbdf_t *nbdf, minor_t minor, intptr_t arg, int mode)
+{
+	amdf17nb_t *nb;
+	hrtime_t diff;
+	sensor_ioctl_temperature_t temp;
+
+	bzero(&temp, sizeof (temp));
+
+	mutex_enter(&nbdf->amd_nbdf_lock);
+	nb = amdf17nbdf_lookup_nb(nbdf, minor);
+	if (nb == NULL) {
+		mutex_exit(&nbdf->amd_nbdf_lock);
+		return (ENXIO);
+	}
+
+	diff = NSEC2MSEC(gethrtime() - nb->amd_nb_temp_last_read);
+	if (diff > 0 && diff >
(hrtime_t)amdf17nbdf_cache_ms) { +		int ret; + +		ret = amdf17nbdf_temp_read(nbdf, nb); +		if (ret != 0) { +			mutex_exit(&nbdf->amd_nbdf_lock); +			return (ret); +		} +	} + +	temp.sit_unit = SENSOR_UNIT_CELSIUS; +	temp.sit_temp = nb->amd_nb_temp; +	temp.sit_gran = AMDF17_THERMAL_GRANULARITY; +	mutex_exit(&nbdf->amd_nbdf_lock); + +	if (ddi_copyout(&temp, (void *)arg, sizeof (temp), +	    mode & FKIOCTL) != 0) { +		return (EFAULT); +	} + +	return (0); +} + +static int +amdf17nbdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, +    int *rvalp) +{ +	minor_t m; +	amdf17nbdf_t *nbdf = amdf17nbdf; + +	if ((mode & FREAD) == 0) { +		return (EINVAL); +	} + +	m = getminor(dev); + +	switch (cmd) { +	case SENSOR_IOCTL_TYPE: +		return (amdf17nbdf_ioctl_kind(arg, mode)); +	case SENSOR_IOCTL_TEMPERATURE: +		return (amdf17nbdf_ioctl_temp(nbdf, m, arg, mode)); +	default: +		return (ENOTTY); +	} +} + +/* + * We don't really do any state tracking on close, so for now, just allow it to + * always succeed. 
+ */ +static int +amdf17nbdf_close(dev_t dev, int flags, int otype, cred_t *credp) +{ +	return (0); +} + +static int +amdf17nbdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ +	uint_t nregs; +	int *regs; +	uint_t bus, dev, func; +	uint16_t pci_did; +	ddi_acc_handle_t pci_hdl; +	amdf17nbdf_type_t type; +	amdf17nbdf_t *nbdf = amdf17nbdf; + +	if (cmd == DDI_RESUME) +		return (DDI_SUCCESS); +	if (cmd != DDI_ATTACH) +		return (DDI_FAILURE); + +	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg", +	    ®s, &nregs) != DDI_PROP_SUCCESS) { +		dev_err(dip, CE_WARN, "!failed to find pci 'reg' property"); +		return (DDI_FAILURE); +	} + +	if (nregs < 1) { +		ddi_prop_free(regs); +		return (DDI_FAILURE); +	} + +	bus = PCI_REG_BUS_G(regs[0]); +	dev = PCI_REG_DEV_G(regs[0]); +	func = PCI_REG_FUNC_G(regs[0]); + +	ddi_prop_free(regs); + +	if (pci_config_setup(dip, &pci_hdl) != DDI_SUCCESS) { +		dev_err(dip, CE_WARN, "!failed to map pci devices"); +		return (DDI_FAILURE); +	} + +	pci_did = pci_config_get16(pci_hdl, PCI_CONF_DEVID); + +	type = amdf17nbdf_dip_type(pci_did); +	switch (type) { +	case AMD_NBDF_TYPE_NORTHBRIDGE: +		if (!amdf17nbdf_attach_nb(nbdf, dip, pci_hdl, bus, dev, func)) { +			return (DDI_FAILURE); +		} +		break; +	case AMD_NBDF_TYPE_DATA_FABRIC: +		if (!amdf17nbdf_attach_df(nbdf, dip, pci_hdl, bus, dev, func)) { +			return (DDI_FAILURE); +		} +		break; +	default: +		pci_config_teardown(&pci_hdl); +		return (DDI_FAILURE); +	} + +	return (DDI_SUCCESS); +} + +/* + * Unfortunately, it's hard for us to really support detach here. The problem is + * that we need both the data fabric devices and the northbridges to make sure + * that we map everything. However, only the northbridges actually create minor + * nodes that'll be opened and thus trigger them to reattach when accessed. What + * we should probably look at doing in the future is making this into a nexus + * driver that enumerates children like a temperature driver. 
+ */ +static int +amdf17nbdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ +	amdf17nbdf_t *nbdf = amdf17nbdf; + +	if (cmd == DDI_SUSPEND) +		return (DDI_SUCCESS); + +	if (nbdf == NULL) { +		return (DDI_FAILURE); +	} + +	if (amdf17nbdf_allow_detach == 0) { +		return (DDI_FAILURE); +	} + +	mutex_enter(&nbdf->amd_nbdf_lock); +	for (amdf17nb_t *nb = list_head(&nbdf->amd_nbdf_nbs); nb != NULL; +	    nb = list_next(&nbdf->amd_nbdf_nbs, nb)) { +		if (dip == nb->amd_nb_dip) { +			list_remove(&nbdf->amd_nbdf_nbs, nb); +			if (nb->amd_nb_df != NULL) { +				ASSERT3P(nb->amd_nb_df->amd_df_nb, ==, nb); +				nb->amd_nb_df->amd_df_nb = NULL; +			} +			amdf17nbdf_cleanup_nb(nbdf, nb); +			mutex_exit(&nbdf->amd_nbdf_lock); +			return (DDI_SUCCESS); +		} +	} + +	for (amdf17df_t *df = list_head(&nbdf->amd_nbdf_dfs); df != NULL; +	    df = list_next(&nbdf->amd_nbdf_nbs, df)) { +		if (dip == df->amd_df_f0_dip) { +			list_remove(&nbdf->amd_nbdf_dfs, df); +			if (df->amd_df_nb != NULL) { +				ASSERT3P(df->amd_df_nb->amd_nb_df, ==, df); +				df->amd_df_nb->amd_nb_df = NULL; +			} +			amdf17nbdf_cleanup_df(df); +			mutex_exit(&nbdf->amd_nbdf_lock); +			return (DDI_SUCCESS); +		} +	} +	mutex_exit(&nbdf->amd_nbdf_lock); + +	return (DDI_FAILURE); +} + +static int +amdf17nbdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, +    void **resultp) +{ +	dev_t dev; +	minor_t minor; +	amdf17nbdf_t *nbdf; +	amdf17nb_t *nb; + +	switch (cmd) { +	case DDI_INFO_DEVT2DEVINFO: +	case DDI_INFO_DEVT2INSTANCE: +		break; +	default: +		return (DDI_FAILURE); +	} + +	dev = (dev_t)arg; +	minor = getminor(dev); +	nbdf = amdf17nbdf; + +	mutex_enter(&nbdf->amd_nbdf_lock); +	nb = amdf17nbdf_lookup_nb(nbdf, (id_t)minor); +	if (nb == NULL) { +		mutex_exit(&nbdf->amd_nbdf_lock); +		return (DDI_FAILURE); +	} +	if (cmd == DDI_INFO_DEVT2DEVINFO) { +		*resultp = nb->amd_nb_dip; +	} else { +		int inst = ddi_get_instance(nb->amd_nb_dip); +		*resultp = (void *)(uintptr_t)inst; +	} +	mutex_exit(&nbdf->amd_nbdf_lock); + 
+	return (DDI_SUCCESS); +} + +static void +amdf17nbdf_destroy(amdf17nbdf_t *nbdf) +{ +	amdf17nb_t *nb; +	amdf17df_t *df; + +	while ((nb = list_remove_head(&nbdf->amd_nbdf_nbs)) != NULL) { +		amdf17nbdf_cleanup_nb(nbdf, nb); +	} +	list_destroy(&nbdf->amd_nbdf_nbs); + +	while ((df = list_remove_head(&nbdf->amd_nbdf_dfs)) != NULL) { +		amdf17nbdf_cleanup_df(df); +	} +	list_destroy(&nbdf->amd_nbdf_dfs); + +	if (nbdf->amd_nbdf_minors != NULL) { +		id_space_destroy(nbdf->amd_nbdf_minors); +	} + +	mutex_destroy(&nbdf->amd_nbdf_lock); +	kmem_free(nbdf, sizeof (amdf17nbdf_t)); +} + +static amdf17nbdf_t * +amdf17nbdf_create(void) +{ +	amdf17nbdf_t *nbdf; + +	nbdf = kmem_zalloc(sizeof (amdf17nbdf_t), KM_SLEEP); +	mutex_init(&nbdf->amd_nbdf_lock, NULL, MUTEX_DRIVER, NULL); +	list_create(&nbdf->amd_nbdf_nbs, sizeof (amdf17nb_t), +	    offsetof(amdf17nb_t, amd_nb_link)); +	list_create(&nbdf->amd_nbdf_dfs, sizeof (amdf17df_t), +	    offsetof(amdf17df_t, amd_df_link)); +	if ((nbdf->amd_nbdf_minors = id_space_create("amdf17nbdf_minors", +	    AMDF17_MINOR_LOW, AMDF17_MINOR_HIGH)) == NULL) { +		amdf17nbdf_destroy(nbdf); +		return (NULL); +	} + +	return (nbdf); +} + +static struct cb_ops amdf17nbdf_cb_ops = { +	.cb_open = amdf17nbdf_open, +	.cb_close = amdf17nbdf_close, +	.cb_strategy = nodev, +	.cb_print = nodev, +	.cb_dump = nodev, +	.cb_read = nodev, +	.cb_write = nodev, +	.cb_ioctl = amdf17nbdf_ioctl, +	.cb_devmap = nodev, +	.cb_mmap = nodev, +	.cb_segmap = nodev, +	.cb_chpoll = nochpoll, +	.cb_prop_op = ddi_prop_op, +	.cb_flag = D_MP, +	.cb_rev = CB_REV, +	.cb_aread = nodev, +	.cb_awrite = nodev +}; + +static struct dev_ops amdf17nbdf_dev_ops = { +	.devo_rev = DEVO_REV, +	.devo_refcnt = 0, +	.devo_getinfo = amdf17nbdf_getinfo, +	.devo_identify = nulldev, +	.devo_probe = nulldev, +	.devo_attach = amdf17nbdf_attach, +	.devo_detach = amdf17nbdf_detach, +	.devo_reset = nodev, +	.devo_power = ddi_power, +	.devo_quiesce = ddi_quiesce_not_needed, +	.devo_cb_ops = &amdf17nbdf_cb_ops +}; 
+ +static struct modldrv amdf17nbdf_modldrv = { +	.drv_modops = &mod_driverops, +	.drv_linkinfo = "AMD Family 17h Driver", +	.drv_dev_ops = &amdf17nbdf_dev_ops +}; + +static struct modlinkage amdf17nbdf_modlinkage = { +	.ml_rev = MODREV_1, +	.ml_linkage = { &amdf17nbdf_modldrv, NULL } +}; + +int +_init(void) +{ +	int ret; +	amdf17nbdf_t *nbdf; + +	if ((nbdf = amdf17nbdf_create()) == NULL) { +		return (ENOMEM); +	} + +	if ((ret = mod_install(&amdf17nbdf_modlinkage)) != 0) { +		amdf17nbdf_destroy(amdf17nbdf); +		return (ret); +	} + +	amdf17nbdf = nbdf; +	return (ret); +} + +int +_info(struct modinfo *modinfop) +{ +	return (mod_info(&amdf17nbdf_modlinkage, modinfop)); +} + +int +_fini(void) +{ +	int ret; + +	if ((ret = mod_remove(&amdf17nbdf_modlinkage)) != 0) { +		return (ret); +	} + +	amdf17nbdf_destroy(amdf17nbdf); +	amdf17nbdf = NULL; +	return (ret); +} diff --git a/usr/src/uts/intel/io/coretemp/coretemp.c b/usr/src/uts/intel/io/coretemp/coretemp.c new file mode 100644 index 0000000000..e21d385991 --- /dev/null +++ b/usr/src/uts/intel/io/coretemp/coretemp.c @@ -0,0 +1,784 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source.  A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2019, Joyent, Inc. + */ + +/* + * Intel CPU Thermal sensor driver + * + * These MSRs that were used were introduced with the 'Core' family processors + * and have since spread beyond there, even to the Atom line. Currently, + * temperature sensors exist on a per-core basis and optionally on a per-package + * basis. The temperature sensor exposes a reading that's relative to the + * processor's maximum junction temperature, often referred to as Tj. 
/*
 * Per-core sensor state. One of these is allocated for strand zero of every
 * core found while walking the CMI handles and is linked onto the
 * coretemp_cores list of the driver soft state.
 */
typedef struct coretemp_core {
	/* Linkage on the coretemp_cores list. */
	list_node_t		ctc_link;
	/* Minor number of this core's device node. */
	id_t			ctc_core_minor;
	/*
	 * Minor number of the package device node. Only allocated for core
	 * zero when package thermal support is present; otherwise it is left
	 * at zero, which is below the valid minor range.
	 */
	id_t			ctc_pkg_minor;
	/* CMI class, chip, core and strand used to look the handle up again. */
	enum cmi_hdl_class	ctc_class;
	uint_t			ctc_chip;
	uint_t			ctc_core;
	uint_t			ctc_strand;
	/* Tj Max as extracted from MSR_TEMPERATURE_TARGET. */
	uint_t			ctc_tjmax;
	/* gethrtime() value taken at the start of the most recent read. */
	hrtime_t		ctc_last_read;
	/* Raw contents of the core and package thermal MSRs. */
	uint64_t		ctc_core_status;
	uint64_t		ctc_core_intr;
	uint64_t		ctc_pkg_status;
	uint64_t		ctc_pkg_intr;
	/* Count of reads that were not valid or were not below Tj Max. */
	uint64_t		ctc_invalid_reads;
	/* The following fields are derived from above */
	/* Tj Max minus the core status reading. */
	uint_t			ctc_temperature;
	/* Resolution field of the core status register. */
	uint_t			ctc_resolution;
	/* Tj Max minus the package status reading. */
	uint_t			ctc_pkg_temperature;
} coretemp_core_t;
+coretemp_t *coretemp; + +/* + * This indicates a number of milliseconds that we should wait between reads. + * This is somewhat arbitrary, but the goal is to reduce cross call activity + * and reflect that the sensor may not update all the time. + */ +uint_t coretemp_cache_ms = 100; + +static int +coretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) +{ +	uint_t msr = (uint_t)arg1; +	uint64_t *valp = (uint64_t *)arg2; +	cmi_errno_t *errp = (cmi_errno_t *)arg3; + +	on_trap_data_t otd; + +	if (on_trap(&otd, OT_DATA_ACCESS) == 0) { +		if (checked_rdmsr(msr, valp) == 0) { +			*errp = CMI_SUCCESS; +		} else { +			*errp = CMIERR_NOTSUP; +		} +	} else { +		*errp = CMIERR_MSRGPF; +	} +	no_trap(); + +	return (0); +} + +/* + * This really should just be a call to the CMI handle to provide us the MSR. + * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is + * fixed for use outside of a panic-like context. + */ +static int +coretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp) +{ +	id_t cpu = cmi_hdl_logical_id(hdl); +	int ret = CMI_SUCCESS; + +	ASSERT(MUTEX_HELD(&ct->coretemp_mutex)); +	kpreempt_disable(); +	if (CPU->cpu_id == cpu) { +		(void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp, +		    (xc_arg_t)&ret); +	} else { +		cpuset_only(ct->coretemp_cpuset, (uint_t)cpu); +		xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&ret, +		    (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc); +	} +	kpreempt_enable(); + +	return (ret); +} + +static int +coretemp_cmi_errno(cmi_errno_t e) +{ +	switch (e) { +	case CMIERR_NOTSUP: +		return (ENOTSUP); +	default: +		return (EIO); +	} +} + +/* + * Answer the question of whether or not the driver can support the CPU in + * question. 
Right now we have the following constraints for supporting the CPU: + * + *   o The CPU is made by Intel + *   o The CPU has the Digital Thermal Sensor + *   o The CPU family is 6, which is usually implicit from the above + *   o We can determine its junction temperature through an MSR + * + * If we can't determine the junction temperature programatically, then we need + * to set up tables of CPUs to do so. This can be fleshed out and improved. + */ +static boolean_t +coretemp_supported(void) +{ +	uint_t model; + +	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) { +		return (B_FALSE); +	} + +	if (!is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL)) { +		return (B_FALSE); +	} + +	if (cpuid_getfamily(CPU) != 6) { +		return (B_FALSE); +	} + +	model = cpuid_getmodel(CPU); +	if (model <= INTC_MODEL_PENRYN || model == INTC_MODEL_SILVERTHORNE || +	    model == INTC_MODEL_LINCROFT || model == INTC_MODEL_PENWELL || +	    model == INTC_MODEL_CLOVERVIEW || model == INTC_MODEL_CEDARVIEW) { +		return (B_FALSE); +	} + +	return (B_TRUE); +} + +static coretemp_core_t * +coretemp_lookup_core(coretemp_t *ct, minor_t minor) +{ +	coretemp_core_t *ctc; + +	ASSERT(MUTEX_HELD(&ct->coretemp_mutex)); + +	if (minor < CORETEMP_MINOR_MIN || minor > CORETEMP_MINOR_MAX) { +		return (NULL); +	} + +	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL; +	    ctc = list_next(&ct->coretemp_cores, ctc)) { +		if (ctc->ctc_core_minor == (id_t)minor || +		    (ctc->ctc_pkg_minor >= CORETEMP_MINOR_MIN && +		    ctc->ctc_pkg_minor == (id_t)minor)) { +			return (ctc); +		} +	} + +	return (NULL); +} + + +/* + * We need to determine the value of Tj Max as all temperature sensors are + * derived from this value. The ease of this depends on how old the processor in + * question is. The Core family processors after Penryn have support for an MSR + * that tells us what to go for. In the Atom family, processors starting with + * Silvermont have support for an MSR that documents this value. 
For older + * processors, one needs to track down the datasheet for a specific processor. + * Two processors in the same family/model may have different values of Tj Max. + * At the moment, we only support this on processors that have that MSR. + */ +static int +coretemp_calculate_tjmax(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl) +{ +	cmi_errno_t e; +	int err = 0; +	uint64_t val = 0; + +	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val); +	if (e == CMI_SUCCESS && val != 0) { +		ctc->ctc_tjmax = MSR_TEMPERATURE_TARGET_TARGET(val); +	} else if (val == 0) { +		err = EINVAL; +	} else { +		err = coretemp_cmi_errno(e); +	} + +	return (err); +} + +static int +coretemp_read(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl) +{ +	cmi_errno_t e; +	int err = 0; +	uint64_t val = 0; + +	ctc->ctc_last_read = gethrtime(); + +	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_STATUS, &val); +	if (e == CMI_SUCCESS) { +		ctc->ctc_core_status = val; +	} else { +		err = coretemp_cmi_errno(e); +		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core " +		    "thermal status on %u/%u: %d", ctc->ctc_chip, ctc->ctc_core, +		    err); +		return (err); +	} + +	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_INTERRUPT, &val); +	if (e == CMI_SUCCESS) { +		ctc->ctc_core_intr = val; +	} else { +		err = coretemp_cmi_errno(e); +		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core " +		    "thermal interrupt on %u/%u: %d", ctc->ctc_chip, +		    ctc->ctc_core, err); +		return (err); +	} + +	/* +	 * If the last read wasn't valid, then we should keep the current state. 
+	 */ +	if ((ctc->ctc_core_status & IA32_THERM_STATUS_READ_VALID) != 0) { +		uint_t diff; +		diff = IA32_THERM_STATUS_READING(ctc->ctc_core_status); + +		if (diff >= ctc->ctc_tjmax) { +			dev_err(ct->coretemp_dip, CE_WARN, "!found invalid " +			    "core temperature on %u/%u: readout: %u, Tjmax: " +			    "%u, raw: 0x%" PRIx64, ctc->ctc_chip, +			    ctc->ctc_core, diff, ctc->ctc_tjmax, +			    ctc->ctc_core_status); +			ctc->ctc_invalid_reads++; +		} else { +			ctc->ctc_temperature = ctc->ctc_tjmax - diff; +		} +	} else { +		ctc->ctc_invalid_reads++; +	} + +	ctc->ctc_resolution = +	    IA32_THERM_STATUS_RESOLUTION(ctc->ctc_core_status); + +	/* +	 * If we have package support and this is core zero, then update the +	 * package data. +	 */ +	if (ct->coretemp_pkg && ctc->ctc_core == 0) { +		uint_t diff; + +		e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_STATUS, +		    &val); +		if (e == CMI_SUCCESS) { +			ctc->ctc_pkg_status = val; +		} else { +			err = coretemp_cmi_errno(e); +			dev_err(ct->coretemp_dip, CE_WARN, "!failed to get " +			    "package thermal status on %u: %d", ctc->ctc_chip, +			    err); +			return (err); +		} + +		e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_INTERRUPT, +		    &val); +		if (e == CMI_SUCCESS) { +			ctc->ctc_pkg_intr = val; +		} else { +			err = coretemp_cmi_errno(e); +			dev_err(ct->coretemp_dip, CE_WARN, "!failed to get " +			    "package thermal interrupt on %u: %d", +			    ctc->ctc_chip, err); +			return (err); +		} + +		diff = IA32_PKG_THERM_STATUS_READING(ctc->ctc_pkg_status); +		if (diff >= ctc->ctc_tjmax) { +			dev_err(ct->coretemp_dip, CE_WARN, "!found invalid " +			    "package temperature on %u: readout: %u, tjmax: " +			    "%u, raw: 0x%" PRIx64, ctc->ctc_chip, diff, +			    ctc->ctc_tjmax, ctc->ctc_pkg_status); +			ctc->ctc_invalid_reads++; + +		} else { +			ctc->ctc_pkg_temperature = ctc->ctc_tjmax - diff; +		} +	} + +	return (0); +} + +static int +coretemp_open(dev_t *devp, int flags, int otype, cred_t *credp) 
+{ +	coretemp_t *ct = coretemp; + +	if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp)) { +		return (EPERM); +	} + +	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) { +		return (EINVAL); +	} + +	if (otype != OTYP_CHR) { +		return (EINVAL); +	} + +	/* +	 * Sanity check the minor +	 */ +	mutex_enter(&ct->coretemp_mutex); +	if (coretemp_lookup_core(ct, getminor(*devp)) == NULL) { +		mutex_exit(&ct->coretemp_mutex); +		return (ENXIO); +	} +	mutex_exit(&ct->coretemp_mutex); + +	return (0); +} + +static int +coretemp_ioctl_kind(intptr_t arg, int mode) +{ +	sensor_ioctl_kind_t kind; + +	bzero(&kind, sizeof (kind)); +	kind.sik_kind = SENSOR_KIND_TEMPERATURE; + +	if (ddi_copyout((void *)&kind, (void *)arg, sizeof (kind), +	    mode & FKIOCTL) != 0) { +		return (EFAULT); +	} + +	return (0); +} + +static int +coretemp_ioctl_temp(coretemp_t *ct, minor_t minor, intptr_t arg, int mode) +{ +	coretemp_core_t *ctc; +	hrtime_t diff; +	sensor_ioctl_temperature_t temp; + +	bzero(&temp, sizeof (temp)); + +	mutex_enter(&ct->coretemp_mutex); +	ctc = coretemp_lookup_core(ct, minor); +	if (ctc == NULL) { +		mutex_exit(&ct->coretemp_mutex); +		return (ENXIO); +	} + +	diff = NSEC2MSEC(gethrtime() - ctc->ctc_last_read); +	if (diff > 0 && diff > (hrtime_t)coretemp_cache_ms) { +		int ret; +		cmi_hdl_t hdl; + +		if ((hdl = cmi_hdl_lookup(ctc->ctc_class, ctc->ctc_chip, +		    ctc->ctc_core, ctc->ctc_strand)) == NULL) { +			mutex_exit(&ct->coretemp_mutex); +			return (ENXIO); +		} +		ret = coretemp_read(ct, ctc, hdl); +		cmi_hdl_rele(hdl); +		if (ret != 0) { +			mutex_exit(&ct->coretemp_mutex); +			return (ret); +		} +	} + +	temp.sit_unit = SENSOR_UNIT_CELSIUS; +	if ((id_t)minor == ctc->ctc_core_minor) { +		temp.sit_temp = ctc->ctc_temperature; +	} else { +		temp.sit_temp = ctc->ctc_pkg_temperature; +	} + +	/* +	 * The resolution field is in whole units of degrees Celsius. 
+	 */ +	temp.sit_gran = ctc->ctc_resolution; +	if (ctc->ctc_resolution > 1) { +		temp.sit_gran *= -1; +	} +	mutex_exit(&ct->coretemp_mutex); + +	if (ddi_copyout(&temp, (void *)arg, sizeof (temp), +	    mode & FKIOCTL) != 0) { +		return (EFAULT); +	} + +	return (0); +} + +static int +coretemp_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, +    int *rvalp) +{ +	coretemp_t *ct = coretemp; + +	if ((mode & FREAD) == 0) { +		return (EINVAL); +	} + +	switch (cmd) { +	case SENSOR_IOCTL_TYPE: +		return (coretemp_ioctl_kind(arg, mode)); +	case SENSOR_IOCTL_TEMPERATURE: +		return (coretemp_ioctl_temp(ct, getminor(dev), arg, mode)); +	default: +		return (ENOTTY); +	} +} + +/* + * We don't really do any state tracking on close, so for now, just allow it to + * always succeed. + */ +static int +coretemp_close(dev_t dev, int flags, int otype, cred_t *credp) +{ +	return (0); +} + +static void +coretemp_fini_core(coretemp_t *ct, coretemp_core_t *ctc) +{ +	if (ctc->ctc_core_minor > 0) +		id_free(ct->coretemp_ids, ctc->ctc_core_minor); +	if (ctc->ctc_pkg_minor > 0) +		id_free(ct->coretemp_ids, ctc->ctc_pkg_minor); +	kmem_free(ctc, sizeof (coretemp_core_t)); +} + +static void +coretemp_destroy(coretemp_t *ct) +{ +	coretemp_core_t *ctc; + +	ddi_remove_minor_node(ct->coretemp_dip, NULL); + +	while ((ctc = list_remove_head(&ct->coretemp_cores)) != NULL) { +		coretemp_fini_core(ct, ctc); +	} +	list_destroy(&ct->coretemp_cores); + +	if (ct->coretemp_cpuset != NULL) { +		cpuset_free(ct->coretemp_cpuset); +	} + +	if (ct->coretemp_ids != NULL) { +		id_space_destroy(ct->coretemp_ids); +	} + +	mutex_destroy(&ct->coretemp_mutex); +	kmem_free(ct, sizeof (coretemp_t)); +} + +static int +coretemp_init_core(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3) +{ +	coretemp_t *ct = arg1; +	boolean_t *walkerr = arg2; +	coretemp_core_t *ctc; +	uint_t chip, core; +	int err; + +	chip = cmi_hdl_chipid(hdl); +	core = cmi_hdl_coreid(hdl); + +	/* +	 * The temperature sensor only exists on a 
per-core basis. Therefore we +	 * ignore any non-zero strand. +	 */ +	if (cmi_hdl_strandid(hdl) != 0) { +		return (CMI_HDL_WALK_NEXT); +	} + +	ctc = kmem_zalloc(sizeof (coretemp_core_t), KM_SLEEP); +	ctc->ctc_class = cmi_hdl_class(hdl); +	ctc->ctc_chip = chip; +	ctc->ctc_core = core; +	ctc->ctc_strand = 0; +	ctc->ctc_core_minor = id_alloc(ct->coretemp_ids); +	if (ct->coretemp_pkg && ctc->ctc_core == 0) { +		ctc->ctc_pkg_minor = id_alloc(ct->coretemp_ids); +	} + +	if ((err = coretemp_calculate_tjmax(ct, ctc, hdl)) != 0) { +		dev_err(ct->coretemp_dip, CE_WARN, +		    "failed to read Tj Max on %u/%u: %d", chip, core, err); +		*walkerr = B_TRUE; +		coretemp_fini_core(ct, ctc); +		return (CMI_HDL_WALK_DONE); +	} + +	if ((err = coretemp_read(ct, ctc, hdl)) != 0) { +		dev_err(ct->coretemp_dip, CE_WARN, +		    "failed to take initial temperature reading on %u/%u: %d", +		    chip, core, err); +		*walkerr = B_TRUE; +		coretemp_fini_core(ct, ctc); +		return (CMI_HDL_WALK_DONE); +	} + +	list_insert_tail(&ct->coretemp_cores, ctc); + +	return (CMI_HDL_WALK_NEXT); +} + +static boolean_t +coretemp_create_minors(coretemp_t *ct) +{ +	coretemp_core_t *ctc; + +	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL; +	    ctc = list_next(&ct->coretemp_cores, ctc)) { +		int ret; +		char buf[128]; + +		if (snprintf(buf, sizeof (buf), "chip%u.core%u", ctc->ctc_chip, +		    ctc->ctc_core) >= sizeof (buf)) { +			return (B_FALSE); +		} +		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR, +		    ctc->ctc_core_minor, DDI_NT_SENSOR_TEMP_CPU, 0); +		if (ret != DDI_SUCCESS) { +			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create " +			    "minor node %s", buf); +			return (B_FALSE); +		} + +		if (ctc->ctc_core != 0) +			continue; + +		if (snprintf(buf, sizeof (buf), "chip%u", ctc->ctc_chip) >= +		    sizeof (buf)) { +			return (B_FALSE); +		} + +		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR, +		    ctc->ctc_pkg_minor, DDI_NT_SENSOR_TEMP_CPU, 0); +		if (ret != 
DDI_SUCCESS) { +			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create " +			    "minor node %s", buf); +			return (B_FALSE); +		} +	} + +	return (B_TRUE); +} + +static int +coretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ +	boolean_t walkerr; +	coretemp_t *ct = NULL; + +	if (cmd == DDI_RESUME) { +		/* +		 * Currently suspend and resume for this driver are nops. +		 */ +		return (DDI_SUCCESS); +	} + +	if (cmd != DDI_ATTACH) { +		return (DDI_FAILURE); +	} + +	if (coretemp != NULL) { +		return (DDI_FAILURE); +	} + +	ct = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP); +	ct->coretemp_dip = dip; +	ct->coretemp_pkg = is_x86_feature(x86_featureset, X86FSET_PKG_THERMAL); +	list_create(&ct->coretemp_cores, sizeof (coretemp_core_t), +	    offsetof(coretemp_core_t, ctc_link)); +	mutex_init(&ct->coretemp_mutex, NULL, MUTEX_DRIVER, NULL); +	ct->coretemp_cpuset = cpuset_alloc(KM_SLEEP); +	if ((ct->coretemp_ids = id_space_create("coretemp_minors", 1, +	    INT32_MAX)) == NULL) { +		goto fail; +	} + +	mutex_enter(&ct->coretemp_mutex); +	walkerr = B_FALSE; +	cmi_hdl_walk(coretemp_init_core, ct, &walkerr, NULL); + +	if (walkerr) { +		mutex_exit(&ct->coretemp_mutex); +		goto fail; +	} + +	if (!coretemp_create_minors(ct)) { +		mutex_exit(&ct->coretemp_mutex); +		goto fail; +	} + +	coretemp = ct; +	mutex_exit(&ct->coretemp_mutex); +	return (DDI_SUCCESS); +fail: +	coretemp = NULL; +	coretemp_destroy(ct); +	return (DDI_FAILURE); + +} + +static int +coretemp_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, +    void **resultp) +{ +	int ret; + +	switch (cmd) { +	case DDI_INFO_DEVT2DEVINFO: +		*resultp = coretemp->coretemp_dip; +		ret = DDI_SUCCESS; +		break; +	case DDI_INFO_DEVT2INSTANCE: +		*resultp = (void *)0; +		ret = DDI_SUCCESS; +		break; +	default: +		ret = DDI_FAILURE; +		break; +	} + +	return (ret); +} + +static int +coretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ +	coretemp_t *ct; + +	if (cmd == DDI_SUSPEND) { +		return (DDI_SUCCESS); +	} + +	if (cmd != 
DDI_DETACH) { +		return (DDI_FAILURE); +	} + +	if (coretemp == NULL) { +		return (DDI_FAILURE); +	} + +	ct = coretemp; +	coretemp = NULL; +	coretemp_destroy(ct); + +	return (DDI_SUCCESS); +} + +static struct cb_ops coretemp_cb_ops = { +	.cb_open = coretemp_open, +	.cb_close = coretemp_close, +	.cb_strategy = nodev, +	.cb_print = nodev, +	.cb_dump = nodev, +	.cb_read = nodev, +	.cb_write = nodev, +	.cb_ioctl = coretemp_ioctl, +	.cb_devmap = nodev, +	.cb_mmap = nodev, +	.cb_segmap = nodev, +	.cb_chpoll = nochpoll, +	.cb_prop_op = ddi_prop_op, +	.cb_flag = D_MP, +	.cb_rev = CB_REV, +	.cb_aread = nodev, +	.cb_awrite = nodev +}; + +static struct dev_ops coretemp_dev_ops = { +	.devo_rev = DEVO_REV, +	.devo_refcnt = 0, +	.devo_getinfo = coretemp_getinfo, +	.devo_identify = nulldev, +	.devo_probe = nulldev, +	.devo_attach = coretemp_attach, +	.devo_detach = coretemp_detach, +	.devo_reset = nodev, +	.devo_power = ddi_power, +	.devo_quiesce = ddi_quiesce_not_needed, +	.devo_cb_ops = &coretemp_cb_ops +}; + +static struct modldrv coretemp_modldrv = { +	.drv_modops = &mod_driverops, +	.drv_linkinfo = "Intel CPU/Package thermal sensor", +	.drv_dev_ops = &coretemp_dev_ops +}; + +static struct modlinkage coretemp_modlinkage = { +	.ml_rev = MODREV_1, +	.ml_linkage = { &coretemp_modldrv, NULL } +}; + +int +_init(void) +{ +	if (!coretemp_supported()) { +		return (ENOTSUP); +	} + +	return (mod_install(&coretemp_modlinkage)); +} + +int +_info(struct modinfo *modinfop) +{ +	return (mod_info(&coretemp_modlinkage, modinfop)); +} + +int +_fini(void) +{ +	return (mod_remove(&coretemp_modlinkage)); +} diff --git a/usr/src/uts/intel/io/coretemp/coretemp.conf b/usr/src/uts/intel/io/coretemp/coretemp.conf new file mode 100644 index 0000000000..1880a2fa16 --- /dev/null +++ b/usr/src/uts/intel/io/coretemp/coretemp.conf @@ -0,0 +1,16 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. 
+# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source.  A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2019, Joyent, Inc. +# + +name="coretemp" parent="pseudo" instance=0; diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h index 581aea703b..8179851de1 100644 --- a/usr/src/uts/intel/sys/x86_archext.h +++ b/usr/src/uts/intel/sys/x86_archext.h @@ -223,6 +223,38 @@ extern "C" {  #define	CPUID_INTC_ECX_AHF64	0x00100000	/* LAHF and SAHF in long mode */  /* + * Intel uses cpuid leaf 6 to cover various thermal and power control + * operations. + */ +#define	CPUID_INTC_EAX_DTS	0x00000001	/* Digital Thermal Sensor */ +#define	CPUID_INTC_EAX_TURBO	0x00000002	/* Turboboost */ +#define	CPUID_INTC_EAX_ARAT	0x00000004	/* APIC-Timer-Always-Running */ +/* bit 3 is reserved */ +#define	CPUID_INTC_EAX_PLN	0x00000010	/* Power limit notification */ +#define	CPUID_INTC_EAX_ECMD	0x00000020	/* Clock mod. duty cycle */ +#define	CPUID_INTC_EAX_PTM	0x00000040	/* Package thermal management */ +#define	CPUID_INTC_EAX_HWP	0x00000080	/* HWP base registers */ +#define	CPUID_INTC_EAX_HWP_NOT	0x00000100	/* HWP Notification */ +#define	CPUID_INTC_EAX_HWP_ACT	0x00000200	/* HWP Activity Window */ +#define	CPUID_INTC_EAX_HWP_EPR	0x00000400	/* HWP Energy Perf. Pref. 
*/ +#define	CPUID_INTC_EAX_HWP_PLR	0x00000800	/* HWP Package Level Request */ +/* bit 12 is reserved */ +#define	CPUID_INTC_EAX_HDC	0x00002000	/* HDC */ +#define	CPUID_INTC_EAX_TURBO3	0x00004000	/* Turbo Boost Max Tech 3.0 */ +#define	CPUID_INTC_EAX_HWP_CAP	0x00008000	/* HWP Capabilities */ +#define	CPUID_INTC_EAX_HWP_PECI	0x00010000	/* HWP PECI override */ +#define	CPUID_INTC_EAX_HWP_FLEX	0x00020000	/* Flexible HWP */ +#define	CPUID_INTC_EAX_HWP_FAST	0x00040000	/* Fast IA32_HWP_REQUEST */ +/* bit 19 is reserved */ +#define	CPUID_INTC_EAX_HWP_IDLE	0x00100000	/* Ignore Idle Logical HWP */ + +#define	CPUID_INTC_EBX_DTS_NTRESH(x)	((x) & 0xf) + +#define	CPUID_INTC_ECX_MAPERF	0x00000001	/* IA32_MPERF / IA32_APERF */ +/* bits 1-2 are reserved */ +#define	CPUID_INTC_ECX_PERFBIAS	0x00000008	/* IA32_ENERGY_PERF_BIAS */ + +/*   * Intel also uses cpuid leaf 7 to have additional instructions and features.   * Like some other leaves, but unlike the current ones we care about, it   * requires us to specify both a leaf in %eax and a sub-leaf in %ecx. 
/*
 * Intel Thermal MSRs
 *
 * Each group below starts with the MSR number and is followed by the bit
 * masks and field accessors for that register's value.
 */

/*
 * Core thermal interrupt control. The *_VAL() accessors extract 7-bit fields
 * at bits 14:8 and 22:16.
 * NOTE(review): three names below are spelled "INTTERUPT" rather than
 * "INTERRUPT"; they are left as-is since renaming would change the interface.
 */
#define	MSR_IA32_THERM_INTERRUPT	0x19b
#define	IA32_THERM_INTERRUPT_HIGH_IE	0x00000001
#define	IA32_THERM_INTERRUPT_LOW_IE	0x00000002
#define	IA32_THERM_INTERRUPT_PROCHOT_IE	0x00000004
#define	IA32_THERM_INTERRUPT_FORCEPR_IE	0x00000008
#define	IA32_THERM_INTERRUPT_CRIT_IE	0x00000010
#define	IA32_THERM_INTERRUPT_TR1_VAL(x)	(((x) >> 8) & 0x7f)
#define	IA32_THERM_INTTERUPT_TR1_IE	0x00008000
#define	IA32_THERM_INTTERUPT_TR2_VAL(x)	(((x) >> 16) & 0x7f)
#define	IA32_THERM_INTERRUPT_TR2_IE	0x00800000
#define	IA32_THERM_INTERRUPT_PL_NE	0x01000000

/*
 * Core thermal status. READING() is the 7-bit field at bits 22:16,
 * RESOLUTION() is the 4-bit field at bits 30:27, and READ_VALID is bit 31.
 */
#define	MSR_IA32_THERM_STATUS		0x19c
#define	IA32_THERM_STATUS_STATUS		0x00000001
#define	IA32_THERM_STATUS_STATUS_LOG		0x00000002
#define	IA32_THERM_STATUS_PROCHOT		0x00000004
#define	IA32_THERM_STATUS_PROCHOT_LOG		0x00000008
#define	IA32_THERM_STATUS_CRIT_STATUS		0x00000010
#define	IA32_THERM_STATUS_CRIT_LOG		0x00000020
#define	IA32_THERM_STATUS_TR1_STATUS		0x00000040
#define	IA32_THERM_STATUS_TR1_LOG		0x00000080
#define	IA32_THERM_STATUS_TR2_STATUS		0x00000100
#define	IA32_THERM_STATUS_TR2_LOG		0x00000200
#define	IA32_THERM_STATUS_POWER_LIMIT_STATUS	0x00000400
#define	IA32_THERM_STATUS_POWER_LIMIT_LOG	0x00000800
#define	IA32_THERM_STATUS_CURRENT_STATUS	0x00001000
#define	IA32_THERM_STATUS_CURRENT_LOG		0x00002000
#define	IA32_THERM_STATUS_CROSS_DOMAIN_STATUS	0x00004000
#define	IA32_THERM_STATUS_CROSS_DOMAIN_LOG	0x00008000
#define	IA32_THERM_STATUS_READING(x)		(((x) >> 16) & 0x7f)
#define	IA32_THERM_STATUS_RESOLUTION(x)		(((x) >> 27) & 0x0f)
#define	IA32_THERM_STATUS_READ_VALID		0x80000000

/*
 * Temperature target. TARGET() is the 8-bit field at bits 23:16.
 */
#define	MSR_TEMPERATURE_TARGET		0x1a2
#define	MSR_TEMPERATURE_TARGET_TARGET(x)	(((x) >> 16) & 0xff)
/*
 * Not all models support the offset. Refer to the Intel SDM Volume 4 for a list
 * of which models have support for which bits.
 */
#define	MSR_TEMPERATURE_TARGET_OFFSET(x)	(((x) >> 24) & 0x0f)

/*
 * Package thermal status. READING() is the 7-bit field at bits 22:16. No
 * read-valid or resolution accessors are defined for this register here.
 */
#define	MSR_IA32_PACKAGE_THERM_STATUS		0x1b1
#define	IA32_PKG_THERM_STATUS_STATUS		0x00000001
#define	IA32_PKG_THERM_STATUS_STATUS_LOG	0x00000002
#define	IA32_PKG_THERM_STATUS_PROCHOT		0x00000004
#define	IA32_PKG_THERM_STATUS_PROCHOT_LOG	0x00000008
#define	IA32_PKG_THERM_STATUS_CRIT_STATUS	0x00000010
#define	IA32_PKG_THERM_STATUS_CRIT_LOG		0x00000020
#define	IA32_PKG_THERM_STATUS_TR1_STATUS	0x00000040
#define	IA32_PKG_THERM_STATUS_TR1_LOG		0x00000080
#define	IA32_PKG_THERM_STATUS_TR2_STATUS	0x00000100
#define	IA32_PKG_THERM_STATUS_TR2_LOG		0x00000200
#define	IA32_PKG_THERM_STATUS_READING(x)	(((x) >> 16) & 0x7f)

/*
 * Package thermal interrupt control; the field layout parallels
 * MSR_IA32_THERM_INTERRUPT. Two names here also carry the "INTTERUPT"
 * spelling.
 */
#define	MSR_IA32_PACKAGE_THERM_INTERRUPT	0x1b2
#define	IA32_PKG_THERM_INTERRUPT_HIGH_IE	0x00000001
#define	IA32_PKG_THERM_INTERRUPT_LOW_IE		0x00000002
#define	IA32_PKG_THERM_INTERRUPT_PROCHOT_IE	0x00000004
#define	IA32_PKG_THERM_INTERRUPT_OVERHEAT_IE	0x00000010
#define	IA32_PKG_THERM_INTERRUPT_TR1_VAL(x)	(((x) >> 8) & 0x7f)
#define	IA32_PKG_THERM_INTTERUPT_TR1_IE		0x00008000
#define	IA32_PKG_THERM_INTTERUPT_TR2_VAL(x)	(((x) >> 16) & 0x7f)
#define	IA32_PKG_THERM_INTERRUPT_TR2_IE		0x00800000
#define	IA32_PKG_THERM_INTERRUPT_PL_NE		0x01000000
- */ -#define	CPUID_EPB_SUPPORT	(1 << 3) - -/*   * Intel TSC deadline timer   */  #define	CPUID_DEADLINE_TSC	(1 << 24) @@ -856,7 +948,9 @@ extern "C" {   * Definitions for Intel processor models. These are all for Family 6   * processors. This list and the Atom set below it are not exhuastive.   */ +#define	INTC_MODEL_YONAH		0x0e  #define	INTC_MODEL_MEROM		0x0f +#define	INTC_MODEL_MEROM_L		0x16  #define	INTC_MODEL_PENRYN		0x17  #define	INTC_MODEL_DUNNINGTON		0x1d @@ -942,7 +1036,7 @@ extern "C" {  #if defined(_KERNEL) || defined(_KMEMUSER) -#define	NUM_X86_FEATURES	95 +#define	NUM_X86_FEATURES	97  extern uchar_t x86_featureset[];  extern void free_x86_featureset(void *featureset); | 
