1f2dbfd3Robert Mustacchi/*
2f2dbfd3Robert Mustacchi * This file and its contents are supplied under the terms of the
3f2dbfd3Robert Mustacchi * Common Development and Distribution License ("CDDL"), version 1.0.
4f2dbfd3Robert Mustacchi * You may only use this file in accordance with the terms of version
5f2dbfd3Robert Mustacchi * 1.0 of the CDDL.
6f2dbfd3Robert Mustacchi *
7f2dbfd3Robert Mustacchi * A full copy of the text of the CDDL should have accompanied this
8f2dbfd3Robert Mustacchi * source.  A copy of the CDDL is also available via the Internet at
9f2dbfd3Robert Mustacchi * http://www.illumos.org/license/CDDL.
10f2dbfd3Robert Mustacchi */
11f2dbfd3Robert Mustacchi
12f2dbfd3Robert Mustacchi/*
13f2dbfd3Robert Mustacchi * Copyright 2019, Joyent, Inc.
1401c0c40Robert Mustacchi * Copyright 2020 Oxide Computer Company
15f2dbfd3Robert Mustacchi */
16f2dbfd3Robert Mustacchi
17f2dbfd3Robert Mustacchi/*
18f2dbfd3Robert Mustacchi * Intel CPU Thermal sensor driver
19f2dbfd3Robert Mustacchi *
 * The MSRs used by this driver were introduced with the 'Core' family
 * processors and have since spread beyond them, even to the Atom line.
 * Currently, temperature sensors exist on a per-core basis and optionally on a
 * per-package basis. The temperature sensor exposes a reading that's relative
 * to the processor's maximum junction temperature, often referred to as Tj. We
 * currently only support models where we can determine that junction
 * temperature programmatically. For older processors, we would need to track
 * down the datasheet. Unfortunately, the values there are often specified on a
 * per-brand string basis: two CPUs with the same model and stepping that have
 * been binned differently may have different junction temperatures.
30f2dbfd3Robert Mustacchi *
31f2dbfd3Robert Mustacchi * The temperature is exposed through /dev and uses a semi-standard sensor
32f2dbfd3Robert Mustacchi * framework. We expose one minor node per CPU core and one minor node per CPU
33f2dbfd3Robert Mustacchi * package, if that is supported. Reads are rate-limited in the driver at 100ms
34f2dbfd3Robert Mustacchi * by default per the global variable coretemp_cache_ms.
35f2dbfd3Robert Mustacchi */
36f2dbfd3Robert Mustacchi
37f2dbfd3Robert Mustacchi#include <sys/modctl.h>
38f2dbfd3Robert Mustacchi#include <sys/conf.h>
39f2dbfd3Robert Mustacchi#include <sys/devops.h>
40f2dbfd3Robert Mustacchi#include <sys/types.h>
41f2dbfd3Robert Mustacchi#include <sys/file.h>
42f2dbfd3Robert Mustacchi#include <sys/open.h>
43f2dbfd3Robert Mustacchi#include <sys/stat.h>
44f2dbfd3Robert Mustacchi#include <sys/cred.h>
45f2dbfd3Robert Mustacchi#include <sys/ddi.h>
46f2dbfd3Robert Mustacchi#include <sys/sunddi.h>
47f2dbfd3Robert Mustacchi#include <sys/list.h>
48f2dbfd3Robert Mustacchi#include <sys/stddef.h>
49f2dbfd3Robert Mustacchi#include <sys/cmn_err.h>
50f2dbfd3Robert Mustacchi#include <sys/x86_archext.h>
51f2dbfd3Robert Mustacchi#include <sys/cpu_module.h>
52f2dbfd3Robert Mustacchi#include <sys/ontrap.h>
53f2dbfd3Robert Mustacchi#include <sys/cpuvar.h>
54f2dbfd3Robert Mustacchi#include <sys/x_call.h>
55f2dbfd3Robert Mustacchi#include <sys/sensors.h>
56f2dbfd3Robert Mustacchi
/*
 * The Intel SDM says that the measurements we get are always in degrees
 * Celsius. This value is reported as the granularity (sis_gran) of every
 * reading that the driver hands back.
 */
#define	CORETEMP_GRANULARITY	1
6201c0c40Robert Mustacchi
/*
 * The kind of sensor a coretemp_sensor_t describes. The type selects which
 * pair of status/interrupt MSRs is read and how the sensor is named.
 */
typedef enum coretemp_sensor_type {
	/* Per-core sensor, named "chip%u.core%u" */
	CORETEMP_S_CORE,
	/* Per-package sensor, named "chip%u" */
	CORETEMP_S_SOCKET
} coretemp_sensor_type_t;
6701c0c40Robert Mustacchi
6801c0c40Robert Mustacchitypedef struct coretemp_sensor {
6901c0c40Robert Mustacchi	list_node_t		cs_link;
7001c0c40Robert Mustacchi	struct coretemp		*cs_coretemp;
7101c0c40Robert Mustacchi	char			cs_name[128];
7201c0c40Robert Mustacchi	id_t			cs_sensor;
7301c0c40Robert Mustacchi	coretemp_sensor_type_t	cs_type;
7401c0c40Robert Mustacchi	enum cmi_hdl_class	cs_class;
7501c0c40Robert Mustacchi	uint_t			cs_chip;
7601c0c40Robert Mustacchi	uint_t			cs_core;
7701c0c40Robert Mustacchi	uint_t			cs_strand;
7801c0c40Robert Mustacchi	uint_t			cs_tjmax;
7901c0c40Robert Mustacchi	uint_t			cs_status_msr;
8001c0c40Robert Mustacchi	uint_t			cs_intr_msr;
8101c0c40Robert Mustacchi	hrtime_t		cs_last_read;
8201c0c40Robert Mustacchi	uint64_t		cs_status;
8301c0c40Robert Mustacchi	uint64_t		cs_intr;
84f2dbfd3Robert Mustacchi	/* The following fields are derived from above */
8501c0c40Robert Mustacchi	uint_t			cs_temperature;
8601c0c40Robert Mustacchi	uint_t			cs_resolution;
8701c0c40Robert Mustacchi} coretemp_sensor_t;
88f2dbfd3Robert Mustacchi
89f2dbfd3Robert Mustacchitypedef struct coretemp {
90f2dbfd3Robert Mustacchi	dev_info_t	*coretemp_dip;
91f2dbfd3Robert Mustacchi	cpuset_t	*coretemp_cpuset;
92f2dbfd3Robert Mustacchi	boolean_t	coretemp_pkg;
93f2dbfd3Robert Mustacchi	kmutex_t	coretemp_mutex;
9401c0c40Robert Mustacchi	list_t		coretemp_sensors;
95f2dbfd3Robert Mustacchi} coretemp_t;
96f2dbfd3Robert Mustacchi
97f2dbfd3Robert Mustacchicoretemp_t *coretemp;
98f2dbfd3Robert Mustacchi
99f2dbfd3Robert Mustacchi/*
100f2dbfd3Robert Mustacchi * This indicates a number of milliseconds that we should wait between reads.
101f2dbfd3Robert Mustacchi * This is somewhat arbitrary, but the goal is to reduce cross call activity
102f2dbfd3Robert Mustacchi * and reflect that the sensor may not update all the time.
103f2dbfd3Robert Mustacchi */
104f2dbfd3Robert Mustacchiuint_t coretemp_cache_ms = 100;
105f2dbfd3Robert Mustacchi
106f2dbfd3Robert Mustacchistatic int
107f2dbfd3Robert Mustacchicoretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
108f2dbfd3Robert Mustacchi{
109f2dbfd3Robert Mustacchi	uint_t msr = (uint_t)arg1;
110f2dbfd3Robert Mustacchi	uint64_t *valp = (uint64_t *)arg2;
111f2dbfd3Robert Mustacchi	cmi_errno_t *errp = (cmi_errno_t *)arg3;
112f2dbfd3Robert Mustacchi
113f2dbfd3Robert Mustacchi	on_trap_data_t otd;
114f2dbfd3Robert Mustacchi
115f2dbfd3Robert Mustacchi	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
116f2dbfd3Robert Mustacchi		if (checked_rdmsr(msr, valp) == 0) {
117f2dbfd3Robert Mustacchi			*errp = CMI_SUCCESS;
118f2dbfd3Robert Mustacchi		} else {
119f2dbfd3Robert Mustacchi			*errp = CMIERR_NOTSUP;
120f2dbfd3Robert Mustacchi		}
121f2dbfd3Robert Mustacchi	} else {
122f2dbfd3Robert Mustacchi		*errp = CMIERR_MSRGPF;
123f2dbfd3Robert Mustacchi	}
124f2dbfd3Robert Mustacchi	no_trap();
125f2dbfd3Robert Mustacchi
126f2dbfd3Robert Mustacchi	return (0);
127f2dbfd3Robert Mustacchi}
128f2dbfd3Robert Mustacchi
129f2dbfd3Robert Mustacchi/*
130f2dbfd3Robert Mustacchi * This really should just be a call to the CMI handle to provide us the MSR.
131f2dbfd3Robert Mustacchi * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is
132f2dbfd3Robert Mustacchi * fixed for use outside of a panic-like context.
133f2dbfd3Robert Mustacchi */
134f2dbfd3Robert Mustacchistatic int
135f2dbfd3Robert Mustacchicoretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp)
136f2dbfd3Robert Mustacchi{
137f2dbfd3Robert Mustacchi	id_t cpu = cmi_hdl_logical_id(hdl);
138f2dbfd3Robert Mustacchi	int ret = CMI_SUCCESS;
139f2dbfd3Robert Mustacchi
140f2dbfd3Robert Mustacchi	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
141f2dbfd3Robert Mustacchi	kpreempt_disable();
142f2dbfd3Robert Mustacchi	if (CPU->cpu_id == cpu) {
143f2dbfd3Robert Mustacchi		(void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp,
144f2dbfd3Robert Mustacchi		    (xc_arg_t)&ret);
145f2dbfd3Robert Mustacchi	} else {
146f2dbfd3Robert Mustacchi		cpuset_only(ct->coretemp_cpuset, (uint_t)cpu);
147f2dbfd3Robert Mustacchi		xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&ret,
148f2dbfd3Robert Mustacchi		    (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc);
149f2dbfd3Robert Mustacchi	}
150f2dbfd3Robert Mustacchi	kpreempt_enable();
151f2dbfd3Robert Mustacchi
152f2dbfd3Robert Mustacchi	return (ret);
153f2dbfd3Robert Mustacchi}
154f2dbfd3Robert Mustacchi
155f2dbfd3Robert Mustacchistatic int
156f2dbfd3Robert Mustacchicoretemp_cmi_errno(cmi_errno_t e)
157f2dbfd3Robert Mustacchi{
158f2dbfd3Robert Mustacchi	switch (e) {
159f2dbfd3Robert Mustacchi	case CMIERR_NOTSUP:
160f2dbfd3Robert Mustacchi		return (ENOTSUP);
161f2dbfd3Robert Mustacchi	default:
162f2dbfd3Robert Mustacchi		return (EIO);
163f2dbfd3Robert Mustacchi	}
164f2dbfd3Robert Mustacchi}
165f2dbfd3Robert Mustacchi
166f2dbfd3Robert Mustacchi/*
167f2dbfd3Robert Mustacchi * Answer the question of whether or not the driver can support the CPU in
168f2dbfd3Robert Mustacchi * question. Right now we have the following constraints for supporting the CPU:
169f2dbfd3Robert Mustacchi *
170f2dbfd3Robert Mustacchi *   o The CPU is made by Intel
171f2dbfd3Robert Mustacchi *   o The CPU has the Digital Thermal Sensor
172f2dbfd3Robert Mustacchi *   o The CPU family is 6, which is usually implicit from the above
173f2dbfd3Robert Mustacchi *   o We can determine its junction temperature through an MSR
174f2dbfd3Robert Mustacchi *
17501c0c40Robert Mustacchi * If we can't determine the junction temperature programmatically, then we need
176f2dbfd3Robert Mustacchi * to set up tables of CPUs to do so. This can be fleshed out and improved.
177f2dbfd3Robert Mustacchi */
178f2dbfd3Robert Mustacchistatic boolean_t
179f2dbfd3Robert Mustacchicoretemp_supported(void)
180f2dbfd3Robert Mustacchi{
181f2dbfd3Robert Mustacchi	uint_t model;
182f2dbfd3Robert Mustacchi
183f2dbfd3Robert Mustacchi	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) {
184f2dbfd3Robert Mustacchi		return (B_FALSE);
185f2dbfd3Robert Mustacchi	}
186f2dbfd3Robert Mustacchi
187f2dbfd3Robert Mustacchi	if (!is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL)) {
188f2dbfd3Robert Mustacchi		return (B_FALSE);
189f2dbfd3Robert Mustacchi	}
190f2dbfd3Robert Mustacchi
191f2dbfd3Robert Mustacchi	if (cpuid_getfamily(CPU) != 6) {
192f2dbfd3Robert Mustacchi		return (B_FALSE);
193f2dbfd3Robert Mustacchi	}
194f2dbfd3Robert Mustacchi
195f2dbfd3Robert Mustacchi	model = cpuid_getmodel(CPU);
196f2dbfd3Robert Mustacchi	if (model <= INTC_MODEL_PENRYN || model == INTC_MODEL_SILVERTHORNE ||
197f2dbfd3Robert Mustacchi	    model == INTC_MODEL_LINCROFT || model == INTC_MODEL_PENWELL ||
198f2dbfd3Robert Mustacchi	    model == INTC_MODEL_CLOVERVIEW || model == INTC_MODEL_CEDARVIEW) {
199f2dbfd3Robert Mustacchi		return (B_FALSE);
200f2dbfd3Robert Mustacchi	}
201f2dbfd3Robert Mustacchi
202f2dbfd3Robert Mustacchi	return (B_TRUE);
203f2dbfd3Robert Mustacchi}
204f2dbfd3Robert Mustacchi
205f2dbfd3Robert Mustacchi/*
206f2dbfd3Robert Mustacchi * We need to determine the value of Tj Max as all temperature sensors are
207f2dbfd3Robert Mustacchi * derived from this value. The ease of this depends on how old the processor in
208f2dbfd3Robert Mustacchi * question is. The Core family processors after Penryn have support for an MSR
209f2dbfd3Robert Mustacchi * that tells us what to go for. In the Atom family, processors starting with
210f2dbfd3Robert Mustacchi * Silvermont have support for an MSR that documents this value. For older
211f2dbfd3Robert Mustacchi * processors, one needs to track down the datasheet for a specific processor.
212f2dbfd3Robert Mustacchi * Two processors in the same family/model may have different values of Tj Max.
213f2dbfd3Robert Mustacchi * At the moment, we only support this on processors that have that MSR.
214f2dbfd3Robert Mustacchi */
215f2dbfd3Robert Mustacchistatic int
21601c0c40Robert Mustacchicoretemp_calculate_tjmax(coretemp_t *ct, cmi_hdl_t hdl, uint_t *tjmax)
217f2dbfd3Robert Mustacchi{
218f2dbfd3Robert Mustacchi	cmi_errno_t e;
219f2dbfd3Robert Mustacchi	uint64_t val = 0;
220f2dbfd3Robert Mustacchi
221f2dbfd3Robert Mustacchi	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val);
22201c0c40Robert Mustacchi	if (e != CMI_SUCCESS) {
22301c0c40Robert Mustacchi		return (coretemp_cmi_errno(e));
224f2dbfd3Robert Mustacchi	} else if (val == 0) {
22501c0c40Robert Mustacchi		return (EINVAL);
226f2dbfd3Robert Mustacchi	}
227f2dbfd3Robert Mustacchi
22801c0c40Robert Mustacchi	*tjmax = MSR_TEMPERATURE_TARGET_TARGET(val);
22901c0c40Robert Mustacchi	return (0);
230f2dbfd3Robert Mustacchi}
231f2dbfd3Robert Mustacchi
232f2dbfd3Robert Mustacchistatic int
23301c0c40Robert Mustacchicoretemp_update(coretemp_t *ct, coretemp_sensor_t *sensor, cmi_hdl_t hdl)
234f2dbfd3Robert Mustacchi{
235f2dbfd3Robert Mustacchi	cmi_errno_t e;
236f2dbfd3Robert Mustacchi	int err = 0;
23701c0c40Robert Mustacchi	uint64_t intr, status;
238f2dbfd3Robert Mustacchi
23901c0c40Robert Mustacchi	if ((e = coretemp_rdmsr(ct, hdl, sensor->cs_status_msr, &status)) !=
24001c0c40Robert Mustacchi	    CMI_SUCCESS) {
241f2dbfd3Robert Mustacchi		err = coretemp_cmi_errno(e);
24201c0c40Robert Mustacchi		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get thermal "
24301c0c40Robert Mustacchi		    "status on %s: %d", sensor->cs_name, err);
244f2dbfd3Robert Mustacchi		return (err);
245f2dbfd3Robert Mustacchi	}
246f2dbfd3Robert Mustacchi
24701c0c40Robert Mustacchi	if ((e = coretemp_rdmsr(ct, hdl, sensor->cs_intr_msr, &intr)) !=
24801c0c40Robert Mustacchi	    CMI_SUCCESS) {
249f2dbfd3Robert Mustacchi		err = coretemp_cmi_errno(e);
25001c0c40Robert Mustacchi		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get thermal "
25101c0c40Robert Mustacchi		    "interrupt on %s: %d", sensor->cs_name, err);
252f2dbfd3Robert Mustacchi		return (err);
253f2dbfd3Robert Mustacchi	}
254f2dbfd3Robert Mustacchi
25501c0c40Robert Mustacchi	sensor->cs_status = status;
25601c0c40Robert Mustacchi	sensor->cs_intr = intr;
25701c0c40Robert Mustacchi	sensor->cs_last_read = gethrtime();
258f2dbfd3Robert Mustacchi	return (0);
259f2dbfd3Robert Mustacchi}
260f2dbfd3Robert Mustacchi
261f2dbfd3Robert Mustacchistatic int
2621045e13Robert Mustacchicoretemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
263f2dbfd3Robert Mustacchi{
26401c0c40Robert Mustacchi	coretemp_sensor_t *sensor = arg;
26501c0c40Robert Mustacchi	coretemp_t *ct = sensor->cs_coretemp;
266f2dbfd3Robert Mustacchi	hrtime_t diff;
26701c0c40Robert Mustacchi	uint_t reading, resolution;
268f2dbfd3Robert Mustacchi
269f2dbfd3Robert Mustacchi	mutex_enter(&ct->coretemp_mutex);
27001c0c40Robert Mustacchi	diff = NSEC2MSEC(gethrtime() - sensor->cs_last_read);
271f2dbfd3Robert Mustacchi	if (diff > 0 && diff > (hrtime_t)coretemp_cache_ms) {
272f2dbfd3Robert Mustacchi		int ret;
273f2dbfd3Robert Mustacchi		cmi_hdl_t hdl;
274f2dbfd3Robert Mustacchi
27501c0c40Robert Mustacchi		if ((hdl = cmi_hdl_lookup(sensor->cs_class, sensor->cs_chip,
27601c0c40Robert Mustacchi		    sensor->cs_core, sensor->cs_strand)) == NULL) {
277f2dbfd3Robert Mustacchi			mutex_exit(&ct->coretemp_mutex);
278f2dbfd3Robert Mustacchi			return (ENXIO);
279f2dbfd3Robert Mustacchi		}
28001c0c40Robert Mustacchi		ret = coretemp_update(ct, sensor, hdl);
281f2dbfd3Robert Mustacchi		cmi_hdl_rele(hdl);
282f2dbfd3Robert Mustacchi		if (ret != 0) {
283f2dbfd3Robert Mustacchi			mutex_exit(&ct->coretemp_mutex);
284f2dbfd3Robert Mustacchi			return (ret);
285f2dbfd3Robert Mustacchi		}
286f2dbfd3Robert Mustacchi	}
287f2dbfd3Robert Mustacchi
28801c0c40Robert Mustacchi	switch (sensor->cs_type) {
28901c0c40Robert Mustacchi	case CORETEMP_S_CORE:
29001c0c40Robert Mustacchi		if ((sensor->cs_status & IA32_THERM_STATUS_READ_VALID) == 0) {
29101c0c40Robert Mustacchi			mutex_exit(&ct->coretemp_mutex);
29201c0c40Robert Mustacchi			return (EIO);
29301c0c40Robert Mustacchi		}
29401c0c40Robert Mustacchi		reading = IA32_THERM_STATUS_READING(sensor->cs_status);
29501c0c40Robert Mustacchi		resolution = IA32_THERM_STATUS_RESOLUTION(sensor->cs_status);
29601c0c40Robert Mustacchi		break;
29701c0c40Robert Mustacchi	case CORETEMP_S_SOCKET:
29801c0c40Robert Mustacchi		reading = IA32_PKG_THERM_STATUS_READING(sensor->cs_status);
29901c0c40Robert Mustacchi		resolution = 0;
30001c0c40Robert Mustacchi		break;
30101c0c40Robert Mustacchi	default:
30201c0c40Robert Mustacchi		mutex_exit(&ct->coretemp_mutex);
30301c0c40Robert Mustacchi		return (ENXIO);
304f2dbfd3Robert Mustacchi	}
30501c0c40Robert Mustacchi	if (reading >= sensor->cs_tjmax) {
30601c0c40Robert Mustacchi		dev_err(ct->coretemp_dip, CE_WARN, "!found invalid temperature "
30701c0c40Robert Mustacchi		    "on sensor %s: readout: %u, tjmax: %u, raw: 0x%"
30801c0c40Robert Mustacchi		    PRIx64, sensor->cs_name, reading, sensor->cs_tjmax,
30901c0c40Robert Mustacchi		    sensor->cs_status);
31001c0c40Robert Mustacchi		mutex_exit(&ct->coretemp_mutex);
31101c0c40Robert Mustacchi		return (EIO);
312f2dbfd3Robert Mustacchi	}
31301c0c40Robert Mustacchi	sensor->cs_temperature = sensor->cs_tjmax - reading;
31401c0c40Robert Mustacchi	sensor->cs_resolution = resolution;
315f2dbfd3Robert Mustacchi
3161045e13Robert Mustacchi	scalar->sis_unit = SENSOR_UNIT_CELSIUS;
3171045e13Robert Mustacchi	scalar->sis_value = sensor->cs_temperature;
3181045e13Robert Mustacchi	scalar->sis_gran = CORETEMP_GRANULARITY;
3191045e13Robert Mustacchi	scalar->sis_prec = sensor->cs_resolution;
32001c0c40Robert Mustacchi	mutex_exit(&ct->coretemp_mutex);
321f2dbfd3Robert Mustacchi
322f2dbfd3Robert Mustacchi	return (0);
323f2dbfd3Robert Mustacchi}
324f2dbfd3Robert Mustacchi
32501c0c40Robert Mustacchistatic const ksensor_ops_t coretemp_temp_ops = {
32601c0c40Robert Mustacchi	.kso_kind = ksensor_kind_temperature,
3271045e13Robert Mustacchi	.kso_scalar = coretemp_read
32801c0c40Robert Mustacchi};
329f2dbfd3Robert Mustacchi
330f2dbfd3Robert Mustacchistatic void
331f2dbfd3Robert Mustacchicoretemp_destroy(coretemp_t *ct)
332f2dbfd3Robert Mustacchi{
33301c0c40Robert Mustacchi	coretemp_sensor_t *sensor;
334f2dbfd3Robert Mustacchi
33501c0c40Robert Mustacchi	(void) ksensor_remove(ct->coretemp_dip, KSENSOR_ALL_IDS);
33601c0c40Robert Mustacchi	while ((sensor = list_remove_head(&ct->coretemp_sensors)) != NULL) {
33701c0c40Robert Mustacchi		kmem_free(sensor, sizeof (coretemp_sensor_t));
338f2dbfd3Robert Mustacchi	}
33901c0c40Robert Mustacchi	list_destroy(&ct->coretemp_sensors);
340f2dbfd3Robert Mustacchi
341f2dbfd3Robert Mustacchi	if (ct->coretemp_cpuset != NULL) {
342f2dbfd3Robert Mustacchi		cpuset_free(ct->coretemp_cpuset);
343f2dbfd3Robert Mustacchi	}
344f2dbfd3Robert Mustacchi
345f2dbfd3Robert Mustacchi	mutex_destroy(&ct->coretemp_mutex);
346f2dbfd3Robert Mustacchi	kmem_free(ct, sizeof (coretemp_t));
347f2dbfd3Robert Mustacchi}
348f2dbfd3Robert Mustacchi
34901c0c40Robert Mustacchistatic boolean_t
35001c0c40Robert Mustacchicoretemp_create_sensor(coretemp_t *ct, cmi_hdl_t hdl, uint_t tjmax,
35101c0c40Robert Mustacchi    coretemp_sensor_type_t type)
35201c0c40Robert Mustacchi{
35301c0c40Robert Mustacchi	int err;
35401c0c40Robert Mustacchi	coretemp_sensor_t *sensor;
35501c0c40Robert Mustacchi
35601c0c40Robert Mustacchi	sensor = kmem_zalloc(sizeof (coretemp_sensor_t), KM_SLEEP);
35701c0c40Robert Mustacchi	sensor->cs_coretemp = ct;
35801c0c40Robert Mustacchi	sensor->cs_type = type;
35901c0c40Robert Mustacchi	sensor->cs_class = cmi_hdl_class(hdl);
36001c0c40Robert Mustacchi	sensor->cs_chip = cmi_hdl_chipid(hdl);
36101c0c40Robert Mustacchi	sensor->cs_core = cmi_hdl_coreid(hdl);
36201c0c40Robert Mustacchi	sensor->cs_strand = 0;
36301c0c40Robert Mustacchi	sensor->cs_tjmax = tjmax;
36401c0c40Robert Mustacchi
36501c0c40Robert Mustacchi	switch (sensor->cs_type) {
36601c0c40Robert Mustacchi	case CORETEMP_S_CORE:
36701c0c40Robert Mustacchi		if (snprintf(sensor->cs_name, sizeof (sensor->cs_name),
36801c0c40Robert Mustacchi		    "chip%u.core%u", sensor->cs_chip, sensor->cs_core) >=
36901c0c40Robert Mustacchi		    sizeof (sensor->cs_name)) {
37001c0c40Robert Mustacchi			goto err;
37101c0c40Robert Mustacchi		}
37201c0c40Robert Mustacchi		sensor->cs_status_msr = MSR_IA32_THERM_STATUS;
37301c0c40Robert Mustacchi		sensor->cs_intr_msr = MSR_IA32_THERM_INTERRUPT;
37401c0c40Robert Mustacchi		break;
37501c0c40Robert Mustacchi	case CORETEMP_S_SOCKET:
37601c0c40Robert Mustacchi		if (snprintf(sensor->cs_name, sizeof (sensor->cs_name),
37701c0c40Robert Mustacchi		    "chip%u", sensor->cs_chip) >= sizeof (sensor->cs_name)) {
37801c0c40Robert Mustacchi			goto err;
37901c0c40Robert Mustacchi		}
38001c0c40Robert Mustacchi		sensor->cs_status_msr = MSR_IA32_PACKAGE_THERM_STATUS;
38101c0c40Robert Mustacchi		sensor->cs_intr_msr = MSR_IA32_PACKAGE_THERM_INTERRUPT;
38201c0c40Robert Mustacchi		break;
38301c0c40Robert Mustacchi	}
38401c0c40Robert Mustacchi
38501c0c40Robert Mustacchi	if ((err = ksensor_create(ct->coretemp_dip, &coretemp_temp_ops, sensor,
38601c0c40Robert Mustacchi	    sensor->cs_name, DDI_NT_SENSOR_TEMP_CPU, &sensor->cs_sensor)) !=
38701c0c40Robert Mustacchi	    0) {
38801c0c40Robert Mustacchi		dev_err(ct->coretemp_dip, CE_WARN, "failed to create ksensor "
38901c0c40Robert Mustacchi		    "for %s: %d", sensor->cs_name, err);
39001c0c40Robert Mustacchi	}
39101c0c40Robert Mustacchi
39201c0c40Robert Mustacchi	return (B_TRUE);
39301c0c40Robert Mustacchierr:
39401c0c40Robert Mustacchi	kmem_free(sensor, sizeof (coretemp_sensor_t));
39501c0c40Robert Mustacchi	return (B_FALSE);
39601c0c40Robert Mustacchi}
39701c0c40Robert Mustacchi
398f2dbfd3Robert Mustacchistatic int
39901c0c40Robert Mustacchicoretemp_walk(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3)
400f2dbfd3Robert Mustacchi{
401f2dbfd3Robert Mustacchi	coretemp_t *ct = arg1;
402f2dbfd3Robert Mustacchi	boolean_t *walkerr = arg2;
40301c0c40Robert Mustacchi	uint_t tjmax;
404f2dbfd3Robert Mustacchi	int err;
405f2dbfd3Robert Mustacchi
406f2dbfd3Robert Mustacchi	/*
407f2dbfd3Robert Mustacchi	 * The temperature sensor only exists on a per-core basis. Therefore we
408f2dbfd3Robert Mustacchi	 * ignore any non-zero strand.
409f2dbfd3Robert Mustacchi	 */
410f2dbfd3Robert Mustacchi	if (cmi_hdl_strandid(hdl) != 0) {
411f2dbfd3Robert Mustacchi		return (CMI_HDL_WALK_NEXT);
412f2dbfd3Robert Mustacchi	}
413f2dbfd3Robert Mustacchi
41401c0c40Robert Mustacchi	if ((err = coretemp_calculate_tjmax(ct, hdl, &tjmax)) != 0) {
415f2dbfd3Robert Mustacchi		dev_err(ct->coretemp_dip, CE_WARN,
41601c0c40Robert Mustacchi		    "failed to read Tj Max on %u/%u: %d", cmi_hdl_chipid(hdl),
41701c0c40Robert Mustacchi		    cmi_hdl_coreid(hdl), err);
418f2dbfd3Robert Mustacchi		*walkerr = B_TRUE;
419f2dbfd3Robert Mustacchi		return (CMI_HDL_WALK_DONE);
420f2dbfd3Robert Mustacchi	}
421f2dbfd3Robert Mustacchi
42201c0c40Robert Mustacchi	if (!coretemp_create_sensor(ct, hdl, tjmax, CORETEMP_S_CORE)) {
423f2dbfd3Robert Mustacchi		*walkerr = B_TRUE;
424f2dbfd3Robert Mustacchi		return (CMI_HDL_WALK_DONE);
425f2dbfd3Robert Mustacchi	}
426f2dbfd3Robert Mustacchi
42701c0c40Robert Mustacchi	if (ct->coretemp_pkg && cmi_hdl_coreid(hdl) == 0 &&
42801c0c40Robert Mustacchi	    !coretemp_create_sensor(ct, hdl, tjmax, CORETEMP_S_SOCKET)) {
42901c0c40Robert Mustacchi		*walkerr = B_TRUE;
43001c0c40Robert Mustacchi		return (CMI_HDL_WALK_DONE);
431f2dbfd3Robert Mustacchi	}
432f2dbfd3Robert Mustacchi
43301c0c40Robert Mustacchi	return (CMI_HDL_WALK_NEXT);
434f2dbfd3Robert Mustacchi}
435f2dbfd3Robert Mustacchi
436f2dbfd3Robert Mustacchistatic int
437f2dbfd3Robert Mustacchicoretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
438f2dbfd3Robert Mustacchi{
439f2dbfd3Robert Mustacchi	boolean_t walkerr;
440f2dbfd3Robert Mustacchi	coretemp_t *ct = NULL;
441f2dbfd3Robert Mustacchi
442f2dbfd3Robert Mustacchi	if (cmd == DDI_RESUME) {
443f2dbfd3Robert Mustacchi		return (DDI_SUCCESS);
44401c0c40Robert Mustacchi	} else if (cmd != DDI_ATTACH) {
445f2dbfd3Robert Mustacchi		return (DDI_FAILURE);
446f2dbfd3Robert Mustacchi	}
447f2dbfd3Robert Mustacchi
448f2dbfd3Robert Mustacchi	if (coretemp != NULL) {
449f2dbfd3Robert Mustacchi		return (DDI_FAILURE);
450f2dbfd3Robert Mustacchi	}
451f2dbfd3Robert Mustacchi
452f2dbfd3Robert Mustacchi	ct = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP);
453f2dbfd3Robert Mustacchi	ct->coretemp_dip = dip;
454f2dbfd3Robert Mustacchi	ct->coretemp_pkg = is_x86_feature(x86_featureset, X86FSET_PKG_THERMAL);
45501c0c40Robert Mustacchi	list_create(&ct->coretemp_sensors, sizeof (coretemp_sensor_t),
45601c0c40Robert Mustacchi	    offsetof(coretemp_sensor_t, cs_link));
457f2dbfd3Robert Mustacchi	mutex_init(&ct->coretemp_mutex, NULL, MUTEX_DRIVER, NULL);
458f2dbfd3Robert Mustacchi	ct->coretemp_cpuset = cpuset_alloc(KM_SLEEP);
459f2dbfd3Robert Mustacchi
460f2dbfd3Robert Mustacchi	mutex_enter(&ct->coretemp_mutex);
461f2dbfd3Robert Mustacchi	walkerr = B_FALSE;
46201c0c40Robert Mustacchi	cmi_hdl_walk(coretemp_walk, ct, &walkerr, NULL);
463f2dbfd3Robert Mustacchi
464f2dbfd3Robert Mustacchi	if (walkerr) {
465f2dbfd3Robert Mustacchi		mutex_exit(&ct->coretemp_mutex);
466f2dbfd3Robert Mustacchi		goto fail;
467f2dbfd3Robert Mustacchi	}
468f2dbfd3Robert Mustacchi
469f2dbfd3Robert Mustacchi	coretemp = ct;
470f2dbfd3Robert Mustacchi	mutex_exit(&ct->coretemp_mutex);
471f2dbfd3Robert Mustacchi	return (DDI_SUCCESS);
472f2dbfd3Robert Mustacchifail:
473f2dbfd3Robert Mustacchi	coretemp = NULL;
474f2dbfd3Robert Mustacchi	coretemp_destroy(ct);
475f2dbfd3Robert Mustacchi	return (DDI_FAILURE);
476f2dbfd3Robert Mustacchi
477f2dbfd3Robert Mustacchi}
478f2dbfd3Robert Mustacchi
479f2dbfd3Robert Mustacchistatic int
480f2dbfd3Robert Mustacchicoretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
481f2dbfd3Robert Mustacchi{
482f2dbfd3Robert Mustacchi	if (cmd == DDI_SUSPEND) {
483f2dbfd3Robert Mustacchi		return (DDI_SUCCESS);
48401c0c40Robert Mustacchi	} else if (cmd != DDI_DETACH) {
485f2dbfd3Robert Mustacchi		return (DDI_FAILURE);
486f2dbfd3Robert Mustacchi	}
487f2dbfd3Robert Mustacchi
488f2dbfd3Robert Mustacchi	if (coretemp == NULL) {
489f2dbfd3Robert Mustacchi		return (DDI_FAILURE);
490f2dbfd3Robert Mustacchi	}
491f2dbfd3Robert Mustacchi
49201c0c40Robert Mustacchi	coretemp_destroy(coretemp);
493f2dbfd3Robert Mustacchi	coretemp = NULL;
494f2dbfd3Robert Mustacchi
495f2dbfd3Robert Mustacchi	return (DDI_SUCCESS);
496f2dbfd3Robert Mustacchi}
497f2dbfd3Robert Mustacchi
498f2dbfd3Robert Mustacchistatic struct dev_ops coretemp_dev_ops = {
499f2dbfd3Robert Mustacchi	.devo_rev = DEVO_REV,
500f2dbfd3Robert Mustacchi	.devo_refcnt = 0,
50101c0c40Robert Mustacchi	.devo_getinfo = nodev,
502f2dbfd3Robert Mustacchi	.devo_identify = nulldev,
503f2dbfd3Robert Mustacchi	.devo_probe = nulldev,
504f2dbfd3Robert Mustacchi	.devo_attach = coretemp_attach,
505f2dbfd3Robert Mustacchi	.devo_detach = coretemp_detach,
506f2dbfd3Robert Mustacchi	.devo_reset = nodev,
50701c0c40Robert Mustacchi	.devo_quiesce = ddi_quiesce_not_needed
508f2dbfd3Robert Mustacchi};
509f2dbfd3Robert Mustacchi
510f2dbfd3Robert Mustacchistatic struct modldrv coretemp_modldrv = {
511f2dbfd3Robert Mustacchi	.drv_modops = &mod_driverops,
512f2dbfd3Robert Mustacchi	.drv_linkinfo = "Intel CPU/Package thermal sensor",
513f2dbfd3Robert Mustacchi	.drv_dev_ops = &coretemp_dev_ops
514f2dbfd3Robert Mustacchi};
515f2dbfd3Robert Mustacchi
516f2dbfd3Robert Mustacchistatic struct modlinkage coretemp_modlinkage = {
517f2dbfd3Robert Mustacchi	.ml_rev = MODREV_1,
518f2dbfd3Robert Mustacchi	.ml_linkage = { &coretemp_modldrv, NULL }
519f2dbfd3Robert Mustacchi};
520f2dbfd3Robert Mustacchi
521f2dbfd3Robert Mustacchiint
522f2dbfd3Robert Mustacchi_init(void)
523f2dbfd3Robert Mustacchi{
524f2dbfd3Robert Mustacchi	if (!coretemp_supported()) {
525f2dbfd3Robert Mustacchi		return (ENOTSUP);
526f2dbfd3Robert Mustacchi	}
527f2dbfd3Robert Mustacchi
528f2dbfd3Robert Mustacchi	return (mod_install(&coretemp_modlinkage));
529f2dbfd3Robert Mustacchi}
530f2dbfd3Robert Mustacchi
531f2dbfd3Robert Mustacchiint
532f2dbfd3Robert Mustacchi_info(struct modinfo *modinfop)
533f2dbfd3Robert Mustacchi{
534f2dbfd3Robert Mustacchi	return (mod_info(&coretemp_modlinkage, modinfop));
535f2dbfd3Robert Mustacchi}
536f2dbfd3Robert Mustacchi
537f2dbfd3Robert Mustacchiint
538f2dbfd3Robert Mustacchi_fini(void)
539f2dbfd3Robert Mustacchi{
540f2dbfd3Robert Mustacchi	return (mod_remove(&coretemp_modlinkage));
541f2dbfd3Robert Mustacchi}
542