/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019, Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */
16f2dbfd32SRobert Mustacchi 
/*
 * Intel CPU Thermal sensor driver
 *
 * The MSRs used here were introduced with the 'Core' family processors and
 * have since spread beyond there, even to the Atom line. Currently,
 * temperature sensors exist on a per-core basis and optionally on a
 * per-package basis. The temperature sensor exposes a reading that's relative
 * to the processor's maximum junction temperature, often referred to as
 * Tj Max. We currently only support models where we can determine that
 * junction temperature programmatically. For older processors, we would need
 * to track down the datasheet. Unfortunately, the values there are often on a
 * per-brand string basis. That is, two CPUs with the same model and stepping
 * that have been binned differently may have different junction temperatures.
 *
 * The temperature is exposed through /dev and uses a semi-standard sensor
 * framework. We expose one minor node per CPU core and one minor node per CPU
 * package, if that is supported. Reads are rate-limited in the driver at 100ms
 * by default per the global variable coretemp_cache_ms.
 */
36f2dbfd32SRobert Mustacchi 
37f2dbfd32SRobert Mustacchi #include <sys/modctl.h>
38f2dbfd32SRobert Mustacchi #include <sys/conf.h>
39f2dbfd32SRobert Mustacchi #include <sys/devops.h>
40f2dbfd32SRobert Mustacchi #include <sys/types.h>
41f2dbfd32SRobert Mustacchi #include <sys/file.h>
42f2dbfd32SRobert Mustacchi #include <sys/open.h>
43f2dbfd32SRobert Mustacchi #include <sys/stat.h>
44f2dbfd32SRobert Mustacchi #include <sys/cred.h>
45f2dbfd32SRobert Mustacchi #include <sys/ddi.h>
46f2dbfd32SRobert Mustacchi #include <sys/sunddi.h>
47f2dbfd32SRobert Mustacchi #include <sys/list.h>
48f2dbfd32SRobert Mustacchi #include <sys/stddef.h>
49f2dbfd32SRobert Mustacchi #include <sys/cmn_err.h>
50f2dbfd32SRobert Mustacchi #include <sys/x86_archext.h>
51f2dbfd32SRobert Mustacchi #include <sys/cpu_module.h>
52f2dbfd32SRobert Mustacchi #include <sys/ontrap.h>
53f2dbfd32SRobert Mustacchi #include <sys/cpuvar.h>
54f2dbfd32SRobert Mustacchi #include <sys/x_call.h>
55f2dbfd32SRobert Mustacchi #include <sys/sensors.h>
56f2dbfd32SRobert Mustacchi 
/*
 * According to the Intel SDM, the measurements we get are always in degrees
 * Celsius. This value is what we report as the granularity (sis_gran) of
 * every reading.
 */
#define	CORETEMP_GRANULARITY	1
6201c0c40bSRobert Mustacchi 
/*
 * The kind of thermal sensor a coretemp_sensor_t describes. The type selects
 * which status/interrupt MSRs are read and how the status value is decoded.
 */
typedef enum coretemp_sensor_type {
	/* Per-core sensor */
	CORETEMP_S_CORE,
	/* Per-package (socket) sensor */
	CORETEMP_S_SOCKET
} coretemp_sensor_type_t;
6701c0c40bSRobert Mustacchi 
6801c0c40bSRobert Mustacchi typedef struct coretemp_sensor {
6901c0c40bSRobert Mustacchi 	list_node_t		cs_link;
7001c0c40bSRobert Mustacchi 	struct coretemp		*cs_coretemp;
7101c0c40bSRobert Mustacchi 	char			cs_name[128];
7201c0c40bSRobert Mustacchi 	id_t			cs_sensor;
7301c0c40bSRobert Mustacchi 	coretemp_sensor_type_t	cs_type;
7401c0c40bSRobert Mustacchi 	enum cmi_hdl_class	cs_class;
7501c0c40bSRobert Mustacchi 	uint_t			cs_chip;
7601c0c40bSRobert Mustacchi 	uint_t			cs_core;
7701c0c40bSRobert Mustacchi 	uint_t			cs_strand;
7801c0c40bSRobert Mustacchi 	uint_t			cs_tjmax;
7901c0c40bSRobert Mustacchi 	uint_t			cs_status_msr;
8001c0c40bSRobert Mustacchi 	uint_t			cs_intr_msr;
8101c0c40bSRobert Mustacchi 	hrtime_t		cs_last_read;
8201c0c40bSRobert Mustacchi 	uint64_t		cs_status;
8301c0c40bSRobert Mustacchi 	uint64_t		cs_intr;
84f2dbfd32SRobert Mustacchi 	/* The following fields are derived from above */
8501c0c40bSRobert Mustacchi 	uint_t			cs_temperature;
8601c0c40bSRobert Mustacchi 	uint_t			cs_resolution;
8701c0c40bSRobert Mustacchi } coretemp_sensor_t;
88f2dbfd32SRobert Mustacchi 
89f2dbfd32SRobert Mustacchi typedef struct coretemp {
90f2dbfd32SRobert Mustacchi 	dev_info_t	*coretemp_dip;
91f2dbfd32SRobert Mustacchi 	cpuset_t	*coretemp_cpuset;
92f2dbfd32SRobert Mustacchi 	boolean_t	coretemp_pkg;
93f2dbfd32SRobert Mustacchi 	kmutex_t	coretemp_mutex;
9401c0c40bSRobert Mustacchi 	list_t		coretemp_sensors;
95f2dbfd32SRobert Mustacchi } coretemp_t;
96f2dbfd32SRobert Mustacchi 
97f2dbfd32SRobert Mustacchi coretemp_t *coretemp;
98f2dbfd32SRobert Mustacchi 
99f2dbfd32SRobert Mustacchi /*
100f2dbfd32SRobert Mustacchi  * This indicates a number of milliseconds that we should wait between reads.
101f2dbfd32SRobert Mustacchi  * This is somewhat arbitrary, but the goal is to reduce cross call activity
102f2dbfd32SRobert Mustacchi  * and reflect that the sensor may not update all the time.
103f2dbfd32SRobert Mustacchi  */
104f2dbfd32SRobert Mustacchi uint_t coretemp_cache_ms = 100;
105f2dbfd32SRobert Mustacchi 
106f2dbfd32SRobert Mustacchi static int
coretemp_rdmsr_xc(xc_arg_t arg1,xc_arg_t arg2,xc_arg_t arg3)107f2dbfd32SRobert Mustacchi coretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
108f2dbfd32SRobert Mustacchi {
109f2dbfd32SRobert Mustacchi 	uint_t msr = (uint_t)arg1;
110f2dbfd32SRobert Mustacchi 	uint64_t *valp = (uint64_t *)arg2;
111f2dbfd32SRobert Mustacchi 	cmi_errno_t *errp = (cmi_errno_t *)arg3;
112f2dbfd32SRobert Mustacchi 
113f2dbfd32SRobert Mustacchi 	on_trap_data_t otd;
114f2dbfd32SRobert Mustacchi 
115f2dbfd32SRobert Mustacchi 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
116f2dbfd32SRobert Mustacchi 		if (checked_rdmsr(msr, valp) == 0) {
117f2dbfd32SRobert Mustacchi 			*errp = CMI_SUCCESS;
118f2dbfd32SRobert Mustacchi 		} else {
119f2dbfd32SRobert Mustacchi 			*errp = CMIERR_NOTSUP;
120f2dbfd32SRobert Mustacchi 		}
121f2dbfd32SRobert Mustacchi 	} else {
122f2dbfd32SRobert Mustacchi 		*errp = CMIERR_MSRGPF;
123f2dbfd32SRobert Mustacchi 	}
124f2dbfd32SRobert Mustacchi 	no_trap();
125f2dbfd32SRobert Mustacchi 
126f2dbfd32SRobert Mustacchi 	return (0);
127f2dbfd32SRobert Mustacchi }
128f2dbfd32SRobert Mustacchi 
129f2dbfd32SRobert Mustacchi /*
130f2dbfd32SRobert Mustacchi  * This really should just be a call to the CMI handle to provide us the MSR.
131f2dbfd32SRobert Mustacchi  * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is
132f2dbfd32SRobert Mustacchi  * fixed for use outside of a panic-like context.
133f2dbfd32SRobert Mustacchi  */
134f2dbfd32SRobert Mustacchi static int
coretemp_rdmsr(coretemp_t * ct,cmi_hdl_t hdl,uint_t msr,uint64_t * valp)135f2dbfd32SRobert Mustacchi coretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp)
136f2dbfd32SRobert Mustacchi {
137f2dbfd32SRobert Mustacchi 	id_t cpu = cmi_hdl_logical_id(hdl);
138f2dbfd32SRobert Mustacchi 	int ret = CMI_SUCCESS;
139f2dbfd32SRobert Mustacchi 
140f2dbfd32SRobert Mustacchi 	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
141f2dbfd32SRobert Mustacchi 	kpreempt_disable();
142f2dbfd32SRobert Mustacchi 	if (CPU->cpu_id == cpu) {
143f2dbfd32SRobert Mustacchi 		(void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp,
144f2dbfd32SRobert Mustacchi 		    (xc_arg_t)&ret);
145f2dbfd32SRobert Mustacchi 	} else {
146f2dbfd32SRobert Mustacchi 		cpuset_only(ct->coretemp_cpuset, (uint_t)cpu);
147f2dbfd32SRobert Mustacchi 		xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&ret,
148f2dbfd32SRobert Mustacchi 		    (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc);
149f2dbfd32SRobert Mustacchi 	}
150f2dbfd32SRobert Mustacchi 	kpreempt_enable();
151f2dbfd32SRobert Mustacchi 
152f2dbfd32SRobert Mustacchi 	return (ret);
153f2dbfd32SRobert Mustacchi }
154f2dbfd32SRobert Mustacchi 
155f2dbfd32SRobert Mustacchi static int
coretemp_cmi_errno(cmi_errno_t e)156f2dbfd32SRobert Mustacchi coretemp_cmi_errno(cmi_errno_t e)
157f2dbfd32SRobert Mustacchi {
158f2dbfd32SRobert Mustacchi 	switch (e) {
159f2dbfd32SRobert Mustacchi 	case CMIERR_NOTSUP:
160f2dbfd32SRobert Mustacchi 		return (ENOTSUP);
161f2dbfd32SRobert Mustacchi 	default:
162f2dbfd32SRobert Mustacchi 		return (EIO);
163f2dbfd32SRobert Mustacchi 	}
164f2dbfd32SRobert Mustacchi }
165f2dbfd32SRobert Mustacchi 
166f2dbfd32SRobert Mustacchi /*
167f2dbfd32SRobert Mustacchi  * Answer the question of whether or not the driver can support the CPU in
168f2dbfd32SRobert Mustacchi  * question. Right now we have the following constraints for supporting the CPU:
169f2dbfd32SRobert Mustacchi  *
170f2dbfd32SRobert Mustacchi  *   o The CPU is made by Intel
171f2dbfd32SRobert Mustacchi  *   o The CPU has the Digital Thermal Sensor
172f2dbfd32SRobert Mustacchi  *   o The CPU family is 6, which is usually implicit from the above
173f2dbfd32SRobert Mustacchi  *   o We can determine its junction temperature through an MSR
174f2dbfd32SRobert Mustacchi  *
17501c0c40bSRobert Mustacchi  * If we can't determine the junction temperature programmatically, then we need
176f2dbfd32SRobert Mustacchi  * to set up tables of CPUs to do so. This can be fleshed out and improved.
177f2dbfd32SRobert Mustacchi  */
178f2dbfd32SRobert Mustacchi static boolean_t
coretemp_supported(void)179f2dbfd32SRobert Mustacchi coretemp_supported(void)
180f2dbfd32SRobert Mustacchi {
181f2dbfd32SRobert Mustacchi 	uint_t model;
182f2dbfd32SRobert Mustacchi 
183f2dbfd32SRobert Mustacchi 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) {
184f2dbfd32SRobert Mustacchi 		return (B_FALSE);
185f2dbfd32SRobert Mustacchi 	}
186f2dbfd32SRobert Mustacchi 
187f2dbfd32SRobert Mustacchi 	if (!is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL)) {
188f2dbfd32SRobert Mustacchi 		return (B_FALSE);
189f2dbfd32SRobert Mustacchi 	}
190f2dbfd32SRobert Mustacchi 
191f2dbfd32SRobert Mustacchi 	if (cpuid_getfamily(CPU) != 6) {
192f2dbfd32SRobert Mustacchi 		return (B_FALSE);
193f2dbfd32SRobert Mustacchi 	}
194f2dbfd32SRobert Mustacchi 
195f2dbfd32SRobert Mustacchi 	model = cpuid_getmodel(CPU);
196f2dbfd32SRobert Mustacchi 	if (model <= INTC_MODEL_PENRYN || model == INTC_MODEL_SILVERTHORNE ||
197f2dbfd32SRobert Mustacchi 	    model == INTC_MODEL_LINCROFT || model == INTC_MODEL_PENWELL ||
198f2dbfd32SRobert Mustacchi 	    model == INTC_MODEL_CLOVERVIEW || model == INTC_MODEL_CEDARVIEW) {
199f2dbfd32SRobert Mustacchi 		return (B_FALSE);
200f2dbfd32SRobert Mustacchi 	}
201f2dbfd32SRobert Mustacchi 
202f2dbfd32SRobert Mustacchi 	return (B_TRUE);
203f2dbfd32SRobert Mustacchi }
204f2dbfd32SRobert Mustacchi 
205f2dbfd32SRobert Mustacchi /*
206f2dbfd32SRobert Mustacchi  * We need to determine the value of Tj Max as all temperature sensors are
207f2dbfd32SRobert Mustacchi  * derived from this value. The ease of this depends on how old the processor in
208f2dbfd32SRobert Mustacchi  * question is. The Core family processors after Penryn have support for an MSR
209f2dbfd32SRobert Mustacchi  * that tells us what to go for. In the Atom family, processors starting with
210f2dbfd32SRobert Mustacchi  * Silvermont have support for an MSR that documents this value. For older
211f2dbfd32SRobert Mustacchi  * processors, one needs to track down the datasheet for a specific processor.
212f2dbfd32SRobert Mustacchi  * Two processors in the same family/model may have different values of Tj Max.
213f2dbfd32SRobert Mustacchi  * At the moment, we only support this on processors that have that MSR.
214f2dbfd32SRobert Mustacchi  */
215f2dbfd32SRobert Mustacchi static int
coretemp_calculate_tjmax(coretemp_t * ct,cmi_hdl_t hdl,uint_t * tjmax)21601c0c40bSRobert Mustacchi coretemp_calculate_tjmax(coretemp_t *ct, cmi_hdl_t hdl, uint_t *tjmax)
217f2dbfd32SRobert Mustacchi {
218f2dbfd32SRobert Mustacchi 	cmi_errno_t e;
219f2dbfd32SRobert Mustacchi 	uint64_t val = 0;
220f2dbfd32SRobert Mustacchi 
221f2dbfd32SRobert Mustacchi 	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val);
22201c0c40bSRobert Mustacchi 	if (e != CMI_SUCCESS) {
22301c0c40bSRobert Mustacchi 		return (coretemp_cmi_errno(e));
224f2dbfd32SRobert Mustacchi 	} else if (val == 0) {
22501c0c40bSRobert Mustacchi 		return (EINVAL);
226f2dbfd32SRobert Mustacchi 	}
227f2dbfd32SRobert Mustacchi 
22801c0c40bSRobert Mustacchi 	*tjmax = MSR_TEMPERATURE_TARGET_TARGET(val);
22901c0c40bSRobert Mustacchi 	return (0);
230f2dbfd32SRobert Mustacchi }
231f2dbfd32SRobert Mustacchi 
232f2dbfd32SRobert Mustacchi static int
coretemp_update(coretemp_t * ct,coretemp_sensor_t * sensor,cmi_hdl_t hdl)23301c0c40bSRobert Mustacchi coretemp_update(coretemp_t *ct, coretemp_sensor_t *sensor, cmi_hdl_t hdl)
234f2dbfd32SRobert Mustacchi {
235f2dbfd32SRobert Mustacchi 	cmi_errno_t e;
236f2dbfd32SRobert Mustacchi 	int err = 0;
23701c0c40bSRobert Mustacchi 	uint64_t intr, status;
238f2dbfd32SRobert Mustacchi 
23901c0c40bSRobert Mustacchi 	if ((e = coretemp_rdmsr(ct, hdl, sensor->cs_status_msr, &status)) !=
24001c0c40bSRobert Mustacchi 	    CMI_SUCCESS) {
241f2dbfd32SRobert Mustacchi 		err = coretemp_cmi_errno(e);
24201c0c40bSRobert Mustacchi 		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get thermal "
24301c0c40bSRobert Mustacchi 		    "status on %s: %d", sensor->cs_name, err);
244f2dbfd32SRobert Mustacchi 		return (err);
245f2dbfd32SRobert Mustacchi 	}
246f2dbfd32SRobert Mustacchi 
24701c0c40bSRobert Mustacchi 	if ((e = coretemp_rdmsr(ct, hdl, sensor->cs_intr_msr, &intr)) !=
24801c0c40bSRobert Mustacchi 	    CMI_SUCCESS) {
249f2dbfd32SRobert Mustacchi 		err = coretemp_cmi_errno(e);
25001c0c40bSRobert Mustacchi 		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get thermal "
25101c0c40bSRobert Mustacchi 		    "interrupt on %s: %d", sensor->cs_name, err);
252f2dbfd32SRobert Mustacchi 		return (err);
253f2dbfd32SRobert Mustacchi 	}
254f2dbfd32SRobert Mustacchi 
25501c0c40bSRobert Mustacchi 	sensor->cs_status = status;
25601c0c40bSRobert Mustacchi 	sensor->cs_intr = intr;
25701c0c40bSRobert Mustacchi 	sensor->cs_last_read = gethrtime();
258f2dbfd32SRobert Mustacchi 	return (0);
259f2dbfd32SRobert Mustacchi }
260f2dbfd32SRobert Mustacchi 
261f2dbfd32SRobert Mustacchi static int
coretemp_read(void * arg,sensor_ioctl_scalar_t * scalar)2621045e13aSRobert Mustacchi coretemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
263f2dbfd32SRobert Mustacchi {
26401c0c40bSRobert Mustacchi 	coretemp_sensor_t *sensor = arg;
26501c0c40bSRobert Mustacchi 	coretemp_t *ct = sensor->cs_coretemp;
266f2dbfd32SRobert Mustacchi 	hrtime_t diff;
26701c0c40bSRobert Mustacchi 	uint_t reading, resolution;
268f2dbfd32SRobert Mustacchi 
269f2dbfd32SRobert Mustacchi 	mutex_enter(&ct->coretemp_mutex);
27001c0c40bSRobert Mustacchi 	diff = NSEC2MSEC(gethrtime() - sensor->cs_last_read);
271f2dbfd32SRobert Mustacchi 	if (diff > 0 && diff > (hrtime_t)coretemp_cache_ms) {
272f2dbfd32SRobert Mustacchi 		int ret;
273f2dbfd32SRobert Mustacchi 		cmi_hdl_t hdl;
274f2dbfd32SRobert Mustacchi 
27501c0c40bSRobert Mustacchi 		if ((hdl = cmi_hdl_lookup(sensor->cs_class, sensor->cs_chip,
27601c0c40bSRobert Mustacchi 		    sensor->cs_core, sensor->cs_strand)) == NULL) {
277f2dbfd32SRobert Mustacchi 			mutex_exit(&ct->coretemp_mutex);
278f2dbfd32SRobert Mustacchi 			return (ENXIO);
279f2dbfd32SRobert Mustacchi 		}
28001c0c40bSRobert Mustacchi 		ret = coretemp_update(ct, sensor, hdl);
281f2dbfd32SRobert Mustacchi 		cmi_hdl_rele(hdl);
282f2dbfd32SRobert Mustacchi 		if (ret != 0) {
283f2dbfd32SRobert Mustacchi 			mutex_exit(&ct->coretemp_mutex);
284f2dbfd32SRobert Mustacchi 			return (ret);
285f2dbfd32SRobert Mustacchi 		}
286f2dbfd32SRobert Mustacchi 	}
287f2dbfd32SRobert Mustacchi 
28801c0c40bSRobert Mustacchi 	switch (sensor->cs_type) {
28901c0c40bSRobert Mustacchi 	case CORETEMP_S_CORE:
29001c0c40bSRobert Mustacchi 		if ((sensor->cs_status & IA32_THERM_STATUS_READ_VALID) == 0) {
29101c0c40bSRobert Mustacchi 			mutex_exit(&ct->coretemp_mutex);
29201c0c40bSRobert Mustacchi 			return (EIO);
29301c0c40bSRobert Mustacchi 		}
29401c0c40bSRobert Mustacchi 		reading = IA32_THERM_STATUS_READING(sensor->cs_status);
29501c0c40bSRobert Mustacchi 		resolution = IA32_THERM_STATUS_RESOLUTION(sensor->cs_status);
29601c0c40bSRobert Mustacchi 		break;
29701c0c40bSRobert Mustacchi 	case CORETEMP_S_SOCKET:
29801c0c40bSRobert Mustacchi 		reading = IA32_PKG_THERM_STATUS_READING(sensor->cs_status);
29901c0c40bSRobert Mustacchi 		resolution = 0;
30001c0c40bSRobert Mustacchi 		break;
30101c0c40bSRobert Mustacchi 	default:
30201c0c40bSRobert Mustacchi 		mutex_exit(&ct->coretemp_mutex);
30301c0c40bSRobert Mustacchi 		return (ENXIO);
304f2dbfd32SRobert Mustacchi 	}
30501c0c40bSRobert Mustacchi 	if (reading >= sensor->cs_tjmax) {
30601c0c40bSRobert Mustacchi 		dev_err(ct->coretemp_dip, CE_WARN, "!found invalid temperature "
30701c0c40bSRobert Mustacchi 		    "on sensor %s: readout: %u, tjmax: %u, raw: 0x%"
30801c0c40bSRobert Mustacchi 		    PRIx64, sensor->cs_name, reading, sensor->cs_tjmax,
30901c0c40bSRobert Mustacchi 		    sensor->cs_status);
31001c0c40bSRobert Mustacchi 		mutex_exit(&ct->coretemp_mutex);
31101c0c40bSRobert Mustacchi 		return (EIO);
312f2dbfd32SRobert Mustacchi 	}
31301c0c40bSRobert Mustacchi 	sensor->cs_temperature = sensor->cs_tjmax - reading;
31401c0c40bSRobert Mustacchi 	sensor->cs_resolution = resolution;
315f2dbfd32SRobert Mustacchi 
3161045e13aSRobert Mustacchi 	scalar->sis_unit = SENSOR_UNIT_CELSIUS;
3171045e13aSRobert Mustacchi 	scalar->sis_value = sensor->cs_temperature;
3181045e13aSRobert Mustacchi 	scalar->sis_gran = CORETEMP_GRANULARITY;
3191045e13aSRobert Mustacchi 	scalar->sis_prec = sensor->cs_resolution;
32001c0c40bSRobert Mustacchi 	mutex_exit(&ct->coretemp_mutex);
321f2dbfd32SRobert Mustacchi 
322f2dbfd32SRobert Mustacchi 	return (0);
323f2dbfd32SRobert Mustacchi }
324f2dbfd32SRobert Mustacchi 
32501c0c40bSRobert Mustacchi static const ksensor_ops_t coretemp_temp_ops = {
32601c0c40bSRobert Mustacchi 	.kso_kind = ksensor_kind_temperature,
3271045e13aSRobert Mustacchi 	.kso_scalar = coretemp_read
32801c0c40bSRobert Mustacchi };
329f2dbfd32SRobert Mustacchi 
330f2dbfd32SRobert Mustacchi static void
coretemp_destroy(coretemp_t * ct)331f2dbfd32SRobert Mustacchi coretemp_destroy(coretemp_t *ct)
332f2dbfd32SRobert Mustacchi {
33301c0c40bSRobert Mustacchi 	coretemp_sensor_t *sensor;
334f2dbfd32SRobert Mustacchi 
33501c0c40bSRobert Mustacchi 	(void) ksensor_remove(ct->coretemp_dip, KSENSOR_ALL_IDS);
33601c0c40bSRobert Mustacchi 	while ((sensor = list_remove_head(&ct->coretemp_sensors)) != NULL) {
33701c0c40bSRobert Mustacchi 		kmem_free(sensor, sizeof (coretemp_sensor_t));
338f2dbfd32SRobert Mustacchi 	}
33901c0c40bSRobert Mustacchi 	list_destroy(&ct->coretemp_sensors);
340f2dbfd32SRobert Mustacchi 
341f2dbfd32SRobert Mustacchi 	if (ct->coretemp_cpuset != NULL) {
342f2dbfd32SRobert Mustacchi 		cpuset_free(ct->coretemp_cpuset);
343f2dbfd32SRobert Mustacchi 	}
344f2dbfd32SRobert Mustacchi 
345f2dbfd32SRobert Mustacchi 	mutex_destroy(&ct->coretemp_mutex);
346f2dbfd32SRobert Mustacchi 	kmem_free(ct, sizeof (coretemp_t));
347f2dbfd32SRobert Mustacchi }
348f2dbfd32SRobert Mustacchi 
34901c0c40bSRobert Mustacchi static boolean_t
coretemp_create_sensor(coretemp_t * ct,cmi_hdl_t hdl,uint_t tjmax,coretemp_sensor_type_t type)35001c0c40bSRobert Mustacchi coretemp_create_sensor(coretemp_t *ct, cmi_hdl_t hdl, uint_t tjmax,
35101c0c40bSRobert Mustacchi     coretemp_sensor_type_t type)
35201c0c40bSRobert Mustacchi {
35301c0c40bSRobert Mustacchi 	int err;
35401c0c40bSRobert Mustacchi 	coretemp_sensor_t *sensor;
35501c0c40bSRobert Mustacchi 
35601c0c40bSRobert Mustacchi 	sensor = kmem_zalloc(sizeof (coretemp_sensor_t), KM_SLEEP);
35701c0c40bSRobert Mustacchi 	sensor->cs_coretemp = ct;
35801c0c40bSRobert Mustacchi 	sensor->cs_type = type;
35901c0c40bSRobert Mustacchi 	sensor->cs_class = cmi_hdl_class(hdl);
36001c0c40bSRobert Mustacchi 	sensor->cs_chip = cmi_hdl_chipid(hdl);
36101c0c40bSRobert Mustacchi 	sensor->cs_core = cmi_hdl_coreid(hdl);
36201c0c40bSRobert Mustacchi 	sensor->cs_strand = 0;
36301c0c40bSRobert Mustacchi 	sensor->cs_tjmax = tjmax;
36401c0c40bSRobert Mustacchi 
36501c0c40bSRobert Mustacchi 	switch (sensor->cs_type) {
36601c0c40bSRobert Mustacchi 	case CORETEMP_S_CORE:
36701c0c40bSRobert Mustacchi 		if (snprintf(sensor->cs_name, sizeof (sensor->cs_name),
36801c0c40bSRobert Mustacchi 		    "chip%u.core%u", sensor->cs_chip, sensor->cs_core) >=
36901c0c40bSRobert Mustacchi 		    sizeof (sensor->cs_name)) {
37001c0c40bSRobert Mustacchi 			goto err;
37101c0c40bSRobert Mustacchi 		}
37201c0c40bSRobert Mustacchi 		sensor->cs_status_msr = MSR_IA32_THERM_STATUS;
37301c0c40bSRobert Mustacchi 		sensor->cs_intr_msr = MSR_IA32_THERM_INTERRUPT;
37401c0c40bSRobert Mustacchi 		break;
37501c0c40bSRobert Mustacchi 	case CORETEMP_S_SOCKET:
37601c0c40bSRobert Mustacchi 		if (snprintf(sensor->cs_name, sizeof (sensor->cs_name),
37701c0c40bSRobert Mustacchi 		    "chip%u", sensor->cs_chip) >= sizeof (sensor->cs_name)) {
37801c0c40bSRobert Mustacchi 			goto err;
37901c0c40bSRobert Mustacchi 		}
38001c0c40bSRobert Mustacchi 		sensor->cs_status_msr = MSR_IA32_PACKAGE_THERM_STATUS;
38101c0c40bSRobert Mustacchi 		sensor->cs_intr_msr = MSR_IA32_PACKAGE_THERM_INTERRUPT;
38201c0c40bSRobert Mustacchi 		break;
38301c0c40bSRobert Mustacchi 	}
38401c0c40bSRobert Mustacchi 
38501c0c40bSRobert Mustacchi 	if ((err = ksensor_create(ct->coretemp_dip, &coretemp_temp_ops, sensor,
38601c0c40bSRobert Mustacchi 	    sensor->cs_name, DDI_NT_SENSOR_TEMP_CPU, &sensor->cs_sensor)) !=
38701c0c40bSRobert Mustacchi 	    0) {
38801c0c40bSRobert Mustacchi 		dev_err(ct->coretemp_dip, CE_WARN, "failed to create ksensor "
38901c0c40bSRobert Mustacchi 		    "for %s: %d", sensor->cs_name, err);
39001c0c40bSRobert Mustacchi 	}
39101c0c40bSRobert Mustacchi 
392*173f6047SRobert Mustacchi 	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
393*173f6047SRobert Mustacchi 	list_insert_tail(&ct->coretemp_sensors, sensor);
394*173f6047SRobert Mustacchi 
39501c0c40bSRobert Mustacchi 	return (B_TRUE);
39601c0c40bSRobert Mustacchi err:
39701c0c40bSRobert Mustacchi 	kmem_free(sensor, sizeof (coretemp_sensor_t));
39801c0c40bSRobert Mustacchi 	return (B_FALSE);
39901c0c40bSRobert Mustacchi }
40001c0c40bSRobert Mustacchi 
401f2dbfd32SRobert Mustacchi static int
coretemp_walk(cmi_hdl_t hdl,void * arg1,void * arg2,void * arg3)40201c0c40bSRobert Mustacchi coretemp_walk(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3)
403f2dbfd32SRobert Mustacchi {
404f2dbfd32SRobert Mustacchi 	coretemp_t *ct = arg1;
405f2dbfd32SRobert Mustacchi 	boolean_t *walkerr = arg2;
40601c0c40bSRobert Mustacchi 	uint_t tjmax;
407f2dbfd32SRobert Mustacchi 	int err;
408f2dbfd32SRobert Mustacchi 
409f2dbfd32SRobert Mustacchi 	/*
410f2dbfd32SRobert Mustacchi 	 * The temperature sensor only exists on a per-core basis. Therefore we
411f2dbfd32SRobert Mustacchi 	 * ignore any non-zero strand.
412f2dbfd32SRobert Mustacchi 	 */
413f2dbfd32SRobert Mustacchi 	if (cmi_hdl_strandid(hdl) != 0) {
414f2dbfd32SRobert Mustacchi 		return (CMI_HDL_WALK_NEXT);
415f2dbfd32SRobert Mustacchi 	}
416f2dbfd32SRobert Mustacchi 
41701c0c40bSRobert Mustacchi 	if ((err = coretemp_calculate_tjmax(ct, hdl, &tjmax)) != 0) {
418f2dbfd32SRobert Mustacchi 		dev_err(ct->coretemp_dip, CE_WARN,
41901c0c40bSRobert Mustacchi 		    "failed to read Tj Max on %u/%u: %d", cmi_hdl_chipid(hdl),
42001c0c40bSRobert Mustacchi 		    cmi_hdl_coreid(hdl), err);
421f2dbfd32SRobert Mustacchi 		*walkerr = B_TRUE;
422f2dbfd32SRobert Mustacchi 		return (CMI_HDL_WALK_DONE);
423f2dbfd32SRobert Mustacchi 	}
424f2dbfd32SRobert Mustacchi 
42501c0c40bSRobert Mustacchi 	if (!coretemp_create_sensor(ct, hdl, tjmax, CORETEMP_S_CORE)) {
426f2dbfd32SRobert Mustacchi 		*walkerr = B_TRUE;
427f2dbfd32SRobert Mustacchi 		return (CMI_HDL_WALK_DONE);
428f2dbfd32SRobert Mustacchi 	}
429f2dbfd32SRobert Mustacchi 
43001c0c40bSRobert Mustacchi 	if (ct->coretemp_pkg && cmi_hdl_coreid(hdl) == 0 &&
43101c0c40bSRobert Mustacchi 	    !coretemp_create_sensor(ct, hdl, tjmax, CORETEMP_S_SOCKET)) {
43201c0c40bSRobert Mustacchi 		*walkerr = B_TRUE;
43301c0c40bSRobert Mustacchi 		return (CMI_HDL_WALK_DONE);
434f2dbfd32SRobert Mustacchi 	}
435f2dbfd32SRobert Mustacchi 
43601c0c40bSRobert Mustacchi 	return (CMI_HDL_WALK_NEXT);
437f2dbfd32SRobert Mustacchi }
438f2dbfd32SRobert Mustacchi 
439f2dbfd32SRobert Mustacchi static int
coretemp_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)440f2dbfd32SRobert Mustacchi coretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
441f2dbfd32SRobert Mustacchi {
442f2dbfd32SRobert Mustacchi 	boolean_t walkerr;
443f2dbfd32SRobert Mustacchi 	coretemp_t *ct = NULL;
444f2dbfd32SRobert Mustacchi 
445f2dbfd32SRobert Mustacchi 	if (cmd == DDI_RESUME) {
446f2dbfd32SRobert Mustacchi 		return (DDI_SUCCESS);
44701c0c40bSRobert Mustacchi 	} else if (cmd != DDI_ATTACH) {
448f2dbfd32SRobert Mustacchi 		return (DDI_FAILURE);
449f2dbfd32SRobert Mustacchi 	}
450f2dbfd32SRobert Mustacchi 
451f2dbfd32SRobert Mustacchi 	if (coretemp != NULL) {
452f2dbfd32SRobert Mustacchi 		return (DDI_FAILURE);
453f2dbfd32SRobert Mustacchi 	}
454f2dbfd32SRobert Mustacchi 
455f2dbfd32SRobert Mustacchi 	ct = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP);
456f2dbfd32SRobert Mustacchi 	ct->coretemp_dip = dip;
457f2dbfd32SRobert Mustacchi 	ct->coretemp_pkg = is_x86_feature(x86_featureset, X86FSET_PKG_THERMAL);
45801c0c40bSRobert Mustacchi 	list_create(&ct->coretemp_sensors, sizeof (coretemp_sensor_t),
45901c0c40bSRobert Mustacchi 	    offsetof(coretemp_sensor_t, cs_link));
460f2dbfd32SRobert Mustacchi 	mutex_init(&ct->coretemp_mutex, NULL, MUTEX_DRIVER, NULL);
461f2dbfd32SRobert Mustacchi 	ct->coretemp_cpuset = cpuset_alloc(KM_SLEEP);
462f2dbfd32SRobert Mustacchi 
463f2dbfd32SRobert Mustacchi 	mutex_enter(&ct->coretemp_mutex);
464f2dbfd32SRobert Mustacchi 	walkerr = B_FALSE;
46501c0c40bSRobert Mustacchi 	cmi_hdl_walk(coretemp_walk, ct, &walkerr, NULL);
466f2dbfd32SRobert Mustacchi 
467f2dbfd32SRobert Mustacchi 	if (walkerr) {
468f2dbfd32SRobert Mustacchi 		mutex_exit(&ct->coretemp_mutex);
469f2dbfd32SRobert Mustacchi 		goto fail;
470f2dbfd32SRobert Mustacchi 	}
471f2dbfd32SRobert Mustacchi 
472f2dbfd32SRobert Mustacchi 	coretemp = ct;
473f2dbfd32SRobert Mustacchi 	mutex_exit(&ct->coretemp_mutex);
474f2dbfd32SRobert Mustacchi 	return (DDI_SUCCESS);
475f2dbfd32SRobert Mustacchi fail:
476f2dbfd32SRobert Mustacchi 	coretemp = NULL;
477f2dbfd32SRobert Mustacchi 	coretemp_destroy(ct);
478f2dbfd32SRobert Mustacchi 	return (DDI_FAILURE);
479f2dbfd32SRobert Mustacchi 
480f2dbfd32SRobert Mustacchi }
481f2dbfd32SRobert Mustacchi 
482f2dbfd32SRobert Mustacchi static int
coretemp_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)483f2dbfd32SRobert Mustacchi coretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
484f2dbfd32SRobert Mustacchi {
485f2dbfd32SRobert Mustacchi 	if (cmd == DDI_SUSPEND) {
486f2dbfd32SRobert Mustacchi 		return (DDI_SUCCESS);
48701c0c40bSRobert Mustacchi 	} else if (cmd != DDI_DETACH) {
488f2dbfd32SRobert Mustacchi 		return (DDI_FAILURE);
489f2dbfd32SRobert Mustacchi 	}
490f2dbfd32SRobert Mustacchi 
491f2dbfd32SRobert Mustacchi 	if (coretemp == NULL) {
492f2dbfd32SRobert Mustacchi 		return (DDI_FAILURE);
493f2dbfd32SRobert Mustacchi 	}
494f2dbfd32SRobert Mustacchi 
49501c0c40bSRobert Mustacchi 	coretemp_destroy(coretemp);
496f2dbfd32SRobert Mustacchi 	coretemp = NULL;
497f2dbfd32SRobert Mustacchi 
498f2dbfd32SRobert Mustacchi 	return (DDI_SUCCESS);
499f2dbfd32SRobert Mustacchi }
500f2dbfd32SRobert Mustacchi 
501f2dbfd32SRobert Mustacchi static struct dev_ops coretemp_dev_ops = {
502f2dbfd32SRobert Mustacchi 	.devo_rev = DEVO_REV,
503f2dbfd32SRobert Mustacchi 	.devo_refcnt = 0,
50401c0c40bSRobert Mustacchi 	.devo_getinfo = nodev,
505f2dbfd32SRobert Mustacchi 	.devo_identify = nulldev,
506f2dbfd32SRobert Mustacchi 	.devo_probe = nulldev,
507f2dbfd32SRobert Mustacchi 	.devo_attach = coretemp_attach,
508f2dbfd32SRobert Mustacchi 	.devo_detach = coretemp_detach,
509f2dbfd32SRobert Mustacchi 	.devo_reset = nodev,
51001c0c40bSRobert Mustacchi 	.devo_quiesce = ddi_quiesce_not_needed
511f2dbfd32SRobert Mustacchi };
512f2dbfd32SRobert Mustacchi 
513f2dbfd32SRobert Mustacchi static struct modldrv coretemp_modldrv = {
514f2dbfd32SRobert Mustacchi 	.drv_modops = &mod_driverops,
515f2dbfd32SRobert Mustacchi 	.drv_linkinfo = "Intel CPU/Package thermal sensor",
516f2dbfd32SRobert Mustacchi 	.drv_dev_ops = &coretemp_dev_ops
517f2dbfd32SRobert Mustacchi };
518f2dbfd32SRobert Mustacchi 
519f2dbfd32SRobert Mustacchi static struct modlinkage coretemp_modlinkage = {
520f2dbfd32SRobert Mustacchi 	.ml_rev = MODREV_1,
521f2dbfd32SRobert Mustacchi 	.ml_linkage = { &coretemp_modldrv, NULL }
522f2dbfd32SRobert Mustacchi };
523f2dbfd32SRobert Mustacchi 
524f2dbfd32SRobert Mustacchi int
_init(void)525f2dbfd32SRobert Mustacchi _init(void)
526f2dbfd32SRobert Mustacchi {
527f2dbfd32SRobert Mustacchi 	if (!coretemp_supported()) {
528f2dbfd32SRobert Mustacchi 		return (ENOTSUP);
529f2dbfd32SRobert Mustacchi 	}
530f2dbfd32SRobert Mustacchi 
531f2dbfd32SRobert Mustacchi 	return (mod_install(&coretemp_modlinkage));
532f2dbfd32SRobert Mustacchi }
533f2dbfd32SRobert Mustacchi 
534f2dbfd32SRobert Mustacchi int
_info(struct modinfo * modinfop)535f2dbfd32SRobert Mustacchi _info(struct modinfo *modinfop)
536f2dbfd32SRobert Mustacchi {
537f2dbfd32SRobert Mustacchi 	return (mod_info(&coretemp_modlinkage, modinfop));
538f2dbfd32SRobert Mustacchi }
539f2dbfd32SRobert Mustacchi 
540f2dbfd32SRobert Mustacchi int
_fini(void)541f2dbfd32SRobert Mustacchi _fini(void)
542f2dbfd32SRobert Mustacchi {
543f2dbfd32SRobert Mustacchi 	return (mod_remove(&coretemp_modlinkage));
544f2dbfd32SRobert Mustacchi }
545