xref: /illumos-gate/usr/src/uts/intel/io/coretemp/coretemp.c (revision f2dbfd322ec9cd157a6e2cd8a53569e718a4b0af)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  */
15 
16 /*
17  * Intel CPU Thermal sensor driver
18  *
19  * The MSRs used by this driver were introduced with the 'Core' family processors
20  * and have since spread beyond there, even to the Atom line. Currently,
21  * temperature sensors exist on a per-core basis and optionally on a per-package
22  * basis. The temperature sensor exposes a reading that's relative to the
23  * processor's maximum junction temperature, often referred to as Tj. We
24  * currently only support models where we can determine that junction
25  * temperature programmatically. For older processors, we would need to track
26  * down the datasheet. Unfortunately, the values here are often on a per-brand
27  * string basis. That is, two CPUs with the same model and stepping that have
28  * been binned differently may have different junction temperatures.
29  *
30  * The temperature is exposed through /dev and uses a semi-standard sensor
31  * framework. We expose one minor node per CPU core and one minor node per CPU
32  * package, if that is supported. Reads are rate-limited in the driver at 100ms
33  * by default per the global variable coretemp_cache_ms.
34  */
35 
36 #include <sys/modctl.h>
37 #include <sys/conf.h>
38 #include <sys/devops.h>
39 #include <sys/types.h>
40 #include <sys/file.h>
41 #include <sys/open.h>
42 #include <sys/stat.h>
43 #include <sys/cred.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/list.h>
47 #include <sys/stddef.h>
48 #include <sys/cmn_err.h>
49 #include <sys/id_space.h>
50 #include <sys/x86_archext.h>
51 #include <sys/cpu_module.h>
52 #include <sys/ontrap.h>
53 #include <sys/cpuvar.h>
54 #include <sys/x_call.h>
55 #include <sys/sensors.h>
56 
/*
 * Inclusive range of minor numbers that coretemp_lookup_core() is willing to
 * search for. Zero lies outside of the range, so a zeroed minor field in a
 * coretemp_core_t never matches a lookup.
 */
#define	CORETEMP_MINOR_MIN	1
#define	CORETEMP_MINOR_MAX	INT32_MAX
59 
60 typedef struct coretemp_core {
61 	list_node_t		ctc_link;
62 	id_t			ctc_core_minor;
63 	id_t			ctc_pkg_minor;
64 	enum cmi_hdl_class	ctc_class;
65 	uint_t			ctc_chip;
66 	uint_t			ctc_core;
67 	uint_t			ctc_strand;
68 	uint_t			ctc_tjmax;
69 	hrtime_t		ctc_last_read;
70 	uint64_t		ctc_core_status;
71 	uint64_t		ctc_core_intr;
72 	uint64_t		ctc_pkg_status;
73 	uint64_t		ctc_pkg_intr;
74 	uint64_t		ctc_invalid_reads;
75 	/* The following fields are derived from above */
76 	uint_t			ctc_temperature;
77 	uint_t			ctc_resolution;
78 	uint_t			ctc_pkg_temperature;
79 } coretemp_core_t;
80 
81 typedef struct coretemp {
82 	dev_info_t	*coretemp_dip;
83 	id_space_t	*coretemp_ids;
84 	cpuset_t	*coretemp_cpuset;
85 	boolean_t	coretemp_pkg;
86 	kmutex_t	coretemp_mutex;
87 	list_t		coretemp_cores;
88 } coretemp_t;
89 
90 coretemp_t *coretemp;
91 
92 /*
93  * This indicates a number of milliseconds that we should wait between reads.
94  * This is somewhat arbitrary, but the goal is to reduce cross call activity
95  * and reflect that the sensor may not update all the time.
96  */
97 uint_t coretemp_cache_ms = 100;
98 
99 static int
100 coretemp_rdmsr_xc(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
101 {
102 	uint_t msr = (uint_t)arg1;
103 	uint64_t *valp = (uint64_t *)arg2;
104 	cmi_errno_t *errp = (cmi_errno_t *)arg3;
105 
106 	on_trap_data_t otd;
107 
108 	if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
109 		if (checked_rdmsr(msr, valp) == 0) {
110 			*errp = CMI_SUCCESS;
111 		} else {
112 			*errp = CMIERR_NOTSUP;
113 		}
114 	} else {
115 		*errp = CMIERR_MSRGPF;
116 	}
117 	no_trap();
118 
119 	return (0);
120 }
121 
122 /*
123  * This really should just be a call to the CMI handle to provide us the MSR.
124  * However, that routine, cmi_hdl_rdmsr(), cannot be safely used until it is
125  * fixed for use outside of a panic-like context.
126  */
127 static int
128 coretemp_rdmsr(coretemp_t *ct, cmi_hdl_t hdl, uint_t msr, uint64_t *valp)
129 {
130 	id_t cpu = cmi_hdl_logical_id(hdl);
131 	int ret = CMI_SUCCESS;
132 
133 	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
134 	kpreempt_disable();
135 	if (CPU->cpu_id == cpu) {
136 		(void) coretemp_rdmsr_xc((xc_arg_t)msr, (xc_arg_t)valp,
137 		    (xc_arg_t)&ret);
138 	} else {
139 		cpuset_only(ct->coretemp_cpuset, (uint_t)cpu);
140 		xc_call((xc_arg_t)msr, (xc_arg_t)valp, (xc_arg_t)&ret,
141 		    (ulong_t *)ct->coretemp_cpuset, coretemp_rdmsr_xc);
142 	}
143 	kpreempt_enable();
144 
145 	return (ret);
146 }
147 
148 static int
149 coretemp_cmi_errno(cmi_errno_t e)
150 {
151 	switch (e) {
152 	case CMIERR_NOTSUP:
153 		return (ENOTSUP);
154 	default:
155 		return (EIO);
156 	}
157 }
158 
159 /*
160  * Answer the question of whether or not the driver can support the CPU in
161  * question. Right now we have the following constraints for supporting the CPU:
162  *
163  *   o The CPU is made by Intel
164  *   o The CPU has the Digital Thermal Sensor
165  *   o The CPU family is 6, which is usually implicit from the above
166  *   o We can determine its junction temperature through an MSR
167  *
168  * If we can't determine the junction temperature programatically, then we need
169  * to set up tables of CPUs to do so. This can be fleshed out and improved.
170  */
171 static boolean_t
172 coretemp_supported(void)
173 {
174 	uint_t model;
175 
176 	if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) {
177 		return (B_FALSE);
178 	}
179 
180 	if (!is_x86_feature(x86_featureset, X86FSET_CORE_THERMAL)) {
181 		return (B_FALSE);
182 	}
183 
184 	if (cpuid_getfamily(CPU) != 6) {
185 		return (B_FALSE);
186 	}
187 
188 	model = cpuid_getmodel(CPU);
189 	if (model <= INTC_MODEL_PENRYN || model == INTC_MODEL_SILVERTHORNE ||
190 	    model == INTC_MODEL_LINCROFT || model == INTC_MODEL_PENWELL ||
191 	    model == INTC_MODEL_CLOVERVIEW || model == INTC_MODEL_CEDARVIEW) {
192 		return (B_FALSE);
193 	}
194 
195 	return (B_TRUE);
196 }
197 
198 static coretemp_core_t *
199 coretemp_lookup_core(coretemp_t *ct, minor_t minor)
200 {
201 	coretemp_core_t *ctc;
202 
203 	ASSERT(MUTEX_HELD(&ct->coretemp_mutex));
204 
205 	if (minor < CORETEMP_MINOR_MIN || minor > CORETEMP_MINOR_MAX) {
206 		return (NULL);
207 	}
208 
209 	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL;
210 	    ctc = list_next(&ct->coretemp_cores, ctc)) {
211 		if (ctc->ctc_core_minor == (id_t)minor ||
212 		    (ctc->ctc_pkg_minor >= CORETEMP_MINOR_MIN &&
213 		    ctc->ctc_pkg_minor == (id_t)minor)) {
214 			return (ctc);
215 		}
216 	}
217 
218 	return (NULL);
219 }
220 
221 
222 /*
223  * We need to determine the value of Tj Max as all temperature sensors are
224  * derived from this value. The ease of this depends on how old the processor in
225  * question is. The Core family processors after Penryn have support for an MSR
226  * that tells us what to go for. In the Atom family, processors starting with
227  * Silvermont have support for an MSR that documents this value. For older
228  * processors, one needs to track down the datasheet for a specific processor.
229  * Two processors in the same family/model may have different values of Tj Max.
230  * At the moment, we only support this on processors that have that MSR.
231  */
232 static int
233 coretemp_calculate_tjmax(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
234 {
235 	cmi_errno_t e;
236 	int err = 0;
237 	uint64_t val = 0;
238 
239 	e = coretemp_rdmsr(ct, hdl, MSR_TEMPERATURE_TARGET, &val);
240 	if (e == CMI_SUCCESS && val != 0) {
241 		ctc->ctc_tjmax = MSR_TEMPERATURE_TARGET_TARGET(val);
242 	} else if (val == 0) {
243 		err = EINVAL;
244 	} else {
245 		err = coretemp_cmi_errno(e);
246 	}
247 
248 	return (err);
249 }
250 
251 static int
252 coretemp_read(coretemp_t *ct, coretemp_core_t *ctc, cmi_hdl_t hdl)
253 {
254 	cmi_errno_t e;
255 	int err = 0;
256 	uint64_t val = 0;
257 
258 	ctc->ctc_last_read = gethrtime();
259 
260 	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_STATUS, &val);
261 	if (e == CMI_SUCCESS) {
262 		ctc->ctc_core_status = val;
263 	} else {
264 		err = coretemp_cmi_errno(e);
265 		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
266 		    "thermal status on %u/%u: %d", ctc->ctc_chip, ctc->ctc_core,
267 		    err);
268 		return (err);
269 	}
270 
271 	e = coretemp_rdmsr(ct, hdl, MSR_IA32_THERM_INTERRUPT, &val);
272 	if (e == CMI_SUCCESS) {
273 		ctc->ctc_core_intr = val;
274 	} else {
275 		err = coretemp_cmi_errno(e);
276 		dev_err(ct->coretemp_dip, CE_WARN, "!failed to get core "
277 		    "thermal interrupt on %u/%u: %d", ctc->ctc_chip,
278 		    ctc->ctc_core, err);
279 		return (err);
280 	}
281 
282 	/*
283 	 * If the last read wasn't valid, then we should keep the current state.
284 	 */
285 	if ((ctc->ctc_core_status & IA32_THERM_STATUS_READ_VALID) != 0) {
286 		uint_t diff;
287 		diff = IA32_THERM_STATUS_READING(ctc->ctc_core_status);
288 
289 		if (diff >= ctc->ctc_tjmax) {
290 			dev_err(ct->coretemp_dip, CE_WARN, "!found invalid "
291 			    "core temperature on %u/%u: readout: %u, Tjmax: "
292 			    "%u, raw: 0x%" PRIx64, ctc->ctc_chip,
293 			    ctc->ctc_core, diff, ctc->ctc_tjmax,
294 			    ctc->ctc_core_status);
295 			ctc->ctc_invalid_reads++;
296 		} else {
297 			ctc->ctc_temperature = ctc->ctc_tjmax - diff;
298 		}
299 	} else {
300 		ctc->ctc_invalid_reads++;
301 	}
302 
303 	ctc->ctc_resolution =
304 	    IA32_THERM_STATUS_RESOLUTION(ctc->ctc_core_status);
305 
306 	/*
307 	 * If we have package support and this is core zero, then update the
308 	 * package data.
309 	 */
310 	if (ct->coretemp_pkg && ctc->ctc_core == 0) {
311 		uint_t diff;
312 
313 		e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_STATUS,
314 		    &val);
315 		if (e == CMI_SUCCESS) {
316 			ctc->ctc_pkg_status = val;
317 		} else {
318 			err = coretemp_cmi_errno(e);
319 			dev_err(ct->coretemp_dip, CE_WARN, "!failed to get "
320 			    "package thermal status on %u: %d", ctc->ctc_chip,
321 			    err);
322 			return (err);
323 		}
324 
325 		e = coretemp_rdmsr(ct, hdl, MSR_IA32_PACKAGE_THERM_INTERRUPT,
326 		    &val);
327 		if (e == CMI_SUCCESS) {
328 			ctc->ctc_pkg_intr = val;
329 		} else {
330 			err = coretemp_cmi_errno(e);
331 			dev_err(ct->coretemp_dip, CE_WARN, "!failed to get "
332 			    "package thermal interrupt on %u: %d",
333 			    ctc->ctc_chip, err);
334 			return (err);
335 		}
336 
337 		diff = IA32_PKG_THERM_STATUS_READING(ctc->ctc_pkg_status);
338 		if (diff >= ctc->ctc_tjmax) {
339 			dev_err(ct->coretemp_dip, CE_WARN, "!found invalid "
340 			    "package temperature on %u: readout: %u, tjmax: "
341 			    "%u, raw: 0x%" PRIx64, ctc->ctc_chip, diff,
342 			    ctc->ctc_tjmax, ctc->ctc_pkg_status);
343 			ctc->ctc_invalid_reads++;
344 
345 		} else {
346 			ctc->ctc_pkg_temperature = ctc->ctc_tjmax - diff;
347 		}
348 	}
349 
350 	return (0);
351 }
352 
353 static int
354 coretemp_open(dev_t *devp, int flags, int otype, cred_t *credp)
355 {
356 	coretemp_t *ct = coretemp;
357 
358 	if (crgetzoneid(credp) != GLOBAL_ZONEID || drv_priv(credp)) {
359 		return (EPERM);
360 	}
361 
362 	if ((flags & (FEXCL | FNDELAY | FWRITE)) != 0) {
363 		return (EINVAL);
364 	}
365 
366 	if (otype != OTYP_CHR) {
367 		return (EINVAL);
368 	}
369 
370 	/*
371 	 * Sanity check the minor
372 	 */
373 	mutex_enter(&ct->coretemp_mutex);
374 	if (coretemp_lookup_core(ct, getminor(*devp)) == NULL) {
375 		mutex_exit(&ct->coretemp_mutex);
376 		return (ENXIO);
377 	}
378 	mutex_exit(&ct->coretemp_mutex);
379 
380 	return (0);
381 }
382 
383 static int
384 coretemp_ioctl_kind(intptr_t arg, int mode)
385 {
386 	sensor_ioctl_kind_t kind;
387 
388 	bzero(&kind, sizeof (kind));
389 	kind.sik_kind = SENSOR_KIND_TEMPERATURE;
390 
391 	if (ddi_copyout((void *)&kind, (void *)arg, sizeof (kind),
392 	    mode & FKIOCTL) != 0) {
393 		return (EFAULT);
394 	}
395 
396 	return (0);
397 }
398 
399 static int
400 coretemp_ioctl_temp(coretemp_t *ct, minor_t minor, intptr_t arg, int mode)
401 {
402 	coretemp_core_t *ctc;
403 	hrtime_t diff;
404 	sensor_ioctl_temperature_t temp;
405 
406 	bzero(&temp, sizeof (temp));
407 
408 	mutex_enter(&ct->coretemp_mutex);
409 	ctc = coretemp_lookup_core(ct, minor);
410 	if (ctc == NULL) {
411 		mutex_exit(&ct->coretemp_mutex);
412 		return (ENXIO);
413 	}
414 
415 	diff = NSEC2MSEC(gethrtime() - ctc->ctc_last_read);
416 	if (diff > 0 && diff > (hrtime_t)coretemp_cache_ms) {
417 		int ret;
418 		cmi_hdl_t hdl;
419 
420 		if ((hdl = cmi_hdl_lookup(ctc->ctc_class, ctc->ctc_chip,
421 		    ctc->ctc_core, ctc->ctc_strand)) == NULL) {
422 			mutex_exit(&ct->coretemp_mutex);
423 			return (ENXIO);
424 		}
425 		ret = coretemp_read(ct, ctc, hdl);
426 		cmi_hdl_rele(hdl);
427 		if (ret != 0) {
428 			mutex_exit(&ct->coretemp_mutex);
429 			return (ret);
430 		}
431 	}
432 
433 	temp.sit_unit = SENSOR_UNIT_CELSIUS;
434 	if ((id_t)minor == ctc->ctc_core_minor) {
435 		temp.sit_temp = ctc->ctc_temperature;
436 	} else {
437 		temp.sit_temp = ctc->ctc_pkg_temperature;
438 	}
439 
440 	/*
441 	 * The resolution field is in whole units of degrees Celsius.
442 	 */
443 	temp.sit_gran = ctc->ctc_resolution;
444 	if (ctc->ctc_resolution > 1) {
445 		temp.sit_gran *= -1;
446 	}
447 	mutex_exit(&ct->coretemp_mutex);
448 
449 	if (ddi_copyout(&temp, (void *)arg, sizeof (temp),
450 	    mode & FKIOCTL) != 0) {
451 		return (EFAULT);
452 	}
453 
454 	return (0);
455 }
456 
457 static int
458 coretemp_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
459     int *rvalp)
460 {
461 	coretemp_t *ct = coretemp;
462 
463 	if ((mode & FREAD) == 0) {
464 		return (EINVAL);
465 	}
466 
467 	switch (cmd) {
468 	case SENSOR_IOCTL_TYPE:
469 		return (coretemp_ioctl_kind(arg, mode));
470 	case SENSOR_IOCTL_TEMPERATURE:
471 		return (coretemp_ioctl_temp(ct, getminor(dev), arg, mode));
472 	default:
473 		return (ENOTTY);
474 	}
475 }
476 
477 /*
478  * We don't really do any state tracking on close, so for now, just allow it to
479  * always succeed.
480  */
481 static int
482 coretemp_close(dev_t dev, int flags, int otype, cred_t *credp)
483 {
484 	return (0);
485 }
486 
487 static void
488 coretemp_fini_core(coretemp_t *ct, coretemp_core_t *ctc)
489 {
490 	if (ctc->ctc_core_minor > 0)
491 		id_free(ct->coretemp_ids, ctc->ctc_core_minor);
492 	if (ctc->ctc_pkg_minor > 0)
493 		id_free(ct->coretemp_ids, ctc->ctc_pkg_minor);
494 	kmem_free(ctc, sizeof (coretemp_core_t));
495 }
496 
497 static void
498 coretemp_destroy(coretemp_t *ct)
499 {
500 	coretemp_core_t *ctc;
501 
502 	ddi_remove_minor_node(ct->coretemp_dip, NULL);
503 
504 	while ((ctc = list_remove_head(&ct->coretemp_cores)) != NULL) {
505 		coretemp_fini_core(ct, ctc);
506 	}
507 	list_destroy(&ct->coretemp_cores);
508 
509 	if (ct->coretemp_cpuset != NULL) {
510 		cpuset_free(ct->coretemp_cpuset);
511 	}
512 
513 	if (ct->coretemp_ids != NULL) {
514 		id_space_destroy(ct->coretemp_ids);
515 	}
516 
517 	mutex_destroy(&ct->coretemp_mutex);
518 	kmem_free(ct, sizeof (coretemp_t));
519 }
520 
521 static int
522 coretemp_init_core(cmi_hdl_t hdl, void *arg1, void *arg2, void *arg3)
523 {
524 	coretemp_t *ct = arg1;
525 	boolean_t *walkerr = arg2;
526 	coretemp_core_t *ctc;
527 	uint_t chip, core;
528 	int err;
529 
530 	chip = cmi_hdl_chipid(hdl);
531 	core = cmi_hdl_coreid(hdl);
532 
533 	/*
534 	 * The temperature sensor only exists on a per-core basis. Therefore we
535 	 * ignore any non-zero strand.
536 	 */
537 	if (cmi_hdl_strandid(hdl) != 0) {
538 		return (CMI_HDL_WALK_NEXT);
539 	}
540 
541 	ctc = kmem_zalloc(sizeof (coretemp_core_t), KM_SLEEP);
542 	ctc->ctc_class = cmi_hdl_class(hdl);
543 	ctc->ctc_chip = chip;
544 	ctc->ctc_core = core;
545 	ctc->ctc_strand = 0;
546 	ctc->ctc_core_minor = id_alloc(ct->coretemp_ids);
547 	if (ct->coretemp_pkg && ctc->ctc_core == 0) {
548 		ctc->ctc_pkg_minor = id_alloc(ct->coretemp_ids);
549 	}
550 
551 	if ((err = coretemp_calculate_tjmax(ct, ctc, hdl)) != 0) {
552 		dev_err(ct->coretemp_dip, CE_WARN,
553 		    "failed to read Tj Max on %u/%u: %d", chip, core, err);
554 		*walkerr = B_TRUE;
555 		coretemp_fini_core(ct, ctc);
556 		return (CMI_HDL_WALK_DONE);
557 	}
558 
559 	if ((err = coretemp_read(ct, ctc, hdl)) != 0) {
560 		dev_err(ct->coretemp_dip, CE_WARN,
561 		    "failed to take initial temperature reading on %u/%u: %d",
562 		    chip, core, err);
563 		*walkerr = B_TRUE;
564 		coretemp_fini_core(ct, ctc);
565 		return (CMI_HDL_WALK_DONE);
566 	}
567 
568 	list_insert_tail(&ct->coretemp_cores, ctc);
569 
570 	return (CMI_HDL_WALK_NEXT);
571 }
572 
573 static boolean_t
574 coretemp_create_minors(coretemp_t *ct)
575 {
576 	coretemp_core_t *ctc;
577 
578 	for (ctc = list_head(&ct->coretemp_cores); ctc != NULL;
579 	    ctc = list_next(&ct->coretemp_cores, ctc)) {
580 		int ret;
581 		char buf[128];
582 
583 		if (snprintf(buf, sizeof (buf), "chip%u.core%u", ctc->ctc_chip,
584 		    ctc->ctc_core) >= sizeof (buf)) {
585 			return (B_FALSE);
586 		}
587 		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
588 		    ctc->ctc_core_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
589 		if (ret != DDI_SUCCESS) {
590 			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
591 			    "minor node %s", buf);
592 			return (B_FALSE);
593 		}
594 
595 		if (ctc->ctc_core != 0)
596 			continue;
597 
598 		if (snprintf(buf, sizeof (buf), "chip%u", ctc->ctc_chip) >=
599 		    sizeof (buf)) {
600 			return (B_FALSE);
601 		}
602 
603 		ret = ddi_create_minor_node(ct->coretemp_dip, buf, S_IFCHR,
604 		    ctc->ctc_pkg_minor, DDI_NT_SENSOR_TEMP_CPU, 0);
605 		if (ret != DDI_SUCCESS) {
606 			dev_err(ct->coretemp_dip, CE_WARN, "!failed to create "
607 			    "minor node %s", buf);
608 			return (B_FALSE);
609 		}
610 	}
611 
612 	return (B_TRUE);
613 }
614 
615 static int
616 coretemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
617 {
618 	boolean_t walkerr;
619 	coretemp_t *ct = NULL;
620 
621 	if (cmd == DDI_RESUME) {
622 		/*
623 		 * Currently suspend and resume for this driver are nops.
624 		 */
625 		return (DDI_SUCCESS);
626 	}
627 
628 	if (cmd != DDI_ATTACH) {
629 		return (DDI_FAILURE);
630 	}
631 
632 	if (coretemp != NULL) {
633 		return (DDI_FAILURE);
634 	}
635 
636 	ct = kmem_zalloc(sizeof (coretemp_t), KM_SLEEP);
637 	ct->coretemp_dip = dip;
638 	ct->coretemp_pkg = is_x86_feature(x86_featureset, X86FSET_PKG_THERMAL);
639 	list_create(&ct->coretemp_cores, sizeof (coretemp_core_t),
640 	    offsetof(coretemp_core_t, ctc_link));
641 	mutex_init(&ct->coretemp_mutex, NULL, MUTEX_DRIVER, NULL);
642 	ct->coretemp_cpuset = cpuset_alloc(KM_SLEEP);
643 	if ((ct->coretemp_ids = id_space_create("coretemp_minors", 1,
644 	    INT32_MAX)) == NULL) {
645 		goto fail;
646 	}
647 
648 	mutex_enter(&ct->coretemp_mutex);
649 	walkerr = B_FALSE;
650 	cmi_hdl_walk(coretemp_init_core, ct, &walkerr, NULL);
651 
652 	if (walkerr) {
653 		mutex_exit(&ct->coretemp_mutex);
654 		goto fail;
655 	}
656 
657 	if (!coretemp_create_minors(ct)) {
658 		mutex_exit(&ct->coretemp_mutex);
659 		goto fail;
660 	}
661 
662 	coretemp = ct;
663 	mutex_exit(&ct->coretemp_mutex);
664 	return (DDI_SUCCESS);
665 fail:
666 	coretemp = NULL;
667 	coretemp_destroy(ct);
668 	return (DDI_FAILURE);
669 
670 }
671 
672 static int
673 coretemp_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
674     void **resultp)
675 {
676 	int ret;
677 
678 	switch (cmd) {
679 	case DDI_INFO_DEVT2DEVINFO:
680 		*resultp = coretemp->coretemp_dip;
681 		ret = DDI_SUCCESS;
682 		break;
683 	case DDI_INFO_DEVT2INSTANCE:
684 		*resultp = (void *)0;
685 		ret = DDI_SUCCESS;
686 		break;
687 	default:
688 		ret = DDI_FAILURE;
689 		break;
690 	}
691 
692 	return (ret);
693 }
694 
695 static int
696 coretemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
697 {
698 	coretemp_t *ct;
699 
700 	if (cmd == DDI_SUSPEND) {
701 		return (DDI_SUCCESS);
702 	}
703 
704 	if (cmd != DDI_DETACH) {
705 		return (DDI_FAILURE);
706 	}
707 
708 	if (coretemp == NULL) {
709 		return (DDI_FAILURE);
710 	}
711 
712 	ct = coretemp;
713 	coretemp = NULL;
714 	coretemp_destroy(ct);
715 
716 	return (DDI_SUCCESS);
717 }
718 
719 static struct cb_ops coretemp_cb_ops = {
720 	.cb_open = coretemp_open,
721 	.cb_close = coretemp_close,
722 	.cb_strategy = nodev,
723 	.cb_print = nodev,
724 	.cb_dump = nodev,
725 	.cb_read = nodev,
726 	.cb_write = nodev,
727 	.cb_ioctl = coretemp_ioctl,
728 	.cb_devmap = nodev,
729 	.cb_mmap = nodev,
730 	.cb_segmap = nodev,
731 	.cb_chpoll = nochpoll,
732 	.cb_prop_op = ddi_prop_op,
733 	.cb_flag = D_MP,
734 	.cb_rev = CB_REV,
735 	.cb_aread = nodev,
736 	.cb_awrite = nodev
737 };
738 
739 static struct dev_ops coretemp_dev_ops = {
740 	.devo_rev = DEVO_REV,
741 	.devo_refcnt = 0,
742 	.devo_getinfo = coretemp_getinfo,
743 	.devo_identify = nulldev,
744 	.devo_probe = nulldev,
745 	.devo_attach = coretemp_attach,
746 	.devo_detach = coretemp_detach,
747 	.devo_reset = nodev,
748 	.devo_power = ddi_power,
749 	.devo_quiesce = ddi_quiesce_not_needed,
750 	.devo_cb_ops = &coretemp_cb_ops
751 };
752 
753 static struct modldrv coretemp_modldrv = {
754 	.drv_modops = &mod_driverops,
755 	.drv_linkinfo = "Intel CPU/Package thermal sensor",
756 	.drv_dev_ops = &coretemp_dev_ops
757 };
758 
759 static struct modlinkage coretemp_modlinkage = {
760 	.ml_rev = MODREV_1,
761 	.ml_linkage = { &coretemp_modldrv, NULL }
762 };
763 
764 int
765 _init(void)
766 {
767 	if (!coretemp_supported()) {
768 		return (ENOTSUP);
769 	}
770 
771 	return (mod_install(&coretemp_modlinkage));
772 }
773 
774 int
775 _info(struct modinfo *modinfop)
776 {
777 	return (mod_info(&coretemp_modlinkage, modinfop));
778 }
779 
780 int
781 _fini(void)
782 {
783 	return (mod_remove(&coretemp_modlinkage));
784 }
785