1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019 Robert Mustacchi
14 * Copyright 2023 Oxide Computer Company
15 */
16
17 /*
18 * AMD Northbridge CPU Temperature Driver
19 *
20 * The AMD northbridge CPU temperature driver supports the temperature sensor
21 * that was found on the AMD northbridge on AMD CPUs from approximately AMD
22 * Family 10h to Family 16h. For Zen and newer processors (Family 17h+) see the
23 * 'smntemp' driver.
24 *
25 * The temperature is stored on the 'miscellaneous' device on the northbridge.
26 * This is always found at PCI Device 18h, Function 3h. When there is more than
27 * one 'node' (see cpuid.c for the AMD parlance), then the node id is added to
28 * the device to create a unique device. This allows us to map the given PCI
29 * device we find back to the corresponding CPU.
30 *
31 * While all family 10h, 11h, 12h, 14h, and 16h CPUs are supported, not all
32 * family 15h CPUs are. Models 60h+ require the SMN interface, which this does
33 * not know how to consume.
34 */
35
36 #include <sys/modctl.h>
37 #include <sys/conf.h>
38 #include <sys/devops.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/cmn_err.h>
44 #include <sys/pci.h>
45 #include <sys/stddef.h>
46 #include <sys/cpuvar.h>
47 #include <sys/x86_archext.h>
48 #include <sys/list.h>
49 #include <sys/bitset.h>
50 #include <sys/sensors.h>
51
/*
 * Offset, in PCI configuration space, of the register that reports the
 * current temperature of the device. The accessors extract the current
 * temperature field (bits 31:21) and the CurTmpTjSel field (bits 17:16) from a
 * value read out of that register.
 */
#define	AMDNBTEMP_TEMPREG		0xa4
#define	AMDNBTEMP_TEMPREG_CURTMP(x)	BITX(x, 31, 21)
#define	AMDNBTEMP_TEMPREG_TJSEL(x)	BITX(x, 17, 16)

/*
 * One count in the temperature field is 1/8th of a degree C. The shift is
 * used to convert whole degrees into register units, and the granularity
 * (1 << 3 == 8) is what we report alongside a reading.
 */
#define	AMDNBTEMP_GSHIFT		3
#define	AMDNBTEMP_GRANULARITY		(1 << AMDNBTEMP_GSHIFT)

/*
 * When CurTmpTjSel reads back as three, the reported range is shifted by -49
 * degrees, so that amount (expressed in register units) has to be subtracted
 * from the raw reading. In this mode, the bottom two bits always read as zero.
 */
#define	AMDNBTEMP_TJSEL_ADJUST		0x3
#define	AMDNBTEMP_TEMP_ADJUST		(49 << AMDNBTEMP_GSHIFT)

/*
 * The number of northbridges in the system varies. The AMD BIOS and Kernel
 * Developer's Guide (BKDG) says that for these families, the first node has a
 * device of 0x18. This means that node 7, the maximum, has a device of 0x1f.
 */
#define	AMDNBTEMP_FIRST_DEV		0x18
81
/*
 * Flags that record which resources an instance has set up so far. Each flag
 * is set in amdnbtemp_attach() once the matching resource exists and is
 * cleared again by amdnbtemp_cleanup() when that resource is released.
 *
 * The spelling of AMDNBTMEP_S_KSENSOR is retained as-is because the rest of
 * the driver refers to it by that name.
 */
typedef enum andnbtemp_state {
	AMDNBTEMP_S_CFGSPACE	= 0x01,	/* PCI config space handle is set up */
	AMDNBTEMP_S_MUTEX	= 0x02,	/* at_mutex has been initialized */
	AMDNBTMEP_S_KSENSOR	= 0x04	/* ksensor has been created */
} amdnbtemp_state_t;
87
88 typedef struct amdnbtemp {
89 amdnbtemp_state_t at_state;
90 dev_info_t *at_dip;
91 ddi_acc_handle_t at_cfgspace;
92 uint_t at_bus;
93 uint_t at_dev;
94 uint_t at_func;
95 id_t at_ksensor;
96 minor_t at_minor;
97 boolean_t at_tjsel;
98 kmutex_t at_mutex;
99 uint32_t at_raw;
100 int64_t at_temp;
101 } amdnbtemp_t;
102
/*
 * Soft state anchor. It is initialized by ddi_soft_state_init() in _init(),
 * torn down in _fini(), and used to look up each instance's amdnbtemp_t.
 */
static void *amdnbtemp_state = NULL;
104
105 static int
amdnbtemp_read(void * arg,sensor_ioctl_scalar_t * scalar)106 amdnbtemp_read(void *arg, sensor_ioctl_scalar_t *scalar)
107 {
108 amdnbtemp_t *at = arg;
109
110 mutex_enter(&at->at_mutex);
111 at->at_raw = pci_config_get32(at->at_cfgspace, AMDNBTEMP_TEMPREG);
112 if (at->at_raw == PCI_EINVAL32) {
113 mutex_exit(&at->at_mutex);
114 return (EIO);
115 }
116
117 at->at_temp = AMDNBTEMP_TEMPREG_CURTMP(at->at_raw);
118 if (at->at_tjsel &&
119 AMDNBTEMP_TEMPREG_TJSEL(at->at_raw) == AMDNBTEMP_TJSEL_ADJUST) {
120 at->at_temp -= AMDNBTEMP_TEMP_ADJUST;
121 }
122
123 scalar->sis_unit = SENSOR_UNIT_CELSIUS;
124 scalar->sis_gran = AMDNBTEMP_GRANULARITY;
125 scalar->sis_value = at->at_temp;
126 mutex_exit(&at->at_mutex);
127
128 return (0);
129 }
130
131 static const ksensor_ops_t amdnbtemp_temp_ops = {
132 .kso_kind = ksensor_kind_temperature,
133 .kso_scalar = amdnbtemp_read
134 };
135
136 static void
amdnbtemp_cleanup(amdnbtemp_t * at)137 amdnbtemp_cleanup(amdnbtemp_t *at)
138 {
139 int inst;
140 inst = ddi_get_instance(at->at_dip);
141
142 if ((at->at_state & AMDNBTMEP_S_KSENSOR) != 0) {
143 (void) ksensor_remove(at->at_dip, KSENSOR_ALL_IDS);
144 at->at_state &= ~AMDNBTMEP_S_KSENSOR;
145 }
146
147 if ((at->at_state & AMDNBTEMP_S_MUTEX) != 0) {
148 mutex_destroy(&at->at_mutex);
149 at->at_state &= ~AMDNBTEMP_S_MUTEX;
150 }
151
152 if ((at->at_state & AMDNBTEMP_S_CFGSPACE) != 0) {
153 pci_config_teardown(&at->at_cfgspace);
154 at->at_state &= ~AMDNBTEMP_S_CFGSPACE;
155 }
156
157 ASSERT0(at->at_state);
158 ddi_soft_state_free(amdnbtemp_state, inst);
159 }
160
161 /*
162 * For several family 10h processors, certain models have an erratum which says
163 * that temperature information is unreliable. If we're on a platform that is
164 * subject to this erratum, do not attach to the device.
165 */
166 static boolean_t
amdnbtemp_erratum_319(void)167 amdnbtemp_erratum_319(void)
168 {
169 uint32_t socket;
170
171 if (cpuid_getfamily(CPU) != 0x10) {
172 return (B_FALSE);
173 }
174
175 /*
176 * All Family 10h socket F parts are impacted. Socket AM2 parts are all
177 * impacted. The family 10h socket bits in cpuid share the same bit for
178 * socket AM2 and AM3. If you look at the erratum description, they use
179 * information about the memory controller to do DDR2/DDR3
180 * disambiguation to determine whether it's socket AM2 or AM3. Our cpuid
181 * subroutines already do the DDR2/DDR3 disambiguation so we can just
182 * check the socket type as the disambiguation has already been done.
183 */
184 socket = cpuid_getsockettype(CPU);
185 if (socket == X86_SOCKET_F1207 || socket == X86_SOCKET_AM2R2) {
186 return (B_TRUE);
187 }
188
189 return (B_FALSE);
190 }
191
192 static int
amdnbtemp_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)193 amdnbtemp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
194 {
195 int inst, *regs, ret;
196 amdnbtemp_t *at;
197 uint_t nregs, id;
198 char buf[128];
199
200 switch (cmd) {
201 case DDI_RESUME:
202 return (DDI_SUCCESS);
203 case DDI_ATTACH:
204 break;
205 default:
206 return (DDI_FAILURE);
207 }
208
209 inst = ddi_get_instance(dip);
210 if (ddi_soft_state_zalloc(amdnbtemp_state, inst) != DDI_SUCCESS) {
211 dev_err(dip, CE_WARN, "failed to allocate soft state entry %d",
212 inst);
213 return (DDI_FAILURE);
214 }
215
216 at = ddi_get_soft_state(amdnbtemp_state, inst);
217 if (at == NULL) {
218 dev_err(dip, CE_WARN, "failed to retrieve soft state entry %d",
219 inst);
220 return (DDI_FAILURE);
221 }
222
223 at->at_dip = dip;
224
225 if (pci_config_setup(dip, &at->at_cfgspace) != DDI_SUCCESS) {
226 dev_err(dip, CE_WARN, "failed to set up PCI config space");
227 goto err;
228 }
229 at->at_state |= AMDNBTEMP_S_CFGSPACE;
230
231 if (amdnbtemp_erratum_319()) {
232 dev_err(dip, CE_WARN, "!device subject to AMD Erratum 319, "
233 "not attaching to unreliable sensor");
234 goto err;
235 }
236
237 mutex_init(&at->at_mutex, NULL, MUTEX_DRIVER, NULL);
238 at->at_state |= AMDNBTEMP_S_MUTEX;
239
240 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, 0, "reg",
241 ®s, &nregs) != DDI_PROP_SUCCESS) {
242 dev_err(dip, CE_WARN, "failed to get pci 'reg' property");
243 goto err;
244 }
245
246 if (nregs < 1) {
247 dev_err(dip, CE_WARN, "'reg' property missing PCI b/d/f");
248 ddi_prop_free(regs);
249 goto err;
250 }
251
252 at->at_bus = PCI_REG_BUS_G(regs[0]);
253 at->at_dev = PCI_REG_DEV_G(regs[0]);
254 at->at_func = PCI_REG_DEV_G(regs[0]);
255 ddi_prop_free(regs);
256
257 if (at->at_dev < AMDNBTEMP_FIRST_DEV) {
258 dev_err(dip, CE_WARN, "Invalid pci b/d/f device, found 0x%x",
259 at->at_dev);
260 goto err;
261 }
262
263 id = at->at_dev - AMDNBTEMP_FIRST_DEV;
264 if (snprintf(buf, sizeof (buf), "procnode.%u", id) >= sizeof (buf)) {
265 dev_err(dip, CE_WARN, "unexpected buffer name overrun "
266 "constructing sensor %u", id);
267 goto err;
268 }
269
270 /*
271 * On families 15h and 16h the BKDG documents that the CurTmpTjSel bits
272 * of the temperature register dictate how the temperature reading
273 * should be interpreted. Capture that now.
274 */
275 if (cpuid_getfamily(CPU) >= 0x15) {
276 at->at_tjsel = B_TRUE;
277 }
278
279 if ((ret = ksensor_create(dip, &amdnbtemp_temp_ops, at, buf,
280 DDI_NT_SENSOR_TEMP_CPU, &at->at_ksensor)) != 0) {
281 dev_err(dip, CE_WARN, "failed to create ksensor for %s: %d",
282 buf, ret);
283 goto err;
284 }
285 at->at_state |= AMDNBTMEP_S_KSENSOR;
286
287 return (DDI_SUCCESS);
288
289 err:
290 amdnbtemp_cleanup(at);
291 return (DDI_FAILURE);
292 }
293
294 static int
amdnbtemp_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)295 amdnbtemp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
296 {
297 int inst;
298 amdnbtemp_t *at;
299
300 switch (cmd) {
301 case DDI_DETACH:
302 break;
303 case DDI_SUSPEND:
304 return (DDI_SUCCESS);
305 default:
306 return (DDI_FAILURE);
307 }
308
309 inst = ddi_get_instance(dip);
310 at = ddi_get_soft_state(amdnbtemp_state, inst);
311 if (at == NULL) {
312 dev_err(dip, CE_WARN, "asked to detach instance %d, but it is "
313 "missing from the soft state", inst);
314 return (DDI_FAILURE);
315 }
316
317 amdnbtemp_cleanup(at);
318 return (DDI_SUCCESS);
319 }
320
321 static struct dev_ops amdnbtemp_dev_ops = {
322 .devo_rev = DEVO_REV,
323 .devo_refcnt = 0,
324 .devo_getinfo = nodev,
325 .devo_identify = nulldev,
326 .devo_probe = nulldev,
327 .devo_attach = amdnbtemp_attach,
328 .devo_detach = amdnbtemp_detach,
329 .devo_reset = nodev,
330 .devo_quiesce = ddi_quiesce_not_needed
331 };
332
333 static struct modldrv amdnbtemp_modldrv = {
334 .drv_modops = &mod_driverops,
335 .drv_linkinfo = "AMD NB Temp Driver",
336 .drv_dev_ops = &amdnbtemp_dev_ops
337 };
338
339 static struct modlinkage amdnbtemp_modlinkage = {
340 .ml_rev = MODREV_1,
341 .ml_linkage = { &amdnbtemp_modldrv, NULL }
342 };
343
344 int
_init(void)345 _init(void)
346 {
347 int ret;
348
349 if (ddi_soft_state_init(&amdnbtemp_state, sizeof (amdnbtemp_t), 2) !=
350 DDI_SUCCESS) {
351 return (ENOMEM);
352 }
353
354 if ((ret = mod_install(&amdnbtemp_modlinkage)) != 0) {
355 ddi_soft_state_fini(&amdnbtemp_state);
356 return (ret);
357 }
358
359 return (ret);
360 }
361
362 int
_info(struct modinfo * modinfop)363 _info(struct modinfo *modinfop)
364 {
365 return (mod_info(&amdnbtemp_modlinkage, modinfop));
366 }
367
368 int
_fini(void)369 _fini(void)
370 {
371 int ret;
372
373 if ((ret = mod_remove(&amdnbtemp_modlinkage)) != 0) {
374 return (ret);
375 }
376
377 ddi_soft_state_fini(&amdnbtemp_state);
378 return (ret);
379 }
380