xref: /illumos-gate/usr/src/uts/intel/io/amdzen/amdzen.c (revision 019df03d)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019, Joyent, Inc.
14  * Copyright 2024 Oxide Computer Company
15  */
16 
17 /*
18  * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
19  * provide access to the following resources in a single, centralized fashion:
20  *
21  *  - The per-chip Data Fabric
22  *  - The North Bridge
23  *  - The System Management Network (SMN)
24  *
25  * This is a nexus driver as once we have attached to all the requisite
26  * components, we will enumerate child devices which consume this functionality.
27  *
28  * ------------------------
29  * Mapping Devices Together
30  * ------------------------
31  *
32  * The operating system needs to expose things like temperature sensors and DRAM
33  * configuration registers in terms of things that are meaningful to the system
34  * such as logical CPUs, cores, etc. This driver attaches to the PCI devices
35  * that represent the northbridge, data fabrics, and dies. Note that there are
36  * multiple northbridge and DF devices (one each per die) and this driver maps
37  * all of these three things together. Unfortunately, this requires some
38  * acrobatics as there is no direct way to map a northbridge to its
39  * corresponding die. Instead, we map a CPU die to a data fabric PCI device and
40  * a data fabric PCI device to a corresponding northbridge PCI device. This
 * transitive relationship allows us to map between northbridge and die.
42  *
43  * As each data fabric device is attached, based on vendor and device portions
44  * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
45  * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs.
46  *
47  * In current Zen based products, there is a direct mapping between processor
48  * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
49  * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
50  * processor node 1, etc. This means that to map a logical CPU to a data fabric
51  * device, we take its processor node id, add it to 0x18 and find the PCI device
52  * that is on bus 0 with that ID number. We already discovered the DF devices as
53  * described above.
54  *
55  * The northbridge PCI device has a well-defined device and function, but the
56  * bus that it is on varies. Each die has its own set of assigned PCI buses and
57  * its northbridge device is on the first die-specific bus. This implies that
58  * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all
59  * of the data fabric devices are on and is not assigned to any particular die.
60  * Additionally, while the northbridge on the lowest-numbered PCI bus
61  * intuitively corresponds to processor node zero, hardware does not guarantee
62  * this. Because we don't want to be at the mercy of firmware, we don't rely on
63  * this ordering assumption, though we have yet to find a system that deviates
64  * from it, either.
65  *
66  * One of the registers in the data fabric device's function 0
67  * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
68  * associated with the processor node. This means that we can map a data fabric
69  * device to a northbridge by finding the northbridge whose PCI bus ID matches
70  * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL.
71  *
72  * Given all of the above, we can map a northbridge to a data fabric device and
73  * a die to a data fabric device. Because these are 1:1 mappings, there is a
 * transitive relationship from northbridge to die, and therefore we know which
75  * northbridge is associated with which processor die. This is summarized in the
76  * following image:
77  *
78  *  +-------+     +------------------------------------+     +--------------+
79  *  | Die 0 |---->| Data Fabric PCI BDF 0/18/0         |---->| Northbridge  |
80  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |     | PCI  10/0/0  |
81  *     ...        +------------------------------------+     +--------------+
82  *  +-------+     +------------------------------------+     +--------------+
83  *  | Die n |---->| Data Fabric PCI BDF 0/18+n/0       |---->| Northbridge  |
84  *  +-------+     | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |     | PCI 133/0/0  |
85  *                +------------------------------------+     +--------------+
86  *
87  * Note, the PCI buses used by the northbridges here are arbitrary examples that
88  * do not necessarily reflect actual hardware values; however, the
89  * bus/device/function (BDF) of the data fabric accurately models hardware. All
90  * BDF values are in hex.
91  *
92  * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
93  * AMD has multiple northbridges on a given die. All of these northbridges share
94  * the same data fabric and system management network port. From our perspective
95  * this means that some of the northbridge devices will be redundant and that we
96  * no longer have a 1:1 mapping between the northbridge and the data fabric
97  * devices. Every data fabric will have a northbridge, but not every northbridge
98  * will have a data fabric device mapped. Because we're always trying to map
99  * from a die to a northbridge and not the reverse, the fact that there are
100  * extra northbridge devices hanging around that we don't know about shouldn't
101  * be a problem.
102  *
103  * -------------------------------
104  * Attach and Detach Complications
105  * -------------------------------
106  *
107  * We need to map different PCI devices together. Each device is attached to a
108  * amdzen_stub driver to facilitate integration with the rest of the kernel PCI
109  * machinery and so we have to manage multiple dev_info_t structures, each of
110  * which may be independently attached and detached.
111  *
112  * This is not particularly complex for attach: our _init routine allocates the
113  * necessary mutex and list structures at module load time, and as each stub is
114  * attached, it calls into this code to be added to the appropriate list. When
115  * the nexus itself is attached, we walk the PCI device tree accumulating a
116  * counter for all devices we expect to be attached. Once the scan is complete
117  * and all such devices are accounted for (stub registration may be happening
118  * asynchronously with respect to nexus attach), we initialize the nexus device
119  * and the attach is complete.
120  *
121  * Most other device drivers support instances that can be brought back after
122  * detach, provided they are associated with an active minor node in the
123  * /devices file system. This driver is different. Once a stub device has been
124  * attached, we do not permit detaching the nexus driver instance, as the kernel
125  * does not give us interlocking guarantees between nexus and stub driver attach
126  * and detach. It is simplest to just unconditionally fail detach once a stub
127  * has attached.
128  *
129  * ---------------
130  * Exposed Devices
131  * ---------------
132  *
133  * Rather than try and have all of the different functions that could be
134  * provided in one driver, we have a nexus driver that tries to load child
135  * pseudo-device drivers that provide specific pieces of functionality.
136  *
137  * -------
138  * Locking
139  * -------
140  *
141  * The amdzen_data structure contains a single lock, azn_mutex.
142  *
143  * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
145  * driver. Once a DF has been discovered, the set of entities inside of it
146  * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
147  * over them does not require locking. However, the discovery of the amd_df_t
148  * does. In addition, locking is required whenever performing register accesses
149  * to the DF or SMN.
150  *
151  * To summarize, one must hold the lock in the following circumstances:
152  *
153  *  - Looking up DF structures
154  *  - Reading or writing to DF registers
155  *  - Reading or writing to SMN registers
156  *
157  * In general, it is preferred that the lock be held across an entire client
 * operation if possible. The only time this becomes an issue is when we have
159  * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively
160  * call into us.
161  */
162 
163 #include <sys/modctl.h>
164 #include <sys/conf.h>
165 #include <sys/devops.h>
166 #include <sys/ddi.h>
167 #include <sys/sunddi.h>
168 #include <sys/pci.h>
169 #include <sys/sysmacros.h>
170 #include <sys/sunndi.h>
171 #include <sys/x86_archext.h>
172 #include <sys/cpuvar.h>
173 #include <sys/policy.h>
174 #include <sys/stat.h>
175 #include <sys/sunddi.h>
176 #include <sys/bitmap.h>
177 #include <sys/stdbool.h>
178 
179 #include <sys/amdzen/df.h>
180 #include <sys/amdzen/ccd.h>
181 #include "amdzen.h"
182 #include "amdzen_client.h"
183 #include "amdzen_topo.h"
184 
185 amdzen_t *amdzen_data;
186 
187 /*
188  * Internal minor nodes for devices that the nexus provides itself.
189  */
190 #define	AMDZEN_MINOR_TOPO	0
191 
192 /*
193  * Array of northbridge IDs that we care about.
194  */
/*
 * PCI device IDs of the northbridge devices we recognize, one entry per
 * supported product family/model range. Presumably these are matched against
 * discovered stub devices during attach -- the matching code is outside this
 * chunk, so confirm against the stub registration path.
 */
static const uint16_t amdzen_nb_ids[] = {
	/* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */
	0x1450,
	/* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */
	0x15d0,
	/* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */
	0x1480,
	/* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */
	0x1630,
	/* Family 19h Genoa and Bergamo */
	0x14a4,
	/* Family 17h Mendocino, Family 19h Rembrandt */
	0x14b5,
	/* Family 19h Raphael, Family 1Ah 40-4fh */
	0x14d8,
	/* Family 19h Phoenix */
	0x14e8,
	/* Family 1Ah Turin */
	0x153a,
	/* Family 1Ah 20-2fh */
	0x1507
};
217 
/*
 * Describes one child pseudo-device that the nexus creates; see
 * amdzen_create_child(), which uses acd_name as the devinfo node name and
 * attaches the whole structure as the child's parent-private data.
 */
typedef struct {
	/* devinfo node name for the child (e.g. "smntemp"). */
	char *acd_name;
	/* Child address/identity; one of the AMDZEN_C_* values. */
	amdzen_child_t acd_addr;
	/*
	 * This indicates whether or not we should issue warnings to users when
	 * something happens specific to this instance. The main reason we don't
	 * want to is for optional devices that may not be installed as they are
	 * for development purposes (e.g. usmn, zen_udf); however, if there is
	 * an issue with the others we still want to know.
	 */
	bool acd_warn;
} amdzen_child_data_t;
230 
/*
 * The set of child pseudo-devices the nexus will try to create. Entries with
 * acd_warn == false are optional development devices and failures to online
 * them are silent (see amdzen_create_child()).
 */
static const amdzen_child_data_t amdzen_children[] = {
	{ "smntemp", AMDZEN_C_SMNTEMP, true },
	{ "usmn", AMDZEN_C_USMN, false },
	{ "zen_udf", AMDZEN_C_ZEN_UDF, false },
	{ "zen_umc", AMDZEN_C_ZEN_UMC, true }
};
237 
/*
 * Read an 8-bit value from the stub's PCI config space at offset reg.
 */
static uint8_t
amdzen_stub_get8(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get8(stub->azns_cfgspace, reg));
}
243 
/*
 * Read a 16-bit value from the stub's PCI config space at offset reg.
 */
static uint16_t
amdzen_stub_get16(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get16(stub->azns_cfgspace, reg));
}
249 
/*
 * Read a 32-bit value from the stub's PCI config space at offset reg.
 */
static uint32_t
amdzen_stub_get32(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get32(stub->azns_cfgspace, reg));
}
255 
/*
 * Read a 64-bit value from the stub's PCI config space at offset reg.
 */
static uint64_t
amdzen_stub_get64(amdzen_stub_t *stub, off_t reg)
{
	return (pci_config_get64(stub->azns_cfgspace, reg));
}
261 
/*
 * Write an 8-bit value to the stub's PCI config space at offset reg.
 */
static void
amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val)
{
	pci_config_put8(stub->azns_cfgspace, reg, val);
}
267 
/*
 * Write a 16-bit value to the stub's PCI config space at offset reg.
 */
static void
amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val)
{
	pci_config_put16(stub->azns_cfgspace, reg, val);
}
273 
/*
 * Write a 32-bit value to the stub's PCI config space at offset reg.
 */
static void
amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val)
{
	pci_config_put32(stub->azns_cfgspace, reg, val);
}
279 
/*
 * Perform an indirect DF register read of the register described by def at
 * instance inst, using the FICAA/FICAD (indirect configuration access
 * address/data) register pair. When do_64 is set, a 64-bit read is done;
 * otherwise a 32-bit value is returned. The caller must hold azn_mutex since
 * the FICAA write and the subsequent FICAD read must not be interleaved with
 * another indirect access.
 */
static uint64_t
amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def,
    uint8_t inst, boolean_t do_64)
{
	df_reg_def_t ficaa;
	df_reg_def_t ficad;
	uint32_t val = 0;
	/*
	 * NOTE(review): the DF revision is always taken from the first DF
	 * (azn_dfs[0]) rather than the df argument. This relies on the
	 * assumption, also documented in amdzen_c_df_rev(), that all DFs in a
	 * system share the same revision.
	 */
	df_rev_t df_rev = azn->azn_dfs[0].adf_rev;

	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	/* The register definition must be valid for this DF generation. */
	ASSERT3U(def.drd_gens & df_rev, ==, df_rev);
	val = DF_FICAA_V2_SET_TARG_INST(val, 1);
	val = DF_FICAA_V2_SET_FUNC(val, def.drd_func);
	val = DF_FICAA_V2_SET_INST(val, inst);
	val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0);

	/* Select the revision-appropriate FICAA/FICAD register pair. */
	switch (df_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		ficaa = DF_FICAA_V2;
		ficad = DF_FICAD_LO_V2;
		/*
		 * Both here and in the DFv4 case, the register ignores the
		 * lower 2 bits. That is we can only address and encode things
		 * in units of 4 bytes.
		 */
		val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2);
		break;
	case DF_REV_4:
	case DF_REV_4D2:
		ficaa = DF_FICAA_V4;
		ficad = DF_FICAD_LO_V4;
		val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2);
		break;
	default:
		panic("encountered unexpected DF rev: %u", df_rev);
	}

	/* Program the indirect address, then read back through FICAD. */
	amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val);
	if (do_64) {
		return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	} else {
		return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func],
		    ficad.drd_reg));
	}
}
328 
329 /*
330  * Perform a targeted 32-bit indirect read to a specific instance and function.
331  */
/*
 * 32-bit variant of amdzen_df_read_regdef(); caller must hold azn_mutex.
 */
static uint32_t
amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst,
    const df_reg_def_t def)
{
	return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE));
}
338 
339 /*
340  * For a broadcast read, just go to the underlying PCI function and perform a
341  * read. At this point in time, we don't believe we need to use the FICAA/FICAD
342  * to access it (though it does have a broadcast mode).
343  */
/*
 * Broadcast (non-instance-targeted) 32-bit DF read: go straight to the
 * underlying PCI function rather than through FICAA/FICAD.
 */
static uint32_t
amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def)
{
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg));
}
350 
/*
 * Read an SMN register through the northbridge's indirect address/data pair.
 * The register's base address is programmed into AMDZEN_NB_SMN_ADDR and the
 * value is then read from the AMDZEN_NB_SMN_DATA window at the register's
 * offset within that 32-bit window. The caller must hold azn_mutex so the
 * address write and data read are not interleaved with another transaction.
 * Sub-word values are zero-extended to 32 bits.
 */
static uint32_t
amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg)
{
	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);

	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);

	switch (SMN_REG_SIZE(reg)) {
	case 1:
		return ((uint32_t)amdzen_stub_get8(df->adf_nb,
		    AMDZEN_NB_SMN_DATA + addr_off));
	case 2:
		return ((uint32_t)amdzen_stub_get16(df->adf_nb,
		    AMDZEN_NB_SMN_DATA + addr_off));
	case 4:
		/*
		 * A naturally aligned 4-byte register has a zero offset
		 * within the data window, so no addr_off is applied.
		 */
		return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA));
	default:
		panic("unreachable invalid SMN register size %u",
		    SMN_REG_SIZE(reg));
	}
}
375 
/*
 * Write an SMN register through the northbridge's indirect address/data pair.
 * Mirrors amdzen_smn_read(): the base address goes into AMDZEN_NB_SMN_ADDR
 * and the value is written to the data window at the register's offset. The
 * caller must hold azn_mutex; val must fit the register's size (VERIFYd).
 */
static void
amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg,
    const uint32_t val)
{
	const uint32_t base_addr = SMN_REG_ADDR_BASE(reg);
	const uint32_t addr_off = SMN_REG_ADDR_OFF(reg);

	VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg));
	VERIFY(SMN_REG_VALUE_FITS(reg, val));
	VERIFY(MUTEX_HELD(&azn->azn_mutex));
	amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr);

	switch (SMN_REG_SIZE(reg)) {
	case 1:
		amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
		    (uint8_t)val);
		break;
	case 2:
		amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off,
		    (uint16_t)val);
		break;
	case 4:
		/* Naturally aligned 4-byte register: offset is zero. */
		amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val);
		break;
	default:
		panic("unreachable invalid SMN register size %u",
		    SMN_REG_SIZE(reg));
	}
}
405 
406 /*
407  * This is an unfortunate necessity due to the evolution of the CCM DF values.
408  */
409 static inline boolean_t
amdzen_df_at_least(const amdzen_df_t * df,uint8_t major,uint8_t minor)410 amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor)
411 {
412 	return (df->adf_major > major || (df->adf_major == major &&
413 	    df->adf_minor >= minor));
414 }
415 
416 static amdzen_df_t *
amdzen_df_find(amdzen_t * azn,uint_t dfno)417 amdzen_df_find(amdzen_t *azn, uint_t dfno)
418 {
419 	uint_t i;
420 
421 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
422 	if (dfno >= azn->azn_ndfs) {
423 		return (NULL);
424 	}
425 
426 	for (i = 0; i < azn->azn_ndfs; i++) {
427 		amdzen_df_t *df = &azn->azn_dfs[i];
428 		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) {
429 			continue;
430 		}
431 
432 		if (dfno == 0) {
433 			return (df);
434 		}
435 		dfno--;
436 	}
437 
438 	return (NULL);
439 }
440 
441 static amdzen_df_ent_t *
amdzen_df_ent_find_by_instid(amdzen_df_t * df,uint8_t instid)442 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid)
443 {
444 	for (uint_t i = 0; i < df->adf_nents; i++) {
445 		amdzen_df_ent_t *ent = &df->adf_ents[i];
446 
447 		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
448 			continue;
449 		}
450 
451 		if (ent->adfe_inst_id == instid) {
452 			return (ent);
453 		}
454 	}
455 
456 	return (NULL);
457 }
458 
459 /*
460  * Client functions that are used by nexus children.
461  */
462 int
amdzen_c_smn_read(uint_t dfno,const smn_reg_t reg,uint32_t * valp)463 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp)
464 {
465 	amdzen_df_t *df;
466 	amdzen_t *azn = amdzen_data;
467 
468 	if (!SMN_REG_SIZE_IS_VALID(reg))
469 		return (EINVAL);
470 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
471 		return (EINVAL);
472 
473 	mutex_enter(&azn->azn_mutex);
474 	df = amdzen_df_find(azn, dfno);
475 	if (df == NULL) {
476 		mutex_exit(&azn->azn_mutex);
477 		return (ENOENT);
478 	}
479 
480 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
481 		mutex_exit(&azn->azn_mutex);
482 		return (ENXIO);
483 	}
484 
485 	*valp = amdzen_smn_read(azn, df, reg);
486 	mutex_exit(&azn->azn_mutex);
487 	return (0);
488 }
489 
490 int
amdzen_c_smn_write(uint_t dfno,const smn_reg_t reg,const uint32_t val)491 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val)
492 {
493 	amdzen_df_t *df;
494 	amdzen_t *azn = amdzen_data;
495 
496 	if (!SMN_REG_SIZE_IS_VALID(reg))
497 		return (EINVAL);
498 	if (!SMN_REG_IS_NATURALLY_ALIGNED(reg))
499 		return (EINVAL);
500 	if (!SMN_REG_VALUE_FITS(reg, val))
501 		return (EOVERFLOW);
502 
503 	mutex_enter(&azn->azn_mutex);
504 	df = amdzen_df_find(azn, dfno);
505 	if (df == NULL) {
506 		mutex_exit(&azn->azn_mutex);
507 		return (ENOENT);
508 	}
509 
510 	if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) {
511 		mutex_exit(&azn->azn_mutex);
512 		return (ENXIO);
513 	}
514 
515 	amdzen_smn_write(azn, df, reg, val);
516 	mutex_exit(&azn->azn_mutex);
517 	return (0);
518 }
519 
520 uint_t
amdzen_c_df_count(void)521 amdzen_c_df_count(void)
522 {
523 	uint_t ret;
524 	amdzen_t *azn = amdzen_data;
525 
526 	mutex_enter(&azn->azn_mutex);
527 	ret = azn->azn_ndfs;
528 	mutex_exit(&azn->azn_mutex);
529 	return (ret);
530 }
531 
532 df_rev_t
amdzen_c_df_rev(void)533 amdzen_c_df_rev(void)
534 {
535 	amdzen_df_t *df;
536 	amdzen_t *azn = amdzen_data;
537 	df_rev_t rev;
538 
539 	/*
540 	 * Always use the first DF instance to determine what we're using. Our
541 	 * current assumption, which seems to generally be true, is that the
542 	 * given DF revisions are the same in a given system when the DFs are
543 	 * directly connected.
544 	 */
545 	mutex_enter(&azn->azn_mutex);
546 	df = amdzen_df_find(azn, 0);
547 	if (df == NULL) {
548 		rev = DF_REV_UNKNOWN;
549 	} else {
550 		rev = df->adf_rev;
551 	}
552 	mutex_exit(&azn->azn_mutex);
553 
554 	return (rev);
555 }
556 
557 int
amdzen_c_df_read32(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint32_t * valp)558 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def,
559     uint32_t *valp)
560 {
561 	amdzen_df_t *df;
562 	amdzen_t *azn = amdzen_data;
563 
564 	mutex_enter(&azn->azn_mutex);
565 	df = amdzen_df_find(azn, dfno);
566 	if (df == NULL) {
567 		mutex_exit(&azn->azn_mutex);
568 		return (ENOENT);
569 	}
570 
571 	if (df->adf_rev == DF_REV_UNKNOWN) {
572 		mutex_exit(&azn->azn_mutex);
573 		return (ENOTSUP);
574 	}
575 
576 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE);
577 	mutex_exit(&azn->azn_mutex);
578 
579 	return (0);
580 }
581 
582 int
amdzen_c_df_read64(uint_t dfno,uint8_t inst,const df_reg_def_t def,uint64_t * valp)583 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def,
584     uint64_t *valp)
585 {
586 	amdzen_df_t *df;
587 	amdzen_t *azn = amdzen_data;
588 
589 	mutex_enter(&azn->azn_mutex);
590 	df = amdzen_df_find(azn, dfno);
591 	if (df == NULL) {
592 		mutex_exit(&azn->azn_mutex);
593 		return (ENOENT);
594 	}
595 
596 	if (df->adf_rev == DF_REV_UNKNOWN) {
597 		mutex_exit(&azn->azn_mutex);
598 		return (ENOTSUP);
599 	}
600 
601 	*valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE);
602 	mutex_exit(&azn->azn_mutex);
603 
604 	return (0);
605 }
606 
/*
 * Iterate over all enabled DF entities of the given logical type in DF dfno,
 * invoking func(dfno, fabric_id, inst_id, arg) for each. If the callback
 * returns non-zero, iteration stops and that value is returned. Returns
 * ENOENT if the DF doesn't exist and EINVAL for an unknown type.
 */
int
amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func,
    void *arg)
{
	amdzen_df_t *df;
	amdzen_t *azn = amdzen_data;
	df_type_t df_type;
	uint8_t df_subtype;

	/*
	 * Unlike other calls here, we hold our lock only to find the DF here.
	 * The main reason for this is the nature of the callback function.
	 * Folks are iterating over instances so they can call back into us. If
	 * you look at the locking statement, the thing that is most volatile
	 * right here and what we need to protect is the DF itself and
	 * subsequent register accesses to it. The actual data about which
	 * entities exist is static and so once we have found a DF we should
	 * hopefully be in good shape as they only come, but don't go.
	 */
	mutex_enter(&azn->azn_mutex);
	df = amdzen_df_find(azn, dfno);
	if (df == NULL) {
		mutex_exit(&azn->azn_mutex);
		return (ENOENT);
	}
	mutex_exit(&azn->azn_mutex);

	/*
	 * Translate the caller's logical type into the (type, subtype) pair
	 * used in the DF's entity records; the encoding varies by DF revision.
	 */
	switch (type) {
	case ZEN_DF_TYPE_CS_UMC:
		df_type = DF_TYPE_CS;
		/*
		 * In the original Zeppelin DFv2 die there was no subtype field
		 * used for the CS. The UMC is the only type and has a subtype
		 * of zero.
		 */
		if (df->adf_rev != DF_REV_2) {
			df_subtype = DF_CS_SUBTYPE_UMC;
		} else {
			df_subtype = 0;
		}
		break;
	case ZEN_DF_TYPE_CCM_CPU:
		df_type = DF_TYPE_CCM;

		if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
			df_subtype = DF_CCM_SUBTYPE_CPU_V4P1;
		} else {
			df_subtype = DF_CCM_SUBTYPE_CPU_V2;
		}
		break;
	default:
		return (EINVAL);
	}

	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * Some DF components are not considered enabled and therefore
		 * will end up having bogus values in their ID fields. If we do
		 * not have an enable flag set, we must skip this node.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (ent->adfe_type == df_type &&
		    ent->adfe_subtype == df_subtype) {
			int ret = func(dfno, ent->adfe_fabric_id,
			    ent->adfe_inst_id, arg);
			if (ret != 0) {
				return (ret);
			}
		}
	}

	return (0);
}
684 
685 int
amdzen_c_df_fabric_decomp(df_fabric_decomp_t * decomp)686 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp)
687 {
688 	const amdzen_df_t *df;
689 	amdzen_t *azn = amdzen_data;
690 
691 	mutex_enter(&azn->azn_mutex);
692 	df = amdzen_df_find(azn, 0);
693 	if (df == NULL) {
694 		mutex_exit(&azn->azn_mutex);
695 		return (ENOENT);
696 	}
697 
698 	*decomp = df->adf_decomp;
699 	mutex_exit(&azn->azn_mutex);
700 	return (0);
701 }
702 
/*
 * Allocate and online a child pseudo-device node described by acd under the
 * nexus. Returns B_TRUE on success, B_FALSE otherwise; failures to online
 * optional children (acd_warn == false) are silent.
 */
static boolean_t
amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd)
{
	int ret;
	dev_info_t *child;

	if (ndi_devi_alloc(azn->azn_dip, acd->acd_name,
	    (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) {
		dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child "
		    "dip for %s", acd->acd_name);
		return (B_FALSE);
	}

	/* Hang the child data off the dip as parent-private data. */
	ddi_set_parent_data(child, (void *)acd);
	if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) {
		/*
		 * NOTE(review): the allocated child dip is not freed on this
		 * failure path -- confirm whether ndi_devi_free() is needed
		 * here or whether the node is deliberately left in place.
		 */
		if (acd->acd_warn) {
			dev_err(azn->azn_dip, CE_WARN, "!failed to online "
			    "child dip %s: %d", acd->acd_name, ret);
		}
		return (B_FALSE);
	}

	return (B_TRUE);
}
727 
728 static boolean_t
amdzen_map_dfs(amdzen_t * azn)729 amdzen_map_dfs(amdzen_t *azn)
730 {
731 	amdzen_stub_t *stub;
732 
733 	ASSERT(MUTEX_HELD(&azn->azn_mutex));
734 
735 	for (stub = list_head(&azn->azn_df_stubs); stub != NULL;
736 	    stub = list_next(&azn->azn_df_stubs, stub)) {
737 		amdzen_df_t *df;
738 		uint_t dfno;
739 
740 		dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE;
741 		if (dfno > AMDZEN_MAX_DFS) {
742 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
743 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
744 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
745 			goto err;
746 		}
747 
748 		df = &azn->azn_dfs[dfno];
749 
750 		if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) {
751 			dev_err(stub->azns_dip, CE_WARN, "encountered df "
752 			    "device with illegal DF PCI b/d/f: 0x%x/%x/%x",
753 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
754 			goto err;
755 		}
756 
757 		if (df->adf_funcs[stub->azns_func] != NULL) {
758 			dev_err(stub->azns_dip, CE_WARN, "encountered "
759 			    "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x",
760 			    stub->azns_bus, stub->azns_dev, stub->azns_func);
761 			goto err;
762 		}
763 		df->adf_funcs[stub->azns_func] = stub;
764 	}
765 
766 	return (B_TRUE);
767 
768 err:
769 	azn->azn_flags |= AMDZEN_F_DEVICE_ERROR;
770 	return (B_FALSE);
771 }
772 
773 static boolean_t
amdzen_check_dfs(amdzen_t * azn)774 amdzen_check_dfs(amdzen_t *azn)
775 {
776 	uint_t i;
777 	boolean_t ret = B_TRUE;
778 
779 	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
780 		amdzen_df_t *df = &azn->azn_dfs[i];
781 		uint_t count = 0;
782 
783 		/*
784 		 * We require all platforms to have DFs functions 0-6. Not all
785 		 * platforms have DF function 7.
786 		 */
787 		for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) {
788 			if (df->adf_funcs[func] != NULL) {
789 				count++;
790 			}
791 		}
792 
793 		if (count == 0)
794 			continue;
795 
796 		if (count != 7) {
797 			ret = B_FALSE;
798 			dev_err(azn->azn_dip, CE_WARN, "df %u devices "
799 			    "incomplete", i);
800 		} else {
801 			df->adf_flags |= AMDZEN_DF_F_VALID;
802 			azn->azn_ndfs++;
803 		}
804 	}
805 
806 	return (ret);
807 }
808 
/*
 * ID translation table for Rome/Milan's disjoint fabric ID space (see
 * amdzen_is_rome_style() below). Presumably indexed by a dense component
 * index to yield the sparse hardware ID -- the consuming code is outside
 * this chunk, so confirm against its users.
 */
static const uint8_t amdzen_df_rome_ids[0x2b] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
	44, 45, 46, 47, 48
};
814 
815 /*
816  * Check the first df entry to see if it belongs to Rome or Milan. If so, then
817  * it uses the disjoint ID space.
818  */
819 static boolean_t
amdzen_is_rome_style(uint_t id)820 amdzen_is_rome_style(uint_t id)
821 {
822 	return (id == 0x1490 || id == 0x1650);
823 }
824 
825 /*
 * Deal with the differences between how a CCM subtype is indicated
827  * across CPU generations.
828  */
829 static boolean_t
amdzen_dfe_is_ccm(const amdzen_df_t * df,const amdzen_df_ent_t * ent)830 amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent)
831 {
832 	if (ent->adfe_type != DF_TYPE_CCM) {
833 		return (B_FALSE);
834 	}
835 
836 	if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) {
837 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
838 	} else {
839 		return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2);
840 	}
841 }
842 
843 /*
844  * To be able to do most other things we want to do, we must first determine
845  * what revision of the DF (data fabric) that we're using.
846  *
847  * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen
848  * 4 timeframe and allows us to tell apart different version of the DF register
849  * set, most usefully when various subtypes were added.
850  *
851  * Older versions can theoretically be told apart based on usage of reserved
852  * registers. We walk these in the following order, starting with the newest rev
853  * and walking backwards to tell things apart:
854  *
855  *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
856  *             to this point. This is actually DF_FIDMASK0_V3P5. We are supposed
857  *             to check bits [7:0].
858  *
859  *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
860  *             changed to indicate a component mask. This is non-zero
861  *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
862  *
 *   o v2.0 -> This is simply what remains when neither of the checks above
 *             matches; presumably v1 wasn't part
864  *             of the Zen generation.
865  *
866  * Because we don't know what version we are yet, we do not use the normal
867  * versioned register accesses which would check what DF version we are and
868  * would want to use the normal indirect register accesses (which also require
869  * us to know the version). We instead do direct broadcast reads.
870  */
/*
 * Determine the DF revision for df and record it in adf_major, adf_minor,
 * and adf_rev. See the block comment above for the probing strategy; because
 * we don't yet know the revision, only direct broadcast reads are used here.
 */
static void
amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t val;
	df_reg_def_t rd = DF_FBICNT;

	/*
	 * DFv4+ reports its version directly in FBICNT; on older parts these
	 * fields read as zero and we fall into the probing path below.
	 */
	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
	if (df->adf_major == 0 && df->adf_minor == 0) {
		/* Pre-v4: probe registers that were reserved in older revs. */
		rd = DF_FIDMASK0_V3P5;
		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
		if (bitx32(val, 7, 0) != 0) {
			/* Non-zero mask bits: this is DFv3.5. */
			df->adf_major = 3;
			df->adf_minor = 5;
			df->adf_rev = DF_REV_3P5;
		} else {
			rd = DF_FIDMASK_V2;
			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
			    rd.drd_reg);
			if (bitx32(val, 7, 0) != 0) {
				/* Component mask present: DFv3.0. */
				df->adf_major = 3;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_3;
			} else {
				/* Neither probe hit: must be DFv2. */
				df->adf_major = 2;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_2;
			}
		}
	} else if (df->adf_major == 4 && df->adf_minor >= 2) {
		/*
		 * These are devices that have the newer memory layout that
		 * moves the DF::DramBaseAddress to 0x200. Please see the df.h
		 * theory statement for more information.
		 */
		df->adf_rev = DF_REV_4D2;
	} else if (df->adf_major == 4) {
		df->adf_rev = DF_REV_4;
	} else {
		df->adf_rev = DF_REV_UNKNOWN;
	}
}
914 
915 /*
916  * All of the different versions of the DF have different ways of getting at and
917  * answering the question of how do I break a fabric ID into a corresponding
918  * socket, die, and component. Importantly the goal here is to obtain, cache,
919  * and normalize:
920  *
921  *  o The DF System Configuration
922  *  o The various Mask registers
923  *  o The Node ID
924  */
925 static void
amdzen_determine_fabric_decomp(amdzen_t * azn,amdzen_df_t * df)926 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
927 {
928 	uint32_t mask;
929 	df_fabric_decomp_t *decomp = &df->adf_decomp;
930 
931 	switch (df->adf_rev) {
932 	case DF_REV_2:
933 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
934 		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
935 		case DF_DIE_TYPE_CPU:
936 			mask = amdzen_df_read32_bcast(azn, df,
937 			    DF_DIEMASK_CPU_V2);
938 			break;
939 		case DF_DIE_TYPE_APU:
940 			mask = amdzen_df_read32_bcast(azn, df,
941 			    DF_DIEMASK_APU_V2);
942 			break;
943 		default:
944 			panic("DF thinks we're not on a CPU!");
945 		}
946 		df->adf_mask0 = mask;
947 
948 		/*
949 		 * DFv2 is a bit different in how the fabric mask register is
950 		 * phrased. Logically a fabric ID is broken into something that
951 		 * uniquely identifies a "node" (a particular die on a socket)
952 		 * and something that identifies a "component", e.g. a memory
953 		 * controller.
954 		 *
955 		 * Starting with DFv3, these registers logically called out how
956 		 * to separate the fabric ID first into a node and a component.
957 		 * Then the node was then broken down into a socket and die. In
958 		 * DFv2, there is no separate mask and shift of a node. Instead
959 		 * the socket and die are absolute offsets into the fabric ID
960 		 * rather than relative offsets into the node ID. As such, when
961 		 * we encounter DFv2, we fake up a node mask and shift and make
962 		 * it look like DFv3+.
963 		 */
964 		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
965 		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
966 		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
967 		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
968 		decomp->dfd_comp_shift = 0;
969 
970 		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
971 		    decomp->dfd_node_shift;
972 		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
973 		    decomp->dfd_node_shift;
974 		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
975 		    decomp->dfd_node_shift;
976 		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
977 		    decomp->dfd_node_shift;
978 		ASSERT3U(decomp->dfd_die_shift, ==, 0);
979 
980 		/*
981 		 * There is no register in the actual data fabric with the node
982 		 * ID in DFv2 that we have found. Instead we take the first
983 		 * entity's fabric ID and transform it into the node id.
984 		 */
985 		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
986 		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
987 		break;
988 	case DF_REV_3:
989 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
990 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
991 		    DF_FIDMASK0_V3);
992 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
993 		    DF_FIDMASK1_V3);
994 
995 		decomp->dfd_sock_mask =
996 		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
997 		decomp->dfd_sock_shift =
998 		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
999 		decomp->dfd_die_mask =
1000 		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
1001 		decomp->dfd_die_shift = 0;
1002 		decomp->dfd_node_mask =
1003 		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
1004 		decomp->dfd_node_shift =
1005 		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
1006 		decomp->dfd_comp_mask =
1007 		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
1008 		decomp->dfd_comp_shift = 0;
1009 
1010 		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
1011 		break;
1012 	case DF_REV_3P5:
1013 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
1014 		    DF_SYSCFG_V3P5);
1015 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1016 		    DF_FIDMASK0_V3P5);
1017 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1018 		    DF_FIDMASK1_V3P5);
1019 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1020 		    DF_FIDMASK2_V3P5);
1021 
1022 		decomp->dfd_sock_mask =
1023 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1024 		decomp->dfd_sock_shift =
1025 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1026 		decomp->dfd_die_mask =
1027 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1028 		decomp->dfd_die_shift = 0;
1029 		decomp->dfd_node_mask =
1030 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1031 		decomp->dfd_node_shift =
1032 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1033 		decomp->dfd_comp_mask =
1034 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1035 		decomp->dfd_comp_shift = 0;
1036 
1037 		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
1038 		break;
1039 	case DF_REV_4:
1040 	case DF_REV_4D2:
1041 		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
1042 		df->adf_mask0 =  amdzen_df_read32_bcast(azn, df,
1043 		    DF_FIDMASK0_V4);
1044 		df->adf_mask1 =  amdzen_df_read32_bcast(azn, df,
1045 		    DF_FIDMASK1_V4);
1046 		df->adf_mask2 =  amdzen_df_read32_bcast(azn, df,
1047 		    DF_FIDMASK2_V4);
1048 
1049 		/*
1050 		 * The DFv4 registers are at a different location in the DF;
1051 		 * however, the actual layout of fields is the same as DFv3.5.
1052 		 * This is why you see V3P5 below.
1053 		 */
1054 		decomp->dfd_sock_mask =
1055 		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
1056 		decomp->dfd_sock_shift =
1057 		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
1058 		decomp->dfd_die_mask =
1059 		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
1060 		decomp->dfd_die_shift = 0;
1061 		decomp->dfd_node_mask =
1062 		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
1063 		decomp->dfd_node_shift =
1064 		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
1065 		decomp->dfd_comp_mask =
1066 		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
1067 		decomp->dfd_comp_shift = 0;
1068 
1069 		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
1070 		break;
1071 	default:
1072 		panic("encountered suspicious, previously rejected DF "
1073 		    "rev: 0x%x", df->adf_rev);
1074 	}
1075 }
1076 
1077 /*
1078  * The purpose of this function is to map CCMs to the corresponding CCDs that
1079  * exist. This is not an obvious thing as there is no direct mapping in the data
1080  * fabric between these IDs.
1081  *
1082  * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
1083  * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
1084  * that connect to CCDs. These may be connected to the same CCD or a different
1085  * one. When both ports are enabled we must check whether or not the port is
1086  * considered to be in wide mode. When wide mode is enabled then the two ports
1087  * are connected to a single CCD. If wide mode is disabled then the two ports
1088  * are connected to separate CCDs.
1089  *
1090  * The physical number of a CCD, which is how we determine the SMN aperture to
1091  * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
1092  * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
1093  * on some hints from AMD's ACPI information that the numbering is assumed to be
1094  * that CCM's number plus the total number of CCMs.
1095  *
1096  * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there
1097  * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs
1098  * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs
1099  * within the package has changed across generations.
1100  *
1101  * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an
1102  * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This
1103  * meant that in cases where only a subset of CCDs were populated it'd forcibly
1104  * disable the higher CCD in a group (but with DFv3 the CCM would still be
1105  * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say.
1106  * This was almost certainly done to balance the NUMA config.
1107  *
1108  * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the
1109  * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5),
1110  * etc. This is also why we more often see disabled CCMs in Genoa, but not in
1111  * Rome/Milan.
1112  *
1113  * When we're operating in wide mode and therefore both SDPs are connected to a
1114  * single CCD, we've always found that the lower CCD index will be used by the
1115  * system and the higher one is not considered present. Therefore, when
1116  * operating in wide mode, we need to make sure that whenever we have a non-zero
1117  * value for SDPs being connected that we rewrite this to only appear as a
1118  * single CCD is present. It's conceivable (though hard to imagine) that we
1119  * could get a value of 0b10 indicating that only the upper SDP link is active
1120  * for some reason.
1121  */
1122 static void
amdzen_setup_df_ccm(amdzen_t * azn,amdzen_df_t * df,amdzen_df_ent_t * dfe,uint32_t ccmno)1123 amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
1124     uint32_t ccmno)
1125 {
1126 	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
1127 	uint32_t ccd_en;
1128 
1129 	if (df->adf_rev >= DF_REV_4) {
1130 		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1131 		    DF_CCD_EN_V4);
1132 		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);
1133 
1134 		val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
1135 		    DF_CCMCFG4_V4);
1136 		if (DF_CCMCFG4_V4_GET_WIDE_EN(val) != 0 && ccd_en != 0) {
1137 			ccd_en = 0x1;
1138 		}
1139 	} else {
1140 		ccd_en = 0x1;
1141 	}
1142 
1143 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
1144 		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
1145 		if (ccm->acd_ccd_en[i] == 0)
1146 			continue;
1147 		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
1148 		ccm->acd_nccds++;
1149 	}
1150 }
1151 
1152 /*
1153  * Initialize our knowledge about a given series of nodes on the data fabric.
1154  */
static void
amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
{
	uint_t i;
	uint32_t val, ccmno;

	amdzen_determine_df_vers(azn, df);

	/*
	 * The config address control register moved between DFv3.5 and DFv4,
	 * so select its location based on the revision we just determined.
	 * An unrecognized revision leaves this df uninitialized.
	 */
	switch (df->adf_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
		break;
	case DF_REV_4:
	case DF_REV_4D2:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
		break;
	default:
		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
		    "revision: 0x%x", df->adf_rev);
		return;
	}
	/* This bus number is later used to find the matching NB stub. */
	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
	df->adf_nents = DF_FBICNT_GET_COUNT(val);
	if (df->adf_nents == 0)
		return;
	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
	    KM_SLEEP);

	for (i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];
		uint8_t inst = i;

		/*
		 * Unfortunately, Rome uses a discontinuous instance ID pattern
		 * while everything else we can find uses a contiguous instance
		 * ID pattern. This means that for Rome, we need to adjust the
		 * indexes that we iterate over, though the total number of
		 * entries is right. This was carried over into Milan, but not
		 * Genoa.
		 */
		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
				dev_err(azn->azn_dip, CE_WARN, "Rome family "
				    "processor reported more ids than the PPR, "
				    "resetting %u to instance zero", inst);
				inst = 0;
			} else {
				inst = amdzen_df_rome_ids[inst];
			}
		}

		dfe->adfe_drvid = inst;
		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
		/*
		 * FBIINFO1/2 are only read through DFv4; presumably they are
		 * no longer present on DF 4D2. NOTE(review): this comparison
		 * relies on DF_REV_4D2 sorting after DF_REV_4 in df_rev_t --
		 * confirm against the df.h definition.
		 */
		if (df->adf_rev <= DF_REV_4) {
			dfe->adfe_info1 = amdzen_df_read32(azn, df, inst,
			    DF_FBIINFO1);
			dfe->adfe_info2 = amdzen_df_read32(azn, df, inst,
			    DF_FBIINFO2);
		}
		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);

		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);

		/*
		 * The enabled flag was not present in Zen 1. Simulate it by
		 * checking for a non-zero register instead.
		 */
		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
		}
		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
		}

		/*
		 * Starting with DFv4 there is no instance ID in the fabric info
		 * 3 register, so we instead grab it out of the driver ID which
		 * is what it should be anyways.
		 */
		if (df->adf_rev >= DF_REV_4) {
			dfe->adfe_inst_id = dfe->adfe_drvid;
		} else {
			dfe->adfe_inst_id =
			    DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
		}

		/* The fabric ID field layout varies with each DF revision. */
		switch (df->adf_rev) {
		case DF_REV_2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3P5:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_4:
		case DF_REV_4D2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
			break;
		default:
			panic("encountered suspicious, previously rejected DF "
			    "rev: 0x%x", df->adf_rev);
		}

		/*
		 * Record information about a subset of DF entities that we've
		 * found. Currently we're tracking this only for CCMs.
		 */
		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (amdzen_dfe_is_ccm(df, dfe)) {
			df->adf_nccm++;
		}
	}

	/*
	 * Now that we have filled in all of our info, attempt to fill in
	 * specific information about different types of instances.
	 * NOTE(review): this loop's uint_t i shadows the outer i declared at
	 * the top of the function; both hold the same values so this is
	 * harmless, but it is worth cleaning up.
	 */
	ccmno = 0;
	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];

		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		/*
		 * Perform type and sub-type specific initialization. Currently
		 * limited to CCMs.
		 */
		switch (dfe->adfe_type) {
		case DF_TYPE_CCM:
			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
			ccmno++;
			break;
		default:
			break;
		}
	}

	amdzen_determine_fabric_decomp(azn, df);
}
1308 
1309 static void
amdzen_find_nb(amdzen_t * azn,amdzen_df_t * df)1310 amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
1311 {
1312 	amdzen_stub_t *stub;
1313 
1314 	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
1315 	    stub = list_next(&azn->azn_nb_stubs, stub)) {
1316 		if (stub->azns_bus == df->adf_nb_busno) {
1317 			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
1318 			df->adf_nb = stub;
1319 			return;
1320 		}
1321 	}
1322 }
1323 
1324 /*
1325  * We need to be careful using this function as different AMD generations have
1326  * acted in different ways when there is a missing CCD. We've found that in
1327  * hardware where the CCM is enabled but there is no CCD attached, it generally
1328  * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
1329  * correspond to a disabled CCM then the firmware may inject a fatal error
1330  * (which is hopefully something missing in our RAS/MCA-X enablement).
1331  *
1332  * Put differently if this doesn't correspond to an Enabled CCM and you know the
1333  * number of valid CCDs on this, don't use it.
1334  */
1335 static boolean_t
amdzen_ccd_present(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1336 amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1337 {
1338 	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
1339 	uint32_t val = amdzen_smn_read(azn, df, die_reg);
1340 	if (val == SMN_EINVAL32) {
1341 		return (B_FALSE);
1342 	}
1343 
1344 	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
1345 	return (B_TRUE);
1346 }
1347 
1348 static uint32_t
amdzen_ccd_thread_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1349 amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1350 {
1351 	smn_reg_t reg;
1352 
1353 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1354 		reg = L3SOC_THREAD_EN(ccdno);
1355 	} else {
1356 		reg = SMUPWR_THREAD_EN(ccdno);
1357 	}
1358 
1359 	return (amdzen_smn_read(azn, df, reg));
1360 }
1361 
1362 static uint32_t
amdzen_ccd_core_en(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno)1363 amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
1364 {
1365 	smn_reg_t reg;
1366 
1367 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1368 		reg = L3SOC_CORE_EN(ccdno);
1369 	} else {
1370 		reg = SMUPWR_CORE_EN(ccdno);
1371 	}
1372 
1373 	return (amdzen_smn_read(azn, df, reg));
1374 }
1375 
1376 static void
amdzen_ccd_info(amdzen_t * azn,amdzen_df_t * df,uint32_t ccdno,uint32_t * nccxp,uint32_t * nlcorep,uint32_t * nthrp)1377 amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp,
1378     uint32_t *nlcorep, uint32_t *nthrp)
1379 {
1380 	uint32_t nccx, nlcore, smt;
1381 
1382 	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) {
1383 		smn_reg_t reg = L3SOC_THREAD_CFG(ccdno);
1384 		uint32_t val = amdzen_smn_read(azn, df, reg);
1385 		nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1386 		nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1387 		smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val);
1388 	} else {
1389 		smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno);
1390 		uint32_t val = amdzen_smn_read(azn, df, reg);
1391 		nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
1392 		nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1;
1393 		smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val);
1394 	}
1395 
1396 	if (nccxp != NULL) {
1397 		*nccxp = nccx;
1398 	}
1399 
1400 	if (nlcorep != NULL) {
1401 		*nlcorep = nlcore;
1402 	}
1403 
1404 	if (nthrp != NULL) {
1405 		/* The L3::L3SOC and SMU::PWR values are the same here */
1406 		if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) {
1407 			*nthrp = 2;
1408 		} else {
1409 			*nthrp = 1;
1410 		}
1411 	}
1412 }
1413 
/*
 * Build the APIC ID decomposition (azn_apic_decomp) from a core's
 * L3::SCFCTP::PMREG_INITPKG0 and PMREG_INITPKG7 values. Each logical level
 * (thread, core, CCX, CCD, socket) is assigned a contiguous run of bits
 * whose width is derived from the counts advertised in INITPKG7.
 */
static void
amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
{
	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;

	/*
	 * These are all 0 based values, meaning that we need to add one to each
	 * of them. However, we skip this because to calculate the number of
	 * bits to cover an entity we would subtract one.
	 */
	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);

	/* The 16T APIC mode bit only exists starting in Zen 4. */
	if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) {
		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
	} else {
		extccx = 0;
	}

	/*
	 * highbit() on the 0-based maximum yields the number of bits needed
	 * to represent all values at that level of the hierarchy.
	 */
	nthr_bits = highbit(nthr);
	ncore_bits = highbit(ncore);
	nccx_bits = highbit(nccx);
	nccd_bits = highbit(nccd);
	nsock_bits = highbit(nsock);

	apic->aad_thread_shift = 0;
	apic->aad_thread_mask = (1 << nthr_bits) - 1;

	apic->aad_core_shift = nthr_bits;
	if (ncore_bits > 0) {
		apic->aad_core_mask = (1 << ncore_bits) - 1;
		apic->aad_core_mask <<= apic->aad_core_shift;
	} else {
		apic->aad_core_mask = 0;
	}

	/*
	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
	 * of the value should take up four bits. In the common Genoa case,
	 * nccx_bits will be zero.
	 */
	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
	if (extccx != 0 && apic->aad_ccx_shift < 4) {
		apic->aad_ccx_shift = 4;
	}
	if (nccx_bits > 0) {
		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
	} else {
		apic->aad_ccx_mask = 0;
	}

	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
	if (nccd_bits > 0) {
		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
	} else {
		apic->aad_ccd_mask = 0;
	}

	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
	if (nsock_bits > 0) {
		apic->aad_sock_mask = (1 << nsock_bits) - 1;
		apic->aad_sock_mask <<= apic->aad_sock_shift;
	} else {
		apic->aad_sock_mask = 0;
	}

	/*
	 * Currently all supported Zen 2+ platforms only have a single die per
	 * socket as compared to Zen 1. So this is always kept at zero.
	 */
	apic->aad_die_mask = 0;
	apic->aad_die_shift = 0;
}
1495 
1496 /*
1497  * We would like to determine what the logical APIC decomposition is on Zen 3
1498  * and newer family parts. While there is information added to CPUID in the form
1499  * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
1500  * believe is the underlying source of the CPUID data.
1501  *
1502  * Fundamentally there are a series of registers in SMN space that relate to the
1503  * SCFCTP. Coincidentally, there is one of these for each core and there are a
1504  * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
1505  * information about a given's core logical and physical IDs. More interestingly
1506  * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
1507  * total number of logical entities. We've been promised that this has to be
1508  * the same across the fabric. That's all well and good, but this begs the
1509  * question of how do we actually get there. The above is a core-specific
1510  * register and requires that we understand information about which CCDs and
1511  * CCXs are actually present.
1512  *
1513  * So we are starting with a data fabric that has some CCM present. The CCM
1514  * entries in the data fabric may be tagged with our ENABLED flag.
1515  * Unfortunately, that can be true regardless of whether or not it's actually
1516  * present or not. As a result, we go to another chunk of SMN space registers,
1517  * SMU::PWR. These contain information about the CCDs, the physical cores that
1518  * are enabled, and related. So we will first walk the DF entities and see if we
1519  * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
1520  * there is nothing present. Otherwise, we should get back something that
1521  * matches information in the data fabric.
1522  *
1523  * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
1524  * determine which physical cores are enabled in the CCD/CCX. That will finally
1525  * give us an index to get to our friend INITPKG7.
1526  */
/*
 * Walk the first DF's CCM entries looking for a present CCD with at least
 * one enabled physical core, then use that core's INITPKG0/INITPKG7
 * registers to seed the APIC decomposition. Returns B_TRUE if a suitable
 * core was found and the decomposition was filled in, B_FALSE otherwise.
 */
static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
{
	amdzen_df_t *df = &azn->azn_dfs[0];
	uint32_t ccdno = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (amdzen_dfe_is_ccm(df, ent)) {
			uint32_t val, nccx, pkg7, pkg0;
			smn_reg_t pkg7_reg, pkg0_reg;
			int core_bit;
			uint8_t pccxno, pcoreno;

			/* An enabled CCM may still have no CCD behind it. */
			if (!amdzen_ccd_present(azn, df, ccdno)) {
				ccdno++;
				continue;
			}

			/*
			 * This die actually exists. Switch over to the core
			 * enable register to find one to ask about physically.
			 */
			amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL);
			val = amdzen_ccd_core_en(azn, df, ccdno);
			if (val == 0) {
				ccdno++;
				continue;
			}

			/*
			 * There exists an enabled physical core. Find the first
			 * index of it and map it to the corresponding CCD and
			 * CCX. ddi_ffs is the bit index, but we want the
			 * physical core number, hence the -1.
			 */
			core_bit = ddi_ffs(val);
			ASSERT3S(core_bit, !=, 0);
			pcoreno = core_bit - 1;

			/*
			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
			 * the Number of logical cores that are present in the
			 * complex, not the total number of physical cores.
			 * Right now we do assume that the physical and logical
			 * ccx numbering is equivalent (we have no other way of
			 * knowing if it is or isn't right now) and that we'd
			 * always have CCX0 before CCX1. AMD seems to suggest we
			 * can assume this, though it is a worrisome assumption.
			 */
			pccxno = pcoreno / azn->azn_ncore_per_ccx;
			ASSERT3U(pccxno, <, nccx);
			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
			    pcoreno);
			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
			    pcoreno);
			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}
1595 
1596 /*
1597  * We have the fun job of trying to figure out what the correct form of the APIC
1598  * decomposition should be and how to break that into its logical components.
1599  * The way that we get at this is generation-specific unfortunately. Here's how
1600  * it works out:
1601  *
1602  * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
1603  *		family defines exactly how the APIC ID is broken into logical
1604  *		components and it's fixed. That is, depending on whether or
1605  *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
1606  *		constructing this. The way that we're supposed to check if SMT
1607  *		is enabled is to use AMD leaf 8X1E and ask how many threads per
1608  *		core there are. We use the x86 feature set to determine that
1609  *		instead.
1610  *
1611  *		More specifically the Zen 1 scheme is 7 bits long. The bits have
1612  *		the following meanings.
1613  *
1614  *		[6]   Socket ID
1615  *		[5:4] Node ID
1616  *		[3]   Logical CCX ID
1617  *		With SMT		Without SMT
1618  *		[2:1] Logical Core ID	[2]   hardcoded to zero
1619  *		[0] Thread ID		[1:0] Logical Core ID
1620  *
1621  *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
1622  *		without SMT shifts everything to the right by one bit.
1623  *
1624  *		[7]   Socket ID
1625  *		[6:4] Logical CCD ID
1626  *		[3]   Logical CCX ID
1627  *		[2:1] Logical Core ID
1628  *		[0]   Thread ID
1629  *
1630  * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
1631  *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
1633  *		number of bits for each logical component that exists, but the
1634  *		exact number varies based on what's actually present. To get at
1635  *		this we use a piece of data that is embedded in the SCFCTP
1636  *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
1637  *		be used to determine how many logical entities of each kind the
1638  *		system thinks exist. While we could use the various CPUID
1639  *		topology items to try to speed this up, they don't tell us the
1640  *		die information that we need to do this.
1641  *
1642  * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
1643  *		for determining how to extract the CCD, CCX, and related pieces
1644  *		out of the device. One thing we have to be aware of is that when
1645  *		the CCD and CCX shift are the same, that means that there is
1646  *		only a single CCX and therefore have to take that into account
1647  *		appropriately. This is the case generally on Zen 4 platforms,
1648  *		but not on Bergamo. Until we can confirm the actual CPUID leaf
1649  *		values that we receive in the cases of Bergamo and others, we
1650  *		opt instead to use the same SCFCTP scheme as Zen 3.
1651  */
/*
 * Fill in the APIC decomposition using the generation-specific scheme
 * described in the block comment above. Returns B_TRUE on success; B_FALSE
 * for unknown microarchitectures or if the Zen 3+ SCFCTP-based discovery
 * fails.
 */
static boolean_t
amdzen_determine_apic_decomp(amdzen_t *azn)
{
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);

	switch (uarchrev_uarch(azn->azn_uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		/*
		 * Fixed 7-bit Zen 1 scheme: [6] socket, [5:4] node (die),
		 * [3] CCX; the core/thread split below depends on SMT.
		 */
		apic->aad_sock_mask = 0x40;
		apic->aad_sock_shift = 6;
		apic->aad_die_mask = 0x30;
		apic->aad_die_shift = 4;
		apic->aad_ccd_mask = 0;
		apic->aad_ccd_shift = 0;
		apic->aad_ccx_mask = 0x08;
		apic->aad_ccx_shift = 3;

		if (smt) {
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x1;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_core_mask = 0x03;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN2:
		/*
		 * Fixed Zen 2 scheme; without SMT the whole layout shifts
		 * right by one bit, so each field is spelled out per-case.
		 */
		if (smt) {
			apic->aad_sock_mask = 0x80;
			apic->aad_sock_shift = 7;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x70;
			apic->aad_ccd_shift = 4;
			apic->aad_ccx_mask = 0x08;
			apic->aad_ccx_shift = 3;
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x01;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_sock_mask = 0x40;
			apic->aad_sock_shift = 6;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x38;
			apic->aad_ccd_shift = 3;
			apic->aad_ccx_mask = 0x04;
			apic->aad_ccx_shift = 2;
			apic->aad_core_mask = 0x3;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
	case X86_UARCH_AMD_ZEN5:
		/* Zen 3+ discovers the layout from the SCFCTP registers. */
		return (amdzen_determine_apic_decomp_initpkg(azn));
	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}
1720 
1721 /*
1722  * Snapshot the number of cores that can exist in a CCX based on the Zen
1723  * microarchitecture revision. In Zen 1-4 this has been a constant number
1724  * regardless of the actual CPU Family. In Zen 5 this varies based upon whether
1725  * or not dense dies are being used.
1726  */
1727 static void
amdzen_determine_ncore_per_ccx(amdzen_t * azn)1728 amdzen_determine_ncore_per_ccx(amdzen_t *azn)
1729 {
1730 	switch (uarchrev_uarch(azn->azn_uarchrev)) {
1731 	case X86_UARCH_AMD_ZEN1:
1732 	case X86_UARCH_AMD_ZENPLUS:
1733 	case X86_UARCH_AMD_ZEN2:
1734 		azn->azn_ncore_per_ccx = 4;
1735 		break;
1736 	case X86_UARCH_AMD_ZEN3:
1737 	case X86_UARCH_AMD_ZEN4:
1738 		azn->azn_ncore_per_ccx = 8;
1739 		break;
1740 	case X86_UARCH_AMD_ZEN5:
1741 		if (chiprev_family(azn->azn_chiprev) ==
1742 		    X86_PF_AMD_DENSE_TURIN) {
1743 			azn->azn_ncore_per_ccx = 16;
1744 		} else {
1745 			azn->azn_ncore_per_ccx = 8;
1746 		}
1747 		break;
1748 	default:
1749 		panic("asked about non-Zen or unknown uarch");
1750 	}
1751 }
1752 
/*
 * Attempt to determine a logical CCD number of a given CCD where we don't have
 * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
 * The CCD numbers that we have are in the physical space. Likely because of
 * how the orientation of CCM numbers map to physical locations and the layout
 * of them within the package, we haven't found a good way using the core DFv3
 * registers to determine if a given CCD is actually present or not as generally
 * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
 * determine CCD presence.
 */
static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
    const amdzen_df_ent_t *targ)
{
	uint32_t smnid = 0;
	uint32_t logid = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/* Disabled entries never consume an SMN or logical ID. */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
			continue;
		}

		/*
		 * Entries are walked in order, so by the time we reach the
		 * target entry, logid is the number of present CCDs of the
		 * same type/subtype seen before it -- which is its logical ID.
		 */
		if (ent->adfe_inst_id == targ->adfe_inst_id) {
			return (logid);
		}

		if (ent->adfe_type == targ->adfe_type &&
		    ent->adfe_subtype == targ->adfe_subtype) {
			/*
			 * Every matching entry consumes an SMN slot, but only
			 * CCDs that are actually present advance the logical
			 * numbering.
			 */
			boolean_t present = amdzen_ccd_present(azn, df, smnid);
			smnid++;
			if (present) {
				logid++;
			}
		}
	}

	panic("asked to match against invalid DF entity %p in df %p", targ, df);
}
1793 
1794 static void
amdzen_ccd_fill_core_initpkg0(amdzen_t * azn,amdzen_df_t * df,amdzen_topo_ccd_t * ccd,amdzen_topo_ccx_t * ccx,amdzen_topo_core_t * core,boolean_t * ccd_set,boolean_t * ccx_set)1795 amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
1796     amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
1797     boolean_t *ccd_set, boolean_t *ccx_set)
1798 {
1799 	smn_reg_t pkg0_reg;
1800 	uint32_t pkg0;
1801 
1802 	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
1803 	    core->atcore_phys_no);
1804 	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
1805 	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);
1806 
1807 	if (!*ccx_set) {
1808 		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
1809 		*ccx_set = B_TRUE;
1810 	}
1811 
1812 	if (!*ccd_set) {
1813 		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
1814 		*ccd_set = B_TRUE;
1815 	}
1816 }
1817 
/*
 * Attempt to fill in the physical topology information for this given CCD.
 * There are a few steps to this that we undertake to perform this as follows:
 *
 * 1) First we determine whether the CCD is actually present or not by reading
 * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
 * entry it appears, but the request for the die ID will return an invalid
 * read (all 1s). This die ID should match what we think of as the SMN number
 * below. If not, we're in trouble and the rest of this is in question.
 *
 * 2) We use the SMU::PWR registers to determine how many logical and physical
 * cores are present in this CCD and how they are split amongst the CCX. Here we
 * need to encode the CPU to CCX core size rankings. Through this process we
 * determine and fill out which threads and cores are enabled.
 *
 * 3) In Zen 3+ we then will read each core's INITPKG0 values to ensure that we
 * have a proper physical to logical mapping, at which point we can fill in the
 * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
 * indicate that we just mapped the first logical processor to the first enabled
 * core.
 *
 * 4) Once we have the logical IDs determined we will construct the APIC ID that
 * we expect this to have.
 *
 * Steps (2) - (4) are intertwined and done together.
 */
static void
amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent,
    amdzen_topo_ccd_t *ccd)
{
	uint32_t nccx, core_en, thread_en;
	uint32_t nlcore_per_ccx, nthreads_per_core;
	uint32_t sockid, dieid, compid;
	const uint32_t ccdno = ccd->atccd_phys_no;
	const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev);
	boolean_t pkg0_ids, logccd_set = B_FALSE;

	ASSERT(MUTEX_HELD(&azn->azn_mutex));
	/* Step (1): bail early if the CCD isn't physically installed. */
	if (!amdzen_ccd_present(azn, df, ccdno)) {
		ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING;
		return;
	}

	amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx,
	    &nthreads_per_core);
	ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX);

	core_en = amdzen_ccd_core_en(azn, df, ccdno);
	thread_en = amdzen_ccd_thread_en(azn, df, ccdno);

	/*
	 * The BSP is never enabled in a conventional sense and therefore the
	 * bit is reserved and left as 0. As the BSP should be in the first CCD,
	 * we go through and OR back in the bit lest we think the thread isn't
	 * enabled.
	 */
	if (ccdno == 0) {
		thread_en |= 1;
	}

	ccd->atccd_phys_no = ccdno;
	/*
	 * INITPKG0-based logical IDs are only usable on Zen 3 and newer; older
	 * parts fall back to order-of-appearance numbering below.
	 */
	if (uarch >= X86_UARCH_AMD_ZEN3) {
		pkg0_ids = B_TRUE;
	} else {
		ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN;
		pkg0_ids = B_FALSE;

		/*
		 * Determine the CCD logical ID for Zen 2 now since this doesn't
		 * rely upon needing a valid physical core.
		 */
		ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent);
		logccd_set = B_TRUE;
	}

	/*
	 * To construct the APIC ID we need to know the socket and die (not CCD)
	 * this is on. We deconstruct the CCD's fabric ID to determine that.
	 */
	zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid,
	    &dieid, &compid);

	/*
	 * At this point we have all the information about the CCD, the number
	 * of CCX instances, and which physical cores and threads are enabled.
	 * Currently we assume that if we have one CCX enabled, then it is
	 * always CCX0. We cannot find evidence of a two CCX supporting part
	 * that doesn't always ship with both CCXs present and enabled.
	 */
	ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx;
	for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) {
		amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno];
		/* Per-CCX slice of the CCD-wide core enable bitmap. */
		const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1;
		const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx;
		const uint32_t ccx_core_en = (core_en >> core_shift) &
		    core_mask;
		boolean_t logccx_set = B_FALSE;

		ccd->atccd_ccx_en[ccxno] = 1;
		ccx->atccx_phys_no = ccxno;
		ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx;
		ccx->atccx_nlog_cores = nlcore_per_ccx;

		/* Without INITPKG0, assume logical CCX == physical CCX. */
		if (!pkg0_ids) {
			ccx->atccx_log_no = ccx->atccx_phys_no;
			logccx_set = B_TRUE;
		}

		for (uint32_t coreno = 0, logcorezen2 = 0;
		    coreno < azn->azn_ncore_per_ccx; coreno++) {
			amdzen_topo_core_t *core = &ccx->atccx_cores[coreno];

			if ((ccx_core_en & (1 << coreno)) == 0) {
				continue;
			}

			ccx->atccx_core_en[coreno] = 1;
			core->atcore_phys_no = coreno;

			/*
			 * Now that we have the physical core number present, we
			 * must determine the logical core number and fill out
			 * the logical CCX/CCD if it has not been set. We must
			 * do this before we attempt to look at which threads
			 * are enabled, because that operates based upon logical
			 * core number.
			 *
			 * For Zen 2 we do not have INITPKG0 at our disposal. We
			 * currently assume (and tag for userland with the
			 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
			 * mapping logical cores to physicals in the order of
			 * appearance.
			 */
			if (pkg0_ids) {
				amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
				    core, &logccd_set, &logccx_set);
			} else {
				core->atcore_log_no = logcorezen2;
				logcorezen2++;
			}

			/*
			 * Determining which bits to use for the thread is a bit
			 * weird here. Thread IDs within a CCX are logical, but
			 * there are always physically spaced CCX sizes. See the
			 * comment at the definition for SMU::PWR::THREAD_ENABLE
			 * for more information.
			 *
			 * NOTE(review): (nthreads_per_core << 1) - 1 only forms
			 * a correct bitmask for 1 or 2 threads per core; that
			 * covers all SMT configurations handled here.
			 */
			const uint32_t thread_shift = (ccx->atccx_nphys_cores *
			    ccx->atccx_log_no + core->atcore_log_no) *
			    nthreads_per_core;
			const uint32_t thread_mask = (nthreads_per_core << 1) -
			    1;
			const uint32_t core_thread_en = (thread_en >>
			    thread_shift) & thread_mask;
			core->atcore_nthreads = nthreads_per_core;
			core->atcore_thr_en[0] = core_thread_en & 0x01;
			core->atcore_thr_en[1] = core_thread_en & 0x02;
#ifdef	DEBUG
			if (nthreads_per_core == 1) {
				VERIFY0(core->atcore_thr_en[1]);
			}
#endif
			for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
			    thrno++) {
				ASSERT3U(core->atcore_thr_en[thrno], !=, 0);

				zen_apic_id_compose(&azn->azn_apic_decomp,
				    sockid, dieid, ccd->atccd_log_no,
				    ccx->atccx_log_no, core->atcore_log_no,
				    thrno, &core->atcore_apicids[thrno]);

			}
		}

		ASSERT3U(logccx_set, ==, B_TRUE);
		ASSERT3U(logccd_set, ==, B_TRUE);
	}
}
1997 
/*
 * Taskq callback that runs once every expected stub device has attached. It
 * identifies the running CPU, maps the stubs to DF instances and
 * northbridges, determines the APIC decomposition, and finally enumerates our
 * child devices.
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * Assign the requisite identifying information for this CPU.
	 */
	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
	azn->azn_chiprev = cpuid_getchiprev(CPU);

	/*
	 * Go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	amdzen_determine_ncore_per_ccx(azn);

	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	/*
	 * Mark the attach attempt complete (successful or not) and wake any
	 * waiters on azn_cv.
	 */
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
2051 
2052 static int
amdzen_stub_scan_cb(dev_info_t * dip,void * arg)2053 amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
2054 {
2055 	amdzen_t *azn = arg;
2056 	uint16_t vid, did;
2057 	int *regs;
2058 	uint_t nregs, i;
2059 	boolean_t match = B_FALSE;
2060 
2061 	if (dip == ddi_root_node()) {
2062 		return (DDI_WALK_CONTINUE);
2063 	}
2064 
2065 	/*
2066 	 * If a node in question is not a pci node, then we have no interest in
2067 	 * it as all the stubs that we care about are related to pci devices.
2068 	 */
2069 	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
2070 		return (DDI_WALK_PRUNECHILD);
2071 	}
2072 
2073 	/*
2074 	 * If we can't get a device or vendor ID and prove that this is an AMD
2075 	 * part, then we don't care about it.
2076 	 */
2077 	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2078 	    "vendor-id", PCI_EINVAL16);
2079 	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2080 	    "device-id", PCI_EINVAL16);
2081 	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
2082 		return (DDI_WALK_CONTINUE);
2083 	}
2084 
2085 	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
2086 		return (DDI_WALK_CONTINUE);
2087 	}
2088 
2089 	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
2090 		if (amdzen_nb_ids[i] == did) {
2091 			match = B_TRUE;
2092 		}
2093 	}
2094 
2095 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2096 	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
2097 		return (DDI_WALK_CONTINUE);
2098 	}
2099 
2100 	if (nregs == 0) {
2101 		ddi_prop_free(regs);
2102 		return (DDI_WALK_CONTINUE);
2103 	}
2104 
2105 	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
2106 	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
2107 		match = B_TRUE;
2108 	}
2109 
2110 	ddi_prop_free(regs);
2111 	if (match) {
2112 		mutex_enter(&azn->azn_mutex);
2113 		azn->azn_nscanned++;
2114 		mutex_exit(&azn->azn_mutex);
2115 	}
2116 
2117 	return (DDI_WALK_CONTINUE);
2118 }
2119 
/*
 * Taskq callback dispatched from attach. Walks the whole device tree to count
 * the stub devices we expect (via amdzen_stub_scan_cb). Once counted, either
 * declare the platform unsupported (none found) or, if every counted stub has
 * already attached, dispatch nexus initialization.
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	/* The callback increments azn_nscanned for every matching device. */
	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		/* Nothing we recognize: give up and wake any waiters. */
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		/* Every expected stub is attached; start initialization. */
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}
2146 
2147 /*
2148  * Unfortunately we can't really let the stubs detach as we may need them to be
2149  * available for client operations. We may be able to improve this if we know
2150  * that the actual nexus is going away. However, as long as it's active, we need
2151  * all the stubs.
2152  */
2153 int
amdzen_detach_stub(dev_info_t * dip,ddi_detach_cmd_t cmd)2154 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
2155 {
2156 	if (cmd == DDI_SUSPEND) {
2157 		return (DDI_SUCCESS);
2158 	}
2159 
2160 	return (DDI_FAILURE);
2161 }
2162 
/*
 * Attach entry point for the stub driver bound to each northbridge and DF PCI
 * function. We validate that the device really is one we care about, map its
 * config space, record it on the appropriate stub list, and -- if this was
 * the last stub the scan counted -- kick off nexus initialization.
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	reg = *regs;
	ddi_prop_free(regs);

	/* Northbridge stubs are identified by their device ID. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/*
	 * DF functions are identified by their fixed bus/device location
	 * rather than by device ID.
	 */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan has completed and everything it counted has now
	 * attached, dispatch the nexus initialization task.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}
2271 
2272 static int
amdzen_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2273 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2274     void *arg, void *result)
2275 {
2276 	char buf[32];
2277 	dev_info_t *child;
2278 	const amdzen_child_data_t *acd;
2279 
2280 	switch (ctlop) {
2281 	case DDI_CTLOPS_REPORTDEV:
2282 		if (rdip == NULL) {
2283 			return (DDI_FAILURE);
2284 		}
2285 		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
2286 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
2287 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
2288 		break;
2289 	case DDI_CTLOPS_INITCHILD:
2290 		child = arg;
2291 		if (child == NULL) {
2292 			dev_err(dip, CE_WARN, "!no child passed for "
2293 			    "DDI_CTLOPS_INITCHILD");
2294 		}
2295 
2296 		acd = ddi_get_parent_data(child);
2297 		if (acd == NULL) {
2298 			dev_err(dip, CE_WARN, "!missing child parent data");
2299 			return (DDI_FAILURE);
2300 		}
2301 
2302 		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
2303 		    sizeof (buf)) {
2304 			dev_err(dip, CE_WARN, "!failed to construct device "
2305 			    "addr due to overflow");
2306 			return (DDI_FAILURE);
2307 		}
2308 
2309 		ddi_set_name_addr(child, buf);
2310 		break;
2311 	case DDI_CTLOPS_UNINITCHILD:
2312 		child = arg;
2313 		if (child == NULL) {
2314 			dev_err(dip, CE_WARN, "!no child passed for "
2315 			    "DDI_CTLOPS_UNINITCHILD");
2316 		}
2317 
2318 		ddi_set_name_addr(child, NULL);
2319 		break;
2320 	default:
2321 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2322 	}
2323 	return (DDI_SUCCESS);
2324 }
2325 
2326 static int
amdzen_topo_open(dev_t * devp,int flag,int otyp,cred_t * credp)2327 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2328 {
2329 	minor_t m;
2330 	amdzen_t *azn = amdzen_data;
2331 
2332 	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
2333 	    secpolicy_sys_config(credp, B_FALSE) != 0) {
2334 		return (EPERM);
2335 	}
2336 
2337 	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
2338 		return (EINVAL);
2339 	}
2340 
2341 	if (otyp != OTYP_CHR) {
2342 		return (EINVAL);
2343 	}
2344 
2345 	m = getminor(*devp);
2346 	if (m != AMDZEN_MINOR_TOPO) {
2347 		return (ENXIO);
2348 	}
2349 
2350 	mutex_enter(&azn->azn_mutex);
2351 	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
2352 	    AMDZEN_F_ATTACH_COMPLETE) {
2353 		mutex_exit(&azn->azn_mutex);
2354 		return (ENOTSUP);
2355 	}
2356 	mutex_exit(&azn->azn_mutex);
2357 
2358 	return (0);
2359 }
2360 
2361 static int
amdzen_topo_ioctl_base(amdzen_t * azn,intptr_t arg,int mode)2362 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
2363 {
2364 	amdzen_topo_base_t base;
2365 
2366 	bzero(&base, sizeof (base));
2367 	mutex_enter(&azn->azn_mutex);
2368 	base.atb_ndf = azn->azn_ndfs;
2369 
2370 	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
2371 		mutex_exit(&azn->azn_mutex);
2372 		return (ENOTSUP);
2373 	}
2374 
2375 	base.atb_apic_decomp = azn->azn_apic_decomp;
2376 	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
2377 		const amdzen_df_t *df = &azn->azn_dfs[i];
2378 
2379 		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
2380 		if (i == 0) {
2381 			base.atb_rev = df->adf_rev;
2382 			base.atb_df_decomp = df->adf_decomp;
2383 		}
2384 	}
2385 	mutex_exit(&azn->azn_mutex);
2386 
2387 	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
2388 	    mode & FKIOCTL) != 0) {
2389 		return (EFAULT);
2390 	}
2391 
2392 	return (0);
2393 }
2394 
2395 /*
2396  * Fill in the peers. We only have this information prior to DF 4D2.  The way we
2397  * do is this is to just fill in all the entries and then zero out the ones that
2398  * aren't valid.
2399  */
2400 static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t * df,const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2401 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
2402     const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
2403 {
2404 	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);
2405 
2406 	if (df->adf_rev >= DF_REV_4D2) {
2407 		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_npeers));
2408 		return;
2409 	}
2410 
2411 	topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
2412 	topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
2413 	topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
2414 	topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
2415 	topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
2416 	topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2);
2417 
2418 	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
2419 	    i++) {
2420 		topo_ent->atde_peers[i] = 0;
2421 	}
2422 }
2423 
2424 static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t * ent,amdzen_topo_df_ent_t * topo_ent)2425 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
2426     amdzen_topo_df_ent_t *topo_ent)
2427 {
2428 	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
2429 	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;
2430 
2431 	topo_ccm->atcd_nccds = ccm->acd_nccds;
2432 	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
2433 		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
2434 		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
2435 	}
2436 }
2437 
/*
 * Service AMDZEN_TOPO_IOCTL_DF: copy out summary information about a single
 * DF instance and, when the caller supplied a buffer, the set of its enabled
 * entities. Both the native and ILP32 forms of the request structure are
 * supported.
 */
static int
amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
{
	uint_t model;
	uint32_t max_ents, nwritten;
	const amdzen_df_t *df;
	amdzen_topo_df_t topo_df;
#ifdef	_MULTI_DATAMODEL
	amdzen_topo_df32_t topo_df32;
#endif

	model = ddi_model_convert_from(mode);
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		/*
		 * For a 32-bit caller, copy in the 32-bit form and translate
		 * the fields we consume into the native structure.
		 */
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		bzero(&topo_df, sizeof (topo_df));
		topo_df.atd_dfno = topo_df32.atd_dfno;
		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		return (ENOTSUP);
	}

	mutex_enter(&azn->azn_mutex);
	if (topo_df.atd_dfno >= azn->azn_ndfs) {
		mutex_exit(&azn->azn_mutex);
		return (EINVAL);
	}

	/* Fill in the summary fields for the requested DF. */
	df = &azn->azn_dfs[topo_df.atd_dfno];
	topo_df.atd_nodeid = df->adf_nodeid;
	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
	    df->adf_decomp.dfd_sock_shift;
	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
	    df->adf_decomp.dfd_die_shift;
	topo_df.atd_rev = df->adf_rev;
	topo_df.atd_major = df->adf_major;
	topo_df.atd_minor = df->adf_minor;
	topo_df.atd_df_act_nents = df->adf_nents;
	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);

	/* With no entity buffer, the caller only wants the summary. */
	if (topo_df.atd_df_ents == NULL) {
		topo_df.atd_df_buf_nvalid = 0;
		mutex_exit(&azn->azn_mutex);
		goto copyout;
	}

	nwritten = 0;
	for (uint32_t i = 0; i < max_ents; i++) {
		amdzen_topo_df_ent_t topo_ent;
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * We opt not to include disabled elements right now. They
		 * generally don't have a valid type and there isn't much useful
		 * information we can get from them. This can be changed if we
		 * find a use case for them for userland topo.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);

		if (amdzen_dfe_is_ccm(df, ent)) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		/* Copy each entity out individually as it is assembled. */
		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	/* Return the filled-in request in the caller's data model. */
	switch (model) {
#ifdef	_MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_major = topo_df.atd_major;
		topo_df32.atd_minor = topo_df.atd_minor;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}


	return (0);
}
2564 
/*
 * Service AMDZEN_TOPO_IOCTL_CCD: return the topology of a single CCD,
 * constructing and caching it on first use. Problems with the request itself
 * are reported in-band through atccd_err; the ioctl only fails outright for
 * copy or allocation errors.
 */
static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (!amdzen_dfe_is_ccm(df, ent)) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	/* Locate the requested physical CCD among this CCM's enabled CCDs. */
	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save it
		 * for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}
2660 
2661 static int
amdzen_topo_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)2662 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
2663     cred_t *credp, int *rvalp)
2664 {
2665 	int ret;
2666 	amdzen_t *azn = amdzen_data;
2667 
2668 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2669 		return (ENXIO);
2670 	}
2671 
2672 	if ((mode & FREAD) == 0) {
2673 		return (EBADF);
2674 	}
2675 
2676 	switch (cmd) {
2677 	case AMDZEN_TOPO_IOCTL_BASE:
2678 		ret = amdzen_topo_ioctl_base(azn, arg, mode);
2679 		break;
2680 	case AMDZEN_TOPO_IOCTL_DF:
2681 		ret = amdzen_topo_ioctl_df(azn, arg, mode);
2682 		break;
2683 	case AMDZEN_TOPO_IOCTL_CCD:
2684 		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
2685 		break;
2686 	default:
2687 		ret = ENOTTY;
2688 		break;
2689 	}
2690 
2691 	return (ret);
2692 }
2693 
2694 static int
amdzen_topo_close(dev_t dev,int flag,int otyp,cred_t * credp)2695 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
2696 {
2697 	if (otyp != OTYP_CHR) {
2698 		return (EINVAL);
2699 	}
2700 
2701 	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
2702 		return (ENXIO);
2703 	}
2704 
2705 	return (0);
2706 }
2707 
2708 static int
amdzen_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2709 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2710 {
2711 	amdzen_t *azn = amdzen_data;
2712 
2713 	if (cmd == DDI_RESUME) {
2714 		return (DDI_SUCCESS);
2715 	} else if (cmd != DDI_ATTACH) {
2716 		return (DDI_FAILURE);
2717 	}
2718 
2719 	mutex_enter(&azn->azn_mutex);
2720 	if (azn->azn_dip != NULL) {
2721 		dev_err(dip, CE_WARN, "driver is already attached!");
2722 		mutex_exit(&azn->azn_mutex);
2723 		return (DDI_FAILURE);
2724 	}
2725 
2726 	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
2727 	    DDI_PSEUDO, 0) != 0) {
2728 		dev_err(dip, CE_WARN, "failed to create topo minor node!");
2729 		mutex_exit(&azn->azn_mutex);
2730 		return (DDI_FAILURE);
2731 	}
2732 
2733 	azn->azn_dip = dip;
2734 	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
2735 	    azn, TQ_SLEEP);
2736 	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
2737 	mutex_exit(&azn->azn_mutex);
2738 
2739 	return (DDI_SUCCESS);
2740 }
2741 
2742 static int
amdzen_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2743 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2744 {
2745 	amdzen_t *azn = amdzen_data;
2746 
2747 	if (cmd == DDI_SUSPEND) {
2748 		return (DDI_SUCCESS);
2749 	} else if (cmd != DDI_DETACH) {
2750 		return (DDI_FAILURE);
2751 	}
2752 
2753 	mutex_enter(&azn->azn_mutex);
2754 	while (azn->azn_taskqid != TASKQID_INVALID) {
2755 		cv_wait(&azn->azn_cv, &azn->azn_mutex);
2756 	}
2757 
2758 	/*
2759 	 * If we've attached any stub drivers, e.g. this platform is important
2760 	 * for us, then we fail detach.
2761 	 */
2762 	if (!list_is_empty(&azn->azn_df_stubs) ||
2763 	    !list_is_empty(&azn->azn_nb_stubs)) {
2764 		mutex_exit(&azn->azn_mutex);
2765 		return (DDI_FAILURE);
2766 	}
2767 
2768 	ddi_remove_minor_node(azn->azn_dip, NULL);
2769 	azn->azn_dip = NULL;
2770 	mutex_exit(&azn->azn_mutex);
2771 
2772 	return (DDI_SUCCESS);
2773 }
2774 
2775 static void
amdzen_free(void)2776 amdzen_free(void)
2777 {
2778 	if (amdzen_data == NULL) {
2779 		return;
2780 	}
2781 
2782 	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
2783 	list_destroy(&amdzen_data->azn_df_stubs);
2784 	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
2785 	list_destroy(&amdzen_data->azn_nb_stubs);
2786 	cv_destroy(&amdzen_data->azn_cv);
2787 	mutex_destroy(&amdzen_data->azn_mutex);
2788 	kmem_free(amdzen_data, sizeof (amdzen_t));
2789 	amdzen_data = NULL;
2790 }
2791 
2792 static void
amdzen_alloc(void)2793 amdzen_alloc(void)
2794 {
2795 	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
2796 	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
2797 	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
2798 	    offsetof(amdzen_stub_t, azns_link));
2799 	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
2800 	    offsetof(amdzen_stub_t, azns_link));
2801 	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
2802 }
2803 
/*
 * Character device entry points for the "topo" minor node. Only open, close,
 * and ioctl are meaningful for this driver; all other operations are stubbed
 * out with nodev/nochpoll. D_MP marks the driver as MT-safe.
 */
static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};
2823 
/*
 * Bus operations for our role as a nexus driver. Our children consume
 * register/SMN access through our private interfaces rather than mappings or
 * DMA, so all mapping and DMA entry points are the standard no-op/failure
 * stubs; only bus_ctl carries driver-specific behavior (amdzen_bus_ctl).
 */
struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};
2838 
/*
 * Device operations. The presence of devo_bus_ops is what makes this a nexus
 * driver; devo_cb_ops provides the character device interface for the
 * topology ioctls. No quiesce work is needed as we touch no hardware state
 * that must be reset for fast reboot.
 */
static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};
2852 
/* Driver-type module linkage structure. */
static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};
2858 
/* Module linkage: a single driver module, NULL-terminated. */
static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};
2863 
2864 int
_init(void)2865 _init(void)
2866 {
2867 	int ret;
2868 
2869 	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
2870 	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
2871 		return (ENOTSUP);
2872 	}
2873 
2874 	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
2875 		amdzen_alloc();
2876 	}
2877 
2878 	return (ret);
2879 }
2880 
/*
 * _info(9E): report module information via the standard mod_info() helper.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}
2886 
2887 int
_fini(void)2888 _fini(void)
2889 {
2890 	int ret;
2891 
2892 	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
2893 		amdzen_free();
2894 	}
2895 
2896 	return (ret);
2897 }
2898