1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 /*
17  * This module implements a series of enumeration methods that tie into the
18  * amdzen(4D) nexus driver. This module is currently built out of the various
19  * x86 platform directories (though it'd be nice if we could just make this
20  * ISA-specific rather than platform-specific).
21  */
22 
23 #include <sys/fm/protocol.h>
24 #include <fm/topo_mod.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <errno.h>
29 #include <strings.h>
30 #include <unistd.h>
31 #include <sys/devfm.h>
32 #include <sys/x86_archext.h>
33 
34 #include "topo_zen_impl.h"
35 
/*
 * This is the path to the device node that amdzen(4D) creates for us to ask it
 * questions. It is opened read-only by topo_zen_init() and the resulting file
 * descriptor is what the AMDZEN_TOPO_IOCTL_* requests in this file are issued
 * against.
 */
static const char *topo_zen_dev = "/devices/pseudo/amdzen@0:topo";
41 
42 static inline boolean_t
topo_zen_df_at_least(const amdzen_topo_df_t * df,uint8_t major,uint8_t minor)43 topo_zen_df_at_least(const amdzen_topo_df_t *df, uint8_t major, uint8_t minor)
44 {
45 	return (df->atd_major > major || (df->atd_major == major &&
46 	    df->atd_minor >= minor));
47 }
48 
49 /*
50  * Helper to determine whether or not a given DF entity's type is that of a CCM
51  * or not as this has changed across the various DF versions.
52  */
53 static boolean_t
topo_zen_fabric_is_ccm(const amdzen_topo_df_t * df,const amdzen_topo_df_ent_t * ent)54 topo_zen_fabric_is_ccm(const amdzen_topo_df_t *df,
55     const amdzen_topo_df_ent_t *ent)
56 {
57 	if (ent->atde_type != DF_TYPE_CCM) {
58 		return (B_FALSE);
59 	}
60 
61 	if (df->atd_rev >= DF_REV_4 && topo_zen_df_at_least(df, 4, 1)) {
62 		return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V4P1);
63 	} else {
64 		return (ent->atde_subtype == DF_CCM_SUBTYPE_CPU_V2);
65 	}
66 }
67 
68 /*
69  * Clean up all data that is associated with an attempt to enumerate the socket.
70  * The structure itself is assumed to be on the stack or handled elsewhere. It
71  * must have been initialized prior to calling this. Don't give us stack
72  * garbage.
73  */
74 static void
topo_zen_enum_cleanup_sock(topo_mod_t * mod,zen_topo_enum_sock_t * sock)75 topo_zen_enum_cleanup_sock(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
76 {
77 	if (sock->ztes_kstat != NULL) {
78 		(void) kstat_close(sock->ztes_kstat);
79 		sock->ztes_kstat = NULL;
80 	}
81 
82 	if (sock->ztes_cpus != NULL) {
83 		for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
84 			nvlist_free(sock->ztes_cpus[i]);
85 		}
86 		umem_free(sock->ztes_cpus, sizeof (nvlist_t *) *
87 		    sock->ztes_ncpus);
88 		sock->ztes_cpus = NULL;
89 	}
90 
91 	if (sock->ztes_fm_agent != NULL) {
92 		fmd_agent_cache_info_free(sock->ztes_fm_agent,
93 		    &sock->ztes_cache);
94 		fmd_agent_close(sock->ztes_fm_agent);
95 		sock->ztes_fm_agent = NULL;
96 	}
97 
98 	if (sock->ztes_tn_ccd != NULL) {
99 		topo_mod_free(mod, sock->ztes_tn_ccd, sock->ztes_nccd *
100 		    sizeof (zen_topo_enum_ccd_t));
101 		sock->ztes_tn_ccd = NULL;
102 	}
103 
104 	if (sock->ztes_ccd != NULL) {
105 		topo_mod_free(mod, sock->ztes_ccd, sock->ztes_nccd *
106 		    sizeof (amdzen_topo_ccd_t));
107 		sock->ztes_ccd = NULL;
108 	}
109 }
110 
111 static int
topo_zen_enum_chip_gather_ccd(topo_mod_t * mod,const zen_topo_t * zen,zen_topo_enum_sock_t * sock,const amdzen_topo_df_ent_t * dfe,uint32_t ccdno,uint32_t phys_ccdno)112 topo_zen_enum_chip_gather_ccd(topo_mod_t *mod, const zen_topo_t *zen,
113     zen_topo_enum_sock_t *sock,
114     const amdzen_topo_df_ent_t *dfe, uint32_t ccdno, uint32_t phys_ccdno)
115 {
116 	amdzen_topo_ccd_t *ccd;
117 
118 	ccd = &sock->ztes_ccd[ccdno];
119 	ccd->atccd_dfno = sock->ztes_df->atd_dfno;
120 	ccd->atccd_instid = dfe->atde_inst_id;
121 	ccd->atccd_phys_no = phys_ccdno;
122 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_CCD, ccd) != 0) {
123 		topo_mod_dprintf(mod, "failed to get CCD information "
124 		    "for DF/CCD 0x%x/0x%x: %s\n", sock->ztes_df->atd_dfno,
125 		    ccd->atccd_instid, strerror(errno));
126 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
127 	}
128 
129 	switch (ccd->atccd_err) {
130 	case AMDZEN_TOPO_CCD_E_OK:
131 		sock->ztes_nccd_valid++;
132 		break;
133 	/*
134 	 * We ignore errors about CCDs being missing. This is fine
135 	 * because on systems without a full CCD complement this will
136 	 * happen and is expected. We make sure we have at least one
137 	 * valid CCD before continuing.
138 	 */
139 	case AMDZEN_TOPO_CCD_E_CCD_MISSING:
140 		break;
141 	default:
142 		topo_mod_dprintf(mod, "DF CCM fabric 0x%x, CCD 0x%x "
143 		    "didn't give us valid info: found error 0x%x\n",
144 		    dfe->atde_fabric_id, phys_ccdno, ccd->atccd_err);
145 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
146 	}
147 
148 	return (0);
149 }
150 
151 
152 /*
153  * Go through all of our disparate sources and gather information that we'll
154  * need to process and perform enumeration. We need to gather the following
155  * disparate pieces of information:
156  *
157  * 1) We need to determine what's going on with all the CCDs and ask the
158  * amdzen(4D) driver for information.
159  *
160  * 2) We need to use the FM agent to ask /dev/fm to get all the CPU information
161  * for this system.
162  *
163  * 3) We use the same system to go get all the actual cache information for this
164  * system.
165  *
166  * 4) We grab some of the chip-wide information such as the socket and brand
167  * string information through kstats, with information about a valid CPU ID.
168  */
169 static int
topo_zen_enum_chip_gather(topo_mod_t * mod,const zen_topo_t * zen,const amdzen_topo_df_t * df,zen_topo_enum_sock_t * sock)170 topo_zen_enum_chip_gather(topo_mod_t *mod, const zen_topo_t *zen,
171     const amdzen_topo_df_t *df, zen_topo_enum_sock_t *sock)
172 {
173 	uint32_t nccd = 0;
174 
175 	sock->ztes_df = df;
176 	for (uint32_t i = 0; i < df->atd_df_buf_nvalid; i++) {
177 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
178 		if (topo_zen_fabric_is_ccm(df, dfe)) {
179 			nccd += dfe->atde_data.atded_ccm.atcd_nccds;
180 		}
181 	}
182 
183 	if (nccd == 0) {
184 		topo_mod_dprintf(mod, "no CCDs found! Not much more we can "
185 		    "do... Something probably went wrong\n");
186 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
187 	}
188 
189 	sock->ztes_nccd = nccd;
190 	sock->ztes_ccd = topo_mod_zalloc(mod, sizeof (amdzen_topo_ccd_t) *
191 	    sock->ztes_nccd);
192 	if (sock->ztes_ccd == NULL) {
193 		topo_mod_dprintf(mod, "failed to allocate memory for "
194 		    "ztes_ccd[]\n");
195 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
196 	}
197 
198 	sock->ztes_tn_ccd = topo_mod_zalloc(mod, sizeof (zen_topo_enum_ccd_t) *
199 	    sock->ztes_nccd);
200 
201 	for (uint32_t i = 0, ccdno = 0; i < df->atd_df_buf_nvalid; i++) {
202 		const amdzen_topo_df_ent_t *dfe = &df->atd_df_ents[i];
203 		const amdzen_topo_ccm_data_t *ccm;
204 
205 		if (!topo_zen_fabric_is_ccm(df, dfe)) {
206 			continue;
207 		}
208 
209 		ccm = &dfe->atde_data.atded_ccm;
210 		for (uint32_t ccm_ccdno = 0; ccm_ccdno < ccm->atcd_nccds;
211 		    ccm_ccdno++) {
212 			if (ccm->atcd_ccd_en[ccm_ccdno] == 0) {
213 				continue;
214 			}
215 
216 			if (topo_zen_enum_chip_gather_ccd(mod, zen, sock, dfe,
217 			    ccdno, ccm->atcd_ccd_ids[ccm_ccdno]) != 0) {
218 				return (-1);
219 			}
220 
221 			ccdno++;
222 		}
223 	}
224 
225 	topo_mod_dprintf(mod, "found %u CCDs\n", sock->ztes_nccd_valid);
226 	if (sock->ztes_nccd_valid == 0) {
227 		topo_mod_dprintf(mod, "somehow we ended up with no CCDs with "
228 		    "valid topo information. Something went very wrong.\n");
229 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
230 	}
231 
232 	sock->ztes_fm_agent = fmd_agent_open(FMD_AGENT_VERSION);
233 	if (sock->ztes_fm_agent == NULL) {
234 		topo_mod_dprintf(mod, "failed to open FMD agent: %s\n",
235 		    strerror(errno));
236 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
237 	}
238 
239 	if (fmd_agent_physcpu_info(sock->ztes_fm_agent, &sock->ztes_cpus,
240 	    &sock->ztes_ncpus) != 0) {
241 		topo_mod_dprintf(mod, "failed to get FM agent CPU "
242 		    "information: %s\n",
243 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
244 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
245 	}
246 
247 	topo_mod_dprintf(mod, "got %u CPUs worth of data from the FM agent\n",
248 	    sock->ztes_ncpus);
249 
250 	if (fmd_agent_cache_info(sock->ztes_fm_agent, &sock->ztes_cache) != 0) {
251 		topo_mod_dprintf(mod, "failed to get FM agent cache "
252 		    "information: %s\n",
253 		    strerror(fmd_agent_errno(sock->ztes_fm_agent)));
254 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
255 	}
256 
257 	if (sock->ztes_cache.fmc_ncpus != sock->ztes_ncpus) {
258 		topo_mod_dprintf(mod, "/dev/fm gave us %u CPUs, but %u CPUs "
259 		    "for cache information: cannot continue\n",
260 		    sock->ztes_ncpus, sock->ztes_cache.fmc_ncpus);
261 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
262 	}
263 
264 	sock->ztes_kstat = kstat_open();
265 	if (sock->ztes_kstat == NULL) {
266 		topo_mod_dprintf(mod, "failed to open kstat driver: %s\n",
267 		    strerror(errno));
268 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
269 	}
270 
271 	return (0);
272 }
273 
/*
 * The kinds of cache that this module knows how to place in the hierarchy.
 * zen_topo_determine_cache() yields ZEN_TOPO_CACHE_UNKNOWN when a cache does
 * not match any entry in zen_topo_cache_map[]. The three CORE values are
 * attached to a core and the CCX value to a CCX by topo_zen_map_caches().
 */
typedef enum {
	ZEN_TOPO_CACHE_UNKNOWN,
	ZEN_TOPO_CACHE_CORE_L1D,
	ZEN_TOPO_CACHE_CORE_L1I,
	ZEN_TOPO_CACHE_CORE_L2,
	ZEN_TOPO_CACHE_CCX_L3
} zen_topo_cache_type_t;
281 
/*
 * One row of the table that zen_topo_determine_cache() uses to classify a cache
 * reported by the FM agent.
 */
typedef struct {
	/* Cache level that must match the reported FM_CACHE_INFO_LEVEL. */
	uint32_t		ztcm_level;
	/* Type bits that must exactly equal the reported FM_CACHE_INFO_TYPE. */
	fm_cache_info_type_t	ztcm_type;
	/*
	 * B_TRUE when the cache's APIC shift must equal the core shift of the
	 * APIC decomposition; B_FALSE when it must equal the CCX shift.
	 */
	boolean_t		ztcm_core;
	/* The classification returned when all of the above match. */
	zen_topo_cache_type_t	ztcm_cache;
} zen_topo_cache_map_t;
288 
289 const zen_topo_cache_map_t zen_topo_cache_map[] = {
290 	{ 1, FM_CACHE_INFO_T_DATA, B_TRUE, ZEN_TOPO_CACHE_CORE_L1D },
291 	{ 1, FM_CACHE_INFO_T_INSTR, B_TRUE, ZEN_TOPO_CACHE_CORE_L1I },
292 	{ 2, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
293 	    FM_CACHE_INFO_T_UNIFIED, B_TRUE, ZEN_TOPO_CACHE_CORE_L2 },
294 	{ 3, FM_CACHE_INFO_T_DATA | FM_CACHE_INFO_T_INSTR |
295 	    FM_CACHE_INFO_T_UNIFIED, B_FALSE, ZEN_TOPO_CACHE_CCX_L3 }
296 };
297 
298 static zen_topo_cache_type_t
zen_topo_determine_cache(topo_mod_t * mod,uint32_t level,uint32_t type,uint32_t shift)299 zen_topo_determine_cache(topo_mod_t *mod, uint32_t level, uint32_t type,
300     uint32_t shift)
301 {
302 	zen_topo_t *zen = topo_mod_getspecific(mod);
303 
304 	for (size_t i = 0; i < ARRAY_SIZE(zen_topo_cache_map); i++) {
305 		const zen_topo_cache_map_t *map = &zen_topo_cache_map[i];
306 		uint32_t apic;
307 
308 		if (map->ztcm_level != level || map->ztcm_type != type) {
309 			continue;
310 		}
311 
312 		if (map->ztcm_core) {
313 			apic = zen->zt_base.atb_apic_decomp.aad_core_shift;
314 		} else {
315 			apic = zen->zt_base.atb_apic_decomp.aad_ccx_shift;
316 		}
317 
318 		if (shift == apic) {
319 			return (map->ztcm_cache);
320 		}
321 	}
322 
323 	return (ZEN_TOPO_CACHE_UNKNOWN);
324 }
325 
326 /*
327  * We have mapped a logical CPU to a position in the hierarchy. We must now walk
328  * its caches and attempt to install them up the chain. We assume that there
329  * there are four caches right now: an L1i, L1d, L2, and L3 cache. We will
330  * verify that these are shared at the points in the hierarchy that we expect.
331  * Note, AMD has mixed designs with 1 CCX and 2 CCXs. When there is only 1 CCX
332  * then we often describe the CCX and CCD as equivalent though if you look at
333  * the PPR it describes each CCD as having a single CCX. This is why the L3
334  * cache lives on the CCX right now.
335  */
336 static boolean_t
topo_zen_map_caches(topo_mod_t * mod,zen_topo_enum_sock_t * sock,zen_topo_enum_ccx_t * ccx,zen_topo_enum_core_t * core,uint32_t cpuno)337 topo_zen_map_caches(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
338     zen_topo_enum_ccx_t *ccx, zen_topo_enum_core_t *core, uint32_t cpuno)
339 {
340 	fmd_agent_cpu_cache_t *cpu_cache = &sock->ztes_cache.fmc_cpus[cpuno];
341 	if (cpu_cache->fmcc_ncaches == 0) {
342 		return (B_TRUE);
343 	}
344 
345 	/*
346 	 * For each cache that we discover we need to do the following:
347 	 *
348 	 *  o Determine the type of cache that this is. While the upper layers
349 	 *    guarantee us the L1 caches come before L2 and L2 before L3, we
350 	 *    don't care.
351 	 *  o Use the APIC shift and our APIC decomp to confirm the level of the
352 	 *    hierarchy this should operate at.
353 	 *  o If a cache is already there, it should have the same ID as the one
354 	 *    that we already have.
355 	 */
356 	for (uint_t i = 0; i < cpu_cache->fmcc_ncaches; i++) {
357 		nvlist_t *nvl = cpu_cache->fmcc_caches[i];
358 		nvlist_t **cachep = NULL;
359 		zen_topo_cache_type_t ct;
360 		uint32_t level, type, shift;
361 		uint64_t id, alt_id;
362 
363 		if (nvlist_lookup_pairs(nvl, 0,
364 		    FM_CACHE_INFO_LEVEL, DATA_TYPE_UINT32, &level,
365 		    FM_CACHE_INFO_TYPE, DATA_TYPE_UINT32, &type,
366 		    FM_CACHE_INFO_ID, DATA_TYPE_UINT64, &id,
367 		    FM_CACHE_INFO_X86_APIC_SHIFT, DATA_TYPE_UINT32, &shift,
368 		    NULL) != 0) {
369 			topo_mod_dprintf(mod, "missing required nvlist fields "
370 			    "from FM CPU %u cache %u\n", cpuno, i);
371 			return (B_FALSE);
372 		}
373 
374 		ct = zen_topo_determine_cache(mod, level, type, shift);
375 		switch (ct) {
376 		case ZEN_TOPO_CACHE_UNKNOWN:
377 			topo_mod_dprintf(mod, "failed to map CPU %u cache %u "
378 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
379 			    "shift 0x%x to a known type\n", cpuno, i, id, level,
380 			    type, shift);
381 			return (B_FALSE);
382 		case ZEN_TOPO_CACHE_CORE_L1D:
383 			cachep = &core->ztcore_l1d;
384 			break;
385 		case ZEN_TOPO_CACHE_CORE_L1I:
386 			cachep = &core->ztcore_l1i;
387 			break;
388 		case ZEN_TOPO_CACHE_CORE_L2:
389 			cachep = &core->ztcore_l2;
390 			break;
391 		case ZEN_TOPO_CACHE_CCX_L3:
392 			cachep = &ccx->ztccx_l3;
393 			break;
394 		}
395 
396 		if (*cachep == NULL) {
397 			*cachep = nvl;
398 			continue;
399 		}
400 
401 		alt_id = fnvlist_lookup_uint64(*cachep, FM_CACHE_INFO_ID);
402 		if (alt_id != id) {
403 			topo_mod_dprintf(mod, "wanted to map CPU %u cache %u "
404 			    "with id 0x%" PRIx64 " level %u, type 0x%x, APIC "
405 			    "shift 0x%x to Zen cache type 0x%x, but cache with "
406 			    "id 0x%" PRIx64 " already present", cpuno, i,
407 			    id, level, type, shift, ct, alt_id);
408 			return (B_FALSE);
409 		}
410 	}
411 
412 	return (B_TRUE);
413 }
414 
415 static boolean_t
topo_zen_map_logcpu_to_phys(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl,uint32_t cpuno,uint32_t apicid)416 topo_zen_map_logcpu_to_phys(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
417     nvlist_t *cpu_nvl, uint32_t cpuno, uint32_t apicid)
418 {
419 	for (uint32_t ccdno = 0; ccdno < sock->ztes_nccd; ccdno++) {
420 		amdzen_topo_ccd_t *ccd = &sock->ztes_ccd[ccdno];
421 		if (ccd->atccd_err != AMDZEN_TOPO_CCD_E_OK)
422 			continue;
423 
424 		for (uint32_t ccxno = 0; ccxno < ccd->atccd_nphys_ccx;
425 		    ccxno++) {
426 			amdzen_topo_ccx_t *ccx;
427 			if (ccd->atccd_ccx_en[ccxno] == 0)
428 				continue;
429 
430 			ccx = &ccd->atccd_ccx[ccxno];
431 			for (uint32_t coreno = 0;
432 			    coreno < ccx->atccx_nphys_cores; coreno++) {
433 				amdzen_topo_core_t *core;
434 				if (ccx->atccx_core_en[coreno] == 0)
435 					continue;
436 
437 				core = &ccx->atccx_cores[coreno];
438 				for (uint32_t thrno = 0;
439 				    thrno < core->atcore_nthreads; thrno++) {
440 					zen_topo_enum_ccd_t *zt_ccd;
441 					zen_topo_enum_ccx_t *zt_ccx;
442 					zen_topo_enum_core_t *zt_core;
443 
444 					if (core->atcore_thr_en[thrno] == 0)
445 						continue;
446 
447 					if (core->atcore_apicids[thrno] !=
448 					    apicid) {
449 						continue;
450 					}
451 
452 					/*
453 					 * We have a match. Make sure we haven't
454 					 * already used it.
455 					 */
456 					zt_ccd = &sock->ztes_tn_ccd[ccdno];
457 					zt_ccx = &zt_ccd->ztccd_ccx[ccxno];
458 					zt_core = &zt_ccx->ztccx_core[coreno];
459 
460 					if (zt_core->ztcore_nvls[thrno] !=
461 					    NULL) {
462 						topo_mod_dprintf(mod, "APIC ID "
463 						    "0x%x mapped to CCD/CCX/"
464 						    "Core/Thread 0x%x/0x%x/"
465 						    "0x%x/0x%x, but found "
466 						    "another nvlist already "
467 						    "there\n", apicid, ccdno,
468 						    ccxno, coreno, thrno);
469 						return (B_FALSE);
470 					}
471 
472 					zt_core->ztcore_nvls[thrno] = cpu_nvl;
473 
474 					/*
475 					 * Now that we have successfully mapped
476 					 * a core into the tree go install the
477 					 * logical CPU's cache information up
478 					 * the tree.
479 					 */
480 					return (topo_zen_map_caches(mod, sock,
481 					    zt_ccx, zt_core, cpuno));
482 				}
483 			}
484 		}
485 	}
486 
487 	topo_mod_dprintf(mod, "failed to find a CPU for apic 0x%x\n",
488 	    apicid);
489 	return (B_FALSE);
490 }
491 
492 /*
493  * Using information from the given logical CPU that we know is part of our
494  * socket that we're enumerating, attempt to go through and load information
495  * about the chip itself such as the family, model, stepping, brand string, etc.
496  * This comes from both the /dev/fm information that we have in cpu_nvl and from
497  * kstats.
498  */
499 static int
topo_zen_map_common_chip_info(topo_mod_t * mod,zen_topo_enum_sock_t * sock,nvlist_t * cpu_nvl)500 topo_zen_map_common_chip_info(topo_mod_t *mod, zen_topo_enum_sock_t *sock,
501     nvlist_t *cpu_nvl)
502 {
503 	char name[KSTAT_STRLEN];
504 	int32_t cpu_id;
505 	uint32_t sockid;
506 	char *rev, *ident;
507 	kstat_t *ks;
508 	const kstat_named_t *knp;
509 
510 	if (nvlist_lookup_pairs(cpu_nvl, 0,
511 	    FM_PHYSCPU_INFO_CPU_ID, DATA_TYPE_INT32, &cpu_id,
512 	    FM_PHYSCPU_INFO_CHIP_REV, DATA_TYPE_STRING, &rev,
513 	    FM_PHYSCPU_INFO_SOCKET_TYPE, DATA_TYPE_UINT32, &sockid,
514 	    FM_PHYSCPU_INFO_FAMILY, DATA_TYPE_INT32, &sock->ztes_cpu_fam,
515 	    FM_PHYSCPU_INFO_MODEL, DATA_TYPE_INT32, &sock->ztes_cpu_model,
516 	    FM_PHYSCPU_INFO_STEPPING, DATA_TYPE_INT32, &sock->ztes_cpu_step,
517 	    NULL) != 0) {
518 		topo_mod_dprintf(mod, "missing required nvlist fields "
519 		    "from FM physcpu info chip ident\n");
520 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
521 	}
522 
523 	/*
524 	 * Some CPUs have PPIN disabled so we look for it separately here. The
525 	 * rest of the aspects are required.
526 	 */
527 	if (nvlist_lookup_string(cpu_nvl, FM_PHYSCPU_INFO_CHIP_IDENTSTR,
528 	    &ident) != 0) {
529 		ident = NULL;
530 	}
531 
532 	/*
533 	 * If we can not fully identify a revision, the kernel will indicate so
534 	 * with a '?' in the name where normally a stepping would show up. See
535 	 * amd_revmap[] in uts/intel/os/cpuid_subr.c. In such a case, we do not
536 	 * want to propagate such a revision.
537 	 */
538 	if (strchr(rev, '?') == NULL) {
539 		sock->ztes_cpu_rev = rev;
540 	}
541 	sock->ztes_cpu_serial = ident;
542 
543 	if (snprintf(name, sizeof (name), "cpu_info%d", cpu_id) >=
544 	    sizeof (name)) {
545 		topo_mod_dprintf(mod, "failed to construct kstat name: "
546 		    "overflow");
547 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
548 	}
549 
550 	ks = kstat_lookup(sock->ztes_kstat, "cpu_info", cpu_id, name);
551 	if (ks == NULL) {
552 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s': %s",
553 		    cpu_id, name, strerror(errno));
554 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
555 	}
556 
557 	if (kstat_read(sock->ztes_kstat, ks, NULL) == -1) {
558 		topo_mod_dprintf(mod, "failed to read kstat 'cpu_info:%d:%s': "
559 		    "%s", cpu_id, name, strerror(errno));
560 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
561 	}
562 
563 	knp = kstat_data_lookup(ks, "brand");
564 	if (knp == NULL) {
565 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:brand\n",
566 		    cpu_id, name);
567 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
568 
569 	}
570 	sock->ztes_cpu_brand = KSTAT_NAMED_STR_PTR(knp);
571 
572 	if (sockid == X86_SOCKET_UNKNOWN) {
573 		return (0);
574 	}
575 
576 	knp = kstat_data_lookup(ks, "socket_type");
577 	if (knp == NULL) {
578 		topo_mod_dprintf(mod, "failed to find 'cpu_info:%d:%s:"
579 		    "socket_type\n", cpu_id, name);
580 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
581 	}
582 	sock->ztes_cpu_sock = KSTAT_NAMED_STR_PTR(knp);
583 
584 	return (0);
585 }
586 
587 static int
topo_zen_enum_chip_map(topo_mod_t * mod,zen_topo_enum_sock_t * sock)588 topo_zen_enum_chip_map(topo_mod_t *mod, zen_topo_enum_sock_t *sock)
589 {
590 	/*
591 	 * We have an arrray of information from /dev/fm that describes each
592 	 * logical CPU. We would like to map that to a given place in physical
593 	 * topology, which we do via the APIC ID. We will then also determine
594 	 * how caches are mapped together.
595 	 */
596 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
597 		int32_t apicid, sockid;
598 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
599 
600 		if (nvlist_lookup_pairs(cpu_nvl, 0,
601 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
602 		    FM_PHYSCPU_INFO_STRAND_APICID, DATA_TYPE_INT32, &apicid,
603 		    NULL) != 0) {
604 			topo_mod_dprintf(mod, "missing required nvlist fields "
605 			    "from FM physcpu info for CPU %u\n", i);
606 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
607 		}
608 
609 		/*
610 		 * This logical CPU isn't for our socket, ignore it.
611 		 */
612 		if (sockid != sock->ztes_sockid) {
613 			continue;
614 		}
615 
616 		if (!topo_zen_map_logcpu_to_phys(mod, sock, cpu_nvl, i,
617 		    (uint32_t)apicid)) {
618 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
619 		}
620 	}
621 
622 	/*
623 	 * Now that we have each logical CPU taken care of, we want to fill in
624 	 * information about the common CPU.
625 	 */
626 	for (uint_t i = 0; i < sock->ztes_ncpus; i++) {
627 		int32_t sockid;
628 		nvlist_t *cpu_nvl = sock->ztes_cpus[i];
629 
630 		if (nvlist_lookup_pairs(cpu_nvl, 0,
631 		    FM_PHYSCPU_INFO_CHIP_ID, DATA_TYPE_INT32, &sockid,
632 		    NULL) != 0) {
633 			topo_mod_dprintf(mod, "missing required nvlist fields "
634 			    "from FM physcpu info for CPU %u\n", i);
635 			return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
636 		}
637 
638 		/*
639 		 * This logical CPU isn't for our socket, ignore it.
640 		 */
641 		if (sockid != sock->ztes_sockid) {
642 			continue;
643 		}
644 
645 		return (topo_zen_map_common_chip_info(mod, sock, cpu_nvl));
646 	}
647 
648 	topo_mod_dprintf(mod, "no logical CPUs match our target socket %u!\n",
649 	    sock->ztes_sockid);
650 	return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
651 }
652 
653 static int
topo_zen_enum(topo_mod_t * mod,tnode_t * pnode,const char * name,topo_instance_t min,topo_instance_t max,void * modarg,void * data)654 topo_zen_enum(topo_mod_t *mod, tnode_t *pnode, const char *name,
655     topo_instance_t min, topo_instance_t max, void *modarg, void *data)
656 {
657 	int ret;
658 	zen_topo_t *zen = topo_mod_getspecific(mod);
659 	amdzen_topo_df_t *df = NULL;
660 	topo_zen_chip_t *chip;
661 	zen_topo_enum_sock_t sock;
662 
663 	topo_mod_dprintf(mod, "asked to enum %s [%" PRIu64 ", %" PRIu64 "] on "
664 	    "%s%" PRIu64 "\n", name, min, max, topo_node_name(pnode),
665 	    topo_node_instance(pnode));
666 
667 	/*
668 	 * Currently we only support enumerating a given chip.
669 	 */
670 	if (strcmp(name, CHIP) != 0) {
671 		topo_mod_dprintf(mod, "cannot enumerate %s: unknown type\n",
672 		    name);
673 		return (-1);
674 	}
675 
676 	if (data == NULL) {
677 		topo_mod_dprintf(mod, "cannot enumerate %s: missing required "
678 		    "data\n", name);
679 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
680 	}
681 
682 	if (min != max) {
683 		topo_mod_dprintf(mod, "cannot enumerate %s: multiple instances "
684 		    "requested\n", name);
685 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
686 	}
687 
688 	chip = data;
689 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
690 		if (zen->zt_dfs[i].atd_sockid == chip->tzc_sockid) {
691 			df = &zen->zt_dfs[i];
692 			break;
693 		}
694 	}
695 
696 	if (df == NULL) {
697 		topo_mod_dprintf(mod, "no matching DF with socket %u",
698 		    chip->tzc_sockid);
699 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
700 	}
701 
702 	/*
703 	 * In our supported platforms there is either a single DF instance per
704 	 * die (DFv3+ aka Zen 2+) or we have the older style Zen 1 (aka DFv2)
705 	 * systems where there are multiple dies within the package. We don't
706 	 * support Zen 1/DFv2 based systems right now.
707 	 */
708 	if (zen->zt_base.atb_rev == DF_REV_UNKNOWN) {
709 		topo_mod_dprintf(mod, "DF base revision is unknown, cannot "
710 		    "proceed\n");
711 		return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
712 	}
713 
714 	if (zen->zt_base.atb_rev == DF_REV_2) {
715 		topo_mod_dprintf(mod, "DFv2 multiple dies are not currently "
716 		    "supported\n");
717 		return (topo_mod_seterrno(mod, EMOD_METHOD_NOTSUP));
718 	}
719 
720 	/*
721 	 * We want to create our "chip" node at the top of this. To do that,
722 	 * we'd like to know things like the CPU's PPIN and other information
723 	 * like the socket type and related. To do this we will start by getting
724 	 * information about the physical CPU information from devfm. That will
725 	 * be combined with our knowledge of how APIC IDs map to data fabric
726 	 * elements.
727 	 */
728 	bzero(&sock, sizeof (sock));
729 	sock.ztes_sockid = chip->tzc_sockid;
730 	if ((ret = topo_zen_enum_chip_gather(mod, zen, df, &sock)) != 0) {
731 		topo_zen_enum_cleanup_sock(mod, &sock);
732 		return (ret);
733 	}
734 
735 	/*
736 	 * Determine the mapping of all the logical CPU entries and their data
737 	 * that we found to the CCD mapping.
738 	 */
739 	if ((ret = topo_zen_enum_chip_map(mod, &sock)) != 0) {
740 		return (ret);
741 	}
742 
743 	ret = topo_zen_build_chip(mod, pnode, min, &sock);
744 	topo_zen_enum_cleanup_sock(mod, &sock);
745 
746 	return (ret);
747 }
748 
/*
 * The module operations that topo_zen_mod points at. Only the enumeration
 * entry point, topo_zen_enum(), is provided; the second slot is left NULL.
 */
static const topo_modops_t topo_zen_ops = {
	topo_zen_enum, NULL
};
752 
/*
 * The module description that _topo_init() hands to topo_mod_register(): a
 * human-readable name, the hc FMRI scheme, this module's version, and the
 * operations vector above.
 */
static topo_modinfo_t topo_zen_mod = {
	"AMD Zen Enumerator", FM_FMRI_SCHEME_HC, TOPO_MOD_ZEN_VERS,
	    &topo_zen_ops
};
757 
758 static void
topo_zen_cleanup(topo_mod_t * mod,zen_topo_t * zen)759 topo_zen_cleanup(topo_mod_t *mod, zen_topo_t *zen)
760 {
761 	if (zen->zt_dfs != NULL) {
762 		for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
763 			size_t entsize;
764 
765 			if (zen->zt_dfs[i].atd_df_ents == NULL)
766 				continue;
767 			entsize = sizeof (amdzen_topo_df_ent_t) *
768 			    zen->zt_base.atb_maxdfent;
769 			topo_mod_free(mod, zen->zt_dfs[i].atd_df_ents,
770 			    entsize);
771 		}
772 		topo_mod_free(mod, zen->zt_dfs, sizeof (amdzen_topo_df_t) *
773 		    zen->zt_base.atb_ndf);
774 	}
775 
776 	if (zen->zt_fd >= 0) {
777 		(void) close(zen->zt_fd);
778 		zen->zt_fd = -1;
779 	}
780 	topo_mod_free(mod, zen, sizeof (zen_topo_t));
781 }
782 
783 static int
topo_zen_init(topo_mod_t * mod,zen_topo_t * zen)784 topo_zen_init(topo_mod_t *mod, zen_topo_t *zen)
785 {
786 	zen->zt_fd = open(topo_zen_dev, O_RDONLY);
787 	if (zen->zt_fd < 0) {
788 		topo_mod_dprintf(mod, "failed to open %s: %s\n", topo_zen_dev,
789 		    strerror(errno));
790 		return (-1);
791 	}
792 
793 	if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_BASE, &zen->zt_base) != 0) {
794 		topo_mod_dprintf(mod, "failed to get base Zen topology "
795 		    "information: %s\n", strerror(errno));
796 		return (-1);
797 	}
798 
799 	/*
800 	 * Get all of the basic DF information now.
801 	 */
802 	zen->zt_dfs = topo_mod_zalloc(mod, sizeof (amdzen_topo_df_t) *
803 	    zen->zt_base.atb_ndf);
804 	if (zen->zt_dfs == NULL) {
805 		topo_mod_dprintf(mod, "failed to allocate space for %u DF "
806 		    "entries: %s\n", zen->zt_base.atb_ndf,
807 		    topo_strerror(EMOD_NOMEM));
808 		return (-1);
809 	}
810 
811 	for (uint32_t i = 0; i < zen->zt_base.atb_ndf; i++) {
812 		amdzen_topo_df_t *topo_df = &zen->zt_dfs[i];
813 
814 		topo_df->atd_df_ents = topo_mod_zalloc(mod,
815 		    sizeof (amdzen_topo_df_ent_t) * zen->zt_base.atb_maxdfent);
816 		if (topo_df->atd_df_ents == NULL) {
817 			topo_mod_dprintf(mod, "failed to allocate space for "
818 			    "DF %u's DF ents: %s\n", i,
819 			    topo_strerror(EMOD_NOMEM));
820 			return (-1);
821 		}
822 		topo_df->atd_df_buf_nents = zen->zt_base.atb_maxdfent;
823 		topo_df->atd_dfno = i;
824 
825 		if (ioctl(zen->zt_fd, AMDZEN_TOPO_IOCTL_DF, topo_df) != 0) {
826 			topo_mod_dprintf(mod, "failed to get information for "
827 			    "DF %u: %s", i, strerror(errno));
828 			return (-1);
829 		}
830 	}
831 
832 	return (0);
833 }
834 
835 int
_topo_init(topo_mod_t * mod,topo_version_t version)836 _topo_init(topo_mod_t *mod, topo_version_t version)
837 {
838 	zen_topo_t *zen = NULL;
839 
840 	if (getenv("TOPOZENDEBUG") != NULL) {
841 		topo_mod_setdebug(mod);
842 	}
843 	topo_mod_dprintf(mod, "module initializing\n");
844 
845 	zen = topo_mod_zalloc(mod, sizeof (zen_topo_t));
846 	if (zen == NULL) {
847 		topo_mod_dprintf(mod, "failed to allocate zen_topo_t: %s\n",
848 		    topo_strerror(EMOD_NOMEM));
849 		return (-1);
850 	}
851 
852 	if (topo_zen_init(mod, zen) != 0) {
853 		topo_zen_cleanup(mod, zen);
854 		return (-1);
855 	}
856 
857 	if (topo_mod_register(mod, &topo_zen_mod, TOPO_VERSION) != 0) {
858 		topo_zen_cleanup(mod, zen);
859 		return (-1);
860 	}
861 
862 	topo_mod_setspecific(mod, zen);
863 	return (0);
864 }
865 
866 void
_topo_fini(topo_mod_t * mod)867 _topo_fini(topo_mod_t *mod)
868 {
869 	zen_topo_t *zen;
870 
871 	if ((zen = topo_mod_getspecific(mod)) == NULL) {
872 		return;
873 	}
874 
875 	topo_mod_setspecific(mod, NULL);
876 	topo_zen_cleanup(mod, zen);
877 	topo_mod_unregister(mod);
878 }
879