/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019, Joyent, Inc.
 * Copyright 2024 Oxide Computer Company
 */

/*
 * Nexus Driver for AMD Zen family systems. The purpose of this driver is to
 * provide access to the following resources in a single, centralized fashion:
 *
 *  - The per-chip Data Fabric
 *  - The North Bridge
 *  - The System Management Network (SMN)
 *
 * This is a nexus driver as once we have attached to all the requisite
 * components, we will enumerate child devices which consume this
 * functionality.
 *
 * ------------------------
 * Mapping Devices Together
 * ------------------------
 *
 * The operating system needs to expose things like temperature sensors and
 * DRAM configuration registers in terms of things that are meaningful to the
 * system such as logical CPUs, cores, etc. This driver attaches to the PCI
 * devices that represent the northbridge, data fabrics, and dies. Note that
 * there are multiple northbridge and DF devices (one each per die) and this
 * driver maps all of these three things together. Unfortunately, this
 * requires some acrobatics as there is no direct way to map a northbridge to
 * its corresponding die. Instead, we map a CPU die to a data fabric PCI
 * device and a data fabric PCI device to a corresponding northbridge PCI
 * device. This transitive relationship allows us to map between northbridge
 * and die.
 *
 * As each data fabric device is attached, based on vendor and device portions
 * of the PCI ID, we add it to the DF stubs list in the global amdzen_t
 * structure, amdzen_data->azn_df_stubs. We must now map these to logical
 * CPUs.
 *
 * In current Zen based products, there is a direct mapping between processor
 * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and
 * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to
 * processor node 1, etc. This means that to map a logical CPU to a data
 * fabric device, we take its processor node id, add it to 0x18 and find the
 * PCI device that is on bus 0 with that ID number. We already discovered the
 * DF devices as described above.
 *
 * The northbridge PCI device has a well-defined device and function, but the
 * bus that it is on varies. Each die has its own set of assigned PCI buses
 * and its northbridge device is on the first die-specific bus. This implies
 * that the northbridges do not show up on PCI bus 0, as that is the PCI bus
 * that all of the data fabric devices are on and is not assigned to any
 * particular die. Additionally, while the northbridge on the lowest-numbered
 * PCI bus intuitively corresponds to processor node zero, hardware does not
 * guarantee this. Because we don't want to be at the mercy of firmware, we
 * don't rely on this ordering assumption, though we have yet to find a system
 * that deviates from it, either.
 *
 * One of the registers in the data fabric device's function 0
 * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is
 * associated with the processor node. This means that we can map a data
 * fabric device to a northbridge by finding the northbridge whose PCI bus ID
 * matches the value in the corresponding data fabric's
 * AMDZEN_DF_F0_CFG_ADDR_CTL.
 *
 * Given all of the above, we can map a northbridge to a data fabric device
 * and a die to a data fabric device. Because these are 1:1 mappings, there is
 * a transitive relationship from northbridge to die, and therefore we know
 * which northbridge is associated with which processor die. This is
 * summarized in the following image:
 *
 *  +-------+    +------------------------------------+    +--------------+
 *  | Die 0 |--->| Data Fabric PCI BDF 0/18/0         |--->| Northbridge  |
 *  +-------+    | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10  |    | PCI 10/0/0   |
 *     ...       +------------------------------------+    +--------------+
 *  +-------+    +------------------------------------+    +--------------+
 *  | Die n |--->| Data Fabric PCI BDF 0/18+n/0       |--->| Northbridge  |
 *  +-------+    | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 |    | PCI 133/0/0  |
 *               +------------------------------------+    +--------------+
 *
 * Note, the PCI buses used by the northbridges here are arbitrary examples
 * that do not necessarily reflect actual hardware values; however, the
 * bus/device/function (BDF) of the data fabric accurately models hardware.
 * All BDF values are in hex.
 *
 * Starting with the Rome generation of processors (Family 17h Model 30-3Fh),
 * AMD has multiple northbridges on a given die. All of these northbridges
 * share the same data fabric and system management network port. From our
 * perspective this means that some of the northbridge devices will be
 * redundant and that we no longer have a 1:1 mapping between the northbridge
 * and the data fabric devices. Every data fabric will have a northbridge, but
 * not every northbridge will have a data fabric device mapped. Because we're
 * always trying to map from a die to a northbridge and not the reverse, the
 * fact that there are extra northbridge devices hanging around that we don't
 * know about shouldn't be a problem.
 *
 * -------------------------------
 * Attach and Detach Complications
 * -------------------------------
 *
 * We need to map different PCI devices together. Each device is attached to a
 * amdzen_stub driver to facilitate integration with the rest of the kernel
 * PCI machinery and so we have to manage multiple dev_info_t structures, each
 * of which may be independently attached and detached.
 *
 * This is not particularly complex for attach: our _init routine allocates
 * the necessary mutex and list structures at module load time, and as each
 * stub is attached, it calls into this code to be added to the appropriate
 * list. When the nexus itself is attached, we walk the PCI device tree
 * accumulating a counter for all devices we expect to be attached. Once the
 * scan is complete and all such devices are accounted for (stub registration
 * may be happening asynchronously with respect to nexus attach), we
 * initialize the nexus device and the attach is complete.
 *
 * Most other device drivers support instances that can be brought back after
 * detach, provided they are associated with an active minor node in the
 * /devices file system. This driver is different. Once a stub device has been
 * attached, we do not permit detaching the nexus driver instance, as the
 * kernel does not give us interlocking guarantees between nexus and stub
 * driver attach and detach. It is simplest to just unconditionally fail
 * detach once a stub has attached.
 *
 * ---------------
 * Exposed Devices
 * ---------------
 *
 * Rather than try and have all of the different functions that could be
 * provided in one driver, we have a nexus driver that tries to load child
 * pseudo-device drivers that provide specific pieces of functionality.
 *
 * -------
 * Locking
 * -------
 *
 * The amdzen_data structure contains a single lock, azn_mutex.
142 * 143 * The various client functions here are intended for our nexus's direct 144 * children, but have been designed in case someone else should depends on this 145 * driver. Once a DF has been discovered, the set of entities inside of it 146 * (adf_nents, adf_ents[]) is considered static, constant data, and iteration 147 * over them does not require locking. However, the discovery of the amd_df_t 148 * does. In addition, locking is required whenever performing register accesses 149 * to the DF or SMN. 150 * 151 * To summarize, one must hold the lock in the following circumstances: 152 * 153 * - Looking up DF structures 154 * - Reading or writing to DF registers 155 * - Reading or writing to SMN registers 156 * 157 * In general, it is preferred that the lock be held across an entire client 158 * operation if possible. The only time this becomes an issue are when we have 159 * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively 160 * call into us. 161 */ 162 163 #include <sys/modctl.h> 164 #include <sys/conf.h> 165 #include <sys/devops.h> 166 #include <sys/ddi.h> 167 #include <sys/sunddi.h> 168 #include <sys/pci.h> 169 #include <sys/sysmacros.h> 170 #include <sys/sunndi.h> 171 #include <sys/x86_archext.h> 172 #include <sys/cpuvar.h> 173 #include <sys/policy.h> 174 #include <sys/stat.h> 175 #include <sys/sunddi.h> 176 #include <sys/bitmap.h> 177 #include <sys/stdbool.h> 178 179 #include <sys/amdzen/df.h> 180 #include <sys/amdzen/ccd.h> 181 #include "amdzen.h" 182 #include "amdzen_client.h" 183 #include "amdzen_topo.h" 184 185 amdzen_t *amdzen_data; 186 187 /* 188 * Internal minor nodes for devices that the nexus provides itself. 189 */ 190 #define AMDZEN_MINOR_TOPO 0 191 192 /* 193 * Array of northbridge IDs that we care about. 
194 */ 195 static const uint16_t amdzen_nb_ids[] = { 196 /* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */ 197 0x1450, 198 /* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */ 199 0x15d0, 200 /* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */ 201 0x1480, 202 /* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */ 203 0x1630, 204 /* Family 19h Genoa and Bergamo */ 205 0x14a4, 206 /* Family 17h Mendocino, Family 19h Rembrandt */ 207 0x14b5, 208 /* Family 19h Raphael */ 209 0x14d8, 210 /* Family 19h Phoenix */ 211 0x14e8 212 }; 213 214 typedef struct { 215 char *acd_name; 216 amdzen_child_t acd_addr; 217 /* 218 * This indicates whether or not we should issue warnings to users when 219 * something happens specific to this instance. The main reason we don't 220 * want to is for optional devices that may not be installed as they are 221 * for development purposes (e.g. usmn, zen_udf); however, if there is 222 * an issue with the others we still want to know. 
223 */ 224 bool acd_warn; 225 } amdzen_child_data_t; 226 227 static const amdzen_child_data_t amdzen_children[] = { 228 { "smntemp", AMDZEN_C_SMNTEMP, true }, 229 { "usmn", AMDZEN_C_USMN, false }, 230 { "zen_udf", AMDZEN_C_ZEN_UDF, false }, 231 { "zen_umc", AMDZEN_C_ZEN_UMC, true } 232 }; 233 234 static uint8_t 235 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg) 236 { 237 return (pci_config_get8(stub->azns_cfgspace, reg)); 238 } 239 240 static uint16_t 241 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg) 242 { 243 return (pci_config_get16(stub->azns_cfgspace, reg)); 244 } 245 246 static uint32_t 247 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg) 248 { 249 return (pci_config_get32(stub->azns_cfgspace, reg)); 250 } 251 252 static uint64_t 253 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg) 254 { 255 return (pci_config_get64(stub->azns_cfgspace, reg)); 256 } 257 258 static void 259 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val) 260 { 261 pci_config_put8(stub->azns_cfgspace, reg, val); 262 } 263 264 static void 265 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val) 266 { 267 pci_config_put16(stub->azns_cfgspace, reg, val); 268 } 269 270 static void 271 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val) 272 { 273 pci_config_put32(stub->azns_cfgspace, reg, val); 274 } 275 276 static uint64_t 277 amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def, 278 uint8_t inst, boolean_t do_64) 279 { 280 df_reg_def_t ficaa; 281 df_reg_def_t ficad; 282 uint32_t val = 0; 283 df_rev_t df_rev = azn->azn_dfs[0].adf_rev; 284 285 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 286 ASSERT3U(def.drd_gens & df_rev, ==, df_rev); 287 val = DF_FICAA_V2_SET_TARG_INST(val, 1); 288 val = DF_FICAA_V2_SET_FUNC(val, def.drd_func); 289 val = DF_FICAA_V2_SET_INST(val, inst); 290 val = DF_FICAA_V2_SET_64B(val, do_64 ? 
1 : 0); 291 292 switch (df_rev) { 293 case DF_REV_2: 294 case DF_REV_3: 295 case DF_REV_3P5: 296 ficaa = DF_FICAA_V2; 297 ficad = DF_FICAD_LO_V2; 298 /* 299 * Both here and in the DFv4 case, the register ignores the 300 * lower 2 bits. That is we can only address and encode things 301 * in units of 4 bytes. 302 */ 303 val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2); 304 break; 305 case DF_REV_4: 306 ficaa = DF_FICAA_V4; 307 ficad = DF_FICAD_LO_V4; 308 val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2); 309 break; 310 default: 311 panic("encountered unexpected DF rev: %u", df_rev); 312 } 313 314 amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val); 315 if (do_64) { 316 return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func], 317 ficad.drd_reg)); 318 } else { 319 return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func], 320 ficad.drd_reg)); 321 } 322 } 323 324 /* 325 * Perform a targeted 32-bit indirect read to a specific instance and function. 326 */ 327 static uint32_t 328 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst, 329 const df_reg_def_t def) 330 { 331 return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE)); 332 } 333 334 /* 335 * For a broadcast read, just go to the underlying PCI function and perform a 336 * read. At this point in time, we don't believe we need to use the FICAA/FICAD 337 * to access it (though it does have a broadcast mode). 
338 */ 339 static uint32_t 340 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def) 341 { 342 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 343 return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg)); 344 } 345 346 static uint32_t 347 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg) 348 { 349 const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); 350 const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); 351 352 VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); 353 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 354 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); 355 356 switch (SMN_REG_SIZE(reg)) { 357 case 1: 358 return ((uint32_t)amdzen_stub_get8(df->adf_nb, 359 AMDZEN_NB_SMN_DATA + addr_off)); 360 case 2: 361 return ((uint32_t)amdzen_stub_get16(df->adf_nb, 362 AMDZEN_NB_SMN_DATA + addr_off)); 363 case 4: 364 return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA)); 365 default: 366 panic("unreachable invalid SMN register size %u", 367 SMN_REG_SIZE(reg)); 368 } 369 } 370 371 static void 372 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg, 373 const uint32_t val) 374 { 375 const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); 376 const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); 377 378 VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); 379 VERIFY(SMN_REG_VALUE_FITS(reg, val)); 380 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 381 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); 382 383 switch (SMN_REG_SIZE(reg)) { 384 case 1: 385 amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, 386 (uint8_t)val); 387 break; 388 case 2: 389 amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, 390 (uint16_t)val); 391 break; 392 case 4: 393 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val); 394 break; 395 default: 396 panic("unreachable invalid SMN register size %u", 397 SMN_REG_SIZE(reg)); 398 } 399 } 400 401 static amdzen_df_t * 402 amdzen_df_find(amdzen_t *azn, uint_t dfno) 403 { 404 uint_t i; 405 406 
ASSERT(MUTEX_HELD(&azn->azn_mutex)); 407 if (dfno >= azn->azn_ndfs) { 408 return (NULL); 409 } 410 411 for (i = 0; i < azn->azn_ndfs; i++) { 412 amdzen_df_t *df = &azn->azn_dfs[i]; 413 if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) { 414 continue; 415 } 416 417 if (dfno == 0) { 418 return (df); 419 } 420 dfno--; 421 } 422 423 return (NULL); 424 } 425 426 static amdzen_df_ent_t * 427 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid) 428 { 429 for (uint_t i = 0; i < df->adf_nents; i++) { 430 amdzen_df_ent_t *ent = &df->adf_ents[i]; 431 432 if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) { 433 continue; 434 } 435 436 if (ent->adfe_inst_id == instid) { 437 return (ent); 438 } 439 } 440 441 return (NULL); 442 } 443 444 /* 445 * Client functions that are used by nexus children. 446 */ 447 int 448 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp) 449 { 450 amdzen_df_t *df; 451 amdzen_t *azn = amdzen_data; 452 453 if (!SMN_REG_SIZE_IS_VALID(reg)) 454 return (EINVAL); 455 if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) 456 return (EINVAL); 457 458 mutex_enter(&azn->azn_mutex); 459 df = amdzen_df_find(azn, dfno); 460 if (df == NULL) { 461 mutex_exit(&azn->azn_mutex); 462 return (ENOENT); 463 } 464 465 if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) { 466 mutex_exit(&azn->azn_mutex); 467 return (ENXIO); 468 } 469 470 *valp = amdzen_smn_read(azn, df, reg); 471 mutex_exit(&azn->azn_mutex); 472 return (0); 473 } 474 475 int 476 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val) 477 { 478 amdzen_df_t *df; 479 amdzen_t *azn = amdzen_data; 480 481 if (!SMN_REG_SIZE_IS_VALID(reg)) 482 return (EINVAL); 483 if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) 484 return (EINVAL); 485 if (!SMN_REG_VALUE_FITS(reg, val)) 486 return (EOVERFLOW); 487 488 mutex_enter(&azn->azn_mutex); 489 df = amdzen_df_find(azn, dfno); 490 if (df == NULL) { 491 mutex_exit(&azn->azn_mutex); 492 return (ENOENT); 493 } 494 495 if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) 
{ 496 mutex_exit(&azn->azn_mutex); 497 return (ENXIO); 498 } 499 500 amdzen_smn_write(azn, df, reg, val); 501 mutex_exit(&azn->azn_mutex); 502 return (0); 503 } 504 505 uint_t 506 amdzen_c_df_count(void) 507 { 508 uint_t ret; 509 amdzen_t *azn = amdzen_data; 510 511 mutex_enter(&azn->azn_mutex); 512 ret = azn->azn_ndfs; 513 mutex_exit(&azn->azn_mutex); 514 return (ret); 515 } 516 517 df_rev_t 518 amdzen_c_df_rev(void) 519 { 520 amdzen_df_t *df; 521 amdzen_t *azn = amdzen_data; 522 df_rev_t rev; 523 524 /* 525 * Always use the first DF instance to determine what we're using. Our 526 * current assumption, which seems to generally be true, is that the 527 * given DF revisions are the same in a given system when the DFs are 528 * directly connected. 529 */ 530 mutex_enter(&azn->azn_mutex); 531 df = amdzen_df_find(azn, 0); 532 if (df == NULL) { 533 rev = DF_REV_UNKNOWN; 534 } else { 535 rev = df->adf_rev; 536 } 537 mutex_exit(&azn->azn_mutex); 538 539 return (rev); 540 } 541 542 int 543 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def, 544 uint32_t *valp) 545 { 546 amdzen_df_t *df; 547 amdzen_t *azn = amdzen_data; 548 549 mutex_enter(&azn->azn_mutex); 550 df = amdzen_df_find(azn, dfno); 551 if (df == NULL) { 552 mutex_exit(&azn->azn_mutex); 553 return (ENOENT); 554 } 555 556 *valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE); 557 mutex_exit(&azn->azn_mutex); 558 559 return (0); 560 } 561 562 int 563 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def, 564 uint64_t *valp) 565 { 566 amdzen_df_t *df; 567 amdzen_t *azn = amdzen_data; 568 569 mutex_enter(&azn->azn_mutex); 570 df = amdzen_df_find(azn, dfno); 571 if (df == NULL) { 572 mutex_exit(&azn->azn_mutex); 573 return (ENOENT); 574 } 575 576 *valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE); 577 mutex_exit(&azn->azn_mutex); 578 579 return (0); 580 } 581 582 int 583 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func, 584 void *arg) 585 { 586 
amdzen_df_t *df; 587 amdzen_t *azn = amdzen_data; 588 df_type_t df_type; 589 uint8_t df_subtype; 590 591 /* 592 * Unlike other calls here, we hold our lock only to find the DF here. 593 * The main reason for this is the nature of the callback function. 594 * Folks are iterating over instances so they can call back into us. If 595 * you look at the locking statement, the thing that is most volatile 596 * right here and what we need to protect is the DF itself and 597 * subsequent register accesses to it. The actual data about which 598 * entities exist is static and so once we have found a DF we should 599 * hopefully be in good shape as they only come, but don't go. 600 */ 601 mutex_enter(&azn->azn_mutex); 602 df = amdzen_df_find(azn, dfno); 603 if (df == NULL) { 604 mutex_exit(&azn->azn_mutex); 605 return (ENOENT); 606 } 607 mutex_exit(&azn->azn_mutex); 608 609 switch (type) { 610 case ZEN_DF_TYPE_CS_UMC: 611 df_type = DF_TYPE_CS; 612 /* 613 * In the original Zeppelin DFv2 die there was no subtype field 614 * used for the CS. The UMC is the only type and has a subtype 615 * of zero. 616 */ 617 if (df->adf_rev != DF_REV_2) { 618 df_subtype = DF_CS_SUBTYPE_UMC; 619 } else { 620 df_subtype = 0; 621 } 622 break; 623 case ZEN_DF_TYPE_CCM_CPU: 624 /* 625 * Because the CCM CPU subtype has always remained zero, we can 626 * use that regardless of the generation. 627 */ 628 df_type = DF_TYPE_CCM; 629 df_subtype = DF_CCM_SUBTYPE_CPU; 630 break; 631 default: 632 return (EINVAL); 633 } 634 635 for (uint_t i = 0; i < df->adf_nents; i++) { 636 amdzen_df_ent_t *ent = &df->adf_ents[i]; 637 638 /* 639 * Some DF components are not considered enabled and therefore 640 * will end up having bogus values in their ID fields. If we do 641 * not have an enable flag set, we must skip this node. 
642 */ 643 if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) 644 continue; 645 646 if (ent->adfe_type == df_type && 647 ent->adfe_subtype == df_subtype) { 648 int ret = func(dfno, ent->adfe_fabric_id, 649 ent->adfe_inst_id, arg); 650 if (ret != 0) { 651 return (ret); 652 } 653 } 654 } 655 656 return (0); 657 } 658 659 int 660 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp) 661 { 662 const amdzen_df_t *df; 663 amdzen_t *azn = amdzen_data; 664 665 mutex_enter(&azn->azn_mutex); 666 df = amdzen_df_find(azn, 0); 667 if (df == NULL) { 668 mutex_exit(&azn->azn_mutex); 669 return (ENOENT); 670 } 671 672 *decomp = df->adf_decomp; 673 mutex_exit(&azn->azn_mutex); 674 return (0); 675 } 676 677 static boolean_t 678 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd) 679 { 680 int ret; 681 dev_info_t *child; 682 683 if (ndi_devi_alloc(azn->azn_dip, acd->acd_name, 684 (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) { 685 dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child " 686 "dip for %s", acd->acd_name); 687 return (B_FALSE); 688 } 689 690 ddi_set_parent_data(child, (void *)acd); 691 if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) { 692 if (acd->acd_warn) { 693 dev_err(azn->azn_dip, CE_WARN, "!failed to online " 694 "child dip %s: %d", acd->acd_name, ret); 695 } 696 return (B_FALSE); 697 } 698 699 return (B_TRUE); 700 } 701 702 static boolean_t 703 amdzen_map_dfs(amdzen_t *azn) 704 { 705 amdzen_stub_t *stub; 706 707 ASSERT(MUTEX_HELD(&azn->azn_mutex)); 708 709 for (stub = list_head(&azn->azn_df_stubs); stub != NULL; 710 stub = list_next(&azn->azn_df_stubs, stub)) { 711 amdzen_df_t *df; 712 uint_t dfno; 713 714 dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE; 715 if (dfno > AMDZEN_MAX_DFS) { 716 dev_err(stub->azns_dip, CE_WARN, "encountered df " 717 "device with illegal DF PCI b/d/f: 0x%x/%x/%x", 718 stub->azns_bus, stub->azns_dev, stub->azns_func); 719 goto err; 720 } 721 722 df = &azn->azn_dfs[dfno]; 723 724 if (stub->azns_func >= 
AMDZEN_MAX_DF_FUNCS) { 725 dev_err(stub->azns_dip, CE_WARN, "encountered df " 726 "device with illegal DF PCI b/d/f: 0x%x/%x/%x", 727 stub->azns_bus, stub->azns_dev, stub->azns_func); 728 goto err; 729 } 730 731 if (df->adf_funcs[stub->azns_func] != NULL) { 732 dev_err(stub->azns_dip, CE_WARN, "encountered " 733 "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x", 734 stub->azns_bus, stub->azns_dev, stub->azns_func); 735 goto err; 736 } 737 df->adf_funcs[stub->azns_func] = stub; 738 } 739 740 return (B_TRUE); 741 742 err: 743 azn->azn_flags |= AMDZEN_F_DEVICE_ERROR; 744 return (B_FALSE); 745 } 746 747 static boolean_t 748 amdzen_check_dfs(amdzen_t *azn) 749 { 750 uint_t i; 751 boolean_t ret = B_TRUE; 752 753 for (i = 0; i < AMDZEN_MAX_DFS; i++) { 754 amdzen_df_t *df = &azn->azn_dfs[i]; 755 uint_t count = 0; 756 757 /* 758 * We require all platforms to have DFs functions 0-6. Not all 759 * platforms have DF function 7. 760 */ 761 for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) { 762 if (df->adf_funcs[func] != NULL) { 763 count++; 764 } 765 } 766 767 if (count == 0) 768 continue; 769 770 if (count != 7) { 771 ret = B_FALSE; 772 dev_err(azn->azn_dip, CE_WARN, "df %u devices " 773 "incomplete", i); 774 } else { 775 df->adf_flags |= AMDZEN_DF_F_VALID; 776 azn->azn_ndfs++; 777 } 778 } 779 780 return (ret); 781 } 782 783 static const uint8_t amdzen_df_rome_ids[0x2b] = { 784 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 785 24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 786 44, 45, 46, 47, 48 787 }; 788 789 /* 790 * Check the first df entry to see if it belongs to Rome or Milan. If so, then 791 * it uses the disjoint ID space. 792 */ 793 static boolean_t 794 amdzen_is_rome_style(uint_t id) 795 { 796 return (id == 0x1490 || id == 0x1650); 797 } 798 799 /* 800 * To be able to do most other things we want to do, we must first determine 801 * what revision of the DF (data fabric) that we're using. 
802 * 803 * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen 804 * 4 timeframe and allows us to tell apart different version of the DF register 805 * set, most usefully when various subtypes were added. 806 * 807 * Older versions can theoretically be told apart based on usage of reserved 808 * registers. We walk these in the following order, starting with the newest rev 809 * and walking backwards to tell things apart: 810 * 811 * o v3.5 -> Check function 1, register 0x150. This was reserved prior 812 * to this point. This is actually DF_FIDMASK0_V3P5. We are supposed 813 * to check bits [7:0]. 814 * 815 * o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was 816 * changed to indicate a component mask. This is non-zero 817 * in the 3.0 generation. This is actually DF_FIDMASK_V2. 818 * 819 * o v2.0 -> This is just the not that case. Presumably v1 wasn't part 820 * of the Zen generation. 821 * 822 * Because we don't know what version we are yet, we do not use the normal 823 * versioned register accesses which would check what DF version we are and 824 * would want to use the normal indirect register accesses (which also require 825 * us to know the version). We instead do direct broadcast reads. 
826 */ 827 static void 828 amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df) 829 { 830 uint32_t val; 831 df_reg_def_t rd = DF_FBICNT; 832 833 val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg); 834 df->adf_major = DF_FBICNT_V4_GET_MAJOR(val); 835 df->adf_minor = DF_FBICNT_V4_GET_MINOR(val); 836 if (df->adf_major == 0 && df->adf_minor == 0) { 837 rd = DF_FIDMASK0_V3P5; 838 val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg); 839 if (bitx32(val, 7, 0) != 0) { 840 df->adf_major = 3; 841 df->adf_minor = 5; 842 df->adf_rev = DF_REV_3P5; 843 } else { 844 rd = DF_FIDMASK_V2; 845 val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], 846 rd.drd_reg); 847 if (bitx32(val, 7, 0) != 0) { 848 df->adf_major = 3; 849 df->adf_minor = 0; 850 df->adf_rev = DF_REV_3; 851 } else { 852 df->adf_major = 2; 853 df->adf_minor = 0; 854 df->adf_rev = DF_REV_2; 855 } 856 } 857 } else if (df->adf_major == 4 && df->adf_minor == 0) { 858 df->adf_rev = DF_REV_4; 859 } else { 860 df->adf_rev = DF_REV_UNKNOWN; 861 } 862 } 863 864 /* 865 * All of the different versions of the DF have different ways of getting at and 866 * answering the question of how do I break a fabric ID into a corresponding 867 * socket, die, and component. 
Importantly the goal here is to obtain, cache, 868 * and normalize: 869 * 870 * o The DF System Configuration 871 * o The various Mask registers 872 * o The Node ID 873 */ 874 static void 875 amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df) 876 { 877 uint32_t mask; 878 df_fabric_decomp_t *decomp = &df->adf_decomp; 879 880 switch (df->adf_rev) { 881 case DF_REV_2: 882 df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2); 883 switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) { 884 case DF_DIE_TYPE_CPU: 885 mask = amdzen_df_read32_bcast(azn, df, 886 DF_DIEMASK_CPU_V2); 887 break; 888 case DF_DIE_TYPE_APU: 889 mask = amdzen_df_read32_bcast(azn, df, 890 DF_DIEMASK_APU_V2); 891 break; 892 default: 893 panic("DF thinks we're not on a CPU!"); 894 } 895 df->adf_mask0 = mask; 896 897 /* 898 * DFv2 is a bit different in how the fabric mask register is 899 * phrased. Logically a fabric ID is broken into something that 900 * uniquely identifies a "node" (a particular die on a socket) 901 * and something that identifies a "component", e.g. a memory 902 * controller. 903 * 904 * Starting with DFv3, these registers logically called out how 905 * to separate the fabric ID first into a node and a component. 906 * Then the node was then broken down into a socket and die. In 907 * DFv2, there is no separate mask and shift of a node. Instead 908 * the socket and die are absolute offsets into the fabric ID 909 * rather than relative offsets into the node ID. As such, when 910 * we encounter DFv2, we fake up a node mask and shift and make 911 * it look like DFv3+. 
912 */ 913 decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) | 914 DF_DIEMASK_V2_GET_DIE_MASK(mask); 915 decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask); 916 decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask); 917 decomp->dfd_comp_shift = 0; 918 919 decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >> 920 decomp->dfd_node_shift; 921 decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >> 922 decomp->dfd_node_shift; 923 decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) - 924 decomp->dfd_node_shift; 925 decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) - 926 decomp->dfd_node_shift; 927 ASSERT3U(decomp->dfd_die_shift, ==, 0); 928 929 /* 930 * There is no register in the actual data fabric with the node 931 * ID in DFv2 that we have found. Instead we take the first 932 * entity's fabric ID and transform it into the node id. 933 */ 934 df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id & 935 decomp->dfd_node_mask) >> decomp->dfd_node_shift; 936 break; 937 case DF_REV_3: 938 df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3); 939 df->adf_mask0 = amdzen_df_read32_bcast(azn, df, 940 DF_FIDMASK0_V3); 941 df->adf_mask1 = amdzen_df_read32_bcast(azn, df, 942 DF_FIDMASK1_V3); 943 944 decomp->dfd_sock_mask = 945 DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1); 946 decomp->dfd_sock_shift = 947 DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1); 948 decomp->dfd_die_mask = 949 DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1); 950 decomp->dfd_die_shift = 0; 951 decomp->dfd_node_mask = 952 DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0); 953 decomp->dfd_node_shift = 954 DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1); 955 decomp->dfd_comp_mask = 956 DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0); 957 decomp->dfd_comp_shift = 0; 958 959 df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg); 960 break; 961 case DF_REV_3P5: 962 df->adf_syscfg = amdzen_df_read32_bcast(azn, df, 963 DF_SYSCFG_V3P5); 964 df->adf_mask0 = amdzen_df_read32_bcast(azn, 
df, 965 DF_FIDMASK0_V3P5); 966 df->adf_mask1 = amdzen_df_read32_bcast(azn, df, 967 DF_FIDMASK1_V3P5); 968 df->adf_mask2 = amdzen_df_read32_bcast(azn, df, 969 DF_FIDMASK2_V3P5); 970 971 decomp->dfd_sock_mask = 972 DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2); 973 decomp->dfd_sock_shift = 974 DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1); 975 decomp->dfd_die_mask = 976 DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2); 977 decomp->dfd_die_shift = 0; 978 decomp->dfd_node_mask = 979 DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0); 980 decomp->dfd_node_shift = 981 DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1); 982 decomp->dfd_comp_mask = 983 DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0); 984 decomp->dfd_comp_shift = 0; 985 986 df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg); 987 break; 988 case DF_REV_4: 989 df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4); 990 df->adf_mask0 = amdzen_df_read32_bcast(azn, df, 991 DF_FIDMASK0_V4); 992 df->adf_mask1 = amdzen_df_read32_bcast(azn, df, 993 DF_FIDMASK1_V4); 994 df->adf_mask2 = amdzen_df_read32_bcast(azn, df, 995 DF_FIDMASK2_V4); 996 997 /* 998 * The DFv4 registers are at a different location in the DF; 999 * however, the actual layout of fields is the same as DFv3.5. 1000 * This is why you see V3P5 below. 
1001 */ 1002 decomp->dfd_sock_mask = 1003 DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2); 1004 decomp->dfd_sock_shift = 1005 DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1); 1006 decomp->dfd_die_mask = 1007 DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2); 1008 decomp->dfd_die_shift = 0; 1009 decomp->dfd_node_mask = 1010 DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0); 1011 decomp->dfd_node_shift = 1012 DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1); 1013 decomp->dfd_comp_mask = 1014 DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0); 1015 decomp->dfd_comp_shift = 0; 1016 1017 df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg); 1018 break; 1019 default: 1020 panic("encountered suspicious, previously rejected DF " 1021 "rev: 0x%x", df->adf_rev); 1022 } 1023 } 1024 1025 /* 1026 * The purpose of this function is to map CCMs to the corresponding CCDs that 1027 * exist. This is not an obvious thing as there is no direct mapping in the data 1028 * fabric between these IDs. 1029 * 1030 * Prior to DFv4, a given CCM was only ever connected to at most one CCD. 1031 * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports) 1032 * that connect to CCDs. These may be connected to the same CCD or a different 1033 * one. When both ports are enabled we must check whether or not the port is 1034 * considered to be in wide mode. When wide mode is enabled then the two ports 1035 * are connected to a single CCD. If wide mode is disabled then the two ports 1036 * are connected to separate CCDs. 1037 * 1038 * The physical number of a CCD, which is how we determine the SMN aperture to 1039 * use, is based on the CCM ID. In most sockets we have seen up to a maximum of 1040 * 8 CCMs. When a CCM is connected to more than one CCD we have determined based 1041 * on some hints from AMD's ACPI information that the numbering is assumed to be 1042 * that CCM's number plus the total number of CCMs. 1043 * 1044 * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. 
When there 1045 * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs 1046 * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs 1047 * within the package has changed across generations. 1048 * 1049 * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an 1050 * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This 1051 * meant that in cases where only a subset of CCDs were populated it'd forcibly 1052 * disable the higher CCD in a group (but with DFv3 the CCM would still be 1053 * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say. 1054 * This was almost certainly done to balance the NUMA config. 1055 * 1056 * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the 1057 * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5), 1058 * etc. This is also why we more often see disabled CCMs in Genoa, but not in 1059 * Rome/Milan. 1060 * 1061 * When we're operating in wide mode and therefore both SDPs are connected to a 1062 * single CCD, we've always found that the lower CCD index will be used by the 1063 * system and the higher one is not considered present. Therefore, when 1064 * operating in wide mode, we need to make sure that whenever we have a non-zero 1065 * value for SDPs being connected that we rewrite this to only appear as a 1066 * single CCD is present. It's conceivable (though hard to imagine) that we 1067 * could get a value of 0b10 indicating that only the upper SDP link is active 1068 * for some reason. 
 */

/*
 * Fill in the set of CCDs attached to the CCM entity 'dfe', which is the
 * ccmno'th CCM in this DF. See the large comment above for how the CCM number
 * determines the physical CCD numbers.
 */
static void
amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
    uint32_t ccmno)
{
	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
	uint32_t ccd_en;

	if (df->adf_rev >= DF_REV_4) {
		/*
		 * DFv4 explicitly tells us which SDP ports have a CCD behind
		 * them.
		 */
		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
		    DF_CCD_EN_V4);
		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);

		/*
		 * When wide mode is enabled both SDPs connect to a single CCD,
		 * so collapse any non-zero enable mask down to just the lower
		 * CCD (see the comment above this function).
		 */
		val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
		    DF_CCMCFG4_V4);
		if (DF_CCMCFG4_V4_GET_WIDE_EN(val) != 0 && ccd_en != 0) {
			ccd_en = 0x1;
		}
	} else {
		/* Prior to DFv4 a CCM is connected to at most one CCD. */
		ccd_en = 0x1;
	}

	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
		if (ccm->acd_ccd_en[i] == 0)
			continue;
		/*
		 * The physical CCD number is this CCM's number plus
		 * i * (total CCMs), per the numbering scheme described above.
		 */
		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
		ccm->acd_nccds++;
	}
}

/*
 * Initialize our knowledge about a given series of nodes on the data fabric.
 */
static void
amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
{
	uint_t i;
	uint32_t val, ccmno;

	amdzen_determine_df_vers(azn, df);

	switch (df->adf_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
		break;
	case DF_REV_4:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
		break;
	default:
		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
		    "revision: 0x%x", df->adf_rev);
		return;
	}
	/* The first PCI bus of this node; used to locate its northbridge. */
	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
	df->adf_nents = DF_FBICNT_GET_COUNT(val);
	if (df->adf_nents == 0)
		return;
	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
	    KM_SLEEP);

	for (i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];
		uint8_t inst = i;

		/*
		 * Unfortunately, Rome uses a discontinuous instance ID pattern
		 * while everything else we can find uses a contiguous instance
		 * ID pattern. This means that for Rome, we need to adjust the
		 * indexes that we iterate over, though the total number of
		 * entries is right. This was carried over into Milan, but not
		 * Genoa.
		 */
		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
				dev_err(azn->azn_dip, CE_WARN, "Rome family "
				    "processor reported more ids than the PPR, "
				    "resetting %u to instance zero", inst);
				inst = 0;
			} else {
				inst = amdzen_df_rome_ids[inst];
			}
		}

		dfe->adfe_drvid = inst;
		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
		dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1);
		dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2);
		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);

		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);

		/*
		 * The enabled flag was not present in Zen 1. Simulate it by
		 * checking for a non-zero register instead.
		 */
		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
		}
		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
		}
		dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
		/* The fabric ID field layout varies by DF revision. */
		switch (df->adf_rev) {
		case DF_REV_2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3P5:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_4:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
			break;
		default:
			panic("encountered suspicious, previously rejected DF "
			    "rev: 0x%x", df->adf_rev);
		}

		/*
		 * Record information about a subset of DF entities that we've
		 * found. Currently we're tracking this only for CCMs.
		 */
		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (dfe->adfe_type == DF_TYPE_CCM &&
		    dfe->adfe_subtype == DF_CCM_SUBTYPE_CPU) {
			df->adf_nccm++;
		}
	}

	/*
	 * Now that we have filled in all of our info, attempt to fill in
	 * specific information about different types of instances.
	 */
	ccmno = 0;
	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];

		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		/*
		 * Perform type and sub-type specific initialization. Currently
		 * limited to CCMs.
		 */
		switch (dfe->adfe_type) {
		case DF_TYPE_CCM:
			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
			ccmno++;
			break;
		default:
			break;
		}
	}

	amdzen_determine_fabric_decomp(azn, df);
}

/*
 * Find the northbridge stub that corresponds to this DF: the NB whose PCI bus
 * number matches the first bus assigned to this DF's node (adf_nb_busno, read
 * from DF::CfgAddressControl in amdzen_setup_df()).
 */
static void
amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
{
	amdzen_stub_t *stub;

	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
	    stub = list_next(&azn->azn_nb_stubs, stub)) {
		if (stub->azns_bus == df->adf_nb_busno) {
			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
			df->adf_nb = stub;
			return;
		}
	}
}

/*
 * Construct the APIC ID decomposition from the Zen 3+ SCFCTP INITPKG0/INITPKG7
 * register values of a representative core. Each component's bit width is
 * derived from the logical entity counts stored in these registers.
 */
static void
amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
{
	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;

	/*
	 * These are all 0 based values, meaning that we need to add one to each
	 * of them. However, we skip this because to calculate the number of
	 * bits to cover an entity we would subtract one.
	 */
	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);

	/* The 16-thread-APIC mode bit only exists on Zen 4 and later. */
	if (uarchrev_uarch(cpuid_getuarchrev(CPU)) >= X86_UARCH_AMD_ZEN4) {
		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
	} else {
		extccx = 0;
	}

	nthr_bits = highbit(nthr);
	ncore_bits = highbit(ncore);
	nccx_bits = highbit(nccx);
	nccd_bits = highbit(nccd);
	nsock_bits = highbit(nsock);

	apic->aad_thread_shift = 0;
	apic->aad_thread_mask = (1 << nthr_bits) - 1;

	apic->aad_core_shift = nthr_bits;
	if (ncore_bits > 0) {
		apic->aad_core_mask = (1 << ncore_bits) - 1;
		apic->aad_core_mask <<= apic->aad_core_shift;
	} else {
		apic->aad_core_mask = 0;
	}

	/*
	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
	 * of the value should take up four bits. In the common Genoa case,
	 * nccx_bits will be zero.
	 */
	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
	if (extccx != 0 && apic->aad_ccx_shift < 4) {
		apic->aad_ccx_shift = 4;
	}
	if (nccx_bits > 0) {
		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
	} else {
		apic->aad_ccx_mask = 0;
	}

	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
	if (nccd_bits > 0) {
		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
	} else {
		apic->aad_ccd_mask = 0;
	}

	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
	if (nsock_bits > 0) {
		apic->aad_sock_mask = (1 << nsock_bits) - 1;
		apic->aad_sock_mask <<= apic->aad_sock_shift;
	} else {
		apic->aad_sock_mask = 0;
	}

	/*
	 * Currently all supported Zen 2+ platforms only have a single die per
	 * socket as compared to Zen 1. So this is always kept at zero.
	 */
	apic->aad_die_mask = 0;
	apic->aad_die_shift = 0;
}

/*
 * We would like to determine what the logical APIC decomposition is on Zen 3
 * and newer family parts. While there is information added to CPUID in the form
 * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
 * believe is the underlying source of the CPUID data.
 *
 * Fundamentally there are a series of registers in SMN space that relate to the
 * SCFCTP. Coincidentally, there is one of these for each core and there are a
 * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
 * information about a given core's logical and physical IDs. More interestingly
 * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
 * total number of logical entities. We've been promised that this has to be
 * the same across the fabric. That's all well and good, but this begs the
 * question of how do we actually get there.
 * The above is a core-specific
 * register and requires that we understand information about which CCDs and
 * CCXs are actually present.
 *
 * So we are starting with a data fabric that has some CCM present. The CCM
 * entries in the data fabric may be tagged with our ENABLED flag.
 * Unfortunately, that can be true regardless of whether or not it's actually
 * present or not. As a result, we go to another chunk of SMN space registers,
 * SMU::PWR. These contain information about the CCDs, the physical cores that
 * are enabled, and related. So we will first walk the DF entities and see if we
 * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
 * there is nothing present. Otherwise, we should get back something that
 * matches information in the data fabric.
 *
 * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
 * determine which physical cores are enabled in the CCD/CCX. That will finally
 * give us an index to get to our friend INITPKG7.
 */
static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
{
	amdzen_df_t *df = &azn->azn_dfs[0];
	uint32_t ccdno = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (ent->adfe_type == DF_TYPE_CCM &&
		    ent->adfe_subtype == DF_CCM_SUBTYPE_CPU) {
			uint32_t val, nccx, pkg7, pkg0;
			smn_reg_t die_reg, thrcfg_reg, core_reg;
			smn_reg_t pkg7_reg, pkg0_reg;
			int core_bit;
			uint8_t pccxno, pcoreno;

			/* An all-1s read means no CCD is actually here. */
			die_reg = SMUPWR_CCD_DIE_ID(ccdno);
			val = amdzen_smn_read(azn, df, die_reg);
			if (val == SMN_EINVAL32) {
				ccdno++;
				continue;
			}

			ASSERT3U(SMUPWR_CCD_DIE_ID_GET(val), ==, ccdno);

			/*
			 * This die actually exists. Switch over to the core
			 * enable register to find one to ask about physically.
			 */
			thrcfg_reg = SMUPWR_THREAD_CFG(ccdno);
			val = amdzen_smn_read(azn, df, thrcfg_reg);
			nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
			core_reg = SMUPWR_CORE_EN(ccdno);
			val = amdzen_smn_read(azn, df, core_reg);
			if (val == 0) {
				ccdno++;
				continue;
			}

			/*
			 * There exists an enabled physical core. Find the first
			 * index of it and map it to the corresponding CCD and
			 * CCX. ddi_ffs is the bit index, but we want the
			 * physical core number, hence the -1.
			 */
			core_bit = ddi_ffs(val);
			ASSERT3S(core_bit, !=, 0);
			pcoreno = core_bit - 1;

			/*
			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
			 * the number of logical cores that are present in the
			 * complex, not the total number of physical cores. So
			 * here we need to encode that in Zen 3+ the number of
			 * cores per CCX is a maximum of 8. Right now we do
			 * assume that the physical and logical ccx numbering is
			 * equivalent (we have no other way of knowing if it is
			 * or isn't right now) and that we'd always have CCX0
			 * before CCX1. AMD seems to suggest we can assume this,
			 * though it is a worrisome assumption.
			 */
			pccxno = pcoreno / 8;
			ASSERT3U(pccxno, <, nccx);
			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
			    pcoreno);
			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
			    pcoreno);
			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * We have the fun job of trying to figure out what the correct form of the APIC
 * decomposition should be and how to break that into its logical components.
 * The way that we get at this is generation-specific unfortunately. Here's how
 * it works out:
 *
 * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
 *		family defines exactly how the APIC ID is broken into logical
 *		components and it's fixed. That is, depending on whether or
 *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
 *		constructing this. The way that we're supposed to check if SMT
 *		is enabled is to use AMD leaf 8X1E and ask how many threads per
 *		core there are. We use the x86 feature set to determine that
 *		instead.
 *
 *		More specifically the Zen 1 scheme is 7 bits long. The bits have
 *		the following meanings.
 *
 *		[6]   Socket ID
 *		[5:4] Node ID
 *		[3]   Logical CCX ID
 *		With SMT			Without SMT
 *		[2:1] Logical Core ID		[2] hardcoded to zero
 *		[0]   Thread ID			[1:0] Logical Core ID
 *
 *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
 *		without SMT shifts everything to the right by one bit.
 *
 *		[7]   Socket ID
 *		[6:4] Logical CCD ID
 *		[3]   Logical CCX ID
 *		[2:1] Logical Core ID
 *		[0]   Thread ID
 *
 * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
 *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
 *		number of bits for each logical component that exists, but the
 *		exact number varies based on what's actually present. To get at
 *		this we use a piece of data that is embedded in the SCFCTP
 *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
 *		be used to determine how many logical entities of each kind the
 *		system thinks exist. While we could use the various CPUID
 *		topology items to try to speed this up, they don't tell us the
 *		die information that we need to do this.
 *
 * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
 *		for determining how to extract the CCD, CCX, and related pieces
 *		out of the device. One thing we have to be aware of is that when
 *		the CCD and CCX shift are the same, that means that there is
 *		only a single CCX and therefore have to take that into account
 *		appropriately. This is the case generally on Zen 4 platforms,
 *		but not on Bergamo. Until we can confirm the actual CPUID leaf
 *		values that we receive in the cases of Bergamo and others, we
 *		opt instead to use the same SCFCTP scheme as Zen 3.
 */
static boolean_t
amdzen_determine_apic_decomp(amdzen_t *azn)
{
	x86_uarchrev_t uarchrev = cpuid_getuarchrev(CPU);
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);

	switch (uarchrev_uarch(uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		/* Fixed 7-bit Zen 1 scheme; see the block comment above. */
		apic->aad_sock_mask = 0x40;
		apic->aad_sock_shift = 6;
		apic->aad_die_mask = 0x30;
		apic->aad_die_shift = 4;
		apic->aad_ccd_mask = 0;
		apic->aad_ccd_shift = 0;
		apic->aad_ccx_mask = 0x08;
		apic->aad_ccx_shift = 3;

		if (smt) {
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x1;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_core_mask = 0x03;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN2:
		/*
		 * Fixed 8-bit Zen 2 scheme; the non-SMT variant is the same
		 * layout shifted right by one bit.
		 */
		if (smt) {
			apic->aad_sock_mask = 0x80;
			apic->aad_sock_shift = 7;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x70;
			apic->aad_ccd_shift = 4;
			apic->aad_ccx_mask = 0x08;
			apic->aad_ccx_shift = 3;
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x01;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_sock_mask = 0x40;
			apic->aad_sock_shift = 6;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x38;
			apic->aad_ccd_shift = 3;
			apic->aad_ccx_mask = 0x04;
			apic->aad_ccx_shift = 2;
			apic->aad_core_mask = 0x3;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
		return (amdzen_determine_apic_decomp_initpkg(azn));
	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Snapshot the number of cores that can exist in a CCX based on the Zen
 * microarchitecture revision. In Zen 1-4 this has been a constant number
 * regardless of the actual CPU Family.
 */
static void
amdzen_determine_ncore_per_ccx(amdzen_t *azn)
{
	x86_uarchrev_t uarchrev = cpuid_getuarchrev(CPU);

	switch (uarchrev_uarch(uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
	case X86_UARCH_AMD_ZEN2:
		azn->azn_ncore_per_ccx = 4;
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
		azn->azn_ncore_per_ccx = 8;
		break;
	default:
		panic("asked about non-Zen uarch");
	}
}

/*
 * Determine whether the physical CCD 'ccdno' is present by probing its
 * SMU::PWR::CCD_DIE_ID register; an all-1s read means nothing is there.
 *
 * We need to be careful using this function as different AMD generations have
 * acted in different ways when there is a missing CCD. We've found that in
 * hardware where the CCM is enabled but there is no CCD attached, it generally
 * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
 * correspond to a disabled CCM then the firmware may inject a fatal error
 * (which is hopefully something missing in our RAS/MCA-X enablement).
 *
 * Put differently if this doesn't correspond to an Enabled CCM and you know the
 * number of valid CCDs on this, don't use it.
 */
static boolean_t
amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
{
	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
	uint32_t val = amdzen_smn_read(azn, df, die_reg);
	if (val == SMN_EINVAL32) {
		return (B_FALSE);
	}

	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
	return (B_TRUE);
}

/*
 * Attempt to determine a logical CCD number of a given CCD where we don't have
 * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
 * The CCD numbers that we have are in the physical space. Likely because of
 * how the orientation of CCM numbers map to physical locations and the layout
 * of them within the package, we haven't found a good way using the core DFv3
 * registers to determine if a given CCD is actually present or not as generally
 * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
 * determine CCD presence.
 */
static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
    const amdzen_df_ent_t *targ)
{
	uint32_t smnid = 0;
	uint32_t logid = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
			continue;
		}

		if (ent->adfe_inst_id == targ->adfe_inst_id) {
			return (logid);
		}

		/*
		 * Only entities of the same type/subtype as the target consume
		 * a physical (SMN) slot, and of those, only the ones that are
		 * actually present consume a logical ID.
		 */
		if (ent->adfe_type == targ->adfe_type &&
		    ent->adfe_subtype == targ->adfe_subtype) {
			boolean_t present = amdzen_ccd_present(azn, df, smnid);
			smnid++;
			if (present) {
				logid++;
			}
		}
	}

	panic("asked to match against invalid DF entity %p in df %p", targ, df);
}

/*
 * Fill in a core's logical core number from its INITPKG0 register and, the
 * first time through for a given CCX/CCD, the logical CCX and CCD numbers as
 * well (the *_set flags track whether those have already been recorded).
 */
static void
amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
    amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
    boolean_t *ccd_set, boolean_t *ccx_set)
{
	smn_reg_t pkg0_reg;
	uint32_t pkg0;

	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
	    core->atcore_phys_no);
	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);

	if (!*ccx_set) {
		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
		*ccx_set = B_TRUE;
	}

	if (!*ccd_set) {
		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
		*ccd_set = B_TRUE;
	}
}

/*
 * Attempt to fill in the physical topology information for this given CCD.
 * There are a few steps to this that we undertake to perform this as follows:
 *
 * 1) First we determine whether the CCD is actually present or not by reading
 * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
 * entry it appears, but the request for the die ID returns an invalid
 * read (all 1s). This die ID should match what we think of as the SMN number
 * below. If not, we're in trouble and the rest of this is in question.
 *
 * 2) We use the SMU::PWR registers to determine how many logical and physical
 * cores are present in this CCD and how they are split amongst the CCX. Here we
 * need to encode the CPU to CCX core size rankings. Through this process we
 * determine and fill out which threads and cores are enabled.
 *
 * 3) In Zen 3+ we then will read each core's INITPKG0 values to ensure that we
 * have a proper physical to logical mapping, at which point we can fill in the
 * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
 * indicate that we just mapped the first logical processor to the first enabled
 * core.
 *
 * 4) Once we have the logical IDs determined we will construct the APIC ID that
 * we expect this to have.
 *
 * Steps (2) - (4) are intertwined and done together.
1713 */ 1714 static void 1715 amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent, 1716 amdzen_topo_ccd_t *ccd) 1717 { 1718 uint32_t val, nccx, core_en, thread_en; 1719 uint32_t nlcore_per_ccx, nthreads_per_core; 1720 uint32_t sockid, dieid, compid; 1721 const uint32_t ccdno = ccd->atccd_phys_no; 1722 const x86_uarch_t uarch = uarchrev_uarch(cpuid_getuarchrev(CPU)); 1723 boolean_t smt, pkg0_ids, logccd_set = B_FALSE; 1724 smn_reg_t reg; 1725 1726 ASSERT(MUTEX_HELD(&azn->azn_mutex)); 1727 if (!amdzen_ccd_present(azn, df, ccdno)) { 1728 ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING; 1729 return; 1730 } 1731 1732 reg = SMUPWR_THREAD_CFG(ccdno); 1733 val = amdzen_smn_read(azn, df, reg); 1734 nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; 1735 nlcore_per_ccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; 1736 smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val); 1737 ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX); 1738 if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) { 1739 nthreads_per_core = 2; 1740 } else { 1741 nthreads_per_core = 1; 1742 } 1743 1744 reg = SMUPWR_CORE_EN(ccdno); 1745 core_en = amdzen_smn_read(azn, df, reg); 1746 reg = SMUPWR_THREAD_EN(ccdno); 1747 thread_en = amdzen_smn_read(azn, df, reg); 1748 1749 /* 1750 * The BSP is never enabled in a conventional sense and therefore the 1751 * bit is reserved and left as 0. As the BSP should be in the first CCD, 1752 * we go through and OR back in the bit lest we think the thread isn't 1753 * enabled. 1754 */ 1755 if (ccdno == 0) { 1756 thread_en |= 1; 1757 } 1758 1759 ccd->atccd_phys_no = ccdno; 1760 if (uarch >= X86_UARCH_AMD_ZEN3) { 1761 pkg0_ids = B_TRUE; 1762 } else { 1763 ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN; 1764 pkg0_ids = B_FALSE; 1765 1766 /* 1767 * Determine the CCD logical ID for Zen 2 now since this doesn't 1768 * rely upon needing a valid physical core. 
1769 */ 1770 ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent); 1771 logccd_set = B_TRUE; 1772 } 1773 1774 /* 1775 * To construct the APIC ID we need to know the socket and die (not CCD) 1776 * this is on. We deconstruct the CCD's fabric ID to determine that. 1777 */ 1778 zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid, 1779 &dieid, &compid); 1780 1781 /* 1782 * At this point we have all the information about the CCD, the number 1783 * of CCX instances, and which physical cores and threads are enabled. 1784 * Currently we assume that if we have one CCX enabled, then it is 1785 * always CCX0. We cannot find evidence of a two CCX supporting part 1786 * that doesn't always ship with both CCXs present and enabled. 1787 */ 1788 ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx; 1789 for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) { 1790 amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno]; 1791 const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1; 1792 const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx; 1793 const uint32_t ccx_core_en = (core_en >> core_shift) & 1794 core_mask; 1795 boolean_t logccx_set = B_FALSE; 1796 1797 ccd->atccd_ccx_en[ccxno] = 1; 1798 ccx->atccx_phys_no = ccxno; 1799 ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx; 1800 ccx->atccx_nlog_cores = nlcore_per_ccx; 1801 1802 if (!pkg0_ids) { 1803 ccx->atccx_log_no = ccx->atccx_phys_no; 1804 logccx_set = B_TRUE; 1805 } 1806 1807 for (uint32_t coreno = 0, logcorezen2 = 0; 1808 coreno < azn->azn_ncore_per_ccx; coreno++) { 1809 amdzen_topo_core_t *core = &ccx->atccx_cores[coreno]; 1810 1811 if ((ccx_core_en & (1 << coreno)) == 0) { 1812 continue; 1813 } 1814 1815 ccx->atccx_core_en[coreno] = 1; 1816 core->atcore_phys_no = coreno; 1817 1818 /* 1819 * Now that we have the physical core number present, we 1820 * must determine the logical core number and fill out 1821 * the logical CCX/CCD if it has not been set. 
We must 1822 * do this before we attempt to look at which threads 1823 * are enabled, because that operates based upon logical 1824 * core number. 1825 * 1826 * For Zen 2 we do not have INITPKG0 at our disposal. We 1827 * currently assume (and tag for userland with the 1828 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are 1829 * mapping logical cores to physicals in the order of 1830 * appearance. 1831 */ 1832 if (pkg0_ids) { 1833 amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx, 1834 core, &logccd_set, &logccx_set); 1835 } else { 1836 core->atcore_log_no = logcorezen2; 1837 logcorezen2++; 1838 } 1839 1840 /* 1841 * Determining which bits to use for the thread is a bit 1842 * weird here. Thread IDs within a CCX are logical, but 1843 * there are always physically spaced CCX sizes. See the 1844 * comment at the definition for SMU::PWR::THREAD_ENABLE 1845 * for more information. 1846 */ 1847 const uint32_t thread_shift = (ccx->atccx_nphys_cores * 1848 ccx->atccx_log_no + core->atcore_log_no) * 1849 nthreads_per_core; 1850 const uint32_t thread_mask = (nthreads_per_core << 1) - 1851 1; 1852 const uint32_t core_thread_en = (thread_en >> 1853 thread_shift) & thread_mask; 1854 core->atcore_nthreads = nthreads_per_core; 1855 core->atcore_thr_en[0] = core_thread_en & 0x01; 1856 core->atcore_thr_en[1] = core_thread_en & 0x02; 1857 #ifdef DEBUG 1858 if (nthreads_per_core == 1) { 1859 VERIFY0(core->atcore_thr_en[1]); 1860 } 1861 #endif 1862 for (uint32_t thrno = 0; thrno < core->atcore_nthreads; 1863 thrno++) { 1864 ASSERT3U(core->atcore_thr_en[thrno], !=, 0); 1865 1866 zen_apic_id_compose(&azn->azn_apic_decomp, 1867 sockid, dieid, ccd->atccd_log_no, 1868 ccx->atccx_log_no, core->atcore_log_no, 1869 thrno, &core->atcore_apicids[thrno]); 1870 1871 } 1872 } 1873 1874 ASSERT3U(logccx_set, ==, B_TRUE); 1875 ASSERT3U(logccd_set, ==, B_TRUE); 1876 } 1877 } 1878 1879 static void 1880 amdzen_nexus_init(void *arg) 1881 { 1882 uint_t i; 1883 amdzen_t *azn = arg; 1884 1885 /* 1886 
* First go through all of the stubs and assign the DF entries. 1887 */ 1888 mutex_enter(&azn->azn_mutex); 1889 if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) { 1890 azn->azn_flags |= AMDZEN_F_MAP_ERROR; 1891 goto done; 1892 } 1893 1894 for (i = 0; i < AMDZEN_MAX_DFS; i++) { 1895 amdzen_df_t *df = &azn->azn_dfs[i]; 1896 1897 if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) 1898 continue; 1899 amdzen_setup_df(azn, df); 1900 amdzen_find_nb(azn, df); 1901 } 1902 1903 if (amdzen_determine_apic_decomp(azn)) { 1904 azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID; 1905 } 1906 1907 amdzen_determine_ncore_per_ccx(azn); 1908 1909 /* 1910 * Not all children may be installed. As such, we do not treat the 1911 * failure of a child as fatal to the driver. 1912 */ 1913 mutex_exit(&azn->azn_mutex); 1914 for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) { 1915 (void) amdzen_create_child(azn, &amdzen_children[i]); 1916 } 1917 mutex_enter(&azn->azn_mutex); 1918 1919 done: 1920 azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED; 1921 azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE; 1922 azn->azn_taskqid = TASKQID_INVALID; 1923 cv_broadcast(&azn->azn_cv); 1924 mutex_exit(&azn->azn_mutex); 1925 } 1926 1927 static int 1928 amdzen_stub_scan_cb(dev_info_t *dip, void *arg) 1929 { 1930 amdzen_t *azn = arg; 1931 uint16_t vid, did; 1932 int *regs; 1933 uint_t nregs, i; 1934 boolean_t match = B_FALSE; 1935 1936 if (dip == ddi_root_node()) { 1937 return (DDI_WALK_CONTINUE); 1938 } 1939 1940 /* 1941 * If a node in question is not a pci node, then we have no interest in 1942 * it as all the stubs that we care about are related to pci devices. 1943 */ 1944 if (strncmp("pci", ddi_get_name(dip), 3) != 0) { 1945 return (DDI_WALK_PRUNECHILD); 1946 } 1947 1948 /* 1949 * If we can't get a device or vendor ID and prove that this is an AMD 1950 * part, then we don't care about it. 
1951 */ 1952 vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1953 "vendor-id", PCI_EINVAL16); 1954 did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1955 "device-id", PCI_EINVAL16); 1956 if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) { 1957 return (DDI_WALK_CONTINUE); 1958 } 1959 1960 if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) { 1961 return (DDI_WALK_CONTINUE); 1962 } 1963 1964 for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) { 1965 if (amdzen_nb_ids[i] == did) { 1966 match = B_TRUE; 1967 } 1968 } 1969 1970 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 1971 "reg", ®s, &nregs) != DDI_PROP_SUCCESS) { 1972 return (DDI_WALK_CONTINUE); 1973 } 1974 1975 if (nregs == 0) { 1976 ddi_prop_free(regs); 1977 return (DDI_WALK_CONTINUE); 1978 } 1979 1980 if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO && 1981 PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) { 1982 match = B_TRUE; 1983 } 1984 1985 ddi_prop_free(regs); 1986 if (match) { 1987 mutex_enter(&azn->azn_mutex); 1988 azn->azn_nscanned++; 1989 mutex_exit(&azn->azn_mutex); 1990 } 1991 1992 return (DDI_WALK_CONTINUE); 1993 } 1994 1995 static void 1996 amdzen_stub_scan(void *arg) 1997 { 1998 amdzen_t *azn = arg; 1999 2000 mutex_enter(&azn->azn_mutex); 2001 azn->azn_nscanned = 0; 2002 mutex_exit(&azn->azn_mutex); 2003 2004 ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn); 2005 2006 mutex_enter(&azn->azn_mutex); 2007 azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED; 2008 azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE; 2009 2010 if (azn->azn_nscanned == 0) { 2011 azn->azn_flags |= AMDZEN_F_UNSUPPORTED; 2012 azn->azn_taskqid = TASKQID_INVALID; 2013 cv_broadcast(&azn->azn_cv); 2014 } else if (azn->azn_npresent == azn->azn_nscanned) { 2015 azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED; 2016 azn->azn_taskqid = taskq_dispatch(system_taskq, 2017 amdzen_nexus_init, azn, TQ_SLEEP); 2018 } 2019 mutex_exit(&azn->azn_mutex); 2020 } 2021 2022 /* 2023 * Unfortunately we can't 
really let the stubs detach as we may need them to be 2024 * available for client operations. We may be able to improve this if we know 2025 * that the actual nexus is going away. However, as long as it's active, we need 2026 * all the stubs. 2027 */ 2028 int 2029 amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd) 2030 { 2031 if (cmd == DDI_SUSPEND) { 2032 return (DDI_SUCCESS); 2033 } 2034 2035 return (DDI_FAILURE); 2036 } 2037 2038 int 2039 amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd) 2040 { 2041 int *regs, reg; 2042 uint_t nregs, i; 2043 uint16_t vid, did; 2044 amdzen_stub_t *stub; 2045 amdzen_t *azn = amdzen_data; 2046 boolean_t valid = B_FALSE; 2047 boolean_t nb = B_FALSE; 2048 2049 if (cmd == DDI_RESUME) { 2050 return (DDI_SUCCESS); 2051 } else if (cmd != DDI_ATTACH) { 2052 return (DDI_FAILURE); 2053 } 2054 2055 /* 2056 * Make sure that the stub that we've been asked to attach is a pci type 2057 * device. If not, then there is no reason for us to proceed. 2058 */ 2059 if (strncmp("pci", ddi_get_name(dip), 3) != 0) { 2060 dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus " 2061 "stub: %s", ddi_get_name(dip)); 2062 return (DDI_FAILURE); 2063 } 2064 vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2065 "vendor-id", PCI_EINVAL16); 2066 did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2067 "device-id", PCI_EINVAL16); 2068 if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) { 2069 dev_err(dip, CE_WARN, "failed to get PCI ID properties"); 2070 return (DDI_FAILURE); 2071 } 2072 2073 if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) { 2074 dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x", 2075 cpuid_getvendor(CPU) == X86_VENDOR_HYGON ? 
2076 AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid); 2077 return (DDI_FAILURE); 2078 } 2079 2080 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2081 "reg", ®s, &nregs) != DDI_PROP_SUCCESS) { 2082 dev_err(dip, CE_WARN, "failed to get 'reg' property"); 2083 return (DDI_FAILURE); 2084 } 2085 2086 if (nregs == 0) { 2087 ddi_prop_free(regs); 2088 dev_err(dip, CE_WARN, "missing 'reg' property values"); 2089 return (DDI_FAILURE); 2090 } 2091 reg = *regs; 2092 ddi_prop_free(regs); 2093 2094 for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) { 2095 if (amdzen_nb_ids[i] == did) { 2096 valid = B_TRUE; 2097 nb = B_TRUE; 2098 } 2099 } 2100 2101 if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO && 2102 PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) { 2103 valid = B_TRUE; 2104 nb = B_FALSE; 2105 } 2106 2107 if (!valid) { 2108 dev_err(dip, CE_WARN, "device %s didn't match the nexus list", 2109 ddi_get_name(dip)); 2110 return (DDI_FAILURE); 2111 } 2112 2113 stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP); 2114 if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) { 2115 dev_err(dip, CE_WARN, "failed to set up config space"); 2116 kmem_free(stub, sizeof (amdzen_stub_t)); 2117 return (DDI_FAILURE); 2118 } 2119 2120 stub->azns_dip = dip; 2121 stub->azns_vid = vid; 2122 stub->azns_did = did; 2123 stub->azns_bus = PCI_REG_BUS_G(reg); 2124 stub->azns_dev = PCI_REG_DEV_G(reg); 2125 stub->azns_func = PCI_REG_FUNC_G(reg); 2126 ddi_set_driver_private(dip, stub); 2127 2128 mutex_enter(&azn->azn_mutex); 2129 azn->azn_npresent++; 2130 if (nb) { 2131 list_insert_tail(&azn->azn_nb_stubs, stub); 2132 } else { 2133 list_insert_tail(&azn->azn_df_stubs, stub); 2134 } 2135 2136 if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE && 2137 azn->azn_nscanned == azn->azn_npresent) { 2138 azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED; 2139 azn->azn_taskqid = taskq_dispatch(system_taskq, 2140 amdzen_nexus_init, azn, TQ_SLEEP); 2141 } 2142 
mutex_exit(&azn->azn_mutex); 2143 2144 return (DDI_SUCCESS); 2145 } 2146 2147 static int 2148 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop, 2149 void *arg, void *result) 2150 { 2151 char buf[32]; 2152 dev_info_t *child; 2153 const amdzen_child_data_t *acd; 2154 2155 switch (ctlop) { 2156 case DDI_CTLOPS_REPORTDEV: 2157 if (rdip == NULL) { 2158 return (DDI_FAILURE); 2159 } 2160 cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n", 2161 ddi_node_name(rdip), ddi_get_name_addr(rdip), 2162 ddi_driver_name(rdip), ddi_get_instance(rdip)); 2163 break; 2164 case DDI_CTLOPS_INITCHILD: 2165 child = arg; 2166 if (child == NULL) { 2167 dev_err(dip, CE_WARN, "!no child passed for " 2168 "DDI_CTLOPS_INITCHILD"); 2169 } 2170 2171 acd = ddi_get_parent_data(child); 2172 if (acd == NULL) { 2173 dev_err(dip, CE_WARN, "!missing child parent data"); 2174 return (DDI_FAILURE); 2175 } 2176 2177 if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >= 2178 sizeof (buf)) { 2179 dev_err(dip, CE_WARN, "!failed to construct device " 2180 "addr due to overflow"); 2181 return (DDI_FAILURE); 2182 } 2183 2184 ddi_set_name_addr(child, buf); 2185 break; 2186 case DDI_CTLOPS_UNINITCHILD: 2187 child = arg; 2188 if (child == NULL) { 2189 dev_err(dip, CE_WARN, "!no child passed for " 2190 "DDI_CTLOPS_UNINITCHILD"); 2191 } 2192 2193 ddi_set_name_addr(child, NULL); 2194 break; 2195 default: 2196 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 2197 } 2198 return (DDI_SUCCESS); 2199 } 2200 2201 static int 2202 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2203 { 2204 minor_t m; 2205 amdzen_t *azn = amdzen_data; 2206 2207 if (crgetzoneid(credp) != GLOBAL_ZONEID || 2208 secpolicy_sys_config(credp, B_FALSE) != 0) { 2209 return (EPERM); 2210 } 2211 2212 if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) { 2213 return (EINVAL); 2214 } 2215 2216 if (otyp != OTYP_CHR) { 2217 return (EINVAL); 2218 } 2219 2220 m = getminor(*devp); 2221 if (m != AMDZEN_MINOR_TOPO) { 2222 return 
(ENXIO); 2223 } 2224 2225 mutex_enter(&azn->azn_mutex); 2226 if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) != 2227 AMDZEN_F_ATTACH_COMPLETE) { 2228 mutex_exit(&azn->azn_mutex); 2229 return (ENOTSUP); 2230 } 2231 mutex_exit(&azn->azn_mutex); 2232 2233 return (0); 2234 } 2235 2236 static int 2237 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode) 2238 { 2239 amdzen_topo_base_t base; 2240 2241 bzero(&base, sizeof (base)); 2242 mutex_enter(&azn->azn_mutex); 2243 base.atb_ndf = azn->azn_ndfs; 2244 2245 if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) { 2246 mutex_exit(&azn->azn_mutex); 2247 return (ENOTSUP); 2248 } 2249 2250 base.atb_apic_decomp = azn->azn_apic_decomp; 2251 for (uint_t i = 0; i < azn->azn_ndfs; i++) { 2252 const amdzen_df_t *df = &azn->azn_dfs[i]; 2253 2254 base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents); 2255 if (i == 0) { 2256 base.atb_rev = df->adf_rev; 2257 base.atb_df_decomp = df->adf_decomp; 2258 } 2259 } 2260 mutex_exit(&azn->azn_mutex); 2261 2262 if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base), 2263 mode & FKIOCTL) != 0) { 2264 return (EFAULT); 2265 } 2266 2267 return (0); 2268 } 2269 2270 /* 2271 * Fill in the peers. The way we do is this is to just fill in all the entries 2272 * and then zero out the ones that aren't valid. 
2273 */ 2274 static void 2275 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_ent_t *ent, 2276 amdzen_topo_df_ent_t *topo_ent) 2277 { 2278 topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0); 2279 topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1); 2280 topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1); 2281 topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1); 2282 topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1); 2283 topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2); 2284 topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2); 2285 2286 for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS; 2287 i++) { 2288 topo_ent->atde_peers[i] = 0; 2289 } 2290 } 2291 2292 static void 2293 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent, 2294 amdzen_topo_df_ent_t *topo_ent) 2295 { 2296 const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm; 2297 amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm; 2298 2299 topo_ccm->atcd_nccds = ccm->acd_nccds; 2300 for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) { 2301 topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i]; 2302 topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i]; 2303 } 2304 } 2305 2306 static int 2307 amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode) 2308 { 2309 uint_t model; 2310 uint32_t max_ents, nwritten; 2311 const amdzen_df_t *df; 2312 amdzen_topo_df_t topo_df; 2313 #ifdef _MULTI_DATAMODEL 2314 amdzen_topo_df32_t topo_df32; 2315 #endif 2316 2317 model = ddi_model_convert_from(mode); 2318 switch (model) { 2319 #ifdef _MULTI_DATAMODEL 2320 case DDI_MODEL_ILP32: 2321 if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32, 2322 sizeof (topo_df32), mode & FKIOCTL) != 0) { 2323 return (EFAULT); 2324 } 2325 bzero(&topo_df, sizeof (topo_df)); 2326 topo_df.atd_dfno = topo_df32.atd_dfno; 2327 topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents; 2328 
topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents; 2329 break; 2330 #endif 2331 case DDI_MODEL_NONE: 2332 if (ddi_copyin((void *)(uintptr_t)arg, &topo_df, 2333 sizeof (topo_df), mode & FKIOCTL) != 0) { 2334 return (EFAULT); 2335 } 2336 break; 2337 default: 2338 return (ENOTSUP); 2339 } 2340 2341 mutex_enter(&azn->azn_mutex); 2342 if (topo_df.atd_dfno >= azn->azn_ndfs) { 2343 mutex_exit(&azn->azn_mutex); 2344 return (EINVAL); 2345 } 2346 2347 df = &azn->azn_dfs[topo_df.atd_dfno]; 2348 topo_df.atd_nodeid = df->adf_nodeid; 2349 topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >> 2350 df->adf_decomp.dfd_sock_shift; 2351 topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >> 2352 df->adf_decomp.dfd_die_shift; 2353 topo_df.atd_rev = df->adf_rev; 2354 topo_df.atd_df_act_nents = df->adf_nents; 2355 max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents); 2356 2357 if (topo_df.atd_df_ents == NULL) { 2358 topo_df.atd_df_buf_nvalid = 0; 2359 mutex_exit(&azn->azn_mutex); 2360 goto copyout; 2361 } 2362 2363 nwritten = 0; 2364 for (uint32_t i = 0; i < max_ents; i++) { 2365 amdzen_topo_df_ent_t topo_ent; 2366 const amdzen_df_ent_t *ent = &df->adf_ents[i]; 2367 2368 /* 2369 * We opt not to include disabled elements right now. They 2370 * generally don't have a valid type and there isn't much useful 2371 * information we can get from them. This can be changed if we 2372 * find a use case for them for userland topo. 
2373 */ 2374 if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) 2375 continue; 2376 2377 bzero(&topo_ent, sizeof (topo_ent)); 2378 topo_ent.atde_type = ent->adfe_type; 2379 topo_ent.atde_subtype = ent->adfe_subtype; 2380 topo_ent.atde_fabric_id = ent->adfe_fabric_id; 2381 topo_ent.atde_inst_id = ent->adfe_inst_id; 2382 amdzen_topo_ioctl_df_fill_peers(ent, &topo_ent); 2383 2384 if (ent->adfe_type == DF_TYPE_CCM && 2385 ent->adfe_subtype == DF_CCM_SUBTYPE_CPU) { 2386 amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent); 2387 } 2388 2389 if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten], 2390 sizeof (topo_ent), mode & FKIOCTL) != 0) { 2391 mutex_exit(&azn->azn_mutex); 2392 return (EFAULT); 2393 } 2394 nwritten++; 2395 } 2396 mutex_exit(&azn->azn_mutex); 2397 2398 topo_df.atd_df_buf_nvalid = nwritten; 2399 copyout: 2400 switch (model) { 2401 #ifdef _MULTI_DATAMODEL 2402 case DDI_MODEL_ILP32: 2403 topo_df32.atd_nodeid = topo_df.atd_nodeid; 2404 topo_df32.atd_sockid = topo_df.atd_sockid; 2405 topo_df32.atd_dieid = topo_df.atd_dieid; 2406 topo_df32.atd_rev = topo_df.atd_rev; 2407 topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid; 2408 topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents; 2409 2410 if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg, 2411 sizeof (topo_df32), mode & FKIOCTL) != 0) { 2412 return (EFAULT); 2413 } 2414 break; 2415 #endif 2416 case DDI_MODEL_NONE: 2417 if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg, 2418 sizeof (topo_df), mode & FKIOCTL) != 0) { 2419 return (EFAULT); 2420 } 2421 break; 2422 default: 2423 break; 2424 } 2425 2426 2427 return (0); 2428 } 2429 2430 static int 2431 amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode) 2432 { 2433 amdzen_topo_ccd_t ccd, *ccdp; 2434 amdzen_df_t *df; 2435 amdzen_df_ent_t *ent; 2436 amdzen_ccm_data_t *ccm; 2437 uint32_t ccdno; 2438 size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err); 2439 2440 /* 2441 * Only copy in the identifying information so that way we can ensure 2442 * the 
rest of the structure we return to the user doesn't contain 2443 * anything unexpected in it. 2444 */ 2445 bzero(&ccd, sizeof (ccd)); 2446 if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size, 2447 mode & FKIOCTL) != 0) { 2448 return (EFAULT); 2449 } 2450 2451 mutex_enter(&azn->azn_mutex); 2452 if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) { 2453 ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP; 2454 goto copyout; 2455 } 2456 2457 df = amdzen_df_find(azn, ccd.atccd_dfno); 2458 if (df == NULL) { 2459 ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO; 2460 goto copyout; 2461 } 2462 2463 /* 2464 * We don't have enough information to know how to construct this 2465 * information in Zen 1 at this time, so refuse. 2466 */ 2467 if (df->adf_rev <= DF_REV_2) { 2468 ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED; 2469 goto copyout; 2470 } 2471 2472 ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid); 2473 if (ent == NULL) { 2474 ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID; 2475 goto copyout; 2476 } 2477 2478 if (ent->adfe_type != DF_TYPE_CCM || 2479 ent->adfe_subtype != DF_CCM_SUBTYPE_CPU) { 2480 ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD; 2481 goto copyout; 2482 } 2483 2484 ccm = &ent->adfe_data.aded_ccm; 2485 for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) { 2486 if (ccm->acd_ccd_en[ccdno] != 0 && 2487 ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) { 2488 break; 2489 } 2490 } 2491 2492 if (ccdno == DF_MAX_CCDS_PER_CCM) { 2493 ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD; 2494 goto copyout; 2495 } 2496 2497 if (ccm->acd_ccd_data[ccdno] == NULL) { 2498 /* 2499 * We don't actually have this data. Go fill it out and save it 2500 * for future use. 
2501 */ 2502 ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY); 2503 if (ccdp == NULL) { 2504 mutex_exit(&azn->azn_mutex); 2505 return (ENOMEM); 2506 } 2507 2508 ccdp->atccd_dfno = ccd.atccd_dfno; 2509 ccdp->atccd_instid = ccd.atccd_instid; 2510 ccdp->atccd_phys_no = ccd.atccd_phys_no; 2511 amdzen_ccd_fill_topo(azn, df, ent, ccdp); 2512 ccm->acd_ccd_data[ccdno] = ccdp; 2513 } 2514 ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL); 2515 bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd)); 2516 2517 copyout: 2518 mutex_exit(&azn->azn_mutex); 2519 if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd), 2520 mode & FKIOCTL) != 0) { 2521 return (EFAULT); 2522 } 2523 2524 return (0); 2525 } 2526 2527 static int 2528 amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 2529 cred_t *credp, int *rvalp) 2530 { 2531 int ret; 2532 amdzen_t *azn = amdzen_data; 2533 2534 if (getminor(dev) != AMDZEN_MINOR_TOPO) { 2535 return (ENXIO); 2536 } 2537 2538 if ((mode & FREAD) == 0) { 2539 return (EBADF); 2540 } 2541 2542 switch (cmd) { 2543 case AMDZEN_TOPO_IOCTL_BASE: 2544 ret = amdzen_topo_ioctl_base(azn, arg, mode); 2545 break; 2546 case AMDZEN_TOPO_IOCTL_DF: 2547 ret = amdzen_topo_ioctl_df(azn, arg, mode); 2548 break; 2549 case AMDZEN_TOPO_IOCTL_CCD: 2550 ret = amdzen_topo_ioctl_ccd(azn, arg, mode); 2551 break; 2552 default: 2553 ret = ENOTTY; 2554 break; 2555 } 2556 2557 return (ret); 2558 } 2559 2560 static int 2561 amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp) 2562 { 2563 if (otyp != OTYP_CHR) { 2564 return (EINVAL); 2565 } 2566 2567 if (getminor(dev) != AMDZEN_MINOR_TOPO) { 2568 return (ENXIO); 2569 } 2570 2571 return (0); 2572 } 2573 2574 static int 2575 amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 2576 { 2577 amdzen_t *azn = amdzen_data; 2578 2579 if (cmd == DDI_RESUME) { 2580 return (DDI_SUCCESS); 2581 } else if (cmd != DDI_ATTACH) { 2582 return (DDI_FAILURE); 2583 } 2584 2585 mutex_enter(&azn->azn_mutex); 2586 if (azn->azn_dip 
!= NULL) { 2587 dev_err(dip, CE_WARN, "driver is already attached!"); 2588 mutex_exit(&azn->azn_mutex); 2589 return (DDI_FAILURE); 2590 } 2591 2592 if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO, 2593 DDI_PSEUDO, 0) != 0) { 2594 dev_err(dip, CE_WARN, "failed to create topo minor node!"); 2595 mutex_exit(&azn->azn_mutex); 2596 return (DDI_FAILURE); 2597 } 2598 2599 azn->azn_dip = dip; 2600 azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan, 2601 azn, TQ_SLEEP); 2602 azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED; 2603 mutex_exit(&azn->azn_mutex); 2604 2605 return (DDI_SUCCESS); 2606 } 2607 2608 static int 2609 amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 2610 { 2611 amdzen_t *azn = amdzen_data; 2612 2613 if (cmd == DDI_SUSPEND) { 2614 return (DDI_SUCCESS); 2615 } else if (cmd != DDI_DETACH) { 2616 return (DDI_FAILURE); 2617 } 2618 2619 mutex_enter(&azn->azn_mutex); 2620 while (azn->azn_taskqid != TASKQID_INVALID) { 2621 cv_wait(&azn->azn_cv, &azn->azn_mutex); 2622 } 2623 2624 /* 2625 * If we've attached any stub drivers, e.g. this platform is important 2626 * for us, then we fail detach. 
2627 */ 2628 if (!list_is_empty(&azn->azn_df_stubs) || 2629 !list_is_empty(&azn->azn_nb_stubs)) { 2630 mutex_exit(&azn->azn_mutex); 2631 return (DDI_FAILURE); 2632 } 2633 2634 ddi_remove_minor_node(azn->azn_dip, NULL); 2635 azn->azn_dip = NULL; 2636 mutex_exit(&azn->azn_mutex); 2637 2638 return (DDI_SUCCESS); 2639 } 2640 2641 static void 2642 amdzen_free(void) 2643 { 2644 if (amdzen_data == NULL) { 2645 return; 2646 } 2647 2648 VERIFY(list_is_empty(&amdzen_data->azn_df_stubs)); 2649 list_destroy(&amdzen_data->azn_df_stubs); 2650 VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs)); 2651 list_destroy(&amdzen_data->azn_nb_stubs); 2652 cv_destroy(&amdzen_data->azn_cv); 2653 mutex_destroy(&amdzen_data->azn_mutex); 2654 kmem_free(amdzen_data, sizeof (amdzen_t)); 2655 amdzen_data = NULL; 2656 } 2657 2658 static void 2659 amdzen_alloc(void) 2660 { 2661 amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP); 2662 mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL); 2663 list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t), 2664 offsetof(amdzen_stub_t, azns_link)); 2665 list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t), 2666 offsetof(amdzen_stub_t, azns_link)); 2667 cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL); 2668 } 2669 2670 static struct cb_ops amdzen_topo_cb_ops = { 2671 .cb_open = amdzen_topo_open, 2672 .cb_close = amdzen_topo_close, 2673 .cb_strategy = nodev, 2674 .cb_print = nodev, 2675 .cb_dump = nodev, 2676 .cb_read = nodev, 2677 .cb_write = nodev, 2678 .cb_ioctl = amdzen_topo_ioctl, 2679 .cb_devmap = nodev, 2680 .cb_mmap = nodev, 2681 .cb_segmap = nodev, 2682 .cb_chpoll = nochpoll, 2683 .cb_prop_op = ddi_prop_op, 2684 .cb_flag = D_MP, 2685 .cb_rev = CB_REV, 2686 .cb_aread = nodev, 2687 .cb_awrite = nodev 2688 }; 2689 2690 struct bus_ops amdzen_bus_ops = { 2691 .busops_rev = BUSO_REV, 2692 .bus_map = nullbusmap, 2693 .bus_dma_map = ddi_no_dma_map, 2694 .bus_dma_allochdl = ddi_no_dma_allochdl, 2695 .bus_dma_freehdl = 
ddi_no_dma_freehdl, 2696 .bus_dma_bindhdl = ddi_no_dma_bindhdl, 2697 .bus_dma_unbindhdl = ddi_no_dma_unbindhdl, 2698 .bus_dma_flush = ddi_no_dma_flush, 2699 .bus_dma_win = ddi_no_dma_win, 2700 .bus_dma_ctl = ddi_no_dma_mctl, 2701 .bus_prop_op = ddi_bus_prop_op, 2702 .bus_ctl = amdzen_bus_ctl 2703 }; 2704 2705 static struct dev_ops amdzen_dev_ops = { 2706 .devo_rev = DEVO_REV, 2707 .devo_refcnt = 0, 2708 .devo_getinfo = nodev, 2709 .devo_identify = nulldev, 2710 .devo_probe = nulldev, 2711 .devo_attach = amdzen_attach, 2712 .devo_detach = amdzen_detach, 2713 .devo_reset = nodev, 2714 .devo_quiesce = ddi_quiesce_not_needed, 2715 .devo_bus_ops = &amdzen_bus_ops, 2716 .devo_cb_ops = &amdzen_topo_cb_ops 2717 }; 2718 2719 static struct modldrv amdzen_modldrv = { 2720 .drv_modops = &mod_driverops, 2721 .drv_linkinfo = "AMD Zen Nexus Driver", 2722 .drv_dev_ops = &amdzen_dev_ops 2723 }; 2724 2725 static struct modlinkage amdzen_modlinkage = { 2726 .ml_rev = MODREV_1, 2727 .ml_linkage = { &amdzen_modldrv, NULL } 2728 }; 2729 2730 int 2731 _init(void) 2732 { 2733 int ret; 2734 2735 if (cpuid_getvendor(CPU) != X86_VENDOR_AMD && 2736 cpuid_getvendor(CPU) != X86_VENDOR_HYGON) { 2737 return (ENOTSUP); 2738 } 2739 2740 if ((ret = mod_install(&amdzen_modlinkage)) == 0) { 2741 amdzen_alloc(); 2742 } 2743 2744 return (ret); 2745 } 2746 2747 int 2748 _info(struct modinfo *modinfop) 2749 { 2750 return (mod_info(&amdzen_modlinkage, modinfop)); 2751 } 2752 2753 int 2754 _fini(void) 2755 { 2756 int ret; 2757 2758 if ((ret = mod_remove(&amdzen_modlinkage)) == 0) { 2759 amdzen_free(); 2760 } 2761 2762 return (ret); 2763 } 2764