xref: /illumos-gate/usr/src/common/mc/imc/imc_decode.c (revision eb00b1c8)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2019 Joyent, Inc.
14  */
15 
16 /*
17  * Memory decoding logic.
18  *
19  * This file is part of the 'imc' driver on x86. It supports taking a physical
20  * address and determining what the corresponding DIMM is. This is shared
21  * between the kernel and userland for easier testing.
22  *
23  * For more information about the different parts of the decoding process,
24  * please see the file 'uts/i86pc/io/imc/imc.c'.
25  */
26 
27 #include <sys/sysmacros.h>
28 
29 #ifndef _KERNEL
30 #include <stdint.h>
31 #include <strings.h>
32 #define	BITX(u, h, l)	(((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
33 #endif	/* !_KERNEL */
34 
35 #include "imc.h"
36 
37 /*
38  * Address ranges for decoding system addresses. There are three ranges that
39  * exist on x86, traditional DOS memory (hi 640 KiB), low memory, and high
40  * memory. Low memory always starts at 1 MiB and high memory always starts at 4
41  * GiB. The upper bounds of these ranges is based on registers on the system.
42  */
43 #define	IMC_DECODE_CONV_BASE	0UL
44 #define	IMC_DECODE_CONV_MAX	0x00009ffffULL	/* 640 KiB - 1 */
45 #define	IMC_DECODE_LOW_BASE	0x000100000ULL	/* 1 M */
46 #define	IMC_DECODE_HIGH_BASE	0x100000000ULL /* 4 GiB */
47 
/*
 * Describes one fixed physical-address window that the platform reserves
 * (e.g. for firmware or device decode) and that therefore never maps to DRAM.
 */
typedef struct imc_legacy_range {
	uint64_t	ilr_base;	/* starting physical address */
	size_t		ilr_len;	/* length of the window in bytes */
	const char	*ilr_desc;	/* human-readable name, for diagnostics */
} imc_legacy_range_t;

/*
 * These represent regions of memory that are reserved for use and will not be
 * decoded by DRAM.
 *
 * NOTE(review): entries are not sorted and some overlap (e.g. the 16 MiB
 * "Firmware" window at 0xFF000000 sits inside the 32 MiB "Unknown" window at
 * 0xFE000000); the lookup below returns the first match, so order matters.
 */
static imc_legacy_range_t imc_legacy_ranges[] = {
	{ 0x00000A0000ULL,	128 * 1024,	"VGA" },
	{ 0x00000C0000ULL,	256 * 1024,	"PAM" },
	{ 0x0000F00000ULL,	1024 * 1024,	"Reserved" },
	{ 0x00FE000000ULL,	32 * 1024 * 1024, "Unknown" },
	{ 0x00FF000000ULL,	16 * 1024 * 1024, "Firmware" },
	{ 0x00FED20000ULL,	384 * 1024,	"TXT" },
	{ 0x00FED00000ULL,	1024 * 1024,	"PCH" },
	{ 0x00FEC00000ULL,	1024 * 1024,	"IOAPIC" },
	{ 0x00FEB80000ULL,	512 * 1024,	"Reserved" },
	{ 0x00FEB00000ULL,	64 * 1024,	"Reserved" }
};
70 
71 /*
72  * Determine whether or not this address is in one of the reserved regions or if
73  * it falls outside of the explicit DRAM ranges.
74  */
75 static boolean_t
imc_decode_addr_resvd(const imc_t * imc,imc_decode_state_t * dec)76 imc_decode_addr_resvd(const imc_t *imc, imc_decode_state_t *dec)
77 {
78 	uint_t i;
79 	const imc_sad_t *sad;
80 
81 	for (i = 0; i < ARRAY_SIZE(imc_legacy_ranges); i++) {
82 		uint64_t end = imc_legacy_ranges[i].ilr_base +
83 		    imc_legacy_ranges[i].ilr_len;
84 
85 		if (dec->ids_pa >= imc_legacy_ranges[i].ilr_base &&
86 		    dec->ids_pa < end) {
87 			dec->ids_fail = IMC_DECODE_F_LEGACY_RANGE;
88 			dec->ids_fail_data = i;
89 			return (B_TRUE);
90 		}
91 	}
92 
93 	/*
94 	 * For checking and determining whether or not we fit in DRAM, we need
95 	 * to check against the top of low memory and the top of high memory.
96 	 * While we technically have this information on a per-socket basis, we
97 	 * have to rely on the fact that both processors have the same
98 	 * information. A requirement which if not true, would lead to chaos
99 	 * depending on what socket we're running on.
100 	 */
101 	sad = &imc->imc_sockets[0].isock_sad;
102 	if (sad->isad_valid != IMC_SAD_V_VALID) {
103 		dec->ids_fail = IMC_DECODE_F_BAD_SAD;
104 		return (B_TRUE);
105 	}
106 
107 	/*
108 	 * An address may fall into three ranges. It may fall into conventional
109 	 * memory. It may fall into low memory. It may fall into high memory.
110 	 * The conventional memory range is inclusive at the top. The others
111 	 * have been translated such that they are uniformly exclusive at the
112 	 * top. Because the bottom of conventional memory is at zero, the
113 	 * compiler will be angry if we compare against IMC_DECODE_CONV_BASE as
114 	 * it is always true.
115 	 */
116 	if (dec->ids_pa <= IMC_DECODE_CONV_MAX) {
117 		return (B_FALSE);
118 	}
119 
120 	if (dec->ids_pa >= IMC_DECODE_LOW_BASE &&
121 	    dec->ids_pa < sad->isad_tolm) {
122 		return (B_FALSE);
123 	}
124 
125 	if (dec->ids_pa >= IMC_DECODE_HIGH_BASE &&
126 	    dec->ids_pa < sad->isad_tohm) {
127 		return (B_FALSE);
128 	}
129 
130 	/*
131 	 * Memory fell outside of the valid range. It's not for us.
132 	 */
133 	dec->ids_fail = IMC_DECODE_F_OUTSIDE_DRAM;
134 	return (B_TRUE);
135 }
136 
137 static uint_t
imc_decode_sad_interleave(const imc_sad_rule_t * rule,uint64_t pa)138 imc_decode_sad_interleave(const imc_sad_rule_t *rule, uint64_t pa)
139 {
140 	uint_t itgt = 0;
141 
142 	switch (rule->isr_imode) {
143 	case IMC_SAD_IMODE_8t6:
144 		if (rule->isr_a7mode) {
145 			itgt = BITX(pa, 9, 9);
146 			itgt |= (BITX(pa, 8, 7) << 1);
147 		} else {
148 			itgt = BITX(pa, 8, 6);
149 		}
150 		break;
151 	case IMC_SAD_IMODE_8t6XOR:
152 		if (rule->isr_a7mode) {
153 			itgt = BITX(pa, 9, 9);
154 			itgt |= (BITX(pa, 8, 7) << 1);
155 		} else {
156 			itgt = BITX(pa, 8, 6);
157 		}
158 		itgt ^= BITX(pa, 18, 16);
159 		break;
160 	case IMC_SAD_IMODE_10t8:
161 		itgt = BITX(pa, 10, 8);
162 		break;
163 	case IMC_SAD_IMODE_14t12:
164 		itgt = BITX(pa, 14, 12);
165 		break;
166 	case IMC_SAD_IMODE_32t30:
167 		itgt = BITX(pa, 32, 30);
168 		break;
169 	}
170 
171 	return (itgt);
172 }
173 
174 /*
175  * Use the system address decoder to try and find a valid SAD entry for this
176  * address. We always use socket zero's SAD as the SAD rules should be the same
177  * between the different sockets.
178  */
179 static boolean_t
imc_decode_sad(const imc_t * imc,imc_decode_state_t * dec)180 imc_decode_sad(const imc_t *imc, imc_decode_state_t *dec)
181 {
182 	uint_t i, ileaveidx;
183 	uint8_t ileavetgt;
184 	uint32_t nodeid, tadid, channelid;
185 	uint64_t base;
186 	const imc_socket_t *socket = &imc->imc_sockets[0];
187 	const imc_sad_t *sad = &socket->isock_sad;
188 	const imc_sad_rule_t *rule;
189 	boolean_t loop = B_FALSE;
190 
191 	/*
192 	 * Note, all SAD rules have been adjusted so that they are uniformly
193 	 * exclusive.
194 	 */
195 start:
196 	for (rule = NULL, i = 0, base = 0; i < sad->isad_nrules; i++) {
197 		rule = &sad->isad_rules[i];
198 
199 		if (rule->isr_enable && dec->ids_pa >= base &&
200 		    dec->ids_pa < rule->isr_limit) {
201 			break;
202 		}
203 
204 		base = rule->isr_limit;
205 	}
206 
207 	if (rule == NULL || i == sad->isad_nrules) {
208 		dec->ids_fail = IMC_DECODE_F_NO_SAD_RULE;
209 		return (B_FALSE);
210 	}
211 
212 	/*
213 	 * Store the SAD rule in the decode information for debugging's sake.
214 	 */
215 	dec->ids_sad = sad;
216 	dec->ids_sad_rule = rule;
217 
218 	/*
219 	 * We have found a SAD rule. We now need to transform that into the
220 	 * corresponding target based on its mode, etc. The way we do this
221 	 * varies based on the generation.
222 	 *
223 	 * The first thing we need to do is to figure out the target in the
224 	 * interleave list.
225 	 */
226 	ileaveidx = imc_decode_sad_interleave(rule, dec->ids_pa);
227 	if (ileaveidx >= rule->isr_ntargets) {
228 		dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
229 		dec->ids_fail_data = ileaveidx;
230 		return (B_FALSE);
231 	}
232 	ileavetgt = rule->isr_targets[ileaveidx];
233 	if (imc->imc_gen >= IMC_GEN_SKYLAKE &&
234 	    IMC_SAD_ILEAVE_SKX_LOCAL(ileavetgt) == 0) {
235 		/*
236 		 * If we're in this case, the interleave rule said we had a
237 		 * remote target. That means we need to find the correct SAD
238 		 * based on the Node ID and then do all of this over again.
239 		 */
240 		nodeid = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
241 
242 		if (loop) {
243 			dec->ids_fail = IMC_DECODE_F_SAD_SEARCH_LOOP;
244 			return (B_FALSE);
245 		}
246 
247 		for (i = 0; i < imc->imc_nsockets; i++) {
248 			if (imc->imc_sockets[i].isock_valid ==
249 			    IMC_SOCKET_V_VALID &&
250 			    imc->imc_sockets[i].isock_nodeid == nodeid) {
251 				socket = &imc->imc_sockets[i];
252 				sad = &imc->imc_sockets[i].isock_sad;
253 				loop = B_TRUE;
254 				goto start;
255 			}
256 		}
257 
258 		dec->ids_fail = IMC_DECODE_F_BAD_REMOTE_MC_ROUTE;
259 		dec->ids_fail_data = nodeid;
260 		return (B_FALSE);
261 	}
262 
263 	/*
264 	 * On some platforms we need to derive the target channel based on the
265 	 * physical address and additional rules in the SAD. If we do, do that
266 	 * here. The idea is that this may overrule the memory channel route
267 	 * table target that was determined from the SAD rule.
268 	 */
269 	if (rule->isr_need_mod3) {
270 		uint64_t addr;
271 		uint8_t channel;
272 
273 		switch (rule->isr_mod_mode) {
274 		case IMC_SAD_MOD_MODE_45t6:
275 			addr = dec->ids_pa >> 6;
276 			break;
277 		case IMC_SAD_MOD_MODE_45t8:
278 			addr = dec->ids_pa >> 8;
279 			break;
280 		case IMC_SAD_MOD_MODE_45t12:
281 			addr = dec->ids_pa >> 12;
282 			break;
283 		default:
284 			dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
285 			return (B_FALSE);
286 		}
287 
288 		switch (rule->isr_mod_type) {
289 		case IMC_SAD_MOD_TYPE_MOD3:
290 			channel = (addr % 3) << 1;
291 			channel |= ileavetgt & 1;
292 			break;
293 		case IMC_SAD_MOD_TYPE_MOD2_01:
294 			channel = (addr % 2) << 1;
295 			channel |= ileavetgt & 1;
296 			break;
297 		case IMC_SAD_MOD_TYPE_MOD2_12:
298 			channel = (addr % 2) << 2;
299 			channel |= (~addr % 2) << 1;
300 			channel |= ileavetgt & 1;
301 			break;
302 		case IMC_SAD_MOD_TYPE_MOD2_02:
303 			channel = (addr % 2) << 2;
304 			channel |= ileavetgt & 1;
305 			break;
306 		default:
307 			dec->ids_fail = IMC_DECODE_F_SAD_BAD_MOD;
308 			return (B_FALSE);
309 		}
310 
311 		ileavetgt = channel;
312 	}
313 
314 	switch (imc->imc_gen) {
315 	case IMC_GEN_SANDY:
316 		/*
317 		 * Sandy Bridge systems only have a single home agent, so the
318 		 * interleave target is always the node id.
319 		 */
320 		nodeid = ileavetgt;
321 		tadid = 0;
322 		channelid = UINT32_MAX;
323 		break;
324 	case IMC_GEN_IVY:
325 	case IMC_GEN_HASWELL:
326 	case IMC_GEN_BROADWELL:
327 		/*
328 		 * On these generations, the interleave NodeID in the SAD
329 		 * encodes both the nodeid and the home agent ID that we care
330 		 * about.
331 		 */
332 		nodeid = IMC_NODEID_IVY_BRD_UPPER(ileavetgt) |
333 		    IMC_NODEID_IVY_BRD_LOWER(ileavetgt);
334 		tadid = IMC_NODEID_IVY_BRD_HA(ileavetgt);
335 		channelid = UINT32_MAX;
336 		break;
337 	case IMC_GEN_SKYLAKE:
338 		/*
339 		 * On Skylake generation systems we take the interleave target
340 		 * and use that to look up both the memory controller and the
341 		 * physical channel in the route table. The nodeid is already
342 		 * known because its SAD rules redirect us.
343 		 */
344 		nodeid = socket->isock_nodeid;
345 		if (ileavetgt > IMC_SAD_ILEAVE_SKX_MAX) {
346 			dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
347 			dec->ids_fail_data = ileavetgt;
348 			return (B_FALSE);
349 		}
350 		ileavetgt = IMC_SAD_ILEAVE_SKX_TARGET(ileavetgt);
351 		if (ileavetgt > sad->isad_mcroute.ismc_nroutes) {
352 			dec->ids_fail = IMC_DECODE_F_BAD_SAD_INTERLEAVE;
353 			dec->ids_fail_data = ileavetgt;
354 			return (B_FALSE);
355 		}
356 		tadid = sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_imc;
357 		channelid =
358 		    sad->isad_mcroute.ismc_mcroutes[ileavetgt].ismce_pchannel;
359 		break;
360 	default:
361 		nodeid = tadid = channelid = UINT32_MAX;
362 		break;
363 	}
364 
365 	/*
366 	 * Map to the correct socket based on the nodeid. Make sure that we have
367 	 * a valid TAD.
368 	 */
369 	dec->ids_socket = NULL;
370 	for (i = 0; i < imc->imc_nsockets; i++) {
371 		if (imc->imc_sockets[i].isock_nodeid == nodeid) {
372 			dec->ids_socket = &imc->imc_sockets[i];
373 			break;
374 		}
375 	}
376 	if (dec->ids_socket == NULL) {
377 		dec->ids_fail = IMC_DECODE_F_SAD_BAD_SOCKET;
378 		dec->ids_fail_data = nodeid;
379 		return (B_FALSE);
380 	}
381 
382 	if (tadid >= dec->ids_socket->isock_ntad) {
383 		dec->ids_fail = IMC_DECODE_F_SAD_BAD_TAD;
384 		dec->ids_fail_data = tadid;
385 		return (B_FALSE);
386 	}
387 
388 	dec->ids_nodeid = nodeid;
389 	dec->ids_tadid = tadid;
390 	dec->ids_channelid = channelid;
391 	dec->ids_tad = &dec->ids_socket->isock_tad[tadid];
392 	dec->ids_mc = &dec->ids_socket->isock_imcs[tadid];
393 
394 	return (B_TRUE);
395 }
396 
397 /*
398  * For Sandy Bridge through Broadwell we need to decode the memory channel that
399  * we're targeting. This is determined based on the number of ways that the
400  * socket and channel are supposed to be interleaved. The TAD has a target
401  * channel list sitting with the TAD rule. To figure out the appropriate index,
402  * the algorithm is roughly:
403  *
404  *    idx = [(dec->ids_pa >> 6) / socket-ways] % channel-ways
405  *
406  * The shift by six, comes from taking the number of bits that are in theory in
407  * the cache line size. Of course, if things were this simple, that'd be great.
408  * The first complication is a7mode / MCChanShiftUpEnable. When this is enabled,
409  * more cache lines are used for this. The next complication comes when the
410  * feature MCChanHashEn is enabled. This means that we have to hash the
411  * resulting address before we do the modulus based on the number of channel
412  * ways.
413  *
414  * The last, and most complicated problem is when the number of channel ways is
415  * set to three. When this is the case, the base address of the range may not
416  * actually start at index zero. The nominal solution is to use the offset
417  * that's programmed on a per-channel basis to offset the system address.
418  * However, to get that information we would have to know what channel we're on,
419  * which is what we're trying to figure out. Regretfully, proclaim that we can't
420  * in this case.
421  */
422 static boolean_t
imc_decode_tad_channel(const imc_t * imc,imc_decode_state_t * dec)423 imc_decode_tad_channel(const imc_t *imc, imc_decode_state_t *dec)
424 {
425 	uint64_t index;
426 	const imc_tad_rule_t *rule = dec->ids_tad_rule;
427 
428 	index = dec->ids_pa >> 6;
429 	if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
430 		index = index >> 1;
431 	}
432 
433 	/*
434 	 * When performing a socket way equals three comparison, this would not
435 	 * work.
436 	 */
437 	index = index / rule->itr_sock_way;
438 
439 	if ((dec->ids_tad->itad_flags & IMC_TAD_FLAG_CHANHASH) != 0) {
440 		uint_t i;
441 		for (i = 12; i < 28; i += 2) {
442 			uint64_t shift = (dec->ids_pa >> i) & 0x3;
443 			index ^= shift;
444 		}
445 	}
446 
447 	index %= rule->itr_chan_way;
448 	if (index >= rule->itr_ntargets) {
449 		dec->ids_fail = IMC_DECODE_F_TAD_BAD_TARGET_INDEX;
450 		dec->ids_fail_data = index;
451 		return (B_FALSE);
452 	}
453 
454 	dec->ids_channelid = rule->itr_targets[index];
455 	return (B_TRUE);
456 }
457 
458 static uint_t
imc_tad_gran_to_shift(const imc_tad_t * tad,imc_tad_gran_t gran)459 imc_tad_gran_to_shift(const imc_tad_t *tad, imc_tad_gran_t gran)
460 {
461 	uint_t shift = 0;
462 
463 	switch (gran) {
464 	case IMC_TAD_GRAN_64B:
465 		shift = 6;
466 		if ((tad->itad_flags & IMC_TAD_FLAG_CHANSHIFT) != 0) {
467 			shift++;
468 		}
469 		break;
470 	case IMC_TAD_GRAN_256B:
471 		shift = 8;
472 		break;
473 	case IMC_TAD_GRAN_4KB:
474 		shift = 12;
475 		break;
476 	case IMC_TAD_GRAN_1GB:
477 		shift = 30;
478 		break;
479 	}
480 
481 	return (shift);
482 }
483 
/*
 * Apply the target address decoder (TAD) for the socket/home agent that the
 * SAD pointed us at: find the matching TAD rule, resolve the memory channel,
 * and compute the channel-relative address. On failure, sets ids_fail (and
 * usually ids_fail_data) and returns B_FALSE.
 */
static boolean_t
imc_decode_tad(const imc_t *imc, imc_decode_state_t *dec)
{
	uint_t i, tadruleno;
	uint_t sockshift, chanshift, sockmask, chanmask;
	uint64_t off, chanaddr;
	const imc_tad_t *tad = dec->ids_tad;
	const imc_mc_t *mc = dec->ids_mc;
	const imc_tad_rule_t *rule = NULL;
	const imc_channel_t *chan;

	/*
	 * The first step in all of this is to determine which TAD rule applies
	 * for this address. Unlike the SAD rules, each TAD rule carries an
	 * explicit [base, limit) range.
	 */
	for (i = 0; i < tad->itad_nrules; i++) {
		rule = &tad->itad_rules[i];

		if (dec->ids_pa >= rule->itr_base &&
		    dec->ids_pa < rule->itr_limit) {
			break;
		}
	}

	/* rule is NULL only if there were no TAD rules at all. */
	if (rule == NULL || i == tad->itad_nrules) {
		dec->ids_fail = IMC_DECODE_F_NO_TAD_RULE;
		return (B_FALSE);
	}
	/* The rule index also selects the per-channel TAD offset below. */
	tadruleno = i;
	dec->ids_tad_rule = rule;

	/*
	 * Check if our TAD rule requires 3-way interleaving on the channel. We
	 * basically can't do that right now. For more information, see the
	 * comment above imc_decode_tad_channel().
	 */
	if (rule->itr_chan_way == 3) {
		dec->ids_fail = IMC_DECODE_F_TAD_3_ILEAVE;
		return (B_FALSE);
	}

	/*
	 * On some platforms, we need to now calculate the channel index from
	 * this. The way that we calculate this is nominally straightforward,
	 * but complicated by a number of different issues.
	 */
	switch (imc->imc_gen) {
	case IMC_GEN_SANDY:
	case IMC_GEN_IVY:
	case IMC_GEN_HASWELL:
	case IMC_GEN_BROADWELL:
		if (!imc_decode_tad_channel(imc, dec)) {
			return (B_FALSE);
		}
		break;
	default:
		/*
		 * On Skylake and newer platforms we should have already decoded
		 * the target channel based on using the memory controller route
		 * table above.
		 */
		break;
	}

	/*
	 * We initialize ids_channelid to UINT32_MAX, so this should make sure
	 * that we catch an incorrect channel as well.
	 */
	if (dec->ids_channelid >= mc->icn_nchannels) {
		dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_ID;
		dec->ids_fail_data = dec->ids_channelid;
		return (B_FALSE);
	}
	chan = &mc->icn_channels[dec->ids_channelid];
	dec->ids_chan = chan;

	/* Each channel carries one offset per TAD rule; bounds-check it. */
	if (tadruleno >= chan->ich_ntad_offsets) {
		dec->ids_fail = IMC_DECODE_F_BAD_CHANNEL_TAD_OFFSET;
		dec->ids_fail_data = tadruleno;
		return (B_FALSE);
	}

	/*
	 * Now we can go ahead and calculate the channel address, which is
	 * roughly equal to:
	 *
	 * chan_addr = (sys_addr - off) / (chan way * sock way).
	 *
	 * The catch is that we want to preserve the low bits where possible.
	 * The number of bits is based on the interleaving granularities, the
	 * way that's calculated is based on information in the TAD rule.
	 * However, if a7mode is enabled on Ivy Bridge through Broadwell, then
	 * we need to add one to that. So we will save the smallest number of
	 * bits that are left after interleaving.
	 *
	 * Because the interleaving occurs at different granularities, we need
	 * to break this into two discrete steps, one where we apply the socket
	 * interleaving and one where we apply the channel interleaving,
	 * shifting and dividing at each step.
	 */
	off = chan->ich_tad_offsets[tadruleno];
	if (off > dec->ids_pa) {
		dec->ids_fail = IMC_DECODE_F_CHANOFF_UNDERFLOW;
		return (B_FALSE);
	}
	chanshift = imc_tad_gran_to_shift(tad, rule->itr_chan_gran);
	sockshift = imc_tad_gran_to_shift(tad, rule->itr_sock_gran);
	chanmask = (1 << chanshift) - 1;
	sockmask = (1 << sockshift) - 1;

	/*
	 * Step 1: remove the socket interleave above sockshift bits while
	 * re-attaching the preserved low bits of the original address.
	 * Step 2: repeat for the channel interleave at chanshift bits.
	 * The shift/divide/shift/or ordering is load-bearing here.
	 */
	chanaddr = dec->ids_pa - off;
	chanaddr >>= sockshift;
	chanaddr /= rule->itr_sock_way;
	chanaddr <<= sockshift;
	chanaddr |= dec->ids_pa & sockmask;
	chanaddr >>= chanshift;
	chanaddr /= rule->itr_chan_way;
	chanaddr <<= chanshift;
	chanaddr |= dec->ids_pa & chanmask;

	dec->ids_chanaddr = chanaddr;

	return (B_TRUE);
}
608 
609 static boolean_t
imc_decode_rir(const imc_t * imc,imc_decode_state_t * dec)610 imc_decode_rir(const imc_t *imc, imc_decode_state_t *dec)
611 {
612 	const imc_mc_t *mc = dec->ids_mc;
613 	const imc_channel_t *chan = dec->ids_chan;
614 	const imc_rank_ileave_t *rir = NULL;
615 	const imc_rank_ileave_entry_t *rirtarg;
616 	const imc_dimm_t *dimm;
617 	uint32_t shift, index;
618 	uint_t i, dimmid, rankid;
619 	uint64_t mask, base, rankaddr;
620 
621 	if (mc->icn_closed) {
622 		shift = IMC_PAGE_BITS_CLOSED;
623 	} else {
624 		shift = IMC_PAGE_BITS_OPEN;
625 	}
626 	mask = (1UL << shift) - 1;
627 
628 	for (i = 0, base = 0; i < chan->ich_nrankileaves; i++) {
629 		rir = &chan->ich_rankileaves[i];
630 		if (rir->irle_enabled && dec->ids_chanaddr >= base &&
631 		    dec->ids_chanaddr < rir->irle_limit) {
632 			break;
633 		}
634 
635 		base = rir->irle_limit;
636 	}
637 
638 	if (rir == NULL || i == chan->ich_nrankileaves) {
639 		dec->ids_fail = IMC_DECODE_F_NO_RIR_RULE;
640 		return (B_FALSE);
641 	}
642 	dec->ids_rir = rir;
643 
644 	/*
645 	 * Determine the index of the rule that we care about. This is done by
646 	 * shifting the address based on the open and closed page bits and then
647 	 * just modding it by the number of ways in question.
648 	 */
649 	index = (dec->ids_chanaddr >> shift) % rir->irle_nways;
650 	if (index >= rir->irle_nentries) {
651 		dec->ids_fail = IMC_DECODE_F_BAD_RIR_ILEAVE_TARGET;
652 		dec->ids_fail_data = index;
653 		return (B_FALSE);
654 	}
655 	rirtarg = &rir->irle_entries[index];
656 
657 	/*
658 	 * The rank interleaving register has information about a physical rank
659 	 * target. This is within the notion of the physical chip selects that
660 	 * exist. While the memory controller only has eight actual chip
661 	 * selects, the physical values that are programmed depend a bit on the
662 	 * underlying hardware. Effectively, in this ID space, each DIMM has
663 	 * four ranks associated with it. Even when we only have two ranks with
664 	 * each physical channel, they'll be programmed so we can simply do the
665 	 * following match:
666 	 *
667 	 * DIMM = rank id / 4
668 	 * RANK = rank id % 4
669 	 */
670 	dec->ids_physrankid = rirtarg->irle_target;
671 	dimmid = dec->ids_physrankid / 4;
672 	rankid = dec->ids_physrankid % 4;
673 
674 	if (dimmid >= chan->ich_ndimms) {
675 		dec->ids_fail = IMC_DECODE_F_BAD_DIMM_INDEX;
676 		dec->ids_fail_data = dimmid;
677 		return (B_FALSE);
678 	}
679 
680 	dimm = &chan->ich_dimms[dimmid];
681 	if (!dimm->idimm_present) {
682 		dec->ids_fail = IMC_DECODE_F_DIMM_NOT_PRESENT;
683 		return (B_FALSE);
684 	}
685 	dec->ids_dimmid = dimmid;
686 	dec->ids_dimm = dimm;
687 
688 	if (rankid >= dimm->idimm_nranks) {
689 		dec->ids_fail = IMC_DECODE_F_BAD_DIMM_RANK;
690 		dec->ids_fail_data = rankid;
691 		return (B_FALSE);
692 	}
693 	dec->ids_rankid = rankid;
694 
695 	/*
696 	 * Calculate the rank address. We need to divide the address by the
697 	 * number of rank ways and then or in the lower bits.
698 	 */
699 	rankaddr = dec->ids_chanaddr;
700 	rankaddr >>= shift;
701 	rankaddr /= rir->irle_nways;
702 	rankaddr <<= shift;
703 	rankaddr |= dec->ids_chanaddr & mask;
704 
705 	if (rirtarg->irle_offset > rankaddr) {
706 		dec->ids_fail = IMC_DECODE_F_RANKOFF_UNDERFLOW;
707 		return (B_FALSE);
708 	}
709 	rankaddr -= rirtarg->irle_offset;
710 	dec->ids_rankaddr = rankaddr;
711 
712 	return (B_TRUE);
713 }
714 
715 boolean_t
imc_decode_pa(const imc_t * imc,uint64_t pa,imc_decode_state_t * dec)716 imc_decode_pa(const imc_t *imc, uint64_t pa, imc_decode_state_t *dec)
717 {
718 	bzero(dec, sizeof (*dec));
719 	dec->ids_pa = pa;
720 	dec->ids_nodeid = dec->ids_tadid = dec->ids_channelid = UINT32_MAX;
721 
722 	/*
723 	 * We need to rely on socket zero's information. Make sure that it both
724 	 * exists and is considered valid.
725 	 */
726 	if (imc->imc_nsockets < 1 ||
727 	    imc->imc_sockets[0].isock_valid != IMC_SOCKET_V_VALID) {
728 		dec->ids_fail = IMC_DECODE_F_BAD_SOCKET;
729 		dec->ids_fail_data = 0;
730 		return (B_FALSE);
731 	}
732 
733 	/*
734 	 * First, we need to make sure that the PA we've been given actually is
735 	 * meant to target a DRAM address. This address may fall to MMIO, MMCFG,
736 	 * be an address that's outside of DRAM, or belong to a legacy address
737 	 * range that is interposed.
738 	 */
739 	if (imc_decode_addr_resvd(imc, dec)) {
740 		return (B_FALSE);
741 	}
742 
743 	/*
744 	 * Now that we have this data, we want to go through and look at the
745 	 * SAD. The SAD will point us to a specific socket and an IMC / home
746 	 * agent on that socket which will tell us which TAD we need to use.
747 	 */
748 	if (!imc_decode_sad(imc, dec)) {
749 		return (B_FALSE);
750 	}
751 
752 	/*
753 	 * The decoded SAD information has pointed us a TAD. We need to use this
754 	 * to point us to the corresponding memory channel and the corresponding
755 	 * address on the channel.
756 	 */
757 	if (!imc_decode_tad(imc, dec)) {
758 		return (B_FALSE);
759 	}
760 
761 	/*
762 	 * Use the rank interleaving data to determine which DIMM this is, the
763 	 * relevant rank, and the rank address.
764 	 */
765 	if (!imc_decode_rir(imc, dec)) {
766 		return (B_FALSE);
767 	}
768 
769 	return (B_TRUE);
770 }
771