xref: /illumos-gate/usr/src/uts/i86pc/io/immu.c (revision 243bebc0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23  * All rights reserved.
24  */
25 /*
26  * Copyright (c) 2009, Intel Corporation.
27  * All rights reserved.
28  */
29 
30 /*
31  * Intel IOMMU implementation
32  * This file contains Intel IOMMU code exported
33  * to the rest of the system and code that deals
34  * with the Intel IOMMU as a whole.
35  */
36 
37 #include <sys/conf.h>
38 #include <sys/modctl.h>
39 #include <sys/pci.h>
40 #include <sys/pci_impl.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ddi.h>
43 #include <sys/ddidmareq.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/ddifm.h>
46 #include <sys/sunndi.h>
47 #include <sys/debug.h>
48 #include <sys/fm/protocol.h>
49 #include <sys/note.h>
50 #include <sys/apic.h>
51 #include <vm/hat_i86.h>
52 #include <sys/smp_impldefs.h>
53 #include <sys/spl.h>
54 #include <sys/archsystm.h>
55 #include <sys/x86_archext.h>
56 #include <sys/avl.h>
57 #include <sys/bootconf.h>
58 #include <sys/bootinfo.h>
59 #include <sys/atomic.h>
60 #include <sys/immu.h>
/* ########################### Globals and tunables ######################## */
/*
 * Global switches (boolean) that can be toggled either via boot options
 * or via /etc/system or kmdb
 *
 * Within this file the switches are loaded by read_conf_options() and then
 * read_boot_options(), so a boot option overrides a rootnex.conf setting.
 */

/* Various features */
boolean_t immu_enable = B_TRUE;
boolean_t immu_dvma_enable = B_TRUE;

/* accessed in other files so not static */
boolean_t immu_gfxdvma_enable = B_TRUE;
boolean_t immu_intrmap_enable = B_FALSE;
/* forced to B_TRUE by immu_init() when immu_intrmap_enable is B_TRUE */
boolean_t immu_qinv_enable = B_TRUE;

/* various quirks that need working around */

/* XXX We always map page 0 read/write for now */
boolean_t immu_quirk_usbpage0 = B_TRUE;
/* check_usb(): map the BIOS reserved ranges for USB controllers */
boolean_t immu_quirk_usbrmrr = B_TRUE;
/* check_usb(): use unity mappings for USB controllers */
boolean_t immu_quirk_usbfullpa;
/* set by check_mobile4() when the Mobile 4 chipset is found in the tree */
boolean_t immu_quirk_mobile4;

/* debug messages */
boolean_t immu_dmar_print;

/* Tunables */
/* overridden by the "immu-flush-gran" rootnex.conf property */
int64_t immu_flush_gran = 5;

/*
 * Global DVMA mapping flags; IMMU_FLAGS_UNITY is set or cleared here
 * according to the DDI_DVMA_MAPTYPE_ROOTNEX_PROP property.
 */
immu_flags_t immu_global_dvma_flags;

/* ############  END OPTIONS section ################ */

/*
 * Global used internally by Intel IOMMU code
 */
dev_info_t *root_devinfo;	/* set from ddi_root_node() in immu_init() */
kmutex_t immu_lock;		/* initialized in immu_subsystems_setup() */
list_t immu_list;		/* list of immu_t, one per IOMMU unit */
boolean_t immu_setup;		/* set at the end of immu_init() */
boolean_t immu_running;		/* set at the end of immu_startup() */
boolean_t immu_quiesced;

/* ######################## END Globals and tunables ###################### */
/* Globals used only in this file */

/* strings of the "immu-blacklist" rootnex.conf property (blacklist_setup) */
static char **black_array;
static uint_t nblacks;

/*
 * Driver name lists loaded by mapping_list_setup() from the
 * "immu-dvma-{unity,xlate,premap,nopremap}-drivers" rootnex.conf
 * properties and consulted by check_conf().
 */
static char **unity_driver_array;
static uint_t nunity;
static char **xlate_driver_array;
static uint_t nxlate;

static char **premap_driver_array;
static uint_t npremap;
static char **nopremap_driver_array;
static uint_t nnopremap;
118 /* ###################### Utility routines ############################# */
119 
120 /*
121  * Check if the device has mobile 4 chipset
122  */
123 static int
124 check_mobile4(dev_info_t *dip, void *arg)
125 {
126 	_NOTE(ARGUNUSED(arg));
127 	int vendor, device;
128 	int *ip = (int *)arg;
129 
130 	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
131 	    "vendor-id", -1);
132 	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
133 	    "device-id", -1);
134 
135 	if (vendor == 0x8086 && device == 0x2a40) {
136 		*ip = B_TRUE;
137 		ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
138 		    "Force setting IOMMU write buffer");
139 		return (DDI_WALK_TERMINATE);
140 	} else {
141 		return (DDI_WALK_CONTINUE);
142 	}
143 }
144 
145 static void
146 map_bios_rsvd_mem(dev_info_t *dip)
147 {
148 	struct memlist *mp;
149 
150 	/*
151 	 * Make sure the domain for the device is set up before
152 	 * mapping anything.
153 	 */
154 	(void) immu_dvma_device_setup(dip, 0);
155 
156 	memlist_read_lock();
157 
158 	mp = bios_rsvd;
159 	while (mp != NULL) {
160 		memrng_t mrng = {0};
161 
162 		ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
163 		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
164 		    mp->ml_address + mp->ml_size);
165 
166 		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
167 		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
168 
169 		(void) immu_map_memrange(dip, &mrng);
170 
171 		mp = mp->ml_next;
172 	}
173 
174 	memlist_read_unlock();
175 }
176 
177 
178 /*
179  * Check if the driver requests a specific type of mapping.
180  */
181 /*ARGSUSED*/
182 static void
183 check_conf(dev_info_t *dip, void *arg)
184 {
185 	immu_devi_t *immu_devi;
186 	const char *dname;
187 	uint_t i;
188 	int hasmapprop = 0, haspreprop = 0;
189 	boolean_t old_premap;
190 
191 	/*
192 	 * Only PCI devices can use an IOMMU. Legacy ISA devices
193 	 * are handled in check_lpc.
194 	 */
195 	if (!DEVI_IS_PCI(dip))
196 		return;
197 
198 	dname = ddi_driver_name(dip);
199 	if (dname == NULL)
200 		return;
201 	immu_devi = immu_devi_get(dip);
202 
203 	for (i = 0; i < nunity; i++) {
204 		if (strcmp(unity_driver_array[i], dname) == 0) {
205 			hasmapprop = 1;
206 			immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
207 		}
208 	}
209 
210 	for (i = 0; i < nxlate; i++) {
211 		if (strcmp(xlate_driver_array[i], dname) == 0) {
212 			hasmapprop = 1;
213 			immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
214 		}
215 	}
216 
217 	old_premap = immu_devi->imd_use_premap;
218 
219 	for (i = 0; i < nnopremap; i++) {
220 		if (strcmp(nopremap_driver_array[i], dname) == 0) {
221 			haspreprop = 1;
222 			immu_devi->imd_use_premap = B_FALSE;
223 		}
224 	}
225 
226 	for (i = 0; i < npremap; i++) {
227 		if (strcmp(premap_driver_array[i], dname) == 0) {
228 			haspreprop = 1;
229 			immu_devi->imd_use_premap = B_TRUE;
230 		}
231 	}
232 
233 	/*
234 	 * Report if we changed the value from the default.
235 	 */
236 	if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
237 		ddi_err(DER_LOG, dip, "using %s DVMA mapping",
238 		    immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
239 		    DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
240 
241 	if (haspreprop && (immu_devi->imd_use_premap != old_premap))
242 		ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
243 		    immu_devi->imd_use_premap ? "" : "not ");
244 }
245 
246 /*
247  * Check if the device is USB controller
248  */
249 /*ARGSUSED*/
250 static void
251 check_usb(dev_info_t *dip, void *arg)
252 {
253 	const char *drv = ddi_driver_name(dip);
254 	immu_devi_t *immu_devi;
255 
256 
257 	/*
258 	 * It's not clear if xHCI really needs these quirks; however, to be on
259 	 * the safe side until we know for certain we add it to the list below.
260 	 */
261 	if (drv == NULL ||
262 	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
263 	    strcmp(drv, "ehci") != 0 && strcmp(drv, "xhci") != 0)) {
264 		return;
265 	}
266 
267 	immu_devi = immu_devi_get(dip);
268 
269 	/*
270 	 * If unit mappings are already specified, globally or
271 	 * locally, we're done here, since that covers both
272 	 * quirks below.
273 	 */
274 	if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
275 		return;
276 
277 	/* This must come first since it does unity mapping */
278 	if (immu_quirk_usbfullpa == B_TRUE) {
279 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
280 	} else if (immu_quirk_usbrmrr == B_TRUE) {
281 		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
282 		map_bios_rsvd_mem(dip);
283 	}
284 }
285 
286 /*
287  * Check if the device is a LPC device
288  */
289 /*ARGSUSED*/
290 static void
291 check_lpc(dev_info_t *dip, void *arg)
292 {
293 	immu_devi_t *immu_devi;
294 
295 	immu_devi = immu_devi_get(dip);
296 	if (immu_devi->imd_lpc == B_TRUE) {
297 		ddi_err(DER_LOG, dip, "iommu: Found LPC device");
298 		/* This will put the immu_devi on the LPC "specials" list */
299 		(void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
300 	}
301 }
302 
303 /*
304  * Check if the device is a GFX device
305  */
306 /*ARGSUSED*/
307 static void
308 check_gfx(dev_info_t *dip, void *arg)
309 {
310 	immu_devi_t *immu_devi;
311 
312 	immu_devi = immu_devi_get(dip);
313 	if (immu_devi->imd_display == B_TRUE) {
314 		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
315 		ddi_err(DER_LOG, dip, "iommu: Found GFX device");
316 		/* This will put the immu_devi on the GFX "specials" list */
317 		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
318 	}
319 }
320 
321 static void
322 walk_tree(int (*f)(dev_info_t *, void *), void *arg)
323 {
324 	int count;
325 
326 	ndi_devi_enter(root_devinfo, &count);
327 	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
328 	ndi_devi_exit(root_devinfo, count);
329 }
330 
331 static int
332 check_pre_setup_quirks(dev_info_t *dip, void *arg)
333 {
334 	/* just 1 check right now */
335 	return (check_mobile4(dip, arg));
336 }
337 
338 static int
339 check_pre_startup_quirks(dev_info_t *dip, void *arg)
340 {
341 	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
342 		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
343 	}
344 
345 	check_gfx(dip, arg);
346 
347 	check_lpc(dip, arg);
348 
349 	check_conf(dip, arg);
350 
351 	check_usb(dip, arg);
352 
353 	return (DDI_WALK_CONTINUE);
354 }
355 
356 static void
357 pre_setup_quirks(void)
358 {
359 	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
360 }
361 
362 static void
363 pre_startup_quirks(void)
364 {
365 	walk_tree(check_pre_startup_quirks, NULL);
366 
367 	immu_dmar_rmrr_map();
368 }
369 
370 static int
371 get_conf_str(char *bopt, char **val)
372 {
373 	int ret;
374 
375 	/*
376 	 * Check the rootnex.conf property
377 	 * Fake up a dev_t since searching the global
378 	 * property list needs it
379 	 */
380 	ret = ddi_prop_lookup_string(
381 	    makedevice(ddi_name_to_major("rootnex"), 0),
382 	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
383 	    bopt, val);
384 
385 	return (ret);
386 }
387 
388 /*
389  * get_conf_opt()
390  * 	get a rootnex.conf setting  (always a boolean)
391  */
392 static void
393 get_conf_opt(char *bopt, boolean_t *kvar)
394 {
395 	char *val = NULL;
396 
397 	/*
398 	 * Check the rootnex.conf property
399 	 * Fake up a dev_t since searching the global
400 	 * property list needs it
401 	 */
402 
403 	if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
404 		return;
405 
406 	if (strcmp(val, "true") == 0) {
407 		*kvar = B_TRUE;
408 	} else if (strcmp(val, "false") == 0) {
409 		*kvar = B_FALSE;
410 	} else {
411 		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" ",
412 		    "is not set to true or false. Ignoring option.",
413 		    bopt, val);
414 	}
415 	ddi_prop_free(val);
416 }
417 
418 /*
419  * get_bootopt()
420  * 	check a boot option  (always a boolean)
421  */
422 static int
423 get_boot_str(char *bopt, char **val)
424 {
425 	int ret;
426 
427 	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
428 	    DDI_PROP_DONTPASS, bopt, val);
429 
430 	return (ret);
431 }
432 
433 static void
434 get_bootopt(char *bopt, boolean_t *kvar)
435 {
436 	char *val = NULL;
437 
438 	/*
439 	 * All boot options set at the GRUB menu become
440 	 * properties on the rootnex.
441 	 */
442 	if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
443 		return;
444 
445 	if (strcmp(val, "true") == 0) {
446 		*kvar = B_TRUE;
447 	} else if (strcmp(val, "false") == 0) {
448 		*kvar = B_FALSE;
449 	} else {
450 		ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" ",
451 		    "is not set to true or false. Ignoring option.",
452 		    bopt, val);
453 	}
454 	ddi_prop_free(val);
455 }
456 
457 static void
458 get_boot_dvma_mode(void)
459 {
460 	char *val = NULL;
461 
462 	if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
463 	    != DDI_PROP_SUCCESS)
464 		return;
465 
466 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
467 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
468 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
469 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
470 	} else {
471 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
472 		    val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
473 	}
474 	ddi_prop_free(val);
475 }
476 
477 static void
478 get_conf_dvma_mode(void)
479 {
480 	char *val = NULL;
481 
482 	if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
483 	    != DDI_PROP_SUCCESS)
484 		return;
485 
486 	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
487 		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
488 	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
489 		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
490 	} else {
491 		ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
492 		    "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
493 	}
494 	ddi_prop_free(val);
495 }
496 
497 
498 static void
499 get_conf_tunables(char *bopt, int64_t *ivar)
500 {
501 	int64_t	*iarray;
502 	uint_t n;
503 
504 	/*
505 	 * Check the rootnex.conf property
506 	 * Fake up a dev_t since searching the global
507 	 * property list needs it
508 	 */
509 	if (ddi_prop_lookup_int64_array(
510 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
511 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
512 	    &iarray, &n) != DDI_PROP_SUCCESS) {
513 		return;
514 	}
515 
516 	if (n != 1) {
517 		ddi_err(DER_WARN, NULL, "More than one value specified for "
518 		    "%s property. Ignoring and using default",
519 		    "immu-flush-gran");
520 		ddi_prop_free(iarray);
521 		return;
522 	}
523 
524 	if (iarray[0] < 0) {
525 		ddi_err(DER_WARN, NULL, "Negative value specified for "
526 		    "%s property. Inoring and Using default value",
527 		    "immu-flush-gran");
528 		ddi_prop_free(iarray);
529 		return;
530 	}
531 
532 	*ivar = iarray[0];
533 
534 	ddi_prop_free(iarray);
535 }
536 
537 static void
538 read_conf_options(void)
539 {
540 	/* enable/disable options */
541 	get_conf_opt("immu-enable", &immu_enable);
542 	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
543 	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
544 	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
545 	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);
546 
547 	/* workaround switches */
548 	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
549 	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
550 	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
551 
552 	/* debug printing */
553 	get_conf_opt("immu-dmar-print", &immu_dmar_print);
554 
555 	/* get tunables */
556 	get_conf_tunables("immu-flush-gran", &immu_flush_gran);
557 
558 	get_conf_dvma_mode();
559 }
560 
561 static void
562 read_boot_options(void)
563 {
564 	/* enable/disable options */
565 	get_bootopt("immu-enable", &immu_enable);
566 	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
567 	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
568 	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
569 	get_bootopt("immu-qinv-enable", &immu_qinv_enable);
570 
571 	/* workaround switches */
572 	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
573 	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
574 	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
575 
576 	/* debug printing */
577 	get_bootopt("immu-dmar-print", &immu_dmar_print);
578 
579 	get_boot_dvma_mode();
580 }
581 
582 static void
583 mapping_list_setup(void)
584 {
585 	char **string_array;
586 	uint_t nstrings;
587 
588 	if (ddi_prop_lookup_string_array(
589 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
590 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
591 	    "immu-dvma-unity-drivers",
592 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
593 		unity_driver_array = string_array;
594 		nunity = nstrings;
595 	}
596 
597 	if (ddi_prop_lookup_string_array(
598 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
599 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
600 	    "immu-dvma-xlate-drivers",
601 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
602 		xlate_driver_array = string_array;
603 		nxlate = nstrings;
604 	}
605 
606 	if (ddi_prop_lookup_string_array(
607 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
608 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
609 	    "immu-dvma-premap-drivers",
610 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
611 		premap_driver_array = string_array;
612 		npremap = nstrings;
613 	}
614 
615 	if (ddi_prop_lookup_string_array(
616 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
617 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
618 	    "immu-dvma-nopremap-drivers",
619 	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
620 		nopremap_driver_array = string_array;
621 		nnopremap = nstrings;
622 	}
623 }
624 
625 /*
626  * Note, this will not catch hardware not enumerated
627  * in early boot
628  */
629 static boolean_t
630 blacklisted_driver(void)
631 {
632 	char **strptr;
633 	int i;
634 	major_t maj;
635 
636 	/* need at least 2 strings */
637 	if (nblacks < 2) {
638 		return (B_FALSE);
639 	}
640 
641 	for (i = 0; nblacks - i > 1; i++) {
642 		strptr = &black_array[i];
643 		if (strcmp(*strptr++, "DRIVER") == 0) {
644 			if ((maj = ddi_name_to_major(*strptr++))
645 			    != DDI_MAJOR_T_NONE) {
646 				/* is there hardware bound to this drvr */
647 				if (devnamesp[maj].dn_head != NULL) {
648 					return (B_TRUE);
649 				}
650 			}
651 			i += 1;   /* for loop adds 1, so add only 1 here */
652 		}
653 	}
654 
655 	return (B_FALSE);
656 }
657 
658 static boolean_t
659 blacklisted_smbios(void)
660 {
661 	id_t smid;
662 	smbios_hdl_t *smhdl;
663 	smbios_info_t sminf;
664 	smbios_system_t smsys;
665 	char *mfg, *product, *version;
666 	char **strptr;
667 	int i;
668 
669 	/* need at least 4 strings for this setting */
670 	if (nblacks < 4) {
671 		return (B_FALSE);
672 	}
673 
674 	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
675 	if (smhdl == NULL ||
676 	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
677 	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
678 		return (B_FALSE);
679 	}
680 
681 	mfg = (char *)sminf.smbi_manufacturer;
682 	product = (char *)sminf.smbi_product;
683 	version = (char *)sminf.smbi_version;
684 
685 	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
686 	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
687 	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
688 	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
689 
690 	for (i = 0; nblacks - i > 3; i++) {
691 		strptr = &black_array[i];
692 		if (strcmp(*strptr++, "SMBIOS") == 0) {
693 			if (strcmp(*strptr++, mfg) == 0 &&
694 			    (*strptr[0] == '\0' ||
695 			    strcmp(*strptr++, product) == 0) &&
696 			    (*strptr[0] == '\0' ||
697 			    strcmp(*strptr++, version) == 0)) {
698 				return (B_TRUE);
699 			}
700 			i += 3;
701 		}
702 	}
703 
704 	return (B_FALSE);
705 }
706 
707 static boolean_t
708 blacklisted_acpi(void)
709 {
710 	if (nblacks == 0) {
711 		return (B_FALSE);
712 	}
713 
714 	return (immu_dmar_blacklisted(black_array, nblacks));
715 }
716 
717 /*
718  * Check if system is blacklisted by Intel IOMMU driver
719  * i.e. should Intel IOMMU be disabled on this system
720  * Currently a system can be blacklistd based on the
721  * following bases:
722  *
723  * 1. DMAR ACPI table information.
724  *    This information includes things like
725  *    manufacturer and revision number. If rootnex.conf
726  *    has matching info set in its blacklist property
727  *    then Intel IOMMu will be disabled
728  *
729  * 2. SMBIOS information
730  *
731  * 3. Driver installed - useful if a particular
732  *    driver or hardware is toxic if Intel IOMMU
733  *    is turned on.
734  */
735 
736 static void
737 blacklist_setup(void)
738 {
739 	char **string_array;
740 	uint_t nstrings;
741 
742 	/*
743 	 * Check the rootnex.conf blacklist property.
744 	 * Fake up a dev_t since searching the global
745 	 * property list needs it
746 	 */
747 	if (ddi_prop_lookup_string_array(
748 	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
749 	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
750 	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
751 		return;
752 	}
753 
754 	/* smallest blacklist criteria works with multiples of 2 */
755 	if (nstrings % 2 != 0) {
756 		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
757 		    "rootnex.conf: number of strings must be a "
758 		    "multiple of 2");
759 		ddi_prop_free(string_array);
760 		return;
761 	}
762 
763 	black_array = string_array;
764 	nblacks = nstrings;
765 }
766 
767 static void
768 blacklist_destroy(void)
769 {
770 	if (black_array) {
771 		ddi_prop_free(black_array);
772 		black_array = NULL;
773 		nblacks = 0;
774 	}
775 }
776 
777 static char *
778 immu_alloc_name(const char *str, int instance)
779 {
780 	size_t slen;
781 	char *s;
782 
783 	slen = strlen(str) + IMMU_ISTRLEN + 1;
784 	s = kmem_zalloc(slen, VM_SLEEP);
785 	if (s != NULL)
786 		(void) snprintf(s, slen, "%s%d", str, instance);
787 
788 	return (s);
789 }
790 
791 
792 /*
793  * Now set all the fields in the order they are defined
794  * We do this only as a defensive-coding practice, it is
795  * not a correctness issue.
796  */
797 static void *
798 immu_state_alloc(int seg, void *dmar_unit)
799 {
800 	immu_t *immu;
801 	char *nodename, *hcachename, *pcachename;
802 	int instance;
803 
804 	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
805 	if (dmar_unit == NULL) {
806 		/* No more IOMMUs in this segment */
807 		return (NULL);
808 	}
809 
810 	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
811 
812 	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
813 
814 	mutex_enter(&(immu->immu_lock));
815 
816 	immu->immu_dmar_unit = dmar_unit;
817 	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
818 
819 	nodename = ddi_node_name(immu->immu_dip);
820 	instance = ddi_get_instance(immu->immu_dip);
821 
822 	immu->immu_name = immu_alloc_name(nodename, instance);
823 	if (immu->immu_name == NULL)
824 		return (NULL);
825 
826 	/*
827 	 * the immu_intr_lock mutex is grabbed by the IOMMU
828 	 * unit's interrupt handler so we need to use an
829 	 * interrupt cookie for the mutex
830 	 */
831 	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
832 	    (void *)ipltospl(IMMU_INTR_IPL));
833 
834 	/* IOMMU regs related */
835 	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
836 	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
837 	immu->immu_regs_busy = B_FALSE;
838 
839 	/* DVMA related */
840 	immu->immu_dvma_coherent = B_FALSE;
841 
842 	/* DVMA context related */
843 	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
844 
845 	/* DVMA domain related */
846 	list_create(&(immu->immu_domain_list), sizeof (domain_t),
847 	    offsetof(domain_t, dom_immu_node));
848 
849 	/* DVMA special device lists */
850 	immu->immu_dvma_gfx_only = B_FALSE;
851 	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
852 	    offsetof(immu_devi_t, imd_spc_node));
853 	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
854 	    offsetof(immu_devi_t, imd_spc_node));
855 
856 	/* interrupt remapping related */
857 	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
858 
859 	/* qinv related */
860 	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
861 
862 	/*
863 	 * insert this immu unit into the system-wide list
864 	 */
865 	list_insert_tail(&immu_list, immu);
866 
867 	pcachename = immu_alloc_name("immu_pgtable_cache", instance);
868 	if (pcachename == NULL)
869 		return (NULL);
870 
871 	hcachename = immu_alloc_name("immu_hdl_cache", instance);
872 	if (hcachename == NULL)
873 		return (NULL);
874 
875 	immu->immu_pgtable_cache = kmem_cache_create(pcachename,
876 	    sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
877 	    NULL, 0);
878 	immu->immu_hdl_cache = kmem_cache_create(hcachename,
879 	    sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
880 	    NULL, NULL, immu, NULL, 0);
881 
882 	mutex_exit(&(immu->immu_lock));
883 
884 	ddi_err(DER_LOG, immu->immu_dip, "unit setup");
885 
886 	immu_dmar_set_immu(dmar_unit, immu);
887 
888 	return (dmar_unit);
889 }
890 
891 static void
892 immu_subsystems_setup(void)
893 {
894 	int seg;
895 	void *unit_hdl;
896 
897 	ddi_err(DER_VERB, NULL,
898 	    "Creating state structures for Intel IOMMU units");
899 
900 	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
901 	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
902 
903 	mutex_enter(&immu_lock);
904 
905 	unit_hdl = NULL;
906 	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
907 		while (unit_hdl = immu_state_alloc(seg, unit_hdl)) {
908 			;
909 		}
910 	}
911 
912 	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
913 	immu_dvma_setup(&immu_list);
914 	if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
915 		immu_intrmap_setup(&immu_list);
916 	else
917 		immu_intrmap_enable = B_FALSE;
918 
919 	mutex_exit(&immu_lock);
920 }
921 
922 /*
923  * immu_subsystems_startup()
924  * 	startup all units that were setup
925  */
926 static void
927 immu_subsystems_startup(void)
928 {
929 	immu_t *immu;
930 	iommulib_ops_t *iommulib_ops;
931 
932 	mutex_enter(&immu_lock);
933 
934 	immu_dmar_startup();
935 
936 	immu = list_head(&immu_list);
937 	for (; immu; immu = list_next(&immu_list, immu)) {
938 
939 		mutex_enter(&(immu->immu_lock));
940 
941 		immu_intr_register(immu);
942 		immu_dvma_startup(immu);
943 		immu_intrmap_startup(immu);
944 		immu_qinv_startup(immu);
945 
946 		/*
947 		 * Set IOMMU unit's regs to do
948 		 * the actual startup. This will
949 		 * set immu->immu_running  field
950 		 * if the unit is successfully
951 		 * started
952 		 */
953 		immu_regs_startup(immu);
954 
955 		mutex_exit(&(immu->immu_lock));
956 
957 		iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
958 		*iommulib_ops = immulib_ops;
959 		iommulib_ops->ilops_data = (void *)immu;
960 		(void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
961 		    &immu->immu_iommulib_handle);
962 	}
963 
964 	mutex_exit(&immu_lock);
965 }
966 
967 /* ##################  Intel IOMMU internal interfaces ###################### */
968 
969 /*
970  * Internal interfaces for IOMMU code (i.e. not exported to rootnex
971  * or rest of system)
972  */
973 
974 /*
975  * ddip can be NULL, in which case we walk up until we find the root dip
976  * NOTE: We never visit the root dip since its not a hardware node
977  */
978 int
979 immu_walk_ancestor(
980 	dev_info_t *rdip,
981 	dev_info_t *ddip,
982 	int (*func)(dev_info_t *, void *arg),
983 	void *arg,
984 	int *lvlp,
985 	immu_flags_t immu_flags)
986 {
987 	dev_info_t *pdip;
988 	int level;
989 	int error = DDI_SUCCESS;
990 
991 	/* ddip and immu can be NULL */
992 
993 	/* Hold rdip so that branch is not detached */
994 	ndi_hold_devi(rdip);
995 	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
996 	    pdip = ddi_get_parent(pdip), level++) {
997 
998 		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
999 			error = DDI_FAILURE;
1000 			break;
1001 		}
1002 		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
1003 			break;
1004 		}
1005 		if (immu_flags & IMMU_FLAGS_DONTPASS) {
1006 			break;
1007 		}
1008 		if (pdip == ddip) {
1009 			break;
1010 		}
1011 	}
1012 
1013 	ndi_rele_devi(rdip);
1014 
1015 	if (lvlp)
1016 		*lvlp = level;
1017 
1018 	return (error);
1019 }
1020 
1021 /* ########################  Intel IOMMU entry points ####################### */
1022 /*
1023  * immu_init()
1024  *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
1025  *      This is the first function called in Intel IOMMU code
1026  */
1027 void
1028 immu_init(void)
1029 {
1030 	char *phony_reg = "A thing of beauty is a joy forever";
1031 
1032 	/* Set some global shorthands that are needed by all of IOMMU code */
1033 	root_devinfo = ddi_root_node();
1034 
1035 	/*
1036 	 * Intel IOMMU only supported only if MMU(CPU) page size is ==
1037 	 * IOMMU pages size.
1038 	 */
1039 	/*LINTED*/
1040 	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
1041 		ddi_err(DER_WARN, NULL,
1042 		    "MMU page size (%d) is not equal to\n"
1043 		    "IOMMU page size (%d). "
1044 		    "Disabling Intel IOMMU. ",
1045 		    MMU_PAGESIZE, IMMU_PAGESIZE);
1046 		immu_enable = B_FALSE;
1047 		return;
1048 	}
1049 
1050 	/*
1051 	 * Read rootnex.conf options. Do this before
1052 	 * boot options so boot options can override .conf options.
1053 	 */
1054 	read_conf_options();
1055 
1056 	/*
1057 	 * retrieve the Intel IOMMU boot options.
1058 	 * Do this before parsing immu ACPI table
1059 	 * as a boot option could potentially affect
1060 	 * ACPI parsing.
1061 	 */
1062 	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
1063 	read_boot_options();
1064 
1065 	/*
1066 	 * Check the IOMMU enable boot-option first.
1067 	 * This is so that we can skip parsing the ACPI table
1068 	 * if necessary because that may cause problems in
1069 	 * systems with buggy BIOS or ACPI tables
1070 	 */
1071 	if (immu_enable == B_FALSE) {
1072 		return;
1073 	}
1074 
1075 	if (immu_intrmap_enable == B_TRUE)
1076 		immu_qinv_enable = B_TRUE;
1077 
1078 	/*
1079 	 * Next, check if the system even has an Intel IOMMU
1080 	 * We use the presence or absence of the IOMMU ACPI
1081 	 * table to detect Intel IOMMU.
1082 	 */
1083 	if (immu_dmar_setup() != DDI_SUCCESS) {
1084 		immu_enable = B_FALSE;
1085 		return;
1086 	}
1087 
1088 	mapping_list_setup();
1089 
1090 	/*
1091 	 * Check blacklists
1092 	 */
1093 	blacklist_setup();
1094 
1095 	if (blacklisted_smbios() == B_TRUE) {
1096 		blacklist_destroy();
1097 		immu_enable = B_FALSE;
1098 		return;
1099 	}
1100 
1101 	if (blacklisted_driver() == B_TRUE) {
1102 		blacklist_destroy();
1103 		immu_enable = B_FALSE;
1104 		return;
1105 	}
1106 
1107 	/*
1108 	 * Read the "raw" DMAR ACPI table to get information
1109 	 * and convert into a form we can use.
1110 	 */
1111 	if (immu_dmar_parse() != DDI_SUCCESS) {
1112 		blacklist_destroy();
1113 		immu_enable = B_FALSE;
1114 		return;
1115 	}
1116 
1117 	/*
1118 	 * now that we have processed the ACPI table
1119 	 * check if we need to blacklist this system
1120 	 * based on ACPI info
1121 	 */
1122 	if (blacklisted_acpi() == B_TRUE) {
1123 		immu_dmar_destroy();
1124 		blacklist_destroy();
1125 		immu_enable = B_FALSE;
1126 		return;
1127 	}
1128 
1129 	blacklist_destroy();
1130 
1131 	/*
1132 	 * Check if system has HW quirks.
1133 	 */
1134 	pre_setup_quirks();
1135 
1136 	/* Now do the rest of the setup */
1137 	immu_subsystems_setup();
1138 
1139 	/*
1140 	 * Now that the IMMU is setup, create a phony
1141 	 * reg prop so that suspend/resume works
1142 	 */
1143 	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
1144 	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
1145 		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
1146 		    "rootnex node");
1147 		/*NOTREACHED*/
1148 	}
1149 
1150 	immu_setup = B_TRUE;
1151 }
1152 
1153 /*
1154  * immu_startup()
1155  * 	called directly by boot code to startup
1156  *      all units of the IOMMU
1157  */
1158 void
1159 immu_startup(void)
1160 {
1161 	/*
1162 	 * If IOMMU is disabled, do nothing
1163 	 */
1164 	if (immu_enable == B_FALSE) {
1165 		return;
1166 	}
1167 
1168 	if (immu_setup == B_FALSE) {
1169 		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1170 		    "skipping IOMMU startup");
1171 		return;
1172 	}
1173 
1174 	pre_startup_quirks();
1175 
1176 	ddi_err(DER_CONT, NULL,
1177 	    "?Starting Intel IOMMU (dmar) units...\n");
1178 
1179 	immu_subsystems_startup();
1180 
1181 	immu_running = B_TRUE;
1182 }
1183 
1184 /*
1185  * Hook to notify IOMMU code of device tree changes
1186  */
1187 void
1188 immu_device_tree_changed(void)
1189 {
1190 	if (immu_setup == B_FALSE) {
1191 		return;
1192 	}
1193 
1194 	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1195 	    "does not use device tree updates");
1196 }
1197 
1198 /*
1199  * Hook to notify IOMMU code of memory changes
1200  */
1201 void
1202 immu_physmem_update(uint64_t addr, uint64_t size)
1203 {
1204 	if (immu_setup == B_FALSE) {
1205 		return;
1206 	}
1207 	immu_dvma_physmem_update(addr, size);
1208 }
1209 
1210 /*
1211  * immu_quiesce()
1212  * 	quiesce all units that are running
1213  */
1214 int
1215 immu_quiesce(void)
1216 {
1217 	immu_t *immu;
1218 	int ret = DDI_SUCCESS;
1219 
1220 	mutex_enter(&immu_lock);
1221 
1222 	if (immu_running == B_FALSE) {
1223 		mutex_exit(&immu_lock);
1224 		return (DDI_SUCCESS);
1225 	}
1226 
1227 	immu = list_head(&immu_list);
1228 	for (; immu; immu = list_next(&immu_list, immu)) {
1229 
1230 		/* if immu is not running, we dont quiesce */
1231 		if (immu->immu_regs_running == B_FALSE)
1232 			continue;
1233 
1234 		/* flush caches */
1235 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1236 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1237 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1238 		rw_exit(&(immu->immu_ctx_rwlock));
1239 		immu_regs_wbf_flush(immu);
1240 
1241 		mutex_enter(&(immu->immu_lock));
1242 
1243 		/*
1244 		 * Set IOMMU unit's regs to do
1245 		 * the actual shutdown.
1246 		 */
1247 		immu_regs_shutdown(immu);
1248 		immu_regs_suspend(immu);
1249 
1250 		/* if immu is still running, we failed */
1251 		if (immu->immu_regs_running == B_TRUE)
1252 			ret = DDI_FAILURE;
1253 		else
1254 			immu->immu_regs_quiesced = B_TRUE;
1255 
1256 		mutex_exit(&(immu->immu_lock));
1257 	}
1258 
1259 	if (ret == DDI_SUCCESS) {
1260 		immu_running = B_FALSE;
1261 		immu_quiesced = B_TRUE;
1262 	}
1263 	mutex_exit(&immu_lock);
1264 
1265 	return (ret);
1266 }
1267 
1268 /*
1269  * immu_unquiesce()
1270  * 	unquiesce all units
1271  */
1272 int
1273 immu_unquiesce(void)
1274 {
1275 	immu_t *immu;
1276 	int ret = DDI_SUCCESS;
1277 
1278 	mutex_enter(&immu_lock);
1279 
1280 	if (immu_quiesced == B_FALSE) {
1281 		mutex_exit(&immu_lock);
1282 		return (DDI_SUCCESS);
1283 	}
1284 
1285 	immu = list_head(&immu_list);
1286 	for (; immu; immu = list_next(&immu_list, immu)) {
1287 
1288 		mutex_enter(&(immu->immu_lock));
1289 
1290 		/* if immu was not quiesced, i.e was not running before */
1291 		if (immu->immu_regs_quiesced == B_FALSE) {
1292 			mutex_exit(&(immu->immu_lock));
1293 			continue;
1294 		}
1295 
1296 		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1297 			ret = DDI_FAILURE;
1298 			mutex_exit(&(immu->immu_lock));
1299 			continue;
1300 		}
1301 
1302 		/* flush caches before unquiesce */
1303 		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1304 		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1305 		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1306 		rw_exit(&(immu->immu_ctx_rwlock));
1307 
1308 		/*
1309 		 * Set IOMMU unit's regs to do
1310 		 * the actual startup. This will
1311 		 * set immu->immu_regs_running  field
1312 		 * if the unit is successfully
1313 		 * started
1314 		 */
1315 		immu_regs_startup(immu);
1316 
1317 		if (immu->immu_regs_running == B_FALSE) {
1318 			ret = DDI_FAILURE;
1319 		} else {
1320 			immu_quiesced = B_TRUE;
1321 			immu_running = B_TRUE;
1322 			immu->immu_regs_quiesced = B_FALSE;
1323 		}
1324 
1325 		mutex_exit(&(immu->immu_lock));
1326 	}
1327 
1328 	mutex_exit(&immu_lock);
1329 
1330 	return (ret);
1331 }
1332 
1333 void
1334 immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
1335 {
1336 	caddr_t vaddr;
1337 	uint64_t paddr;
1338 
1339 	iwp->iwp_sync = sync;
1340 
1341 	vaddr = (caddr_t)&iwp->iwp_vstatus;
1342 	paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
1343 	paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
1344 
1345 	iwp->iwp_pstatus = paddr;
1346 	iwp->iwp_name = name;
1347 }
1348 
1349 /* ##############  END Intel IOMMU entry points ################## */
1350