1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Portions Copyright (c) 2010, Oracle and/or its affiliates.
23 * All rights reserved.
24 */
25/*
26 * Copyright (c) 2009, Intel Corporation.
27 * All rights reserved.
28 */
29
30/*
31 * Intel IOMMU implementation
32 * This file contains Intel IOMMU code exported
33 * to the rest of the system and code that deals
34 * with the Intel IOMMU as a whole.
35 */
36
37#include <sys/conf.h>
38#include <sys/modctl.h>
39#include <sys/pci.h>
40#include <sys/pci_impl.h>
41#include <sys/sysmacros.h>
42#include <sys/ddi.h>
43#include <sys/ddidmareq.h>
44#include <sys/ddi_impldefs.h>
45#include <sys/ddifm.h>
46#include <sys/sunndi.h>
47#include <sys/debug.h>
48#include <sys/fm/protocol.h>
49#include <sys/note.h>
50#include <sys/apic.h>
51#include <vm/hat_i86.h>
52#include <sys/smp_impldefs.h>
53#include <sys/spl.h>
54#include <sys/archsystm.h>
55#include <sys/x86_archext.h>
56#include <sys/avl.h>
57#include <sys/bootconf.h>
58#include <sys/bootinfo.h>
59#include <sys/atomic.h>
60#include <sys/immu.h>
61/* ########################### Globals and tunables ######################## */
62/*
63 * Global switches (boolean) that can be toggled either via boot options
64 * or via /etc/system or kmdb
65 */
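/*
 * Illustrative examples only. Assuming this code is linked into the
 * rootnex module (an assumption, not stated here), the switches above
 * could be toggled like this:
 *
 *	From /etc/system:
 *		set rootnex:immu_enable = 0
 *
 *	From the boot command line:
 *		-B immu-enable=false
 */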
66
67/* Various features */
68boolean_t immu_enable = B_TRUE;
69boolean_t immu_dvma_enable = B_TRUE;
70
71/* accessed in other files so not static */
72boolean_t immu_gfxdvma_enable = B_TRUE;
73boolean_t immu_intrmap_enable = B_FALSE;
74boolean_t immu_qinv_enable = B_TRUE;
75
76/* various quirks that need working around */
77
78/* XXX We always map page 0 read/write for now */
79boolean_t immu_quirk_usbpage0 = B_TRUE;
80boolean_t immu_quirk_usbrmrr = B_TRUE;
81boolean_t immu_quirk_usbfullpa;
82boolean_t immu_quirk_mobile4;
83
84/* debug messages */
85boolean_t immu_dmar_print;
86
87/* Tunables */
88int64_t immu_flush_gran = 5;
89
90immu_flags_t immu_global_dvma_flags;
91
92/* ############  END OPTIONS section ################ */
93
/*
 * Globals used internally by Intel IOMMU code
 */
97dev_info_t *root_devinfo;
98kmutex_t immu_lock;
99list_t immu_list;
100boolean_t immu_setup;
101boolean_t immu_running;
102boolean_t immu_quiesced;
103
104/* ######################## END Globals and tunables ###################### */
105/* Globals used only in this file */
106static char **black_array;
107static uint_t nblacks;
108
109static char **unity_driver_array;
110static uint_t nunity;
111static char **xlate_driver_array;
112static uint_t nxlate;
113
114static char **premap_driver_array;
115static uint_t npremap;
116static char **nopremap_driver_array;
117static uint_t nnopremap;
118/* ###################### Utility routines ############################# */
119
/*
 * Check if the device is part of the Intel Mobile 4 chipset
 */
123static int
124check_mobile4(dev_info_t *dip, void *arg)
125{
126	_NOTE(ARGUNUSED(arg));
127	int vendor, device;
128	int *ip = (int *)arg;
129
130	vendor = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
131	    "vendor-id", -1);
132	device = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
133	    "device-id", -1);
134
135	if (vendor == 0x8086 && device == 0x2a40) {
136		*ip = B_TRUE;
137		ddi_err(DER_NOTE, dip, "iommu: Mobile 4 chipset detected. "
138		    "Force setting IOMMU write buffer");
139		return (DDI_WALK_TERMINATE);
140	} else {
141		return (DDI_WALK_CONTINUE);
142	}
143}
144
145static void
146map_bios_rsvd_mem(dev_info_t *dip)
147{
148	struct memlist *mp;
149
150	/*
151	 * Make sure the domain for the device is set up before
152	 * mapping anything.
153	 */
154	(void) immu_dvma_device_setup(dip, 0);
155
156	memlist_read_lock();
157
158	mp = bios_rsvd;
159	while (mp != NULL) {
160		memrng_t mrng = {0};
161
162		ddi_err(DER_LOG, dip, "iommu: Mapping BIOS rsvd range "
163		    "[0x%" PRIx64 " - 0x%"PRIx64 "]\n", mp->ml_address,
164		    mp->ml_address + mp->ml_size);
165
166		mrng.mrng_start = IMMU_ROUNDOWN(mp->ml_address);
167		mrng.mrng_npages = IMMU_ROUNDUP(mp->ml_size) / IMMU_PAGESIZE;
168
169		(void) immu_map_memrange(dip, &mrng);
170
171		mp = mp->ml_next;
172	}
173
174	memlist_read_unlock();
175}
176
177
178/*
179 * Check if the driver requests a specific type of mapping.
180 */
181/*ARGSUSED*/
182static void
183check_conf(dev_info_t *dip, void *arg)
184{
185	immu_devi_t *immu_devi;
186	const char *dname;
187	uint_t i;
188	int hasmapprop = 0, haspreprop = 0;
189	boolean_t old_premap;
190
191	/*
192	 * Only PCI devices can use an IOMMU. Legacy ISA devices
193	 * are handled in check_lpc.
194	 */
195	if (!DEVI_IS_PCI(dip))
196		return;
197
198	dname = ddi_driver_name(dip);
199	if (dname == NULL)
200		return;
201	immu_devi = immu_devi_get(dip);
202
203	for (i = 0; i < nunity; i++) {
204		if (strcmp(unity_driver_array[i], dname) == 0) {
205			hasmapprop = 1;
206			immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
207		}
208	}
209
210	for (i = 0; i < nxlate; i++) {
211		if (strcmp(xlate_driver_array[i], dname) == 0) {
212			hasmapprop = 1;
213			immu_devi->imd_dvma_flags &= ~IMMU_FLAGS_UNITY;
214		}
215	}
216
217	old_premap = immu_devi->imd_use_premap;
218
219	for (i = 0; i < nnopremap; i++) {
220		if (strcmp(nopremap_driver_array[i], dname) == 0) {
221			haspreprop = 1;
222			immu_devi->imd_use_premap = B_FALSE;
223		}
224	}
225
226	for (i = 0; i < npremap; i++) {
227		if (strcmp(premap_driver_array[i], dname) == 0) {
228			haspreprop = 1;
229			immu_devi->imd_use_premap = B_TRUE;
230		}
231	}
232
233	/*
234	 * Report if we changed the value from the default.
235	 */
236	if (hasmapprop && (immu_devi->imd_dvma_flags ^ immu_global_dvma_flags))
237		ddi_err(DER_LOG, dip, "using %s DVMA mapping",
238		    immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY ?
239		    DDI_DVMA_MAPTYPE_UNITY : DDI_DVMA_MAPTYPE_XLATE);
240
241	if (haspreprop && (immu_devi->imd_use_premap != old_premap))
242		ddi_err(DER_LOG, dip, "%susing premapped DVMA space",
243		    immu_devi->imd_use_premap ? "" : "not ");
244}
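/*
 * A sketch of how the per-driver mapping lists consumed above can be
 * specified in rootnex.conf (the driver names below are placeholders,
 * not recommendations; see mapping_list_setup() for the properties
 * actually read):
 *
 *	immu-dvma-unity-drivers="mydrv1","mydrv2";
 *	immu-dvma-xlate-drivers="mydrv3";
 *	immu-dvma-premap-drivers="mydrv4";
 *	immu-dvma-nopremap-drivers="mydrv5";
 */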
245
/*
 * Check if the device is a USB controller
 */
249/*ARGSUSED*/
250static void
251check_usb(dev_info_t *dip, void *arg)
252{
253	const char *drv = ddi_driver_name(dip);
254	immu_devi_t *immu_devi;
255
256
	/*
	 * It's not clear whether xHCI really needs these quirks; to be on
	 * the safe side, we include it in the list below until we know
	 * for certain.
	 */
261	if (drv == NULL ||
262	    (strcmp(drv, "uhci") != 0 && strcmp(drv, "ohci") != 0 &&
263	    strcmp(drv, "ehci") != 0 && strcmp(drv, "xhci") != 0)) {
264		return;
265	}
266
267	immu_devi = immu_devi_get(dip);
268
	/*
	 * If unity mappings are already specified, globally or
	 * locally, we're done here, since that covers both
	 * quirks below.
	 */
274	if (immu_devi->imd_dvma_flags & IMMU_FLAGS_UNITY)
275		return;
276
277	/* This must come first since it does unity mapping */
278	if (immu_quirk_usbfullpa == B_TRUE) {
279		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
280	} else if (immu_quirk_usbrmrr == B_TRUE) {
281		ddi_err(DER_LOG, dip, "Applying USB RMRR quirk");
282		map_bios_rsvd_mem(dip);
283	}
284}
285
/*
 * Check if the device is an LPC device
 */
289/*ARGSUSED*/
290static void
291check_lpc(dev_info_t *dip, void *arg)
292{
293	immu_devi_t *immu_devi;
294
295	immu_devi = immu_devi_get(dip);
296	if (immu_devi->imd_lpc == B_TRUE) {
297		ddi_err(DER_LOG, dip, "iommu: Found LPC device");
298		/* This will put the immu_devi on the LPC "specials" list */
299		(void) immu_dvma_device_setup(dip, IMMU_FLAGS_SLEEP);
300	}
301}
302
303/*
304 * Check if the device is a GFX device
305 */
306/*ARGSUSED*/
307static void
308check_gfx(dev_info_t *dip, void *arg)
309{
310	immu_devi_t *immu_devi;
311
312	immu_devi = immu_devi_get(dip);
313	if (immu_devi->imd_display == B_TRUE) {
314		immu_devi->imd_dvma_flags |= IMMU_FLAGS_UNITY;
315		ddi_err(DER_LOG, dip, "iommu: Found GFX device");
316		/* This will put the immu_devi on the GFX "specials" list */
317		(void) immu_dvma_get_immu(dip, IMMU_FLAGS_SLEEP);
318	}
319}
320
321static void
322walk_tree(int (*f)(dev_info_t *, void *), void *arg)
323{
324	int count;
325
326	ndi_devi_enter(root_devinfo, &count);
327	ddi_walk_devs(ddi_get_child(root_devinfo), f, arg);
328	ndi_devi_exit(root_devinfo, count);
329}
330
331static int
332check_pre_setup_quirks(dev_info_t *dip, void *arg)
333{
334	/* just 1 check right now */
335	return (check_mobile4(dip, arg));
336}
337
338static int
339check_pre_startup_quirks(dev_info_t *dip, void *arg)
340{
341	if (immu_devi_set(dip, IMMU_FLAGS_SLEEP) != DDI_SUCCESS) {
342		ddi_err(DER_PANIC, dip, "Failed to get immu_devi");
343	}
344
345	check_gfx(dip, arg);
346
347	check_lpc(dip, arg);
348
349	check_conf(dip, arg);
350
351	check_usb(dip, arg);
352
353	return (DDI_WALK_CONTINUE);
354}
355
356static void
357pre_setup_quirks(void)
358{
359	walk_tree(check_pre_setup_quirks, &immu_quirk_mobile4);
360}
361
362static void
363pre_startup_quirks(void)
364{
365	walk_tree(check_pre_startup_quirks, NULL);
366
367	immu_dmar_rmrr_map();
368}
369
370static int
371get_conf_str(char *bopt, char **val)
372{
373	int ret;
374
375	/*
376	 * Check the rootnex.conf property
377	 * Fake up a dev_t since searching the global
378	 * property list needs it
379	 */
380	ret = ddi_prop_lookup_string(
381	    makedevice(ddi_name_to_major("rootnex"), 0),
382	    root_devinfo, DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
383	    bopt, val);
384
385	return (ret);
386}
387
388/*
389 * get_conf_opt()
390 * 	get a rootnex.conf setting  (always a boolean)
391 */
392static void
393get_conf_opt(char *bopt, boolean_t *kvar)
394{
395	char *val = NULL;
396
397	/*
398	 * Check the rootnex.conf property
399	 * Fake up a dev_t since searching the global
400	 * property list needs it
401	 */
402
403	if (get_conf_str(bopt, &val) != DDI_PROP_SUCCESS)
404		return;
405
406	if (strcmp(val, "true") == 0) {
407		*kvar = B_TRUE;
408	} else if (strcmp(val, "false") == 0) {
409		*kvar = B_FALSE;
410	} else {
		ddi_err(DER_WARN, NULL, "rootnex.conf switch %s=\"%s\" "
		    "is not set to true or false. Ignoring option.",
		    bopt, val);
414	}
415	ddi_prop_free(val);
416}
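/*
 * Example rootnex.conf entries for the boolean switches handled by
 * get_conf_opt(); note that the values are the strings "true"/"false",
 * not integers:
 *
 *	immu-enable="true";
 *	immu-quirk-usbrmrr="false";
 */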
417
/*
 * get_boot_str() / get_bootopt()
 * 	check a boot option (always a boolean)
 */
422static int
423get_boot_str(char *bopt, char **val)
424{
425	int ret;
426
427	ret = ddi_prop_lookup_string(DDI_DEV_T_ANY, root_devinfo,
428	    DDI_PROP_DONTPASS, bopt, val);
429
430	return (ret);
431}
432
433static void
434get_bootopt(char *bopt, boolean_t *kvar)
435{
436	char *val = NULL;
437
438	/*
439	 * All boot options set at the GRUB menu become
440	 * properties on the rootnex.
441	 */
442	if (get_boot_str(bopt, &val) != DDI_PROP_SUCCESS)
443		return;
444
445	if (strcmp(val, "true") == 0) {
446		*kvar = B_TRUE;
447	} else if (strcmp(val, "false") == 0) {
448		*kvar = B_FALSE;
449	} else {
		ddi_err(DER_WARN, NULL, "boot option %s=\"%s\" "
		    "is not set to true or false. Ignoring option.",
		    bopt, val);
453	}
454	ddi_prop_free(val);
455}
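/*
 * Example of setting the same switches from the boot loader. With GRUB,
 * each name=value pair given to -B becomes a property on the rootnex
 * (the line below is illustrative, not a complete GRUB entry):
 *
 *	kernel$ /platform/i86pc/kernel/$ISADIR/unix \
 *	    -B immu-enable=false,immu-dmar-print=true
 */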
456
457static void
458get_boot_dvma_mode(void)
459{
460	char *val = NULL;
461
462	if (get_boot_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
463	    != DDI_PROP_SUCCESS)
464		return;
465
466	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
467		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
468	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
469		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
470	} else {
471		ddi_err(DER_WARN, NULL, "bad value \"%s\" for boot option %s",
472		    val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
473	}
474	ddi_prop_free(val);
475}
476
477static void
478get_conf_dvma_mode(void)
479{
480	char *val = NULL;
481
482	if (get_conf_str(DDI_DVMA_MAPTYPE_ROOTNEX_PROP, &val)
483	    != DDI_PROP_SUCCESS)
484		return;
485
486	if (strcmp(val, DDI_DVMA_MAPTYPE_UNITY) == 0) {
487		immu_global_dvma_flags |= IMMU_FLAGS_UNITY;
488	} else if (strcmp(val, DDI_DVMA_MAPTYPE_XLATE) == 0) {
489		immu_global_dvma_flags &= ~IMMU_FLAGS_UNITY;
490	} else {
491		ddi_err(DER_WARN, NULL, "bad value \"%s\" for rootnex "
492		    "option %s", val, DDI_DVMA_MAPTYPE_ROOTNEX_PROP);
493	}
494	ddi_prop_free(val);
495}
496
497
498static void
499get_conf_tunables(char *bopt, int64_t *ivar)
500{
501	int64_t	*iarray;
502	uint_t n;
503
504	/*
505	 * Check the rootnex.conf property
506	 * Fake up a dev_t since searching the global
507	 * property list needs it
508	 */
509	if (ddi_prop_lookup_int64_array(
510	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
511	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, bopt,
512	    &iarray, &n) != DDI_PROP_SUCCESS) {
513		return;
514	}
515
516	if (n != 1) {
517		ddi_err(DER_WARN, NULL, "More than one value specified for "
518		    "%s property. Ignoring and using default",
519		    "immu-flush-gran");
520		ddi_prop_free(iarray);
521		return;
522	}
523
524	if (iarray[0] < 0) {
525		ddi_err(DER_WARN, NULL, "Negative value specified for "
		    "%s property. Ignoring and using default value",
527		    "immu-flush-gran");
528		ddi_prop_free(iarray);
529		return;
530	}
531
532	*ivar = iarray[0];
533
534	ddi_prop_free(iarray);
535}
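/*
 * Example rootnex.conf entry for the only tunable currently read this
 * way; a single non-negative integer is expected:
 *
 *	immu-flush-gran=5;
 */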
536
537static void
538read_conf_options(void)
539{
540	/* enable/disable options */
541	get_conf_opt("immu-enable", &immu_enable);
542	get_conf_opt("immu-dvma-enable", &immu_dvma_enable);
543	get_conf_opt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
544	get_conf_opt("immu-intrmap-enable", &immu_intrmap_enable);
545	get_conf_opt("immu-qinv-enable", &immu_qinv_enable);
546
547	/* workaround switches */
548	get_conf_opt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
549	get_conf_opt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
550	get_conf_opt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
551
552	/* debug printing */
553	get_conf_opt("immu-dmar-print", &immu_dmar_print);
554
555	/* get tunables */
556	get_conf_tunables("immu-flush-gran", &immu_flush_gran);
557
558	get_conf_dvma_mode();
559}
560
561static void
562read_boot_options(void)
563{
564	/* enable/disable options */
565	get_bootopt("immu-enable", &immu_enable);
566	get_bootopt("immu-dvma-enable", &immu_dvma_enable);
567	get_bootopt("immu-gfxdvma-enable", &immu_gfxdvma_enable);
568	get_bootopt("immu-intrmap-enable", &immu_intrmap_enable);
569	get_bootopt("immu-qinv-enable", &immu_qinv_enable);
570
571	/* workaround switches */
572	get_bootopt("immu-quirk-usbpage0", &immu_quirk_usbpage0);
573	get_bootopt("immu-quirk-usbfullpa", &immu_quirk_usbfullpa);
574	get_bootopt("immu-quirk-usbrmrr", &immu_quirk_usbrmrr);
575
576	/* debug printing */
577	get_bootopt("immu-dmar-print", &immu_dmar_print);
578
579	get_boot_dvma_mode();
580}
581
582static void
583mapping_list_setup(void)
584{
585	char **string_array;
586	uint_t nstrings;
587
588	if (ddi_prop_lookup_string_array(
589	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
590	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
591	    "immu-dvma-unity-drivers",
592	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
593		unity_driver_array = string_array;
594		nunity = nstrings;
595	}
596
597	if (ddi_prop_lookup_string_array(
598	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
599	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
600	    "immu-dvma-xlate-drivers",
601	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
602		xlate_driver_array = string_array;
603		nxlate = nstrings;
604	}
605
606	if (ddi_prop_lookup_string_array(
607	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
608	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
609	    "immu-dvma-premap-drivers",
610	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
611		premap_driver_array = string_array;
612		npremap = nstrings;
613	}
614
615	if (ddi_prop_lookup_string_array(
616	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
617	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL,
618	    "immu-dvma-nopremap-drivers",
619	    &string_array, &nstrings) == DDI_PROP_SUCCESS) {
620		nopremap_driver_array = string_array;
621		nnopremap = nstrings;
622	}
623}
624
625/*
626 * Note, this will not catch hardware not enumerated
627 * in early boot
628 */
629static boolean_t
630blacklisted_driver(void)
631{
632	char **strptr;
633	int i;
634	major_t maj;
635
636	/* need at least 2 strings */
637	if (nblacks < 2) {
638		return (B_FALSE);
639	}
640
641	for (i = 0; nblacks - i > 1; i++) {
642		strptr = &black_array[i];
643		if (strcmp(*strptr++, "DRIVER") == 0) {
644			if ((maj = ddi_name_to_major(*strptr++))
645			    != DDI_MAJOR_T_NONE) {
646				/* is there hardware bound to this drvr */
647				if (devnamesp[maj].dn_head != NULL) {
648					return (B_TRUE);
649				}
650			}
651			i += 1;   /* for loop adds 1, so add only 1 here */
652		}
653	}
654
655	return (B_FALSE);
656}
657
658static boolean_t
659blacklisted_smbios(void)
660{
661	id_t smid;
662	smbios_hdl_t *smhdl;
663	smbios_info_t sminf;
664	smbios_system_t smsys;
665	char *mfg, *product, *version;
666	char **strptr;
667	int i;
668
669	/* need at least 4 strings for this setting */
670	if (nblacks < 4) {
671		return (B_FALSE);
672	}
673
674	smhdl = smbios_open(NULL, SMB_VERSION, ksmbios_flags, NULL);
675	if (smhdl == NULL ||
676	    (smid = smbios_info_system(smhdl, &smsys)) == SMB_ERR ||
677	    smbios_info_common(smhdl, smid, &sminf) == SMB_ERR) {
678		return (B_FALSE);
679	}
680
681	mfg = (char *)sminf.smbi_manufacturer;
682	product = (char *)sminf.smbi_product;
683	version = (char *)sminf.smbi_version;
684
685	ddi_err(DER_CONT, NULL, "?System SMBIOS information:\n");
686	ddi_err(DER_CONT, NULL, "?Manufacturer = <%s>\n", mfg);
687	ddi_err(DER_CONT, NULL, "?Product = <%s>\n", product);
688	ddi_err(DER_CONT, NULL, "?Version = <%s>\n", version);
689
690	for (i = 0; nblacks - i > 3; i++) {
691		strptr = &black_array[i];
692		if (strcmp(*strptr++, "SMBIOS") == 0) {
693			if (strcmp(*strptr++, mfg) == 0 &&
694			    (*strptr[0] == '\0' ||
695			    strcmp(*strptr++, product) == 0) &&
696			    (*strptr[0] == '\0' ||
697			    strcmp(*strptr++, version) == 0)) {
698				return (B_TRUE);
699			}
700			i += 3;
701		}
702	}
703
704	return (B_FALSE);
705}
706
707static boolean_t
708blacklisted_acpi(void)
709{
710	if (nblacks == 0) {
711		return (B_FALSE);
712	}
713
714	return (immu_dmar_blacklisted(black_array, nblacks));
715}
716
/*
 * Check if the system is blacklisted by the Intel IOMMU driver,
 * i.e. whether the Intel IOMMU should be disabled on this system.
 * Currently a system can be blacklisted on the following grounds:
 *
 * 1. DMAR ACPI table information.
 *    This information includes things like
 *    manufacturer and revision number. If rootnex.conf
 *    has matching info set in its blacklist property
 *    then the Intel IOMMU will be disabled.
 *
 * 2. SMBIOS information
 *
 * 3. Driver installed - useful if a particular
 *    driver or hardware is toxic when the Intel IOMMU
 *    is turned on.
 */
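/*
 * A sketch of the rootnex.conf blacklist property as parsed by the
 * routines above. The manufacturer/product/version strings and the
 * driver name below are made up; an empty string acts as a wildcard
 * for the SMBIOS product and version fields:
 *
 *	immu-blacklist= "SMBIOS", "Acme Computer", "Model X", "1.0",
 *			"DRIVER", "frobdrv";
 */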
735
736static void
737blacklist_setup(void)
738{
739	char **string_array;
740	uint_t nstrings;
741
742	/*
743	 * Check the rootnex.conf blacklist property.
744	 * Fake up a dev_t since searching the global
745	 * property list needs it
746	 */
747	if (ddi_prop_lookup_string_array(
748	    makedevice(ddi_name_to_major("rootnex"), 0), root_devinfo,
749	    DDI_PROP_DONTPASS | DDI_PROP_ROOTNEX_GLOBAL, "immu-blacklist",
750	    &string_array, &nstrings) != DDI_PROP_SUCCESS) {
751		return;
752	}
753
754	/* smallest blacklist criteria works with multiples of 2 */
755	if (nstrings % 2 != 0) {
756		ddi_err(DER_WARN, NULL, "Invalid IOMMU blacklist "
757		    "rootnex.conf: number of strings must be a "
758		    "multiple of 2");
759		ddi_prop_free(string_array);
760		return;
761	}
762
763	black_array = string_array;
764	nblacks = nstrings;
765}
766
767static void
768blacklist_destroy(void)
769{
770	if (black_array) {
771		ddi_prop_free(black_array);
772		black_array = NULL;
773		nblacks = 0;
774	}
775}
776
777static char *
778immu_alloc_name(const char *str, int instance)
779{
780	size_t slen;
781	char *s;
782
783	slen = strlen(str) + IMMU_ISTRLEN + 1;
	s = kmem_zalloc(slen, KM_SLEEP);
785	if (s != NULL)
786		(void) snprintf(s, slen, "%s%d", str, instance);
787
788	return (s);
789}
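/*
 * For illustration: given the string "immu_pgtable_cache" and instance 0,
 * immu_alloc_name() returns "immu_pgtable_cache0"; this is how the
 * per-unit kmem cache and unit names are built in immu_state_alloc()
 * below.
 */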
790
791
/*
 * Now set all the fields in the order they are defined.
 * We do this only as a defensive-coding practice; it is
 * not a correctness issue.
 */
797static void *
798immu_state_alloc(int seg, void *dmar_unit)
799{
800	immu_t *immu;
801	char *nodename, *hcachename, *pcachename;
802	int instance;
803
804	dmar_unit = immu_dmar_walk_units(seg, dmar_unit);
805	if (dmar_unit == NULL) {
806		/* No more IOMMUs in this segment */
807		return (NULL);
808	}
809
810	immu = kmem_zalloc(sizeof (immu_t), KM_SLEEP);
811
812	mutex_init(&(immu->immu_lock), NULL, MUTEX_DRIVER, NULL);
813
814	mutex_enter(&(immu->immu_lock));
815
816	immu->immu_dmar_unit = dmar_unit;
817	immu->immu_dip = immu_dmar_unit_dip(dmar_unit);
818
819	nodename = ddi_node_name(immu->immu_dip);
820	instance = ddi_get_instance(immu->immu_dip);
821
822	immu->immu_name = immu_alloc_name(nodename, instance);
823	if (immu->immu_name == NULL)
824		return (NULL);
825
826	/*
827	 * the immu_intr_lock mutex is grabbed by the IOMMU
828	 * unit's interrupt handler so we need to use an
829	 * interrupt cookie for the mutex
830	 */
831	mutex_init(&(immu->immu_intr_lock), NULL, MUTEX_DRIVER,
832	    (void *)ipltospl(IMMU_INTR_IPL));
833
834	/* IOMMU regs related */
835	mutex_init(&(immu->immu_regs_lock), NULL, MUTEX_DEFAULT, NULL);
836	cv_init(&(immu->immu_regs_cv), NULL, CV_DEFAULT, NULL);
837	immu->immu_regs_busy = B_FALSE;
838
839	/* DVMA related */
840	immu->immu_dvma_coherent = B_FALSE;
841
842	/* DVMA context related */
843	rw_init(&(immu->immu_ctx_rwlock), NULL, RW_DEFAULT, NULL);
844
845	/* DVMA domain related */
846	list_create(&(immu->immu_domain_list), sizeof (domain_t),
847	    offsetof(domain_t, dom_immu_node));
848
849	/* DVMA special device lists */
850	immu->immu_dvma_gfx_only = B_FALSE;
851	list_create(&(immu->immu_dvma_lpc_list), sizeof (immu_devi_t),
852	    offsetof(immu_devi_t, imd_spc_node));
853	list_create(&(immu->immu_dvma_gfx_list), sizeof (immu_devi_t),
854	    offsetof(immu_devi_t, imd_spc_node));
855
856	/* interrupt remapping related */
857	mutex_init(&(immu->immu_intrmap_lock), NULL, MUTEX_DEFAULT, NULL);
858
859	/* qinv related */
860	mutex_init(&(immu->immu_qinv_lock), NULL, MUTEX_DEFAULT, NULL);
861
862	/*
863	 * insert this immu unit into the system-wide list
864	 */
865	list_insert_tail(&immu_list, immu);
866
867	pcachename = immu_alloc_name("immu_pgtable_cache", instance);
868	if (pcachename == NULL)
869		return (NULL);
870
871	hcachename = immu_alloc_name("immu_hdl_cache", instance);
872	if (hcachename == NULL)
873		return (NULL);
874
875	immu->immu_pgtable_cache = kmem_cache_create(pcachename,
876	    sizeof (pgtable_t), 0, pgtable_ctor, pgtable_dtor, NULL, immu,
877	    NULL, 0);
878	immu->immu_hdl_cache = kmem_cache_create(hcachename,
879	    sizeof (immu_hdl_priv_t), 64, immu_hdl_priv_ctor,
880	    NULL, NULL, immu, NULL, 0);
881
882	mutex_exit(&(immu->immu_lock));
883
884	ddi_err(DER_LOG, immu->immu_dip, "unit setup");
885
886	immu_dmar_set_immu(dmar_unit, immu);
887
888	return (dmar_unit);
889}
890
891static void
892immu_subsystems_setup(void)
893{
894	int seg;
895	void *unit_hdl;
896
897	ddi_err(DER_VERB, NULL,
898	    "Creating state structures for Intel IOMMU units");
899
900	mutex_init(&immu_lock, NULL, MUTEX_DEFAULT, NULL);
901	list_create(&immu_list, sizeof (immu_t), offsetof(immu_t, immu_node));
902
903	mutex_enter(&immu_lock);
904
905	unit_hdl = NULL;
906	for (seg = 0; seg < IMMU_MAXSEG; seg++) {
		while ((unit_hdl = immu_state_alloc(seg, unit_hdl)) != NULL)
			;
910	}
911
912	immu_regs_setup(&immu_list);	/* subsequent code needs this first */
913	immu_dvma_setup(&immu_list);
914	if (immu_qinv_setup(&immu_list) == DDI_SUCCESS)
915		immu_intrmap_setup(&immu_list);
916	else
917		immu_intrmap_enable = B_FALSE;
918
919	mutex_exit(&immu_lock);
920}
921
922/*
923 * immu_subsystems_startup()
924 * 	startup all units that were setup
925 */
926static void
927immu_subsystems_startup(void)
928{
929	immu_t *immu;
930	iommulib_ops_t *iommulib_ops;
931
932	mutex_enter(&immu_lock);
933
934	immu_dmar_startup();
935
936	immu = list_head(&immu_list);
937	for (; immu; immu = list_next(&immu_list, immu)) {
938
939		mutex_enter(&(immu->immu_lock));
940
941		immu_intr_register(immu);
942		immu_dvma_startup(immu);
943		immu_intrmap_startup(immu);
944		immu_qinv_startup(immu);
945
		/*
		 * Set IOMMU unit's regs to do
		 * the actual startup. This will
		 * set the immu->immu_regs_running field
		 * if the unit is successfully
		 * started
		 */
953		immu_regs_startup(immu);
954
955		mutex_exit(&(immu->immu_lock));
956
957		iommulib_ops = kmem_alloc(sizeof (iommulib_ops_t), KM_SLEEP);
958		*iommulib_ops = immulib_ops;
959		iommulib_ops->ilops_data = (void *)immu;
960		(void) iommulib_iommu_register(immu->immu_dip, iommulib_ops,
961		    &immu->immu_iommulib_handle);
962	}
963
964	mutex_exit(&immu_lock);
965}
966
967/* ##################  Intel IOMMU internal interfaces ###################### */
968
969/*
970 * Internal interfaces for IOMMU code (i.e. not exported to rootnex
971 * or rest of system)
972 */
973
/*
 * ddip can be NULL, in which case we walk up until we find the root dip.
 * NOTE: We never visit the root dip since it's not a hardware node.
 */
978int
979immu_walk_ancestor(
980	dev_info_t *rdip,
981	dev_info_t *ddip,
982	int (*func)(dev_info_t *, void *arg),
983	void *arg,
984	int *lvlp,
985	immu_flags_t immu_flags)
986{
987	dev_info_t *pdip;
988	int level;
989	int error = DDI_SUCCESS;
990
991	/* ddip and immu can be NULL */
992
993	/* Hold rdip so that branch is not detached */
994	ndi_hold_devi(rdip);
995	for (pdip = rdip, level = 1; pdip && pdip != root_devinfo;
996	    pdip = ddi_get_parent(pdip), level++) {
997
998		if (immu_devi_set(pdip, immu_flags) != DDI_SUCCESS) {
999			error = DDI_FAILURE;
1000			break;
1001		}
1002		if (func(pdip, arg) == DDI_WALK_TERMINATE) {
1003			break;
1004		}
1005		if (immu_flags & IMMU_FLAGS_DONTPASS) {
1006			break;
1007		}
1008		if (pdip == ddip) {
1009			break;
1010		}
1011	}
1012
1013	ndi_rele_devi(rdip);
1014
1015	if (lvlp)
1016		*lvlp = level;
1017
1018	return (error);
1019}
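/*
 * A hypothetical caller, for illustration only (the callback and
 * variable names are made up): count how many ancestors of rdip, up to
 * but not including the root, have their immu_devi state set up.
 *
 *	static int
 *	count_ancestor(dev_info_t *dip, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return (DDI_WALK_CONTINUE);
 *	}
 *
 *	int n = 0, level;
 *	(void) immu_walk_ancestor(rdip, NULL, count_ancestor, &n,
 *	    &level, IMMU_FLAGS_SLEEP);
 */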
1020
1021/* ########################  Intel IOMMU entry points ####################### */
1022/*
1023 * immu_init()
1024 *	called from rootnex_attach(). setup but don't startup the Intel IOMMU
1025 *      This is the first function called in Intel IOMMU code
1026 */
1027void
1028immu_init(void)
1029{
1030	char *phony_reg = "A thing of beauty is a joy forever";
1031
1032	/* Set some global shorthands that are needed by all of IOMMU code */
1033	root_devinfo = ddi_root_node();
1034
	/*
	 * The Intel IOMMU is supported only if the MMU (CPU) page size
	 * is equal to the IOMMU page size.
	 */
1039	/*LINTED*/
1040	if (MMU_PAGESIZE != IMMU_PAGESIZE) {
1041		ddi_err(DER_WARN, NULL,
1042		    "MMU page size (%d) is not equal to\n"
1043		    "IOMMU page size (%d). "
1044		    "Disabling Intel IOMMU. ",
1045		    MMU_PAGESIZE, IMMU_PAGESIZE);
1046		immu_enable = B_FALSE;
1047		return;
1048	}
1049
1050	/*
1051	 * Read rootnex.conf options. Do this before
1052	 * boot options so boot options can override .conf options.
1053	 */
1054	read_conf_options();
1055
	/*
	 * Retrieve the Intel IOMMU boot options.
	 * Do this before parsing the IOMMU ACPI (DMAR) table,
	 * since a boot option could potentially affect
	 * ACPI parsing.
	 */
1062	ddi_err(DER_CONT, NULL, "?Reading Intel IOMMU boot options\n");
1063	read_boot_options();
1064
1065	/*
1066	 * Check the IOMMU enable boot-option first.
1067	 * This is so that we can skip parsing the ACPI table
1068	 * if necessary because that may cause problems in
1069	 * systems with buggy BIOS or ACPI tables
1070	 */
1071	if (immu_enable == B_FALSE) {
1072		return;
1073	}
1074
1075	if (immu_intrmap_enable == B_TRUE)
1076		immu_qinv_enable = B_TRUE;
1077
1078	/*
1079	 * Next, check if the system even has an Intel IOMMU
1080	 * We use the presence or absence of the IOMMU ACPI
1081	 * table to detect Intel IOMMU.
1082	 */
1083	if (immu_dmar_setup() != DDI_SUCCESS) {
1084		immu_enable = B_FALSE;
1085		return;
1086	}
1087
1088	mapping_list_setup();
1089
1090	/*
1091	 * Check blacklists
1092	 */
1093	blacklist_setup();
1094
1095	if (blacklisted_smbios() == B_TRUE) {
1096		blacklist_destroy();
1097		immu_enable = B_FALSE;
1098		return;
1099	}
1100
1101	if (blacklisted_driver() == B_TRUE) {
1102		blacklist_destroy();
1103		immu_enable = B_FALSE;
1104		return;
1105	}
1106
1107	/*
1108	 * Read the "raw" DMAR ACPI table to get information
1109	 * and convert into a form we can use.
1110	 */
1111	if (immu_dmar_parse() != DDI_SUCCESS) {
1112		blacklist_destroy();
1113		immu_enable = B_FALSE;
1114		return;
1115	}
1116
1117	/*
1118	 * now that we have processed the ACPI table
1119	 * check if we need to blacklist this system
1120	 * based on ACPI info
1121	 */
1122	if (blacklisted_acpi() == B_TRUE) {
1123		immu_dmar_destroy();
1124		blacklist_destroy();
1125		immu_enable = B_FALSE;
1126		return;
1127	}
1128
1129	blacklist_destroy();
1130
1131	/*
1132	 * Check if system has HW quirks.
1133	 */
1134	pre_setup_quirks();
1135
1136	/* Now do the rest of the setup */
1137	immu_subsystems_setup();
1138
	/*
	 * Now that the IMMU is set up, create a phony
	 * reg prop so that suspend/resume works
	 */
1143	if (ddi_prop_update_byte_array(DDI_DEV_T_NONE, root_devinfo, "reg",
1144	    (uchar_t *)phony_reg, strlen(phony_reg) + 1) != DDI_PROP_SUCCESS) {
1145		ddi_err(DER_PANIC, NULL, "Failed to create reg prop for "
1146		    "rootnex node");
1147		/*NOTREACHED*/
1148	}
1149
1150	immu_setup = B_TRUE;
1151}
1152
1153/*
1154 * immu_startup()
1155 * 	called directly by boot code to startup
1156 *      all units of the IOMMU
1157 */
1158void
1159immu_startup(void)
1160{
1161	/*
1162	 * If IOMMU is disabled, do nothing
1163	 */
1164	if (immu_enable == B_FALSE) {
1165		return;
1166	}
1167
1168	if (immu_setup == B_FALSE) {
1169		ddi_err(DER_WARN, NULL, "Intel IOMMU not setup, "
1170		    "skipping IOMMU startup");
1171		return;
1172	}
1173
1174	pre_startup_quirks();
1175
1176	ddi_err(DER_CONT, NULL,
1177	    "?Starting Intel IOMMU (dmar) units...\n");
1178
1179	immu_subsystems_startup();
1180
1181	immu_running = B_TRUE;
1182}
1183
1184/*
1185 * Hook to notify IOMMU code of device tree changes
1186 */
1187void
1188immu_device_tree_changed(void)
1189{
1190	if (immu_setup == B_FALSE) {
1191		return;
1192	}
1193
1194	ddi_err(DER_WARN, NULL, "Intel IOMMU currently "
1195	    "does not use device tree updates");
1196}
1197
1198/*
1199 * Hook to notify IOMMU code of memory changes
1200 */
1201void
1202immu_physmem_update(uint64_t addr, uint64_t size)
1203{
1204	if (immu_setup == B_FALSE) {
1205		return;
1206	}
1207	immu_dvma_physmem_update(addr, size);
1208}
1209
1210/*
1211 * immu_quiesce()
1212 * 	quiesce all units that are running
1213 */
1214int
1215immu_quiesce(void)
1216{
1217	immu_t *immu;
1218	int ret = DDI_SUCCESS;
1219
1220	mutex_enter(&immu_lock);
1221
1222	if (immu_running == B_FALSE) {
1223		mutex_exit(&immu_lock);
1224		return (DDI_SUCCESS);
1225	}
1226
1227	immu = list_head(&immu_list);
1228	for (; immu; immu = list_next(&immu_list, immu)) {
1229
		/* if immu is not running, we don't quiesce */
1231		if (immu->immu_regs_running == B_FALSE)
1232			continue;
1233
1234		/* flush caches */
1235		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1236		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1237		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1238		rw_exit(&(immu->immu_ctx_rwlock));
1239		immu_regs_wbf_flush(immu);
1240
1241		mutex_enter(&(immu->immu_lock));
1242
1243		/*
1244		 * Set IOMMU unit's regs to do
1245		 * the actual shutdown.
1246		 */
1247		immu_regs_shutdown(immu);
1248		immu_regs_suspend(immu);
1249
1250		/* if immu is still running, we failed */
1251		if (immu->immu_regs_running == B_TRUE)
1252			ret = DDI_FAILURE;
1253		else
1254			immu->immu_regs_quiesced = B_TRUE;
1255
1256		mutex_exit(&(immu->immu_lock));
1257	}
1258
1259	if (ret == DDI_SUCCESS) {
1260		immu_running = B_FALSE;
1261		immu_quiesced = B_TRUE;
1262	}
1263	mutex_exit(&immu_lock);
1264
1265	return (ret);
1266}
1267
1268/*
1269 * immu_unquiesce()
1270 * 	unquiesce all units
1271 */
1272int
1273immu_unquiesce(void)
1274{
1275	immu_t *immu;
1276	int ret = DDI_SUCCESS;
1277
1278	mutex_enter(&immu_lock);
1279
1280	if (immu_quiesced == B_FALSE) {
1281		mutex_exit(&immu_lock);
1282		return (DDI_SUCCESS);
1283	}
1284
1285	immu = list_head(&immu_list);
1286	for (; immu; immu = list_next(&immu_list, immu)) {
1287
1288		mutex_enter(&(immu->immu_lock));
1289
		/* if immu was not quiesced, i.e. it was not running before */
1291		if (immu->immu_regs_quiesced == B_FALSE) {
1292			mutex_exit(&(immu->immu_lock));
1293			continue;
1294		}
1295
1296		if (immu_regs_resume(immu) != DDI_SUCCESS) {
1297			ret = DDI_FAILURE;
1298			mutex_exit(&(immu->immu_lock));
1299			continue;
1300		}
1301
1302		/* flush caches before unquiesce */
1303		rw_enter(&(immu->immu_ctx_rwlock), RW_WRITER);
1304		immu_flush_context_gbl(immu, &immu->immu_ctx_inv_wait);
1305		immu_flush_iotlb_gbl(immu, &immu->immu_ctx_inv_wait);
1306		rw_exit(&(immu->immu_ctx_rwlock));
1307
1308		/*
1309		 * Set IOMMU unit's regs to do
1310		 * the actual startup. This will
1311		 * set immu->immu_regs_running  field
1312		 * if the unit is successfully
1313		 * started
1314		 */
1315		immu_regs_startup(immu);
1316
		if (immu->immu_regs_running == B_FALSE) {
			ret = DDI_FAILURE;
		} else {
			/* the unit is running again; clear the quiesced state */
			immu_quiesced = B_FALSE;
			immu_running = B_TRUE;
			immu->immu_regs_quiesced = B_FALSE;
		}
1324
1325		mutex_exit(&(immu->immu_lock));
1326	}
1327
1328	mutex_exit(&immu_lock);
1329
1330	return (ret);
1331}
1332
1333void
1334immu_init_inv_wait(immu_inv_wait_t *iwp, const char *name, boolean_t sync)
1335{
1336	caddr_t vaddr;
1337	uint64_t paddr;
1338
1339	iwp->iwp_sync = sync;
1340
1341	vaddr = (caddr_t)&iwp->iwp_vstatus;
1342	paddr = pfn_to_pa(hat_getpfnum(kas.a_hat, vaddr));
1343	paddr += ((uintptr_t)vaddr) & MMU_PAGEOFFSET;
1344
1345	iwp->iwp_pstatus = paddr;
1346	iwp->iwp_name = name;
1347}
1348
1349/* ##############  END Intel IOMMU entry points ################## */
1350