xref: /illumos-gate/usr/src/uts/i86pc/os/ddi_impl.c (revision 86ef0a63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>
25  * Copyright 2014 Pluribus Networks, Inc.
26  * Copyright 2016 Nexenta Systems, Inc.
27  * Copyright 2018 Joyent, Inc.
28  */
29 
30 /*
31  * PC specific DDI implementation
32  */
33 #include <sys/types.h>
34 #include <sys/autoconf.h>
35 #include <sys/avintr.h>
36 #include <sys/bootconf.h>
37 #include <sys/conf.h>
38 #include <sys/cpuvar.h>
39 #include <sys/ddi_impldefs.h>
40 #include <sys/ddi_subrdefs.h>
41 #include <sys/ethernet.h>
42 #include <sys/fp.h>
43 #include <sys/instance.h>
44 #include <sys/kmem.h>
45 #include <sys/machsystm.h>
46 #include <sys/modctl.h>
47 #include <sys/promif.h>
48 #include <sys/prom_plat.h>
49 #include <sys/sunndi.h>
50 #include <sys/ndi_impldefs.h>
51 #include <sys/ddi_impldefs.h>
52 #include <sys/sysmacros.h>
53 #include <sys/systeminfo.h>
54 #include <sys/utsname.h>
55 #include <sys/atomic.h>
56 #include <sys/spl.h>
57 #include <sys/archsystm.h>
58 #include <vm/seg_kmem.h>
59 #include <sys/ontrap.h>
60 #include <sys/fm/protocol.h>
61 #include <sys/ramdisk.h>
62 #include <sys/sunndi.h>
63 #include <sys/vmem.h>
64 #include <sys/pci_impl.h>
65 #if defined(__xpv)
66 #include <sys/hypervisor.h>
67 #endif
68 #include <sys/mach_intr.h>
69 #include <vm/hat_i86.h>
70 #include <sys/x86_archext.h>
71 #include <sys/avl.h>
72 #include <sys/font.h>
73 
74 /*
75  * DDI Boot Configuration
76  */
77 
78 /*
79  * Platform drivers on this platform
80  */
81 char *platform_module_list[] = {
82 	"acpippm",
83 	"ppm",
84 	(char *)0
85 };
86 
87 /* pci bus resource maps */
88 struct pci_bus_resource *pci_bus_res;
89 
90 size_t dma_max_copybuf_size = 0x101000;		/* 1M + 4K */
91 
92 uint64_t ramdisk_start, ramdisk_end;
93 
94 int pseudo_isa = 0;
95 
96 /*
97  * Forward declarations
98  */
99 static int getlongprop_buf();
100 static void get_boot_properties(void);
101 static void impl_bus_initialprobe(void);
102 static void impl_bus_reprobe(void);
103 
104 static int poke_mem(peekpoke_ctlops_t *in_args);
105 static int peek_mem(peekpoke_ctlops_t *in_args);
106 
107 static int kmem_override_cache_attrs(caddr_t, size_t, uint_t);
108 
109 #if !defined(__xpv)
110 extern void immu_init(void);
111 #endif
112 
113 /*
114  * We use an AVL tree to store contiguous address allocations made with the
115  * kalloca() routine, so that we can return the size to free with kfreea().
116  * Note that in the future it would be vastly faster if we could eliminate
117  * this lookup by insisting that all callers keep track of their own sizes,
118  * just as for kmem_alloc().
119  */
/* One record per contiguous allocation tracked in ctgtree. */
struct ctgas {
	avl_node_t ctg_link;	/* AVL linkage */
	void *ctg_addr;		/* address of the allocation */
	size_t ctg_size;	/* size to hand back when it is freed */
};

static avl_tree_t ctgtree;

/* NOTE(review): presumably serializes access to ctgtree - confirm at users */
static kmutex_t		ctgmutex;
#define	CTGLOCK()	mutex_enter(&ctgmutex)
#define	CTGUNLOCK()	mutex_exit(&ctgmutex)
131 
132 /*
133  * Minimum pfn value of page_t's put on the free list.  This is to simplify
134  * support of ddi dma memory requests which specify small, non-zero addr_lo
135  * values.
136  *
137  * The default value of 2, which corresponds to the only known non-zero addr_lo
138  * value used, means a single page will be sacrificed (pfn typically starts
139  * at 1).  ddiphysmin can be set to 0 to disable. It cannot be set above 0x100
140  * otherwise mp startup panics.
141  */
pfn_t	ddiphysmin = 2;		/* 0 disables; must not exceed 0x100 */
143 
144 static void
145 check_driver_disable(void)
146 {
147 	int proplen = 128;
148 	char *prop_name;
149 	char *drv_name, *propval;
150 	major_t major;
151 
152 	prop_name = kmem_alloc(proplen, KM_SLEEP);
153 	for (major = 0; major < devcnt; major++) {
154 		drv_name = ddi_major_to_name(major);
155 		if (drv_name == NULL)
156 			continue;
157 		(void) snprintf(prop_name, proplen, "disable-%s", drv_name);
158 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
159 		    DDI_PROP_DONTPASS, prop_name, &propval) == DDI_SUCCESS) {
160 			if (strcmp(propval, "true") == 0) {
161 				devnamesp[major].dn_flags |= DN_DRIVER_REMOVED;
162 				cmn_err(CE_NOTE, "driver %s disabled",
163 				    drv_name);
164 			}
165 			ddi_prop_free(propval);
166 		}
167 	}
168 	kmem_free(prop_name, proplen);
169 }
170 
171 
172 /*
173  * Configure the hardware on the system.
174  * Called before the rootfs is mounted
175  */
176 void
177 configure(void)
178 {
179 	extern void i_ddi_init_root();
180 
181 	extern int fpu_ignored;
182 
183 	/*
184 	 * Determine if an FPU is attached
185 	 */
186 
187 	fpu_probe();
188 
189 
190 	if (fpu_ignored) {
191 		printf("FP hardware will not be used\n");
192 	} else if (!fpu_exists) {
193 		printf("No FPU in configuration\n");
194 	}
195 
196 	/*
197 	 * Initialize devices on the machine.
198 	 * Uses configuration tree built by the PROMs to determine what
199 	 * is present, and builds a tree of prototype dev_info nodes
200 	 * corresponding to the hardware which identified itself.
201 	 */
202 
203 	/*
204 	 * Initialize root node.
205 	 */
206 	i_ddi_init_root();
207 
208 	/* reprogram devices not set up by firmware (BIOS) */
209 	impl_bus_reprobe();
210 
211 #if !defined(__xpv)
212 	/*
213 	 * Setup but don't startup the IOMMU
214 	 * Startup happens later via a direct call
215 	 * to IOMMU code by boot code.
216 	 * At this point, all PCI bus renumbering
217 	 * is done, so safe to init the IMMU
218 	 * AKA Intel IOMMU.
219 	 */
220 	immu_init();
221 #endif
222 
223 	/*
224 	 * attach the isa nexus to get ACPI resource usage
225 	 * isa is "kind of" a pseudo node
226 	 */
227 #if defined(__xpv)
228 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
229 		if (pseudo_isa)
230 			(void) i_ddi_attach_pseudo_node("isa");
231 		else
232 			(void) i_ddi_attach_hw_nodes("isa");
233 	}
234 #else
235 	if (pseudo_isa)
236 		(void) i_ddi_attach_pseudo_node("isa");
237 	else
238 		(void) i_ddi_attach_hw_nodes("isa");
239 #endif
240 }
241 
242 /*
243  * The "status" property indicates the operational status of a device.
244  * If this property is present, the value is a string indicating the
245  * status of the device as follows:
246  *
247  *	"okay"		operational.
248  *	"disabled"	not operational, but might become operational.
249  *	"fail"		not operational because a fault has been detected,
250  *			and it is unlikely that the device will become
251  *			operational without repair. no additional details
252  *			are available.
253  *	"fail-xxx"	not operational because a fault has been detected,
254  *			and it is unlikely that the device will become
255  *			operational without repair. "xxx" is additional
256  *			human-readable information about the particular
257  *			fault condition that was detected.
258  *
259  * The absence of this property means that the operational status is
260  * unknown or okay.
261  *
262  * This routine checks the status property of the specified device node
263  * and returns 0 if the operational status indicates failure, and 1 otherwise.
264  *
265  * The property may exist on plug-in cards the existed before IEEE 1275-1994.
266  * And, in that case, the property may not even be a string. So we carefully
267  * check for the value "fail", in the beginning of the string, noting
268  * the property length.
269  */
270 int
271 status_okay(int id, char *buf, int buflen)
272 {
273 	char status_buf[OBP_MAXPROPNAME];
274 	char *bufp = buf;
275 	int len = buflen;
276 	int proplen;
277 	static const char *status = "status";
278 	static const char *fail = "fail";
279 	int fail_len = (int)strlen(fail);
280 
281 	/*
282 	 * Get the proplen ... if it's smaller than "fail",
283 	 * or doesn't exist ... then we don't care, since
284 	 * the value can't begin with the char string "fail".
285 	 *
286 	 * NB: proplen, if it's a string, includes the NULL in the
287 	 * the size of the property, and fail_len does not.
288 	 */
289 	proplen = prom_getproplen((pnode_t)id, (caddr_t)status);
290 	if (proplen <= fail_len)	/* nonexistant or uninteresting len */
291 		return (1);
292 
293 	/*
294 	 * if a buffer was provided, use it
295 	 */
296 	if ((buf == (char *)NULL) || (buflen <= 0)) {
297 		bufp = status_buf;
298 		len = sizeof (status_buf);
299 	}
300 	*bufp = (char)0;
301 
302 	/*
303 	 * Get the property into the buffer, to the extent of the buffer,
304 	 * and in case the buffer is smaller than the property size,
305 	 * NULL terminate the buffer. (This handles the case where
306 	 * a buffer was passed in and the caller wants to print the
307 	 * value, but the buffer was too small).
308 	 */
309 	(void) prom_bounded_getprop((pnode_t)id, (caddr_t)status,
310 	    (caddr_t)bufp, len);
311 	*(bufp + len - 1) = (char)0;
312 
313 	/*
314 	 * If the value begins with the char string "fail",
315 	 * then it means the node is failed. We don't care
316 	 * about any other values. We assume the node is ok
317 	 * although it might be 'disabled'.
318 	 */
319 	if (strncmp(bufp, fail, fail_len) == 0)
320 		return (0);
321 
322 	return (1);
323 }
324 
325 /*
326  * Check the status of the device node passed as an argument.
327  *
328  *	if ((status is OKAY) || (status is DISABLED))
329  *		return DDI_SUCCESS
330  *	else
331  *		print a warning and return DDI_FAILURE
332  */
333 /*ARGSUSED1*/
334 int
335 check_status(int id, char *name, dev_info_t *parent)
336 {
337 	char status_buf[64];
338 	char devtype_buf[OBP_MAXPROPNAME];
339 	int retval = DDI_FAILURE;
340 
341 	/*
342 	 * is the status okay?
343 	 */
344 	if (status_okay(id, status_buf, sizeof (status_buf)))
345 		return (DDI_SUCCESS);
346 
347 	/*
348 	 * a status property indicating bad memory will be associated
349 	 * with a node which has a "device_type" property with a value of
350 	 * "memory-controller". in this situation, return DDI_SUCCESS
351 	 */
352 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
353 	    sizeof (devtype_buf)) > 0) {
354 		if (strcmp(devtype_buf, "memory-controller") == 0)
355 			retval = DDI_SUCCESS;
356 	}
357 
358 	/*
359 	 * print the status property information
360 	 */
361 	cmn_err(CE_WARN, "status '%s' for '%s'", status_buf, name);
362 	return (retval);
363 }
364 
/*
 * Calls softint() and unconditionally returns 1.  Neither argument is
 * examined.
 */
/*ARGSUSED*/
uint_t
softlevel1(caddr_t arg1, caddr_t arg2)
{
	softint();
	return (1);
}
372 
373 /*
374  * Allow for implementation specific correction of PROM property values.
375  */
376 
/*ARGSUSED*/
void
impl_fix_props(dev_info_t *dip, dev_info_t *ch_dip, char *name, int len,
    caddr_t buffer)
{
	/*
	 * There are no adjustments needed in this implementation; all
	 * arguments are ignored and the buffer is left untouched.
	 */
}
386 
387 static int
388 getlongprop_buf(int id, char *name, char *buf, int maxlen)
389 {
390 	int size;
391 
392 	size = prom_getproplen((pnode_t)id, name);
393 	if (size <= 0 || (size > maxlen - 1))
394 		return (-1);
395 
396 	if (-1 == prom_getprop((pnode_t)id, name, buf))
397 		return (-1);
398 
399 	if (strcmp("name", name) == 0) {
400 		if (buf[size - 1] != '\0') {
401 			buf[size] = '\0';
402 			size += 1;
403 		}
404 	}
405 
406 	return (size);
407 }
408 
409 static int
410 get_prop_int_array(dev_info_t *di, char *pname, int **pval, uint_t *plen)
411 {
412 	int ret;
413 
414 	if ((ret = ddi_prop_lookup_int_array(DDI_DEV_T_ANY, di,
415 	    DDI_PROP_DONTPASS, pname, pval, plen))
416 	    == DDI_PROP_SUCCESS) {
417 		*plen = (*plen) * (sizeof (int));
418 	}
419 	return (ret);
420 }
421 
422 
423 /*
424  * Node Configuration
425  */
426 
427 struct prop_ispec {
428 	uint_t	pri, vec;
429 };
430 
431 /*
432  * For the x86, we're prepared to claim that the interrupt string
433  * is in the form of a list of <ipl,vec> specifications.
434  */
435 
436 #define	VEC_MIN	1
437 #define	VEC_MAX	255
438 
439 static int
440 impl_xlate_intrs(dev_info_t *child, int *in,
441     struct ddi_parent_private_data *pdptr)
442 {
443 	size_t size;
444 	int n;
445 	struct intrspec *new;
446 	caddr_t got_prop;
447 	int *inpri;
448 	int got_len;
449 	extern int ignore_hardware_nodes;	/* force flag from ddi_impl.c */
450 
451 	static char bad_intr_fmt[] =
452 	    "bad interrupt spec from %s%d - ipl %d, irq %d\n";
453 
454 	/*
455 	 * determine if the driver is expecting the new style "interrupts"
456 	 * property which just contains the IRQ, or the old style which
457 	 * contains pairs of <IPL,IRQ>.  if it is the new style, we always
458 	 * assign IPL 5 unless an "interrupt-priorities" property exists.
459 	 * in that case, the "interrupt-priorities" property contains the
460 	 * IPL values that match, one for one, the IRQ values in the
461 	 * "interrupts" property.
462 	 */
463 	inpri = NULL;
464 	if ((ddi_getprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
465 	    "ignore-hardware-nodes", -1) != -1) || ignore_hardware_nodes) {
466 		/* the old style "interrupts" property... */
467 
468 		/*
469 		 * The list consists of <ipl,vec> elements
470 		 */
471 		if ((n = (*in++ >> 1)) < 1)
472 			return (DDI_FAILURE);
473 
474 		pdptr->par_nintr = n;
475 		size = n * sizeof (struct intrspec);
476 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
477 
478 		while (n--) {
479 			int level = *in++;
480 			int vec = *in++;
481 
482 			if (level < 1 || level > MAXIPL ||
483 			    vec < VEC_MIN || vec > VEC_MAX) {
484 				cmn_err(CE_CONT, bad_intr_fmt,
485 				    DEVI(child)->devi_name,
486 				    DEVI(child)->devi_instance, level, vec);
487 				goto broken;
488 			}
489 			new->intrspec_pri = level;
490 			if (vec != 2)
491 				new->intrspec_vec = vec;
492 			else
493 				/*
494 				 * irq 2 on the PC bus is tied to irq 9
495 				 * on ISA, EISA and MicroChannel
496 				 */
497 				new->intrspec_vec = 9;
498 			new++;
499 		}
500 
501 		return (DDI_SUCCESS);
502 	} else {
503 		/* the new style "interrupts" property... */
504 
505 		/*
506 		 * The list consists of <vec> elements
507 		 */
508 		if ((n = (*in++)) < 1)
509 			return (DDI_FAILURE);
510 
511 		pdptr->par_nintr = n;
512 		size = n * sizeof (struct intrspec);
513 		new = pdptr->par_intr = kmem_zalloc(size, KM_SLEEP);
514 
515 		/* XXX check for "interrupt-priorities" property... */
516 		if (ddi_getlongprop(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
517 		    "interrupt-priorities", (caddr_t)&got_prop, &got_len)
518 		    == DDI_PROP_SUCCESS) {
519 			if (n != (got_len / sizeof (int))) {
520 				cmn_err(CE_CONT,
521 				    "bad interrupt-priorities length"
522 				    " from %s%d: expected %d, got %d\n",
523 				    DEVI(child)->devi_name,
524 				    DEVI(child)->devi_instance, n,
525 				    (int)(got_len / sizeof (int)));
526 				goto broken;
527 			}
528 			inpri = (int *)got_prop;
529 		}
530 
531 		while (n--) {
532 			int level;
533 			int vec = *in++;
534 
535 			if (inpri == NULL)
536 				level = 5;
537 			else
538 				level = *inpri++;
539 
540 			if (level < 1 || level > MAXIPL ||
541 			    vec < VEC_MIN || vec > VEC_MAX) {
542 				cmn_err(CE_CONT, bad_intr_fmt,
543 				    DEVI(child)->devi_name,
544 				    DEVI(child)->devi_instance, level, vec);
545 				goto broken;
546 			}
547 			new->intrspec_pri = level;
548 			if (vec != 2)
549 				new->intrspec_vec = vec;
550 			else
551 				/*
552 				 * irq 2 on the PC bus is tied to irq 9
553 				 * on ISA, EISA and MicroChannel
554 				 */
555 				new->intrspec_vec = 9;
556 			new++;
557 		}
558 
559 		if (inpri != NULL)
560 			kmem_free(got_prop, got_len);
561 		return (DDI_SUCCESS);
562 	}
563 
564 broken:
565 	kmem_free(pdptr->par_intr, size);
566 	pdptr->par_intr = NULL;
567 	pdptr->par_nintr = 0;
568 	if (inpri != NULL)
569 		kmem_free(got_prop, got_len);
570 
571 	return (DDI_FAILURE);
572 }
573 
574 /*
575  * Create a ddi_parent_private_data structure from the ddi properties of
576  * the dev_info node.
577  *
578  * The "reg" and either an "intr" or "interrupts" properties are required
579  * if the driver wishes to create mappings or field interrupts on behalf
580  * of the device.
581  *
582  * The "reg" property is assumed to be a list of at least one triple
583  *
584  *	<bustype, address, size>*1
585  *
586  * The "intr" property is assumed to be a list of at least one duple
587  *
588  *	<SPARC ipl, vector#>*1
589  *
590  * The "interrupts" property is assumed to be a list of at least one
591  * n-tuples that describes the interrupt capabilities of the bus the device
592  * is connected to.  For SBus, this looks like
593  *
594  *	<SBus-level>*1
595  *
596  * (This property obsoletes the 'intr' property).
597  *
598  * The "ranges" property is optional.
599  */
600 void
601 make_ddi_ppd(dev_info_t *child, struct ddi_parent_private_data **ppd)
602 {
603 	struct ddi_parent_private_data *pdptr;
604 	int n;
605 	int *reg_prop, *rng_prop, *intr_prop, *irupts_prop;
606 	uint_t reg_len, rng_len, intr_len, irupts_len;
607 
608 	*ppd = pdptr = kmem_zalloc(sizeof (*pdptr), KM_SLEEP);
609 
610 	/*
611 	 * Handle the 'reg' property.
612 	 */
613 	if ((get_prop_int_array(child, "reg", &reg_prop, &reg_len) ==
614 	    DDI_PROP_SUCCESS) && (reg_len != 0)) {
615 		pdptr->par_nreg = reg_len / (int)sizeof (struct regspec);
616 		pdptr->par_reg = (struct regspec *)reg_prop;
617 	}
618 
619 	/*
620 	 * See if I have a range (adding one where needed - this
621 	 * means to add one for sbus node in sun4c, when romvec > 0,
622 	 * if no range is already defined in the PROM node.
623 	 * (Currently no sun4c PROMS define range properties,
624 	 * but they should and may in the future.)  For the SBus
625 	 * node, the range is defined by the SBus reg property.
626 	 */
627 	if (get_prop_int_array(child, "ranges", &rng_prop, &rng_len)
628 	    == DDI_PROP_SUCCESS) {
629 		pdptr->par_nrng = rng_len / (int)(sizeof (struct rangespec));
630 		pdptr->par_rng = (struct rangespec *)rng_prop;
631 	}
632 
633 	/*
634 	 * Handle the 'intr' and 'interrupts' properties
635 	 */
636 
637 	/*
638 	 * For backwards compatibility
639 	 * we first look for the 'intr' property for the device.
640 	 */
641 	if (get_prop_int_array(child, "intr", &intr_prop, &intr_len)
642 	    != DDI_PROP_SUCCESS) {
643 		intr_len = 0;
644 	}
645 
646 	/*
647 	 * If we're to support bus adapters and future platforms cleanly,
648 	 * we need to support the generalized 'interrupts' property.
649 	 */
650 	if (get_prop_int_array(child, "interrupts", &irupts_prop,
651 	    &irupts_len) != DDI_PROP_SUCCESS) {
652 		irupts_len = 0;
653 	} else if (intr_len != 0) {
654 		/*
655 		 * If both 'intr' and 'interrupts' are defined,
656 		 * then 'interrupts' wins and we toss the 'intr' away.
657 		 */
658 		ddi_prop_free((void *)intr_prop);
659 		intr_len = 0;
660 	}
661 
662 	if (intr_len != 0) {
663 
664 		/*
665 		 * Translate the 'intr' property into an array
666 		 * an array of struct intrspec's.  There's not really
667 		 * very much to do here except copy what's out there.
668 		 */
669 
670 		struct intrspec *new;
671 		struct prop_ispec *l;
672 
673 		n = pdptr->par_nintr = intr_len / sizeof (struct prop_ispec);
674 		l = (struct prop_ispec *)intr_prop;
675 		pdptr->par_intr =
676 		    new = kmem_zalloc(n * sizeof (struct intrspec), KM_SLEEP);
677 		while (n--) {
678 			new->intrspec_pri = l->pri;
679 			new->intrspec_vec = l->vec;
680 			new++;
681 			l++;
682 		}
683 		ddi_prop_free((void *)intr_prop);
684 
685 	} else if ((n = irupts_len) != 0) {
686 		size_t size;
687 		int *out;
688 
689 		/*
690 		 * Translate the 'interrupts' property into an array
691 		 * of intrspecs for the rest of the DDI framework to
692 		 * toy with.  Only our ancestors really know how to
693 		 * do this, so ask 'em.  We massage the 'interrupts'
694 		 * property so that it is pre-pended by a count of
695 		 * the number of integers in the argument.
696 		 */
697 		size = sizeof (int) + n;
698 		out = kmem_alloc(size, KM_SLEEP);
699 		*out = n / sizeof (int);
700 		bcopy(irupts_prop, out + 1, (size_t)n);
701 		ddi_prop_free((void *)irupts_prop);
702 		if (impl_xlate_intrs(child, out, pdptr) != DDI_SUCCESS) {
703 			cmn_err(CE_CONT,
704 			    "Unable to translate 'interrupts' for %s%d\n",
705 			    DEVI(child)->devi_binding_name,
706 			    DEVI(child)->devi_instance);
707 		}
708 		kmem_free(out, size);
709 	}
710 }
711 
712 /*
713  * Name a child
714  */
715 static int
716 impl_sunbus_name_child(dev_info_t *child, char *name, int namelen)
717 {
718 	/*
719 	 * Fill in parent-private data and this function returns to us
720 	 * an indication if it used "registers" to fill in the data.
721 	 */
722 	if (ddi_get_parent_data(child) == NULL) {
723 		struct ddi_parent_private_data *pdptr;
724 		make_ddi_ppd(child, &pdptr);
725 		ddi_set_parent_data(child, pdptr);
726 	}
727 
728 	name[0] = '\0';
729 	if (sparc_pd_getnreg(child) > 0) {
730 		(void) snprintf(name, namelen, "%x,%x",
731 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_bustype,
732 		    (uint_t)sparc_pd_getreg(child, 0)->regspec_addr);
733 	}
734 
735 	return (DDI_SUCCESS);
736 }
737 
738 /*
739  * Called from the bus_ctl op of sunbus (sbus, obio, etc) nexus drivers
740  * to implement the DDI_CTLOPS_INITCHILD operation.  That is, it names
741  * the children of sun busses based on the reg spec.
742  *
743  * Handles the following properties (in make_ddi_ppd):
744  *	Property		value
745  *	  Name			type
746  *	reg		register spec
747  *	intr		old-form interrupt spec
748  *	interrupts	new (bus-oriented) interrupt spec
749  *	ranges		range spec
750  */
751 int
752 impl_ddi_sunbus_initchild(dev_info_t *child)
753 {
754 	char name[MAXNAMELEN];
755 	void impl_ddi_sunbus_removechild(dev_info_t *);
756 
757 	/*
758 	 * Name the child, also makes parent private data
759 	 */
760 	(void) impl_sunbus_name_child(child, name, MAXNAMELEN);
761 	ddi_set_name_addr(child, name);
762 
763 	/*
764 	 * Attempt to merge a .conf node; if successful, remove the
765 	 * .conf node.
766 	 */
767 	if ((ndi_dev_is_persistent_node(child) == 0) &&
768 	    (ndi_merge_node(child, impl_sunbus_name_child) == DDI_SUCCESS)) {
769 		/*
770 		 * Return failure to remove node
771 		 */
772 		impl_ddi_sunbus_removechild(child);
773 		return (DDI_FAILURE);
774 	}
775 	return (DDI_SUCCESS);
776 }
777 
778 void
779 impl_free_ddi_ppd(dev_info_t *dip)
780 {
781 	struct ddi_parent_private_data *pdptr;
782 	size_t n;
783 
784 	if ((pdptr = ddi_get_parent_data(dip)) == NULL)
785 		return;
786 
787 	if ((n = (size_t)pdptr->par_nintr) != 0)
788 		/*
789 		 * Note that kmem_free is used here (instead of
790 		 * ddi_prop_free) because the contents of the
791 		 * property were placed into a separate buffer and
792 		 * mucked with a bit before being stored in par_intr.
793 		 * The actual return value from the prop lookup
794 		 * was freed with ddi_prop_free previously.
795 		 */
796 		kmem_free(pdptr->par_intr, n * sizeof (struct intrspec));
797 
798 	if ((n = (size_t)pdptr->par_nrng) != 0)
799 		ddi_prop_free((void *)pdptr->par_rng);
800 
801 	if ((n = pdptr->par_nreg) != 0)
802 		ddi_prop_free((void *)pdptr->par_reg);
803 
804 	kmem_free(pdptr, sizeof (*pdptr));
805 	ddi_set_parent_data(dip, NULL);
806 }
807 
/*
 * Tear down a child node: free its parent private data, clear its name
 * address, and remove its device properties.
 */
void
impl_ddi_sunbus_removechild(dev_info_t *dip)
{
	impl_free_ddi_ppd(dip);
	ddi_set_name_addr(dip, NULL);
	/*
	 * Strip the node to properly convert it back to prototype form
	 */
	impl_rem_dev_props(dip);
}
818 
819 /*
820  * DDI Interrupt
821  */
822 
823 /*
824  * turn this on to force isa, eisa, and mca device to ignore the new
825  * hardware nodes in the device tree (normally turned on only for
826  * drivers that need it by setting the property "ignore-hardware-nodes"
827  * in their driver.conf file).
828  *
829  * 7/31/96 -- Turned off globally.  Leaving variable in for the moment
830  *		as safety valve.
831  */
int ignore_hardware_nodes = 0;	/* read by impl_xlate_intrs() */

/*
 * Local data
 */
static struct impl_bus_promops *impl_busp;
838 
839 
840 /*
841  * New DDI interrupt framework
842  */
843 
844 /*
845  * i_ddi_intr_ops:
846  *
847  * This is the interrupt operator function wrapper for the bus function
848  * bus_intr_op.
849  */
850 int
851 i_ddi_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t op,
852     ddi_intr_handle_impl_t *hdlp, void * result)
853 {
854 	dev_info_t	*pdip = (dev_info_t *)DEVI(dip)->devi_parent;
855 	int		ret = DDI_FAILURE;
856 
857 	/* request parent to process this interrupt op */
858 	if (NEXUS_HAS_INTR_OP(pdip))
859 		ret = (*(DEVI(pdip)->devi_ops->devo_bus_ops->bus_intr_op))(
860 		    pdip, rdip, op, hdlp, result);
861 	else
862 		cmn_err(CE_WARN, "Failed to process interrupt "
863 		    "for %s%d due to down-rev nexus driver %s%d",
864 		    ddi_get_name(rdip), ddi_get_instance(rdip),
865 		    ddi_get_name(pdip), ddi_get_instance(pdip));
866 	return (ret);
867 }
868 
869 /*
870  * i_ddi_add_softint - allocate and add a soft interrupt to the system
871  */
872 int
873 i_ddi_add_softint(ddi_softint_hdl_impl_t *hdlp)
874 {
875 	int ret;
876 
877 	/* add soft interrupt handler */
878 	ret = add_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func,
879 	    DEVI(hdlp->ih_dip)->devi_name, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
880 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
881 }
882 
883 
/*
 * i_ddi_remove_softint - remove the soft interrupt handler registered for
 * hdlp at its current priority.  The result of the removal is ignored.
 */
void
i_ddi_remove_softint(ddi_softint_hdl_impl_t *hdlp)
{
	(void) rem_avsoftintr((void *)hdlp, hdlp->ih_pri, hdlp->ih_cb_func);
}
889 
890 
extern void (*setsoftint)(int, struct av_softinfo *);
extern boolean_t av_check_softint_pending(struct av_softinfo *, boolean_t);

/*
 * i_ddi_trigger_softint - request a soft interrupt for hdlp.
 *
 * Returns DDI_EPENDING, without changing anything, when the pending check
 * on hdlp->ih_pending reports true.  Otherwise the handler's second
 * argument is updated to arg2, the soft interrupt is posted through the
 * setsoftint hook, and DDI_SUCCESS is returned.
 */
int
i_ddi_trigger_softint(ddi_softint_hdl_impl_t *hdlp, void *arg2)
{
	if (av_check_softint_pending(hdlp->ih_pending, B_FALSE))
		return (DDI_EPENDING);

	update_avsoftintr_args((void *)hdlp, hdlp->ih_pri, arg2);

	(*setsoftint)(hdlp->ih_pri, hdlp->ih_pending);
	return (DDI_SUCCESS);
}
905 
906 /*
907  * i_ddi_set_softint_pri:
908  *
909  * The way this works is that it first tries to add a softint vector
910  * at the new priority in hdlp. If that succeeds; then it removes the
911  * existing softint vector at the old priority.
912  */
913 int
914 i_ddi_set_softint_pri(ddi_softint_hdl_impl_t *hdlp, uint_t old_pri)
915 {
916 	int ret;
917 
918 	/*
919 	 * If a softint is pending at the old priority then fail the request.
920 	 */
921 	if (av_check_softint_pending(hdlp->ih_pending, B_TRUE))
922 		return (DDI_FAILURE);
923 
924 	ret = av_softint_movepri((void *)hdlp, old_pri);
925 	return (ret ? DDI_SUCCESS : DDI_FAILURE);
926 }
927 
928 void
929 i_ddi_alloc_intr_phdl(ddi_intr_handle_impl_t *hdlp)
930 {
931 	hdlp->ih_private = (void *)kmem_zalloc(sizeof (ihdl_plat_t), KM_SLEEP);
932 }
933 
/*
 * Free the ihdl_plat_t hanging off hdlp->ih_private and clear the pointer.
 */
void
i_ddi_free_intr_phdl(ddi_intr_handle_impl_t *hdlp)
{
	kmem_free(hdlp->ih_private, sizeof (ihdl_plat_t));
	hdlp->ih_private = NULL;
}
940 
941 int
942 i_ddi_get_intx_nintrs(dev_info_t *dip)
943 {
944 	struct ddi_parent_private_data *pdp;
945 
946 	if ((pdp = ddi_get_parent_data(dip)) == NULL)
947 		return (0);
948 
949 	return (pdp->par_nintr);
950 }
951 
952 /*
953  * DDI Memory/DMA
954  */
955 
956 /*
957  * Support for allocating DMAable memory to implement
958  * ddi_dma_mem_alloc(9F) interface.
959  */
960 
/*
 * kmem_io_init() creates KA_NCACHE caches per arena, with object sizes
 * KA_ALIGN << c for c in [0, KA_NCACHE).
 */
#define	KA_ALIGN_SHIFT	7
#define	KA_ALIGN	(1 << KA_ALIGN_SHIFT)
#define	KA_NCACHE	(PAGESHIFT + 1 - KA_ALIGN_SHIFT)
964 
965 /*
966  * Dummy DMA attribute template for kmem_io[].kmem_io_attr.  We only
967  * care about addr_lo, addr_hi, and align.  addr_hi will be dynamically set.
968  */
969 
970 static ddi_dma_attr_t kmem_io_attr = {
971 	DMA_ATTR_V0,
972 	0x0000000000000000ULL,		/* dma_attr_addr_lo */
973 	0x0000000000000000ULL,		/* dma_attr_addr_hi */
974 	0x00ffffff,
975 	0x1000,				/* dma_attr_align */
976 	1, 1, 0xffffffffULL, 0xffffffffULL, 0x1, 1, 0
977 };
978 
979 /* kmem io memory ranges and indices */
/* Entries are in the same order as the rows of io_arena_params[]. */
enum {
	IO_4P, IO_64G, IO_4G, IO_2G, IO_1G, IO_512M,
	IO_256M, IO_128M, IO_64M, IO_32M, IO_16M, MAX_MEM_RANGES
};

/* Per-range state; the arena and caches are filled in by kmem_io_init(). */
static struct {
	vmem_t		*kmem_io_arena;
	kmem_cache_t	*kmem_io_cache[KA_NCACHE];
	ddi_dma_attr_t	kmem_io_attr;	/* passed to page_create_io() */
} kmem_io[MAX_MEM_RANGES];

static int kmem_io_idx;		/* index of first populated kmem_io[] */
992 
993 static page_t *
994 page_create_io_wrapper(void *addr, size_t len, int vmflag, void *arg)
995 {
996 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
997 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
998 
999 	return (page_create_io(&kvp, (u_offset_t)(uintptr_t)addr, len,
1000 	    PG_EXCL | ((vmflag & VM_NOSLEEP) ? 0 : PG_WAIT), &kas, addr, arg));
1001 }
1002 
#ifdef __xpv
/*
 * Free routine installed for the kmem_io arenas on __xpv builds (see
 * kmem_io_init()).  Forwards to segkmem_xfree() with page_destroy_io
 * as the final argument.
 */
static void
segkmem_free_io(vmem_t *vmp, void *ptr, size_t size)
{
	extern void page_destroy_io(page_t *);
	segkmem_xfree(vmp, ptr, size, &kvp, page_destroy_io);
}
#endif
1011 
1012 static void *
1013 segkmem_alloc_io_4P(vmem_t *vmp, size_t size, int vmflag)
1014 {
1015 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1016 	    page_create_io_wrapper, &kmem_io[IO_4P].kmem_io_attr));
1017 }
1018 
1019 static void *
1020 segkmem_alloc_io_64G(vmem_t *vmp, size_t size, int vmflag)
1021 {
1022 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1023 	    page_create_io_wrapper, &kmem_io[IO_64G].kmem_io_attr));
1024 }
1025 
1026 static void *
1027 segkmem_alloc_io_4G(vmem_t *vmp, size_t size, int vmflag)
1028 {
1029 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1030 	    page_create_io_wrapper, &kmem_io[IO_4G].kmem_io_attr));
1031 }
1032 
1033 static void *
1034 segkmem_alloc_io_2G(vmem_t *vmp, size_t size, int vmflag)
1035 {
1036 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1037 	    page_create_io_wrapper, &kmem_io[IO_2G].kmem_io_attr));
1038 }
1039 
1040 static void *
1041 segkmem_alloc_io_1G(vmem_t *vmp, size_t size, int vmflag)
1042 {
1043 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1044 	    page_create_io_wrapper, &kmem_io[IO_1G].kmem_io_attr));
1045 }
1046 
1047 static void *
1048 segkmem_alloc_io_512M(vmem_t *vmp, size_t size, int vmflag)
1049 {
1050 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1051 	    page_create_io_wrapper, &kmem_io[IO_512M].kmem_io_attr));
1052 }
1053 
1054 static void *
1055 segkmem_alloc_io_256M(vmem_t *vmp, size_t size, int vmflag)
1056 {
1057 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1058 	    page_create_io_wrapper, &kmem_io[IO_256M].kmem_io_attr));
1059 }
1060 
1061 static void *
1062 segkmem_alloc_io_128M(vmem_t *vmp, size_t size, int vmflag)
1063 {
1064 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1065 	    page_create_io_wrapper, &kmem_io[IO_128M].kmem_io_attr));
1066 }
1067 
1068 static void *
1069 segkmem_alloc_io_64M(vmem_t *vmp, size_t size, int vmflag)
1070 {
1071 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1072 	    page_create_io_wrapper, &kmem_io[IO_64M].kmem_io_attr));
1073 }
1074 
1075 static void *
1076 segkmem_alloc_io_32M(vmem_t *vmp, size_t size, int vmflag)
1077 {
1078 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1079 	    page_create_io_wrapper, &kmem_io[IO_32M].kmem_io_attr));
1080 }
1081 
1082 static void *
1083 segkmem_alloc_io_16M(vmem_t *vmp, size_t size, int vmflag)
1084 {
1085 	return (segkmem_xalloc(vmp, NULL, size, vmflag, 0,
1086 	    page_create_io_wrapper, &kmem_io[IO_16M].kmem_io_attr));
1087 }
1088 
/*
 * Per-range parameters for the kmem_io[] arenas; entry 'a' here describes
 * kmem_io[a].  Entries are listed from the largest address limit down to
 * the smallest.  ka_init() may lower the io_limit of the first range in use
 * to the top of physical memory.
 *
 *	io_limit	highest address the range may hand out; copied into
 *			the range's dma_attr_addr_hi by ka_init()
 *	io_name		arena name, also used as the prefix of the names of
 *			the range's kmem caches (see kmem_io_init())
 *	io_alloc	allocation callback given to vmem_create()
 *	io_initial	non-zero if ka_init() should create the arena up
 *			front; other ranges are created on first use by
 *			kmem_io_index()
 */
struct {
	uint64_t	io_limit;
	char		*io_name;
	void		*(*io_alloc)(vmem_t *, size_t, int);
	int		io_initial;	/* kmem_io_init during startup */
} io_arena_params[MAX_MEM_RANGES] = {
	{0x000fffffffffffffULL,	"kmem_io_4P",	segkmem_alloc_io_4P,	1},
	{0x0000000fffffffffULL,	"kmem_io_64G",	segkmem_alloc_io_64G,	0},
	{0x00000000ffffffffULL,	"kmem_io_4G",	segkmem_alloc_io_4G,	1},
	{0x000000007fffffffULL,	"kmem_io_2G",	segkmem_alloc_io_2G,	1},
	{0x000000003fffffffULL,	"kmem_io_1G",	segkmem_alloc_io_1G,	0},
	{0x000000001fffffffULL,	"kmem_io_512M",	segkmem_alloc_io_512M,	0},
	{0x000000000fffffffULL,	"kmem_io_256M",	segkmem_alloc_io_256M,	0},
	{0x0000000007ffffffULL,	"kmem_io_128M",	segkmem_alloc_io_128M,	0},
	{0x0000000003ffffffULL,	"kmem_io_64M",	segkmem_alloc_io_64M,	0},
	{0x0000000001ffffffULL,	"kmem_io_32M",	segkmem_alloc_io_32M,	0},
	{0x0000000000ffffffULL,	"kmem_io_16M",	segkmem_alloc_io_16M,	1}
};
1107 
1108 void
1109 kmem_io_init(int a)
1110 {
1111 	int	c;
1112 	char name[40];
1113 
1114 	kmem_io[a].kmem_io_arena = vmem_create(io_arena_params[a].io_name,
1115 	    NULL, 0, PAGESIZE, io_arena_params[a].io_alloc,
1116 #ifdef __xpv
1117 	    segkmem_free_io,
1118 #else
1119 	    segkmem_free,
1120 #endif
1121 	    heap_arena, 0, VM_SLEEP);
1122 
1123 	for (c = 0; c < KA_NCACHE; c++) {
1124 		size_t size = KA_ALIGN << c;
1125 		(void) sprintf(name, "%s_%lu",
1126 		    io_arena_params[a].io_name, size);
1127 		kmem_io[a].kmem_io_cache[c] = kmem_cache_create(name,
1128 		    size, size, NULL, NULL, NULL, NULL,
1129 		    kmem_io[a].kmem_io_arena, 0);
1130 	}
1131 }
1132 
1133 /*
1134  * Return the index of the highest memory range for addr.
1135  */
1136 static int
1137 kmem_io_index(uint64_t addr)
1138 {
1139 	int n;
1140 
1141 	for (n = kmem_io_idx; n < MAX_MEM_RANGES; n++) {
1142 		if (kmem_io[n].kmem_io_attr.dma_attr_addr_hi <= addr) {
1143 			if (kmem_io[n].kmem_io_arena == NULL)
1144 				kmem_io_init(n);
1145 			return (n);
1146 		}
1147 	}
1148 	panic("kmem_io_index: invalid addr - must be at least 16m");
1149 
1150 	/*NOTREACHED*/
1151 }
1152 
1153 /*
1154  * Return the index of the next kmem_io populated memory range
1155  * after curindex.
1156  */
1157 static int
1158 kmem_io_index_next(int curindex)
1159 {
1160 	int n;
1161 
1162 	for (n = curindex + 1; n < MAX_MEM_RANGES; n++) {
1163 		if (kmem_io[n].kmem_io_arena)
1164 			return (n);
1165 	}
1166 	return (-1);
1167 }
1168 
1169 /*
1170  * allow kmem to be mapped in with different PTE cache attribute settings.
1171  * Used by i_ddi_mem_alloc()
1172  */
1173 int
1174 kmem_override_cache_attrs(caddr_t kva, size_t size, uint_t order)
1175 {
1176 	uint_t hat_flags;
1177 	caddr_t kva_end;
1178 	uint_t hat_attr;
1179 	pfn_t pfn;
1180 
1181 	if (hat_getattr(kas.a_hat, kva, &hat_attr) == -1) {
1182 		return (-1);
1183 	}
1184 
1185 	hat_attr &= ~HAT_ORDER_MASK;
1186 	hat_attr |= order | HAT_NOSYNC;
1187 	hat_flags = HAT_LOAD_LOCK;
1188 
1189 	kva_end = (caddr_t)(((uintptr_t)kva + size + PAGEOFFSET) &
1190 	    (uintptr_t)PAGEMASK);
1191 	kva = (caddr_t)((uintptr_t)kva & (uintptr_t)PAGEMASK);
1192 
1193 	while (kva < kva_end) {
1194 		pfn = hat_getpfnum(kas.a_hat, kva);
1195 		hat_unload(kas.a_hat, kva, PAGESIZE, HAT_UNLOAD_UNLOCK);
1196 		hat_devload(kas.a_hat, kva, PAGESIZE, pfn, hat_attr, hat_flags);
1197 		kva += MMU_PAGESIZE;
1198 	}
1199 
1200 	return (0);
1201 }
1202 
1203 static int
1204 ctgcompare(const void *a1, const void *a2)
1205 {
1206 	/* we just want to compare virtual addresses */
1207 	a1 = ((struct ctgas *)a1)->ctg_addr;
1208 	a2 = ((struct ctgas *)a2)->ctg_addr;
1209 	return (a1 == a2 ? 0 : (a1 < a2 ? -1 : 1));
1210 }
1211 
1212 void
1213 ka_init(void)
1214 {
1215 	int a;
1216 	paddr_t maxphysaddr;
1217 #if !defined(__xpv)
1218 	extern pfn_t physmax;
1219 
1220 	maxphysaddr = mmu_ptob((paddr_t)physmax) + MMU_PAGEOFFSET;
1221 #else
1222 	maxphysaddr = mmu_ptob((paddr_t)HYPERVISOR_memory_op(
1223 	    XENMEM_maximum_ram_page, NULL)) + MMU_PAGEOFFSET;
1224 #endif
1225 
1226 	ASSERT(maxphysaddr <= io_arena_params[0].io_limit);
1227 
1228 	for (a = 0; a < MAX_MEM_RANGES; a++) {
1229 		if (maxphysaddr >= io_arena_params[a + 1].io_limit) {
1230 			if (maxphysaddr > io_arena_params[a + 1].io_limit)
1231 				io_arena_params[a].io_limit = maxphysaddr;
1232 			else
1233 				a++;
1234 			break;
1235 		}
1236 	}
1237 	kmem_io_idx = a;
1238 
1239 	for (; a < MAX_MEM_RANGES; a++) {
1240 		kmem_io[a].kmem_io_attr = kmem_io_attr;
1241 		kmem_io[a].kmem_io_attr.dma_attr_addr_hi =
1242 		    io_arena_params[a].io_limit;
1243 		/*
1244 		 * initialize kmem_io[] arena/cache corresponding to
1245 		 * maxphysaddr and to the "common" io memory ranges that
1246 		 * have io_initial set to a non-zero value.
1247 		 */
1248 		if (io_arena_params[a].io_initial || a == kmem_io_idx)
1249 			kmem_io_init(a);
1250 	}
1251 
1252 	/* initialize ctgtree */
1253 	avl_create(&ctgtree, ctgcompare, sizeof (struct ctgas),
1254 	    offsetof(struct ctgas, ctg_link));
1255 }
1256 
1257 /*
1258  * put contig address/size
1259  */
1260 static void *
1261 putctgas(void *addr, size_t size)
1262 {
1263 	struct ctgas    *ctgp;
1264 	if ((ctgp = kmem_zalloc(sizeof (*ctgp), KM_NOSLEEP)) != NULL) {
1265 		ctgp->ctg_addr = addr;
1266 		ctgp->ctg_size = size;
1267 		CTGLOCK();
1268 		avl_add(&ctgtree, ctgp);
1269 		CTGUNLOCK();
1270 	}
1271 	return (ctgp);
1272 }
1273 
1274 /*
1275  * get contig size by addr
1276  */
1277 static size_t
1278 getctgsz(void *addr)
1279 {
1280 	struct ctgas    *ctgp;
1281 	struct ctgas    find;
1282 	size_t		sz = 0;
1283 
1284 	find.ctg_addr = addr;
1285 	CTGLOCK();
1286 	if ((ctgp = avl_find(&ctgtree, &find, NULL)) != NULL) {
1287 		avl_remove(&ctgtree, ctgp);
1288 	}
1289 	CTGUNLOCK();
1290 
1291 	if (ctgp != NULL) {
1292 		sz = ctgp->ctg_size;
1293 		kmem_free(ctgp, sizeof (*ctgp));
1294 	}
1295 
1296 	return (sz);
1297 }
1298 
1299 /*
1300  * contig_alloc:
1301  *
1302  *	allocates contiguous memory to satisfy the 'size' and dma attributes
1303  *	specified in 'attr'.
1304  *
1305  *	Not all of memory need to be physically contiguous if the
1306  *	scatter-gather list length is greater than 1.
1307  */
1308 
1309 /*ARGSUSED*/
1310 void *
1311 contig_alloc(size_t size, ddi_dma_attr_t *attr, uintptr_t align, int cansleep)
1312 {
1313 	pgcnt_t		pgcnt = btopr(size);
1314 	size_t		asize = pgcnt * PAGESIZE;
1315 	page_t		*ppl;
1316 	int		pflag;
1317 	void		*addr;
1318 
1319 	extern page_t *page_create_io(vnode_t *, u_offset_t, uint_t,
1320 	    uint_t, struct as *, caddr_t, ddi_dma_attr_t *);
1321 
1322 	/* segkmem_xalloc */
1323 
1324 	if (align <= PAGESIZE)
1325 		addr = vmem_alloc(heap_arena, asize,
1326 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1327 	else
1328 		addr = vmem_xalloc(heap_arena, asize, align, 0, 0, NULL, NULL,
1329 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1330 	if (addr) {
1331 		ASSERT(!((uintptr_t)addr & (align - 1)));
1332 
1333 		if (page_resv(pgcnt, (cansleep) ? KM_SLEEP : KM_NOSLEEP) == 0) {
1334 			vmem_free(heap_arena, addr, asize);
1335 			return (NULL);
1336 		}
1337 		pflag = PG_EXCL;
1338 
1339 		if (cansleep)
1340 			pflag |= PG_WAIT;
1341 
1342 		/* 4k req gets from freelists rather than pfn search */
1343 		if (pgcnt > 1 || align > PAGESIZE)
1344 			pflag |= PG_PHYSCONTIG;
1345 
1346 		ppl = page_create_io(&kvp, (u_offset_t)(uintptr_t)addr,
1347 		    asize, pflag, &kas, (caddr_t)addr, attr);
1348 
1349 		if (!ppl) {
1350 			vmem_free(heap_arena, addr, asize);
1351 			page_unresv(pgcnt);
1352 			return (NULL);
1353 		}
1354 
1355 		while (ppl != NULL) {
1356 			page_t	*pp = ppl;
1357 			page_sub(&ppl, pp);
1358 			ASSERT(page_iolock_assert(pp));
1359 			page_io_unlock(pp);
1360 			page_downgrade(pp);
1361 			hat_memload(kas.a_hat, (caddr_t)(uintptr_t)pp->p_offset,
1362 			    pp, (PROT_ALL & ~PROT_USER) |
1363 			    HAT_NOSYNC, HAT_LOAD_LOCK);
1364 		}
1365 	}
1366 	return (addr);
1367 }
1368 
1369 void
1370 contig_free(void *addr, size_t size)
1371 {
1372 	pgcnt_t	pgcnt = btopr(size);
1373 	size_t	asize = pgcnt * PAGESIZE;
1374 	caddr_t	a, ea;
1375 	page_t	*pp;
1376 
1377 	hat_unload(kas.a_hat, addr, asize, HAT_UNLOAD_UNLOCK);
1378 
1379 	for (a = addr, ea = a + asize; a < ea; a += PAGESIZE) {
1380 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)a);
1381 		if (!pp)
1382 			panic("contig_free: contig pp not found");
1383 
1384 		if (!page_tryupgrade(pp)) {
1385 			page_unlock(pp);
1386 			pp = page_lookup(&kvp,
1387 			    (u_offset_t)(uintptr_t)a, SE_EXCL);
1388 			if (pp == NULL)
1389 				panic("contig_free: page freed");
1390 		}
1391 		page_destroy(pp, 0);
1392 	}
1393 
1394 	page_unresv(pgcnt);
1395 	vmem_free(heap_arena, addr, asize);
1396 }
1397 
1398 /*
1399  * Allocate from the system, aligned on a specific boundary.
1400  * The alignment, if non-zero, must be a power of 2.
1401  */
1402 static void *
1403 kalloca(size_t size, size_t align, int cansleep, int physcontig,
1404     ddi_dma_attr_t *attr)
1405 {
1406 	size_t *addr, *raddr, rsize;
1407 	size_t hdrsize = 4 * sizeof (size_t);	/* must be power of 2 */
1408 	int a, i, c;
1409 	vmem_t *vmp = NULL;
1410 	kmem_cache_t *cp = NULL;
1411 
1412 	if (attr->dma_attr_addr_lo > mmu_ptob((uint64_t)ddiphysmin))
1413 		return (NULL);
1414 
1415 	align = MAX(align, hdrsize);
1416 	ASSERT((align & (align - 1)) == 0);
1417 
1418 	/*
1419 	 * All of our allocators guarantee 16-byte alignment, so we don't
1420 	 * need to reserve additional space for the header.
1421 	 * To simplify picking the correct kmem_io_cache, we round up to
1422 	 * a multiple of KA_ALIGN.
1423 	 */
1424 	rsize = P2ROUNDUP_TYPED(size + align, KA_ALIGN, size_t);
1425 
1426 	if (physcontig && rsize > PAGESIZE) {
1427 		if (addr = contig_alloc(size, attr, align, cansleep)) {
1428 			if (!putctgas(addr, size))
1429 				contig_free(addr, size);
1430 			else
1431 				return (addr);
1432 		}
1433 		return (NULL);
1434 	}
1435 
1436 	a = kmem_io_index(attr->dma_attr_addr_hi);
1437 
1438 	if (rsize > PAGESIZE) {
1439 		vmp = kmem_io[a].kmem_io_arena;
1440 		raddr = vmem_alloc(vmp, rsize,
1441 		    (cansleep) ? VM_SLEEP : VM_NOSLEEP);
1442 	} else {
1443 		c = highbit((rsize >> KA_ALIGN_SHIFT) - 1);
1444 		cp = kmem_io[a].kmem_io_cache[c];
1445 		raddr = kmem_cache_alloc(cp, (cansleep) ? KM_SLEEP :
1446 		    KM_NOSLEEP);
1447 	}
1448 
1449 	if (raddr == NULL) {
1450 		int	na;
1451 
1452 		ASSERT(cansleep == 0);
1453 		if (rsize > PAGESIZE)
1454 			return (NULL);
1455 		/*
1456 		 * System does not have memory in the requested range.
1457 		 * Try smaller kmem io ranges and larger cache sizes
1458 		 * to see if there might be memory available in
1459 		 * these other caches.
1460 		 */
1461 
1462 		for (na = kmem_io_index_next(a); na >= 0;
1463 		    na = kmem_io_index_next(na)) {
1464 			ASSERT(kmem_io[na].kmem_io_arena);
1465 			cp = kmem_io[na].kmem_io_cache[c];
1466 			raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1467 			if (raddr)
1468 				goto kallocdone;
1469 		}
1470 		/* now try the larger kmem io cache sizes */
1471 		for (na = a; na >= 0; na = kmem_io_index_next(na)) {
1472 			for (i = c + 1; i < KA_NCACHE; i++) {
1473 				cp = kmem_io[na].kmem_io_cache[i];
1474 				raddr = kmem_cache_alloc(cp, KM_NOSLEEP);
1475 				if (raddr)
1476 					goto kallocdone;
1477 			}
1478 		}
1479 		return (NULL);
1480 	}
1481 
1482 kallocdone:
1483 	ASSERT(!P2BOUNDARY((uintptr_t)raddr, rsize, PAGESIZE) ||
1484 	    rsize > PAGESIZE);
1485 
1486 	addr = (size_t *)P2ROUNDUP((uintptr_t)raddr + hdrsize, align);
1487 	ASSERT((uintptr_t)addr + size - (uintptr_t)raddr <= rsize);
1488 
1489 	addr[-4] = (size_t)cp;
1490 	addr[-3] = (size_t)vmp;
1491 	addr[-2] = (size_t)raddr;
1492 	addr[-1] = rsize;
1493 
1494 	return (addr);
1495 }
1496 
1497 static void
1498 kfreea(void *addr)
1499 {
1500 	size_t		size;
1501 
1502 	if (!((uintptr_t)addr & PAGEOFFSET) && (size = getctgsz(addr))) {
1503 		contig_free(addr, size);
1504 	} else {
1505 		size_t	*saddr = addr;
1506 		if (saddr[-4] == 0)
1507 			vmem_free((vmem_t *)saddr[-3], (void *)saddr[-2],
1508 			    saddr[-1]);
1509 		else
1510 			kmem_cache_free((kmem_cache_t *)saddr[-4],
1511 			    (void *)saddr[-2]);
1512 	}
1513 }
1514 
1515 /*ARGSUSED*/
1516 void
1517 i_ddi_devacc_to_hatacc(ddi_device_acc_attr_t *devaccp, uint_t *hataccp)
1518 {
1519 }
1520 
1521 /*
1522  * Check if the specified cache attribute is supported on the platform.
1523  * This function must be called before i_ddi_cacheattr_to_hatacc().
1524  */
1525 boolean_t
1526 i_ddi_check_cache_attr(uint_t flags)
1527 {
1528 	/*
1529 	 * The cache attributes are mutually exclusive. Any combination of
1530 	 * the attributes leads to a failure.
1531 	 */
1532 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1533 	if ((cache_attr != 0) && !ISP2(cache_attr))
1534 		return (B_FALSE);
1535 
1536 	/* All cache attributes are supported on X86/X64 */
1537 	if (cache_attr & (IOMEM_DATA_UNCACHED | IOMEM_DATA_CACHED |
1538 	    IOMEM_DATA_UC_WR_COMBINE))
1539 		return (B_TRUE);
1540 
1541 	/* undefined attributes */
1542 	return (B_FALSE);
1543 }
1544 
1545 /* set HAT cache attributes from the cache attributes */
1546 void
1547 i_ddi_cacheattr_to_hatacc(uint_t flags, uint_t *hataccp)
1548 {
1549 	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);
1550 	static char *fname = "i_ddi_cacheattr_to_hatacc";
1551 
1552 	/*
1553 	 * If write-combining is not supported, then it falls back
1554 	 * to uncacheable.
1555 	 */
1556 	if (cache_attr == IOMEM_DATA_UC_WR_COMBINE &&
1557 	    !is_x86_feature(x86_featureset, X86FSET_PAT))
1558 		cache_attr = IOMEM_DATA_UNCACHED;
1559 
1560 	/*
1561 	 * set HAT attrs according to the cache attrs.
1562 	 */
1563 	switch (cache_attr) {
1564 	case IOMEM_DATA_UNCACHED:
1565 		*hataccp &= ~HAT_ORDER_MASK;
1566 		*hataccp |= (HAT_STRICTORDER | HAT_PLAT_NOCACHE);
1567 		break;
1568 	case IOMEM_DATA_UC_WR_COMBINE:
1569 		*hataccp &= ~HAT_ORDER_MASK;
1570 		*hataccp |= (HAT_MERGING_OK | HAT_PLAT_NOCACHE);
1571 		break;
1572 	case IOMEM_DATA_CACHED:
1573 		*hataccp &= ~HAT_ORDER_MASK;
1574 		*hataccp |= HAT_UNORDERED_OK;
1575 		break;
1576 	/*
1577 	 * This case must not occur because the cache attribute is scrutinized
1578 	 * before this function is called.
1579 	 */
1580 	default:
1581 		/*
1582 		 * set cacheable to hat attrs.
1583 		 */
1584 		*hataccp &= ~HAT_ORDER_MASK;
1585 		*hataccp |= HAT_UNORDERED_OK;
1586 		cmn_err(CE_WARN, "%s: cache_attr=0x%x is ignored.",
1587 		    fname, cache_attr);
1588 	}
1589 }
1590 
1591 /*
1592  * This should actually be called i_ddi_dma_mem_alloc. There should
1593  * also be an i_ddi_pio_mem_alloc. i_ddi_dma_mem_alloc should call
1594  * through the device tree with the DDI_CTLOPS_DMA_ALIGN ctl ops to
1595  * get alignment requirements for DMA memory. i_ddi_pio_mem_alloc
1596  * should use DDI_CTLOPS_PIO_ALIGN. Since we only have i_ddi_mem_alloc
1597  * so far which is used for both, DMA and PIO, we have to use the DMA
1598  * ctl ops to make everybody happy.
1599  */
1600 /*ARGSUSED*/
1601 int
1602 i_ddi_mem_alloc(dev_info_t *dip, ddi_dma_attr_t *attr,
1603     size_t length, int cansleep, int flags,
1604     ddi_device_acc_attr_t *accattrp, caddr_t *kaddrp,
1605     size_t *real_length, ddi_acc_hdl_t *ap)
1606 {
1607 	caddr_t a;
1608 	int iomin;
1609 	ddi_acc_impl_t *iap;
1610 	int physcontig = 0;
1611 	pgcnt_t npages;
1612 	pgcnt_t minctg;
1613 	uint_t order;
1614 	int e;
1615 
1616 	/*
1617 	 * Check legality of arguments
1618 	 */
1619 	if (length == 0 || kaddrp == NULL || attr == NULL) {
1620 		return (DDI_FAILURE);
1621 	}
1622 
1623 	if (attr->dma_attr_minxfer == 0 || attr->dma_attr_align == 0 ||
1624 	    !ISP2(attr->dma_attr_align) || !ISP2(attr->dma_attr_minxfer)) {
1625 		return (DDI_FAILURE);
1626 	}
1627 
1628 	/*
1629 	 * figure out most restrictive alignment requirement
1630 	 */
1631 	iomin = attr->dma_attr_minxfer;
1632 	iomin = maxbit(iomin, attr->dma_attr_align);
1633 	if (iomin == 0)
1634 		return (DDI_FAILURE);
1635 
1636 	ASSERT((iomin & (iomin - 1)) == 0);
1637 
1638 	/*
1639 	 * if we allocate memory with IOMEM_DATA_UNCACHED or
1640 	 * IOMEM_DATA_UC_WR_COMBINE, make sure we allocate a page aligned
1641 	 * memory that ends on a page boundry.
1642 	 * Don't want to have to different cache mappings to the same
1643 	 * physical page.
1644 	 */
1645 	if (OVERRIDE_CACHE_ATTR(flags)) {
1646 		iomin = (iomin + MMU_PAGEOFFSET) & MMU_PAGEMASK;
1647 		length = (length + MMU_PAGEOFFSET) & (size_t)MMU_PAGEMASK;
1648 	}
1649 
1650 	/*
1651 	 * Determine if we need to satisfy the request for physically
1652 	 * contiguous memory or alignments larger than pagesize.
1653 	 */
1654 	npages = btopr(length + attr->dma_attr_align);
1655 	minctg = howmany(npages, attr->dma_attr_sgllen);
1656 
1657 	if (minctg > 1) {
1658 		uint64_t pfnseg = attr->dma_attr_seg >> PAGESHIFT;
1659 		/*
1660 		 * verify that the minimum contig requirement for the
1661 		 * actual length does not cross segment boundary.
1662 		 */
1663 		length = P2ROUNDUP_TYPED(length, attr->dma_attr_minxfer,
1664 		    size_t);
1665 		npages = btopr(length);
1666 		minctg = howmany(npages, attr->dma_attr_sgllen);
1667 		if (minctg > pfnseg + 1)
1668 			return (DDI_FAILURE);
1669 		physcontig = 1;
1670 	} else {
1671 		length = P2ROUNDUP_TYPED(length, iomin, size_t);
1672 	}
1673 
1674 	/*
1675 	 * Allocate the requested amount from the system.
1676 	 */
1677 	a = kalloca(length, iomin, cansleep, physcontig, attr);
1678 
1679 	if ((*kaddrp = a) == NULL)
1680 		return (DDI_FAILURE);
1681 
1682 	/*
1683 	 * if we to modify the cache attributes, go back and muck with the
1684 	 * mappings.
1685 	 */
1686 	if (OVERRIDE_CACHE_ATTR(flags)) {
1687 		order = 0;
1688 		i_ddi_cacheattr_to_hatacc(flags, &order);
1689 		e = kmem_override_cache_attrs(a, length, order);
1690 		if (e != 0) {
1691 			kfreea(a);
1692 			return (DDI_FAILURE);
1693 		}
1694 	}
1695 
1696 	if (real_length) {
1697 		*real_length = length;
1698 	}
1699 	if (ap) {
1700 		/*
1701 		 * initialize access handle
1702 		 */
1703 		iap = (ddi_acc_impl_t *)ap->ah_platform_private;
1704 		iap->ahi_acc_attr |= DDI_ACCATTR_CPU_VADDR;
1705 		impl_acc_hdl_init(ap);
1706 	}
1707 
1708 	return (DDI_SUCCESS);
1709 }
1710 
1711 /* ARGSUSED */
1712 void
1713 i_ddi_mem_free(caddr_t kaddr, ddi_acc_hdl_t *ap)
1714 {
1715 	if (ap != NULL) {
1716 		/*
1717 		 * if we modified the cache attributes on alloc, go back and
1718 		 * fix them since this memory could be returned to the
1719 		 * general pool.
1720 		 */
1721 		if (OVERRIDE_CACHE_ATTR(ap->ah_xfermodes)) {
1722 			uint_t order = 0;
1723 			int e;
1724 			i_ddi_cacheattr_to_hatacc(IOMEM_DATA_CACHED, &order);
1725 			e = kmem_override_cache_attrs(kaddr, ap->ah_len, order);
1726 			if (e != 0) {
1727 				cmn_err(CE_WARN, "i_ddi_mem_free() failed to "
1728 				    "override cache attrs, memory leaked\n");
1729 				return;
1730 			}
1731 		}
1732 	}
1733 	kfreea(kaddr);
1734 }
1735 
1736 /*
1737  * Access Barriers
1738  *
1739  */
1740 /*ARGSUSED*/
1741 int
1742 i_ddi_ontrap(ddi_acc_handle_t hp)
1743 {
1744 	return (DDI_FAILURE);
1745 }
1746 
1747 /*ARGSUSED*/
1748 void
1749 i_ddi_notrap(ddi_acc_handle_t hp)
1750 {
1751 }
1752 
1753 
1754 /*
1755  * Misc Functions
1756  */
1757 
1758 /*
1759  * Implementation instance override functions
1760  *
1761  * No override on i86pc
1762  */
1763 /*ARGSUSED*/
1764 uint_t
1765 impl_assign_instance(dev_info_t *dip)
1766 {
1767 	return ((uint_t)-1);
1768 }
1769 
1770 /*ARGSUSED*/
1771 int
1772 impl_keep_instance(dev_info_t *dip)
1773 {
1774 
1775 #if defined(__xpv)
1776 	/*
1777 	 * Do not persist instance numbers assigned to devices in dom0
1778 	 */
1779 	dev_info_t *pdip;
1780 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1781 		if (((pdip = ddi_get_parent(dip)) != NULL) &&
1782 		    (strcmp(ddi_get_name(pdip), "xpvd") == 0))
1783 			return (DDI_SUCCESS);
1784 	}
1785 #endif
1786 	return (DDI_FAILURE);
1787 }
1788 
1789 /*ARGSUSED*/
1790 int
1791 impl_free_instance(dev_info_t *dip)
1792 {
1793 	return (DDI_FAILURE);
1794 }
1795 
1796 /*ARGSUSED*/
1797 int
1798 impl_check_cpu(dev_info_t *devi)
1799 {
1800 	return (DDI_SUCCESS);
1801 }
1802 
/*
 * Power-off hook referenced from common/cpr_driver.c.
 * i86pc has no known way to power the machine off, so this is a no-op.
 */
void
arch_power_down(void)
{
	/* nothing to do */
}
1810 
1811 /*
1812  * Copy name to property_name, since name
1813  * is in the low address range below kernelbase.
1814  */
1815 static void
1816 copy_boot_str(const char *boot_str, char *kern_str, int len)
1817 {
1818 	int i = 0;
1819 
1820 	while (i < len - 1 && boot_str[i] != '\0') {
1821 		kern_str[i] = boot_str[i];
1822 		i++;
1823 	}
1824 
1825 	kern_str[i] = 0;	/* null terminate */
1826 	if (boot_str[i] != '\0')
1827 		cmn_err(CE_WARN,
1828 		    "boot property string is truncated to %s", kern_str);
1829 }
1830 
1831 static void
1832 get_boot_properties(void)
1833 {
1834 	extern char hw_provider[];
1835 	dev_info_t *devi;
1836 	char *name;
1837 	int length, flags;
1838 	char property_name[50], property_val[50];
1839 	void *bop_staging_area;
1840 
1841 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_NOSLEEP);
1842 
1843 	/*
1844 	 * Import "root" properties from the boot.
1845 	 *
1846 	 * We do this by invoking BOP_NEXTPROP until the list
1847 	 * is completely copied in.
1848 	 */
1849 
1850 	devi = ddi_root_node();
1851 	for (name = BOP_NEXTPROP(bootops, "");		/* get first */
1852 	    name;					/* NULL => DONE */
1853 	    name = BOP_NEXTPROP(bootops, name)) {	/* get next */
1854 
1855 		/* copy string to memory above kernelbase */
1856 		copy_boot_str(name, property_name, 50);
1857 
1858 		/*
1859 		 * Skip vga properties. They will be picked up later
1860 		 * by get_vga_properties.
1861 		 */
1862 		if (strcmp(property_name, "display-edif-block") == 0 ||
1863 		    strcmp(property_name, "display-edif-id") == 0) {
1864 			continue;
1865 		}
1866 
1867 		length = BOP_GETPROPLEN(bootops, property_name);
1868 		if (length < 0)
1869 			continue;
1870 		if (length > MMU_PAGESIZE) {
1871 			cmn_err(CE_NOTE,
1872 			    "boot property %s longer than 0x%x, ignored\n",
1873 			    property_name, MMU_PAGESIZE);
1874 			continue;
1875 		}
1876 		BOP_GETPROP(bootops, property_name, bop_staging_area);
1877 		flags = do_bsys_getproptype(bootops, property_name);
1878 
1879 		/*
1880 		 * special properties:
1881 		 * si-machine, si-hw-provider
1882 		 *	goes to kernel data structures.
1883 		 * bios-boot-device and stdout
1884 		 *	goes to hardware property list so it may show up
1885 		 *	in the prtconf -vp output. This is needed by
1886 		 *	Install/Upgrade. Once we fix install upgrade,
1887 		 *	this can be taken out.
1888 		 */
1889 		if (strcmp(name, "si-machine") == 0) {
1890 			(void) strncpy(utsname.machine, bop_staging_area,
1891 			    SYS_NMLN);
1892 			utsname.machine[SYS_NMLN - 1] = '\0';
1893 			continue;
1894 		}
1895 		if (strcmp(name, "si-hw-provider") == 0) {
1896 			(void) strncpy(hw_provider, bop_staging_area, SYS_NMLN);
1897 			hw_provider[SYS_NMLN - 1] = '\0';
1898 			continue;
1899 		}
1900 		if (strcmp(name, "bios-boot-device") == 0) {
1901 			copy_boot_str(bop_staging_area, property_val, 50);
1902 			(void) ndi_prop_update_string(DDI_DEV_T_NONE, devi,
1903 			    property_name, property_val);
1904 			continue;
1905 		}
1906 		if (strcmp(name, "stdout") == 0) {
1907 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi,
1908 			    property_name, *((int *)bop_staging_area));
1909 			continue;
1910 		}
1911 
1912 		/* Boolean property */
1913 		if (length == 0) {
1914 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1915 			    DDI_PROP_CANSLEEP, property_name, NULL, 0);
1916 			continue;
1917 		}
1918 
1919 		/* Now anything else based on type. */
1920 		switch (flags) {
1921 		case DDI_PROP_TYPE_INT:
1922 			if (length == sizeof (int)) {
1923 				(void) e_ddi_prop_update_int(DDI_DEV_T_NONE,
1924 				    devi, property_name,
1925 				    *((int *)bop_staging_area));
1926 			} else {
1927 				(void) e_ddi_prop_update_int_array(
1928 				    DDI_DEV_T_NONE, devi, property_name,
1929 				    bop_staging_area, length / sizeof (int));
1930 			}
1931 			break;
1932 		case DDI_PROP_TYPE_STRING:
1933 			(void) e_ddi_prop_update_string(DDI_DEV_T_NONE, devi,
1934 			    property_name, bop_staging_area);
1935 			break;
1936 		case DDI_PROP_TYPE_BYTE:
1937 			(void) e_ddi_prop_update_byte_array(DDI_DEV_T_NONE,
1938 			    devi, property_name, bop_staging_area, length);
1939 			break;
1940 		case DDI_PROP_TYPE_INT64:
1941 			if (length == sizeof (int64_t)) {
1942 				(void) e_ddi_prop_update_int64(DDI_DEV_T_NONE,
1943 				    devi, property_name,
1944 				    *((int64_t *)bop_staging_area));
1945 			} else {
1946 				(void) e_ddi_prop_update_int64_array(
1947 				    DDI_DEV_T_NONE, devi, property_name,
1948 				    bop_staging_area,
1949 				    length / sizeof (int64_t));
1950 			}
1951 			break;
1952 		default:
1953 			/* Property type unknown, use old prop interface */
1954 			(void) e_ddi_prop_create(DDI_DEV_T_NONE, devi,
1955 			    DDI_PROP_CANSLEEP, property_name, bop_staging_area,
1956 			    length);
1957 		}
1958 	}
1959 
1960 	kmem_free(bop_staging_area, MMU_PAGESIZE);
1961 }
1962 
1963 static void
1964 get_vga_properties(void)
1965 {
1966 	dev_info_t *devi;
1967 	major_t major;
1968 	char *name;
1969 	int length;
1970 	char property_val[50];
1971 	void *bop_staging_area;
1972 
1973 	/*
1974 	 * XXXX Hack Allert!
1975 	 * There really needs to be a better way for identifying various
1976 	 * console framebuffers and their related issues.  Till then,
1977 	 * check for this one as a replacement to vgatext.
1978 	 */
1979 	major = ddi_name_to_major("ragexl");
1980 	if (major == (major_t)-1) {
1981 		major = ddi_name_to_major("vgatext");
1982 		if (major == (major_t)-1)
1983 			return;
1984 	}
1985 	devi = devnamesp[major].dn_head;
1986 	if (devi == NULL)
1987 		return;
1988 
1989 	bop_staging_area = kmem_zalloc(MMU_PAGESIZE, KM_SLEEP);
1990 
1991 	/*
1992 	 * Import "vga" properties from the boot.
1993 	 */
1994 	name = "display-edif-block";
1995 	length = BOP_GETPROPLEN(bootops, name);
1996 	if (length > 0 && length < MMU_PAGESIZE) {
1997 		BOP_GETPROP(bootops, name, bop_staging_area);
1998 		(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE,
1999 		    devi, name, bop_staging_area, length);
2000 	}
2001 
2002 	/*
2003 	 * kdmconfig is also looking for display-type and
2004 	 * video-adapter-type. We default to color and svga.
2005 	 *
2006 	 * Could it be "monochrome", "vga"?
2007 	 * Nah, you've got to come to the 21st century...
2008 	 * And you can set monitor type manually in kdmconfig
2009 	 * if you are really an old junky.
2010 	 */
2011 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2012 	    devi, "display-type", "color");
2013 	(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2014 	    devi, "video-adapter-type", "svga");
2015 
2016 	name = "display-edif-id";
2017 	length = BOP_GETPROPLEN(bootops, name);
2018 	if (length > 0 && length < MMU_PAGESIZE) {
2019 		BOP_GETPROP(bootops, name, bop_staging_area);
2020 		copy_boot_str(bop_staging_area, property_val, length);
2021 		(void) ndi_prop_update_string(DDI_DEV_T_NONE,
2022 		    devi, name, property_val);
2023 	}
2024 
2025 	kmem_free(bop_staging_area, MMU_PAGESIZE);
2026 }
2027 
2028 /*
2029  * Copy console font to kernel memory. The temporary font setup
2030  * to use font module was done in early console setup, using low
2031  * memory and data from font module. Now we need to allocate
2032  * kernel memory and copy data over, so the low memory can be freed.
2033  * We can have at most one entry in font list from early boot.
2034  */
2035 static void
2036 get_console_font(void)
2037 {
2038 	struct fontlist *fp, *fl;
2039 	bitmap_data_t *bd;
2040 	struct font *fd, *tmp;
2041 	int i;
2042 
2043 	if (STAILQ_EMPTY(&fonts))
2044 		return;
2045 
2046 	fl = STAILQ_FIRST(&fonts);
2047 	STAILQ_REMOVE_HEAD(&fonts, font_next);
2048 	fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
2049 	bd = kmem_zalloc(sizeof (*bd), KM_SLEEP);
2050 	fd = kmem_zalloc(sizeof (*fd), KM_SLEEP);
2051 
2052 	fp->font_name = NULL;
2053 	fp->font_flags = FONT_BOOT;
2054 	fp->font_data = bd;
2055 
2056 	bd->width = fl->font_data->width;
2057 	bd->height = fl->font_data->height;
2058 	bd->uncompressed_size = fl->font_data->uncompressed_size;
2059 	bd->font = fd;
2060 
2061 	tmp = fl->font_data->font;
2062 	fd->vf_width = tmp->vf_width;
2063 	fd->vf_height = tmp->vf_height;
2064 	for (i = 0; i < VFNT_MAPS; i++) {
2065 		if (tmp->vf_map_count[i] == 0)
2066 			continue;
2067 		fd->vf_map_count[i] = tmp->vf_map_count[i];
2068 		fd->vf_map[i] = kmem_alloc(fd->vf_map_count[i] *
2069 		    sizeof (*fd->vf_map[i]), KM_SLEEP);
2070 		bcopy(tmp->vf_map[i], fd->vf_map[i], fd->vf_map_count[i] *
2071 		    sizeof (*fd->vf_map[i]));
2072 	}
2073 	fd->vf_bytes = kmem_alloc(bd->uncompressed_size, KM_SLEEP);
2074 	bcopy(tmp->vf_bytes, fd->vf_bytes, bd->uncompressed_size);
2075 	STAILQ_INSERT_HEAD(&fonts, fp, font_next);
2076 }
2077 
2078 /*
2079  * This is temporary, but absolutely necessary.  If we are being
2080  * booted with a device tree created by the DevConf project's bootconf
2081  * program, then we have device information nodes that reflect
2082  * reality.  At this point in time in the Solaris release schedule, the
2083  * kernel drivers aren't prepared for reality.  They still depend on their
2084  * own ad-hoc interpretations of the properties created when their .conf
2085  * files were interpreted. These drivers use an "ignore-hardware-nodes"
2086  * property to prevent them from using the nodes passed up from the bootconf
2087  * device tree.
2088  *
2089  * Trying to assemble root file system drivers as we are booting from
2090  * devconf will fail if the kernel driver is basing its name_addr's on the
 * pseudo-node device info while the bootpath passed up from bootconf is using
2092  * reality-based name_addrs.  We help the boot along in this case by
2093  * looking at the pre-bootconf bootpath and determining if we would have
2094  * successfully matched if that had been the bootpath we had chosen.
2095  *
2096  * Note that we only even perform this extra check if we've booted
2097  * using bootconf's 1275 compliant bootpath, this is the boot device, and
2098  * we're trying to match the name_addr specified in the 1275 bootpath.
2099  */
2100 
2101 #define	MAXCOMPONENTLEN	32
2102 
2103 int
2104 x86_old_bootpath_name_addr_match(dev_info_t *cdip, char *caddr, char *naddr)
2105 {
2106 	/*
2107 	 *  There are multiple criteria to be met before we can even
2108 	 *  consider allowing a name_addr match here.
2109 	 *
2110 	 *  1) We must have been booted such that the bootconf program
2111 	 *	created device tree nodes and properties.  This can be
2112 	 *	determined by examining the 'bootpath' property.  This
2113 	 *	property will be a non-null string iff bootconf was
2114 	 *	involved in the boot.
2115 	 *
2116 	 *  2) The module that we want to match must be the boot device.
2117 	 *
2118 	 *  3) The instance of the module we are thinking of letting be
2119 	 *	our match must be ignoring hardware nodes.
2120 	 *
2121 	 *  4) The name_addr we want to match must be the name_addr
2122 	 *	specified in the 1275 bootpath.
2123 	 */
2124 	static char bootdev_module[MAXCOMPONENTLEN];
2125 	static char bootdev_oldmod[MAXCOMPONENTLEN];
2126 	static char bootdev_newaddr[MAXCOMPONENTLEN];
2127 	static char bootdev_oldaddr[MAXCOMPONENTLEN];
2128 	static int  quickexit;
2129 
2130 	char *daddr;
2131 	int dlen;
2132 
2133 	char	*lkupname;
2134 	int	rv = DDI_FAILURE;
2135 
2136 	if ((ddi_getlongprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2137 	    "devconf-addr", (caddr_t)&daddr, &dlen) == DDI_PROP_SUCCESS) &&
2138 	    (ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2139 	    "ignore-hardware-nodes", -1) != -1)) {
2140 		if (strcmp(daddr, caddr) == 0) {
2141 			return (DDI_SUCCESS);
2142 		}
2143 	}
2144 
2145 	if (quickexit)
2146 		return (rv);
2147 
2148 	if (bootdev_module[0] == '\0') {
2149 		char *addrp, *eoaddrp;
2150 		char *busp, *modp, *atp;
2151 		char *bp1275, *bp;
2152 		int  bp1275len, bplen;
2153 
2154 		bp1275 = bp = addrp = eoaddrp = busp = modp = atp = NULL;
2155 
2156 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2157 		    ddi_root_node(), 0, "bootpath",
2158 		    (caddr_t)&bp1275, &bp1275len) != DDI_PROP_SUCCESS ||
2159 		    bp1275len <= 1) {
2160 			/*
2161 			 * We didn't boot from bootconf so we never need to
2162 			 * do any special matches.
2163 			 */
2164 			quickexit = 1;
2165 			if (bp1275)
2166 				kmem_free(bp1275, bp1275len);
2167 			return (rv);
2168 		}
2169 
2170 		if (ddi_getlongprop(DDI_DEV_T_ANY,
2171 		    ddi_root_node(), 0, "boot-path",
2172 		    (caddr_t)&bp, &bplen) != DDI_PROP_SUCCESS || bplen <= 1) {
2173 			/*
2174 			 * No fallback position for matching. This is
2175 			 * certainly unexpected, but we'll handle it
2176 			 * just in case.
2177 			 */
2178 			quickexit = 1;
2179 			kmem_free(bp1275, bp1275len);
2180 			if (bp)
2181 				kmem_free(bp, bplen);
2182 			return (rv);
2183 		}
2184 
2185 		/*
2186 		 *  Determine boot device module and 1275 name_addr
2187 		 *
2188 		 *  bootpath assumed to be of the form /bus/module@name_addr
2189 		 */
2190 		if (busp = strchr(bp1275, '/')) {
2191 			if (modp = strchr(busp + 1, '/')) {
2192 				if (atp = strchr(modp + 1, '@')) {
2193 					*atp = '\0';
2194 					addrp = atp + 1;
2195 					if (eoaddrp = strchr(addrp, '/'))
2196 						*eoaddrp = '\0';
2197 				}
2198 			}
2199 		}
2200 
2201 		if (modp && addrp) {
2202 			(void) strncpy(bootdev_module, modp + 1,
2203 			    MAXCOMPONENTLEN);
2204 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2205 
2206 			(void) strncpy(bootdev_newaddr, addrp, MAXCOMPONENTLEN);
2207 			bootdev_newaddr[MAXCOMPONENTLEN - 1] = '\0';
2208 		} else {
2209 			quickexit = 1;
2210 			kmem_free(bp1275, bp1275len);
2211 			kmem_free(bp, bplen);
2212 			return (rv);
2213 		}
2214 
2215 		/*
2216 		 *  Determine fallback name_addr
2217 		 *
2218 		 *  10/3/96 - Also save fallback module name because it
2219 		 *  might actually be different than the current module
2220 		 *  name.  E.G., ISA pnp drivers have new names.
2221 		 *
2222 		 *  bootpath assumed to be of the form /bus/module@name_addr
2223 		 */
2224 		addrp = NULL;
2225 		if (busp = strchr(bp, '/')) {
2226 			if (modp = strchr(busp + 1, '/')) {
2227 				if (atp = strchr(modp + 1, '@')) {
2228 					*atp = '\0';
2229 					addrp = atp + 1;
2230 					if (eoaddrp = strchr(addrp, '/'))
2231 						*eoaddrp = '\0';
2232 				}
2233 			}
2234 		}
2235 
2236 		if (modp && addrp) {
2237 			(void) strncpy(bootdev_oldmod, modp + 1,
2238 			    MAXCOMPONENTLEN);
2239 			bootdev_module[MAXCOMPONENTLEN - 1] = '\0';
2240 
2241 			(void) strncpy(bootdev_oldaddr, addrp, MAXCOMPONENTLEN);
2242 			bootdev_oldaddr[MAXCOMPONENTLEN - 1] = '\0';
2243 		}
2244 
2245 		/* Free up the bootpath storage now that we're done with it. */
2246 		kmem_free(bp1275, bp1275len);
2247 		kmem_free(bp, bplen);
2248 
2249 		if (bootdev_oldaddr[0] == '\0') {
2250 			quickexit = 1;
2251 			return (rv);
2252 		}
2253 	}
2254 
2255 	if (((lkupname = ddi_get_name(cdip)) != NULL) &&
2256 	    (strcmp(bootdev_module, lkupname) == 0 ||
2257 	    strcmp(bootdev_oldmod, lkupname) == 0) &&
2258 	    ((ddi_getprop(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
2259 	    "ignore-hardware-nodes", -1) != -1) ||
2260 	    ignore_hardware_nodes) &&
2261 	    strcmp(bootdev_newaddr, caddr) == 0 &&
2262 	    strcmp(bootdev_oldaddr, naddr) == 0) {
2263 		rv = DDI_SUCCESS;
2264 	}
2265 
2266 	return (rv);
2267 }
2268 
2269 /*
2270  * Perform a copy from a memory mapped device (whose devinfo pointer is devi)
2271  * separately mapped at devaddr in the kernel to a kernel buffer at kaddr.
2272  */
2273 /*ARGSUSED*/
2274 int
2275 e_ddi_copyfromdev(dev_info_t *devi,
2276     off_t off, const void *devaddr, void *kaddr, size_t len)
2277 {
2278 	bcopy(devaddr, kaddr, len);
2279 	return (0);
2280 }
2281 
2282 /*
2283  * Perform a copy to a memory mapped device (whose devinfo pointer is devi)
2284  * separately mapped at devaddr in the kernel from a kernel buffer at kaddr.
2285  */
2286 /*ARGSUSED*/
2287 int
2288 e_ddi_copytodev(dev_info_t *devi,
2289     off_t off, const void *kaddr, void *devaddr, size_t len)
2290 {
2291 	bcopy(kaddr, devaddr, len);
2292 	return (0);
2293 }
2294 
2295 
2296 static int
2297 poke_mem(peekpoke_ctlops_t *in_args)
2298 {
2299 	int err = DDI_SUCCESS;
2300 	on_trap_data_t otd;
2301 
2302 	/* Set up protected environment. */
2303 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2304 		switch (in_args->size) {
2305 		case sizeof (uint8_t):
2306 			*(uint8_t *)(in_args->dev_addr) =
2307 			    *(uint8_t *)in_args->host_addr;
2308 			break;
2309 
2310 		case sizeof (uint16_t):
2311 			*(uint16_t *)(in_args->dev_addr) =
2312 			    *(uint16_t *)in_args->host_addr;
2313 			break;
2314 
2315 		case sizeof (uint32_t):
2316 			*(uint32_t *)(in_args->dev_addr) =
2317 			    *(uint32_t *)in_args->host_addr;
2318 			break;
2319 
2320 		case sizeof (uint64_t):
2321 			*(uint64_t *)(in_args->dev_addr) =
2322 			    *(uint64_t *)in_args->host_addr;
2323 			break;
2324 
2325 		default:
2326 			err = DDI_FAILURE;
2327 			break;
2328 		}
2329 	} else
2330 		err = DDI_FAILURE;
2331 
2332 	/* Take down protected environment. */
2333 	no_trap();
2334 
2335 	return (err);
2336 }
2337 
2338 
2339 static int
2340 peek_mem(peekpoke_ctlops_t *in_args)
2341 {
2342 	int err = DDI_SUCCESS;
2343 	on_trap_data_t otd;
2344 
2345 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2346 		switch (in_args->size) {
2347 		case sizeof (uint8_t):
2348 			*(uint8_t *)in_args->host_addr =
2349 			    *(uint8_t *)in_args->dev_addr;
2350 			break;
2351 
2352 		case sizeof (uint16_t):
2353 			*(uint16_t *)in_args->host_addr =
2354 			    *(uint16_t *)in_args->dev_addr;
2355 			break;
2356 
2357 		case sizeof (uint32_t):
2358 			*(uint32_t *)in_args->host_addr =
2359 			    *(uint32_t *)in_args->dev_addr;
2360 			break;
2361 
2362 		case sizeof (uint64_t):
2363 			*(uint64_t *)in_args->host_addr =
2364 			    *(uint64_t *)in_args->dev_addr;
2365 			break;
2366 
2367 		default:
2368 			err = DDI_FAILURE;
2369 			break;
2370 		}
2371 	} else
2372 		err = DDI_FAILURE;
2373 
2374 	no_trap();
2375 	return (err);
2376 }
2377 
2378 
2379 /*
2380  * This is called only to process peek/poke when the DIP is NULL.
2381  * Assume that this is for memory, as nexi take care of device safe accesses.
2382  */
2383 int
2384 peekpoke_mem(ddi_ctl_enum_t cmd, peekpoke_ctlops_t *in_args)
2385 {
2386 	return (cmd == DDI_CTLOPS_PEEK ? peek_mem(in_args) : poke_mem(in_args));
2387 }
2388 
2389 /*
2390  * we've just done a cautious put/get. Check if it was successful by
2391  * calling pci_ereport_post() on all puts and for any gets that return -1
2392  */
2393 static int
2394 pci_peekpoke_check_fma(dev_info_t *dip, void *arg, ddi_ctl_enum_t ctlop,
2395     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2396 {
2397 	int	rval = DDI_SUCCESS;
2398 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2399 	ddi_fm_error_t de;
2400 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2401 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2402 	int check_err = 0;
2403 	int repcount = in_args->repcount;
2404 
2405 	if (ctlop == DDI_CTLOPS_POKE &&
2406 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC)
2407 		return (DDI_SUCCESS);
2408 
2409 	if (ctlop == DDI_CTLOPS_PEEK &&
2410 	    hdlp->ah_acc.devacc_attr_access != DDI_CAUTIOUS_ACC) {
2411 		for (; repcount; repcount--) {
2412 			switch (in_args->size) {
2413 			case sizeof (uint8_t):
2414 				if (*(uint8_t *)in_args->host_addr == 0xff)
2415 					check_err = 1;
2416 				break;
2417 			case sizeof (uint16_t):
2418 				if (*(uint16_t *)in_args->host_addr == 0xffff)
2419 					check_err = 1;
2420 				break;
2421 			case sizeof (uint32_t):
2422 				if (*(uint32_t *)in_args->host_addr ==
2423 				    0xffffffff)
2424 					check_err = 1;
2425 				break;
2426 			case sizeof (uint64_t):
2427 				if (*(uint64_t *)in_args->host_addr ==
2428 				    0xffffffffffffffff)
2429 					check_err = 1;
2430 				break;
2431 			}
2432 		}
2433 		if (check_err == 0)
2434 			return (DDI_SUCCESS);
2435 	}
2436 	/*
2437 	 * for a cautious put or get or a non-cautious get that returned -1 call
2438 	 * io framework to see if there really was an error
2439 	 */
2440 	bzero(&de, sizeof (ddi_fm_error_t));
2441 	de.fme_version = DDI_FME_VERSION;
2442 	de.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
2443 	if (hdlp->ah_acc.devacc_attr_access == DDI_CAUTIOUS_ACC) {
2444 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2445 		de.fme_acc_handle = in_args->handle;
2446 	} else if (hdlp->ah_acc.devacc_attr_access == DDI_DEFAULT_ACC) {
2447 		/*
2448 		 * We only get here with DDI_DEFAULT_ACC for config space gets.
2449 		 * Non-hardened drivers may be probing the hardware and
2450 		 * expecting -1 returned. So need to treat errors on
2451 		 * DDI_DEFAULT_ACC as DDI_FM_ERR_EXPECTED.
2452 		 */
2453 		de.fme_flag = DDI_FM_ERR_EXPECTED;
2454 		de.fme_acc_handle = in_args->handle;
2455 	} else {
2456 		/*
2457 		 * Hardened driver doing protected accesses shouldn't
2458 		 * get errors unless there's a hardware problem. Treat
2459 		 * as nonfatal if there's an error, but set UNEXPECTED
2460 		 * so we raise ereports on any errors and potentially
2461 		 * fault the device
2462 		 */
2463 		de.fme_flag = DDI_FM_ERR_UNEXPECTED;
2464 	}
2465 	(void) scan(dip, &de);
2466 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2467 	    de.fme_status != DDI_FM_OK) {
2468 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2469 		rval = DDI_FAILURE;
2470 		errp->err_ena = de.fme_ena;
2471 		errp->err_expected = de.fme_flag;
2472 		errp->err_status = DDI_FM_NONFATAL;
2473 	}
2474 	return (rval);
2475 }
2476 
2477 /*
2478  * pci_peekpoke_check_nofma() is for when an error occurs on a register access
2479  * during pci_ereport_post(). We can't call pci_ereport_post() again or we'd
2480  * recurse, so assume all puts are OK and gets have failed if they return -1
2481  */
2482 static int
2483 pci_peekpoke_check_nofma(void *arg, ddi_ctl_enum_t ctlop)
2484 {
2485 	int rval = DDI_SUCCESS;
2486 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2487 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2488 	ddi_acc_hdl_t *hdlp = (ddi_acc_hdl_t *)in_args->handle;
2489 	int repcount = in_args->repcount;
2490 
2491 	if (ctlop == DDI_CTLOPS_POKE)
2492 		return (rval);
2493 
2494 	for (; repcount; repcount--) {
2495 		switch (in_args->size) {
2496 		case sizeof (uint8_t):
2497 			if (*(uint8_t *)in_args->host_addr == 0xff)
2498 				rval = DDI_FAILURE;
2499 			break;
2500 		case sizeof (uint16_t):
2501 			if (*(uint16_t *)in_args->host_addr == 0xffff)
2502 				rval = DDI_FAILURE;
2503 			break;
2504 		case sizeof (uint32_t):
2505 			if (*(uint32_t *)in_args->host_addr == 0xffffffff)
2506 				rval = DDI_FAILURE;
2507 			break;
2508 		case sizeof (uint64_t):
2509 			if (*(uint64_t *)in_args->host_addr ==
2510 			    0xffffffffffffffff)
2511 				rval = DDI_FAILURE;
2512 			break;
2513 		}
2514 	}
2515 	if (hdlp->ah_acc.devacc_attr_access != DDI_DEFAULT_ACC &&
2516 	    rval == DDI_FAILURE) {
2517 		ndi_err_t *errp = (ndi_err_t *)hp->ahi_err;
2518 		errp->err_ena = fm_ena_generate(0, FM_ENA_FMT1);
2519 		errp->err_expected = DDI_FM_ERR_UNEXPECTED;
2520 		errp->err_status = DDI_FM_NONFATAL;
2521 	}
2522 	return (rval);
2523 }
2524 
2525 int
2526 pci_peekpoke_check(dev_info_t *dip, dev_info_t *rdip,
2527     ddi_ctl_enum_t ctlop, void *arg, void *result,
2528     int (*handler)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
2529     void *), kmutex_t *err_mutexp, kmutex_t *peek_poke_mutexp,
2530     void (*scan)(dev_info_t *, ddi_fm_error_t *))
2531 {
2532 	int rval;
2533 	peekpoke_ctlops_t *in_args = (peekpoke_ctlops_t *)arg;
2534 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)in_args->handle;
2535 
2536 	/*
2537 	 * this function only supports cautious accesses, not peeks/pokes
2538 	 * which don't have a handle
2539 	 */
2540 	if (hp == NULL)
2541 		return (DDI_FAILURE);
2542 
2543 	if (hp->ahi_acc_attr & DDI_ACCATTR_CONFIG_SPACE) {
2544 		if (!mutex_tryenter(err_mutexp)) {
2545 			/*
2546 			 * As this may be a recursive call from within
2547 			 * pci_ereport_post() we can't wait for the mutexes.
2548 			 * Fortunately we know someone is already calling
2549 			 * pci_ereport_post() which will handle the error bits
2550 			 * for us, and as this is a config space access we can
2551 			 * just do the access and check return value for -1
2552 			 * using pci_peekpoke_check_nofma().
2553 			 */
2554 			rval = handler(dip, rdip, ctlop, arg, result);
2555 			if (rval == DDI_SUCCESS)
2556 				rval = pci_peekpoke_check_nofma(arg, ctlop);
2557 			return (rval);
2558 		}
2559 		/*
2560 		 * This can't be a recursive call. Drop the err_mutex and get
2561 		 * both mutexes in the right order. If an error hasn't already
2562 		 * been detected by the ontrap code, use pci_peekpoke_check_fma
2563 		 * which will call pci_ereport_post() to check error status.
2564 		 */
2565 		mutex_exit(err_mutexp);
2566 	}
2567 	mutex_enter(peek_poke_mutexp);
2568 	rval = handler(dip, rdip, ctlop, arg, result);
2569 	if (rval == DDI_SUCCESS) {
2570 		mutex_enter(err_mutexp);
2571 		rval = pci_peekpoke_check_fma(dip, arg, ctlop, scan);
2572 		mutex_exit(err_mutexp);
2573 	}
2574 	mutex_exit(peek_poke_mutexp);
2575 	return (rval);
2576 }
2577 
2578 void
2579 impl_setup_ddi(void)
2580 {
2581 #if !defined(__xpv)
2582 	extern void startup_bios_disk(void);
2583 	extern int post_fastreboot;
2584 #endif
2585 	dev_info_t *xdip, *isa_dip;
2586 	rd_existing_t rd_mem_prop;
2587 	int err;
2588 
2589 	ndi_devi_alloc_sleep(ddi_root_node(), "ramdisk",
2590 	    (pnode_t)DEVI_SID_NODEID, &xdip);
2591 
2592 	(void) BOP_GETPROP(bootops,
2593 	    "ramdisk_start", (void *)&ramdisk_start);
2594 	(void) BOP_GETPROP(bootops,
2595 	    "ramdisk_end", (void *)&ramdisk_end);
2596 
2597 #ifdef __xpv
2598 	ramdisk_start -= ONE_GIG;
2599 	ramdisk_end -= ONE_GIG;
2600 #endif
2601 	rd_mem_prop.phys = ramdisk_start;
2602 	rd_mem_prop.size = ramdisk_end - ramdisk_start + 1;
2603 
2604 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, xdip,
2605 	    RD_EXISTING_PROP_NAME, (uchar_t *)&rd_mem_prop,
2606 	    sizeof (rd_mem_prop));
2607 	err = ndi_devi_bind_driver(xdip, 0);
2608 	ASSERT(err == 0);
2609 
2610 	/* isa node */
2611 	if (pseudo_isa) {
2612 		ndi_devi_alloc_sleep(ddi_root_node(), "isa",
2613 		    (pnode_t)DEVI_SID_NODEID, &isa_dip);
2614 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2615 		    "device_type", "isa");
2616 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, isa_dip,
2617 		    "bus-type", "isa");
2618 		(void) ndi_devi_bind_driver(isa_dip, 0);
2619 	}
2620 
2621 	/*
2622 	 * Read in the properties from the boot.
2623 	 */
2624 	get_boot_properties();
2625 
2626 	/* not framebuffer should be enumerated, if present */
2627 	get_vga_properties();
2628 
2629 	/* Copy console font if provided by boot. */
2630 	get_console_font();
2631 
2632 	/*
2633 	 * Check for administratively disabled drivers.
2634 	 */
2635 	check_driver_disable();
2636 
2637 #if !defined(__xpv)
2638 	if (!post_fastreboot && BOP_GETPROPLEN(bootops, "efi-systab") < 0)
2639 		startup_bios_disk();
2640 #endif
2641 	/* do bus dependent probes. */
2642 	impl_bus_initialprobe();
2643 }
2644 
2645 dev_t
2646 getrootdev(void)
2647 {
2648 	/*
2649 	 * Usually rootfs.bo_name is initialized by the
2650 	 * the bootpath property from bootenv.rc, but
2651 	 * defaults to "/ramdisk:a" otherwise.
2652 	 */
2653 	return (ddi_pathname_to_dev_t(rootfs.bo_name));
2654 }
2655 
/*
 * One registered bus probe routine.  Entries form a singly linked list.
 */
struct bus_probe {
	struct bus_probe *next;		/* next entry, NULL at the tail */
	void (*probe)(int);		/* probe routine to invoke */
};

/* Head of the list of registered bus probes; NULL when the list is empty. */
static struct bus_probe *bus_probes;
2661 void
2662 impl_bus_add_probe(void (*func)(int))
2663 {
2664 	struct bus_probe *probe;
2665 	struct bus_probe *lastprobe = NULL;
2666 
2667 	probe = kmem_alloc(sizeof (*probe), KM_SLEEP);
2668 	probe->probe = func;
2669 	probe->next = NULL;
2670 
2671 	if (!bus_probes) {
2672 		bus_probes = probe;
2673 		return;
2674 	}
2675 
2676 	lastprobe = bus_probes;
2677 	while (lastprobe->next)
2678 		lastprobe = lastprobe->next;
2679 	lastprobe->next = probe;
2680 }
2681 
2682 /*ARGSUSED*/
2683 void
2684 impl_bus_delete_probe(void (*func)(int))
2685 {
2686 	struct bus_probe *prev = NULL;
2687 	struct bus_probe *probe = bus_probes;
2688 
2689 	while (probe) {
2690 		if (probe->probe == func)
2691 			break;
2692 		prev = probe;
2693 		probe = probe->next;
2694 	}
2695 
2696 	if (probe == NULL)
2697 		return;
2698 
2699 	if (prev)
2700 		prev->next = probe->next;
2701 	else
2702 		bus_probes = probe->next;
2703 
2704 	kmem_free(probe, sizeof (struct bus_probe));
2705 }
2706 
2707 /*
2708  * impl_bus_initialprobe
2709  *	Modload the prom simulator, then let it probe to verify existence
2710  *	and type of PCI support.
2711  */
2712 static void
2713 impl_bus_initialprobe(void)
2714 {
2715 	struct bus_probe *probe;
2716 
2717 	/* load modules to install bus probes */
2718 #if defined(__xpv)
2719 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2720 		if (modload("misc", "pci_autoconfig") < 0) {
2721 			panic("failed to load misc/pci_autoconfig");
2722 		}
2723 
2724 		if (modload("drv", "isa") < 0)
2725 			panic("failed to load drv/isa");
2726 	}
2727 
2728 	(void) modload("misc", "xpv_autoconfig");
2729 #else
2730 	if (modload("misc", "pci_autoconfig") < 0) {
2731 		panic("failed to load misc/pci_autoconfig");
2732 	}
2733 
2734 	(void) modload("misc", "acpidev");
2735 
2736 	if (modload("drv", "isa") < 0)
2737 		panic("failed to load drv/isa");
2738 #endif
2739 
2740 	probe = bus_probes;
2741 	while (probe) {
2742 		/* run the probe functions */
2743 		(*probe->probe)(0);
2744 		probe = probe->next;
2745 	}
2746 }
2747 
2748 /*
2749  * impl_bus_reprobe
2750  *	Reprogram devices not set up by firmware.
2751  */
2752 static void
2753 impl_bus_reprobe(void)
2754 {
2755 	struct bus_probe *probe;
2756 
2757 	probe = bus_probes;
2758 	while (probe) {
2759 		/* run the probe function */
2760 		(*probe->probe)(1);
2761 		probe = probe->next;
2762 	}
2763 }
2764 
2765 
2766 /*
2767  * The following functions ready a cautious request to go up to the nexus
2768  * driver.  It is up to the nexus driver to decide how to process the request.
2769  * It may choose to call i_ddi_do_caut_get/put in this file, or do it
2770  * differently.
2771  */
2772 
2773 static void
2774 i_ddi_caut_getput_ctlops(ddi_acc_impl_t *hp, uint64_t host_addr,
2775     uint64_t dev_addr, size_t size, size_t repcount, uint_t flags,
2776     ddi_ctl_enum_t cmd)
2777 {
2778 	peekpoke_ctlops_t	cautacc_ctlops_arg;
2779 
2780 	cautacc_ctlops_arg.size = size;
2781 	cautacc_ctlops_arg.dev_addr = dev_addr;
2782 	cautacc_ctlops_arg.host_addr = host_addr;
2783 	cautacc_ctlops_arg.handle = (ddi_acc_handle_t)hp;
2784 	cautacc_ctlops_arg.repcount = repcount;
2785 	cautacc_ctlops_arg.flags = flags;
2786 
2787 	(void) ddi_ctlops(hp->ahi_common.ah_dip, hp->ahi_common.ah_dip, cmd,
2788 	    &cautacc_ctlops_arg, NULL);
2789 }
2790 
2791 uint8_t
2792 i_ddi_caut_get8(ddi_acc_impl_t *hp, uint8_t *addr)
2793 {
2794 	uint8_t value;
2795 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2796 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_PEEK);
2797 
2798 	return (value);
2799 }
2800 
2801 uint16_t
2802 i_ddi_caut_get16(ddi_acc_impl_t *hp, uint16_t *addr)
2803 {
2804 	uint16_t value;
2805 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2806 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_PEEK);
2807 
2808 	return (value);
2809 }
2810 
2811 uint32_t
2812 i_ddi_caut_get32(ddi_acc_impl_t *hp, uint32_t *addr)
2813 {
2814 	uint32_t value;
2815 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2816 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_PEEK);
2817 
2818 	return (value);
2819 }
2820 
2821 uint64_t
2822 i_ddi_caut_get64(ddi_acc_impl_t *hp, uint64_t *addr)
2823 {
2824 	uint64_t value;
2825 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2826 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_PEEK);
2827 
2828 	return (value);
2829 }
2830 
2831 void
2832 i_ddi_caut_put8(ddi_acc_impl_t *hp, uint8_t *addr, uint8_t value)
2833 {
2834 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2835 	    sizeof (uint8_t), 1, 0, DDI_CTLOPS_POKE);
2836 }
2837 
2838 void
2839 i_ddi_caut_put16(ddi_acc_impl_t *hp, uint16_t *addr, uint16_t value)
2840 {
2841 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2842 	    sizeof (uint16_t), 1, 0, DDI_CTLOPS_POKE);
2843 }
2844 
2845 void
2846 i_ddi_caut_put32(ddi_acc_impl_t *hp, uint32_t *addr, uint32_t value)
2847 {
2848 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2849 	    sizeof (uint32_t), 1, 0, DDI_CTLOPS_POKE);
2850 }
2851 
2852 void
2853 i_ddi_caut_put64(ddi_acc_impl_t *hp, uint64_t *addr, uint64_t value)
2854 {
2855 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)&value, (uintptr_t)addr,
2856 	    sizeof (uint64_t), 1, 0, DDI_CTLOPS_POKE);
2857 }
2858 
2859 void
2860 i_ddi_caut_rep_get8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2861     size_t repcount, uint_t flags)
2862 {
2863 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2864 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_PEEK);
2865 }
2866 
2867 void
2868 i_ddi_caut_rep_get16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2869     uint16_t *dev_addr, size_t repcount, uint_t flags)
2870 {
2871 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2872 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_PEEK);
2873 }
2874 
2875 void
2876 i_ddi_caut_rep_get32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2877     uint32_t *dev_addr, size_t repcount, uint_t flags)
2878 {
2879 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2880 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_PEEK);
2881 }
2882 
2883 void
2884 i_ddi_caut_rep_get64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2885     uint64_t *dev_addr, size_t repcount, uint_t flags)
2886 {
2887 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2888 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_PEEK);
2889 }
2890 
2891 void
2892 i_ddi_caut_rep_put8(ddi_acc_impl_t *hp, uint8_t *host_addr, uint8_t *dev_addr,
2893     size_t repcount, uint_t flags)
2894 {
2895 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2896 	    sizeof (uint8_t), repcount, flags, DDI_CTLOPS_POKE);
2897 }
2898 
2899 void
2900 i_ddi_caut_rep_put16(ddi_acc_impl_t *hp, uint16_t *host_addr,
2901     uint16_t *dev_addr, size_t repcount, uint_t flags)
2902 {
2903 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2904 	    sizeof (uint16_t), repcount, flags, DDI_CTLOPS_POKE);
2905 }
2906 
2907 void
2908 i_ddi_caut_rep_put32(ddi_acc_impl_t *hp, uint32_t *host_addr,
2909     uint32_t *dev_addr, size_t repcount, uint_t flags)
2910 {
2911 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2912 	    sizeof (uint32_t), repcount, flags, DDI_CTLOPS_POKE);
2913 }
2914 
2915 void
2916 i_ddi_caut_rep_put64(ddi_acc_impl_t *hp, uint64_t *host_addr,
2917     uint64_t *dev_addr, size_t repcount, uint_t flags)
2918 {
2919 	i_ddi_caut_getput_ctlops(hp, (uintptr_t)host_addr, (uintptr_t)dev_addr,
2920 	    sizeof (uint64_t), repcount, flags, DDI_CTLOPS_POKE);
2921 }
2922 
2923 boolean_t
2924 i_ddi_copybuf_required(ddi_dma_attr_t *attrp)
2925 {
2926 	uint64_t hi_pa;
2927 
2928 	hi_pa = ((uint64_t)physmax + 1ull) << PAGESHIFT;
2929 	if (attrp->dma_attr_addr_hi < hi_pa) {
2930 		return (B_TRUE);
2931 	}
2932 
2933 	return (B_FALSE);
2934 }
2935 
2936 size_t
2937 i_ddi_copybuf_size()
2938 {
2939 	return (dma_max_copybuf_size);
2940 }
2941 
2942 /*
2943  * i_ddi_dma_max()
2944  *    returns the maximum DMA size which can be performed in a single DMA
2945  *    window taking into account the devices DMA contraints (attrp), the
2946  *    maximum copy buffer size (if applicable), and the worse case buffer
2947  *    fragmentation.
2948  */
2949 /*ARGSUSED*/
2950 uint32_t
2951 i_ddi_dma_max(dev_info_t *dip, ddi_dma_attr_t *attrp)
2952 {
2953 	uint64_t maxxfer;
2954 
2955 
2956 	/*
2957 	 * take the min of maxxfer and the the worse case fragementation
2958 	 * (e.g. every cookie <= 1 page)
2959 	 */
2960 	maxxfer = MIN(attrp->dma_attr_maxxfer,
2961 	    ((uint64_t)(attrp->dma_attr_sgllen - 1) << PAGESHIFT));
2962 
2963 	/*
2964 	 * If the DMA engine can't reach all off memory, we also need to take
2965 	 * the max size of the copybuf into consideration.
2966 	 */
2967 	if (i_ddi_copybuf_required(attrp)) {
2968 		maxxfer = MIN(i_ddi_copybuf_size(), maxxfer);
2969 	}
2970 
2971 	/*
2972 	 * we only return a 32-bit value. Make sure it's not -1. Round to a
2973 	 * page so it won't be mistaken for an error value during debug.
2974 	 */
2975 	if (maxxfer >= 0xFFFFFFFF) {
2976 		maxxfer = 0xFFFFF000;
2977 	}
2978 
2979 	/*
2980 	 * make sure the value we return is a whole multiple of the
2981 	 * granlarity.
2982 	 */
2983 	if (attrp->dma_attr_granular > 1) {
2984 		maxxfer = maxxfer - (maxxfer % attrp->dma_attr_granular);
2985 	}
2986 
2987 	return ((uint32_t)maxxfer);
2988 }
2989 
2990 /*ARGSUSED*/
2991 void
2992 translate_devid(dev_info_t *dip)
2993 {
2994 }
2995 
2996 pfn_t
2997 i_ddi_paddr_to_pfn(paddr_t paddr)
2998 {
2999 	pfn_t pfn;
3000 
3001 #ifdef __xpv
3002 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
3003 		pfn = xen_assign_pfn(mmu_btop(paddr));
3004 	} else {
3005 		pfn = mmu_btop(paddr);
3006 	}
3007 #else
3008 	pfn = mmu_btop(paddr);
3009 #endif
3010 
3011 	return (pfn);
3012 }
3013