xref: /illumos-gate/usr/src/uts/i86pc/io/pci/pci_tools.c (revision 7ff178cd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/mkdev.h>
27 #include <sys/stat.h>
28 #include <sys/sunddi.h>
29 #include <vm/seg_kmem.h>
30 #include <sys/machparam.h>
31 #include <sys/sunndi.h>
32 #include <sys/ontrap.h>
33 #include <sys/psm.h>
34 #include <sys/pcie.h>
35 #include <sys/pci_cfgspace.h>
36 #include <sys/pci_tools.h>
37 #include <io/pci/pci_tools_ext.h>
38 #include <sys/apic.h>
39 #include <sys/apix.h>
40 #include <io/pci/pci_var.h>
41 #include <sys/pci_impl.h>
42 #include <sys/promif.h>
43 #include <sys/x86_archext.h>
44 #include <sys/cpuvar.h>
45 #include <sys/pci_cfgacc.h>
46 
47 #ifdef __xpv
48 #include <sys/hypervisor.h>
49 #endif
50 
51 #define	PCIEX_BDF_OFFSET_DELTA	4
52 #define	PCIEX_REG_FUNC_SHIFT	(PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
53 #define	PCIEX_REG_DEV_SHIFT	(PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
54 #define	PCIEX_REG_BUS_SHIFT	(PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)
55 
56 #define	SUCCESS	0
57 
58 extern uint64_t mcfg_mem_base;
59 int pcitool_debug = 0;
60 
61 /*
62  * Offsets of BARS in config space.  First entry of 0 means config space.
63  * Entries here correlate to pcitool_bars_t enumerated type.
64  */
65 static uint8_t pci_bars[] = {
66 	0x0,
67 	PCI_CONF_BASE0,
68 	PCI_CONF_BASE1,
69 	PCI_CONF_BASE2,
70 	PCI_CONF_BASE3,
71 	PCI_CONF_BASE4,
72 	PCI_CONF_BASE5,
73 	PCI_CONF_ROM
74 };
75 
76 /* Max offset allowed into config space for a particular device. */
77 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
78 
79 static uint64_t pcitool_swap_endian(uint64_t data, int size);
80 static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
81     boolean_t io_access);
82 static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
83 static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
84     boolean_t write_flag);
85 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
86 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
87 
88 /* Extern declarations */
89 extern int	(*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
90 		    psm_intr_op_t, int *);
91 
92 int
93 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
94 {
95 	int instance = ddi_get_instance(dip);
96 
97 	/* Create pcitool nodes for register access and interrupt routing. */
98 
99 	if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
100 	    PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
101 	    DDI_NT_REGACC, 0) != DDI_SUCCESS) {
102 		return (DDI_FAILURE);
103 	}
104 
105 	if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
106 	    PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
107 	    DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
108 		ddi_remove_minor_node(dip, PCI_MINOR_REG);
109 		return (DDI_FAILURE);
110 	}
111 
112 	if (is_pciex)
113 		max_cfg_size = PCIE_CONF_HDR_SIZE;
114 
115 	return (DDI_SUCCESS);
116 }
117 
118 void
119 pcitool_uninit(dev_info_t *dip)
120 {
121 	ddi_remove_minor_node(dip, PCI_MINOR_INTR);
122 	ddi_remove_minor_node(dip, PCI_MINOR_REG);
123 }
124 
125 /*ARGSUSED*/
126 static int
127 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
128 {
129 	ddi_intr_handle_impl_t info_hdl;
130 	pcitool_intr_set_t iset;
131 	uint32_t old_cpu;
132 	int ret, result;
133 	size_t copyinout_size;
134 	int rval = SUCCESS;
135 	apic_get_type_t type_info;
136 
137 	/* Version 1 of pcitool_intr_set_t doesn't have flags. */
138 	copyinout_size = (size_t)&iset.flags - (size_t)&iset;
139 
140 	if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
141 		return (EFAULT);
142 
143 	switch (iset.user_version) {
144 	case PCITOOL_V1:
145 		break;
146 
147 	case PCITOOL_V2:
148 		copyinout_size = sizeof (pcitool_intr_set_t);
149 		if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
150 			return (EFAULT);
151 		break;
152 
153 	default:
154 		iset.status = PCITOOL_OUT_OF_RANGE;
155 		rval = ENOTSUP;
156 		goto done_set_intr;
157 	}
158 
159 	if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
160 		rval = ENOTSUP;
161 		iset.status = PCITOOL_IO_ERROR;
162 		goto done_set_intr;
163 	}
164 
165 	info_hdl.ih_private = &type_info;
166 
167 	if ((*psm_intr_ops)(NULL, &info_hdl,
168 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
169 		rval = ENOTSUP;
170 		iset.status = PCITOOL_IO_ERROR;
171 		goto done_set_intr;
172 	}
173 
174 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
175 		if (iset.old_cpu > type_info.avgi_num_cpu) {
176 			rval = EINVAL;
177 			iset.status = PCITOOL_INVALID_CPUID;
178 			goto done_set_intr;
179 		}
180 		old_cpu = iset.old_cpu;
181 	} else {
182 		if ((old_cpu =
183 		    pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
184 			iset.status = PCITOOL_IO_ERROR;
185 			rval = EINVAL;
186 			goto done_set_intr;
187 		}
188 	}
189 
190 	if (iset.ino > type_info.avgi_num_intr) {
191 		rval = EINVAL;
192 		iset.status = PCITOOL_INVALID_INO;
193 		goto done_set_intr;
194 	}
195 
196 	iset.status = PCITOOL_SUCCESS;
197 
198 	old_cpu &= ~PSMGI_CPU_USER_BOUND;
199 
200 	/*
201 	 * For this locally-declared and used handle, ih_private will contain a
202 	 * CPU value, not an ihdl_plat_t as used for global interrupt handling.
203 	 */
204 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
205 		info_hdl.ih_vector = APIX_VIRTVECTOR(old_cpu, iset.ino);
206 	} else {
207 		info_hdl.ih_vector = iset.ino;
208 	}
209 	info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
210 	info_hdl.ih_flags = PSMGI_INTRBY_VEC;
211 	if (pcitool_debug)
212 		prom_printf("user version:%d, flags:0x%x\n",
213 		    iset.user_version, iset.flags);
214 
215 	result = ENOTSUP;
216 	if ((iset.user_version >= PCITOOL_V2) &&
217 	    (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
218 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
219 		    &result);
220 	} else {
221 		ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
222 		    &result);
223 	}
224 
225 	if (ret != PSM_SUCCESS) {
226 		switch (result) {
227 		case EIO:		/* Error making the change */
228 			rval = EIO;
229 			iset.status = PCITOOL_IO_ERROR;
230 			break;
231 		case ENXIO:		/* Couldn't convert vector to irq */
232 			rval = EINVAL;
233 			iset.status = PCITOOL_INVALID_INO;
234 			break;
235 		case EINVAL:		/* CPU out of range */
236 			rval = EINVAL;
237 			iset.status = PCITOOL_INVALID_CPUID;
238 			break;
239 		case ENOTSUP:		/* Requested PSM intr ops missing */
240 			rval = ENOTSUP;
241 			iset.status = PCITOOL_IO_ERROR;
242 			break;
243 		}
244 	}
245 
246 	/* Return original CPU. */
247 	iset.cpu_id = old_cpu;
248 
249 	/* Return new vector */
250 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
251 		iset.ino = APIX_VIRTVEC_VECTOR(info_hdl.ih_vector);
252 	}
253 
254 done_set_intr:
255 	iset.drvr_version = PCITOOL_VERSION;
256 	if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
257 		rval = EFAULT;
258 	return (rval);
259 }
260 
261 
262 /* It is assumed that dip != NULL */
263 static void
264 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
265 {
266 	(void) strncpy(devs->driver_name,
267 	    ddi_driver_name(dip), MAXMODCONFNAME-2);
268 	devs->driver_name[MAXMODCONFNAME-1] = '\0';
269 	(void) ddi_pathname(dip, devs->path);
270 	devs->dev_inst = ddi_get_instance(dip);
271 }
272 
273 static int
274 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
275 {
276 	/* Array part isn't used here, but oh well... */
277 	pcitool_intr_get_t partial_iget;
278 	pcitool_intr_get_t *iget = &partial_iget;
279 	size_t	iget_kmem_alloc_size = 0;
280 	uint8_t num_devs_ret;
281 	int copyout_rval;
282 	int rval = SUCCESS;
283 	int circ;
284 	int i;
285 
286 	ddi_intr_handle_impl_t info_hdl;
287 	apic_get_intr_t intr_info;
288 	apic_get_type_t type_info;
289 
290 	/* Read in just the header part, no array section. */
291 	if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
292 	    DDI_SUCCESS)
293 		return (EFAULT);
294 
295 	if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
296 		partial_iget.status = PCITOOL_IO_ERROR;
297 		partial_iget.num_devs_ret = 0;
298 		rval = ENOTSUP;
299 		goto done_get_intr;
300 	}
301 
302 	info_hdl.ih_private = &type_info;
303 
304 	if ((*psm_intr_ops)(NULL, &info_hdl,
305 	    PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
306 		iget->status = PCITOOL_IO_ERROR;
307 		iget->num_devs_ret = 0;
308 		rval = EINVAL;
309 		goto done_get_intr;
310 	}
311 
312 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
313 		if (partial_iget.cpu_id > type_info.avgi_num_cpu) {
314 			partial_iget.status = PCITOOL_INVALID_CPUID;
315 			partial_iget.num_devs_ret = 0;
316 			rval = EINVAL;
317 			goto done_get_intr;
318 		}
319 	}
320 
321 	/* Validate argument. */
322 	if ((partial_iget.ino & APIX_VIRTVEC_VECMASK) >
323 	    type_info.avgi_num_intr) {
324 		partial_iget.status = PCITOOL_INVALID_INO;
325 		partial_iget.num_devs_ret = 0;
326 		rval = EINVAL;
327 		goto done_get_intr;
328 	}
329 
330 	num_devs_ret = partial_iget.num_devs_ret;
331 	intr_info.avgi_dip_list = NULL;
332 	intr_info.avgi_req_flags =
333 	    PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
334 	/*
335 	 * For this locally-declared and used handle, ih_private will contain a
336 	 * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
337 	 * global interrupt handling.
338 	 */
339 	info_hdl.ih_private = &intr_info;
340 
341 	if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
342 		info_hdl.ih_vector =
343 		    APIX_VIRTVECTOR(partial_iget.cpu_id, partial_iget.ino);
344 	} else {
345 		info_hdl.ih_vector = partial_iget.ino;
346 	}
347 
348 	/* Caller wants device information returned. */
349 	if (num_devs_ret > 0) {
350 
351 		intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
352 
353 		/*
354 		 * Allocate room.
355 		 * If num_devs_ret == 0 iget remains pointing to partial_iget.
356 		 */
357 		iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
358 		iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
359 
360 		/* Read in whole structure to verify there's room. */
361 		if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
362 		    SUCCESS) {
363 
364 			/* Be consistent and just return EFAULT here. */
365 			kmem_free(iget, iget_kmem_alloc_size);
366 
367 			return (EFAULT);
368 		}
369 	}
370 
371 	bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
372 	iget->ino = info_hdl.ih_vector;
373 
374 	/*
375 	 * Lock device tree branch from the pci root nexus on down if info will
376 	 * be extracted from dips returned from the tree.
377 	 */
378 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
379 		ndi_devi_enter(dip, &circ);
380 	}
381 
382 	/* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
383 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
384 	    PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
385 		iget->status = PCITOOL_IO_ERROR;
386 		iget->num_devs_ret = 0;
387 		rval = EINVAL;
388 		goto done_get_intr;
389 	}
390 
391 	/*
392 	 * Fill in the pcitool_intr_get_t to be returned,
393 	 * with the CPU, num_devs_ret and num_devs.
394 	 */
395 	if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
396 	    intr_info.avgi_cpu_id == IRQ_UNINIT)
397 		iget->cpu_id = 0;
398 	else
399 		iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
400 
401 	/* Number of devices returned by apic. */
402 	iget->num_devs = intr_info.avgi_num_devs;
403 
404 	/* Device info was returned. */
405 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
406 
407 		/*
408 		 * num devs returned is num devs ret by apic,
409 		 * space permitting.
410 		 */
411 		iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
412 
413 		/*
414 		 * Loop thru list of dips and extract driver, name and instance.
415 		 * Fill in the pcitool_intr_dev_t's with this info.
416 		 */
417 		for (i = 0; i < iget->num_devs_ret; i++)
418 			pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
419 			    &iget->dev[i]);
420 
421 		/* Free kmem_alloc'ed memory of the apic_get_intr_t */
422 		kmem_free(intr_info.avgi_dip_list,
423 		    intr_info.avgi_num_devs * sizeof (dev_info_t *));
424 	}
425 
426 done_get_intr:
427 
428 	if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
429 		ndi_devi_exit(dip, circ);
430 	}
431 
432 	iget->drvr_version = PCITOOL_VERSION;
433 	copyout_rval = ddi_copyout(iget, arg,
434 	    PCITOOL_IGET_SIZE(num_devs_ret), mode);
435 
436 	if (iget_kmem_alloc_size > 0)
437 		kmem_free(iget, iget_kmem_alloc_size);
438 
439 	if (copyout_rval != DDI_SUCCESS)
440 		rval = EFAULT;
441 
442 	return (rval);
443 }
444 
445 /*ARGSUSED*/
446 static int
447 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
448 {
449 	pcitool_intr_info_t intr_info;
450 	ddi_intr_handle_impl_t info_hdl;
451 	int rval = SUCCESS;
452 	apic_get_type_t type_info;
453 
454 	/* If we need user_version, and to ret same user version as passed in */
455 	if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
456 	    DDI_SUCCESS) {
457 		if (pcitool_debug)
458 			prom_printf("Error reading arguments\n");
459 		return (EFAULT);
460 	}
461 
462 	if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
463 		return (ENOTSUP);
464 
465 	info_hdl.ih_private = &type_info;
466 
467 	/* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
468 	if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
469 	    PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
470 		intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
471 		intr_info.ctlr_version = 0;
472 		intr_info.num_intr = APIC_MAX_VECTOR;
473 	} else {
474 		intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
475 		intr_info.num_cpu = type_info.avgi_num_cpu;
476 		if (strcmp(type_info.avgi_type,
477 		    APIC_PCPLUSMP_NAME) == 0) {
478 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
479 			intr_info.num_intr = type_info.avgi_num_intr;
480 		} else if (strcmp(type_info.avgi_type,
481 		    APIC_APIX_NAME) == 0) {
482 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_APIX;
483 			intr_info.num_intr = type_info.avgi_num_intr;
484 		} else {
485 			intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
486 			intr_info.num_intr = APIC_MAX_VECTOR;
487 		}
488 	}
489 
490 	intr_info.drvr_version = PCITOOL_VERSION;
491 	if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
492 	    DDI_SUCCESS) {
493 		if (pcitool_debug)
494 			prom_printf("Error returning arguments.\n");
495 		rval = EFAULT;
496 	}
497 
498 	return (rval);
499 }
500 
501 
502 
503 /*
504  * Main function for handling interrupt CPU binding requests and queries.
505  * Need to implement later
506  */
507 int
508 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
509 {
510 	int rval;
511 
512 	switch (cmd) {
513 
514 	/* Associate a new CPU with a given vector */
515 	case PCITOOL_DEVICE_SET_INTR:
516 		rval = pcitool_set_intr(dip, arg, mode);
517 		break;
518 
519 	case PCITOOL_DEVICE_GET_INTR:
520 		rval = pcitool_get_intr(dip, arg, mode);
521 		break;
522 
523 	case PCITOOL_SYSTEM_INTR_INFO:
524 		rval = pcitool_intr_info(dip, arg, mode);
525 		break;
526 
527 	default:
528 		rval = ENOTSUP;
529 	}
530 
531 	return (rval);
532 }
533 
534 /*
535  * Perform register accesses on the nexus device itself.
536  * No explicit PCI nexus device for X86, so not applicable.
537  */
538 
539 /*ARGSUSED*/
540 int
541 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
542 {
543 	return (ENOTSUP);
544 }
545 
/*
 * Swap endianness.
 *
 * Reverses the order of the first `size' bytes (in memory order) of data
 * and returns them in the same byte positions of the result; all remaining
 * bytes of the result are zero.  A size of zero or less yields 0.  A size
 * larger than 8 is treated as 8 so the 8-byte buffers are never indexed out
 * of bounds.
 */
static uint64_t
pcitool_swap_endian(uint64_t data, int size)
{
	typedef union {
		uint64_t data64;
		uint8_t data8[8];
	} data_split_t;

	data_split_t orig_data;
	data_split_t returned_data;
	int i;

	/* Clamp rather than read/write past the end of data8[]. */
	if (size > (int)sizeof (orig_data.data8))
		size = (int)sizeof (orig_data.data8);

	orig_data.data64 = data;
	returned_data.data64 = 0;

	for (i = 0; i < size; i++) {
		returned_data.data8[i] = orig_data.data8[size - 1 - i];
	}

	return (returned_data.data64);
}
568 
569 /*
570  * A note about ontrap handling:
571  *
572  * X86 systems on which this module was tested return FFs instead of bus errors
573  * when accessing devices with invalid addresses.  Ontrap handling, which
574  * gracefully handles kernel bus errors, is installed anyway for I/O and mem
575  * space accessing (not for pci config space), in case future X86 platforms
576  * require it.
577  */
578 
579 /* Access device.  prg is modified. */
580 static int
581 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
582     boolean_t io_access)
583 {
584 	int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
585 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
586 	int rval = SUCCESS;
587 	uint64_t local_data;
588 	pci_cfgacc_req_t req;
589 	uint32_t max_offset;
590 
591 	if ((size <= 0) || (size > 8) || ((size & (size - 1)) != 0)) {
592 		prg->status = PCITOOL_INVALID_SIZE;
593 		return (ENOTSUP);
594 	}
595 
596 	/*
597 	 * NOTE: there is no way to verify whether or not the address is
598 	 * valid other than that it is within the maximum offset.  The
599 	 * put functions return void and the get functions return -1 on error.
600 	 */
601 
602 	if (io_access)
603 		max_offset = 0xFF;
604 	else
605 		max_offset = 0xFFF;
606 	if (prg->offset + size - 1 > max_offset) {
607 		prg->status = PCITOOL_INVALID_ADDRESS;
608 		return (ENOTSUP);
609 	}
610 
611 	prg->status = PCITOOL_SUCCESS;
612 
613 	req.rcdip = NULL;
614 	req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
615 	req.offset = prg->offset;
616 	req.size = size;
617 	req.write = write_flag;
618 	req.ioacc = io_access;
619 	if (write_flag) {
620 		if (big_endian) {
621 			local_data = pcitool_swap_endian(prg->data, size);
622 		} else {
623 			local_data = prg->data;
624 		}
625 		VAL64(&req) = local_data;
626 		pci_cfgacc_acc(&req);
627 	} else {
628 		pci_cfgacc_acc(&req);
629 		switch (size) {
630 		case 1:
631 			local_data = VAL8(&req);
632 			break;
633 		case 2:
634 			local_data = VAL16(&req);
635 			break;
636 		case 4:
637 			local_data = VAL32(&req);
638 			break;
639 		case 8:
640 			local_data = VAL64(&req);
641 			break;
642 		}
643 		if (big_endian) {
644 			prg->data =
645 			    pcitool_swap_endian(local_data, size);
646 		} else {
647 			prg->data = local_data;
648 		}
649 	}
650 	/*
651 	 * Check if legacy IO config access is used, in which case
652 	 * only first 256 bytes are valid.
653 	 */
654 	if (req.ioacc && (prg->offset + size - 1 > 0xFF)) {
655 		prg->status = PCITOOL_INVALID_ADDRESS;
656 		return (ENOTSUP);
657 	}
658 
659 	/* Set phys_addr only if MMIO is used */
660 	prg->phys_addr = 0;
661 	if (!req.ioacc && mcfg_mem_base != 0) {
662 		prg->phys_addr = mcfg_mem_base + prg->offset +
663 		    ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
664 		    (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
665 		    (prg->func_no << PCIEX_REG_FUNC_SHIFT));
666 	}
667 
668 	return (rval);
669 }
670 
671 static int
672 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
673 {
674 	int port = (int)prg->phys_addr;
675 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
676 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
677 	int rval = SUCCESS;
678 	on_trap_data_t otd;
679 	uint64_t local_data;
680 
681 
682 	/*
683 	 * on_trap works like setjmp.
684 	 *
685 	 * A non-zero return here means on_trap has returned from an error.
686 	 *
687 	 * A zero return here means that on_trap has just returned from setup.
688 	 */
689 	if (on_trap(&otd, OT_DATA_ACCESS)) {
690 		no_trap();
691 		if (pcitool_debug)
692 			prom_printf(
693 			    "pcitool_io_access: on_trap caught an error...\n");
694 		prg->status = PCITOOL_INVALID_ADDRESS;
695 		return (EFAULT);
696 	}
697 
698 	if (write_flag) {
699 
700 		if (big_endian) {
701 			local_data = pcitool_swap_endian(prg->data, size);
702 		} else {
703 			local_data = prg->data;
704 		}
705 
706 		if (pcitool_debug)
707 			prom_printf("Writing %ld byte(s) to port 0x%x\n",
708 			    size, port);
709 
710 		switch (size) {
711 		case 1:
712 			outb(port, (uint8_t)local_data);
713 			break;
714 		case 2:
715 			outw(port, (uint16_t)local_data);
716 			break;
717 		case 4:
718 			outl(port, (uint32_t)local_data);
719 			break;
720 		default:
721 			rval = ENOTSUP;
722 			prg->status = PCITOOL_INVALID_SIZE;
723 			break;
724 		}
725 	} else {
726 		if (pcitool_debug)
727 			prom_printf("Reading %ld byte(s) from port 0x%x\n",
728 			    size, port);
729 
730 		switch (size) {
731 		case 1:
732 			local_data = inb(port);
733 			break;
734 		case 2:
735 			local_data = inw(port);
736 			break;
737 		case 4:
738 			local_data = inl(port);
739 			break;
740 		default:
741 			rval = ENOTSUP;
742 			prg->status = PCITOOL_INVALID_SIZE;
743 			break;
744 		}
745 
746 		if (rval == SUCCESS) {
747 			if (big_endian) {
748 				prg->data =
749 				    pcitool_swap_endian(local_data, size);
750 			} else {
751 				prg->data = local_data;
752 			}
753 		}
754 	}
755 
756 	no_trap();
757 	return (rval);
758 }
759 
760 static int
761 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
762 {
763 	size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
764 	boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
765 	int rval = DDI_SUCCESS;
766 	on_trap_data_t otd;
767 	uint64_t local_data;
768 
769 	/*
770 	 * on_trap works like setjmp.
771 	 *
772 	 * A non-zero return here means on_trap has returned from an error.
773 	 *
774 	 * A zero return here means that on_trap has just returned from setup.
775 	 */
776 	if (on_trap(&otd, OT_DATA_ACCESS)) {
777 		no_trap();
778 		if (pcitool_debug)
779 			prom_printf(
780 			    "pcitool_mem_access: on_trap caught an error...\n");
781 		prg->status = PCITOOL_INVALID_ADDRESS;
782 		return (EFAULT);
783 	}
784 
785 	if (write_flag) {
786 
787 		if (big_endian) {
788 			local_data = pcitool_swap_endian(prg->data, size);
789 		} else {
790 			local_data = prg->data;
791 		}
792 
793 		switch (size) {
794 		case 1:
795 			*((uint8_t *)(uintptr_t)virt_addr) = local_data;
796 			break;
797 		case 2:
798 			*((uint16_t *)(uintptr_t)virt_addr) = local_data;
799 			break;
800 		case 4:
801 			*((uint32_t *)(uintptr_t)virt_addr) = local_data;
802 			break;
803 		case 8:
804 			*((uint64_t *)(uintptr_t)virt_addr) = local_data;
805 			break;
806 		default:
807 			rval = ENOTSUP;
808 			prg->status = PCITOOL_INVALID_SIZE;
809 			break;
810 		}
811 	} else {
812 		switch (size) {
813 		case 1:
814 			local_data = *((uint8_t *)(uintptr_t)virt_addr);
815 			break;
816 		case 2:
817 			local_data = *((uint16_t *)(uintptr_t)virt_addr);
818 			break;
819 		case 4:
820 			local_data = *((uint32_t *)(uintptr_t)virt_addr);
821 			break;
822 		case 8:
823 			local_data = *((uint64_t *)(uintptr_t)virt_addr);
824 			break;
825 		default:
826 			rval = ENOTSUP;
827 			prg->status = PCITOOL_INVALID_SIZE;
828 			break;
829 		}
830 
831 		if (rval == SUCCESS) {
832 			if (big_endian) {
833 				prg->data =
834 				    pcitool_swap_endian(local_data, size);
835 			} else {
836 				prg->data = local_data;
837 			}
838 		}
839 	}
840 
841 	no_trap();
842 	return (rval);
843 }
844 
845 /*
846  * Map up to 2 pages which contain the address we want to access.
847  *
848  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
849  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
850  * We'll never have to map more than two pages.
851  */
852 
853 static uint64_t
854 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
855 {
856 
857 	uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
858 	uint64_t offset = phys_addr & MMU_PAGEOFFSET;
859 	void *virt_base;
860 	uint64_t returned_addr;
861 	pfn_t pfn;
862 
863 	if (pcitool_debug)
864 		prom_printf("pcitool_map: Called with PA:0x%p\n",
865 		    (void *)(uintptr_t)phys_addr);
866 
867 	*num_pages = 1;
868 
869 	/* Desired mapping would span more than two pages. */
870 	if ((offset + size) > (MMU_PAGESIZE * 2)) {
871 		if (pcitool_debug)
872 			prom_printf("boundary violation: "
873 			    "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
874 			    offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
875 		return (NULL);
876 
877 	} else if ((offset + size) > MMU_PAGESIZE) {
878 		(*num_pages)++;
879 	}
880 
881 	/* Get page(s) of virtual space. */
882 	virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
883 	if (virt_base == NULL) {
884 		if (pcitool_debug)
885 			prom_printf("Couldn't get virtual base address.\n");
886 		return (NULL);
887 	}
888 
889 	if (pcitool_debug)
890 		prom_printf("Got base virtual address:0x%p\n", virt_base);
891 
892 #ifdef __xpv
893 	/*
894 	 * We should only get here if we are dom0.
895 	 * We're using a real device so we need to translate the MA to a PFN.
896 	 */
897 	ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
898 	pfn = xen_assign_pfn(mmu_btop(page_base));
899 #else
900 	pfn = btop(page_base);
901 #endif
902 
903 	/* Now map the allocated virtual space to the physical address. */
904 	hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
905 	    PROT_READ | PROT_WRITE | HAT_STRICTORDER,
906 	    HAT_LOAD_LOCK);
907 
908 	returned_addr = ((uintptr_t)(virt_base)) + offset;
909 
910 	if (pcitool_debug)
911 		prom_printf("pcitool_map: returning VA:0x%p\n",
912 		    (void *)(uintptr_t)returned_addr);
913 
914 	return (returned_addr);
915 }
916 
917 /* Unmap the mapped page(s). */
918 static void
919 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
920 {
921 	void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
922 
923 	hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
924 	    HAT_UNLOAD_UNLOCK);
925 	vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
926 }
927 
928 
929 /* Perform register accesses on PCI leaf devices. */
930 /*ARGSUSED*/
931 int
932 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
933 {
934 	boolean_t	write_flag = B_FALSE;
935 	boolean_t	io_access = B_TRUE;
936 	int		rval = 0;
937 	pcitool_reg_t	prg;
938 	uint8_t		size;
939 
940 	uint64_t	base_addr;
941 	uint64_t	virt_addr;
942 	size_t		num_virt_pages;
943 
944 	switch (cmd) {
945 	case (PCITOOL_DEVICE_SET_REG):
946 		write_flag = B_TRUE;
947 
948 	/*FALLTHRU*/
949 	case (PCITOOL_DEVICE_GET_REG):
950 		if (pcitool_debug)
951 			prom_printf("pci_dev_reg_ops set/get reg\n");
952 		if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
953 		    DDI_SUCCESS) {
954 			if (pcitool_debug)
955 				prom_printf("Error reading arguments\n");
956 			return (EFAULT);
957 		}
958 
959 		if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
960 			prg.status = PCITOOL_OUT_OF_RANGE;
961 			rval = EINVAL;
962 			goto done_reg;
963 		}
964 
965 		if (pcitool_debug)
966 			prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
967 			    prg.bus_no, prg.dev_no, prg.func_no);
968 		/* Validate address arguments of bus / dev / func */
969 		if (((prg.bus_no &
970 		    (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
971 		    prg.bus_no) ||
972 		    ((prg.dev_no &
973 		    (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
974 		    prg.dev_no) ||
975 		    ((prg.func_no &
976 		    (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
977 		    prg.func_no)) {
978 			prg.status = PCITOOL_INVALID_ADDRESS;
979 			rval = EINVAL;
980 			goto done_reg;
981 		}
982 
983 		size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
984 
985 		/* Proper config space desired. */
986 		if (prg.barnum == 0) {
987 
988 			if (pcitool_debug)
989 				prom_printf(
990 				    "config access: offset:0x%" PRIx64 ", "
991 				    "phys_addr:0x%" PRIx64 "\n",
992 				    prg.offset, prg.phys_addr);
993 
994 			if (prg.offset >= max_cfg_size) {
995 				prg.status = PCITOOL_OUT_OF_RANGE;
996 				rval = EINVAL;
997 				goto done_reg;
998 			}
999 			if (max_cfg_size == PCIE_CONF_HDR_SIZE)
1000 				io_access = B_FALSE;
1001 
1002 			rval = pcitool_cfg_access(&prg, write_flag, io_access);
1003 			if (pcitool_debug)
1004 				prom_printf(
1005 				    "config access: data:0x%" PRIx64 "\n",
1006 				    prg.data);
1007 
1008 		/* IO/ MEM/ MEM64 space. */
1009 		} else {
1010 
1011 			pcitool_reg_t	prg2;
1012 			bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1013 
1014 			/*
1015 			 * Translate BAR number into offset of the BAR in
1016 			 * the device's config space.
1017 			 */
1018 			prg2.offset = pci_bars[prg2.barnum];
1019 			prg2.acc_attr =
1020 			    PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1021 
1022 			if (pcitool_debug)
1023 				prom_printf(
1024 				    "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1025 				    prg2.barnum, prg2.offset);
1026 			/*
1027 			 * Get Bus Address Register (BAR) from config space.
1028 			 * prg2.offset is the offset into config space of the
1029 			 * BAR desired.  prg.status is modified on error.
1030 			 */
1031 			rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
1032 			if (rval != SUCCESS) {
1033 				if (pcitool_debug)
1034 					prom_printf("BAR access failed\n");
1035 				prg.status = prg2.status;
1036 				goto done_reg;
1037 			}
1038 			/*
1039 			 * Reference proper PCI space based on the BAR.
1040 			 * If 64 bit MEM space, need to load other half of the
1041 			 * BAR first.
1042 			 */
1043 
1044 			if (pcitool_debug)
1045 				prom_printf("bar returned is 0x%" PRIx64 "\n",
1046 				    prg2.data);
1047 			if (!prg2.data) {
1048 				if (pcitool_debug)
1049 					prom_printf("BAR data == 0\n");
1050 				rval = EINVAL;
1051 				prg.status = PCITOOL_INVALID_ADDRESS;
1052 				goto done_reg;
1053 			}
1054 			if (prg2.data == 0xffffffff) {
1055 				if (pcitool_debug)
1056 					prom_printf("BAR data == -1\n");
1057 				rval = EINVAL;
1058 				prg.status = PCITOOL_INVALID_ADDRESS;
1059 				goto done_reg;
1060 			}
1061 
1062 			/*
1063 			 * BAR has bits saying this space is IO space, unless
1064 			 * this is the ROM address register.
1065 			 */
1066 			if (((PCI_BASE_SPACE_M & prg2.data) ==
1067 			    PCI_BASE_SPACE_IO) &&
1068 			    (prg2.offset != PCI_CONF_ROM)) {
1069 				if (pcitool_debug)
1070 					prom_printf("IO space\n");
1071 
1072 				prg2.data &= PCI_BASE_IO_ADDR_M;
1073 				prg.phys_addr = prg2.data + prg.offset;
1074 
1075 				rval = pcitool_io_access(&prg, write_flag);
1076 				if ((rval != SUCCESS) && (pcitool_debug))
1077 					prom_printf("IO access failed\n");
1078 
1079 				goto done_reg;
1080 
1081 
1082 			/*
1083 			 * BAR has bits saying this space is 64 bit memory
1084 			 * space, unless this is the ROM address register.
1085 			 *
1086 			 * The 64 bit address stored in two BAR cells is not
1087 			 * necessarily aligned on an 8-byte boundary.
1088 			 * Need to keep the first 4 bytes read,
1089 			 * and do a separate read of the high 4 bytes.
1090 			 */
1091 
1092 			} else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1093 			    (prg2.offset != PCI_CONF_ROM)) {
1094 
1095 				uint32_t low_bytes =
1096 				    (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1097 
1098 				/*
1099 				 * Don't try to read the next 4 bytes
1100 				 * past the end of BARs.
1101 				 */
1102 				if (prg2.offset >= PCI_CONF_BASE5) {
1103 					prg.status = PCITOOL_OUT_OF_RANGE;
1104 					rval = EIO;
1105 					goto done_reg;
1106 				}
1107 
1108 				/*
1109 				 * Access device.
1110 				 * prg2.status is modified on error.
1111 				 */
1112 				prg2.offset += 4;
1113 				rval = pcitool_cfg_access(&prg2,
1114 				    B_FALSE, B_TRUE);
1115 				if (rval != SUCCESS) {
1116 					prg.status = prg2.status;
1117 					goto done_reg;
1118 				}
1119 
1120 				if (prg2.data == 0xffffffff) {
1121 					prg.status = PCITOOL_INVALID_ADDRESS;
1122 					prg.status = EFAULT;
1123 					goto done_reg;
1124 				}
1125 
1126 				prg2.data = (prg2.data << 32) + low_bytes;
1127 				if (pcitool_debug)
1128 					prom_printf(
1129 					    "64 bit mem space.  "
1130 					    "64-bit bar is 0x%" PRIx64 "\n",
1131 					    prg2.data);
1132 
1133 			/* Mem32 space, including ROM */
1134 			} else {
1135 
1136 				if (prg2.offset == PCI_CONF_ROM) {
1137 					if (pcitool_debug)
1138 						prom_printf(
1139 						    "Additional ROM "
1140 						    "checking\n");
1141 					/* Can't write to ROM */
1142 					if (write_flag) {
1143 						prg.status = PCITOOL_ROM_WRITE;
1144 						rval = EIO;
1145 						goto done_reg;
1146 
1147 					/* ROM disabled for reading */
1148 					} else if (!(prg2.data & 0x00000001)) {
1149 						prg.status =
1150 						    PCITOOL_ROM_DISABLED;
1151 						rval = EIO;
1152 						goto done_reg;
1153 					}
1154 				}
1155 
1156 				if (pcitool_debug)
1157 					prom_printf("32 bit mem space\n");
1158 			}
1159 
1160 			/* Common code for all IO/MEM range spaces. */
1161 
1162 			base_addr = prg2.data;
1163 			if (pcitool_debug)
1164 				prom_printf(
1165 				    "addr portion of bar is 0x%" PRIx64 ", "
1166 				    "base=0x%" PRIx64 ", "
1167 				    "offset:0x%" PRIx64 "\n",
1168 				    prg2.data, base_addr, prg.offset);
1169 			/*
1170 			 * Use offset provided by caller to index into
1171 			 * desired space, then access.
1172 			 * Note that prg.status is modified on error.
1173 			 */
1174 			prg.phys_addr = base_addr + prg.offset;
1175 
1176 			virt_addr = pcitool_map(prg.phys_addr, size,
1177 			    &num_virt_pages);
1178 			if (virt_addr == NULL) {
1179 				prg.status = PCITOOL_IO_ERROR;
1180 				rval = EIO;
1181 				goto done_reg;
1182 			}
1183 
1184 			rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1185 			pcitool_unmap(virt_addr, num_virt_pages);
1186 		}
1187 done_reg:
1188 		prg.drvr_version = PCITOOL_VERSION;
1189 		if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1190 		    DDI_SUCCESS) {
1191 			if (pcitool_debug)
1192 				prom_printf("Error returning arguments.\n");
1193 			rval = EFAULT;
1194 		}
1195 		break;
1196 	default:
1197 		rval = ENOTTY;
1198 		break;
1199 	}
1200 	return (rval);
1201 }
1202