1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2018 Joyent, Inc.
14 *
15 * Convenience routines for identifying current or available devices that are
16 * suitable for PCI passthrough to a bhyve guest.
17 */
18
19 #include <libdevinfo.h>
20 #include <libppt.h>
21
22 #include <sys/param.h>
23 #include <sys/stat.h>
24 #include <sys/list.h>
25 #include <strings.h>
26 #include <stddef.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <errno.h>
30 #include <pcidb.h>
31 #include <glob.h>
32
33 typedef struct node_data {
34 pcidb_hdl_t *nd_db;
35 list_t nd_matches;
36 nvlist_t *nd_nvl;
37 int nd_err;
38 } node_data_t;
39
40 typedef struct ppt_match {
41 list_node_t pm_list;
42 char pm_path[MAXPATHLEN];
43 char pm_vendor[5];
44 char pm_device[5];
45 } ppt_match_t;
46
47 static boolean_t
is_pci(di_node_t di_node)48 is_pci(di_node_t di_node)
49 {
50 char *svals;
51
52 if (di_prop_lookup_strings(DDI_DEV_T_ANY, di_parent_node(di_node),
53 "device_type", &svals) != 1)
54 return (B_FALSE);
55
56 return (strcmp(svals, "pci") == 0 || strcmp(svals, "pciex") == 0);
57 }
58
59 static int
populate_int_prop(di_node_t di_node,nvlist_t * nvl,const char * name,int * ival)60 populate_int_prop(di_node_t di_node, nvlist_t *nvl, const char *name, int *ival)
61 {
62 char val[20];
63 int *ivals;
64 int err;
65
66 if (di_prop_lookup_ints(DDI_DEV_T_ANY, di_node, name, &ivals) != 1)
67 return (errno);
68
69 (void) snprintf(val, sizeof (val), "%x", ivals[0]);
70
71 err = nvlist_add_string(nvl, name, val);
72
73 if (err == 0 && ival != NULL)
74 *ival = ivals[0];
75
76 return (err);
77 }
78
79 static int
dev_getlabel(pcidb_hdl_t * db,int vid,int did,char * buf,size_t buflen)80 dev_getlabel(pcidb_hdl_t *db, int vid, int did, char *buf, size_t buflen)
81 {
82 pcidb_vendor_t *vend = NULL;
83 pcidb_device_t *dev = NULL;
84
85 if ((vend = pcidb_lookup_vendor(db, vid)) == NULL)
86 return (ENOENT);
87
88 if ((dev = pcidb_lookup_device_by_vendor(vend, did)) == NULL)
89 return (ENOENT);
90
91 (void) snprintf(buf, buflen, "%s %s", pcidb_vendor_name(vend),
92 pcidb_device_name(dev));
93
94 return (0);
95 }
96
97 static nvlist_t *
dev_getinfo(di_node_t di_node,pcidb_hdl_t * db,const char * dev,const char * path)98 dev_getinfo(di_node_t di_node, pcidb_hdl_t *db,
99 const char *dev, const char *path)
100 {
101 char label[MAXPATHLEN];
102 nvlist_t *nvl = NULL;
103 int vid, did;
104 int err;
105
106 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
107 goto out;
108
109 if (dev != NULL && (err = nvlist_add_string(nvl, "dev", dev)) != 0)
110 goto out;
111 if ((err = nvlist_add_string(nvl, "path", path)) != 0)
112 goto out;
113 if ((err = populate_int_prop(di_node, nvl, "vendor-id", &vid)) != 0)
114 goto out;
115 if ((err = populate_int_prop(di_node, nvl, "device-id", &did)) != 0)
116 goto out;
117 if ((err = populate_int_prop(di_node, nvl,
118 "subsystem-vendor-id", NULL)) != 0)
119 goto out;
120 if ((err = populate_int_prop(di_node, nvl, "subsystem-id", NULL)) != 0)
121 goto out;
122 if ((err = populate_int_prop(di_node, nvl, "revision-id", NULL)) != 0)
123 goto out;
124
125 err = dev_getlabel(db, vid, did, label, sizeof (label));
126
127 if (err == 0) {
128 err = nvlist_add_string(nvl, "label", label);
129 } else if (err == ENOENT) {
130 err = 0;
131 }
132
133 out:
134 if (err) {
135 nvlist_free(nvl);
136 errno = err;
137 return (NULL);
138 }
139
140 return (nvl);
141 }
142
/*
 * Convert a /devices filesystem path into a device tree path:
 *
 *	/devices/pci0@0/....@0,1:ppt -> /pci0@0/...@0,1
 *
 * A trailing ":ppt" minor name is cut off by writing a NUL into fspath, and a
 * leading "/devices" is skipped.  The returned pointer refers into fspath
 * itself; nothing is allocated.
 */
static const char *
fs_to_phys_path(char *fspath)
{
	static const char devices_root[] = "/devices";
	const size_t rootlen = sizeof (devices_root) - 1;
	char *minor = strrchr(fspath, ':');

	/* Only the last ':' is considered, and only an exact ":ppt". */
	if (minor != NULL && strcmp(minor, ":ppt") == 0)
		*minor = '\0';

	if (strncmp(fspath, devices_root, rootlen) != 0)
		return (fspath);

	return (fspath + rootlen);
}
162
163 /*
164 * Return an nvlist representing the mappings of /dev/ppt* devices to physical
165 * devices. Of the form:
166 *
167 * /pci@0,0/... {
168 * dev: "/dev/ppt0"
169 * path: "/pci@0,0/..."
170 * vendor-id: "8086"
171 * device-id: "1528"
172 * subsystem-vendor-id: "8086"
173 * subsystem-id: "1528"
174 * revision-id: "1"
175 * label: "Intel Corporation ..."
176 * },
177 * /pci@0,0/...
178 *
179 * The nvlist should be freed by the caller.
180 */
181 nvlist_t *
ppt_list_assigned(void)182 ppt_list_assigned(void)
183 {
184 di_node_t di_root = DI_NODE_NIL;
185 pcidb_hdl_t *db = NULL;
186 nvlist_t *nvl = NULL;
187 glob_t gl;
188 int err;
189
190 bzero(&gl, sizeof (gl));
191
192 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
193 return (NULL);
194
195 if ((db = pcidb_open(PCIDB_VERSION)) == NULL) {
196 err = errno;
197 goto out;
198 }
199
200 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
201 goto out;
202
203 if ((err = glob("/dev/ppt*", GLOB_KEEPSTAT | GLOB_ERR,
204 NULL, &gl)) != 0) {
205 err = (err == GLOB_NOMATCH) ? 0 : errno;
206 goto out;
207 }
208
209 for (size_t i = 0; i < gl.gl_pathc; i++) {
210 char fspath[MAXPATHLEN];
211 nvlist_t *info_nvl;
212 di_node_t di_node;
213 const char *path;
214
215 if (!S_ISLNK(gl.gl_statv[i]->st_mode))
216 continue;
217
218 if (realpath(gl.gl_pathv[i], fspath) == NULL) {
219 err = errno;
220 goto out;
221 }
222
223 path = fs_to_phys_path(fspath);
224
225 /*
226 * path argument is treated as const.
227 */
228 if ((di_node = di_lookup_node(di_root, (char *)path)) == NULL) {
229 err = errno;
230 goto out;
231 }
232
233 if (!is_pci(di_node))
234 continue;
235
236 info_nvl = dev_getinfo(di_node, db, gl.gl_pathv[i], path);
237
238 if (info_nvl == NULL) {
239 err = errno;
240 goto out;
241 }
242
243 err = nvlist_add_nvlist(nvl, path, info_nvl);
244 nvlist_free(info_nvl);
245
246 if (err)
247 goto out;
248 }
249
250 out:
251 if (di_root != DI_NODE_NIL)
252 di_fini(di_root);
253
254 pcidb_close(db);
255 globfree(&gl);
256
257 if (err) {
258 nvlist_free(nvl);
259 errno = err;
260 return (NULL);
261 }
262
263 return (nvl);
264 }
265
266 /*
267 * Read in our list of potential PPT devices. A boot-module provided file
268 * explicitly over-rides anything delivered.
269 */
270 static int
get_matches(list_t * listp)271 get_matches(list_t *listp)
272 {
273 int err = 0;
274 FILE *fp;
275
276 list_create(listp, sizeof (ppt_match_t),
277 offsetof(ppt_match_t, pm_list));
278
279 if ((fp = fopen("/system/boot/etc/ppt_matches", "r")) == NULL) {
280 if (errno != ENOENT)
281 return (errno);
282
283 if ((fp = fopen("/etc/ppt_matches", "r")) == NULL) {
284 if (errno == ENOENT)
285 return (0);
286 return (errno);
287 }
288 }
289
290 for (;;) {
291 char *line = NULL;
292 ppt_match_t *pm;
293 size_t cap = 0;
294 ssize_t read;
295
296 if ((read = getline(&line, &cap, fp)) <= 0) {
297 free(line);
298 break;
299 }
300
301 if (line[read - 1] == '\n')
302 line[read - 1] = '\0';
303
304 if ((pm = malloc(sizeof (*pm))) == NULL) {
305 err = errno;
306 free(line);
307 goto out;
308 }
309
310 bzero(pm, sizeof (*pm));
311
312 if (sscanf(line, "pciex%4s,%4s", &pm->pm_vendor,
313 &pm->pm_device) == 2 ||
314 sscanf(line, "pci%4s,%4s", &pm->pm_vendor,
315 &pm->pm_device) == 2 ||
316 sscanf(line, "pciex%4s", &pm->pm_vendor) == 1 ||
317 sscanf(line, "pci%4s", &pm->pm_vendor) == 1) {
318 list_insert_tail(listp, pm);
319 } else if (line[0] == '/') {
320 (void) strlcpy(pm->pm_path, line, sizeof (pm->pm_path));
321 list_insert_tail(listp, pm);
322 } else {
323 /*
324 * Ignore any line we don't understand.
325 */
326 free(pm);
327 }
328
329 free(line);
330 }
331
332 out:
333 (void) fclose(fp);
334 return (err);
335 }
336
337 static boolean_t
match_ppt(list_t * matches,nvlist_t * nvl)338 match_ppt(list_t *matches, nvlist_t *nvl)
339 {
340 char *vendor;
341 char *device;
342 char *path;
343
344 if (nvlist_lookup_string(nvl, "path", &path) != 0 ||
345 nvlist_lookup_string(nvl, "vendor-id", &vendor) != 0 ||
346 nvlist_lookup_string(nvl, "device-id", &device) != 0)
347 return (B_FALSE);
348
349 for (ppt_match_t *pm = list_head(matches); pm != NULL;
350 pm = list_next(matches, pm)) {
351 if (pm->pm_path[0] != '\0' && strcmp(pm->pm_path, path) == 0)
352 return (B_TRUE);
353
354 if (pm->pm_vendor[0] != '\0' &&
355 strcmp(pm->pm_vendor, vendor) == 0) {
356 if (pm->pm_device[0] == '\0')
357 return (B_TRUE);
358 if (strcmp(pm->pm_device, device) == 0)
359 return (B_TRUE);
360 }
361 }
362
363 return (B_FALSE);
364 }
365
366 static int
inspect_node(di_node_t di_node,void * arg)367 inspect_node(di_node_t di_node, void *arg)
368 {
369 node_data_t *data = arg;
370 nvlist_t *info_nvl = NULL;
371 char *devname = NULL;
372 const char *driver;
373 char *path = NULL;
374
375 if (!is_pci(di_node))
376 return (DI_WALK_CONTINUE);
377
378 driver = di_driver_name(di_node);
379
380 if (driver != NULL && strcmp(driver, "ppt") == 0) {
381 if (asprintf(&devname, "/dev/ppt%d",
382 di_instance(di_node)) < 0) {
383 data->nd_err = errno;
384 goto out;
385 }
386 }
387
388 if ((path = di_devfs_path(di_node)) == NULL) {
389 data->nd_err = ENOENT;
390 goto out;
391 }
392
393 info_nvl = dev_getinfo(di_node, data->nd_db, devname, path);
394
395 if (info_nvl == NULL)
396 goto out;
397
398 if (devname == NULL && !match_ppt(&data->nd_matches, info_nvl))
399 goto out;
400
401 data->nd_err = nvlist_add_nvlist(data->nd_nvl, path, info_nvl);
402
403 out:
404 free(path);
405 free(devname);
406 nvlist_free(info_nvl);
407 return (data->nd_err ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
408 }
409
410 /*
411 * Like ppt_list_assigned() output, but includes all devices that could be used
412 * for passthrough, whether assigned or not.
413 */
414 nvlist_t *
ppt_list(void)415 ppt_list(void)
416 {
417 node_data_t nd = { NULL, };
418 di_node_t di_root;
419 int err;
420
421 if ((di_root = di_init("/", DINFOCACHE)) == DI_NODE_NIL)
422 return (NULL);
423
424 if ((err = get_matches(&nd.nd_matches)) != 0)
425 goto out;
426
427 if ((nd.nd_db = pcidb_open(PCIDB_VERSION)) == NULL) {
428 err = errno;
429 goto out;
430 }
431
432 if ((err = nvlist_alloc(&nd.nd_nvl, NV_UNIQUE_NAME, 0)) != 0)
433 goto out;
434
435 if ((err = di_walk_node(di_root, DI_WALK_CLDFIRST,
436 &nd, inspect_node)) != 0)
437 goto out;
438
439 err = nd.nd_err;
440
441 out:
442 pcidb_close(nd.nd_db);
443
444 for (ppt_match_t *pm = list_head(&nd.nd_matches); pm != NULL; ) {
445 ppt_match_t *next = list_next(&nd.nd_matches, pm);
446 free(pm);
447 pm = next;
448 }
449
450 if (di_root != DI_NODE_NIL)
451 di_fini(di_root);
452
453 if (err) {
454 nvlist_free(nd.nd_nvl);
455 errno = err;
456 return (NULL);
457 }
458
459 return (nd.nd_nvl);
460 }
461
462 /*
463 * Given a physical path such as "/devices/pci0@0...", return the "/dev/pptX"
464 * that is bound to it, if any. The "/devices/" prefix is optional. The
465 * physical path may have the ":ppt" minor name suffix.
466 *
467 * Returns ENOENT if no such PPT device exists.
468 */
469 int
ppt_devpath_to_dev(const char * inpath,char * buf,size_t buflen)470 ppt_devpath_to_dev(const char *inpath, char *buf, size_t buflen)
471 {
472 char fspath[MAXPATHLEN] = "";
473 nvpair_t *nvp = NULL;
474 const char *devpath;
475 int err = ENOENT;
476 nvlist_t *nvl;
477
478 if (strlcat(fspath, inpath, sizeof (fspath)) >= sizeof (fspath))
479 return (ENAMETOOLONG);
480
481 devpath = fs_to_phys_path(fspath);
482
483 if ((nvl = ppt_list_assigned()) == NULL)
484 return (errno);
485
486 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
487 const char *name = nvpair_name(nvp);
488 char *ppt = NULL;
489 nvlist_t *props;
490
491 (void) nvpair_value_nvlist(nvp, &props);
492
493 if (strcmp(name, devpath) == 0) {
494 (void) nvlist_lookup_string(props, "dev", &ppt);
495
496 err = 0;
497
498 if (strlcpy(buf, ppt, buflen) >= buflen)
499 err = ENAMETOOLONG;
500 break;
501 }
502 }
503
504 nvlist_free(nvl);
505 return (err);
506 }
507