1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2016 by Delphix. All rights reserved.
25 */
26
27#include "libdevinfo.h"
28#include "devinfo_devlink.h"
29#include "device_info.h"
30
31#undef	DEBUG
32#ifndef	DEBUG
33#define	NDEBUG 1
34#else
35#undef	NDEBUG
36#endif
37
38#include <assert.h>
39
40static mutex_t update_mutex = DEFAULTMUTEX; /* Protects update record lock */
41static mutex_t temp_file_mutex = DEFAULTMUTEX; /* for file creation tests */
42
43static const size_t elem_sizes[DB_TYPES] = {
44	sizeof (struct db_node),
45	sizeof (struct db_minor),
46	sizeof (struct db_link),
47	sizeof (char)
48};
49
50/*
51 * List of directories/files skipped while physically walking /dev
52 * Paths are relative to "<root>/dev/"
53 */
54static const char *skip_dirs[] = {"fd"};
55static const char *skip_files[] = {
56	"stdout",
57	"stdin",
58	"stderr"
59};
60
61#define	N_SKIP_DIRS	(sizeof (skip_dirs) / sizeof (skip_dirs[0]))
62#define	N_SKIP_FILES	(sizeof (skip_files) / sizeof (skip_files[0]))
63
64#define	DI_TEST_DB	ETCDEV "di_test_db"
65
66/*
67 *
68 * This file contains two sets of interfaces which operate on the reverse
69 * links database. One set (which includes di_devlink_open()/_close())
70 * allows link generators like devfsadm(1M) and ucblinks(1B) (writers) to
71 * populate the database with /devices -> /dev mappings. Another set
72 * of interfaces (which includes di_devlink_init()/_fini()) allows
 * applications (readers) to look up the database for /dev links corresponding
74 * to a given minor.
75 *
76 * Writers operate on a cached version of the database. The cache is created
77 * when di_devlink_open() is called. As links in /dev are created and removed,
78 * the cache is updated to keep it in synch with /dev. When the /dev updates
79 * are complete, the link generator calls di_devlink_close() which writes
80 * out the cache to the database.
81 *
 * Applications which need to look up the database call di_devlink_init().
83 * di_devlink_init() checks the database file (if one exists). If the
84 * database is valid, it is mapped into the address space of the
85 * application. The database file consists of several segments. Each
86 * segment can be mapped in independently and is mapped on demand.
87 *
88 *		   Database Layout
89 *
90 *		---------------------
91 *		|	Magic #     |
92 *		| ----------------- |
93 *		|       Version	    |	HEADER
94 *		| ----------------- |
95 *		|        ...        |
96 *		---------------------
97 *		|		    |
98 *		|		    |	NODES
99 *		|	            |
100 *		|		    |
101 *		---------------------
102 *		|		    |
103 *		|		    |	MINORS
104 *		|	            |
105 *		|		    |
106 *		---------------------
107 *		|		    |
108 *		|		    |   LINKS
109 *		|	            |
110 *		|		    |
111 *		---------------------
112 *		|		    |
113 *		|		    |	STRINGS
114 *		|	            |
115 *		|		    |
116 *		---------------------
117 *
 * Readers can look up /dev links for a specific minor or
 * look up all /dev links. In the latter case, the node
120 * and minor segments are not mapped in and the reader
121 * walks through every link in the link segment.
122 *
123 */
124di_devlink_handle_t
125di_devlink_open(const char *root_dir, uint_t flags)
126{
127	int err;
128	char path[PATH_MAX];
129	struct di_devlink_handle *hdp;
130	int retried = 0;
131
132retry:
133	/*
134	 * Allocate a read-write handle but open the DB in readonly
135	 * mode. We do writes only to a temporary copy of the database.
136	 */
137	if ((hdp = handle_alloc(root_dir, OPEN_RDWR)) == NULL) {
138		return (NULL);
139	}
140
141	err = open_db(hdp, OPEN_RDONLY);
142
143	/*
144	 * We don't want to unlink the db at this point - if we did we
145	 * would be creating a window where consumers would take a slow
146	 * code path (and those consumers might also trigger requests for
147	 * db creation, which we are already in the process of doing).
148	 * When we are done with our update, we use rename to install the
149	 * latest version of the db file.
150	 */
151	get_db_path(hdp, DB_FILE, path, sizeof (path));
152
153	/*
154	 * The flags argument is reserved for future use.
155	 */
156	if (flags != 0) {
157		handle_free(&hdp); /* also closes the DB */
158		errno = EINVAL;
159		return (NULL);
160	}
161
162	if (cache_alloc(hdp) != 0) {
163		handle_free(&hdp);
164		return (NULL);
165	}
166
167	if (err) {
168		/*
169		 * Failed to open DB.
170		 * The most likely cause is that DB file did not exist.
171		 * Call di_devlink_close() to recreate the DB file and
172		 * retry di_devlink_open().
173		 */
174		if (retried == 0) {
175			(void) di_devlink_close(&hdp, 0);
176			retried = 1;
177			goto retry;
178		}
179
180		/*
181		 * DB cannot be opened, just return the
182		 * handle. We will recreate the DB later.
183		 */
184		return (hdp);
185	}
186
187	/* Read the database into the cache */
188	CACHE(hdp)->update_count = DB_HDR(hdp)->update_count;
189	(void) read_nodes(hdp, NULL, DB_HDR(hdp)->root_idx);
190	(void) read_links(hdp, NULL, DB_HDR(hdp)->dngl_idx);
191
192	(void) close_db(hdp);
193
194	return (hdp);
195}
196
197static void
198get_db_path(
199	struct di_devlink_handle *hdp,
200	const char *fname,
201	char *buf,
202	size_t blen)
203{
204	char *dir = NULL;
205
206#ifdef	DEBUG
207	if (dir = getenv(ALT_DB_DIR)) {
208		(void) dprintf(DBG_INFO, "get_db_path: alternate db dir: %s\n",
209		    dir);
210	}
211#endif
212	if (dir == NULL) {
213		dir = hdp->db_dir;
214	}
215
216	(void) snprintf(buf, blen, "%s/%s", dir, fname);
217}
218
219static int
220open_db(struct di_devlink_handle *hdp, int flags)
221{
222	size_t sz;
223	long page_sz;
224	int fd, rv, flg;
225	struct stat sbuf;
226	uint32_t count[DB_TYPES] = {0};
227	char path[PATH_MAX];
228	void *cp;
229
230	assert(!DB_OPEN(hdp));
231
232#ifdef	DEBUG
233	if (getenv(SKIP_DB)) {
234		(void) dprintf(DBG_INFO, "open_db: skipping database\n");
235		return (-1);
236	}
237#endif
238	if ((page_sz = sysconf(_SC_PAGE_SIZE)) == -1) {
239		return (-1);
240	}
241
242	/*
243	 * Use O_TRUNC flag for write access, so that the subsequent ftruncate()
244	 * call will zero-fill the entire file
245	 */
246	if (IS_RDONLY(flags)) {
247		flg = O_RDONLY;
248		get_db_path(hdp, DB_FILE, path, sizeof (path));
249	} else {
250		flg = O_RDWR|O_CREAT|O_TRUNC;
251		get_db_path(hdp, DB_TMP, path, sizeof (path));
252	}
253
254	/*
255	 * Avoid triggering /dev reconfigure for read when not present
256	 */
257	if (IS_RDONLY(flags) &&
258	    (strncmp(path, "/dev/", 5) == 0) && !device_exists(path)) {
259		return (-1);
260	}
261
262	if ((fd = open(path, flg, DB_PERMS)) == -1) {
263		return (-1);
264	}
265
266	if (IS_RDONLY(flags)) {
267		flg = PROT_READ;
268		rv = fstat(fd, &sbuf);
269		sz = sbuf.st_size;
270	} else {
271		flg = PROT_READ | PROT_WRITE;
272		sz = size_db(hdp, page_sz, count);
273		rv = ftruncate(fd, sz);
274	}
275
276	if (rv == -1 || sz < HDR_LEN) {
277		if (rv != -1)
278			errno = EINVAL;
279		(void) close(fd);
280		return (-1);
281	}
282
283	cp = mmap(0, HDR_LEN, flg, MAP_SHARED, fd, 0);
284	if (cp == MAP_FAILED) {
285		(void) close(fd);
286		return (-1);
287	}
288	DB(hdp)->hdr = (struct db_hdr *)cp;
289	DB(hdp)->db_fd = fd;
290	DB(hdp)->flags = flags;
291
292	if (IS_RDONLY(flags)) {
293		rv = invalid_db(hdp, sz, page_sz);
294	} else {
295		rv = init_hdr(hdp, page_sz, count);
296	}
297
298	if (rv) {
299		(void) dprintf(DBG_ERR, "open_db: invalid DB(%s)\n", path);
300		(void) close_db(hdp);
301		return (-1);
302	} else {
303		(void) dprintf(DBG_STEP, "open_db: DB(%s): opened\n", path);
304		return (0);
305	}
306}
307
308/*
309 * A handle can be allocated for read-only or read-write access
310 */
311static struct di_devlink_handle *
312handle_alloc(const char *root_dir, uint_t flags)
313{
314	char dev_dir[PATH_MAX], path[PATH_MAX], db_dir[PATH_MAX];
315	struct di_devlink_handle *hdp, proto = {0};
316	int install = 0;
317	int isroot = 0;
318	struct stat sb;
319	char can_path[PATH_MAX];
320
321	assert(flags == OPEN_RDWR || flags == OPEN_RDONLY);
322
323	dev_dir[0] = '\0';
324	db_dir[0] = '\0';
325
326	/*
327	 * NULL and the empty string are equivalent to "/"
328	 */
329	if (root_dir && root_dir[0] != '\0') {
330
331		if (root_dir[0] != '/') {
332			errno = EINVAL;
333			return (NULL);
334		}
335
336#ifdef	DEBUG
337		/*LINTED*/
338		assert(sizeof (dev_dir) >= PATH_MAX);
339#endif
340		if ((realpath(root_dir, dev_dir) == NULL) ||
341		    (realpath(root_dir, db_dir) == NULL)) {
342			return (NULL);
343		}
344	} else {
345		/*
346		 * The dev dir is at /dev i.e. we are not doing a -r /altroot
347		 */
348		isroot = 1;
349	}
350
351	if (strcmp(dev_dir, "/") == 0) {
352		dev_dir[0] = 0;
353		db_dir[0] = 0;
354	} else {
355		(void) strlcpy(db_dir, dev_dir, sizeof (db_dir));
356	}
357
358	(void) strlcat(dev_dir, DEV, sizeof (dev_dir));
359	(void) strlcat(db_dir, ETCDEV, sizeof (db_dir));
360
361	/*
362	 * The following code is for install. Readers and writers need
363	 * to be redirected to /tmp/etc/dev for the database file.
364	 * Note that we test for readonly /etc by actually creating a
365	 * file since statvfs is not a reliable method for determining
366	 * readonly filesystems.
367	 */
368	install = 0;
369	(void) snprintf(can_path, sizeof (can_path), "%s/%s", ETCDEV, DB_FILE);
370	if (flags == OPEN_RDWR && isroot) {
371		char di_test_db[PATH_MAX];
372		int fd;
373		(void) mutex_lock(&temp_file_mutex);
374		(void) snprintf(di_test_db, sizeof (di_test_db), "%s.%d",
375		    DI_TEST_DB, getpid());
376		fd = open(di_test_db, O_CREAT|O_RDWR|O_EXCL, 0644);
377		if (fd == -1 && errno == EROFS && stat(can_path, &sb) == -1)
378			install = 1;
379		if (fd != -1) {
380			(void) close(fd);
381			(void) unlink(di_test_db);
382		}
383		(void) mutex_unlock(&temp_file_mutex);
384	} else if (isroot) {
385		/*
386		 * Readers can be non-privileged so we cannot test by creating
387		 * a file in /etc/dev. Instead we check if the database
388		 * file is missing in /etc/dev and is present in /tmp/etc/dev
389		 * and is owned by root.
390		 */
391		char install_path[PATH_MAX];
392
393		(void) snprintf(install_path, sizeof (install_path),
394		    "/tmp%s/%s", ETCDEV, DB_FILE);
395		if (stat(can_path, &sb) == -1 && stat(install_path, &sb)
396		    != -1 && sb.st_uid == 0) {
397			install = 1;
398		}
399	}
400
401	/*
402	 * Check if we are in install. If we are, the database will be in
403	 * /tmp/etc/dev
404	 */
405	if (install)
406		(void) snprintf(db_dir, sizeof (db_dir), "/tmp%s", ETCDEV);
407
408	proto.dev_dir = dev_dir;
409	proto.db_dir = db_dir;
410	proto.flags = flags;
411	proto.lock_fd = -1;
412
413	/*
414	 * Lock database if a read-write handle is being allocated.
415	 * Locks are needed to protect against multiple writers.
416	 * Readers don't need locks.
417	 */
418	if (HDL_RDWR(&proto)) {
419		if (enter_db_lock(&proto, root_dir) != 1) {
420			return (NULL);
421		}
422	}
423
424	DB(&proto)->db_fd = -1;
425
426	hdp = calloc(1, sizeof (struct di_devlink_handle));
427	if (hdp == NULL) {
428		goto error;
429	}
430
431	*hdp = proto;
432
433	/*
434	 * The handle hdp now contains a pointer to local storage
435	 * in the dev_dir field (obtained from the proto handle).
436	 * In the following line, a dynamically allocated version
437	 * is substituted.
438	 */
439
440	if ((hdp->dev_dir = strdup(proto.dev_dir)) == NULL) {
441		free(hdp);
442		goto error;
443	}
444
445	if ((hdp->db_dir = strdup(proto.db_dir)) == NULL) {
446		free(hdp->dev_dir);
447		free(hdp);
448		goto error;
449	}
450
451	return (hdp);
452
453error:
454	if (HDL_RDWR(&proto)) {
455		/* Unlink DB file on error */
456		get_db_path(&proto, DB_FILE, path, sizeof (path));
457		(void) unlink(path);
458		exit_db_lock(&proto);
459	}
460	return (NULL);
461}
462
463
464static int
465cache_alloc(struct di_devlink_handle *hdp)
466{
467	size_t hash_sz = 0;
468
469	assert(HDL_RDWR(hdp));
470
471	if (DB_OPEN(hdp)) {
472		hash_sz = DB_NUM(hdp, DB_LINK) / AVG_CHAIN_SIZE;
473	}
474	hash_sz = (hash_sz >= MIN_HASH_SIZE) ? hash_sz : MIN_HASH_SIZE;
475
476	CACHE(hdp)->hash = calloc(hash_sz, sizeof (cache_link_t *));
477	if (CACHE(hdp)->hash == NULL) {
478		return (-1);
479	}
480	CACHE(hdp)->hash_sz = hash_sz;
481
482	return (0);
483}
484
485
486static int
487invalid_db(struct di_devlink_handle *hdp, size_t fsize, long page_sz)
488{
489	int i;
490	char *cp;
491	size_t sz;
492
493	if (DB_HDR(hdp)->magic != DB_MAGIC || DB_HDR(hdp)->vers != DB_VERSION) {
494		return (1);
495	}
496
497	if (DB_HDR(hdp)->page_sz == 0 || DB_HDR(hdp)->page_sz != page_sz) {
498		return (1);
499	}
500
501	sz = seg_size(hdp, DB_HEADER);
502	for (i = 0; i < DB_TYPES; i++) {
503		(void) dprintf(DBG_INFO, "N[%u] = %u\n", i, DB_NUM(hdp, i));
504		/* There must be at least 1 element of each type */
505		if (DB_NUM(hdp, i) < 1) {
506			return (1);
507		}
508		sz += seg_size(hdp, i);
509		assert(sz % page_sz == 0);
510	}
511
512	if (sz != fsize) {
513		return (1);
514	}
515
516	if (!VALID_INDEX(hdp, DB_NODE, DB_HDR(hdp)->root_idx)) {
517		return (1);
518	}
519
520	if (!VALID_INDEX(hdp, DB_LINK, DB_HDR(hdp)->dngl_idx)) {
521		return (1);
522	}
523
524	if (DB_EMPTY(hdp)) {
525		return (1);
526	}
527
528	/*
529	 * The last character in the string segment must be a NUL char.
530	 */
531	cp = get_string(hdp, DB_NUM(hdp, DB_STR) - 1);
532	if (cp == NULL || *cp != '\0') {
533		return (1);
534	}
535
536	return (0);
537}
538
539static int
540read_nodes(struct di_devlink_handle *hdp, cache_node_t *pcnp, uint32_t nidx)
541{
542	char *path;
543	cache_node_t *cnp;
544	struct db_node *dnp;
545	const char *fcn = "read_nodes";
546
547	assert(HDL_RDWR(hdp));
548
549	/*
550	 * parent node should be NULL only for the root node
551	 */
552	if ((pcnp == NULL) ^ (nidx == DB_HDR(hdp)->root_idx)) {
553		(void) dprintf(DBG_ERR, "%s: invalid parent or index(%u)\n",
554		    fcn, nidx);
555		SET_DB_ERR(hdp);
556		return (-1);
557	}
558
559	for (; dnp = get_node(hdp, nidx); nidx = dnp->sib) {
560
561		path = get_string(hdp, dnp->path);
562
563		/*
564		 * Insert at head of list to recreate original order
565		 */
566		cnp = node_insert(hdp, pcnp, path, INSERT_HEAD);
567		if (cnp == NULL) {
568			SET_DB_ERR(hdp);
569			break;
570		}
571
572		assert(strcmp(path, "/") ^ (nidx == DB_HDR(hdp)->root_idx));
573		assert(strcmp(path, "/") != 0 || dnp->sib == DB_NIL);
574
575		if (read_minors(hdp, cnp, dnp->minor) != 0 ||
576		    read_nodes(hdp, cnp, dnp->child) != 0) {
577			break;
578		}
579
580		(void) dprintf(DBG_STEP, "%s: node[%u]: %s\n", fcn, nidx,
581		    cnp->path);
582	}
583
584	return (dnp ? -1 : 0);
585}
586
587static int
588read_minors(struct di_devlink_handle *hdp, cache_node_t *pcnp, uint32_t nidx)
589{
590	cache_minor_t *cmnp;
591	struct db_minor *dmp;
592	char *name, *nodetype;
593	const char *fcn = "read_minors";
594
595	assert(HDL_RDWR(hdp));
596
597	if (pcnp == NULL) {
598		(void) dprintf(DBG_ERR, "%s: minor[%u]: orphan minor\n", fcn,
599		    nidx);
600		SET_DB_ERR(hdp);
601		return (-1);
602	}
603
604	for (; dmp = get_minor(hdp, nidx); nidx = dmp->sib) {
605
606		name = get_string(hdp, dmp->name);
607		nodetype = get_string(hdp, dmp->nodetype);
608
609		cmnp = minor_insert(hdp, pcnp, name, nodetype, NULL);
610		if (cmnp == NULL) {
611			SET_DB_ERR(hdp);
612			break;
613		}
614
615		(void) dprintf(DBG_STEP, "%s: minor[%u]: %s\n", fcn, nidx,
616		    cmnp->name);
617
618		if (read_links(hdp, cmnp, dmp->link) != 0) {
619			break;
620		}
621	}
622
623	return (dmp ? -1 : 0);
624}
625
626/*
627 * If the link is dangling the corresponding minor will be absent.
628 */
629static int
630read_links(struct di_devlink_handle *hdp, cache_minor_t *pcmp, uint32_t nidx)
631{
632	cache_link_t *clp;
633	struct db_link *dlp;
634	char *path, *content;
635
636	assert(HDL_RDWR(hdp));
637
638	if (nidx != DB_NIL &&
639	    ((pcmp == NULL) ^ (nidx == DB_HDR(hdp)->dngl_idx))) {
640		(void) dprintf(DBG_ERR, "read_links: invalid minor or"
641		    " index(%u)\n", nidx);
642		SET_DB_ERR(hdp);
643		return (-1);
644	}
645
646	for (; dlp = get_link(hdp, nidx); nidx = dlp->sib) {
647
648		path = get_string(hdp, dlp->path);
649		content = get_string(hdp, dlp->content);
650
651		clp = link_insert(hdp, pcmp, path, content, dlp->attr);
652		if (clp == NULL) {
653			SET_DB_ERR(hdp);
654			break;
655		}
656
657		(void) dprintf(DBG_STEP, "read_links: link[%u]: %s%s\n",
658		    nidx, clp->path, pcmp == NULL ? "(DANGLING)" : "");
659	}
660
661	return (dlp ? -1 : 0);
662}
663
664int
665di_devlink_close(di_devlink_handle_t *pp, int flag)
666{
667	int i, rv;
668	char tmp[PATH_MAX];
669	char file[PATH_MAX];
670	uint32_t next[DB_TYPES] = {0};
671	struct di_devlink_handle *hdp;
672
673	if (pp == NULL || *pp == NULL || !HDL_RDWR(*pp)) {
674		errno = EINVAL;
675		return (-1);
676	}
677
678	hdp = *pp;
679	*pp = NULL;
680
681	/*
682	 * The caller encountered some error in their processing.
683	 * so handle isn't valid. Discard it and return success.
684	 */
685	if (flag == DI_LINK_ERROR) {
686		handle_free(&hdp);
687		return (0);
688	}
689
690	if (DB_ERR(hdp)) {
691		handle_free(&hdp);
692		errno = EINVAL;
693		return (-1);
694	}
695
696	/*
697	 * Extract the DB path before the handle is freed.
698	 */
699	get_db_path(hdp, DB_FILE, file, sizeof (file));
700	get_db_path(hdp, DB_TMP, tmp, sizeof (tmp));
701
702	/*
703	 * update database with actual contents of /dev
704	 */
705	(void) dprintf(DBG_INFO, "di_devlink_close: update_count = %u\n",
706	    CACHE(hdp)->update_count);
707
708	/*
709	 * For performance reasons, synchronization of the database
710	 * with /dev is turned off by default. However, applications
711	 * with appropriate permissions can request a "sync" by
712	 * calling di_devlink_update().
713	 */
714	if (CACHE(hdp)->update_count == 0) {
715		CACHE(hdp)->update_count = 1;
716		(void) dprintf(DBG_INFO,
717		    "di_devlink_close: synchronizing DB\n");
718		(void) synchronize_db(hdp);
719	}
720
721	/*
722	 * Resolve dangling links AFTER synchronizing DB with /dev as the
723	 * synchronization process may create dangling links.
724	 */
725	resolve_dangling_links(hdp);
726
727	/*
728	 * All changes to the cache are complete. Write out the cache
729	 * to the database only if it is not empty.
730	 */
731	if (CACHE_EMPTY(hdp)) {
732		(void) dprintf(DBG_INFO, "di_devlink_close: skipping write\n");
733		(void) unlink(file);
734		handle_free(&hdp);
735		return (0);
736	}
737
738	if (open_db(hdp, OPEN_RDWR) != 0) {
739		handle_free(&hdp);
740		return (-1);
741	}
742
743	/*
744	 * Keep track of array assignments. There is at least
745	 * 1 element (the "NIL" element) per type.
746	 */
747	for (i = 0; i < DB_TYPES; i++) {
748		next[i] = 1;
749	}
750
751	(void) write_nodes(hdp, NULL, CACHE_ROOT(hdp), next);
752	(void) write_links(hdp, NULL, CACHE(hdp)->dngl, next);
753	DB_HDR(hdp)->update_count = CACHE(hdp)->update_count;
754
755	rv = close_db(hdp);
756
757	if (rv != 0 || DB_ERR(hdp) || rename(tmp, file) != 0) {
758		(void) dprintf(DBG_ERR, "di_devlink_close: %s error: %s\n",
759		    rv ? "close_db" : "DB or rename", strerror(errno));
760		(void) unlink(tmp);
761		(void) unlink(file);
762		handle_free(&hdp);
763		return (-1);
764	}
765
766	handle_free(&hdp);
767
768	(void) dprintf(DBG_INFO, "di_devlink_close: wrote DB(%s)\n", file);
769
770	return (0);
771}
772
773/*
774 * Inits the database header.
775 */
776static int
777init_hdr(struct di_devlink_handle *hdp, long page_sz, uint32_t *count)
778{
779	int i;
780
781	DB_HDR(hdp)->magic = DB_MAGIC;
782	DB_HDR(hdp)->vers = DB_VERSION;
783	DB_HDR(hdp)->root_idx = DB_NIL;
784	DB_HDR(hdp)->dngl_idx = DB_NIL;
785	DB_HDR(hdp)->page_sz = (uint32_t)page_sz;
786
787	for (i = 0; i < DB_TYPES; i++) {
788		assert(count[i] >= 1);
789		DB_NUM(hdp, i) = count[i];
790	}
791
792	return (0);
793}
794
795static int
796write_nodes(
797	struct di_devlink_handle *hdp,
798	struct db_node *pdnp,
799	cache_node_t *cnp,
800	uint32_t *next)
801{
802	uint32_t idx;
803	struct db_node *dnp;
804	const char *fcn = "write_nodes";
805
806	assert(HDL_RDWR(hdp));
807
808	for (; cnp != NULL; cnp = cnp->sib) {
809
810		assert(cnp->path != NULL);
811
812		/* parent node should only be NULL for root node */
813		if ((pdnp == NULL) ^ (cnp == CACHE_ROOT(hdp))) {
814			(void) dprintf(DBG_ERR, "%s: invalid parent for: %s\n",
815			    fcn, cnp->path);
816			SET_DB_ERR(hdp);
817			break;
818		}
819
820		assert((strcmp(cnp->path, "/") != 0) ^
821		    (cnp == CACHE_ROOT(hdp)));
822
823		idx = next[DB_NODE];
824		if ((dnp = set_node(hdp, idx)) == NULL) {
825			SET_DB_ERR(hdp);
826			break;
827		}
828
829		dnp->path = write_string(hdp, cnp->path, next);
830		if (dnp->path == DB_NIL) {
831			SET_DB_ERR(hdp);
832			break;
833		}
834		/* commit write for this node */
835		next[DB_NODE]++;
836
837		if (pdnp == NULL) {
838			assert(DB_HDR(hdp)->root_idx == DB_NIL);
839			DB_HDR(hdp)->root_idx = idx;
840		} else {
841			dnp->sib = pdnp->child;
842			pdnp->child = idx;
843		}
844
845		(void) dprintf(DBG_STEP, "%s: node[%u]: %s\n", fcn, idx,
846		    cnp->path);
847
848		if (write_minors(hdp, dnp, cnp->minor, next) != 0 ||
849		    write_nodes(hdp, dnp, cnp->child, next) != 0) {
850			break;
851		}
852	}
853
854	return (cnp ? -1 : 0);
855}
856
857static int
858write_minors(
859	struct di_devlink_handle *hdp,
860	struct db_node *pdnp,
861	cache_minor_t *cmnp,
862	uint32_t *next)
863{
864	uint32_t idx;
865	struct db_minor *dmp;
866	const char *fcn = "write_minors";
867
868	assert(HDL_RDWR(hdp));
869
870	if (pdnp == NULL) {
871		(void) dprintf(DBG_ERR, "%s: no node for minor: %s\n", fcn,
872		    cmnp ? cmnp->name : "<NULL>");
873		SET_DB_ERR(hdp);
874		return (-1);
875	}
876
877	for (; cmnp != NULL; cmnp = cmnp->sib) {
878
879		assert(cmnp->name != NULL);
880
881		idx = next[DB_MINOR];
882		if ((dmp = set_minor(hdp, idx)) == NULL) {
883			SET_DB_ERR(hdp);
884			break;
885		}
886
887		dmp->name = write_string(hdp, cmnp->name, next);
888		dmp->nodetype = write_string(hdp, cmnp->nodetype, next);
889		if (dmp->name == DB_NIL || dmp->nodetype == DB_NIL) {
890			dmp->name = dmp->nodetype = DB_NIL;
891			SET_DB_ERR(hdp);
892			break;
893		}
894
895		/* Commit writes to this minor */
896		next[DB_MINOR]++;
897
898		dmp->sib = pdnp->minor;
899		pdnp->minor = idx;
900
901		(void) dprintf(DBG_STEP, "%s: minor[%u]: %s\n", fcn, idx,
902		    cmnp->name);
903
904		if (write_links(hdp, dmp, cmnp->link, next) != 0) {
905			break;
906		}
907	}
908
909	return (cmnp ? -1 : 0);
910}
911
912static int
913write_links(
914	struct di_devlink_handle *hdp,
915	struct db_minor *pdmp,
916	cache_link_t *clp,
917	uint32_t *next)
918{
919	uint32_t idx;
920	struct db_link *dlp;
921	const char *fcn = "write_links";
922
923	assert(HDL_RDWR(hdp));
924
925	/* A NULL minor if and only if the links are dangling */
926	if (clp != NULL && ((pdmp == NULL) ^ (clp == CACHE(hdp)->dngl))) {
927		(void) dprintf(DBG_ERR, "%s: invalid minor for link\n", fcn);
928		SET_DB_ERR(hdp);
929		return (-1);
930	}
931
932	for (; clp != NULL; clp = clp->sib) {
933
934		assert(clp->path != NULL);
935
936		if ((pdmp == NULL) ^ (clp->minor == NULL)) {
937			(void) dprintf(DBG_ERR, "%s: invalid minor for link"
938			    "(%s)\n", fcn, clp->path);
939			SET_DB_ERR(hdp);
940			break;
941		}
942
943		idx = next[DB_LINK];
944		if ((dlp = set_link(hdp, idx)) == NULL) {
945			SET_DB_ERR(hdp);
946			break;
947		}
948
949		dlp->path = write_string(hdp, clp->path, next);
950		dlp->content = write_string(hdp, clp->content, next);
951		if (dlp->path == DB_NIL || dlp->content == DB_NIL) {
952			dlp->path = dlp->content = DB_NIL;
953			SET_DB_ERR(hdp);
954			break;
955		}
956
957		dlp->attr = clp->attr;
958
959		/* Commit writes to this link */
960		next[DB_LINK]++;
961
962		if (pdmp != NULL) {
963			dlp->sib = pdmp->link;
964			pdmp->link = idx;
965		} else {
966			dlp->sib = DB_HDR(hdp)->dngl_idx;
967			DB_HDR(hdp)->dngl_idx = idx;
968		}
969
970		(void) dprintf(DBG_STEP, "%s: link[%u]: %s%s\n", fcn, idx,
971		    clp->path, pdmp == NULL ? "(DANGLING)" : "");
972	}
973
974	return (clp ? -1 : 0);
975}
976
977
978static uint32_t
979write_string(struct di_devlink_handle *hdp, const char *str, uint32_t *next)
980{
981	char *dstr;
982	uint32_t idx;
983
984	assert(HDL_RDWR(hdp));
985
986	if (str == NULL) {
987		(void) dprintf(DBG_ERR, "write_string: NULL argument\n");
988		return (DB_NIL);
989	}
990
991	idx = next[DB_STR];
992	if (!VALID_STR(hdp, idx, str)) {
993		(void) dprintf(DBG_ERR, "write_string: invalid index[%u],"
994		    " string(%s)\n", idx, str);
995		return (DB_NIL);
996	}
997
998	if ((dstr = set_string(hdp, idx)) == NULL) {
999		return (DB_NIL);
1000	}
1001
1002	(void) strcpy(dstr, str);
1003
1004	next[DB_STR] += strlen(dstr) + 1;
1005
1006	return (idx);
1007}
1008
1009static int
1010close_db(struct di_devlink_handle *hdp)
1011{
1012	int i, rv = 0;
1013	size_t sz;
1014
1015	if (!DB_OPEN(hdp)) {
1016#ifdef	DEBUG
1017		assert(DB(hdp)->db_fd == -1);
1018		assert(DB(hdp)->flags == 0);
1019		for (i = 0; i < DB_TYPES; i++) {
1020			assert(DB_SEG(hdp, i) == NULL);
1021			assert(DB_SEG_PROT(hdp, i) == 0);
1022		}
1023#endif
1024		return (0);
1025	}
1026
1027	/* Unmap header after unmapping all other mapped segments */
1028	for (i = 0; i < DB_TYPES; i++) {
1029		if (DB_SEG(hdp, i)) {
1030			sz = seg_size(hdp, i);
1031			if (DB_RDWR(hdp))
1032				rv += msync(DB_SEG(hdp, i), sz, MS_SYNC);
1033			(void) munmap(DB_SEG(hdp, i), sz);
1034			DB_SEG(hdp, i) = NULL;
1035			DB_SEG_PROT(hdp, i) = 0;
1036		}
1037	}
1038
1039	if (DB_RDWR(hdp))
1040		rv += msync((caddr_t)DB_HDR(hdp), HDR_LEN, MS_SYNC);
1041	(void) munmap((caddr_t)DB_HDR(hdp), HDR_LEN);
1042	DB(hdp)->hdr = NULL;
1043
1044	(void) close(DB(hdp)->db_fd);
1045	DB(hdp)->db_fd = -1;
1046	DB(hdp)->flags = 0;
1047
1048	return (rv ? -1 : 0);
1049}
1050
1051
1052static void
1053cache_free(struct di_devlink_handle *hdp)
1054{
1055	cache_link_t *clp;
1056
1057	subtree_free(hdp, &(CACHE_ROOT(hdp)));
1058	assert(CACHE_LAST(hdp) == NULL);
1059
1060	/*
1061	 * Don't bother removing links from hash table chains,
1062	 * as we are freeing the hash table itself.
1063	 */
1064	while (CACHE(hdp)->dngl != NULL) {
1065		clp = CACHE(hdp)->dngl;
1066		CACHE(hdp)->dngl = clp->sib;
1067		assert(clp->minor == NULL);
1068		link_free(&clp);
1069	}
1070
1071	assert((CACHE(hdp)->hash == NULL) ^ (CACHE(hdp)->hash_sz != 0));
1072
1073	free(CACHE(hdp)->hash);
1074	CACHE(hdp)->hash = NULL;
1075	CACHE(hdp)->hash_sz = 0;
1076}
1077
1078static void
1079handle_free(struct di_devlink_handle **pp)
1080{
1081	struct di_devlink_handle *hdp = *pp;
1082
1083	*pp = NULL;
1084
1085	if (hdp == NULL)
1086		return;
1087
1088	(void) close_db(hdp);
1089	cache_free(hdp);
1090
1091	if (HDL_RDWR(hdp))
1092		exit_db_lock(hdp);
1093	assert(hdp->lock_fd == -1);
1094
1095	free(hdp->dev_dir);
1096	free(hdp->db_dir);
1097	free(hdp);
1098}
1099
1100/*
1101 * Frees the tree rooted at a node. Siblings of the subtree root
1102 * have to be handled by the caller.
1103 */
1104static void
1105subtree_free(struct di_devlink_handle *hdp, cache_node_t **pp)
1106{
1107	cache_node_t *np;
1108	cache_link_t *clp;
1109	cache_minor_t *cmnp;
1110
1111	if (pp == NULL || *pp == NULL)
1112		return;
1113
1114	while ((*pp)->child != NULL) {
1115		np = (*pp)->child;
1116		(*pp)->child = np->sib;
1117		subtree_free(hdp, &np);
1118	}
1119
1120	while ((*pp)->minor != NULL) {
1121		cmnp = (*pp)->minor;
1122		(*pp)->minor = cmnp->sib;
1123
1124		while (cmnp->link != NULL) {
1125			clp = cmnp->link;
1126			cmnp->link = clp->sib;
1127			rm_link_from_hash(hdp, clp);
1128			link_free(&clp);
1129		}
1130		minor_free(hdp, &cmnp);
1131	}
1132
1133	node_free(pp);
1134}
1135
1136static void
1137rm_link_from_hash(struct di_devlink_handle *hdp, cache_link_t *clp)
1138{
1139	int hval;
1140	cache_link_t **pp;
1141
1142	if (clp == NULL)
1143		return;
1144
1145	if (clp->path == NULL)
1146		return;
1147
1148	hval = hashfn(hdp, clp->path);
1149	pp = &(CACHE_HASH(hdp, hval));
1150	for (; *pp != NULL; pp = &(*pp)->hash) {
1151		if (*pp == clp) {
1152			*pp = clp->hash;
1153			clp->hash = NULL;
1154			return;
1155		}
1156	}
1157
1158	dprintf(DBG_ERR, "rm_link_from_hash: link(%s) not found\n", clp->path);
1159}
1160
1161static cache_link_t *
1162link_hash(di_devlink_handle_t hdp, const char *link, uint_t flags)
1163{
1164	int hval;
1165	cache_link_t **pp, *clp;
1166
1167	if (link == NULL)
1168		return (NULL);
1169
1170	hval = hashfn(hdp, link);
1171	pp = &(CACHE_HASH(hdp, hval));
1172	for (; (clp = *pp) != NULL; pp = &clp->hash) {
1173		if (strcmp(clp->path, link) == 0) {
1174			break;
1175		}
1176	}
1177
1178	if (clp == NULL)
1179		return (NULL);
1180
1181	if ((flags & UNLINK_FROM_HASH) == UNLINK_FROM_HASH) {
1182		*pp = clp->hash;
1183		clp->hash = NULL;
1184	}
1185
1186	return (clp);
1187}
1188
1189static cache_minor_t *
1190link2minor(struct di_devlink_handle *hdp, cache_link_t *clp)
1191{
1192	cache_link_t *plp;
1193	const char *minor_path;
1194	char *cp, buf[PATH_MAX], link[PATH_MAX];
1195	char abspath[PATH_MAX];
1196	struct stat st;
1197
1198	if (TYPE_PRI(attr2type(clp->attr))) {
1199		/*
1200		 * For primary link, content should point to a /devices node.
1201		 */
1202		if (!is_minor_node(clp->content, &minor_path)) {
1203			return (NULL);
1204		}
1205
1206		return (lookup_minor(hdp, minor_path, NULL,
1207		    TYPE_CACHE|CREATE_FLAG));
1208
1209	}
1210
1211	/*
1212	 * If secondary, the primary link is derived from the secondary
1213	 * link contents. Secondary link contents can have two formats:
1214	 *	audio -> /dev/sound/0
1215	 *	fb0 -> fbs/afb0
1216	 */
1217
1218	buf[0] = '\0';
1219	if (strncmp(clp->content, DEV"/", strlen(DEV"/")) == 0) {
1220		cp = &clp->content[strlen(DEV"/")];
1221	} else if (clp->content[0] != '/') {
1222		if ((cp = strrchr(clp->path, '/')) != NULL) {
1223			char savechar = *(cp + 1);
1224			*(cp + 1) = '\0';
1225			(void) snprintf(buf, sizeof (buf), "%s", clp->path);
1226			*(cp + 1) = savechar;
1227		}
1228		(void) strlcat(buf, clp->content, sizeof (buf));
1229		cp = buf;
1230	} else {
1231		goto follow_link;
1232	}
1233
1234	/*
1235	 * Lookup the primary link if possible and find its minor.
1236	 */
1237	if ((plp = link_hash(hdp, cp, 0)) != NULL && plp->minor != NULL) {
1238		return (plp->minor);
1239	}
1240
1241	/* realpath() used only as a last resort because it is expensive */
1242follow_link:
1243	(void) snprintf(link, sizeof (link), "%s/%s", hdp->dev_dir, clp->path);
1244
1245#ifdef	DEBUG
1246	/*LINTED*/
1247	assert(sizeof (buf) >= PATH_MAX);
1248#endif
1249
1250	/*
1251	 * A realpath attempt to lookup a dangling link can invoke implicit
1252	 * reconfig so verify there's an actual device behind the link first.
1253	 */
1254	if (lstat(link, &st) == -1)
1255		return (NULL);
1256	if (S_ISLNK(st.st_mode)) {
1257		if (s_readlink(link, buf, sizeof (buf)) < 0)
1258			return (NULL);
1259		if (buf[0] != '/') {
1260			char *p;
1261			size_t n = sizeof (abspath);
1262			if (strlcpy(abspath, link, n) >= n)
1263				return (NULL);
1264			p = strrchr(abspath, '/') + 1;
1265			*p = 0;
1266			n = sizeof (abspath) - strlen(p);
1267			if (strlcpy(p, buf, n) >= n)
1268				return (NULL);
1269		} else {
1270			if (strlcpy(abspath, buf, sizeof (abspath)) >=
1271			    sizeof (abspath))
1272				return (NULL);
1273		}
1274		if (!device_exists(abspath))
1275			return (NULL);
1276	}
1277
1278	if (s_realpath(link, buf) == NULL || !is_minor_node(buf, &minor_path)) {
1279		return (NULL);
1280	}
1281	return (lookup_minor(hdp, minor_path, NULL, TYPE_CACHE|CREATE_FLAG));
1282}
1283
1284
1285static void
1286resolve_dangling_links(struct di_devlink_handle *hdp)
1287{
1288	cache_minor_t *cmnp;
1289	cache_link_t *clp, **pp;
1290
1291	for (pp = &(CACHE(hdp)->dngl); *pp != NULL; ) {
1292		clp = *pp;
1293		if ((cmnp = link2minor(hdp, clp)) != NULL) {
1294			*pp = clp->sib;
1295			clp->sib = cmnp->link;
1296			cmnp->link = clp;
1297			assert(clp->minor == NULL);
1298			clp->minor = cmnp;
1299		} else {
1300			dprintf(DBG_INFO, "resolve_dangling_links: link(%s):"
1301			    " unresolved\n", clp->path);
1302			pp = &clp->sib;
1303		}
1304	}
1305}
1306
1307
1308/*
1309 * The elements are assumed to be detached from the cache tree.
1310 */
1311static void
1312node_free(cache_node_t **pp)
1313{
1314	cache_node_t *cnp = *pp;
1315
1316	*pp = NULL;
1317
1318	if (cnp == NULL)
1319		return;
1320
1321	free(cnp->path);
1322	free(cnp);
1323}
1324
1325static void
1326minor_free(struct di_devlink_handle *hdp, cache_minor_t **pp)
1327{
1328	cache_minor_t *cmnp = *pp;
1329
1330	*pp = NULL;
1331
1332	if (cmnp == NULL)
1333		return;
1334
1335	if (CACHE_LAST(hdp) == cmnp) {
1336		dprintf(DBG_STEP, "minor_free: last_minor(%s)\n", cmnp->name);
1337		CACHE_LAST(hdp) = NULL;
1338	}
1339
1340	free(cmnp->name);
1341	free(cmnp->nodetype);
1342	free(cmnp);
1343}
1344
1345static void
1346link_free(cache_link_t **pp)
1347{
1348	cache_link_t *clp = *pp;
1349
1350	*pp = NULL;
1351
1352	if (clp == NULL)
1353		return;
1354
1355	free(clp->path);
1356	free(clp->content);
1357	free(clp);
1358}
1359
/*
 * Returns a pointer to the ':' that precedes the minor name, i.e. the
 * first ':' found at or after the last '/' in path. Returns NULL if path
 * contains no '/' or if its last component contains no ':'.
 */
static char *
minor_colon(const char *path)
{
	const char *leaf = strrchr(path, '/');

	return (leaf == NULL ? NULL : strchr(leaf, ':'));
}
1374
1375static void *
1376lookup_minor(
1377	struct di_devlink_handle *hdp,
1378	const char *minor_path,
1379	const char *nodetype,
1380	const int flags)
1381{
1382	void *vp;
1383	char *colon;
1384	char pdup[PATH_MAX];
1385	const char *fcn = "lookup_minor";
1386
1387	if (minor_path == NULL) {
1388		errno = EINVAL;
1389		return (NULL);
1390	}
1391
1392	(void) snprintf(pdup, sizeof (pdup), "%s", minor_path);
1393
1394	if ((colon = minor_colon(pdup)) == NULL) {
1395		(void) dprintf(DBG_ERR, "%s: invalid minor path(%s)\n", fcn,
1396		    minor_path);
1397		errno = EINVAL;
1398		return (NULL);
1399	}
1400	*colon = '\0';
1401
1402	if ((vp = get_last_minor(hdp, pdup, colon + 1, flags)) != NULL) {
1403		return (vp);
1404	}
1405
1406	if ((vp = lookup_node(hdp, pdup, flags)) == NULL) {
1407		(void) dprintf(DBG_ERR, "%s: node(%s) not found\n", fcn, pdup);
1408		return (NULL);
1409	}
1410	*colon = ':';
1411
1412	if (LOOKUP_CACHE(flags)) {
1413		cache_minor_t **pp;
1414
1415		pp = &((cache_node_t *)vp)->minor;
1416		for (; *pp != NULL; pp = &(*pp)->sib) {
1417			if (strcmp((*pp)->name, colon + 1) == 0)
1418				break;
1419		}
1420
1421		if (*pp == NULL && CREATE_ELEM(flags)) {
1422			*pp = minor_insert(hdp, vp, colon + 1, nodetype, pp);
1423		}
1424		set_last_minor(hdp, *pp, flags);
1425
1426		return (*pp);
1427	} else {
1428		char *cp;
1429		uint32_t nidx;
1430		struct db_minor *dmp;
1431
1432		nidx = (((struct db_node *)vp)->minor);
1433		for (; dmp = get_minor(hdp, nidx); nidx = dmp->sib) {
1434			cp = get_string(hdp, dmp->name);
1435			if (cp && strcmp(cp, colon + 1) == 0)
1436				break;
1437		}
1438		return (dmp);
1439	}
1440}
1441
1442static void *
1443lookup_node(struct di_devlink_handle *hdp, char *path, const int flags)
1444{
1445	struct tnode tnd = {NULL};
1446
1447	if (tnd.node = get_last_node(hdp, path, flags))
1448		return (tnd.node);
1449
1450	tnd.handle = hdp;
1451	tnd.flags = flags;
1452
1453	if (walk_tree(path, &tnd, visit_node) != 0)
1454		return (NULL);
1455
1456	return (tnd.node);
1457}
1458
1459/*
1460 * last_minor is used for nodes of TYPE_CACHE only.
1461 */
1462static void *
1463get_last_node(struct di_devlink_handle *hdp, const char *path, int flags)
1464{
1465	cache_node_t *cnp;
1466
1467#ifdef	DEBUG
1468	if (getenv(SKIP_LAST_CACHE)) {
1469		(void) dprintf(DBG_INFO, "get_last_node: SKIPPING \"last\" "
1470		    "node cache\n");
1471		return (NULL);
1472	}
1473#endif
1474
1475	if (!LOOKUP_CACHE(flags) || CACHE_LAST(hdp) == NULL ||
1476	    CACHE_LAST(hdp)->node == NULL) {
1477		return (NULL);
1478	}
1479
1480	cnp = CACHE_LAST(hdp)->node;
1481	if (strcmp(cnp->path, path) == 0) {
1482		return (cnp);
1483	}
1484
1485	cnp = cnp->sib;
1486	if (cnp && strcmp(cnp->path, path) == 0) {
1487		return (cnp);
1488	}
1489
1490	return (NULL);
1491}
1492
1493static void *
1494get_last_minor(
1495	struct di_devlink_handle *hdp,
1496	const char *devfs_path,
1497	const char *minor_name,
1498	int flags)
1499{
1500	cache_minor_t *cmnp;
1501
1502#ifdef	DEBUG
1503	if (getenv(SKIP_LAST_CACHE)) {
1504		(void) dprintf(DBG_INFO, "get_last_minor: SKIPPING \"last\" "
1505		    "minor cache\n");
1506		return (NULL);
1507	}
1508#endif
1509
1510	if (!LOOKUP_CACHE(flags) || CACHE_LAST(hdp) == NULL) {
1511		return (NULL);
1512	}
1513
1514	cmnp = CACHE_LAST(hdp);
1515	if (strcmp(cmnp->name, minor_name) == 0 && cmnp->node &&
1516	    strcmp(cmnp->node->path, devfs_path) == 0) {
1517		return (cmnp);
1518	}
1519
1520	cmnp = cmnp->sib;
1521	if (cmnp && strcmp(cmnp->name, minor_name) == 0 && cmnp->node &&
1522	    strcmp(cmnp->node->path, devfs_path) == 0) {
1523		set_last_minor(hdp, cmnp, TYPE_CACHE);
1524		return (cmnp);
1525	}
1526
1527	return (NULL);
1528}
1529
1530static void
1531set_last_minor(struct di_devlink_handle *hdp, cache_minor_t *cmnp, int flags)
1532{
1533#ifdef	DEBUG
1534	if (getenv(SKIP_LAST_CACHE)) {
1535		(void) dprintf(DBG_INFO, "set_last_minor: SKIPPING \"last\" "
1536		    "minor cache\n");
1537		return;
1538	}
1539#endif
1540
1541	if (LOOKUP_CACHE(flags) && cmnp) {
1542		CACHE_LAST(hdp) = cmnp;
1543	}
1544}
1545
1546
1547/*
1548 * Returns 0 if normal return or -1 otherwise.
1549 */
1550static int
1551walk_tree(
1552	char *cur,
1553	void *arg,
1554	int (*node_callback)(const char *path, void *arg))
1555{
1556	char *slash, buf[PATH_MAX];
1557
1558	if (cur == NULL || cur[0] != '/' || strlen(cur) > sizeof (buf) - 1) {
1559		errno = EINVAL;
1560		return (-1);
1561	}
1562
1563	(void) strcpy(buf, "/");
1564
1565	for (;;) {
1566
1567		if (node_callback(buf, arg) != DI_WALK_CONTINUE)
1568			break;
1569
1570		while (*cur == '/')
1571			cur++;
1572
1573		if (*cur == '\0')
1574			break;
1575
1576		/*
1577		 * There is a next component(s). Append a "/" separator for all
1578		 * but the first (root) component.
1579		 */
1580		if (buf[1] != '\0') {
1581			(void) strlcat(buf, "/", sizeof (buf));
1582		}
1583
1584		if (slash = strchr(cur, '/')) {
1585			*slash = '\0';
1586			(void) strlcat(buf, cur, sizeof (buf));
1587			*slash = '/';
1588			cur = slash;
1589		} else {
1590			(void) strlcat(buf, cur, sizeof (buf));
1591			cur += strlen(cur);
1592		}
1593
1594	}
1595
1596	return (0);
1597}
1598
1599
1600static int
1601visit_node(const char *path, void *arg)
1602{
1603	struct tnode *tnp = arg;
1604
1605	if (LOOKUP_CACHE(tnp->flags)) {
1606
1607		cache_node_t *cnp = tnp->node;
1608
1609		cnp = (cnp) ? cnp->child : CACHE_ROOT(tnp->handle);
1610
1611		for (; cnp != NULL; cnp = cnp->sib) {
1612			if (strcmp(cnp->path, path) == 0)
1613				break;
1614		}
1615		if (cnp == NULL && CREATE_ELEM(tnp->flags)) {
1616			cnp = node_insert(tnp->handle, tnp->node, path,
1617			    INSERT_TAIL);
1618		}
1619		tnp->node = cnp;
1620	} else {
1621		char *cp;
1622		struct db_node *dnp = tnp->node;
1623
1624		dnp = (dnp) ? get_node(tnp->handle, dnp->child)
1625		    : get_node(tnp->handle, DB_HDR(tnp->handle)->root_idx);
1626
1627		for (; dnp != NULL; dnp = get_node(tnp->handle, dnp->sib)) {
1628			cp = get_string(tnp->handle, dnp->path);
1629			if (cp && strcmp(cp, path) == 0) {
1630				break;
1631			}
1632		}
1633		tnp->node = dnp;
1634	}
1635
1636	/*
1637	 * Terminate walk if node is not found for a path component.
1638	 */
1639	return (tnp->node ? DI_WALK_CONTINUE : DI_WALK_TERMINATE);
1640}
1641
1642static void
1643minor_delete(di_devlink_handle_t hdp, cache_minor_t *cmnp)
1644{
1645	cache_link_t **lpp;
1646	cache_minor_t **mpp;
1647	const char *fcn = "minor_delete";
1648
1649	(void) dprintf(DBG_STEP, "%s: removing minor: %s\n", fcn, cmnp->name);
1650
1651	/* detach minor from node */
1652	if (cmnp->node != NULL) {
1653		mpp = &cmnp->node->minor;
1654		for (; *mpp != NULL; mpp = &(*mpp)->sib) {
1655			if (*mpp == cmnp)
1656				break;
1657		}
1658
1659		if (*mpp == NULL) {
1660			(void) dprintf(DBG_ERR, "%s: dangling minor: %s\n",
1661			    fcn, cmnp->name);
1662		} else {
1663			*mpp = cmnp->sib;
1664		}
1665	} else {
1666		(void) dprintf(DBG_ERR, "%s: orphan minor(%s)\n", fcn,
1667		    cmnp->name);
1668	}
1669
1670	delete_unused_nodes(hdp, cmnp->node);
1671
1672	cmnp->node = NULL;
1673	cmnp->sib = NULL;
1674
1675	/* Move all remaining links to dangling list */
1676	for (lpp = &cmnp->link; *lpp != NULL; lpp = &(*lpp)->sib) {
1677		(*lpp)->minor = NULL;
1678	}
1679	*lpp = CACHE(hdp)->dngl;
1680	CACHE(hdp)->dngl = cmnp->link;
1681	cmnp->link = NULL;
1682
1683	minor_free(hdp, &cmnp);
1684}
1685
1686static void
1687delete_unused_nodes(di_devlink_handle_t hdp, cache_node_t *cnp)
1688{
1689	cache_node_t **npp;
1690	const char *fcn = "delete_unused_nodes";
1691
1692	if (cnp == NULL)
1693		return;
1694
1695	if (cnp->minor != NULL || cnp->child != NULL)
1696		return;
1697
1698	(void) dprintf(DBG_INFO, "%s: removing unused node: %s\n", fcn,
1699	    cnp->path);
1700
1701	/* Unlink node from tree */
1702	if (cnp->parent != NULL) {
1703		npp = &cnp->parent->child;
1704		for (; *npp != NULL; npp = &(*npp)->sib) {
1705			if (*npp == cnp)
1706				break;
1707		}
1708
1709		if (*npp == NULL) {
1710			(void) dprintf(DBG_ERR, "%s: dangling node: %s\n", fcn,
1711			    cnp->path);
1712		} else {
1713			*npp = cnp->sib;
1714		}
1715	} else if (cnp == CACHE_ROOT(hdp)) {
1716		CACHE_ROOT(hdp) = NULL;
1717	} else {
1718		(void) dprintf(DBG_ERR, "%s: orphan node (%s)\n", fcn,
1719		    cnp->path);
1720	}
1721
1722	delete_unused_nodes(hdp, cnp->parent);
1723
1724	cnp->parent = cnp->sib = NULL;
1725
1726	node_free(&cnp);
1727}
1728
1729static int
1730rm_link(di_devlink_handle_t hdp, const char *link)
1731{
1732	cache_link_t *clp;
1733	const char *fcn = "rm_link";
1734
1735	if (hdp == NULL || DB_ERR(hdp) || link == NULL || link[0] == '/' ||
1736	    (!HDL_RDWR(hdp) && !HDL_RDONLY(hdp))) {
1737		dprintf(DBG_ERR, "%s: %s: invalid args\n",
1738		    fcn, link ? link : "<NULL>");
1739		errno = EINVAL;
1740		return (-1);
1741	}
1742
1743	dprintf(DBG_STEP, "%s: link(%s)\n", fcn, link);
1744
1745	if ((clp = link_hash(hdp, link, UNLINK_FROM_HASH)) == NULL) {
1746		return (0);
1747	}
1748
1749	link_delete(hdp, clp);
1750
1751	return (0);
1752}
1753
1754int
1755di_devlink_rm_link(di_devlink_handle_t hdp, const char *link)
1756{
1757	if (hdp == NULL || !HDL_RDWR(hdp)) {
1758		errno = EINVAL;
1759		return (-1);
1760	}
1761
1762	return (rm_link(hdp, link));
1763}
1764
1765static void
1766link_delete(di_devlink_handle_t hdp, cache_link_t *clp)
1767{
1768	cache_link_t **pp;
1769	const char *fcn = "link_delete";
1770
1771	(void) dprintf(DBG_STEP, "%s: removing link: %s\n", fcn, clp->path);
1772
1773	if (clp->minor == NULL)
1774		pp = &(CACHE(hdp)->dngl);
1775	else
1776		pp = &clp->minor->link;
1777
1778	for (; *pp != NULL; pp = &(*pp)->sib) {
1779		if (*pp == clp)
1780			break;
1781	}
1782
1783	if (*pp == NULL) {
1784		(void) dprintf(DBG_ERR, "%s: link(%s) not on list\n",
1785		    fcn, clp->path);
1786	} else {
1787		*pp = clp->sib;
1788	}
1789
1790	delete_unused_minor(hdp, clp->minor);
1791
1792	clp->minor = NULL;
1793
1794	link_free(&clp);
1795}
1796
1797static void
1798delete_unused_minor(di_devlink_handle_t hdp, cache_minor_t *cmnp)
1799{
1800	if (cmnp == NULL)
1801		return;
1802
1803	if (cmnp->link != NULL)
1804		return;
1805
1806	dprintf(DBG_STEP, "delete_unused_minor: removing minor(%s)\n",
1807	    cmnp->name);
1808
1809	minor_delete(hdp, cmnp);
1810}
1811
1812int
1813di_devlink_add_link(
1814	di_devlink_handle_t hdp,
1815	const char *link,
1816	const char *content,
1817	int flags)
1818{
1819	return (add_link(hdp, link, content, flags) != NULL ? 0 : -1);
1820}
1821
1822static cache_link_t *
1823add_link(
1824	struct di_devlink_handle *hdp,
1825	const char *link,
1826	const char *content,
1827	int flags)
1828{
1829	uint32_t attr;
1830	cache_link_t *clp;
1831	cache_minor_t *cmnp;
1832	const char *fcn = "add_link";
1833
1834	if (hdp == NULL || DB_ERR(hdp) || link == NULL ||
1835	    link[0] == '/' || content == NULL || !link_flag(flags) ||
1836	    (!HDL_RDWR(hdp) && !HDL_RDONLY(hdp))) {
1837		dprintf(DBG_ERR, "%s: %s: invalid args\n",
1838		    fcn, link ? link : "<NULL>");
1839		errno = EINVAL;
1840		return (NULL);
1841	}
1842
1843	if ((clp = link_hash(hdp, link, 0)) != NULL) {
1844		if (link_cmp(clp, content, LINK_TYPE(flags)) != 0) {
1845			(void) rm_link(hdp, link);
1846		} else {
1847			return (clp);
1848		}
1849	}
1850
1851	if (TYPE_PRI(flags)) {
1852		const char *minor_path = NULL;
1853
1854		if (!is_minor_node(content, &minor_path)) {
1855			(void) dprintf(DBG_ERR, "%s: invalid content(%s)"
1856			    " for primary link\n", fcn, content);
1857			errno = EINVAL;
1858			return (NULL);
1859		}
1860		if ((cmnp = lookup_minor(hdp, minor_path, NULL,
1861		    TYPE_CACHE|CREATE_FLAG)) == NULL) {
1862			return (NULL);
1863		}
1864		attr = A_PRIMARY;
1865	} else {
1866		/*
1867		 * Defer resolving a secondary link to a minor until the
1868		 * database is closed. This ensures that the primary link
1869		 * (required for a successful resolve) has also been created.
1870		 */
1871		cmnp = NULL;
1872		attr = A_SECONDARY;
1873	}
1874
1875	return (link_insert(hdp, cmnp, link, content, attr));
1876}
1877
1878/*
1879 * Returns 0 on match or 1 otherwise.
1880 */
1881static int
1882link_cmp(cache_link_t *clp, const char *content, int type)
1883{
1884	if (strcmp(clp->content, content) != 0)
1885		return (1);
1886
1887	if (attr2type(clp->attr) != type)
1888		return (1);
1889
1890	return (0);
1891}
1892
1893int
1894di_devlink_update(di_devlink_handle_t hdp)
1895{
1896	if (hdp == NULL || !HDL_RDWR(hdp) || DB_ERR(hdp)) {
1897		errno = EINVAL;
1898		return (-1);
1899	}
1900
1901	/*
1902	 * Reset the counter to schedule a synchronization with /dev on the next
1903	 * di_devlink_close().
1904	 */
1905	CACHE(hdp)->update_count = 0;
1906
1907	return (0);
1908}
1909
1910static int
1911synchronize_db(di_devlink_handle_t hdp)
1912{
1913	int hval;
1914	cache_link_t *clp;
1915	char pdup[PATH_MAX];
1916	recurse_t rec = {NULL};
1917	const char *fcn = "synchronize_db";
1918
1919	rec.data = NULL;
1920	rec.fcn = cache_dev_link;
1921
1922	/*
1923	 * Walk through $ROOT/dev, reading every link and marking the
1924	 * corresponding cached version as valid(adding new links as needed).
1925	 * Then walk through the cache and remove all unmarked links.
1926	 */
1927	if (recurse_dev(hdp, &rec) != 0) {
1928		return (-1);
1929	}
1930
1931	for (hval = 0; hval < CACHE(hdp)->hash_sz; hval++) {
1932		for (clp = CACHE_HASH(hdp, hval); clp != NULL; ) {
1933			if (GET_VALID_ATTR(clp->attr)) {
1934				CLR_VALID_ATTR(clp->attr);
1935				clp = clp->hash;
1936				continue;
1937			}
1938
1939			/*
1940			 * The link is stale, so remove it. Since the link
1941			 * will be destroyed, use a copy of the link path to
1942			 * invoke the remove function.
1943			 */
1944			(void) snprintf(pdup, sizeof (pdup), "%s", clp->path);
1945			clp = clp->hash;
1946			(void) dprintf(DBG_STEP, "%s: removing invalid link:"
1947			    " %s\n", fcn, pdup);
1948			(void) di_devlink_rm_link(hdp, pdup);
1949		}
1950	}
1951
1952	(void) dprintf(DBG_STEP, "%s: update completed\n", fcn);
1953
1954	return (0);
1955}
1956
1957static di_devlink_handle_t
1958di_devlink_init_impl(const char *root, const char *name, uint_t flags)
1959{
1960	int	err = 0;
1961
1962	if ((flags != 0 && flags != DI_MAKE_LINK) ||
1963	    (flags == 0 && name != NULL)) {
1964		errno = EINVAL;
1965		return (NULL);
1966	}
1967
1968	if ((flags == DI_MAKE_LINK) &&
1969	    (err = devlink_create(root, name, DCA_DEVLINK_CACHE))) {
1970		errno = err;
1971		return (NULL);
1972	}
1973
1974	(void) dprintf(DBG_INFO, "devlink_init_impl: success\n");
1975
1976	return (devlink_snapshot(root));
1977}
1978
1979di_devlink_handle_t
1980di_devlink_init(const char *name, uint_t flags)
1981{
1982	return (di_devlink_init_impl("/", name, flags));
1983}
1984
1985di_devlink_handle_t
1986di_devlink_init_root(const char *root, const char *name, uint_t flags)
1987{
1988	return (di_devlink_init_impl(root, name, flags));
1989}
1990
1991static di_devlink_handle_t
1992devlink_snapshot(const char *root_dir)
1993{
1994	struct di_devlink_handle *hdp;
1995	int		err;
1996	static int	retried = 0;
1997
1998	if ((hdp = handle_alloc(root_dir, OPEN_RDONLY)) == NULL) {
1999		return (NULL);
2000	}
2001
2002	/*
2003	 * We don't need to lock.  If a consumer wants the very latest db
2004	 * then it must perform a di_devlink_init with the DI_MAKE_LINK
2005	 * flag to force a sync with devfsadm first.  Otherwise, the
2006	 * current database file is opened and mmaped on demand: the rename
2007	 * associated with a db update does not change the contents
2008	 * of files already opened.
2009	 */
2010again:	err = open_db(hdp, OPEN_RDONLY);
2011
2012	/*
2013	 * If we failed to open DB the most likely cause is that DB file did
2014	 * not exist. If we have not done a retry, signal devfsadmd to
2015	 * recreate the DB file and retry. If we fail to open the DB after
2016	 * retry, we will walk /dev in di_devlink_walk.
2017	 */
2018	if (err && (retried == 0)) {
2019		retried++;
2020		(void) devlink_create(root_dir, NULL, DCA_DEVLINK_SYNC);
2021		goto again;
2022	}
2023	return (hdp);
2024}
2025
2026int
2027di_devlink_fini(di_devlink_handle_t *pp)
2028{
2029	if (pp == NULL || *pp == NULL || !HDL_RDONLY(*pp)) {
2030		errno = EINVAL;
2031		return (-1);
2032	}
2033
2034	/* Freeing the handle also closes the DB */
2035	handle_free(pp);
2036
2037	return (0);
2038}
2039
2040int
2041di_devlink_walk(
2042	di_devlink_handle_t hdp,
2043	const char *re,
2044	const char *minor_path,
2045	uint_t flags,
2046	void *arg,
2047	int (*devlink_callback)(di_devlink_t, void *))
2048{
2049	int rv;
2050	regex_t reg;
2051	link_desc_t linkd = {NULL};
2052
2053	if (hdp == NULL || !HDL_RDONLY(hdp)) {
2054		errno = EINVAL;
2055		return (-1);
2056	}
2057
2058	linkd.minor_path = minor_path;
2059	linkd.flags = flags;
2060	linkd.arg = arg;
2061	linkd.fcn = devlink_callback;
2062
2063	if (re) {
2064		if (regcomp(&reg, re, REG_EXTENDED) != 0)
2065			return (-1);
2066		linkd.regp = &reg;
2067	}
2068
2069	if (check_args(&linkd)) {
2070		errno = EINVAL;
2071		rv = -1;
2072		goto out;
2073	}
2074
2075	if (DB_OPEN(hdp)) {
2076		rv = walk_db(hdp, &linkd);
2077	} else {
2078		rv = walk_dev(hdp, &linkd);
2079	}
2080
2081out:
2082	if (re) {
2083		regfree(&reg);
2084	}
2085
2086	return (rv ? -1 : 0);
2087}
2088
2089static int
2090link_flag(uint_t flags)
2091{
2092	if (flags != 0 && flags != DI_PRIMARY_LINK &&
2093	    flags != DI_SECONDARY_LINK) {
2094		return (0);
2095	}
2096
2097	return (1);
2098}
2099
2100/*
2101 * Currently allowed flags are:
2102 *	DI_PRIMARY_LINK
2103 *	DI_SECONDARY_LINK
2104 */
2105static int
2106check_args(link_desc_t *linkp)
2107{
2108	if (linkp->fcn == NULL)
2109		return (-1);
2110
2111	if (!link_flag(linkp->flags)) {
2112		return (-1);
2113	}
2114
2115	/*
2116	 * Minor path can be NULL. In that case, all links will be
2117	 * selected.
2118	 */
2119	if (linkp->minor_path) {
2120		if (linkp->minor_path[0] != '/' ||
2121		    minor_colon(linkp->minor_path) == NULL) {
2122			return (-1);
2123		}
2124	}
2125
2126	return (0);
2127}
2128
2129
2130/*
2131 * Walk all links in database if no minor path is specified.
2132 */
2133static int
2134walk_db(struct di_devlink_handle *hdp, link_desc_t *linkp)
2135{
2136	assert(DB_OPEN(hdp));
2137
2138	if (linkp->minor_path == NULL) {
2139		return (walk_all_links(hdp, linkp));
2140	} else {
2141		return (walk_matching_links(hdp, linkp));
2142	}
2143}
2144
2145static int
2146cache_dev(struct di_devlink_handle *hdp)
2147{
2148	size_t sz;
2149	recurse_t rec = {NULL};
2150
2151	assert(hdp);
2152	assert(HDL_RDONLY(hdp));
2153
2154	if (hdp == NULL || !HDL_RDONLY(hdp)) {
2155		dprintf(DBG_ERR, "cache_dev: invalid arg\n");
2156		return (-1);
2157	}
2158
2159	sz = MIN_HASH_SIZE;
2160
2161	CACHE(hdp)->hash = calloc(sz, sizeof (cache_link_t *));
2162	if (CACHE(hdp)->hash == NULL) {
2163		return (-1);
2164	}
2165	CACHE(hdp)->hash_sz = sz;
2166
2167	rec.data = NULL;
2168	rec.fcn = cache_dev_link;
2169
2170	return (recurse_dev(hdp, &rec));
2171}
2172
2173static int
2174walk_dev(struct di_devlink_handle *hdp, link_desc_t *linkp)
2175{
2176	assert(hdp && linkp);
2177	assert(!DB_OPEN(hdp));
2178	assert(HDL_RDONLY(hdp));
2179
2180	if (hdp == NULL || !HDL_RDONLY(hdp) || DB_OPEN(hdp)) {
2181		dprintf(DBG_ERR, "walk_dev: invalid args\n");
2182		return (-1);
2183	}
2184
2185	if (CACHE_EMPTY(hdp) && cache_dev(hdp) != 0) {
2186		dprintf(DBG_ERR, "walk_dev: /dev caching failed\n");
2187		return (-1);
2188	}
2189
2190	if (linkp->minor_path)
2191		walk_cache_minor(hdp, linkp->minor_path, linkp);
2192	else
2193		walk_all_cache(hdp, linkp);
2194
2195	return (linkp->retval);
2196}
2197
2198/* ARGSUSED */
2199static int
2200cache_dev_link(struct di_devlink_handle *hdp, void *data, const char *link)
2201{
2202	int flags;
2203	cache_link_t *clp;
2204	char content[PATH_MAX];
2205
2206	assert(HDL_RDWR(hdp) || HDL_RDONLY(hdp));
2207
2208	if (s_readlink(link, content, sizeof (content)) < 0) {
2209		return (DI_WALK_CONTINUE);
2210	}
2211
2212	if (is_minor_node(content, NULL)) {
2213		flags = DI_PRIMARY_LINK;
2214	} else {
2215		flags = DI_SECONDARY_LINK;
2216	}
2217
2218	assert(strncmp(link, hdp->dev_dir, strlen(hdp->dev_dir)) == 0);
2219
2220	/*
2221	 * Store only the part after <root-dir>/dev/
2222	 */
2223	link += strlen(hdp->dev_dir) + 1;
2224
2225	if ((clp = add_link(hdp, link, content, flags)) != NULL) {
2226		SET_VALID_ATTR(clp->attr);
2227	}
2228
2229	return (DI_WALK_CONTINUE);
2230}
2231
2232
2233static int
2234walk_all_links(struct di_devlink_handle *hdp, link_desc_t *linkp)
2235{
2236	struct db_link *dlp;
2237	uint32_t nidx, eidx;
2238
2239	assert(DB_NUM(hdp, DB_LINK) >= 1);
2240
2241	eidx = DB_NUM(hdp, DB_LINK);
2242
2243	/* Skip the "NIL" (index == 0) link. */
2244	for (nidx = 1; nidx < eidx; nidx++) {
2245		/*
2246		 * Declare this local to the block with zero
2247		 * initializer so that it gets rezeroed
2248		 * for each iteration.
2249		 */
2250		struct di_devlink vlink = {NULL};
2251
2252		if ((dlp = get_link(hdp, nidx)) == NULL)
2253			continue;
2254
2255		vlink.rel_path = get_string(hdp, dlp->path);
2256		vlink.content = get_string(hdp, dlp->content);
2257		vlink.type = attr2type(dlp->attr);
2258
2259		if (visit_link(hdp, linkp, &vlink) != DI_WALK_CONTINUE) {
2260			break;
2261		}
2262	}
2263
2264	return (linkp->retval);
2265}
2266
2267static int
2268walk_matching_links(struct di_devlink_handle *hdp, link_desc_t *linkp)
2269{
2270	uint32_t nidx;
2271	struct db_link *dlp;
2272	struct db_minor *dmp;
2273
2274	assert(linkp->minor_path != NULL);
2275
2276	dmp = lookup_minor(hdp, linkp->minor_path, NULL, TYPE_DB);
2277
2278	/*
2279	 * If a minor matching the path exists, walk that minor's devlinks list.
2280	 * Then walk the dangling devlinks list. Non-matching devlinks will be
2281	 * filtered out in visit_link.
2282	 */
2283	for (;;) {
2284		nidx = dmp ? dmp->link : DB_HDR(hdp)->dngl_idx;
2285		for (; dlp = get_link(hdp, nidx); nidx = dlp->sib) {
2286			struct di_devlink vlink = {NULL};
2287
2288			vlink.rel_path = get_string(hdp, dlp->path);
2289			vlink.content = get_string(hdp, dlp->content);
2290			vlink.type = attr2type(dlp->attr);
2291
2292			if (visit_link(hdp, linkp, &vlink) != DI_WALK_CONTINUE)
2293				goto out;
2294		}
2295		if (dmp == NULL) {
2296			break;
2297		} else {
2298			dmp = NULL;
2299		}
2300	}
2301
2302out:
2303	return (linkp->retval);
2304}
2305
2306static int
2307visit_link(
2308	struct di_devlink_handle *hdp,
2309	link_desc_t *linkp,
2310	struct di_devlink *vlp)
2311{
2312	struct stat sbuf;
2313	const char *minor_path = NULL;
2314	char abs_path[PATH_MAX], cont[PATH_MAX];
2315
2316	/*
2317	 * It is legal for the link's content and type to be unknown.
2318	 * but one of absolute or relative path must be set.
2319	 */
2320	if (vlp->rel_path == NULL && vlp->abs_path == NULL) {
2321		(void) dprintf(DBG_ERR, "visit_link: invalid arguments\n");
2322		return (DI_WALK_CONTINUE);
2323	}
2324
2325	if (vlp->rel_path == NULL) {
2326		vlp->rel_path = (char *)rel_path(hdp, vlp->abs_path);
2327		if (vlp->rel_path == NULL || vlp->rel_path[0] == '\0')
2328			return (DI_WALK_CONTINUE);
2329	}
2330
2331	if (linkp->regp) {
2332		if (regexec(linkp->regp, vlp->rel_path, 0, NULL, 0) != 0)
2333			return (DI_WALK_CONTINUE);
2334	}
2335
2336	if (vlp->abs_path == NULL) {
2337		assert(vlp->rel_path[0] != '/');
2338		(void) snprintf(abs_path, sizeof (abs_path), "%s/%s",
2339		    hdp->dev_dir, vlp->rel_path);
2340		vlp->abs_path = abs_path;
2341	}
2342
2343	if (vlp->content == NULL) {
2344		if (s_readlink(vlp->abs_path, cont, sizeof (cont)) < 0) {
2345			return (DI_WALK_CONTINUE);
2346		}
2347		vlp->content = cont;
2348	}
2349
2350
2351	if (vlp->type == 0) {
2352		if (is_minor_node(vlp->content, &minor_path)) {
2353			vlp->type = DI_PRIMARY_LINK;
2354		} else {
2355			vlp->type = DI_SECONDARY_LINK;
2356		}
2357	}
2358
2359	/*
2360	 * Filter based on minor path
2361	 */
2362	if (linkp->minor_path) {
2363		char tmp[PATH_MAX];
2364
2365		/*
2366		 * derive minor path
2367		 */
2368		if (vlp->type == DI_SECONDARY_LINK) {
2369
2370#ifdef	DEBUG
2371			/*LINTED*/
2372			assert(sizeof (tmp) >= PATH_MAX);
2373#endif
2374			if (s_realpath(vlp->abs_path, tmp) == NULL)
2375				return (DI_WALK_CONTINUE);
2376
2377			if (!is_minor_node(tmp, &minor_path))
2378				return (DI_WALK_CONTINUE);
2379
2380		} else if (minor_path == NULL) {
2381			if (!is_minor_node(vlp->content, &minor_path))
2382				return (DI_WALK_CONTINUE);
2383		}
2384
2385		assert(minor_path != NULL);
2386
2387		if (strcmp(linkp->minor_path, minor_path) != 0)
2388			return (DI_WALK_CONTINUE);
2389	}
2390
2391	/*
2392	 * Filter based on link type
2393	 */
2394	if (!TYPE_NONE(linkp->flags) && LINK_TYPE(linkp->flags) != vlp->type) {
2395		return (DI_WALK_CONTINUE);
2396	}
2397
2398	if (lstat(vlp->abs_path, &sbuf) < 0) {
2399		dprintf(DBG_ERR, "visit_link: %s: lstat failed: %s\n",
2400		    vlp->abs_path, strerror(errno));
2401		return (DI_WALK_CONTINUE);
2402	}
2403
2404	return (linkp->fcn(vlp, linkp->arg));
2405}
2406
2407static int
2408devlink_valid(di_devlink_t devlink)
2409{
2410	if (devlink == NULL || devlink->rel_path == NULL ||
2411	    devlink->abs_path == NULL || devlink->content == NULL ||
2412	    TYPE_NONE(devlink->type)) {
2413		return (0);
2414	}
2415
2416	return (1);
2417}
2418
2419const char *
2420di_devlink_path(di_devlink_t devlink)
2421{
2422	if (!devlink_valid(devlink)) {
2423		errno = EINVAL;
2424		return (NULL);
2425	}
2426
2427	return (devlink->abs_path);
2428}
2429
2430const char *
2431di_devlink_content(di_devlink_t devlink)
2432{
2433	if (!devlink_valid(devlink)) {
2434		errno = EINVAL;
2435		return (NULL);
2436	}
2437
2438	return (devlink->content);
2439}
2440
2441int
2442di_devlink_type(di_devlink_t devlink)
2443{
2444	if (!devlink_valid(devlink)) {
2445		errno = EINVAL;
2446		return (-1);
2447	}
2448
2449	return (devlink->type);
2450}
2451
2452di_devlink_t
2453di_devlink_dup(di_devlink_t devlink)
2454{
2455	struct di_devlink *duplink;
2456
2457	if (!devlink_valid(devlink)) {
2458		errno = EINVAL;
2459		return (NULL);
2460	}
2461
2462	if ((duplink = calloc(1, sizeof (struct di_devlink))) == NULL) {
2463		return (NULL);
2464	}
2465
2466	duplink->rel_path = strdup(devlink->rel_path);
2467	duplink->abs_path = strdup(devlink->abs_path);
2468	duplink->content  = strdup(devlink->content);
2469	duplink->type	  = devlink->type;
2470
2471	if (!devlink_valid(duplink)) {
2472		(void) di_devlink_free(duplink);
2473		errno = ENOMEM;
2474		return (NULL);
2475	}
2476
2477	return (duplink);
2478}
2479
2480int
2481di_devlink_free(di_devlink_t devlink)
2482{
2483	if (devlink == NULL) {
2484		errno = EINVAL;
2485		return (-1);
2486	}
2487
2488	free(devlink->rel_path);
2489	free(devlink->abs_path);
2490	free(devlink->content);
2491	free(devlink);
2492
2493	return (0);
2494}
2495
2496/*
2497 * Obtain path relative to dev_dir
2498 */
2499static const char *
2500rel_path(struct di_devlink_handle *hdp, const char *path)
2501{
2502	const size_t len = strlen(hdp->dev_dir);
2503
2504	if (strncmp(path, hdp->dev_dir, len) != 0)
2505		return (NULL);
2506
2507	if (path[len] == '\0')
2508		return (&path[len]);
2509
2510	if (path[len] != '/')
2511		return (NULL);
2512
2513	return (&path[len+1]);
2514}
2515
2516static int
2517recurse_dev(struct di_devlink_handle *hdp, recurse_t *rp)
2518{
2519	int ret = 0;
2520
2521	(void) do_recurse(hdp->dev_dir, hdp, rp, &ret);
2522
2523	return (ret);
2524}
2525
2526static int
2527do_recurse(
2528	const char *dir,
2529	struct di_devlink_handle *hdp,
2530	recurse_t *rp,
2531	int *retp)
2532{
2533	size_t len;
2534	const char *rel;
2535	struct stat sbuf;
2536	char cur[PATH_MAX], *cp;
2537	int i, rv = DI_WALK_CONTINUE;
2538	finddevhdl_t handle;
2539	char *d_name;
2540
2541
2542	if ((rel = rel_path(hdp, dir)) == NULL)
2543		return (DI_WALK_CONTINUE);
2544
2545	/*
2546	 * Skip directories we are not interested in.
2547	 */
2548	for (i = 0; i < N_SKIP_DIRS; i++) {
2549		if (strcmp(rel, skip_dirs[i]) == 0) {
2550			(void) dprintf(DBG_STEP, "do_recurse: skipping %s\n",
2551			    dir);
2552			return (DI_WALK_CONTINUE);
2553		}
2554	}
2555
2556	(void) dprintf(DBG_STEP, "do_recurse: dir = %s\n", dir);
2557
2558	if (finddev_readdir(dir, &handle) != 0)
2559		return (DI_WALK_CONTINUE);
2560
2561	(void) snprintf(cur, sizeof (cur), "%s/", dir);
2562	len = strlen(cur);
2563	cp = cur + len;
2564	len = sizeof (cur) - len;
2565
2566	for (;;) {
2567		if ((d_name = (char *)finddev_next(handle)) == NULL)
2568			break;
2569
2570		if (strlcpy(cp, d_name, len) >= len)
2571			break;
2572
2573		/*
2574		 * Skip files we are not interested in.
2575		 */
2576		for (i = 0; i < N_SKIP_FILES; i++) {
2577
2578			rel = rel_path(hdp, cur);
2579			if (rel == NULL || strcmp(rel, skip_files[i]) == 0) {
2580				(void) dprintf(DBG_STEP,
2581				    "do_recurse: skipping %s\n", cur);
2582				goto next_entry;
2583			}
2584		}
2585
2586		if (lstat(cur, &sbuf) == 0) {
2587			if (S_ISDIR(sbuf.st_mode)) {
2588				rv = do_recurse(cur, hdp, rp, retp);
2589			} else if (S_ISLNK(sbuf.st_mode)) {
2590				rv = rp->fcn(hdp, rp->data, cur);
2591			} else {
2592				(void) dprintf(DBG_STEP,
2593				    "do_recurse: Skipping entry: %s\n", cur);
2594			}
2595		} else {
2596			(void) dprintf(DBG_ERR, "do_recurse: cur(%s): lstat"
2597			    " failed: %s\n", cur, strerror(errno));
2598		}
2599
2600next_entry:
2601		*cp = '\0';
2602
2603		if (rv != DI_WALK_CONTINUE)
2604			break;
2605	}
2606
2607	finddev_close(handle);
2608
2609	return (rv);
2610}
2611
2612
2613static int
2614check_attr(uint32_t attr)
2615{
2616	switch (attr & A_LINK_TYPES) {
2617		case A_PRIMARY:
2618		case A_SECONDARY:
2619			return (1);
2620		default:
2621			dprintf(DBG_ERR, "check_attr: incorrect attr(%u)\n",
2622			    attr);
2623			return (0);
2624	}
2625}
2626
2627static int
2628attr2type(uint32_t attr)
2629{
2630	switch (attr & A_LINK_TYPES) {
2631		case A_PRIMARY:
2632			return (DI_PRIMARY_LINK);
2633		case A_SECONDARY:
2634			return (DI_SECONDARY_LINK);
2635		default:
2636			dprintf(DBG_ERR, "attr2type: incorrect attr(%u)\n",
2637			    attr);
2638			return (0);
2639	}
2640}
2641
2642/* Allocate new node and link it in */
2643static cache_node_t *
2644node_insert(
2645	struct di_devlink_handle *hdp,
2646	cache_node_t *pcnp,
2647	const char *path,
2648	int insert)
2649{
2650	cache_node_t *cnp;
2651
2652	if (path == NULL) {
2653		errno = EINVAL;
2654		SET_DB_ERR(hdp);
2655		return (NULL);
2656	}
2657
2658	if ((cnp = calloc(1, sizeof (cache_node_t))) == NULL) {
2659		SET_DB_ERR(hdp);
2660		return (NULL);
2661	}
2662
2663	if ((cnp->path = strdup(path)) == NULL) {
2664		SET_DB_ERR(hdp);
2665		free(cnp);
2666		return (NULL);
2667	}
2668
2669	cnp->parent = pcnp;
2670
2671	if (pcnp == NULL) {
2672		assert(strcmp(path, "/") == 0);
2673		assert(CACHE(hdp)->root == NULL);
2674		CACHE(hdp)->root = cnp;
2675	} else if (insert == INSERT_HEAD) {
2676		cnp->sib = pcnp->child;
2677		pcnp->child = cnp;
2678	} else if (CACHE_LAST(hdp) && CACHE_LAST(hdp)->node &&
2679	    CACHE_LAST(hdp)->node->parent == pcnp &&
2680	    CACHE_LAST(hdp)->node->sib == NULL) {
2681
2682		CACHE_LAST(hdp)->node->sib = cnp;
2683
2684	} else {
2685		cache_node_t **pp;
2686
2687		for (pp = &pcnp->child; *pp != NULL; pp = &(*pp)->sib)
2688			;
2689		*pp = cnp;
2690	}
2691
2692	return (cnp);
2693}
2694
2695/*
2696 * Allocate a new minor and link it in either at the tail or head
2697 * of the minor list depending on the value of "prev".
2698 */
2699static cache_minor_t *
2700minor_insert(
2701	struct di_devlink_handle *hdp,
2702	cache_node_t *pcnp,
2703	const char *name,
2704	const char *nodetype,
2705	cache_minor_t **prev)
2706{
2707	cache_minor_t *cmnp;
2708
2709	if (pcnp == NULL || name == NULL) {
2710		errno = EINVAL;
2711		SET_DB_ERR(hdp);
2712		return (NULL);
2713	}
2714
2715	/*
2716	 * Some pseudo drivers don't specify nodetype. Assume pseudo if
2717	 * nodetype is not specified.
2718	 */
2719	if (nodetype == NULL)
2720		nodetype = DDI_PSEUDO;
2721
2722	if ((cmnp = calloc(1, sizeof (cache_minor_t))) == NULL) {
2723		SET_DB_ERR(hdp);
2724		return (NULL);
2725	}
2726
2727	cmnp->name = strdup(name);
2728	cmnp->nodetype = strdup(nodetype);
2729	if (cmnp->name == NULL || cmnp->nodetype == NULL) {
2730		SET_DB_ERR(hdp);
2731		free(cmnp->name);
2732		free(cmnp->nodetype);
2733		free(cmnp);
2734		return (NULL);
2735	}
2736
2737	cmnp->node = pcnp;
2738
2739	/* Add to node's minor list */
2740	if (prev == NULL) {
2741		cmnp->sib = pcnp->minor;
2742		pcnp->minor = cmnp;
2743	} else {
2744		assert(*prev == NULL);
2745		*prev = cmnp;
2746	}
2747
2748	return (cmnp);
2749}
2750
2751static cache_link_t *
2752link_insert(
2753	struct di_devlink_handle *hdp,
2754	cache_minor_t *cmnp,
2755	const char *path,
2756	const char *content,
2757	uint32_t attr)
2758{
2759	cache_link_t *clp;
2760
2761	if (path == NULL || content == NULL || !check_attr(attr)) {
2762		errno = EINVAL;
2763		SET_DB_ERR(hdp);
2764		return (NULL);
2765	}
2766
2767	if ((clp = calloc(1, sizeof (cache_link_t))) == NULL) {
2768		SET_DB_ERR(hdp);
2769		return (NULL);
2770	}
2771
2772	clp->path = strdup(path);
2773	clp->content = strdup(content);
2774	if (clp->path == NULL || clp->content == NULL) {
2775		SET_DB_ERR(hdp);
2776		link_free(&clp);
2777		return (NULL);
2778	}
2779
2780	clp->attr = attr;
2781	hash_insert(hdp, clp);
2782	clp->minor = cmnp;
2783
2784	/* Add to minor's link list */
2785	if (cmnp != NULL) {
2786		clp->sib = cmnp->link;
2787		cmnp->link = clp;
2788	} else {
2789		clp->sib = CACHE(hdp)->dngl;
2790		CACHE(hdp)->dngl = clp;
2791	}
2792
2793	return (clp);
2794}
2795
2796static void
2797hash_insert(struct di_devlink_handle *hdp, cache_link_t *clp)
2798{
2799	uint_t hval;
2800
2801	hval = hashfn(hdp, clp->path);
2802	clp->hash = CACHE_HASH(hdp, hval);
2803	CACHE_HASH(hdp, hval) = clp;
2804}
2805
2806
2807static struct db_node *
2808get_node(struct di_devlink_handle *hdp, uint32_t idx)
2809{
2810	return (map_seg(hdp, idx, PROT_READ, DB_NODE));
2811}
2812
2813static struct db_node *
2814set_node(struct di_devlink_handle *hdp, uint32_t idx)
2815{
2816	return (map_seg(hdp, idx, PROT_READ | PROT_WRITE, DB_NODE));
2817}
2818
2819static struct db_minor *
2820get_minor(struct di_devlink_handle *hdp, uint32_t idx)
2821{
2822	return (map_seg(hdp, idx, PROT_READ, DB_MINOR));
2823}
2824
2825static struct db_minor *
2826set_minor(struct di_devlink_handle *hdp, uint32_t idx)
2827{
2828	return (map_seg(hdp, idx, PROT_READ | PROT_WRITE, DB_MINOR));
2829}
2830
2831static struct db_link *
2832get_link(struct di_devlink_handle *hdp, uint32_t idx)
2833{
2834	return (map_seg(hdp, idx, PROT_READ, DB_LINK));
2835}
2836
2837static struct db_link *
2838set_link(struct di_devlink_handle *hdp, uint32_t idx)
2839{
2840	return (map_seg(hdp, idx, PROT_READ | PROT_WRITE, DB_LINK));
2841}
2842
2843static char *
2844get_string(struct di_devlink_handle *hdp, uint32_t idx)
2845{
2846	return (map_seg(hdp, idx, PROT_READ, DB_STR));
2847}
2848
2849static char *
2850set_string(struct di_devlink_handle *hdp, uint32_t idx)
2851{
2852	return (map_seg(hdp, idx, PROT_READ | PROT_WRITE, DB_STR));
2853}
2854
2855
2856/*
2857 * Returns the element corresponding to idx. If the portion of file involved
2858 * is not yet mapped, does an mmap() as well. Existing mappings are not changed.
2859 */
2860static void *
2861map_seg(
2862	struct di_devlink_handle *hdp,
2863	uint32_t idx,
2864	int prot,
2865	db_seg_t seg)
2866{
2867	int s;
2868	off_t off;
2869	size_t slen;
2870	caddr_t addr;
2871
2872	if (idx == DB_NIL) {
2873		return (NULL);
2874	}
2875
2876	if (!VALID_INDEX(hdp, seg, idx)) {
2877		(void) dprintf(DBG_ERR, "map_seg: seg(%d): invalid idx(%u)\n",
2878		    seg, idx);
2879		return (NULL);
2880	}
2881
2882	/*
2883	 * If the seg is already mapped in, use it if the access type is
2884	 * valid.
2885	 */
2886	if (DB_SEG(hdp, seg) != NULL) {
2887		if (DB_SEG_PROT(hdp, seg) != prot) {
2888			(void) dprintf(DBG_ERR, "map_seg: illegal access: "
2889			    "seg[%d]: idx=%u, seg_prot=%d, access=%d\n",
2890			    seg, idx, DB_SEG_PROT(hdp, seg), prot);
2891			return (NULL);
2892		}
2893		return (DB_SEG(hdp, seg) + idx * elem_sizes[seg]);
2894	}
2895
2896	/*
2897	 * Segment is not mapped. Mmap() the segment.
2898	 */
2899	off = seg_size(hdp, DB_HEADER);
2900	for (s = 0; s < seg; s++) {
2901		off += seg_size(hdp, s);
2902	}
2903	slen = seg_size(hdp, seg);
2904
2905	addr = mmap(0, slen, prot, MAP_SHARED, DB(hdp)->db_fd, off);
2906	if (addr == MAP_FAILED) {
2907		(void) dprintf(DBG_ERR, "map_seg: seg[%d]: mmap failed: %s\n",
2908		    seg, strerror(errno));
2909		(void) dprintf(DBG_ERR, "map_seg: args: len=%lu, prot=%d,"
2910		    " fd=%d, off=%ld\n", (ulong_t)slen, prot, DB(hdp)->db_fd,
2911		    off);
2912		return (NULL);
2913	}
2914
2915	DB_SEG(hdp, seg) = addr;
2916	DB_SEG_PROT(hdp, seg) = prot;
2917
2918	(void) dprintf(DBG_STEP, "map_seg: seg[%d]: len=%lu, prot=%d, fd=%d, "
2919	    "off=%ld, seg_base=%p\n", seg, (ulong_t)slen, prot, DB(hdp)->db_fd,
2920	    off, (void *)addr);
2921
2922	return (DB_SEG(hdp, seg) + idx * elem_sizes[seg]);
2923}
2924
2925/*
2926 * Computes the size of a segment rounded up to the nearest page boundary.
2927 */
2928static size_t
2929seg_size(struct di_devlink_handle *hdp, int seg)
2930{
2931	size_t sz;
2932
2933	assert(DB_HDR(hdp)->page_sz);
2934
2935	if (seg == DB_HEADER) {
2936		sz = HDR_LEN;
2937	} else {
2938		assert(DB_NUM(hdp, seg) >= 1);
2939		sz = DB_NUM(hdp, seg) * elem_sizes[seg];
2940	}
2941
2942	sz = (sz / DB_HDR(hdp)->page_sz) + 1;
2943
2944	sz *= DB_HDR(hdp)->page_sz;
2945
2946	return (sz);
2947}
2948
2949static size_t
2950size_db(struct di_devlink_handle *hdp, long page_sz, uint32_t *count)
2951{
2952	int i;
2953	size_t sz;
2954	cache_link_t *clp;
2955
2956	assert(page_sz > 0);
2957
2958	/* Take "NIL" element into account */
2959	for (i = 0; i < DB_TYPES; i++) {
2960		count[i] = 1;
2961	}
2962
2963	count_node(CACHE(hdp)->root, count);
2964
2965	for (clp = CACHE(hdp)->dngl; clp != NULL; clp = clp->sib) {
2966		count_link(clp, count);
2967	}
2968
2969	sz = ((HDR_LEN / page_sz) + 1) * page_sz;
2970	for (i = 0; i < DB_TYPES; i++) {
2971		assert(count[i] >= 1);
2972		sz += (((count[i] * elem_sizes[i]) / page_sz) + 1) * page_sz;
2973		(void) dprintf(DBG_INFO, "N[%u]=%u\n", i, count[i]);
2974	}
2975	(void) dprintf(DBG_INFO, "DB size=%lu\n", (ulong_t)sz);
2976
2977	return (sz);
2978}
2979
2980
2981static void
2982count_node(cache_node_t *cnp, uint32_t *count)
2983{
2984	cache_minor_t *cmnp;
2985
2986	if (cnp == NULL)
2987		return;
2988
2989	count[DB_NODE]++;
2990	count_string(cnp->path, count);
2991
2992	for (cmnp = cnp->minor; cmnp != NULL; cmnp = cmnp->sib) {
2993		count_minor(cmnp, count);
2994	}
2995
2996	for (cnp = cnp->child; cnp != NULL; cnp = cnp->sib) {
2997		count_node(cnp, count);
2998	}
2999
3000}
3001
3002static void
3003count_minor(cache_minor_t *cmnp, uint32_t *count)
3004{
3005	cache_link_t *clp;
3006
3007	if (cmnp == NULL)
3008		return;
3009
3010	count[DB_MINOR]++;
3011	count_string(cmnp->name, count);
3012	count_string(cmnp->nodetype, count);
3013
3014	for (clp = cmnp->link; clp != NULL; clp = clp->sib) {
3015		count_link(clp, count);
3016	}
3017}
3018
3019static void
3020count_link(cache_link_t *clp, uint32_t *count)
3021{
3022	if (clp == NULL)
3023		return;
3024
3025	count[DB_LINK]++;
3026	count_string(clp->path, count);
3027	count_string(clp->content, count);
3028}
3029
3030
3031static void
3032count_string(const char *str, uint32_t *count)
3033{
3034	if (str == NULL) {
3035		(void) dprintf(DBG_ERR, "count_string: NULL argument\n");
3036		return;
3037	}
3038
3039	count[DB_STR] += strlen(str) + 1;
3040}
3041
3042static uint_t
3043hashfn(struct di_devlink_handle *hdp, const char *str)
3044{
3045	const char *cp;
3046	ulong_t hval = 0;
3047
3048	if (str == NULL) {
3049		return (0);
3050	}
3051
3052	assert(CACHE(hdp)->hash_sz >= MIN_HASH_SIZE);
3053
3054	for (cp = str; *cp != '\0'; cp++) {
3055		hval += *cp;
3056	}
3057
3058	return (hval % CACHE(hdp)->hash_sz);
3059}
3060
3061/*
3062 * enter_db_lock()
3063 *
3064 * If the handle is IS_RDWR then we lock as writer to "update" database,
3065 * if IS_RDONLY then we lock as reader to "snapshot" database. The
3066 * implementation uses advisory file locking.
3067 *
3068 * This function returns:
3069 *   == 1	success and grabbed the lock file, we can open the DB.
3070 *   == 0	success but did not lock the lock file,	reader must walk
3071 *		the /dev directory.
3072 *   == -1	failure.
3073 */
3074static int
3075enter_db_lock(struct di_devlink_handle *hdp, const char *root_dir)
3076{
3077	int		fd;
3078	struct flock	lock;
3079	char		lockfile[PATH_MAX];
3080	int		rv;
3081	int		writer = HDL_RDWR(hdp);
3082	static int	did_sync = 0;
3083	int		eintrs;
3084
3085	assert(hdp->lock_fd < 0);
3086
3087	get_db_path(hdp, DB_LOCK, lockfile, sizeof (lockfile));
3088
3089	dprintf(DBG_LCK, "enter_db_lock: %s BEGIN\n",
3090	    writer ? "update" : "snapshot");
3091
3092	/* Record locks are per-process. Protect against multiple threads. */
3093	(void) mutex_lock(&update_mutex);
3094
3095again:	if ((fd = open(lockfile,
3096	    (writer ? (O_RDWR|O_CREAT) : O_RDONLY), DB_LOCK_PERMS)) < 0) {
3097		/*
3098		 * Typically the lock file and the database go hand in hand.
3099		 * If we find that the lock file does not exist (for some
3100		 * unknown reason) and we are the reader then we return
3101		 * success (after triggering devfsadm to create the file and
3102		 * a retry) so that we can still provide service via slow
3103		 * /dev walk.  If we get a failure as a writer we want the
3104		 * error to manifests itself.
3105		 */
3106		if ((errno == ENOENT) && !writer) {
3107			/* If reader, signal once to get files created */
3108			if (did_sync == 0) {
3109				did_sync = 1;
3110				dprintf(DBG_LCK, "enter_db_lock: %s OSYNC\n",
3111				    writer ? "update" : "snapshot");
3112
3113				/* signal to get files created */
3114				(void) devlink_create(root_dir, NULL,
3115				    DCA_DEVLINK_SYNC);
3116				goto again;
3117			}
3118			dprintf(DBG_LCK, "enter_db_lock: %s OPENFAILD %s: "
3119			    "WALK\n", writer ? "update" : "snapshot",
3120			    strerror(errno));
3121			(void) mutex_unlock(&update_mutex);
3122			return (0);		/* success, but not locked */
3123		} else {
3124			dprintf(DBG_LCK, "enter_db_lock: %s OPENFAILD %s\n",
3125			    writer ? "update" : "snapshot", strerror(errno));
3126			(void) mutex_unlock(&update_mutex);
3127			return (-1);		/* failed */
3128		}
3129	}
3130
3131	lock.l_type = writer ? F_WRLCK : F_RDLCK;
3132	lock.l_whence = SEEK_SET;
3133	lock.l_start = 0;
3134	lock.l_len = 0;
3135
3136	/* Enter the lock. */
3137	for (eintrs = 0; eintrs < MAX_LOCK_RETRY; eintrs++) {
3138		rv = fcntl(fd, F_SETLKW, &lock);
3139		if ((rv != -1) || (errno != EINTR))
3140			break;
3141	}
3142
3143	if (rv != -1) {
3144		hdp->lock_fd = fd;
3145		dprintf(DBG_LCK, "enter_db_lock: %s LOCKED\n",
3146		    writer ? "update" : "snapshot");
3147		return (1);		/* success, locked */
3148	}
3149
3150	(void) close(fd);
3151	dprintf(DBG_ERR, "enter_db_lock: %s FAILED: %s: WALK\n",
3152	    writer ? "update" : "snapshot", strerror(errno));
3153	(void) mutex_unlock(&update_mutex);
3154	return (-1);
3155}
3156
3157/*
3158 * Close and re-open lock file every time so that it is recreated if deleted.
3159 */
3160static void
3161exit_db_lock(struct di_devlink_handle *hdp)
3162{
3163	struct flock	unlock;
3164	int		writer = HDL_RDWR(hdp);
3165
3166	if (hdp->lock_fd < 0) {
3167		return;
3168	}
3169
3170	unlock.l_type = F_UNLCK;
3171	unlock.l_whence = SEEK_SET;
3172	unlock.l_start = 0;
3173	unlock.l_len = 0;
3174
3175	dprintf(DBG_LCK, "exit_db_lock : %s UNLOCKED\n",
3176	    writer ? "update" : "snapshot");
3177	if (fcntl(hdp->lock_fd, F_SETLK, &unlock) == -1) {
3178		dprintf(DBG_ERR, "exit_db_lock : %s failed: %s\n",
3179		    writer ? "update" : "snapshot", strerror(errno));
3180	}
3181
3182	(void) close(hdp->lock_fd);
3183
3184	hdp->lock_fd = -1;
3185
3186	(void) mutex_unlock(&update_mutex);
3187}
3188
3189/*
3190 * returns 1 if contents is a minor node in /devices.
3191 * If mn_root is not NULL, mn_root is set to:
3192 *	if contents is a /dev node, mn_root = contents
3193 *			OR
3194 *	if contents is a /devices node, mn_root set to the '/'
3195 *	following /devices.
3196 */
3197int
3198is_minor_node(const char *contents, const char **mn_root)
3199{
3200	char *ptr, *prefix;
3201
3202	prefix = "../devices/";
3203
3204	if ((ptr = strstr(contents, prefix)) != NULL) {
3205
3206		/* mn_root should point to the / following /devices */
3207		if (mn_root != NULL) {
3208			*mn_root = ptr += strlen(prefix) - 1;
3209		}
3210		return (1);
3211	}
3212
3213	prefix = "/devices/";
3214
3215	if (strncmp(contents, prefix, strlen(prefix)) == 0) {
3216
3217		/* mn_root should point to the / following /devices/ */
3218		if (mn_root != NULL) {
3219			*mn_root = contents + strlen(prefix) - 1;
3220		}
3221		return (1);
3222	}
3223
3224	if (mn_root != NULL) {
3225		*mn_root = contents;
3226	}
3227	return (0);
3228}
3229
3230static int
3231s_readlink(const char *link, char *buf, size_t blen)
3232{
3233	int rv;
3234
3235	if ((rv = readlink(link, buf, blen)) == -1)
3236		goto bad;
3237
3238	if (rv >= blen && buf[blen - 1] != '\0') {
3239		errno = ENAMETOOLONG;
3240		goto bad;
3241	} else if (rv < blen) {
3242		buf[rv] = '\0';
3243	}
3244
3245	return (0);
3246bad:
3247	dprintf(DBG_ERR, "s_readlink: %s: failed: %s\n",
3248	    link, strerror(errno));
3249	return (-1);
3250}
3251
3252/*
3253 * Synchronous link creation interface routines
3254 * The scope of the operation is determined by the "name" arg.
3255 * "name" can be NULL, a driver name or a devfs pathname (without /devices)
3256 *
3257 *	"name"				creates
3258 *	======				=======
3259 *
3260 *	NULL		=>		All devlinks in system
3261 *	<driver>	=>		devlinks for named driver
3262 *	/pci@1		=>		devlinks for subtree rooted at pci@1
3263 *	/pseudo/foo@0:X	=>		devlinks for minor X
3264 *
3265 * devlink_create() returns 0 on success or an errno value on failure
3266 */
3267
3268#define	MAX_DAEMON_ATTEMPTS 2
3269
3270static int
3271devlink_create(const char *root, const char *name, int dca_devlink_flag)
3272{
3273	int i;
3274	int install;
3275	struct dca_off dca;
3276
3277	assert(root);
3278
3279	/*
3280	 * Convert name into arg for door_call
3281	 */
3282	if (dca_init(name, &dca, dca_devlink_flag) != 0)
3283		return (EINVAL);
3284
3285	/*
3286	 * Attempt to use the daemon first
3287	 */
3288	i = 0;
3289	do {
3290		install = daemon_call(root, &dca);
3291
3292		dprintf(DBG_INFO, "daemon_call() retval=%d\n", dca.dca_error);
3293
3294		/*
3295		 * Retry only if door server isn't running
3296		 */
3297		if (dca.dca_error != ENOENT && dca.dca_error != EBADF) {
3298			return (dca.dca_error);
3299		}
3300
3301		dca.dca_error = 0;
3302
3303		/*
3304		 * To improve performance defer this check until the first
3305		 * failure. Safe to defer as door server checks perms.
3306		 */
3307		if (geteuid() != 0)
3308			return (EPERM);
3309	/*
3310	 * Daemon may not be running. Try to start it.
3311	 */
3312	} while ((++i < MAX_DAEMON_ATTEMPTS) &&
3313	    start_daemon(root, install) == 0);
3314
3315	dprintf(DBG_INFO, "devlink_create: can't start daemon\n");
3316
3317	assert(dca.dca_error == 0);
3318
3319	/*
3320	 * If the daemon cannot be started execute the devfsadm command.
3321	 */
3322	exec_cmd(root, &dca);
3323
3324	return (dca.dca_error);
3325}
3326
3327/*
3328 * The "name" member of "struct dca" contains data in the following order
3329 *	root'\0'minor'\0'driver'\0'
3330 * The root component is always present at offset 0 in the "name" field.
3331 * The driver and minor are optional. If present they have a non-zero
3332 * offset in the "name" member.
3333 */
3334static int
3335dca_init(const char *name, struct dca_off *dcp, int dca_flags)
3336{
3337	char *cp;
3338
3339	dcp->dca_root = 0;
3340	dcp->dca_minor = 0;
3341	dcp->dca_driver = 0;
3342	dcp->dca_error = 0;
3343	dcp->dca_flags = dca_flags;
3344	dcp->dca_name[0] = '\0';
3345
3346	name = name ? name : "/";
3347
3348	/*
3349	 *  Check if name is a driver name
3350	 */
3351	if (*name != '/') {
3352		(void) snprintf(dcp->dca_name, sizeof (dcp->dca_name),
3353		    "/ %s", name);
3354		dcp->dca_root = 0;
3355		*(dcp->dca_name + 1) = '\0';
3356		dcp->dca_driver = 2;
3357		return (0);
3358	}
3359
3360	(void) snprintf(dcp->dca_name, sizeof (dcp->dca_name), "%s", name);
3361
3362	/*
3363	 * "/devices" not allowed in devfs pathname
3364	 */
3365	if (is_minor_node(name, NULL))
3366		return (-1);
3367
3368	dcp->dca_root = 0;
3369	if (cp = strrchr(dcp->dca_name, ':')) {
3370		*cp++ = '\0';
3371		dcp->dca_minor = cp - dcp->dca_name;
3372	}
3373
3374	return (0);
3375}
3376
3377
3378#define	DAEMON_STARTUP_TIME	1 /* 1 second. This may need to be adjusted */
3379#define	DEVNAME_CHECK_FILE	"/etc/devname_check_RDONLY"
3380
3381static int
3382daemon_call(const char *root, struct dca_off *dcp)
3383{
3384	door_arg_t	arg;
3385	int		fd, door_error;
3386	sigset_t	oset, nset;
3387	char		synch_door[PATH_MAX];
3388	struct stat	sb;
3389	char		*prefix;
3390	int		rofd;
3391	int		rdonly;
3392	int		install = 0;
3393
3394	/*
3395	 * If root is readonly, there are two possibilities:
3396	 *	- we are in some sort of install scenario
3397	 *	- we are early in boot
3398	 * If the latter we don't want daemon_call()  to succeed.
3399	 * else we want to use /tmp/etc/dev
3400	 *
3401	 * Both of these requrements are fulfilled if we check for
3402	 * for a root owned door file in /tmp/etc/dev. If we are
3403	 * early in boot, the door file won't exist, so this call
3404	 * will fail.
3405	 *
3406	 * If we are in install, the door file will be present.
3407	 *
3408	 * If root is read-only, try only once, since libdevinfo
3409	 * isn't capable of starting devfsadmd correctly in that
3410	 * situation.
3411	 *
3412	 * Don't use statvfs() to check for readonly roots since it
3413	 * doesn't always report the truth.
3414	 */
3415	rofd = -1;
3416	rdonly = 0;
3417	if ((rofd = open(DEVNAME_CHECK_FILE, O_WRONLY|O_CREAT|O_TRUNC, 0644))
3418	    == -1 && errno == EROFS) {
3419		rdonly = 1;
3420		prefix = "/tmp";
3421	} else {
3422		if (rofd != -1) {
3423			(void) close(rofd);
3424			(void) unlink(DEVNAME_CHECK_FILE);
3425		}
3426		prefix = (char *)root;
3427	}
3428
3429	if (rdonly && stat(DEVNAME_CHECK_FILE, &sb) != -1)
3430		install = 1;
3431
3432	(void) snprintf(synch_door, sizeof (synch_door),
3433	    "%s/etc/dev/%s", prefix, DEVFSADM_SYNCH_DOOR);
3434
3435	/*
3436	 * Return ENOTSUP to prevent retries if root is readonly
3437	 */
3438	if (stat(synch_door, &sb) == -1 || sb.st_uid != 0) {
3439		if (rdonly)
3440			dcp->dca_error = ENOTSUP;
3441		else
3442			dcp->dca_error = ENOENT;
3443		dprintf(DBG_ERR, "stat failed: %s: no file or not root owned\n",
3444		    synch_door);
3445		return (install);
3446	}
3447
3448	if ((fd = open(synch_door, O_RDONLY)) == -1) {
3449		dcp->dca_error = errno;
3450		dprintf(DBG_ERR, "open of %s failed: %s\n",
3451		    synch_door, strerror(errno));
3452		return (install);
3453	}
3454
3455	arg.data_ptr = (char *)dcp;
3456	arg.data_size = sizeof (*dcp);
3457	arg.desc_ptr = NULL;
3458	arg.desc_num = 0;
3459	arg.rbuf = (char *)dcp;
3460	arg.rsize = sizeof (*dcp);
3461
3462	/*
3463	 * Block signals to this thread until door call
3464	 * completes.
3465	 */
3466	(void) sigfillset(&nset);
3467	(void) sigemptyset(&oset);
3468	(void) sigprocmask(SIG_SETMASK, &nset, &oset);
3469	if (door_call(fd, &arg)) {
3470		door_error = 1;
3471		dcp->dca_error = errno;
3472	}
3473	(void) sigprocmask(SIG_SETMASK, &oset, NULL);
3474
3475	(void) close(fd);
3476
3477	if (door_error)
3478		return (install);
3479
3480	assert(arg.data_ptr);
3481
3482	/*LINTED*/
3483	dcp->dca_error = ((struct dca_off *)arg.data_ptr)->dca_error;
3484
3485	/*
3486	 * The doors interface may return data in a different buffer
3487	 * If that happens, deallocate buffer via munmap()
3488	 */
3489	if (arg.rbuf != (char *)dcp)
3490		(void) munmap(arg.rbuf, arg.rsize);
3491
3492	return (install);
3493}
3494
3495#define	DEVFSADM_PATH	"/usr/sbin/devfsadm"
3496#define	DEVFSADM	"devfsadm"
3497
3498#define	DEVFSADMD_PATH	"/usr/lib/devfsadm/devfsadmd"
3499#define	DEVFSADM_DAEMON	"devfsadmd"
3500
3501static int
3502start_daemon(const char *root, int install)
3503{
3504	int rv, i = 0;
3505	char *argv[20];
3506
3507	argv[i++] = DEVFSADM_DAEMON;
3508	if (install) {
3509		argv[i++] = "-a";
3510		argv[i++] = "/tmp";
3511		argv[i++] = "-p";
3512		argv[i++] = "/tmp/root/etc/path_to_inst";
3513	} else if (strcmp(root, "/")) {
3514		argv[i++] = "-r";
3515		argv[i++] = (char *)root;
3516	}
3517	argv[i++] = NULL;
3518
3519	rv = do_exec(DEVFSADMD_PATH, argv);
3520
3521	(void) sleep(DAEMON_STARTUP_TIME);
3522
3523	return (rv);
3524}
3525
3526static void
3527exec_cmd(const char *root, struct dca_off *dcp)
3528{
3529	int i;
3530	char *argv[20];
3531
3532	i = 0;
3533	argv[i++] = DEVFSADM;
3534
3535	/*
3536	 * Load drivers only if -i is specified
3537	 */
3538	if (dcp->dca_driver) {
3539		argv[i++] = "-i";
3540		argv[i++] = &dcp->dca_name[dcp->dca_driver];
3541	} else {
3542		argv[i++] = "-n";
3543	}
3544
3545	if (root != NULL && strcmp(root, "/") != 0) {
3546		argv[i++] = "-r";
3547		argv[i++] = (char *)root;
3548	}
3549
3550	argv[i] = NULL;
3551
3552	if (do_exec(DEVFSADM_PATH, argv))
3553		dcp->dca_error = errno;
3554}
3555
3556static int
3557do_exec(const char *path, char *const argv[])
3558{
3559	int i;
3560	pid_t cpid;
3561
3562#ifdef	DEBUG
3563	dprintf(DBG_INFO, "Executing %s\n\tArgument list:", path);
3564	for (i = 0; argv[i] != NULL; i++) {
3565		dprintf(DBG_INFO, " %s", argv[i]);
3566	}
3567	dprintf(DBG_INFO, "\n");
3568#endif
3569
3570	if ((cpid = fork1()) == -1) {
3571		dprintf(DBG_ERR, "fork1 failed: %s\n", strerror(errno));
3572		return (-1);
3573	}
3574
3575	if (cpid == 0) { /* child process */
3576		int fd;
3577
3578		if ((fd = open("/dev/null", O_RDWR)) >= 0) {
3579			(void) dup2(fd, fileno(stdout));
3580			(void) dup2(fd, fileno(stderr));
3581			(void) close(fd);
3582
3583			(void) execv(path, argv);
3584		} else {
3585			dprintf(DBG_ERR, "open of /dev/null failed: %s\n",
3586			    strerror(errno));
3587		}
3588
3589		_exit(-1);
3590	}
3591
3592	/* Parent process */
3593	if (waitpid(cpid, &i, 0) == cpid) {
3594		if (WIFEXITED(i)) {
3595			if (WEXITSTATUS(i) == 0) {
3596				dprintf(DBG_STEP,
3597				    "do_exec: child exited normally\n");
3598				return (0);
3599			} else
3600				errno = EINVAL;
3601		} else {
3602			/*
3603			 * The child was interrupted by a signal
3604			 */
3605			errno = EINTR;
3606		}
3607		dprintf(DBG_ERR, "child terminated abnormally: %s\n",
3608		    strerror(errno));
3609	} else {
3610		dprintf(DBG_ERR, "waitpid failed: %s\n", strerror(errno));
3611	}
3612
3613	return (-1);
3614}
3615
3616static int
3617walk_cache_links(di_devlink_handle_t hdp, cache_link_t *clp, link_desc_t *linkp)
3618{
3619	int i;
3620
3621	assert(HDL_RDWR(hdp) || HDL_RDONLY(hdp));
3622
3623	dprintf(DBG_INFO, "walk_cache_links: initial link: %s\n",
3624	    clp ? clp->path : "<NULL>");
3625
3626	/*
3627	 * First search the links under the specified minor. On the
3628	 * 2nd pass, search the dangling list - secondary links may
3629	 * exist on this list since they are not resolved during the
3630	 * /dev walk.
3631	 */
3632	for (i = 0; i < 2; i++) {
3633		for (; clp != NULL; clp = clp->sib) {
3634			struct di_devlink vlink = {NULL};
3635
3636			assert(clp->path[0] != '/');
3637
3638			vlink.rel_path = clp->path;
3639			vlink.content = clp->content;
3640			vlink.type = attr2type(clp->attr);
3641
3642			if (visit_link(hdp, linkp, &vlink)
3643			    != DI_WALK_CONTINUE) {
3644				dprintf(DBG_INFO, "walk_cache_links: "
3645				    "terminating at link: %s\n", clp->path);
3646				goto out;
3647			}
3648		}
3649
3650		clp = CACHE(hdp)->dngl;
3651	}
3652
3653out:
3654
3655	/* If i < 2, we terminated the walk prematurely */
3656	return (i < 2 ? DI_WALK_TERMINATE : DI_WALK_CONTINUE);
3657}
3658
3659static void
3660walk_all_cache(di_devlink_handle_t hdp, link_desc_t *linkp)
3661{
3662	int i;
3663	cache_link_t *clp;
3664
3665	dprintf(DBG_INFO, "walk_all_cache: entered\n");
3666
3667	for (i = 0; i < CACHE(hdp)->hash_sz; i++) {
3668		clp = CACHE_HASH(hdp, i);
3669		for (; clp; clp = clp->hash) {
3670			struct di_devlink vlink = {NULL};
3671
3672			assert(clp->path[0] != '/');
3673
3674			vlink.rel_path = clp->path;
3675			vlink.content = clp->content;
3676			vlink.type = attr2type(clp->attr);
3677			if (visit_link(hdp, linkp, &vlink) !=
3678			    DI_WALK_CONTINUE) {
3679				dprintf(DBG_INFO, "walk_all_cache: terminating "
3680				    "walk at link: %s\n", clp->path);
3681				return;
3682			}
3683		}
3684	}
3685}
3686
3687static void
3688walk_cache_minor(di_devlink_handle_t hdp, const char *mpath, link_desc_t *linkp)
3689{
3690	cache_minor_t *cmnp;
3691
3692	assert(mpath);
3693
3694	if ((cmnp = lookup_minor(hdp, mpath, NULL, TYPE_CACHE)) != NULL) {
3695		(void) walk_cache_links(hdp, cmnp->link, linkp);
3696	} else {
3697		dprintf(DBG_ERR, "lookup minor failed: %s\n", mpath);
3698	}
3699}
3700
3701static void
3702walk_cache_node(di_devlink_handle_t hdp, const char *path, link_desc_t *linkp)
3703{
3704	cache_minor_t *cmnp;
3705	cache_node_t *cnp;
3706
3707	assert(path);
3708
3709	if ((cnp = lookup_node(hdp, (char *)path, TYPE_CACHE)) == NULL) {
3710		dprintf(DBG_ERR, "lookup node failed: %s\n", path);
3711		return;
3712	}
3713
3714	for (cmnp = cnp->minor; cmnp != NULL; cmnp = cmnp->sib) {
3715		if (walk_cache_links(hdp, cmnp->link, linkp)
3716		    == DI_WALK_TERMINATE)
3717			break;
3718	}
3719}
3720
3721/*
3722 * Private function
3723 *
3724 * Walk cached links corresponding to the given path.
3725 *
3726 * path		path to a node or minor node.
3727 *
3728 * flags	specifies the type of devlinks to be selected.
3729 *		If DI_PRIMARY_LINK is used, only primary links are selected.
3730 *		If DI_SECONDARY_LINK is specified, only secondary links
3731 *		are selected.
3732 *		If neither flag is specified, all devlinks are selected.
3733 *
3734 * re		An extended regular expression in regex(5) format which
3735 *		selects the /dev links to be returned. The regular
3736 *		expression should use link pathnames relative to
3737 *		/dev. i.e. without the leading "/dev/" prefix.
3738 *		A NULL value matches all devlinks.
3739 */
3740int
3741di_devlink_cache_walk(di_devlink_handle_t hdp,
3742	const char *re,
3743	const char *path,
3744	uint_t flags,
3745	void *arg,
3746	int (*devlink_callback)(di_devlink_t, void *))
3747{
3748	regex_t reg;
3749	link_desc_t linkd = {NULL};
3750
3751	if (hdp == NULL || path == NULL || !link_flag(flags) ||
3752	    !HDL_RDWR(hdp) || devlink_callback == NULL) {
3753		errno = EINVAL;
3754		return (-1);
3755	}
3756
3757	linkd.flags = flags;
3758	linkd.arg = arg;
3759	linkd.fcn = devlink_callback;
3760
3761	if (re) {
3762		if (regcomp(&reg, re, REG_EXTENDED) != 0)
3763			return (-1);
3764		linkd.regp = &reg;
3765	}
3766
3767	if (minor_colon(path) == NULL) {
3768		walk_cache_node(hdp, path, &linkd);
3769	} else {
3770		walk_cache_minor(hdp, path, &linkd);
3771	}
3772
3773	if (re)
3774		regfree(&reg);
3775
3776	return (0);
3777}
3778
3779#define	DEBUG_ENV_VAR	"_DEVLINK_DEBUG"
3780static int _devlink_debug = -1;
3781
3782/*
3783 * debug level is initialized to -1.
3784 * On first call into this routine, debug level is set.
3785 * If debug level is zero, debugging msgs are disabled.
3786 */
3787static void
3788debug_print(debug_level_t msglevel, const char *fmt, va_list ap)
3789{
3790	char	*cp;
3791	int	save;
3792
3793	/*
3794	 * We shouldn't be here if debug is disabled
3795	 */
3796	assert(_devlink_debug != 0);
3797
3798	/*
3799	 * Set debug level on first call into this routine
3800	 */
3801	if (_devlink_debug < 0) {
3802		if ((cp = getenv(DEBUG_ENV_VAR)) == NULL) {
3803			_devlink_debug = 0;
3804			return;
3805		}
3806
3807		save = errno;
3808		errno = 0;
3809		_devlink_debug = strtol(cp, NULL, 10);
3810		if (errno != 0 || _devlink_debug < 0)  {
3811			_devlink_debug = 0;
3812			errno = save;
3813			return;
3814		}
3815		errno = save;
3816
3817		if (!_devlink_debug)
3818			return;
3819	}
3820
3821	/* debug msgs are enabled */
3822	assert(_devlink_debug > 0);
3823
3824	if (_devlink_debug < msglevel)
3825		return;
3826	if ((_devlink_debug == DBG_LCK) && (msglevel != _devlink_debug))
3827		return;
3828
3829	/* Print a distinctive label for error msgs */
3830	if (msglevel == DBG_ERR) {
3831		(void) fprintf(stderr, "[ERROR]: ");
3832	}
3833
3834	(void) vfprintf(stderr, fmt, ap);
3835	(void) fflush(stderr);
3836}
3837
3838/* ARGSUSED */
3839/* PRINTFLIKE2 */
3840void
3841dprintf(debug_level_t msglevel, const char *fmt, ...)
3842{
3843	va_list ap;
3844
3845	assert(msglevel > 0);
3846	if (!_devlink_debug)
3847		return;
3848
3849	va_start(ap, fmt);
3850	debug_print(msglevel, fmt, ap);
3851	va_end(ap);
3852}
3853