1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright 2020 Joyent, Inc.
25 * Copyright 2017 RackTop Systems.
26 */
27
28#include <assert.h>
29#include <fcntl.h>
30#include <poll.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <zlib.h>
35#include <libgen.h>
36#include <sys/spa.h>
37#include <sys/stat.h>
38#include <sys/processor.h>
39#include <sys/zfs_context.h>
40#include <sys/rrwlock.h>
41#include <sys/zmod.h>
42#include <sys/utsname.h>
43#include <sys/systeminfo.h>
44#include <libzutil.h>
45#include <sys/crypto/common.h>
46#include <sys/crypto/impl.h>
47#include <sys/crypto/api.h>
48#include <sys/sha2.h>
49#include <crypto/aes/aes_impl.h>
50
51extern void system_taskq_init(void);
52extern void system_taskq_fini(void);
53
54/*
55 * Emulation of kernel services in userland.
56 */
57
58pgcnt_t physmem;
59vnode_t *rootdir = (vnode_t *)0xabcd1234;
60char hw_serial[HW_HOSTID_LEN];
61kmutex_t cpu_lock;
62vmem_t *zio_arena = NULL;
63
64/* If set, all blocks read will be copied to the specified directory. */
65char *vn_dumpdir = NULL;
66
67struct utsname utsname = {
68	"userland", "libzpool", "1", "1", "na"
69};
70
71/*
72 * =========================================================================
73 * vnode operations
74 * =========================================================================
75 */
76/*
77 * Note: for the xxxat() versions of these functions, we assume that the
78 * starting vp is always rootdir (which is true for spa_directory.c, the only
79 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
80 * them by adding '/' in front of the path.
81 */
82
83/*ARGSUSED*/
84int
85vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
86{
87	int fd;
88	int dump_fd;
89	vnode_t *vp;
90	int old_umask;
91	char realpath[MAXPATHLEN];
92	struct stat64 st;
93
94	/*
95	 * If we're accessing a real disk from userland, we need to use
96	 * the character interface to avoid caching.  This is particularly
97	 * important if we're trying to look at a real in-kernel storage
98	 * pool from userland, e.g. via zdb, because otherwise we won't
99	 * see the changes occurring under the segmap cache.
100	 * On the other hand, the stupid character device returns zero
101	 * for its size.  So -- gag -- we open the block device to get
102	 * its size, and remember it for subsequent VOP_GETATTR().
103	 */
104	if (strncmp(path, "/dev/", 5) == 0) {
105		char *dsk;
106		fd = open64(path, O_RDONLY);
107		if (fd == -1)
108			return (errno);
109		if (fstat64(fd, &st) == -1) {
110			close(fd);
111			return (errno);
112		}
113		close(fd);
114		(void) sprintf(realpath, "%s", path);
115		dsk = strstr(path, "/dsk/");
116		if (dsk != NULL)
117			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
118			    dsk + 1);
119	} else {
120		(void) sprintf(realpath, "%s", path);
121		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
122			return (errno);
123	}
124
125	if (flags & FCREAT)
126		old_umask = umask(0);
127
128	/*
129	 * The construct 'flags - FREAD' conveniently maps combinations of
130	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
131	 */
132	fd = open64(realpath, flags - FREAD, mode);
133
134	if (flags & FCREAT)
135		(void) umask(old_umask);
136
137	if (vn_dumpdir != NULL) {
138		char dumppath[MAXPATHLEN];
139		(void) snprintf(dumppath, sizeof (dumppath),
140		    "%s/%s", vn_dumpdir, basename(realpath));
141		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
142		if (dump_fd == -1)
143			return (errno);
144	} else {
145		dump_fd = -1;
146	}
147
148	if (fd == -1)
149		return (errno);
150
151	if (fstat64(fd, &st) == -1) {
152		close(fd);
153		return (errno);
154	}
155
156	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
157
158	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
159
160	vp->v_fd = fd;
161	vp->v_size = st.st_size;
162	vp->v_path = spa_strdup(path);
163	vp->v_dump_fd = dump_fd;
164
165	return (0);
166}
167
168/*ARGSUSED*/
169int
170vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
171    int x3, vnode_t *startvp, int fd)
172{
173	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
174	int ret;
175
176	ASSERT(startvp == rootdir);
177	(void) sprintf(realpath, "/%s", path);
178
179	/* fd ignored for now, need if want to simulate nbmand support */
180	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
181
182	umem_free(realpath, strlen(path) + 2);
183
184	return (ret);
185}
186
187/*ARGSUSED*/
188int
189vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
190    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
191{
192	ssize_t iolen, split;
193
194	if (uio == UIO_READ) {
195		iolen = pread64(vp->v_fd, addr, len, offset);
196		if (vp->v_dump_fd != -1) {
197			int status =
198			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
199			ASSERT(status != -1);
200		}
201	} else {
202		/*
203		 * To simulate partial disk writes, we split writes into two
204		 * system calls so that the process can be killed in between.
205		 */
206		int sectors = len >> SPA_MINBLOCKSHIFT;
207		split = (sectors > 0 ? rand() % sectors : 0) <<
208		    SPA_MINBLOCKSHIFT;
209		iolen = pwrite64(vp->v_fd, addr, split, offset);
210		iolen += pwrite64(vp->v_fd, (char *)addr + split,
211		    len - split, offset + split);
212	}
213
214	if (iolen == -1)
215		return (errno);
216	if (residp)
217		*residp = len - iolen;
218	else if (iolen != len)
219		return (EIO);
220	return (0);
221}
222
223void
224vn_close(vnode_t *vp)
225{
226	close(vp->v_fd);
227	if (vp->v_dump_fd != -1)
228		close(vp->v_dump_fd);
229	spa_strfree(vp->v_path);
230	umem_free(vp, sizeof (vnode_t));
231}
232
233/*
234 * At a minimum we need to update the size since vdev_reopen()
235 * will no longer call vn_openat().
236 */
237int
238fop_getattr(vnode_t *vp, vattr_t *vap)
239{
240	struct stat64 st;
241
242	if (fstat64(vp->v_fd, &st) == -1) {
243		close(vp->v_fd);
244		return (errno);
245	}
246
247	vap->va_size = st.st_size;
248	return (0);
249}
250
251#ifdef ZFS_DEBUG
252
253/*
254 * =========================================================================
255 * Figure out which debugging statements to print
256 * =========================================================================
257 */
258
/* Comma-separated list of debug selectors; NULL when none was given. */
static char *dprintf_string;
/* Non-zero once the selector "on" has been seen by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete, comma-delimited entry in
 * dprintf_string.
 * String format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 on an exact entry match, 0 otherwise (including when no debug
 * string has been configured).
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *entry;

	for (entry = dprintf_string; entry != NULL; ) {
		char *comma;

		/* A prefix match only counts if the entry ends right there. */
		if (strncmp(entry, string, len) == 0 &&
		    (entry[len] == ',' || entry[len] == '\0'))
			return (1);

		/* Advance to the character after the next ',' (if any). */
		comma = strchr(entry, ',');
		entry = (comma != NULL) ? comma + 1 : NULL;
	}
	return (0);
}
283
284void
285dprintf_setup(int *argc, char **argv)
286{
287	int i, j;
288
289	/*
290	 * Debugging can be specified two ways: by setting the
291	 * environment variable ZFS_DEBUG, or by including a
292	 * "debug=..."  argument on the command line.  The command
293	 * line setting overrides the environment variable.
294	 */
295
296	for (i = 1; i < *argc; i++) {
297		int len = strlen("debug=");
298		/* First look for a command line argument */
299		if (strncmp("debug=", argv[i], len) == 0) {
300			dprintf_string = argv[i] + len;
301			/* Remove from args */
302			for (j = i; j < *argc; j++)
303				argv[j] = argv[j+1];
304			argv[j] = NULL;
305			(*argc)--;
306		}
307	}
308
309	if (dprintf_string == NULL) {
310		/* Look for ZFS_DEBUG environment variable */
311		dprintf_string = getenv("ZFS_DEBUG");
312	}
313
314	/*
315	 * Are we just turning on all debugging?
316	 */
317	if (dprintf_find_string("on"))
318		dprintf_print_all = 1;
319
320	if (dprintf_string != NULL)
321		zfs_flags |= ZFS_DEBUG_DPRINTF;
322}
323
324/*
325 * =========================================================================
326 * debug printfs
327 * =========================================================================
328 */
329void
330__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
331{
332	const char *newfile;
333	va_list adx;
334
335	/*
336	 * Get rid of annoying "../common/" prefix to filename.
337	 */
338	newfile = strrchr(file, '/');
339	if (newfile != NULL) {
340		newfile = newfile + 1; /* Get rid of leading / */
341	} else {
342		newfile = file;
343	}
344
345	if (dprintf_print_all ||
346	    dprintf_find_string(newfile) ||
347	    dprintf_find_string(func)) {
348		/* Print out just the function name if requested */
349		flockfile(stdout);
350		if (dprintf_find_string("pid"))
351			(void) printf("%d ", getpid());
352		if (dprintf_find_string("tid"))
353			(void) printf("%u ", thr_self());
354		if (dprintf_find_string("cpu"))
355			(void) printf("%u ", getcpuid());
356		if (dprintf_find_string("time"))
357			(void) printf("%llu ", gethrtime());
358		if (dprintf_find_string("long"))
359			(void) printf("%s, line %d: ", newfile, line);
360		(void) printf("%s: ", func);
361		va_start(adx, fmt);
362		(void) vprintf(fmt, adx);
363		va_end(adx);
364		funlockfile(stdout);
365	}
366}
367
368#endif /* ZFS_DEBUG */
369
370/*
371 * =========================================================================
372 * kobj interfaces
373 * =========================================================================
374 */
375struct _buf *
376kobj_open_file(char *name)
377{
378	struct _buf *file;
379	vnode_t *vp;
380
381	/* set vp as the _fd field of the file */
382	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
383	    -1) != 0)
384		return ((void *)-1UL);
385
386	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
387	file->_fd = (intptr_t)vp;
388	return (file);
389}
390
391int
392kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
393{
394	ssize_t resid;
395
396	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
397	    UIO_SYSSPACE, 0, 0, 0, &resid);
398
399	return (size - resid);
400}
401
402void
403kobj_close_file(struct _buf *file)
404{
405	vn_close((vnode_t *)file->_fd);
406	umem_free(file, sizeof (struct _buf));
407}
408
409int
410kobj_get_filesize(struct _buf *file, uint64_t *size)
411{
412	struct stat64 st;
413	vnode_t *vp = (vnode_t *)file->_fd;
414
415	if (fstat64(vp->v_fd, &st) == -1) {
416		vn_close(vp);
417		return (errno);
418	}
419	*size = st.st_size;
420	return (0);
421}
422
423/*
424 * =========================================================================
425 * misc routines
426 * =========================================================================
427 */
428
/*
 * Find lowest one bit set.
 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
 * This is basically a reimplementation of ffsll(), which is GNU specific.
 *
 * Works as a binary search: whenever the low 'width' bits are all zero the
 * answer lies above them, so they are shifted out and counted.
 */
int
lowbit64(uint64_t i)
{
	int pos = 1;
	int width;

	if (i == 0)
		return (0);

	for (width = 32; width > 0; width >>= 1) {
		uint64_t lowmask = (1ULL << width) - 1;

		if ((i & lowmask) == 0) {
			i >>= width;
			pos += width;
		}
	}

	return (pos);
}
471
/*
 * Find highest one bit set.
 * Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 *
 * Works as a binary search: whenever anything is set above the low 'width'
 * bits, those low bits are shifted out and counted.
 */
int
highbit64(uint64_t i)
{
	int pos = 1;
	int width;

	if (i == 0)
		return (0);

	for (width = 32; width > 0; width >>= 1) {
		if ((i >> width) != 0) {
			i >>= width;
			pos += width;
		}
	}

	return (pos);
}
499
500/*
501 * =========================================================================
502 * kernel emulation setup & teardown
503 * =========================================================================
504 */
/*
 * umem "nofail" callback: report the failure on stderr and abort so that a
 * core dump is produced.  Never returns in practice; the return statement
 * only satisfies the callback's int signature.
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof includes the terminating NUL, which must not be written. */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
514
515void
516kernel_init(int mode)
517{
518	extern uint_t rrw_tsd_key;
519
520	umem_nofail_callback(umem_out_of_memory);
521
522	physmem = sysconf(_SC_PHYS_PAGES);
523
524	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
525	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
526
527	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
528	    (mode & FWRITE) ? get_system_hostid() : 0);
529
530	system_taskq_init();
531
532	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
533
534	spa_init(mode);
535
536	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
537}
538
/*
 * Tear down the emulated kernel environment: shut down the SPA layer first,
 * then the system taskqs.
 */
void
kernel_fini(void)
{
	spa_fini();
	system_taskq_fini();
}
546
547/* ARGSUSED */
548uint32_t
549zone_get_hostid(void *zonep)
550{
551	/*
552	 * We're emulating the system's hostid in userland.
553	 */
554	return (strtoul(hw_serial, NULL, 10));
555}
556
557int
558z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
559{
560	int ret;
561	uLongf len = *dstlen;
562
563	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
564		*dstlen = (size_t)len;
565
566	return (ret);
567}
568
569int
570z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
571    int level)
572{
573	int ret;
574	uLongf len = *dstlen;
575
576	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
577		*dstlen = (size_t)len;
578
579	return (ret);
580}
581
582int
583zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
584{
585	return (0);
586}
587
588int
589zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
590{
591	return (0);
592}
593
594int
595zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
596{
597	return (0);
598}
599
600/* ARGSUSED */
601int
602zfs_onexit_fd_hold(int fd, minor_t *minorp)
603{
604	*minorp = 0;
605	return (0);
606}
607
/*
 * Userland stub: does nothing; 'fd' is ignored.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}
613
614/* ARGSUSED */
615int
616zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
617    uint64_t *action_handle)
618{
619	return (0);
620}
621
622/* ARGSUSED */
623int
624zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
625{
626	return (0);
627}
628
629/* ARGSUSED */
630int
631zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
632{
633	return (0);
634}
635
636void
637bioinit(buf_t *bp)
638{
639	bzero(bp, sizeof (buf_t));
640}
641
642void
643biodone(buf_t *bp)
644{
645	if (bp->b_iodone != NULL) {
646		(*(bp->b_iodone))(bp);
647		return;
648	}
649	ASSERT((bp->b_flags & B_DONE) == 0);
650	bp->b_flags |= B_DONE;
651}
652
653void
654bioerror(buf_t *bp, int error)
655{
656	ASSERT(bp != NULL);
657	ASSERT(error >= 0);
658
659	if (error != 0) {
660		bp->b_flags |= B_ERROR;
661	} else {
662		bp->b_flags &= ~B_ERROR;
663	}
664	bp->b_error = error;
665}
666
667
668int
669geterror(struct buf *bp)
670{
671	int error = 0;
672
673	if (bp->b_flags & B_ERROR) {
674		error = bp->b_error;
675		if (!error)
676			error = EIO;
677	}
678	return (error);
679}
680
681int
682crypto_create_ctx_template(crypto_mechanism_t *mech,
683    crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag)
684{
685	return (0);
686}
687
688crypto_mech_type_t
689crypto_mech2id(crypto_mech_name_t name)
690{
691	return (CRYPTO_MECH_INVALID);
692}
693
694int
695crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
696    crypto_key_t *key, crypto_ctx_template_t impl,
697    crypto_data_t *mac, crypto_call_req_t *cr)
698{
699	return (0);
700}
701
702int
703crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
704    crypto_key_t *key, crypto_ctx_template_t tmpl,
705    crypto_data_t *ciphertext, crypto_call_req_t *cr)
706{
707	return (0);
708}
709
710/* This could probably be a weak reference */
711int
712crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
713    crypto_key_t *key, crypto_ctx_template_t tmpl,
714    crypto_data_t *ciphertext, crypto_call_req_t *cr)
715{
716	return (0);
717}
718
719
720int
721crypto_digest_final(crypto_context_t context, crypto_data_t *digest,
722    crypto_call_req_t *cr)
723{
724	return (0);
725}
726
727int
728crypto_digest_update(crypto_context_t context, crypto_data_t *data,
729    crypto_call_req_t *cr)
730{
731	return (0);
732}
733
734int
735crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
736    crypto_call_req_t  *crq)
737{
738	return (0);
739}
740
741void
742crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
743{
744}
745
746extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
747	crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
748    crypto_call_req_t *cr)
749{
750	return (0);
751}
752
753extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data,
754	crypto_call_req_t *cr)
755{
756	return (0);
757}
758
759extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data,
760	crypto_call_req_t *cr)
761{
762	return (0);
763}
764