1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
25 * Copyright 2017 RackTop Systems.
26 */
27
28#include <assert.h>
29#include <fcntl.h>
30#include <poll.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <zlib.h>
35#include <libgen.h>
36#include <sys/spa.h>
37#include <sys/stat.h>
38#include <sys/processor.h>
39#include <sys/zfs_context.h>
40#include <sys/rrwlock.h>
41#include <sys/zmod.h>
42#include <sys/utsname.h>
43#include <sys/systeminfo.h>
44#include <libzfs.h>
45#include <sys/crypto/common.h>
46#include <sys/crypto/impl.h>
47#include <sys/crypto/api.h>
48#include <sys/sha2.h>
49#include <crypto/aes/aes_impl.h>
50
/*
 * System taskq setup and teardown.  Both are defined outside this file;
 * kernel_init() calls the former and kernel_fini() calls the latter.
 */
extern void system_taskq_init(void);
extern void system_taskq_fini(void);
53
54/*
55 * Emulation of kernel services in userland.
56 */
57
/* Physical page count; kernel_init() sets it from sysconf(_SC_PHYS_PAGES). */
pgcnt_t physmem;
/*
 * Placeholder root vnode.  Within this file it is only ever compared
 * against (see the ASSERT in vn_openat()) and passed by kobj_open_file();
 * it is never dereferenced here.
 */
vnode_t *rootdir = (vnode_t *)0xabcd1234;
/*
 * Emulated hostid as decimal text.  Written by kernel_init() and parsed
 * back by zone_get_hostid().
 */
char hw_serial[HW_HOSTID_LEN];
/* Initialized by kernel_init(). */
kmutex_t cpu_lock;
vmem_t *zio_arena = NULL;

/*
 * If set, all blocks read will be copied to the specified directory.
 * vn_open() opens the per-file dump target and vn_rdwr() mirrors each
 * read into it.
 */
char *vn_dumpdir = NULL;
66
/* Fixed placeholder identity strings used by the userland emulation. */
struct utsname utsname = {
	"userland", "libzpool", "1", "1", "na"
};
70
71/*
72 * =========================================================================
73 * vnode operations
74 * =========================================================================
75 */
76/*
77 * Note: for the xxxat() versions of these functions, we assume that the
78 * starting vp is always rootdir (which is true for spa_directory.c, the only
79 * ZFS consumer of these interfaces).  We assert this is true, and then emulate
80 * them by adding '/' in front of the path.
81 */
82
83/*ARGSUSED*/
84int
85vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
86{
87	int fd;
88	int dump_fd;
89	vnode_t *vp;
90	int old_umask;
91	char realpath[MAXPATHLEN];
92	struct stat64 st;
93
94	/*
95	 * If we're accessing a real disk from userland, we need to use
96	 * the character interface to avoid caching.  This is particularly
97	 * important if we're trying to look at a real in-kernel storage
98	 * pool from userland, e.g. via zdb, because otherwise we won't
99	 * see the changes occurring under the segmap cache.
100	 * On the other hand, the stupid character device returns zero
101	 * for its size.  So -- gag -- we open the block device to get
102	 * its size, and remember it for subsequent VOP_GETATTR().
103	 */
104	if (strncmp(path, "/dev/", 5) == 0) {
105		char *dsk;
106		fd = open64(path, O_RDONLY);
107		if (fd == -1)
108			return (errno);
109		if (fstat64(fd, &st) == -1) {
110			close(fd);
111			return (errno);
112		}
113		close(fd);
114		(void) sprintf(realpath, "%s", path);
115		dsk = strstr(path, "/dsk/");
116		if (dsk != NULL)
117			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
118			    dsk + 1);
119	} else {
120		(void) sprintf(realpath, "%s", path);
121		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
122			return (errno);
123	}
124
125	if (flags & FCREAT)
126		old_umask = umask(0);
127
128	/*
129	 * The construct 'flags - FREAD' conveniently maps combinations of
130	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
131	 */
132	fd = open64(realpath, flags - FREAD, mode);
133
134	if (flags & FCREAT)
135		(void) umask(old_umask);
136
137	if (vn_dumpdir != NULL) {
138		char dumppath[MAXPATHLEN];
139		(void) snprintf(dumppath, sizeof (dumppath),
140		    "%s/%s", vn_dumpdir, basename(realpath));
141		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
142		if (dump_fd == -1)
143			return (errno);
144	} else {
145		dump_fd = -1;
146	}
147
148	if (fd == -1)
149		return (errno);
150
151	if (fstat64(fd, &st) == -1) {
152		close(fd);
153		return (errno);
154	}
155
156	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
157
158	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
159
160	vp->v_fd = fd;
161	vp->v_size = st.st_size;
162	vp->v_path = spa_strdup(path);
163	vp->v_dump_fd = dump_fd;
164
165	return (0);
166}
167
168/*ARGSUSED*/
169int
170vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
171    int x3, vnode_t *startvp, int fd)
172{
173	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
174	int ret;
175
176	ASSERT(startvp == rootdir);
177	(void) sprintf(realpath, "/%s", path);
178
179	/* fd ignored for now, need if want to simulate nbmand support */
180	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
181
182	umem_free(realpath, strlen(path) + 2);
183
184	return (ret);
185}
186
187/*ARGSUSED*/
188int
189vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
190    int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
191{
192	ssize_t iolen, split;
193
194	if (uio == UIO_READ) {
195		iolen = pread64(vp->v_fd, addr, len, offset);
196		if (vp->v_dump_fd != -1) {
197			int status =
198			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
199			ASSERT(status != -1);
200		}
201	} else {
202		/*
203		 * To simulate partial disk writes, we split writes into two
204		 * system calls so that the process can be killed in between.
205		 */
206		int sectors = len >> SPA_MINBLOCKSHIFT;
207		split = (sectors > 0 ? rand() % sectors : 0) <<
208		    SPA_MINBLOCKSHIFT;
209		iolen = pwrite64(vp->v_fd, addr, split, offset);
210		iolen += pwrite64(vp->v_fd, (char *)addr + split,
211		    len - split, offset + split);
212	}
213
214	if (iolen == -1)
215		return (errno);
216	if (residp)
217		*residp = len - iolen;
218	else if (iolen != len)
219		return (EIO);
220	return (0);
221}
222
223void
224vn_close(vnode_t *vp)
225{
226	close(vp->v_fd);
227	if (vp->v_dump_fd != -1)
228		close(vp->v_dump_fd);
229	spa_strfree(vp->v_path);
230	umem_free(vp, sizeof (vnode_t));
231}
232
233/*
234 * At a minimum we need to update the size since vdev_reopen()
235 * will no longer call vn_openat().
236 */
237int
238fop_getattr(vnode_t *vp, vattr_t *vap)
239{
240	struct stat64 st;
241
242	if (fstat64(vp->v_fd, &st) == -1) {
243		close(vp->v_fd);
244		return (errno);
245	}
246
247	vap->va_size = st.st_size;
248	return (0);
249}
250
251#ifdef ZFS_DEBUG
252
253/*
254 * =========================================================================
255 * Figure out which debugging statements to print
256 * =========================================================================
257 */
258
/* Comma-separated selector list; set by dprintf_setup(). */
static char *dprintf_string;
/* Non-zero once the selector "on" has been seen by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Report whether 'string' appears as a complete element of the
 * comma-separated selector list in dprintf_string.
 *
 * List format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 when found, 0 when absent or when no list is set.
 */
int
dprintf_find_string(const char *string)
{
	const char *elem;
	int want = strlen(string);

	for (elem = dprintf_string; elem != NULL; ) {
		if (strncmp(elem, string, want) == 0) {
			/* A prefix match only counts at an element boundary. */
			char next = elem[want];

			if (next == ',' || next == '\0')
				return (1);
		}

		/* Move to the element after the next comma, if any. */
		elem = strchr(elem, ',');
		if (elem != NULL)
			elem++;
	}

	return (0);
}
283
284void
285dprintf_setup(int *argc, char **argv)
286{
287	int i, j;
288
289	/*
290	 * Debugging can be specified two ways: by setting the
291	 * environment variable ZFS_DEBUG, or by including a
292	 * "debug=..."  argument on the command line.  The command
293	 * line setting overrides the environment variable.
294	 */
295
296	for (i = 1; i < *argc; i++) {
297		int len = strlen("debug=");
298		/* First look for a command line argument */
299		if (strncmp("debug=", argv[i], len) == 0) {
300			dprintf_string = argv[i] + len;
301			/* Remove from args */
302			for (j = i; j < *argc; j++)
303				argv[j] = argv[j+1];
304			argv[j] = NULL;
305			(*argc)--;
306		}
307	}
308
309	if (dprintf_string == NULL) {
310		/* Look for ZFS_DEBUG environment variable */
311		dprintf_string = getenv("ZFS_DEBUG");
312	}
313
314	/*
315	 * Are we just turning on all debugging?
316	 */
317	if (dprintf_find_string("on"))
318		dprintf_print_all = 1;
319
320	if (dprintf_string != NULL)
321		zfs_flags |= ZFS_DEBUG_DPRINTF;
322}
323
324/*
325 * =========================================================================
326 * debug printfs
327 * =========================================================================
328 */
329void
330__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
331{
332	const char *newfile;
333	va_list adx;
334
335	/*
336	 * Get rid of annoying "../common/" prefix to filename.
337	 */
338	newfile = strrchr(file, '/');
339	if (newfile != NULL) {
340		newfile = newfile + 1; /* Get rid of leading / */
341	} else {
342		newfile = file;
343	}
344
345	if (dprintf_print_all ||
346	    dprintf_find_string(newfile) ||
347	    dprintf_find_string(func)) {
348		/* Print out just the function name if requested */
349		flockfile(stdout);
350		if (dprintf_find_string("pid"))
351			(void) printf("%d ", getpid());
352		if (dprintf_find_string("tid"))
353			(void) printf("%u ", thr_self());
354		if (dprintf_find_string("cpu"))
355			(void) printf("%u ", getcpuid());
356		if (dprintf_find_string("time"))
357			(void) printf("%llu ", gethrtime());
358		if (dprintf_find_string("long"))
359			(void) printf("%s, line %d: ", newfile, line);
360		(void) printf("%s: ", func);
361		va_start(adx, fmt);
362		(void) vprintf(fmt, adx);
363		va_end(adx);
364		funlockfile(stdout);
365	}
366}
367
368#endif /* ZFS_DEBUG */
369
370/*
371 * =========================================================================
372 * kobj interfaces
373 * =========================================================================
374 */
375struct _buf *
376kobj_open_file(char *name)
377{
378	struct _buf *file;
379	vnode_t *vp;
380
381	/* set vp as the _fd field of the file */
382	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
383	    -1) != 0)
384		return ((void *)-1UL);
385
386	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
387	file->_fd = (intptr_t)vp;
388	return (file);
389}
390
391int
392kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
393{
394	ssize_t resid;
395
396	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
397	    UIO_SYSSPACE, 0, 0, 0, &resid);
398
399	return (size - resid);
400}
401
402void
403kobj_close_file(struct _buf *file)
404{
405	vn_close((vnode_t *)file->_fd);
406	umem_free(file, sizeof (struct _buf));
407}
408
409int
410kobj_get_filesize(struct _buf *file, uint64_t *size)
411{
412	struct stat64 st;
413	vnode_t *vp = (vnode_t *)file->_fd;
414
415	if (fstat64(vp->v_fd, &st) == -1) {
416		vn_close(vp);
417		return (errno);
418	}
419	*size = st.st_size;
420	return (0);
421}
422
423/*
424 * =========================================================================
425 * kernel emulation setup & teardown
426 * =========================================================================
427 */
/*
 * Allocation-failure callback registered by kernel_init(): print a
 * message on stderr and abort.
 *
 * The message is written with write() and excludes the string's
 * terminating NUL byte.  The return statement is never reached; it only
 * satisfies the int return type.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
437
/*
 * Bring up the userland kernel emulation.
 *
 *	mode	open-mode flags.  It is passed on to spa_init(), and its
 *		FWRITE bit decides whether the real system hostid or 0 is
 *		recorded in hw_serial.
 *
 * The calls below are made in a fixed order; keep that order when editing.
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Install the allocation-failure handler before anything else. */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* hw_serial holds the hostid as decimal text; see zone_get_hostid(). */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
	    (mode & FWRITE) ? get_system_hostid() : 0);

	system_taskq_init();

	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);

	spa_init(mode);

	/* Create the rrw_tsd_key TSD key with rrw_tsd_destroy as destructor. */
	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
461
/*
 * Tear down the userland kernel emulation: shut down the SPA layer first,
 * then the system taskqs.
 */
void
kernel_fini(void)
{
	spa_fini();

	system_taskq_fini();
}
469
470/* ARGSUSED */
471uint32_t
472zone_get_hostid(void *zonep)
473{
474	/*
475	 * We're emulating the system's hostid in userland.
476	 */
477	return (strtoul(hw_serial, NULL, 10));
478}
479
480int
481z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
482{
483	int ret;
484	uLongf len = *dstlen;
485
486	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
487		*dstlen = (size_t)len;
488
489	return (ret);
490}
491
492int
493z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
494    int level)
495{
496	int ret;
497	uLongf len = *dstlen;
498
499	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
500		*dstlen = (size_t)len;
501
502	return (ret);
503}
504
/*
 * Userland stub: performs no permission check and always returns 0.
 * Both arguments are ignored.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (0);
}
510
/*
 * Userland stub: performs no permission check and always returns 0.
 * All arguments are ignored.
 */
int
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
	return (0);
}
516
/*
 * Userland stub: performs no permission check and always returns 0.
 * Both arguments are ignored.
 */
int
zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
{
	return (0);
}
522
/*
 * Userland stub: ignores 'fd', stores 0 in *minorp and returns 0.
 */
/* ARGSUSED */
int
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
	*minorp = 0;
	return (0);
}
530
/*
 * Userland stub: does nothing.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}
536
/*
 * Userland stub: registers nothing and returns 0.  'func' is never called
 * from here and *action_handle is not written.
 */
/* ARGSUSED */
int
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
    uint64_t *action_handle)
{
	return (0);
}
544
/*
 * Userland stub: ignores its arguments and returns 0.
 */
/* ARGSUSED */
int
zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
{
	return (0);
}
551
/*
 * Userland stub: returns 0 without writing to *data.
 */
/* ARGSUSED */
int
zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
	return (0);
}
558
559void
560bioinit(buf_t *bp)
561{
562	bzero(bp, sizeof (buf_t));
563}
564
565void
566biodone(buf_t *bp)
567{
568	if (bp->b_iodone != NULL) {
569		(*(bp->b_iodone))(bp);
570		return;
571	}
572	ASSERT((bp->b_flags & B_DONE) == 0);
573	bp->b_flags |= B_DONE;
574}
575
576void
577bioerror(buf_t *bp, int error)
578{
579	ASSERT(bp != NULL);
580	ASSERT(error >= 0);
581
582	if (error != 0) {
583		bp->b_flags |= B_ERROR;
584	} else {
585		bp->b_flags &= ~B_ERROR;
586	}
587	bp->b_error = error;
588}
589
590
591int
592geterror(struct buf *bp)
593{
594	int error = 0;
595
596	if (bp->b_flags & B_ERROR) {
597		error = bp->b_error;
598		if (!error)
599			error = EIO;
600	}
601	return (error);
602}
603
/*
 * Userland stub: creates nothing, leaves *tmpl untouched and returns 0.
 */
int
crypto_create_ctx_template(crypto_mechanism_t *mech,
    crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag)
{
	return (0);
}
610
/*
 * Userland stub: no mechanism lookup is done; every name maps to
 * CRYPTO_MECH_INVALID.
 */
crypto_mech_type_t
crypto_mech2id(crypto_mech_name_t name)
{
	return (CRYPTO_MECH_INVALID);
}
616
/*
 * Userland stub: computes nothing, leaves 'mac' untouched and returns 0.
 */
int
crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
    crypto_key_t *key, crypto_ctx_template_t impl,
    crypto_data_t *mac, crypto_call_req_t *cr)
{
	return (0);
}
624
/*
 * Userland stub: encrypts nothing, leaves 'ciphertext' untouched and
 * returns 0.
 */
int
crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
    crypto_key_t *key, crypto_ctx_template_t tmpl,
    crypto_data_t *ciphertext, crypto_call_req_t *cr)
{
	return (0);
}
632
/*
 * Userland stub: decrypts nothing, leaves both data arguments untouched
 * and returns 0.
 *
 * This could probably be a weak reference
 */
int
crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
    crypto_key_t *key, crypto_ctx_template_t tmpl,
    crypto_data_t *ciphertext, crypto_call_req_t *cr)
{
	return (0);
}
641
642
/*
 * Userland stub: produces no digest, leaves 'digest' untouched and
 * returns 0.
 */
int
crypto_digest_final(crypto_context_t context, crypto_data_t *digest,
    crypto_call_req_t *cr)
{
	return (0);
}
649
/*
 * Userland stub: consumes no data and returns 0.
 */
int
crypto_digest_update(crypto_context_t context, crypto_data_t *data,
    crypto_call_req_t *cr)
{
	return (0);
}
656
/*
 * Userland stub: creates no context, leaves *ctxp untouched and
 * returns 0.
 */
int
crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
    crypto_call_req_t  *crq)
{
	return (0);
}
663
/*
 * Userland stub: does nothing.
 */
void
crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
{
}
668
669extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
670	crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
671    crypto_call_req_t *cr)
672{
673	return (0);
674}
675
676extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data,
677	crypto_call_req_t *cr)
678{
679	return (0);
680}
681
682extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data,
683	crypto_call_req_t *cr)
684{
685	return (0);
686}
687