xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa_history.c (revision b11fe8c01471a5bff68e83e1fe5f809ad16b3be8)
106eeb2adSek /*
206eeb2adSek  * CDDL HEADER START
306eeb2adSek  *
406eeb2adSek  * The contents of this file are subject to the terms of the
506eeb2adSek  * Common Development and Distribution License (the "License").
606eeb2adSek  * You may not use this file except in compliance with the License.
706eeb2adSek  *
806eeb2adSek  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
906eeb2adSek  * or http://www.opensolaris.org/os/licensing.
1006eeb2adSek  * See the License for the specific language governing permissions
1106eeb2adSek  * and limitations under the License.
1206eeb2adSek  *
1306eeb2adSek  * When distributing Covered Code, include this CDDL HEADER in each
1406eeb2adSek  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1506eeb2adSek  * If applicable, add the following below this CDDL HEADER, with the
1606eeb2adSek  * fields enclosed by brackets "[]" replaced with your own identifying
1706eeb2adSek  * information: Portions Copyright [yyyy] [name of copyright owner]
1806eeb2adSek  *
1906eeb2adSek  * CDDL HEADER END
2006eeb2adSek  */
2106eeb2adSek 
2206eeb2adSek /*
233f9d6ad7SLin Ling  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
249adfa60dSMatthew Ahrens  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25c3d26abcSMatthew Ahrens  * Copyright (c) 2014 Integros [integros.com]
26ce1577b0SDave Eddy  * Copyright 2017 Joyent, Inc.
2706eeb2adSek  */
2806eeb2adSek 
29ecd6cf80Smarks #include <sys/spa.h>
3006eeb2adSek #include <sys/spa_impl.h>
3106eeb2adSek #include <sys/zap.h>
3206eeb2adSek #include <sys/dsl_synctask.h>
33ecd6cf80Smarks #include <sys/dmu_tx.h>
34ecd6cf80Smarks #include <sys/dmu_objset.h>
354445fffbSMatthew Ahrens #include <sys/dsl_dataset.h>
364445fffbSMatthew Ahrens #include <sys/dsl_dir.h>
37ecd6cf80Smarks #include <sys/utsname.h>
38ecd6cf80Smarks #include <sys/cmn_err.h>
39ecd6cf80Smarks #include <sys/sunddi.h>
404445fffbSMatthew Ahrens #include <sys/cred.h>
413f9d6ad7SLin Ling #include "zfs_comutil.h"
42ecd6cf80Smarks #ifdef _KERNEL
43ecd6cf80Smarks #include <sys/zone.h>
44ecd6cf80Smarks #endif
4506eeb2adSek 
/*
 * Routines to manage the on-disk history log.
 *
 * The history log is stored as a dmu object containing
 * <packed record length, record nvlist> tuples.
 *
 * Where "record nvlist" is a nvlist containing uint64_ts and strings, and
 * "packed record length" is the packed length of the "record nvlist" stored
 * as a little endian uint64_t.
 *
 * The log is implemented as a ring buffer, though the original creation
 * of the pool ('zpool create') is never overwritten.
 *
 * The history log is tracked as object 'spa_t::spa_history'.  The bonus buffer
 * of 'spa_history' stores the offsets for logging/retrieving history as
 * 'spa_history_phys_t'.  'sh_pool_create_len' is the ending offset in bytes of
 * where the 'zpool create' record is stored.  This allows us to never
 * overwrite the original creation of the pool.  'sh_phys_max_off' is the
 * physical ending offset in bytes of the log.  This tells you the length of
 * the buffer. 'sh_eof' is the logical EOF (in bytes).  Whenever a record
 * is added, 'sh_eof' is incremented by the size of the record.
 * 'sh_eof' is never decremented.  'sh_bof' is the logical BOF (in bytes).
 * This is where the consumer should start reading from after reading in
 * the 'zpool create' portion of the log.
 *
 * 'sh_records_lost' keeps track of how many records have been overwritten
 * and permanently lost.
 */
7406eeb2adSek 
7506eeb2adSek /* convert a logical offset to physical */
7606eeb2adSek static uint64_t
7706eeb2adSek spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp)
7806eeb2adSek {
7906eeb2adSek 	uint64_t phys_len;
8006eeb2adSek 
8106eeb2adSek 	phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len;
8206eeb2adSek 	return ((log_off - shpp->sh_pool_create_len) % phys_len
8306eeb2adSek 	    + shpp->sh_pool_create_len);
8406eeb2adSek }
8506eeb2adSek 
8606eeb2adSek void
8706eeb2adSek spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
8806eeb2adSek {
8906eeb2adSek 	dmu_buf_t *dbp;
9006eeb2adSek 	spa_history_phys_t *shpp;
9106eeb2adSek 	objset_t *mos = spa->spa_meta_objset;
9206eeb2adSek 
9306eeb2adSek 	ASSERT(spa->spa_history == 0);
9406eeb2adSek 	spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
95b5152584SMatthew Ahrens 	    SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
9606eeb2adSek 	    sizeof (spa_history_phys_t), tx);
9706eeb2adSek 
9806eeb2adSek 	VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
9906eeb2adSek 	    DMU_POOL_HISTORY, sizeof (uint64_t), 1,
10006eeb2adSek 	    &spa->spa_history, tx) == 0);
10106eeb2adSek 
10206eeb2adSek 	VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
10306eeb2adSek 	ASSERT(dbp->db_size >= sizeof (spa_history_phys_t));
10406eeb2adSek 
10506eeb2adSek 	shpp = dbp->db_data;
10606eeb2adSek 	dmu_buf_will_dirty(dbp, tx);
10706eeb2adSek 
10806eeb2adSek 	/*
10906eeb2adSek 	 * Figure out maximum size of history log.  We set it at
11019b94df9SMatthew Ahrens 	 * 0.1% of pool size, with a max of 1G and min of 128KB.
11106eeb2adSek 	 */
112b24ab676SJeff Bonwick 	shpp->sh_phys_max_off =
11319b94df9SMatthew Ahrens 	    metaslab_class_get_dspace(spa_normal_class(spa)) / 1000;
11419b94df9SMatthew Ahrens 	shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30);
11506eeb2adSek 	shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
11606eeb2adSek 
11706eeb2adSek 	dmu_buf_rele(dbp, FTAG);
11806eeb2adSek }
11906eeb2adSek 
12006eeb2adSek /*
12106eeb2adSek  * Change 'sh_bof' to the beginning of the next record.
12206eeb2adSek  */
12306eeb2adSek static int
12406eeb2adSek spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
12506eeb2adSek {
12606eeb2adSek 	objset_t *mos = spa->spa_meta_objset;
12706eeb2adSek 	uint64_t firstread, reclen, phys_bof;
12806eeb2adSek 	char buf[sizeof (reclen)];
12906eeb2adSek 	int err;
13006eeb2adSek 
13106eeb2adSek 	phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp);
13206eeb2adSek 	firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
13306eeb2adSek 
13406eeb2adSek 	if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
1357bfdf011SNeil Perrin 	    buf, DMU_READ_PREFETCH)) != 0)
13606eeb2adSek 		return (err);
13706eeb2adSek 	if (firstread != sizeof (reclen)) {
13806eeb2adSek 		if ((err = dmu_read(mos, spa->spa_history,
13906eeb2adSek 		    shpp->sh_pool_create_len, sizeof (reclen) - firstread,
1407bfdf011SNeil Perrin 		    buf + firstread, DMU_READ_PREFETCH)) != 0)
14106eeb2adSek 			return (err);
14206eeb2adSek 	}
14306eeb2adSek 
14406eeb2adSek 	reclen = LE_64(*((uint64_t *)buf));
14506eeb2adSek 	shpp->sh_bof += reclen + sizeof (reclen);
14606eeb2adSek 	shpp->sh_records_lost++;
14706eeb2adSek 	return (0);
14806eeb2adSek }
14906eeb2adSek 
15006eeb2adSek static int
15106eeb2adSek spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
15206eeb2adSek     dmu_tx_t *tx)
15306eeb2adSek {
15406eeb2adSek 	uint64_t firstwrite, phys_eof;
15506eeb2adSek 	objset_t *mos = spa->spa_meta_objset;
15606eeb2adSek 	int err;
15706eeb2adSek 
15806eeb2adSek 	ASSERT(MUTEX_HELD(&spa->spa_history_lock));
15906eeb2adSek 
16006eeb2adSek 	/* see if we need to reset logical BOF */
16106eeb2adSek 	while (shpp->sh_phys_max_off - shpp->sh_pool_create_len -
16206eeb2adSek 	    (shpp->sh_eof - shpp->sh_bof) <= len) {
163ecd6cf80Smarks 		if ((err = spa_history_advance_bof(spa, shpp)) != 0) {
16406eeb2adSek 			return (err);
165ecd6cf80Smarks 		}
16606eeb2adSek 	}
16706eeb2adSek 
16806eeb2adSek 	phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
16906eeb2adSek 	firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
17006eeb2adSek 	shpp->sh_eof += len;
17106eeb2adSek 	dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);
17206eeb2adSek 
17306eeb2adSek 	len -= firstwrite;
17406eeb2adSek 	if (len > 0) {
17506eeb2adSek 		/* write out the rest at the beginning of physical file */
17606eeb2adSek 		dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
17706eeb2adSek 		    len, (char *)buf + firstwrite, tx);
17806eeb2adSek 	}
17906eeb2adSek 
18006eeb2adSek 	return (0);
18106eeb2adSek }
18206eeb2adSek 
/*
 * Return the name of the zone of the current process, or NULL if the
 * process is in the global zone or this is not a kernel build.
 */
static char *
spa_history_zone(void)
{
	char *zonename = NULL;

#ifdef _KERNEL
	if (!INGLOBALZONE(curproc))
		zonename = curproc->p_zone->zone_name;
#endif
	return (zonename);
}
194ecd6cf80Smarks 
195ce1577b0SDave Eddy /*
196ce1577b0SDave Eddy  * Post a history sysevent.
197ce1577b0SDave Eddy  *
198ce1577b0SDave Eddy  * The nvlist_t* passed into this function will be transformed into a new
199ce1577b0SDave Eddy  * nvlist where:
200ce1577b0SDave Eddy  *
201ce1577b0SDave Eddy  * 1. Nested nvlists will be flattened to a single level
202ce1577b0SDave Eddy  * 2. Keys will have their names normalized (to remove any problematic
203ce1577b0SDave Eddy  * characters, such as whitespace)
204ce1577b0SDave Eddy  *
205ce1577b0SDave Eddy  * The nvlist_t passed into this function will duplicated and should be freed
206ce1577b0SDave Eddy  * by caller.
207ce1577b0SDave Eddy  *
208ce1577b0SDave Eddy  */
209ce1577b0SDave Eddy static void
210ce1577b0SDave Eddy spa_history_log_notify(spa_t *spa, nvlist_t *nvl)
211ce1577b0SDave Eddy {
212ce1577b0SDave Eddy 	nvlist_t *hist_nvl = fnvlist_alloc();
213ce1577b0SDave Eddy 	uint64_t uint64;
214ce1577b0SDave Eddy 	char *string;
215ce1577b0SDave Eddy 
216ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0)
217ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string);
218ce1577b0SDave Eddy 
219ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
220ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
221ce1577b0SDave Eddy 
222ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0)
223ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string);
224ce1577b0SDave Eddy 
225ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0)
226ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string);
227ce1577b0SDave Eddy 
228ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0)
229ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string);
230ce1577b0SDave Eddy 
231ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0)
232ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string);
233ce1577b0SDave Eddy 
234ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0)
235ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string);
236ce1577b0SDave Eddy 
237ce1577b0SDave Eddy 	if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
238ce1577b0SDave Eddy 		fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
239ce1577b0SDave Eddy 
240ce1577b0SDave Eddy 	if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0)
241ce1577b0SDave Eddy 		fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64);
242ce1577b0SDave Eddy 
243ce1577b0SDave Eddy 	if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0)
244ce1577b0SDave Eddy 		fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64);
245ce1577b0SDave Eddy 
246ce1577b0SDave Eddy 	if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0)
247ce1577b0SDave Eddy 		fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64);
248ce1577b0SDave Eddy 
249ce1577b0SDave Eddy 	if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0)
250ce1577b0SDave Eddy 		fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64);
251ce1577b0SDave Eddy 
252ce1577b0SDave Eddy 	if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0)
253ce1577b0SDave Eddy 		fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64);
254ce1577b0SDave Eddy 
255ce1577b0SDave Eddy 	spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT);
256ce1577b0SDave Eddy 
257ce1577b0SDave Eddy 	nvlist_free(hist_nvl);
258ce1577b0SDave Eddy }
259ce1577b0SDave Eddy 
26006eeb2adSek /*
26106eeb2adSek  * Write out a history event.
26206eeb2adSek  */
263495807d7SMatthew Ahrens /*ARGSUSED*/
264e7437265Sahrens static void
2653b2aab18SMatthew Ahrens spa_history_log_sync(void *arg, dmu_tx_t *tx)
26606eeb2adSek {
2673b2aab18SMatthew Ahrens 	nvlist_t	*nvl = arg;
2683b2aab18SMatthew Ahrens 	spa_t		*spa = dmu_tx_pool(tx)->dp_spa;
26906eeb2adSek 	objset_t	*mos = spa->spa_meta_objset;
27006eeb2adSek 	dmu_buf_t	*dbp;
27106eeb2adSek 	spa_history_phys_t *shpp;
27206eeb2adSek 	size_t		reclen;
27306eeb2adSek 	uint64_t	le_len;
27406eeb2adSek 	char		*record_packed = NULL;
27506eeb2adSek 	int		ret;
27606eeb2adSek 
27706eeb2adSek 	/*
27806eeb2adSek 	 * If we have an older pool that doesn't have a command
27906eeb2adSek 	 * history object, create it now.
28006eeb2adSek 	 */
28106eeb2adSek 	mutex_enter(&spa->spa_history_lock);
28206eeb2adSek 	if (!spa->spa_history)
28306eeb2adSek 		spa_history_create_obj(spa, tx);
28406eeb2adSek 	mutex_exit(&spa->spa_history_lock);
28506eeb2adSek 
28606eeb2adSek 	/*
28706eeb2adSek 	 * Get the offset of where we need to write via the bonus buffer.
28806eeb2adSek 	 * Update the offset when the write completes.
28906eeb2adSek 	 */
2903b2aab18SMatthew Ahrens 	VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
29106eeb2adSek 	shpp = dbp->db_data;
29206eeb2adSek 
29306eeb2adSek 	dmu_buf_will_dirty(dbp, tx);
29406eeb2adSek 
29506eeb2adSek #ifdef ZFS_DEBUG
29606eeb2adSek 	{
29706eeb2adSek 		dmu_object_info_t doi;
29806eeb2adSek 		dmu_object_info_from_db(dbp, &doi);
29906eeb2adSek 		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
30006eeb2adSek 	}
30106eeb2adSek #endif
30206eeb2adSek 
3034445fffbSMatthew Ahrens 	fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
304ecd6cf80Smarks #ifdef _KERNEL
3054445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
306ecd6cf80Smarks #endif
3074445fffbSMatthew Ahrens 	if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
3084445fffbSMatthew Ahrens 		zfs_dbgmsg("command: %s",
3094445fffbSMatthew Ahrens 		    fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
3104445fffbSMatthew Ahrens 	} else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
3114445fffbSMatthew Ahrens 		if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
3124445fffbSMatthew Ahrens 			zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
3134445fffbSMatthew Ahrens 			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
3144445fffbSMatthew Ahrens 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
3154445fffbSMatthew Ahrens 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
3164445fffbSMatthew Ahrens 			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
3174445fffbSMatthew Ahrens 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
3184445fffbSMatthew Ahrens 		} else {
3194445fffbSMatthew Ahrens 			zfs_dbgmsg("txg %lld %s %s",
3204445fffbSMatthew Ahrens 			    fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
3214445fffbSMatthew Ahrens 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
3224445fffbSMatthew Ahrens 			    fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
3234445fffbSMatthew Ahrens 		}
324ce1577b0SDave Eddy 		/*
325ce1577b0SDave Eddy 		 * The history sysevent is posted only for internal history
326ce1577b0SDave Eddy 		 * messages to show what has happened, not how it happened. For
327ce1577b0SDave Eddy 		 * example, the following command:
328ce1577b0SDave Eddy 		 *
329ce1577b0SDave Eddy 		 * # zfs destroy -r tank/foo
330ce1577b0SDave Eddy 		 *
331ce1577b0SDave Eddy 		 * will result in one sysevent posted per dataset that is
332ce1577b0SDave Eddy 		 * destroyed as a result of the command - which could be more
333ce1577b0SDave Eddy 		 * than one event in total.  By contrast, if the sysevent was
334ce1577b0SDave Eddy 		 * posted as a result of the ZPOOL_HIST_CMD key being present
335ce1577b0SDave Eddy 		 * it would result in only one sysevent being posted with the
336ce1577b0SDave Eddy 		 * full command line arguments, requiring the consumer to know
337ce1577b0SDave Eddy 		 * how to parse and understand zfs(1M) command invocations.
338ce1577b0SDave Eddy 		 */
339ce1577b0SDave Eddy 		spa_history_log_notify(spa, nvl);
3404445fffbSMatthew Ahrens 	} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
3414445fffbSMatthew Ahrens 		zfs_dbgmsg("ioctl %s",
3424445fffbSMatthew Ahrens 		    fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
343ecd6cf80Smarks 	}
344ecd6cf80Smarks 
3454445fffbSMatthew Ahrens 	record_packed = fnvlist_pack(nvl, &reclen);
34606eeb2adSek 
34706eeb2adSek 	mutex_enter(&spa->spa_history_lock);
34806eeb2adSek 
34906eeb2adSek 	/* write out the packed length as little endian */
35055434c77Sek 	le_len = LE_64((uint64_t)reclen);
35106eeb2adSek 	ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
35206eeb2adSek 	if (!ret)
35306eeb2adSek 		ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
35406eeb2adSek 
3554445fffbSMatthew Ahrens 	/* The first command is the create, which we keep forever */
3564445fffbSMatthew Ahrens 	if (ret == 0 && shpp->sh_pool_create_len == 0 &&
3574445fffbSMatthew Ahrens 	    nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
3584445fffbSMatthew Ahrens 		shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
35906eeb2adSek 	}
36006eeb2adSek 
36106eeb2adSek 	mutex_exit(&spa->spa_history_lock);
3624445fffbSMatthew Ahrens 	fnvlist_pack_free(record_packed, reclen);
36306eeb2adSek 	dmu_buf_rele(dbp, FTAG);
3644445fffbSMatthew Ahrens 	fnvlist_free(nvl);
36506eeb2adSek }
36606eeb2adSek 
36706eeb2adSek /*
36806eeb2adSek  * Write out a history event.
36906eeb2adSek  */
37006eeb2adSek int
3714445fffbSMatthew Ahrens spa_history_log(spa_t *spa, const char *msg)
3724445fffbSMatthew Ahrens {
3734445fffbSMatthew Ahrens 	int err;
3744445fffbSMatthew Ahrens 	nvlist_t *nvl = fnvlist_alloc();
3754445fffbSMatthew Ahrens 
3764445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg);
3774445fffbSMatthew Ahrens 	err = spa_history_log_nvl(spa, nvl);
3784445fffbSMatthew Ahrens 	fnvlist_free(nvl);
3794445fffbSMatthew Ahrens 	return (err);
3804445fffbSMatthew Ahrens }
3814445fffbSMatthew Ahrens 
3824445fffbSMatthew Ahrens int
3834445fffbSMatthew Ahrens spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
38406eeb2adSek {
385495807d7SMatthew Ahrens 	int err = 0;
386495807d7SMatthew Ahrens 	dmu_tx_t *tx;
3874445fffbSMatthew Ahrens 	nvlist_t *nvarg;
38806eeb2adSek 
389cd1c8b85SMatthew Ahrens 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
390be6fd75aSMatthew Ahrens 		return (SET_ERROR(EINVAL));
391e7437265Sahrens 
392495807d7SMatthew Ahrens 	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
393495807d7SMatthew Ahrens 	err = dmu_tx_assign(tx, TXG_WAIT);
394495807d7SMatthew Ahrens 	if (err) {
395495807d7SMatthew Ahrens 		dmu_tx_abort(tx);
396495807d7SMatthew Ahrens 		return (err);
397495807d7SMatthew Ahrens 	}
398495807d7SMatthew Ahrens 
3994445fffbSMatthew Ahrens 	nvarg = fnvlist_dup(nvl);
4004445fffbSMatthew Ahrens 	if (spa_history_zone() != NULL) {
4014445fffbSMatthew Ahrens 		fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
4024445fffbSMatthew Ahrens 		    spa_history_zone());
4034445fffbSMatthew Ahrens 	}
4044445fffbSMatthew Ahrens 	fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
405495807d7SMatthew Ahrens 
406495807d7SMatthew Ahrens 	/* Kick this off asynchronously; errors are ignored. */
4073b2aab18SMatthew Ahrens 	dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
4087d46dc6cSMatthew Ahrens 	    nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
409495807d7SMatthew Ahrens 	dmu_tx_commit(tx);
410495807d7SMatthew Ahrens 
4114445fffbSMatthew Ahrens 	/* spa_history_log_sync will free nvl */
412495807d7SMatthew Ahrens 	return (err);
4134445fffbSMatthew Ahrens 
41406eeb2adSek }
41506eeb2adSek 
41606eeb2adSek /*
41706eeb2adSek  * Read out the command history.
41806eeb2adSek  */
41906eeb2adSek int
42006eeb2adSek spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
42106eeb2adSek {
42206eeb2adSek 	objset_t *mos = spa->spa_meta_objset;
42306eeb2adSek 	dmu_buf_t *dbp;
42406eeb2adSek 	uint64_t read_len, phys_read_off, phys_eof;
42506eeb2adSek 	uint64_t leftover = 0;
42606eeb2adSek 	spa_history_phys_t *shpp;
42706eeb2adSek 	int err;
42806eeb2adSek 
42906eeb2adSek 	/*
4304445fffbSMatthew Ahrens 	 * If the command history doesn't exist (older pool),
43106eeb2adSek 	 * that's ok, just return ENOENT.
43206eeb2adSek 	 */
43306eeb2adSek 	if (!spa->spa_history)
434be6fd75aSMatthew Ahrens 		return (SET_ERROR(ENOENT));
43506eeb2adSek 
436495807d7SMatthew Ahrens 	/*
437495807d7SMatthew Ahrens 	 * The history is logged asynchronously, so when they request
438495807d7SMatthew Ahrens 	 * the first chunk of history, make sure everything has been
439495807d7SMatthew Ahrens 	 * synced to disk so that we get it.
440495807d7SMatthew Ahrens 	 */
4413c708518SMark J Musante 	if (*offp == 0 && spa_writeable(spa))
442495807d7SMatthew Ahrens 		txg_wait_synced(spa_get_dsl(spa), 0);
443495807d7SMatthew Ahrens 
44406eeb2adSek 	if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0)
44506eeb2adSek 		return (err);
44606eeb2adSek 	shpp = dbp->db_data;
44706eeb2adSek 
44806eeb2adSek #ifdef ZFS_DEBUG
44906eeb2adSek 	{
45006eeb2adSek 		dmu_object_info_t doi;
45106eeb2adSek 		dmu_object_info_from_db(dbp, &doi);
45206eeb2adSek 		ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
45306eeb2adSek 	}
45406eeb2adSek #endif
45506eeb2adSek 
45606eeb2adSek 	mutex_enter(&spa->spa_history_lock);
45706eeb2adSek 	phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
45806eeb2adSek 
45906eeb2adSek 	if (*offp < shpp->sh_pool_create_len) {
46006eeb2adSek 		/* read in just the zpool create history */
46106eeb2adSek 		phys_read_off = *offp;
46206eeb2adSek 		read_len = MIN(*len, shpp->sh_pool_create_len -
46306eeb2adSek 		    phys_read_off);
46406eeb2adSek 	} else {
46506eeb2adSek 		/*
46606eeb2adSek 		 * Need to reset passed in offset to BOF if the passed in
46706eeb2adSek 		 * offset has since been overwritten.
46806eeb2adSek 		 */
46906eeb2adSek 		*offp = MAX(*offp, shpp->sh_bof);
47006eeb2adSek 		phys_read_off = spa_history_log_to_phys(*offp, shpp);
47106eeb2adSek 
47206eeb2adSek 		/*
47306eeb2adSek 		 * Read up to the minimum of what the user passed down or
47406eeb2adSek 		 * the EOF (physical or logical).  If we hit physical EOF,
47506eeb2adSek 		 * use 'leftover' to read from the physical BOF.
47606eeb2adSek 		 */
47706eeb2adSek 		if (phys_read_off <= phys_eof) {
47806eeb2adSek 			read_len = MIN(*len, phys_eof - phys_read_off);
47906eeb2adSek 		} else {
48006eeb2adSek 			read_len = MIN(*len,
48106eeb2adSek 			    shpp->sh_phys_max_off - phys_read_off);
48206eeb2adSek 			if (phys_read_off + *len > shpp->sh_phys_max_off) {
48306eeb2adSek 				leftover = MIN(*len - read_len,
48406eeb2adSek 				    phys_eof - shpp->sh_pool_create_len);
48506eeb2adSek 			}
48606eeb2adSek 		}
48706eeb2adSek 	}
48806eeb2adSek 
48906eeb2adSek 	/* offset for consumer to use next */
49006eeb2adSek 	*offp += read_len + leftover;
49106eeb2adSek 
49206eeb2adSek 	/* tell the consumer how much you actually read */
49306eeb2adSek 	*len = read_len + leftover;
49406eeb2adSek 
49506eeb2adSek 	if (read_len == 0) {
49606eeb2adSek 		mutex_exit(&spa->spa_history_lock);
49706eeb2adSek 		dmu_buf_rele(dbp, FTAG);
49806eeb2adSek 		return (0);
49906eeb2adSek 	}
50006eeb2adSek 
5017bfdf011SNeil Perrin 	err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
5027bfdf011SNeil Perrin 	    DMU_READ_PREFETCH);
50306eeb2adSek 	if (leftover && err == 0) {
50406eeb2adSek 		err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
5057bfdf011SNeil Perrin 		    leftover, buf + read_len, DMU_READ_PREFETCH);
50606eeb2adSek 	}
50706eeb2adSek 	mutex_exit(&spa->spa_history_lock);
50806eeb2adSek 
50906eeb2adSek 	dmu_buf_rele(dbp, FTAG);
51006eeb2adSek 	return (err);
51106eeb2adSek }
512ecd6cf80Smarks 
5134445fffbSMatthew Ahrens /*
5144445fffbSMatthew Ahrens  * The nvlist will be consumed by this call.
5154445fffbSMatthew Ahrens  */
516c8e1f6d2SMark J Musante static void
5174445fffbSMatthew Ahrens log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
5183f9d6ad7SLin Ling     dmu_tx_t *tx, const char *fmt, va_list adx)
519ecd6cf80Smarks {
5204445fffbSMatthew Ahrens 	char *msg;
521ecd6cf80Smarks 
522088f3894Sahrens 	/*
523088f3894Sahrens 	 * If this is part of creating a pool, not everything is
524088f3894Sahrens 	 * initialized yet, so don't bother logging the internal events.
525cd1c8b85SMatthew Ahrens 	 * Likewise if the pool is not writeable.
526088f3894Sahrens 	 */
527cd1c8b85SMatthew Ahrens 	if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
528347eec8eSChristopher Siden 		fnvlist_free(nvl);
529088f3894Sahrens 		return;
530347eec8eSChristopher Siden 	}
531088f3894Sahrens 
5324445fffbSMatthew Ahrens 	msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
5334445fffbSMatthew Ahrens 	(void) vsprintf(msg, fmt, adx);
5344445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
5354445fffbSMatthew Ahrens 	strfree(msg);
536ecd6cf80Smarks 
5374445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
5384445fffbSMatthew Ahrens 	fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
539e7437265Sahrens 
540e7437265Sahrens 	if (dmu_tx_is_syncing(tx)) {
5413b2aab18SMatthew Ahrens 		spa_history_log_sync(nvl, tx);
542e7437265Sahrens 	} else {
5433b2aab18SMatthew Ahrens 		dsl_sync_task_nowait(spa_get_dsl(spa),
5447d46dc6cSMatthew Ahrens 		    spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
545e7437265Sahrens 	}
5464445fffbSMatthew Ahrens 	/* spa_history_log_sync() will free nvl */
547ecd6cf80Smarks }
548c8e1f6d2SMark J Musante 
549c8e1f6d2SMark J Musante void
5504445fffbSMatthew Ahrens spa_history_log_internal(spa_t *spa, const char *operation,
5513f9d6ad7SLin Ling     dmu_tx_t *tx, const char *fmt, ...)
552c8e1f6d2SMark J Musante {
553c8e1f6d2SMark J Musante 	dmu_tx_t *htx = tx;
554c8e1f6d2SMark J Musante 	va_list adx;
555c8e1f6d2SMark J Musante 
556c8e1f6d2SMark J Musante 	/* create a tx if we didn't get one */
557c8e1f6d2SMark J Musante 	if (tx == NULL) {
558c8e1f6d2SMark J Musante 		htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
559c8e1f6d2SMark J Musante 		if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
560c8e1f6d2SMark J Musante 			dmu_tx_abort(htx);
561c8e1f6d2SMark J Musante 			return;
562c8e1f6d2SMark J Musante 		}
563c8e1f6d2SMark J Musante 	}
564c8e1f6d2SMark J Musante 
565c8e1f6d2SMark J Musante 	va_start(adx, fmt);
5664445fffbSMatthew Ahrens 	log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);
567c8e1f6d2SMark J Musante 	va_end(adx);
568c8e1f6d2SMark J Musante 
569c8e1f6d2SMark J Musante 	/* if we didn't get a tx from the caller, commit the one we made */
570c8e1f6d2SMark J Musante 	if (tx == NULL)
571c8e1f6d2SMark J Musante 		dmu_tx_commit(htx);
572c8e1f6d2SMark J Musante }
573c8e1f6d2SMark J Musante 
574c8e1f6d2SMark J Musante void
5754445fffbSMatthew Ahrens spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
5764445fffbSMatthew Ahrens     dmu_tx_t *tx, const char *fmt, ...)
5774445fffbSMatthew Ahrens {
5784445fffbSMatthew Ahrens 	va_list adx;
5799adfa60dSMatthew Ahrens 	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
5804445fffbSMatthew Ahrens 	nvlist_t *nvl = fnvlist_alloc();
5814445fffbSMatthew Ahrens 
5824445fffbSMatthew Ahrens 	ASSERT(tx != NULL);
5834445fffbSMatthew Ahrens 
5844445fffbSMatthew Ahrens 	dsl_dataset_name(ds, namebuf);
5854445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
5864445fffbSMatthew Ahrens 	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object);
5874445fffbSMatthew Ahrens 
5884445fffbSMatthew Ahrens 	va_start(adx, fmt);
5894445fffbSMatthew Ahrens 	log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx);
5904445fffbSMatthew Ahrens 	va_end(adx);
5914445fffbSMatthew Ahrens }
5924445fffbSMatthew Ahrens 
5934445fffbSMatthew Ahrens void
5944445fffbSMatthew Ahrens spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
5954445fffbSMatthew Ahrens     dmu_tx_t *tx, const char *fmt, ...)
5964445fffbSMatthew Ahrens {
5974445fffbSMatthew Ahrens 	va_list adx;
5989adfa60dSMatthew Ahrens 	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
5994445fffbSMatthew Ahrens 	nvlist_t *nvl = fnvlist_alloc();
6004445fffbSMatthew Ahrens 
6014445fffbSMatthew Ahrens 	ASSERT(tx != NULL);
6024445fffbSMatthew Ahrens 
6034445fffbSMatthew Ahrens 	dsl_dir_name(dd, namebuf);
6044445fffbSMatthew Ahrens 	fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
6054445fffbSMatthew Ahrens 	fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID,
606c1379625SJustin T. Gibbs 	    dsl_dir_phys(dd)->dd_head_dataset_obj);
6074445fffbSMatthew Ahrens 
6084445fffbSMatthew Ahrens 	va_start(adx, fmt);
6094445fffbSMatthew Ahrens 	log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx);
6104445fffbSMatthew Ahrens 	va_end(adx);
6114445fffbSMatthew Ahrens }
6124445fffbSMatthew Ahrens 
6134445fffbSMatthew Ahrens void
6144445fffbSMatthew Ahrens spa_history_log_version(spa_t *spa, const char *operation)
615c8e1f6d2SMark J Musante {
6164445fffbSMatthew Ahrens 	spa_history_log_internal(spa, operation, NULL,
617*b11fe8c0SJustin Hibbits 	    "pool version %llu; software version %llu/%llu; uts %s %s %s %s",
6183b2aab18SMatthew Ahrens 	    (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
6194445fffbSMatthew Ahrens 	    utsname.nodename, utsname.release, utsname.version,
6204445fffbSMatthew Ahrens 	    utsname.machine);
621c8e1f6d2SMark J Musante }
622