106eeb2adSek /*
206eeb2adSek * CDDL HEADER START
306eeb2adSek *
406eeb2adSek * The contents of this file are subject to the terms of the
506eeb2adSek * Common Development and Distribution License (the "License").
606eeb2adSek * You may not use this file except in compliance with the License.
706eeb2adSek *
806eeb2adSek * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
906eeb2adSek * or http://www.opensolaris.org/os/licensing.
1006eeb2adSek * See the License for the specific language governing permissions
1106eeb2adSek * and limitations under the License.
1206eeb2adSek *
1306eeb2adSek * When distributing Covered Code, include this CDDL HEADER in each
1406eeb2adSek * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1506eeb2adSek * If applicable, add the following below this CDDL HEADER, with the
1606eeb2adSek * fields enclosed by brackets "[]" replaced with your own identifying
1706eeb2adSek * information: Portions Copyright [yyyy] [name of copyright owner]
1806eeb2adSek *
1906eeb2adSek * CDDL HEADER END
2006eeb2adSek */
2106eeb2adSek
2206eeb2adSek /*
233f9d6ad7SLin Ling * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
249adfa60dSMatthew Ahrens * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25c3d26abcSMatthew Ahrens * Copyright (c) 2014 Integros [integros.com]
26ce1577b0SDave Eddy * Copyright 2017 Joyent, Inc.
2706eeb2adSek */
2806eeb2adSek
29ecd6cf80Smarks #include <sys/spa.h>
3006eeb2adSek #include <sys/spa_impl.h>
3106eeb2adSek #include <sys/zap.h>
3206eeb2adSek #include <sys/dsl_synctask.h>
33ecd6cf80Smarks #include <sys/dmu_tx.h>
34ecd6cf80Smarks #include <sys/dmu_objset.h>
354445fffbSMatthew Ahrens #include <sys/dsl_dataset.h>
364445fffbSMatthew Ahrens #include <sys/dsl_dir.h>
37ecd6cf80Smarks #include <sys/utsname.h>
38ecd6cf80Smarks #include <sys/cmn_err.h>
39ecd6cf80Smarks #include <sys/sunddi.h>
404445fffbSMatthew Ahrens #include <sys/cred.h>
413f9d6ad7SLin Ling #include "zfs_comutil.h"
42ecd6cf80Smarks #ifdef _KERNEL
43ecd6cf80Smarks #include <sys/zone.h>
44ecd6cf80Smarks #endif
4506eeb2adSek
/*
 * Routines to manage the on-disk history log.
 *
 * The history log is stored as a dmu object containing
 * <packed record length, record nvlist> tuples.
 *
 * Where "record nvlist" is an nvlist containing uint64_ts and strings, and
 * "packed record length" is the packed length of the "record nvlist" stored
 * as a little endian uint64_t.
 *
 * The log is implemented as a ring buffer, though the original creation
 * of the pool ('zpool create') is never overwritten.
 *
 * The history log is tracked as object 'spa_t::spa_history'.  The bonus buffer
 * of 'spa_history' stores the offsets for logging/retrieving history as
 * 'spa_history_phys_t'.  'sh_pool_create_len' is the ending offset in bytes of
 * where the 'zpool create' record is stored.  This allows us to never
 * overwrite the original creation of the pool.  'sh_phys_max_off' is the
 * physical ending offset in bytes of the log.  This tells you the length of
 * the buffer.  'sh_eof' is the logical EOF (in bytes).  Whenever a record
 * is added, 'sh_eof' is incremented by the size of the record.
 * 'sh_eof' is never decremented.  'sh_bof' is the logical BOF (in bytes).
 * This is where the consumer should start reading from after reading in
 * the 'zpool create' portion of the log.
 *
 * 'sh_records_lost' keeps track of how many records have been overwritten
 * and permanently lost.
 */
7406eeb2adSek
7506eeb2adSek /* convert a logical offset to physical */
7606eeb2adSek static uint64_t
spa_history_log_to_phys(uint64_t log_off,spa_history_phys_t * shpp)7706eeb2adSek spa_history_log_to_phys(uint64_t log_off, spa_history_phys_t *shpp)
7806eeb2adSek {
7906eeb2adSek uint64_t phys_len;
8006eeb2adSek
8106eeb2adSek phys_len = shpp->sh_phys_max_off - shpp->sh_pool_create_len;
8206eeb2adSek return ((log_off - shpp->sh_pool_create_len) % phys_len
8306eeb2adSek + shpp->sh_pool_create_len);
8406eeb2adSek }
8506eeb2adSek
8606eeb2adSek void
spa_history_create_obj(spa_t * spa,dmu_tx_t * tx)8706eeb2adSek spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
8806eeb2adSek {
8906eeb2adSek dmu_buf_t *dbp;
9006eeb2adSek spa_history_phys_t *shpp;
9106eeb2adSek objset_t *mos = spa->spa_meta_objset;
9206eeb2adSek
9306eeb2adSek ASSERT(spa->spa_history == 0);
9406eeb2adSek spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
95b5152584SMatthew Ahrens SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
9606eeb2adSek sizeof (spa_history_phys_t), tx);
9706eeb2adSek
9806eeb2adSek VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
9906eeb2adSek DMU_POOL_HISTORY, sizeof (uint64_t), 1,
10006eeb2adSek &spa->spa_history, tx) == 0);
10106eeb2adSek
10206eeb2adSek VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
10306eeb2adSek ASSERT(dbp->db_size >= sizeof (spa_history_phys_t));
10406eeb2adSek
10506eeb2adSek shpp = dbp->db_data;
10606eeb2adSek dmu_buf_will_dirty(dbp, tx);
10706eeb2adSek
10806eeb2adSek /*
10906eeb2adSek * Figure out maximum size of history log. We set it at
11019b94df9SMatthew Ahrens * 0.1% of pool size, with a max of 1G and min of 128KB.
11106eeb2adSek */
112b24ab676SJeff Bonwick shpp->sh_phys_max_off =
11319b94df9SMatthew Ahrens metaslab_class_get_dspace(spa_normal_class(spa)) / 1000;
11419b94df9SMatthew Ahrens shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30);
11506eeb2adSek shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
11606eeb2adSek
11706eeb2adSek dmu_buf_rele(dbp, FTAG);
11806eeb2adSek }
11906eeb2adSek
12006eeb2adSek /*
12106eeb2adSek * Change 'sh_bof' to the beginning of the next record.
12206eeb2adSek */
12306eeb2adSek static int
spa_history_advance_bof(spa_t * spa,spa_history_phys_t * shpp)12406eeb2adSek spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
12506eeb2adSek {
12606eeb2adSek objset_t *mos = spa->spa_meta_objset;
12706eeb2adSek uint64_t firstread, reclen, phys_bof;
12806eeb2adSek char buf[sizeof (reclen)];
12906eeb2adSek int err;
13006eeb2adSek
13106eeb2adSek phys_bof = spa_history_log_to_phys(shpp->sh_bof, shpp);
13206eeb2adSek firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
13306eeb2adSek
13406eeb2adSek if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
1357bfdf011SNeil Perrin buf, DMU_READ_PREFETCH)) != 0)
13606eeb2adSek return (err);
13706eeb2adSek if (firstread != sizeof (reclen)) {
13806eeb2adSek if ((err = dmu_read(mos, spa->spa_history,
13906eeb2adSek shpp->sh_pool_create_len, sizeof (reclen) - firstread,
1407bfdf011SNeil Perrin buf + firstread, DMU_READ_PREFETCH)) != 0)
14106eeb2adSek return (err);
14206eeb2adSek }
14306eeb2adSek
14406eeb2adSek reclen = LE_64(*((uint64_t *)buf));
14506eeb2adSek shpp->sh_bof += reclen + sizeof (reclen);
14606eeb2adSek shpp->sh_records_lost++;
14706eeb2adSek return (0);
14806eeb2adSek }
14906eeb2adSek
15006eeb2adSek static int
spa_history_write(spa_t * spa,void * buf,uint64_t len,spa_history_phys_t * shpp,dmu_tx_t * tx)15106eeb2adSek spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
15206eeb2adSek dmu_tx_t *tx)
15306eeb2adSek {
15406eeb2adSek uint64_t firstwrite, phys_eof;
15506eeb2adSek objset_t *mos = spa->spa_meta_objset;
15606eeb2adSek int err;
15706eeb2adSek
15806eeb2adSek ASSERT(MUTEX_HELD(&spa->spa_history_lock));
15906eeb2adSek
16006eeb2adSek /* see if we need to reset logical BOF */
16106eeb2adSek while (shpp->sh_phys_max_off - shpp->sh_pool_create_len -
16206eeb2adSek (shpp->sh_eof - shpp->sh_bof) <= len) {
163ecd6cf80Smarks if ((err = spa_history_advance_bof(spa, shpp)) != 0) {
16406eeb2adSek return (err);
165ecd6cf80Smarks }
16606eeb2adSek }
16706eeb2adSek
16806eeb2adSek phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
16906eeb2adSek firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
17006eeb2adSek shpp->sh_eof += len;
17106eeb2adSek dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);
17206eeb2adSek
17306eeb2adSek len -= firstwrite;
17406eeb2adSek if (len > 0) {
17506eeb2adSek /* write out the rest at the beginning of physical file */
17606eeb2adSek dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
17706eeb2adSek len, (char *)buf + firstwrite, tx);
17806eeb2adSek }
17906eeb2adSek
18006eeb2adSek return (0);
18106eeb2adSek }
18206eeb2adSek
/*
 * Return the name of the calling process's zone, or NULL when the caller
 * is in the global zone.  Outside the kernel this always returns NULL.
 */
static char *
spa_history_zone(void)
{
#ifdef _KERNEL
	return (INGLOBALZONE(curproc) ? NULL : curproc->p_zone->zone_name);
#else
	return (NULL);
#endif
}
194ecd6cf80Smarks
195ce1577b0SDave Eddy /*
196ce1577b0SDave Eddy * Post a history sysevent.
197ce1577b0SDave Eddy *
198ce1577b0SDave Eddy * The nvlist_t* passed into this function will be transformed into a new
199ce1577b0SDave Eddy * nvlist where:
200ce1577b0SDave Eddy *
201ce1577b0SDave Eddy * 1. Nested nvlists will be flattened to a single level
202ce1577b0SDave Eddy * 2. Keys will have their names normalized (to remove any problematic
203ce1577b0SDave Eddy * characters, such as whitespace)
204ce1577b0SDave Eddy *
205ce1577b0SDave Eddy * The nvlist_t passed into this function will duplicated and should be freed
206ce1577b0SDave Eddy * by caller.
207ce1577b0SDave Eddy *
208ce1577b0SDave Eddy */
209ce1577b0SDave Eddy static void
spa_history_log_notify(spa_t * spa,nvlist_t * nvl)210ce1577b0SDave Eddy spa_history_log_notify(spa_t *spa, nvlist_t *nvl)
211ce1577b0SDave Eddy {
212ce1577b0SDave Eddy nvlist_t *hist_nvl = fnvlist_alloc();
213ce1577b0SDave Eddy uint64_t uint64;
214ce1577b0SDave Eddy char *string;
215ce1577b0SDave Eddy
216ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0)
217ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string);
218ce1577b0SDave Eddy
219ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
220ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
221ce1577b0SDave Eddy
222ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0)
223ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string);
224ce1577b0SDave Eddy
225ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0)
226ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string);
227ce1577b0SDave Eddy
228ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0)
229ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string);
230ce1577b0SDave Eddy
231ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0)
232ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string);
233ce1577b0SDave Eddy
234ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0)
235ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string);
236ce1577b0SDave Eddy
237ce1577b0SDave Eddy if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
238ce1577b0SDave Eddy fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
239ce1577b0SDave Eddy
240ce1577b0SDave Eddy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0)
241ce1577b0SDave Eddy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64);
242ce1577b0SDave Eddy
243ce1577b0SDave Eddy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0)
244ce1577b0SDave Eddy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64);
245ce1577b0SDave Eddy
246ce1577b0SDave Eddy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0)
247ce1577b0SDave Eddy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64);
248ce1577b0SDave Eddy
249ce1577b0SDave Eddy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0)
250ce1577b0SDave Eddy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64);
251ce1577b0SDave Eddy
252ce1577b0SDave Eddy if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0)
253ce1577b0SDave Eddy fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64);
254ce1577b0SDave Eddy
255ce1577b0SDave Eddy spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT);
256ce1577b0SDave Eddy
257ce1577b0SDave Eddy nvlist_free(hist_nvl);
258ce1577b0SDave Eddy }
259ce1577b0SDave Eddy
26006eeb2adSek /*
26106eeb2adSek * Write out a history event.
26206eeb2adSek */
263495807d7SMatthew Ahrens /*ARGSUSED*/
264e7437265Sahrens static void
spa_history_log_sync(void * arg,dmu_tx_t * tx)2653b2aab18SMatthew Ahrens spa_history_log_sync(void *arg, dmu_tx_t *tx)
26606eeb2adSek {
2673b2aab18SMatthew Ahrens nvlist_t *nvl = arg;
2683b2aab18SMatthew Ahrens spa_t *spa = dmu_tx_pool(tx)->dp_spa;
26906eeb2adSek objset_t *mos = spa->spa_meta_objset;
27006eeb2adSek dmu_buf_t *dbp;
27106eeb2adSek spa_history_phys_t *shpp;
27206eeb2adSek size_t reclen;
27306eeb2adSek uint64_t le_len;
27406eeb2adSek char *record_packed = NULL;
27506eeb2adSek int ret;
27606eeb2adSek
27706eeb2adSek /*
27806eeb2adSek * If we have an older pool that doesn't have a command
27906eeb2adSek * history object, create it now.
28006eeb2adSek */
28106eeb2adSek mutex_enter(&spa->spa_history_lock);
28206eeb2adSek if (!spa->spa_history)
28306eeb2adSek spa_history_create_obj(spa, tx);
28406eeb2adSek mutex_exit(&spa->spa_history_lock);
28506eeb2adSek
28606eeb2adSek /*
28706eeb2adSek * Get the offset of where we need to write via the bonus buffer.
28806eeb2adSek * Update the offset when the write completes.
28906eeb2adSek */
2903b2aab18SMatthew Ahrens VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
29106eeb2adSek shpp = dbp->db_data;
29206eeb2adSek
29306eeb2adSek dmu_buf_will_dirty(dbp, tx);
29406eeb2adSek
29506eeb2adSek #ifdef ZFS_DEBUG
29606eeb2adSek {
29706eeb2adSek dmu_object_info_t doi;
29806eeb2adSek dmu_object_info_from_db(dbp, &doi);
29906eeb2adSek ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
30006eeb2adSek }
30106eeb2adSek #endif
30206eeb2adSek
3034445fffbSMatthew Ahrens fnvlist_add_uint64(nvl, ZPOOL_HIST_TIME, gethrestime_sec());
304ecd6cf80Smarks #ifdef _KERNEL
3054445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_HOST, utsname.nodename);
306ecd6cf80Smarks #endif
3074445fffbSMatthew Ahrens if (nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
3084445fffbSMatthew Ahrens zfs_dbgmsg("command: %s",
3094445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_CMD));
3104445fffbSMatthew Ahrens } else if (nvlist_exists(nvl, ZPOOL_HIST_INT_NAME)) {
3114445fffbSMatthew Ahrens if (nvlist_exists(nvl, ZPOOL_HIST_DSNAME)) {
3124445fffbSMatthew Ahrens zfs_dbgmsg("txg %lld %s %s (id %llu) %s",
3134445fffbSMatthew Ahrens fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
3144445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
3154445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME),
3164445fffbSMatthew Ahrens fnvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID),
3174445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
3184445fffbSMatthew Ahrens } else {
3194445fffbSMatthew Ahrens zfs_dbgmsg("txg %lld %s %s",
3204445fffbSMatthew Ahrens fnvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG),
3214445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
3224445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
3234445fffbSMatthew Ahrens }
324ce1577b0SDave Eddy /*
325ce1577b0SDave Eddy * The history sysevent is posted only for internal history
326ce1577b0SDave Eddy * messages to show what has happened, not how it happened. For
327ce1577b0SDave Eddy * example, the following command:
328ce1577b0SDave Eddy *
329ce1577b0SDave Eddy * # zfs destroy -r tank/foo
330ce1577b0SDave Eddy *
331ce1577b0SDave Eddy * will result in one sysevent posted per dataset that is
332ce1577b0SDave Eddy * destroyed as a result of the command - which could be more
333ce1577b0SDave Eddy * than one event in total. By contrast, if the sysevent was
334ce1577b0SDave Eddy * posted as a result of the ZPOOL_HIST_CMD key being present
335ce1577b0SDave Eddy * it would result in only one sysevent being posted with the
336ce1577b0SDave Eddy * full command line arguments, requiring the consumer to know
337*bbf21555SRichard Lowe * how to parse and understand zfs(8) command invocations.
338ce1577b0SDave Eddy */
339ce1577b0SDave Eddy spa_history_log_notify(spa, nvl);
3404445fffbSMatthew Ahrens } else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
3414445fffbSMatthew Ahrens zfs_dbgmsg("ioctl %s",
3424445fffbSMatthew Ahrens fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
343ecd6cf80Smarks }
344ecd6cf80Smarks
3454445fffbSMatthew Ahrens record_packed = fnvlist_pack(nvl, &reclen);
34606eeb2adSek
34706eeb2adSek mutex_enter(&spa->spa_history_lock);
34806eeb2adSek
34906eeb2adSek /* write out the packed length as little endian */
35055434c77Sek le_len = LE_64((uint64_t)reclen);
35106eeb2adSek ret = spa_history_write(spa, &le_len, sizeof (le_len), shpp, tx);
35206eeb2adSek if (!ret)
35306eeb2adSek ret = spa_history_write(spa, record_packed, reclen, shpp, tx);
35406eeb2adSek
3554445fffbSMatthew Ahrens /* The first command is the create, which we keep forever */
3564445fffbSMatthew Ahrens if (ret == 0 && shpp->sh_pool_create_len == 0 &&
3574445fffbSMatthew Ahrens nvlist_exists(nvl, ZPOOL_HIST_CMD)) {
3584445fffbSMatthew Ahrens shpp->sh_pool_create_len = shpp->sh_bof = shpp->sh_eof;
35906eeb2adSek }
36006eeb2adSek
36106eeb2adSek mutex_exit(&spa->spa_history_lock);
3624445fffbSMatthew Ahrens fnvlist_pack_free(record_packed, reclen);
36306eeb2adSek dmu_buf_rele(dbp, FTAG);
3644445fffbSMatthew Ahrens fnvlist_free(nvl);
36506eeb2adSek }
36606eeb2adSek
36706eeb2adSek /*
36806eeb2adSek * Write out a history event.
36906eeb2adSek */
37006eeb2adSek int
spa_history_log(spa_t * spa,const char * msg)3714445fffbSMatthew Ahrens spa_history_log(spa_t *spa, const char *msg)
3724445fffbSMatthew Ahrens {
3734445fffbSMatthew Ahrens int err;
3744445fffbSMatthew Ahrens nvlist_t *nvl = fnvlist_alloc();
3754445fffbSMatthew Ahrens
3764445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg);
3774445fffbSMatthew Ahrens err = spa_history_log_nvl(spa, nvl);
3784445fffbSMatthew Ahrens fnvlist_free(nvl);
3794445fffbSMatthew Ahrens return (err);
3804445fffbSMatthew Ahrens }
3814445fffbSMatthew Ahrens
3824445fffbSMatthew Ahrens int
spa_history_log_nvl(spa_t * spa,nvlist_t * nvl)3834445fffbSMatthew Ahrens spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
38406eeb2adSek {
385495807d7SMatthew Ahrens int err = 0;
386495807d7SMatthew Ahrens dmu_tx_t *tx;
387eb633035STom Caputi nvlist_t *nvarg, *in_nvl = NULL;
38806eeb2adSek
389cd1c8b85SMatthew Ahrens if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa))
390be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL));
391e7437265Sahrens
392eb633035STom Caputi err = nvlist_lookup_nvlist(nvl, ZPOOL_HIST_INPUT_NVL, &in_nvl);
393eb633035STom Caputi if (err == 0) {
394eb633035STom Caputi (void) nvlist_remove_all(in_nvl, ZPOOL_HIDDEN_ARGS);
395eb633035STom Caputi }
396eb633035STom Caputi
397495807d7SMatthew Ahrens tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
398495807d7SMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT);
399495807d7SMatthew Ahrens if (err) {
400495807d7SMatthew Ahrens dmu_tx_abort(tx);
401495807d7SMatthew Ahrens return (err);
402495807d7SMatthew Ahrens }
403495807d7SMatthew Ahrens
4044445fffbSMatthew Ahrens nvarg = fnvlist_dup(nvl);
4054445fffbSMatthew Ahrens if (spa_history_zone() != NULL) {
4064445fffbSMatthew Ahrens fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE,
4074445fffbSMatthew Ahrens spa_history_zone());
4084445fffbSMatthew Ahrens }
4094445fffbSMatthew Ahrens fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
410495807d7SMatthew Ahrens
411495807d7SMatthew Ahrens /* Kick this off asynchronously; errors are ignored. */
4123b2aab18SMatthew Ahrens dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
4137d46dc6cSMatthew Ahrens nvarg, 0, ZFS_SPACE_CHECK_NONE, tx);
414495807d7SMatthew Ahrens dmu_tx_commit(tx);
415495807d7SMatthew Ahrens
4164445fffbSMatthew Ahrens /* spa_history_log_sync will free nvl */
417495807d7SMatthew Ahrens return (err);
4184445fffbSMatthew Ahrens
41906eeb2adSek }
42006eeb2adSek
42106eeb2adSek /*
42206eeb2adSek * Read out the command history.
42306eeb2adSek */
42406eeb2adSek int
spa_history_get(spa_t * spa,uint64_t * offp,uint64_t * len,char * buf)42506eeb2adSek spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
42606eeb2adSek {
42706eeb2adSek objset_t *mos = spa->spa_meta_objset;
42806eeb2adSek dmu_buf_t *dbp;
42906eeb2adSek uint64_t read_len, phys_read_off, phys_eof;
43006eeb2adSek uint64_t leftover = 0;
43106eeb2adSek spa_history_phys_t *shpp;
43206eeb2adSek int err;
43306eeb2adSek
43406eeb2adSek /*
4354445fffbSMatthew Ahrens * If the command history doesn't exist (older pool),
43606eeb2adSek * that's ok, just return ENOENT.
43706eeb2adSek */
43806eeb2adSek if (!spa->spa_history)
439be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT));
44006eeb2adSek
441495807d7SMatthew Ahrens /*
442495807d7SMatthew Ahrens * The history is logged asynchronously, so when they request
443495807d7SMatthew Ahrens * the first chunk of history, make sure everything has been
444495807d7SMatthew Ahrens * synced to disk so that we get it.
445495807d7SMatthew Ahrens */
4463c708518SMark J Musante if (*offp == 0 && spa_writeable(spa))
447495807d7SMatthew Ahrens txg_wait_synced(spa_get_dsl(spa), 0);
448495807d7SMatthew Ahrens
44906eeb2adSek if ((err = dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp)) != 0)
45006eeb2adSek return (err);
45106eeb2adSek shpp = dbp->db_data;
45206eeb2adSek
45306eeb2adSek #ifdef ZFS_DEBUG
45406eeb2adSek {
45506eeb2adSek dmu_object_info_t doi;
45606eeb2adSek dmu_object_info_from_db(dbp, &doi);
45706eeb2adSek ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_SPA_HISTORY_OFFSETS);
45806eeb2adSek }
45906eeb2adSek #endif
46006eeb2adSek
46106eeb2adSek mutex_enter(&spa->spa_history_lock);
46206eeb2adSek phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
46306eeb2adSek
46406eeb2adSek if (*offp < shpp->sh_pool_create_len) {
46506eeb2adSek /* read in just the zpool create history */
46606eeb2adSek phys_read_off = *offp;
46706eeb2adSek read_len = MIN(*len, shpp->sh_pool_create_len -
46806eeb2adSek phys_read_off);
46906eeb2adSek } else {
47006eeb2adSek /*
47106eeb2adSek * Need to reset passed in offset to BOF if the passed in
47206eeb2adSek * offset has since been overwritten.
47306eeb2adSek */
47406eeb2adSek *offp = MAX(*offp, shpp->sh_bof);
47506eeb2adSek phys_read_off = spa_history_log_to_phys(*offp, shpp);
47606eeb2adSek
47706eeb2adSek /*
47806eeb2adSek * Read up to the minimum of what the user passed down or
47906eeb2adSek * the EOF (physical or logical). If we hit physical EOF,
48006eeb2adSek * use 'leftover' to read from the physical BOF.
48106eeb2adSek */
48206eeb2adSek if (phys_read_off <= phys_eof) {
48306eeb2adSek read_len = MIN(*len, phys_eof - phys_read_off);
48406eeb2adSek } else {
48506eeb2adSek read_len = MIN(*len,
48606eeb2adSek shpp->sh_phys_max_off - phys_read_off);
48706eeb2adSek if (phys_read_off + *len > shpp->sh_phys_max_off) {
48806eeb2adSek leftover = MIN(*len - read_len,
48906eeb2adSek phys_eof - shpp->sh_pool_create_len);
49006eeb2adSek }
49106eeb2adSek }
49206eeb2adSek }
49306eeb2adSek
49406eeb2adSek /* offset for consumer to use next */
49506eeb2adSek *offp += read_len + leftover;
49606eeb2adSek
49706eeb2adSek /* tell the consumer how much you actually read */
49806eeb2adSek *len = read_len + leftover;
49906eeb2adSek
50006eeb2adSek if (read_len == 0) {
50106eeb2adSek mutex_exit(&spa->spa_history_lock);
50206eeb2adSek dmu_buf_rele(dbp, FTAG);
50306eeb2adSek return (0);
50406eeb2adSek }
50506eeb2adSek
5067bfdf011SNeil Perrin err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
5077bfdf011SNeil Perrin DMU_READ_PREFETCH);
50806eeb2adSek if (leftover && err == 0) {
50906eeb2adSek err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
5107bfdf011SNeil Perrin leftover, buf + read_len, DMU_READ_PREFETCH);
51106eeb2adSek }
51206eeb2adSek mutex_exit(&spa->spa_history_lock);
51306eeb2adSek
51406eeb2adSek dmu_buf_rele(dbp, FTAG);
51506eeb2adSek return (err);
51606eeb2adSek }
517ecd6cf80Smarks
5184445fffbSMatthew Ahrens /*
5194445fffbSMatthew Ahrens * The nvlist will be consumed by this call.
5204445fffbSMatthew Ahrens */
521c8e1f6d2SMark J Musante static void
log_internal(nvlist_t * nvl,const char * operation,spa_t * spa,dmu_tx_t * tx,const char * fmt,va_list adx)5224445fffbSMatthew Ahrens log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
5233f9d6ad7SLin Ling dmu_tx_t *tx, const char *fmt, va_list adx)
524ecd6cf80Smarks {
5254445fffbSMatthew Ahrens char *msg;
526ecd6cf80Smarks
527088f3894Sahrens /*
528088f3894Sahrens * If this is part of creating a pool, not everything is
529088f3894Sahrens * initialized yet, so don't bother logging the internal events.
530cd1c8b85SMatthew Ahrens * Likewise if the pool is not writeable.
531088f3894Sahrens */
532cd1c8b85SMatthew Ahrens if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) {
533347eec8eSChristopher Siden fnvlist_free(nvl);
534088f3894Sahrens return;
535347eec8eSChristopher Siden }
536088f3894Sahrens
5374445fffbSMatthew Ahrens msg = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, KM_SLEEP);
5384445fffbSMatthew Ahrens (void) vsprintf(msg, fmt, adx);
5394445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg);
5404445fffbSMatthew Ahrens strfree(msg);
541ecd6cf80Smarks
5424445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_INT_NAME, operation);
5434445fffbSMatthew Ahrens fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
544e7437265Sahrens
545e7437265Sahrens if (dmu_tx_is_syncing(tx)) {
5463b2aab18SMatthew Ahrens spa_history_log_sync(nvl, tx);
547e7437265Sahrens } else {
5483b2aab18SMatthew Ahrens dsl_sync_task_nowait(spa_get_dsl(spa),
5497d46dc6cSMatthew Ahrens spa_history_log_sync, nvl, 0, ZFS_SPACE_CHECK_NONE, tx);
550e7437265Sahrens }
5514445fffbSMatthew Ahrens /* spa_history_log_sync() will free nvl */
552ecd6cf80Smarks }
553c8e1f6d2SMark J Musante
554c8e1f6d2SMark J Musante void
spa_history_log_internal(spa_t * spa,const char * operation,dmu_tx_t * tx,const char * fmt,...)5554445fffbSMatthew Ahrens spa_history_log_internal(spa_t *spa, const char *operation,
5563f9d6ad7SLin Ling dmu_tx_t *tx, const char *fmt, ...)
557c8e1f6d2SMark J Musante {
558c8e1f6d2SMark J Musante dmu_tx_t *htx = tx;
559c8e1f6d2SMark J Musante va_list adx;
560c8e1f6d2SMark J Musante
561c8e1f6d2SMark J Musante /* create a tx if we didn't get one */
562c8e1f6d2SMark J Musante if (tx == NULL) {
563c8e1f6d2SMark J Musante htx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
564c8e1f6d2SMark J Musante if (dmu_tx_assign(htx, TXG_WAIT) != 0) {
565c8e1f6d2SMark J Musante dmu_tx_abort(htx);
566c8e1f6d2SMark J Musante return;
567c8e1f6d2SMark J Musante }
568c8e1f6d2SMark J Musante }
569c8e1f6d2SMark J Musante
570c8e1f6d2SMark J Musante va_start(adx, fmt);
5714445fffbSMatthew Ahrens log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx);
572c8e1f6d2SMark J Musante va_end(adx);
573c8e1f6d2SMark J Musante
574c8e1f6d2SMark J Musante /* if we didn't get a tx from the caller, commit the one we made */
575c8e1f6d2SMark J Musante if (tx == NULL)
576c8e1f6d2SMark J Musante dmu_tx_commit(htx);
577c8e1f6d2SMark J Musante }
578c8e1f6d2SMark J Musante
579c8e1f6d2SMark J Musante void
spa_history_log_internal_ds(dsl_dataset_t * ds,const char * operation,dmu_tx_t * tx,const char * fmt,...)5804445fffbSMatthew Ahrens spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
5814445fffbSMatthew Ahrens dmu_tx_t *tx, const char *fmt, ...)
5824445fffbSMatthew Ahrens {
5834445fffbSMatthew Ahrens va_list adx;
5849adfa60dSMatthew Ahrens char namebuf[ZFS_MAX_DATASET_NAME_LEN];
5854445fffbSMatthew Ahrens nvlist_t *nvl = fnvlist_alloc();
5864445fffbSMatthew Ahrens
5874445fffbSMatthew Ahrens ASSERT(tx != NULL);
5884445fffbSMatthew Ahrens
5894445fffbSMatthew Ahrens dsl_dataset_name(ds, namebuf);
5904445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
5914445fffbSMatthew Ahrens fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object);
5924445fffbSMatthew Ahrens
5934445fffbSMatthew Ahrens va_start(adx, fmt);
5944445fffbSMatthew Ahrens log_internal(nvl, operation, dsl_dataset_get_spa(ds), tx, fmt, adx);
5954445fffbSMatthew Ahrens va_end(adx);
5964445fffbSMatthew Ahrens }
5974445fffbSMatthew Ahrens
5984445fffbSMatthew Ahrens void
spa_history_log_internal_dd(dsl_dir_t * dd,const char * operation,dmu_tx_t * tx,const char * fmt,...)5994445fffbSMatthew Ahrens spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
6004445fffbSMatthew Ahrens dmu_tx_t *tx, const char *fmt, ...)
6014445fffbSMatthew Ahrens {
6024445fffbSMatthew Ahrens va_list adx;
6039adfa60dSMatthew Ahrens char namebuf[ZFS_MAX_DATASET_NAME_LEN];
6044445fffbSMatthew Ahrens nvlist_t *nvl = fnvlist_alloc();
6054445fffbSMatthew Ahrens
6064445fffbSMatthew Ahrens ASSERT(tx != NULL);
6074445fffbSMatthew Ahrens
6084445fffbSMatthew Ahrens dsl_dir_name(dd, namebuf);
6094445fffbSMatthew Ahrens fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf);
6104445fffbSMatthew Ahrens fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID,
611c1379625SJustin T. Gibbs dsl_dir_phys(dd)->dd_head_dataset_obj);
6124445fffbSMatthew Ahrens
6134445fffbSMatthew Ahrens va_start(adx, fmt);
6144445fffbSMatthew Ahrens log_internal(nvl, operation, dd->dd_pool->dp_spa, tx, fmt, adx);
6154445fffbSMatthew Ahrens va_end(adx);
6164445fffbSMatthew Ahrens }
6174445fffbSMatthew Ahrens
6184445fffbSMatthew Ahrens void
spa_history_log_version(spa_t * spa,const char * operation)6194445fffbSMatthew Ahrens spa_history_log_version(spa_t *spa, const char *operation)
620c8e1f6d2SMark J Musante {
6214445fffbSMatthew Ahrens spa_history_log_internal(spa, operation, NULL,
622b11fe8c0SJustin Hibbits "pool version %llu; software version %llu/%llu; uts %s %s %s %s",
6233b2aab18SMatthew Ahrens (u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
6244445fffbSMatthew Ahrens utsname.nodename, utsname.release, utsname.version,
6254445fffbSMatthew Ahrens utsname.machine);
626c8e1f6d2SMark J Musante }
627