1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/mkdev.h>
29#include <sys/stat.h>
30
31#include <strings.h>
32#include <unistd.h>
33#include <limits.h>
34#include <fcntl.h>
35
36#include <fmd_module.h>
37#include <fmd_error.h>
38#include <fmd_alloc.h>
39#include <fmd_case.h>
40#include <fmd_serd.h>
41#include <fmd_subr.h>
42#include <fmd_conf.h>
43#include <fmd_event.h>
44#include <fmd_log.h>
45#include <fmd_api.h>
46#include <fmd_ckpt.h>
47
48#include <fmd.h>
49
/*
 * Power-of-two helpers; 'align' and 'a' must be powers of two.
 *
 * P2ROUNDUP(x, align) rounds x up to the next multiple of align.  The
 * arithmetic is carried out in the usual-arithmetic-conversion type of the
 * two operands, so x and align should have the same width: if align is an
 * unsigned type narrower than x, -(align) is zero-extended before the AND
 * and the upper bits of the result are wrong.
 *
 * IS_P2ALIGNED(v, a) is true when v is a multiple of a.
 */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
#define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
52
53/*
54 * The fmd_ckpt_t structure is used to manage all of the state needed by the
55 * various subroutines that save and restore checkpoints.  The structure is
56 * initialized using fmd_ckpt_create() or fmd_ckpt_open() and is destroyed
57 * by fmd_ckpt_destroy().  Refer to the subroutines below for more details.
58 */
59typedef struct fmd_ckpt {
60	char ckp_src[PATH_MAX];	/* ckpt input or output filename */
61	char ckp_dst[PATH_MAX];	/* ckpt rename filename */
62	uchar_t *ckp_buf;	/* data buffer base address */
63	fcf_hdr_t *ckp_hdr;	/* file header pointer */
64	uchar_t *ckp_ptr;	/* data buffer pointer */
65	size_t ckp_size;	/* data buffer size */
66	fcf_sec_t *ckp_secp;	/* section header table pointer */
67	fcf_sec_t *ckp_modp;	/* section header for module */
68	uint_t ckp_secs;	/* number of sections */
69	char *ckp_strs;		/* string table base pointer */
70	char *ckp_strp;		/* string table pointer */
71	size_t ckp_strn;	/* string table size */
72	int ckp_fd;		/* output descriptor */
73	fmd_module_t *ckp_mp;	/* checkpoint module */
74	void *ckp_arg;		/* private arg for callbacks */
75} fmd_ckpt_t;
76
/*
 * Per-section-type description: one of these exists for every FCF section
 * type in the _fmd_ckpt_sections[] table.
 */
typedef struct fmd_ckpt_desc {
	uint64_t secd_size;		/* smallest legal section size */
	uint32_t secd_entsize;		/* smallest legal entry size */
	uint32_t secd_align;		/* required section alignment */
} fmd_ckpt_desc_t;
82
83/*
84 * Table of FCF section descriptions.  Here we record the minimum size for each
85 * section (for use during restore) and the expected entry size and alignment
86 * for each section (for use during both checkpoint and restore).
87 */
88static const fmd_ckpt_desc_t _fmd_ckpt_sections[] = {
89{ 0, 0, sizeof (uint8_t) },					   /* NONE */
90{ 1, 0, sizeof (char) },					   /* STRTAB */
91{ sizeof (fcf_module_t), 0, sizeof (uint32_t) },		   /* MODULE */
92{ sizeof (fcf_case_t), 0, sizeof (uint32_t) },			   /* CASE */
93{ sizeof (fcf_buf_t), sizeof (fcf_buf_t), sizeof (uint32_t) },	   /* BUFS */
94{ 0, 0, _MAX_ALIGNMENT },					   /* BUFFER */
95{ sizeof (fcf_serd_t), sizeof (fcf_serd_t), sizeof (uint64_t) },   /* SERD */
96{ sizeof (fcf_event_t), sizeof (fcf_event_t), sizeof (uint64_t) }, /* EVENTS */
97{ sizeof (fcf_nvl_t), sizeof (fcf_nvl_t), sizeof (uint64_t) },	   /* NVLISTS */
98};
99
100static int
101fmd_ckpt_create(fmd_ckpt_t *ckp, fmd_module_t *mp)
102{
103	const char *dir = mp->mod_ckpt;
104	const char *name = mp->mod_name;
105	mode_t mode;
106
107	bzero(ckp, sizeof (fmd_ckpt_t));
108	ckp->ckp_mp = mp;
109
110	ckp->ckp_size = sizeof (fcf_hdr_t);
111	ckp->ckp_strn = 1; /* for \0 */
112
113	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s+", dir, name);
114	(void) snprintf(ckp->ckp_dst, PATH_MAX, "%s/%s", dir, name);
115
116	(void) unlink(ckp->ckp_src);
117	(void) fmd_conf_getprop(fmd.d_conf, "ckpt.mode", &mode);
118	ckp->ckp_fd = open64(ckp->ckp_src, O_WRONLY | O_CREAT | O_EXCL, mode);
119
120	return (ckp->ckp_fd);
121}
122
123/*PRINTFLIKE2*/
124static int
125fmd_ckpt_inval(fmd_ckpt_t *ckp, const char *format, ...)
126{
127	va_list ap;
128
129	va_start(ap, format);
130	fmd_verror(EFMD_CKPT_INVAL, format, ap);
131	va_end(ap);
132
133	fmd_free(ckp->ckp_buf, ckp->ckp_size);
134	return (fmd_set_errno(EFMD_CKPT_INVAL));
135}
136
137static int
138fmd_ckpt_open(fmd_ckpt_t *ckp, fmd_module_t *mp)
139{
140	struct stat64 st;
141	uint64_t seclen;
142	uint_t i;
143	int err;
144
145	bzero(ckp, sizeof (fmd_ckpt_t));
146	ckp->ckp_mp = mp;
147
148	(void) snprintf(ckp->ckp_src, PATH_MAX, "%s/%s",
149	    mp->mod_ckpt, mp->mod_name);
150
151	if ((ckp->ckp_fd = open(ckp->ckp_src, O_RDONLY)) == -1)
152		return (-1); /* failed to open checkpoint file */
153
154	if (fstat64(ckp->ckp_fd, &st) == -1) {
155		err = errno;
156		(void) close(ckp->ckp_fd);
157		return (fmd_set_errno(err));
158	}
159
160	ckp->ckp_buf = fmd_alloc(st.st_size, FMD_SLEEP);
161	ckp->ckp_hdr = (void *)ckp->ckp_buf;
162	ckp->ckp_size = read(ckp->ckp_fd, ckp->ckp_buf, st.st_size);
163
164	if (ckp->ckp_size != st.st_size || ckp->ckp_size < sizeof (fcf_hdr_t) ||
165	    ckp->ckp_size != ckp->ckp_hdr->fcfh_filesz) {
166		err = ckp->ckp_size == (size_t)-1L ? errno : EFMD_CKPT_SHORT;
167		fmd_free(ckp->ckp_buf, st.st_size);
168		(void) close(ckp->ckp_fd);
169		return (fmd_set_errno(err));
170	}
171
172	(void) close(ckp->ckp_fd);
173	ckp->ckp_fd = -1;
174
175	/*
176	 * Once we've read in a consistent copy of the FCF file and we're sure
177	 * the header can be accessed, go through it and make sure everything
178	 * is valid.  We also check that unused bits are zero so we can expand
179	 * to use them safely in the future and support old files if needed.
180	 */
181	if (bcmp(&ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0],
182	    FCF_MAG_STRING, FCF_MAG_STRLEN) != 0)
183		return (fmd_ckpt_inval(ckp, "bad checkpoint magic string\n"));
184
185	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] != FCF_MODEL_NATIVE)
186		return (fmd_ckpt_inval(ckp, "bad checkpoint data model\n"));
187
188	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] != FCF_ENCODE_NATIVE)
189		return (fmd_ckpt_inval(ckp, "bad checkpoint data encoding\n"));
190
191	if (ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] != FCF_VERSION_1) {
192		return (fmd_ckpt_inval(ckp, "bad checkpoint version %u\n",
193		    ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION]));
194	}
195
196	for (i = FCF_ID_PAD; i < FCF_ID_SIZE; i++) {
197		if (ckp->ckp_hdr->fcfh_ident[i] != 0) {
198			return (fmd_ckpt_inval(ckp,
199			    "bad checkpoint padding at id[%d]", i));
200		}
201	}
202
203	if (ckp->ckp_hdr->fcfh_flags & ~FCF_FL_VALID)
204		return (fmd_ckpt_inval(ckp, "bad checkpoint flags\n"));
205
206	if (ckp->ckp_hdr->fcfh_pad != 0)
207		return (fmd_ckpt_inval(ckp, "reserved field in use\n"));
208
209	if (ckp->ckp_hdr->fcfh_hdrsize < sizeof (fcf_hdr_t) ||
210	    ckp->ckp_hdr->fcfh_secsize < sizeof (fcf_sec_t)) {
211		return (fmd_ckpt_inval(ckp,
212		    "bad header and/or section size\n"));
213	}
214
215	seclen = (uint64_t)ckp->ckp_hdr->fcfh_secnum *
216	    (uint64_t)ckp->ckp_hdr->fcfh_secsize;
217
218	if (ckp->ckp_hdr->fcfh_secoff > ckp->ckp_size ||
219	    seclen > ckp->ckp_size ||
220	    ckp->ckp_hdr->fcfh_secoff + seclen > ckp->ckp_size ||
221	    ckp->ckp_hdr->fcfh_secoff + seclen < ckp->ckp_hdr->fcfh_secoff)
222		return (fmd_ckpt_inval(ckp, "truncated section headers\n"));
223
224	if (!IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secoff, sizeof (uint64_t)) ||
225	    !IS_P2ALIGNED(ckp->ckp_hdr->fcfh_secsize, sizeof (uint64_t)))
226		return (fmd_ckpt_inval(ckp, "misaligned section headers\n"));
227
228	/*
229	 * Once the header is validated, iterate over the section headers
230	 * ensuring that each one is valid w.r.t. offset, alignment, and size.
231	 * We also pick up the string table pointer during this pass.
232	 */
233	ckp->ckp_secp = (void *)(ckp->ckp_buf + ckp->ckp_hdr->fcfh_secoff);
234	ckp->ckp_secs = ckp->ckp_hdr->fcfh_secnum;
235
236	for (i = 0; i < ckp->ckp_secs; i++) {
237		fcf_sec_t *sp = (void *)(ckp->ckp_buf +
238		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
239
240		const fmd_ckpt_desc_t *dp = &_fmd_ckpt_sections[sp->fcfs_type];
241
242		if (sp->fcfs_flags != 0) {
243			return (fmd_ckpt_inval(ckp, "section %u has invalid "
244			    "section flags (0x%x)\n", i, sp->fcfs_flags));
245		}
246
247		if (sp->fcfs_align & (sp->fcfs_align - 1)) {
248			return (fmd_ckpt_inval(ckp, "section %u has invalid "
249			    "alignment (%u)\n", i, sp->fcfs_align));
250		}
251
252		if (sp->fcfs_offset & (sp->fcfs_align - 1)) {
253			return (fmd_ckpt_inval(ckp, "section %u is not properly"
254			    " aligned (offset %llu)\n", i, sp->fcfs_offset));
255		}
256
257		if (sp->fcfs_entsize != 0 &&
258		    (sp->fcfs_entsize & (sp->fcfs_align - 1)) != 0) {
259			return (fmd_ckpt_inval(ckp, "section %u has misaligned "
260			    "entsize %u\n", i, sp->fcfs_entsize));
261		}
262
263		if (sp->fcfs_offset > ckp->ckp_size ||
264		    sp->fcfs_size > ckp->ckp_size ||
265		    sp->fcfs_offset + sp->fcfs_size > ckp->ckp_size ||
266		    sp->fcfs_offset + sp->fcfs_size < sp->fcfs_offset) {
267			return (fmd_ckpt_inval(ckp, "section %u has corrupt "
268			    "size or offset\n", i));
269		}
270
271		if (sp->fcfs_type >= sizeof (_fmd_ckpt_sections) /
272		    sizeof (_fmd_ckpt_sections[0])) {
273			return (fmd_ckpt_inval(ckp, "section %u has unknown "
274			    "section type %u\n", i, sp->fcfs_type));
275		}
276
277		if (sp->fcfs_align != dp->secd_align) {
278			return (fmd_ckpt_inval(ckp, "section %u has align %u "
279			    "(not %u)\n", i, sp->fcfs_align, dp->secd_align));
280		}
281
282		if (sp->fcfs_size < dp->secd_size ||
283		    sp->fcfs_entsize < dp->secd_entsize) {
284			return (fmd_ckpt_inval(ckp, "section %u has short "
285			    "size or entsize\n", i));
286		}
287
288		switch (sp->fcfs_type) {
289		case FCF_SECT_STRTAB:
290			if (ckp->ckp_strs != NULL) {
291				return (fmd_ckpt_inval(ckp, "multiple string "
292				    "tables are present in checkpoint file\n"));
293			}
294
295			ckp->ckp_strs = (char *)ckp->ckp_buf + sp->fcfs_offset;
296			ckp->ckp_strn = sp->fcfs_size;
297
298			if (ckp->ckp_strs[ckp->ckp_strn - 1] != '\0') {
299				return (fmd_ckpt_inval(ckp, "string table %u "
300				    "is missing terminating nul byte\n", i));
301			}
302			break;
303
304		case FCF_SECT_MODULE:
305			if (ckp->ckp_modp != NULL) {
306				return (fmd_ckpt_inval(ckp, "multiple module "
307				    "sects are present in checkpoint file\n"));
308			}
309			ckp->ckp_modp = sp;
310			break;
311		}
312	}
313
314	/*
315	 * Ensure that the first section is an empty one of type FCF_SECT_NONE.
316	 * This is done to ensure that links can use index 0 as a null section.
317	 */
318	if (ckp->ckp_secs == 0 || ckp->ckp_secp->fcfs_type != FCF_SECT_NONE ||
319	    ckp->ckp_secp->fcfs_entsize != 0 || ckp->ckp_secp->fcfs_size != 0) {
320		return (fmd_ckpt_inval(ckp, "section 0 is not of the "
321		    "appropriate size and/or attributes (SECT_NONE)\n"));
322	}
323
324	if (ckp->ckp_modp == NULL) {
325		return (fmd_ckpt_inval(ckp,
326		    "no module section found in file\n"));
327	}
328
329	return (0);
330}
331
332static void
333fmd_ckpt_destroy(fmd_ckpt_t *ckp)
334{
335	if (ckp->ckp_buf != NULL)
336		fmd_free(ckp->ckp_buf, ckp->ckp_size);
337	if (ckp->ckp_fd >= 0)
338		(void) close(ckp->ckp_fd);
339}
340
341/*
342 * fmd_ckpt_error() is used as a wrapper around fmd_error() for ckpt routines.
343 * It calls fmd_module_unlock() on behalf of its caller, logs the error, and
344 * then aborts the API call and the surrounding module entry point by doing an
345 * fmd_module_abort(), which longjmps to the place where we entered the module.
346 * Depending on the type of error and conf settings, we will reset or fail.
347 */
348/*PRINTFLIKE3*/
349static void
350fmd_ckpt_error(fmd_ckpt_t *ckp, int err, const char *format, ...)
351{
352	fmd_module_t *mp = ckp->ckp_mp;
353	va_list ap;
354
355	va_start(ap, format);
356	fmd_verror(err, format, ap);
357	va_end(ap);
358
359	if (fmd_module_locked(mp))
360		fmd_module_unlock(mp);
361
362	fmd_ckpt_destroy(ckp);
363	fmd_module_abort(mp, err);
364}
365
366static fcf_secidx_t
367fmd_ckpt_section(fmd_ckpt_t *ckp, const void *data, uint_t type, uint64_t size)
368{
369	const fmd_ckpt_desc_t *dp;
370
371	ASSERT(type < sizeof (_fmd_ckpt_sections) / sizeof (fmd_ckpt_desc_t));
372	dp = &_fmd_ckpt_sections[type];
373
374	ckp->ckp_ptr = (uchar_t *)
375	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, dp->secd_align);
376
377	ckp->ckp_secp->fcfs_type = type;
378	ckp->ckp_secp->fcfs_align = dp->secd_align;
379	ckp->ckp_secp->fcfs_flags = 0;
380	ckp->ckp_secp->fcfs_entsize = dp->secd_entsize;
381	ckp->ckp_secp->fcfs_offset = (size_t)(ckp->ckp_ptr - ckp->ckp_buf);
382	ckp->ckp_secp->fcfs_size = size;
383
384	/*
385	 * If the data pointer is non-NULL, copy the data to our buffer; else
386	 * the caller is responsible for doing so and updating ckp->ckp_ptr.
387	 */
388	if (data != NULL) {
389		bcopy(data, ckp->ckp_ptr, size);
390		ckp->ckp_ptr += size;
391	}
392
393	ckp->ckp_secp++;
394	return (ckp->ckp_secs++);
395}
396
397static fcf_stridx_t
398fmd_ckpt_string(fmd_ckpt_t *ckp, const char *s)
399{
400	fcf_stridx_t idx = (fcf_stridx_t)(ckp->ckp_strp - ckp->ckp_strs);
401
402	(void) strcpy(ckp->ckp_strp, s);
403	ckp->ckp_strp += strlen(s) + 1;
404
405	return (idx);
406}
407
408static int
409fmd_ckpt_alloc(fmd_ckpt_t *ckp, uint64_t gen)
410{
411	/*
412	 * We've added up all the sections by now: add two more for SECT_NONE
413	 * and SECT_STRTAB, and add the size of the section header table and
414	 * string table to the total size.  We know that the fcf_hdr_t is
415	 * aligned so that that fcf_sec_t's can follow it, and that fcf_sec_t
416	 * is aligned so that any section can follow it, so no extra padding
417	 * bytes need to be allocated between any of these items.
418	 */
419	ckp->ckp_secs += 2; /* for FCF_SECT_NONE and FCF_SECT_STRTAB */
420	ckp->ckp_size += sizeof (fcf_sec_t) * ckp->ckp_secs;
421	ckp->ckp_size += ckp->ckp_strn;
422
423	TRACE((FMD_DBG_CKPT, "alloc fcf buf size %u", ckp->ckp_size));
424	ckp->ckp_buf = fmd_zalloc(ckp->ckp_size, FMD_NOSLEEP);
425
426	if (ckp->ckp_buf == NULL)
427		return (-1); /* errno is set for us */
428
429	ckp->ckp_hdr = (void *)ckp->ckp_buf;
430
431	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG0] = FCF_MAG_MAG0;
432	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG1] = FCF_MAG_MAG1;
433	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG2] = FCF_MAG_MAG2;
434	ckp->ckp_hdr->fcfh_ident[FCF_ID_MAG3] = FCF_MAG_MAG3;
435	ckp->ckp_hdr->fcfh_ident[FCF_ID_MODEL] = FCF_MODEL_NATIVE;
436	ckp->ckp_hdr->fcfh_ident[FCF_ID_ENCODING] = FCF_ENCODE_NATIVE;
437	ckp->ckp_hdr->fcfh_ident[FCF_ID_VERSION] = FCF_VERSION;
438
439	ckp->ckp_hdr->fcfh_hdrsize = sizeof (fcf_hdr_t);
440	ckp->ckp_hdr->fcfh_secsize = sizeof (fcf_sec_t);
441	ckp->ckp_hdr->fcfh_secnum = ckp->ckp_secs;
442	ckp->ckp_hdr->fcfh_secoff = sizeof (fcf_hdr_t);
443	ckp->ckp_hdr->fcfh_filesz = ckp->ckp_size;
444	ckp->ckp_hdr->fcfh_cgen = gen;
445
446	ckp->ckp_secs = 0; /* reset section counter for second pass */
447	ckp->ckp_secp = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
448	ckp->ckp_strs = (char *)ckp->ckp_buf + ckp->ckp_size - ckp->ckp_strn;
449	ckp->ckp_strp = ckp->ckp_strs + 1; /* use first byte as \0 */
450	ckp->ckp_ptr = (uchar_t *)(ckp->ckp_secp + ckp->ckp_hdr->fcfh_secnum);
451
452	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_NONE, 0);
453	return (0);
454}
455
456static int
457fmd_ckpt_commit(fmd_ckpt_t *ckp)
458{
459	fcf_sec_t *secbase = (void *)(ckp->ckp_buf + sizeof (fcf_hdr_t));
460	size_t stroff = ckp->ckp_size - ckp->ckp_strn;
461
462	/*
463	 * Before committing the checkpoint, we assert that fmd_ckpt_t's sizes
464	 * and current pointer locations all add up appropriately.  Any ASSERTs
465	 * which trip here likely indicate an inconsistency in the code for the
466	 * reservation pass and the buffer update pass of the FCF subroutines.
467	 */
468	ASSERT((size_t)(ckp->ckp_ptr - ckp->ckp_buf) == stroff);
469	(void) fmd_ckpt_section(ckp, NULL, FCF_SECT_STRTAB, ckp->ckp_strn);
470	ckp->ckp_ptr += ckp->ckp_strn; /* string table is already filled in */
471
472	ASSERT(ckp->ckp_secs == ckp->ckp_hdr->fcfh_secnum);
473	ASSERT(ckp->ckp_secp == secbase + ckp->ckp_hdr->fcfh_secnum);
474	ASSERT(ckp->ckp_ptr == ckp->ckp_buf + ckp->ckp_hdr->fcfh_filesz);
475
476	if (write(ckp->ckp_fd, ckp->ckp_buf, ckp->ckp_size) != ckp->ckp_size ||
477	    fsync(ckp->ckp_fd) != 0 || close(ckp->ckp_fd) != 0)
478		return (-1); /* errno is set for us */
479
480	ckp->ckp_fd = -1; /* fd is now closed */
481	return (rename(ckp->ckp_src, ckp->ckp_dst) != 0);
482}
483
484static void
485fmd_ckpt_resv(fmd_ckpt_t *ckp, size_t size, size_t align)
486{
487	if (size != 0) {
488		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, align) + size;
489		ckp->ckp_secs++;
490	}
491}
492
493static void
494fmd_ckpt_resv_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
495{
496	ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, _MAX_ALIGNMENT) + bp->buf_size;
497	ckp->ckp_strn += strlen(bp->buf_name) + 1;
498	ckp->ckp_secs++;
499}
500
501static void
502fmd_ckpt_save_buf(fmd_buf_t *bp, fmd_ckpt_t *ckp)
503{
504	fcf_buf_t *fcfb = ckp->ckp_arg;
505
506	fcfb->fcfb_name = fmd_ckpt_string(ckp, bp->buf_name);
507	fcfb->fcfb_data = fmd_ckpt_section(ckp,
508	    bp->buf_data, FCF_SECT_BUFFER, bp->buf_size);
509
510	ckp->ckp_arg = fcfb + 1;
511}
512
513static void
514fmd_ckpt_save_event(fmd_ckpt_t *ckp, fmd_event_t *e)
515{
516	fcf_event_t *fcfe = (void *)ckp->ckp_ptr;
517	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
518	fmd_log_t *lp = ep->ev_log;
519
520	fcfe->fcfe_todsec = ep->ev_time.ftv_sec;
521	fcfe->fcfe_todnsec = ep->ev_time.ftv_nsec;
522	fcfe->fcfe_major = lp ? major(lp->log_stat.st_dev) : -1U;
523	fcfe->fcfe_minor = lp ? minor(lp->log_stat.st_dev) : -1U;
524	fcfe->fcfe_inode = lp ? lp->log_stat.st_ino : -1ULL;
525	fcfe->fcfe_offset = ep->ev_off;
526
527	ckp->ckp_ptr += sizeof (fcf_event_t);
528}
529
530static void
531fmd_ckpt_save_nvlist(fmd_ckpt_t *ckp, nvlist_t *nvl)
532{
533	fcf_nvl_t *fcfn = (void *)ckp->ckp_ptr;
534	char *nvbuf = (char *)ckp->ckp_ptr + sizeof (fcf_nvl_t);
535	size_t nvsize = 0;
536
537	(void) nvlist_size(nvl, &nvsize, NV_ENCODE_NATIVE);
538	fcfn->fcfn_size = (uint64_t)nvsize;
539
540	(void) nvlist_pack(nvl, &nvbuf, &nvsize, NV_ENCODE_NATIVE, 0);
541	ckp->ckp_ptr += sizeof (fcf_nvl_t) + nvsize;
542
543	ckp->ckp_ptr = (uchar_t *)
544	    P2ROUNDUP((uintptr_t)ckp->ckp_ptr, sizeof (uint64_t));
545}
546
547static void
548fmd_ckpt_resv_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
549{
550	fmd_ckpt_resv(ckp,
551	    sizeof (fcf_event_t) * sgp->sg_count, sizeof (uint64_t));
552
553	ckp->ckp_strn += strlen(sgp->sg_name) + 1;
554}
555
556static void
557fmd_ckpt_save_serd(fmd_serd_eng_t *sgp, fmd_ckpt_t *ckp)
558{
559	fcf_serd_t *fcfd = ckp->ckp_arg;
560	fcf_secidx_t evsec = FCF_SECT_NONE;
561	fmd_serd_elem_t *sep;
562
563	if (sgp->sg_count != 0) {
564		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
565		    sizeof (fcf_event_t) * sgp->sg_count);
566
567		for (sep = fmd_list_next(&sgp->sg_list);
568		    sep != NULL; sep = fmd_list_next(sep))
569			fmd_ckpt_save_event(ckp, sep->se_event);
570	}
571
572	fcfd->fcfd_name = fmd_ckpt_string(ckp, sgp->sg_name);
573	fcfd->fcfd_events = evsec;
574	fcfd->fcfd_pad = 0;
575	fcfd->fcfd_n = sgp->sg_n;
576	fcfd->fcfd_t = sgp->sg_t;
577
578	ckp->ckp_arg = fcfd + 1;
579}
580
581static void
582fmd_ckpt_resv_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
583{
584	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
585	fmd_case_susp_t *cis;
586	uint_t n;
587
588	if (cip->ci_xprt != NULL)
589		return; /* do not checkpoint cases from remote transports */
590
591	n = fmd_buf_hash_count(&cip->ci_bufs);
592	fmd_buf_hash_apply(&cip->ci_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
593	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
594
595	if (cip->ci_principal != NULL)
596		fmd_ckpt_resv(ckp, sizeof (fcf_event_t), sizeof (uint64_t));
597
598	fmd_ckpt_resv(ckp,
599	    sizeof (fcf_event_t) * cip->ci_nitems, sizeof (uint64_t));
600
601	if (cip->ci_nsuspects != 0)
602		ckp->ckp_size = P2ROUNDUP(ckp->ckp_size, sizeof (uint64_t));
603
604	cip->ci_nvsz = 0; /* compute size of packed suspect nvlist array */
605
606	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
607		size_t nvsize = 0;
608
609		(void) nvlist_size(cis->cis_nvl, &nvsize, NV_ENCODE_NATIVE);
610		cip->ci_nvsz += sizeof (fcf_nvl_t) + nvsize;
611		cip->ci_nvsz = P2ROUNDUP(cip->ci_nvsz, sizeof (uint64_t));
612	}
613
614	fmd_ckpt_resv(ckp, cip->ci_nvsz, sizeof (uint64_t));
615	fmd_ckpt_resv(ckp, sizeof (fcf_case_t), sizeof (uint32_t));
616	ckp->ckp_strn += strlen(cip->ci_uuid) + 1;
617}
618
619static void
620fmd_ckpt_save_case(fmd_ckpt_t *ckp, fmd_case_t *cp)
621{
622	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
623
624	fmd_case_item_t *cit;
625	fmd_case_susp_t *cis;
626	fcf_case_t fcfc;
627	uint_t n;
628
629	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
630	fcf_secidx_t evsec = FCF_SECIDX_NONE;
631	fcf_secidx_t nvsec = FCF_SECIDX_NONE;
632	fcf_secidx_t prsec = FCF_SECIDX_NONE;
633
634	if (cip->ci_xprt != NULL)
635		return; /* do not checkpoint cases from remote transports */
636
637	if ((n = fmd_buf_hash_count(&cip->ci_bufs)) != 0) {
638		size_t size = sizeof (fcf_buf_t) * n;
639		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
640
641		fmd_buf_hash_apply(&cip->ci_bufs,
642		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
643
644		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
645		fmd_free(bufs, size);
646	}
647
648	if (cip->ci_principal != NULL) {
649		prsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
650		    sizeof (fcf_event_t));
651
652		fmd_ckpt_save_event(ckp, cip->ci_principal);
653	}
654
655	if (cip->ci_nitems != 0) {
656		evsec = fmd_ckpt_section(ckp, NULL, FCF_SECT_EVENTS,
657		    sizeof (fcf_event_t) * cip->ci_nitems);
658
659		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
660			fmd_ckpt_save_event(ckp, cit->cit_event);
661	}
662
663	if (cip->ci_nsuspects != 0) {
664		nvsec = fmd_ckpt_section(ckp, NULL,
665		    FCF_SECT_NVLISTS, cip->ci_nvsz);
666
667		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
668			fmd_ckpt_save_nvlist(ckp, cis->cis_nvl);
669	}
670
671	fcfc.fcfc_uuid = fmd_ckpt_string(ckp, cip->ci_uuid);
672	fcfc.fcfc_bufs = bufsec;
673	fcfc.fcfc_principal = prsec;
674	fcfc.fcfc_events = evsec;
675	fcfc.fcfc_suspects = nvsec;
676
677	switch (cip->ci_state) {
678	case FMD_CASE_UNSOLVED:
679		fcfc.fcfc_state = FCF_CASE_UNSOLVED;
680		break;
681	case FMD_CASE_SOLVED:
682		fcfc.fcfc_state = FCF_CASE_SOLVED;
683		break;
684	case FMD_CASE_CLOSE_WAIT:
685		fcfc.fcfc_state = FCF_CASE_CLOSE_WAIT;
686		break;
687	default:
688		fmd_panic("case %p (%s) has invalid state %u",
689		    (void *)cp, cip->ci_uuid, cip->ci_state);
690	}
691
692	(void) fmd_ckpt_section(ckp, &fcfc, FCF_SECT_CASE, sizeof (fcf_case_t));
693}
694
695static void
696fmd_ckpt_resv_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
697{
698	fmd_case_t *cp;
699	uint_t n;
700
701	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
702		fmd_ckpt_resv_case(ckp, cp);
703
704	n = fmd_serd_hash_count(&mp->mod_serds);
705	fmd_serd_hash_apply(&mp->mod_serds,
706	    (fmd_serd_eng_f *)fmd_ckpt_resv_serd, ckp);
707	fmd_ckpt_resv(ckp, sizeof (fcf_serd_t) * n, sizeof (uint64_t));
708
709	n = fmd_buf_hash_count(&mp->mod_bufs);
710	fmd_buf_hash_apply(&mp->mod_bufs, (fmd_buf_f *)fmd_ckpt_resv_buf, ckp);
711	fmd_ckpt_resv(ckp, sizeof (fcf_buf_t) * n, sizeof (uint32_t));
712
713	fmd_ckpt_resv(ckp, sizeof (fcf_module_t), sizeof (uint32_t));
714	ckp->ckp_strn += strlen(mp->mod_name) + 1;
715	ckp->ckp_strn += strlen(mp->mod_path) + 1;
716	ckp->ckp_strn += strlen(mp->mod_info->fmdi_desc) + 1;
717	ckp->ckp_strn += strlen(mp->mod_info->fmdi_vers) + 1;
718}
719
720static void
721fmd_ckpt_save_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
722{
723	fcf_secidx_t bufsec = FCF_SECIDX_NONE;
724	fcf_module_t fcfm;
725	fmd_case_t *cp;
726	uint_t n;
727
728	for (cp = fmd_list_next(&mp->mod_cases); cp; cp = fmd_list_next(cp))
729		fmd_ckpt_save_case(ckp, cp);
730
731	if ((n = fmd_serd_hash_count(&mp->mod_serds)) != 0) {
732		size_t size = sizeof (fcf_serd_t) * n;
733		fcf_serd_t *serds = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
734
735		fmd_serd_hash_apply(&mp->mod_serds,
736		    (fmd_serd_eng_f *)fmd_ckpt_save_serd, ckp);
737
738		(void) fmd_ckpt_section(ckp, serds, FCF_SECT_SERD, size);
739		fmd_free(serds, size);
740	}
741
742	if ((n = fmd_buf_hash_count(&mp->mod_bufs)) != 0) {
743		size_t size = sizeof (fcf_buf_t) * n;
744		fcf_buf_t *bufs = ckp->ckp_arg = fmd_alloc(size, FMD_SLEEP);
745
746		fmd_buf_hash_apply(&mp->mod_bufs,
747		    (fmd_buf_f *)fmd_ckpt_save_buf, ckp);
748
749		bufsec = fmd_ckpt_section(ckp, bufs, FCF_SECT_BUFS, size);
750		fmd_free(bufs, size);
751	}
752
753	fcfm.fcfm_name = fmd_ckpt_string(ckp, mp->mod_name);
754	fcfm.fcfm_path = fmd_ckpt_string(ckp, mp->mod_path);
755	fcfm.fcfm_desc = fmd_ckpt_string(ckp, mp->mod_info->fmdi_desc);
756	fcfm.fcfm_vers = fmd_ckpt_string(ckp, mp->mod_info->fmdi_vers);
757	fcfm.fcfm_bufs = bufsec;
758
759	(void) fmd_ckpt_section(ckp, &fcfm,
760	    FCF_SECT_MODULE, sizeof (fcf_module_t));
761}
762
763void
764fmd_ckpt_save(fmd_module_t *mp)
765{
766	struct stat64 st;
767	char path[PATH_MAX];
768	mode_t dirmode;
769
770	hrtime_t now = gethrtime();
771	fmd_ckpt_t ckp;
772	int err;
773
774	ASSERT(fmd_module_locked(mp));
775
776	/*
777	 * If checkpointing is disabled for the module, just return.  We must
778	 * commit the module state anyway to transition pending log events.
779	 */
780	if (mp->mod_stats->ms_ckpt_save.fmds_value.bool == FMD_B_FALSE) {
781		fmd_module_commit(mp);
782		return;
783	}
784
785	if (!(mp->mod_flags & (FMD_MOD_MDIRTY | FMD_MOD_CDIRTY)))
786		return; /* no checkpoint is necessary for this module */
787
788	TRACE((FMD_DBG_CKPT, "ckpt save begin %s %llu",
789	    mp->mod_name, mp->mod_gen + 1));
790
791	/*
792	 * If the per-module checkpoint directory isn't found or isn't of type
793	 * directory, move aside whatever is there (if anything) and attempt
794	 * to mkdir(2) a new module checkpoint directory.  If this fails, we
795	 * have no choice but to abort the checkpoint and try again later.
796	 */
797	if (stat64(mp->mod_ckpt, &st) != 0 || !S_ISDIR(st.st_mode)) {
798		(void) snprintf(path, sizeof (path), "%s-", mp->mod_ckpt);
799		(void) rename(mp->mod_ckpt, path);
800		(void) fmd_conf_getprop(fmd.d_conf, "ckpt.dirmode", &dirmode);
801
802		if (mkdir(mp->mod_ckpt, dirmode) != 0) {
803			fmd_error(EFMD_CKPT_MKDIR,
804			    "failed to mkdir %s", mp->mod_ckpt);
805			return; /* return without clearing dirty bits */
806		}
807	}
808
809	/*
810	 * Create a temporary file to write out the checkpoint into, and create
811	 * a fmd_ckpt_t structure to manage construction of the checkpoint.  We
812	 * then figure out how much space will be required, and allocate it.
813	 */
814	if (fmd_ckpt_create(&ckp, mp) == -1) {
815		fmd_error(EFMD_CKPT_CREATE, "failed to create %s", ckp.ckp_src);
816		return;
817	}
818
819	fmd_ckpt_resv_module(&ckp, mp);
820
821	if (fmd_ckpt_alloc(&ckp, mp->mod_gen + 1) != 0) {
822		fmd_error(EFMD_CKPT_NOMEM, "failed to build %s", ckp.ckp_src);
823		fmd_ckpt_destroy(&ckp);
824		return;
825	}
826
827	/*
828	 * Fill in the checkpoint content, write it to disk, sync it, and then
829	 * atomically rename it to the destination path.  If this fails, we
830	 * have no choice but to leave all our dirty bits set and return.
831	 */
832	fmd_ckpt_save_module(&ckp, mp);
833	err = fmd_ckpt_commit(&ckp);
834	fmd_ckpt_destroy(&ckp);
835
836	if (err != 0) {
837		fmd_error(EFMD_CKPT_COMMIT, "failed to commit %s", ckp.ckp_dst);
838		return; /* return without clearing dirty bits */
839	}
840
841	fmd_module_commit(mp);
842	TRACE((FMD_DBG_CKPT, "ckpt save end %s", mp->mod_name));
843
844	mp->mod_stats->ms_ckpt_cnt.fmds_value.ui64++;
845	mp->mod_stats->ms_ckpt_time.fmds_value.ui64 += gethrtime() - now;
846
847	fmd_dprintf(FMD_DBG_CKPT, "saved checkpoint of %s (%llu)\n",
848	    mp->mod_name, mp->mod_gen);
849}
850
851/*
852 * Utility function to retrieve a pointer to a section's header and verify that
853 * it is of the expected type or it is a FCF_SECT_NONE reference.
854 */
855static const fcf_sec_t *
856fmd_ckpt_secptr(fmd_ckpt_t *ckp, fcf_secidx_t sid, uint_t type)
857{
858	const fcf_sec_t *sp = (void *)(ckp->ckp_buf +
859	    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * sid);
860
861	return (sid < ckp->ckp_secs && (sp->fcfs_type == type ||
862	    sp->fcfs_type == FCF_SECT_NONE) ? sp : NULL);
863}
864
865/*
866 * Utility function to retrieve the data pointer for a particular section.  The
867 * validity of the header values has already been checked by fmd_ckpt_open().
868 */
869static const void *
870fmd_ckpt_dataptr(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
871{
872	return (ckp->ckp_buf + sp->fcfs_offset);
873}
874
875/*
876 * Utility function to retrieve the end of the data region for a particular
877 * section.  The validity of this value has been confirmed by fmd_ckpt_open().
878 */
879static const void *
880fmd_ckpt_datalim(fmd_ckpt_t *ckp, const fcf_sec_t *sp)
881{
882	return (ckp->ckp_buf + sp->fcfs_offset + sp->fcfs_size);
883}
884
885/*
886 * Utility function to retrieve a string pointer (fcf_stridx_t).  If the string
887 * index is valid, the string data is returned; otherwise 'defstr' is returned.
888 */
889static const char *
890fmd_ckpt_strptr(fmd_ckpt_t *ckp, fcf_stridx_t sid, const char *defstr)
891{
892	return (sid < ckp->ckp_strn ? ckp->ckp_strs + sid : defstr);
893}
894
895static void
896fmd_ckpt_restore_events(fmd_ckpt_t *ckp, fcf_secidx_t sid,
897    int (*func)(void *, fmd_event_t *), void *arg)
898{
899	const fcf_event_t *fcfe;
900	const fcf_sec_t *sp;
901	fmd_timeval_t ftv;
902	fmd_log_t *lp, *errlp;
903	uint_t i, n;
904	uint32_t e_maj, e_min;
905	uint64_t e_ino;
906
907	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_EVENTS)) == NULL) {
908		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
909		    "invalid link to section %u: expected events\n", sid);
910	}
911
912	if (sp->fcfs_size == 0)
913		return; /* empty events section or type none */
914
915	fcfe = fmd_ckpt_dataptr(ckp, sp);
916	n = sp->fcfs_size / sp->fcfs_entsize;
917
918	/*
919	 * Hold the reader lock on log pointers to block log rotation during
920	 * the section restore so that we can safely insert refs to d_errlog.
921	 */
922	(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
923	errlp = fmd.d_errlog;
924
925	e_maj = major(errlp->log_stat.st_dev);
926	e_min = minor(errlp->log_stat.st_dev);
927	e_ino = errlp->log_stat.st_ino;
928
929	for (i = 0; i < n; i++) {
930		fmd_event_t *ep;
931
932		ftv.ftv_sec = fcfe->fcfe_todsec;
933		ftv.ftv_nsec = fcfe->fcfe_todnsec;
934
935		if (e_ino == fcfe->fcfe_inode &&
936		    e_maj == fcfe->fcfe_major &&
937		    e_min == fcfe->fcfe_minor)
938			lp = errlp;
939		else
940			lp = NULL;
941
942		ep = fmd_event_recreate(FMD_EVT_PROTOCOL,
943		    &ftv, NULL, NULL, lp, fcfe->fcfe_offset, 0);
944		fmd_event_hold(ep);
945		(void) func(arg, ep);
946		fmd_event_rele(ep);
947
948		fcfe = (fcf_event_t *)((uintptr_t)fcfe + sp->fcfs_entsize);
949	}
950
951	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
952}
953
954static int
955fmd_ckpt_restore_suspects(fmd_ckpt_t *ckp, fmd_case_t *cp, fcf_secidx_t sid)
956{
957	const fcf_nvl_t *fcfn, *endn;
958	const fcf_sec_t *sp;
959	nvlist_t *nvl;
960	int err, i;
961
962	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_NVLISTS)) == NULL) {
963		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
964		    "invalid link to section %u: expected nvlists\n", sid);
965	}
966
967	fcfn = fmd_ckpt_dataptr(ckp, sp);
968	endn = fmd_ckpt_datalim(ckp, sp);
969
970	for (i = 0; fcfn < endn; i++) {
971		char *data = (char *)fcfn + sp->fcfs_entsize;
972		size_t size = (size_t)fcfn->fcfn_size;
973
974		if (fcfn->fcfn_size > (size_t)((char *)endn - data)) {
975			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "nvlist %u [%d] "
976			    "size %u exceeds buffer\n", sid, i, size);
977		}
978
979		if ((err = nvlist_xunpack(data, size, &nvl, &fmd.d_nva)) != 0) {
980			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "failed to "
981			    "unpack nvlist %u [%d]: %s\n", sid, i,
982			    fmd_strerror(err));
983		}
984
985		fmd_case_insert_suspect(cp, nvl);
986
987		size = sp->fcfs_entsize + fcfn->fcfn_size;
988		size = P2ROUNDUP(size, sizeof (uint64_t));
989		fcfn = (fcf_nvl_t *)((uintptr_t)fcfn + size);
990	}
991
992	return (i);
993}
994
995static void
996fmd_ckpt_restore_bufs(fmd_ckpt_t *ckp, fmd_module_t *mp,
997    fmd_case_t *cp, fcf_secidx_t sid)
998{
999	const fcf_sec_t *sp, *dsp;
1000	const fcf_buf_t *fcfb;
1001	uint_t i, n;
1002
1003	if ((sp = fmd_ckpt_secptr(ckp, sid, FCF_SECT_BUFS)) == NULL) {
1004		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1005		    "invalid link to section %u: expected bufs\n", sid);
1006	}
1007
1008	if (sp->fcfs_size == 0)
1009		return; /* empty events section or type none */
1010
1011	fcfb = fmd_ckpt_dataptr(ckp, sp);
1012	n = sp->fcfs_size / sp->fcfs_entsize;
1013
1014	for (i = 0; i < n; i++) {
1015		dsp = fmd_ckpt_secptr(ckp, fcfb->fcfb_data, FCF_SECT_BUFFER);
1016
1017		if (dsp == NULL) {
1018			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "invalid %u "
1019			    "buffer link %u\n", sid, fcfb->fcfb_data);
1020		}
1021
1022		fmd_buf_write((fmd_hdl_t *)mp, cp,
1023		    fmd_ckpt_strptr(ckp, fcfb->fcfb_name, "<CORRUPT>"),
1024		    ckp->ckp_buf + dsp->fcfs_offset, dsp->fcfs_size);
1025
1026		fcfb = (fcf_buf_t *)((uintptr_t)fcfb + sp->fcfs_entsize);
1027	}
1028}
1029
1030static void
1031fmd_ckpt_restore_case(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1032{
1033	const fcf_case_t *fcfc = fmd_ckpt_dataptr(ckp, sp);
1034	const char *uuid = fmd_ckpt_strptr(ckp, fcfc->fcfc_uuid, NULL);
1035	fmd_case_t *cp;
1036	int n;
1037
1038	if (uuid == NULL || fcfc->fcfc_state > FCF_CASE_CLOSE_WAIT) {
1039		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL, "corrupt %u case uuid "
1040		    "and/or state\n", (uint_t)(sp - ckp->ckp_secp));
1041	}
1042
1043	fmd_module_lock(mp);
1044
1045	if ((cp = fmd_case_recreate(mp, NULL,
1046	    fcfc->fcfc_state != FCF_CASE_UNSOLVED ? FCF_CASE_SOLVED :
1047	    FMD_CASE_UNSOLVED, uuid, NULL)) == NULL) {
1048		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1049		    "duplicate case uuid: %s\n", uuid);
1050	}
1051
1052	fmd_ckpt_restore_events(ckp, fcfc->fcfc_principal,
1053	    fmd_case_insert_principal, cp);
1054
1055	fmd_ckpt_restore_events(ckp, fcfc->fcfc_events,
1056	    fmd_case_insert_event, cp);
1057
1058	/*
1059	 * Once solved, treat suspects from resource cache as master copy.
1060	 *
1061	 * If !fmd.d_running, this module must be a builtin, and so we don't
1062	 * want to restore suspects or call fmd_case_transition_update() at this
1063	 * stage. The suspects will be added later from the resource cache.
1064	 * Calling fmd_case_transition("SOLVED") is OK here as the state is
1065	 * already solved, so all it does is update the case flags.
1066	 */
1067	if (fmd.d_running && (n = ((fmd_case_impl_t *)cp)->ci_nsuspects) == 0)
1068		n = fmd_ckpt_restore_suspects(ckp, cp, fcfc->fcfc_suspects);
1069
1070	if (!fmd.d_running)
1071		fmd_case_transition(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1072	else if (fcfc->fcfc_state == FCF_CASE_SOLVED)
1073		fmd_case_transition_update(cp, FMD_CASE_SOLVED, FMD_CF_SOLVED);
1074	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n != 0)
1075		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_SOLVED);
1076	else if (fcfc->fcfc_state == FCF_CASE_CLOSE_WAIT && n == 0)
1077		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1078
1079	fmd_module_unlock(mp);
1080	fmd_ckpt_restore_bufs(ckp, mp, cp, fcfc->fcfc_bufs);
1081}
1082
1083static void
1084fmd_ckpt_restore_serd(fmd_ckpt_t *ckp, fmd_module_t *mp, const fcf_sec_t *sp)
1085{
1086	const fcf_serd_t *fcfd = fmd_ckpt_dataptr(ckp, sp);
1087	uint_t i, n = sp->fcfs_size / sp->fcfs_entsize;
1088	const fcf_sec_t *esp;
1089	const char *s;
1090
1091	for (i = 0; i < n; i++) {
1092		esp = fmd_ckpt_secptr(ckp, fcfd->fcfd_events, FCF_SECT_EVENTS);
1093
1094		if (esp == NULL) {
1095			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1096			    "invalid events link %u\n", fcfd->fcfd_events);
1097		}
1098
1099		if ((s = fmd_ckpt_strptr(ckp, fcfd->fcfd_name, NULL)) == NULL) {
1100			fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1101			    "serd name %u is corrupt\n", fcfd->fcfd_name);
1102		}
1103
1104		fmd_serd_create((fmd_hdl_t *)mp, s, fcfd->fcfd_n, fcfd->fcfd_t);
1105		fmd_module_lock(mp);
1106
1107		fmd_ckpt_restore_events(ckp, fcfd->fcfd_events,
1108		    fmd_serd_eng_record,
1109		    fmd_serd_eng_lookup(&mp->mod_serds, s));
1110
1111		fmd_module_unlock(mp);
1112		fcfd = (fcf_serd_t *)((uintptr_t)fcfd + sp->fcfs_entsize);
1113	}
1114}
1115
1116static void
1117fmd_ckpt_restore_module(fmd_ckpt_t *ckp, fmd_module_t *mp)
1118{
1119	const fcf_module_t *fcfm = fmd_ckpt_dataptr(ckp, ckp->ckp_modp);
1120	const fcf_sec_t *sp;
1121	uint_t i;
1122
1123	if (strcmp(mp->mod_name, fmd_ckpt_strptr(ckp, fcfm->fcfm_name, "")) ||
1124	    strcmp(mp->mod_path, fmd_ckpt_strptr(ckp, fcfm->fcfm_path, ""))) {
1125		fmd_ckpt_error(ckp, EFMD_CKPT_INVAL,
1126		    "checkpoint is not for module %s\n", mp->mod_name);
1127	}
1128
1129	for (i = 0; i < ckp->ckp_secs; i++) {
1130		sp = (void *)(ckp->ckp_buf +
1131		    ckp->ckp_hdr->fcfh_secoff + ckp->ckp_hdr->fcfh_secsize * i);
1132
1133		switch (sp->fcfs_type) {
1134		case FCF_SECT_CASE:
1135			fmd_ckpt_restore_case(ckp, mp, sp);
1136			break;
1137		case FCF_SECT_SERD:
1138			fmd_ckpt_restore_serd(ckp, mp, sp);
1139			break;
1140		}
1141	}
1142
1143	fmd_ckpt_restore_bufs(ckp, mp, NULL, fcfm->fcfm_bufs);
1144	mp->mod_gen = ckp->ckp_hdr->fcfh_cgen;
1145}
1146
1147/*
1148 * Restore a checkpoint for the specified module.  Any errors which occur
1149 * during restore will call fmd_ckpt_error() or trigger an fmd_api_error(),
1150 * either of which will automatically unlock the module and trigger an abort.
1151 */
1152void
1153fmd_ckpt_restore(fmd_module_t *mp)
1154{
1155	fmd_ckpt_t ckp;
1156
1157	if (mp->mod_stats->ms_ckpt_restore.fmds_value.bool == FMD_B_FALSE)
1158		return; /* never restore checkpoints for this module */
1159
1160	TRACE((FMD_DBG_CKPT, "ckpt restore begin %s", mp->mod_name));
1161
1162	if (fmd_ckpt_open(&ckp, mp) == -1) {
1163		if (errno != ENOENT)
1164			fmd_error(EFMD_CKPT_OPEN, "can't open %s", ckp.ckp_src);
1165		TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1166		return;
1167	}
1168
1169	ASSERT(!fmd_module_locked(mp));
1170	fmd_ckpt_restore_module(&ckp, mp);
1171	fmd_ckpt_destroy(&ckp);
1172	fmd_module_clrdirty(mp);
1173
1174	TRACE((FMD_DBG_CKPT, "ckpt restore end %s", mp->mod_name));
1175	fmd_dprintf(FMD_DBG_CKPT, "restored checkpoint of %s\n", mp->mod_name);
1176}
1177
1178/*
1179 * Delete the module's checkpoint file.  This is used by the ckpt.zero property
1180 * code or by the fmadm reset RPC service path to force a checkpoint delete.
1181 */
1182void
1183fmd_ckpt_delete(fmd_module_t *mp)
1184{
1185	char path[PATH_MAX];
1186
1187	(void) snprintf(path, sizeof (path),
1188	    "%s/%s", mp->mod_ckpt, mp->mod_name);
1189
1190	TRACE((FMD_DBG_CKPT, "delete %s ckpt", mp->mod_name));
1191
1192	if (unlink(path) != 0 && errno != ENOENT)
1193		fmd_error(EFMD_CKPT_DELETE, "failed to delete %s", path);
1194}
1195
1196/*
1197 * Move aside the module's checkpoint file if checkpoint restore has failed.
1198 * We rename the file rather than deleting it in the hopes that someone might
1199 * send it to us for post-mortem analysis of whether we have a checkpoint bug.
1200 */
1201void
1202fmd_ckpt_rename(fmd_module_t *mp)
1203{
1204	char src[PATH_MAX], dst[PATH_MAX];
1205
1206	(void) snprintf(src, sizeof (src), "%s/%s", mp->mod_ckpt, mp->mod_name);
1207	(void) snprintf(dst, sizeof (dst), "%s-", src);
1208
1209	TRACE((FMD_DBG_CKPT, "rename %s ckpt", mp->mod_name));
1210
1211	if (rename(src, dst) != 0 && errno != ENOENT)
1212		fmd_error(EFMD_CKPT_DELETE, "failed to rename %s", src);
1213}
1214