1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2015 Joyent, Inc.  All rights reserved.
14  */
15 
16 /*
17  * varpd persistence backend
18  */
19 
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <fcntl.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <errno.h>
26 #include <strings.h>
27 #include <librename.h>
28 #include <md5.h>
29 #include <sys/sysmacros.h>
30 #include <dirent.h>
31 #include <sys/mman.h>
32 #include <umem.h>
33 #include <sys/debug.h>
34 
35 #include <libvarpd_impl.h>
36 
/*
 * Magic bytes that open every persisted file. They are copied into
 * vph_magic when a file is written and compared against it on restore.
 */
static uint8_t varpd_persist_magic[4] = { 'v', 'a', 'r', 'p' };
43 
/* Upper bound on the number of bytes handed to a single write(2) call. */
#define	VARPD_PERSIST_MAXWRITE		4096
/* The only on-disk format version that this file writes and accepts. */
#define	VARPD_PERSIST_VERSION_ONE	1
/* Suffix that follows the decimal instance id in a persisted file's name. */
#define	VARPD_PERSIST_SUFFIX		".varpd"
47 
/*
 * Header placed at the front of every persisted file; the packed nvlist
 * payload follows it immediately.
 */
typedef struct varpd_persist_header {
	uint8_t		vph_magic[4];	/* copy of varpd_persist_magic */
	uint32_t	vph_version;	/* VARPD_PERSIST_VERSION_ONE */
	uint8_t		vph_md5[16];	/* MD5 digest of the payload */
} varpd_persist_header_t;
53 
54 void
libvarpd_persist_init(varpd_impl_t * vip)55 libvarpd_persist_init(varpd_impl_t *vip)
56 {
57 	vip->vdi_persistfd = -1;
58 	if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0)
59 		libvarpd_panic("failed to create rw vdi_pfdlock");
60 }
61 
62 void
libvarpd_persist_fini(varpd_impl_t * vip)63 libvarpd_persist_fini(varpd_impl_t *vip)
64 {
65 	/*
66 	 * Clean up for someone that left something behind.
67 	 */
68 	if (vip->vdi_persistfd != -1) {
69 		if (close(vip->vdi_persistfd) != 0)
70 			libvarpd_panic("failed to close persist fd %d: %d",
71 			    vip->vdi_persistfd, errno);
72 		vip->vdi_persistfd = -1;
73 	}
74 	if (rwlock_destroy(&vip->vdi_pfdlock) != 0)
75 		libvarpd_panic("failed to destroy rw vdi_pfdlock");
76 }
77 
78 int
libvarpd_persist_enable(varpd_handle_t * vhp,const char * rootdir)79 libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir)
80 {
81 	int fd;
82 	struct stat st;
83 	varpd_impl_t *vip = (varpd_impl_t *)vhp;
84 
85 	fd = open(rootdir, O_RDONLY);
86 	if (fd < 0)
87 		return (errno);
88 
89 	if (fstat(fd, &st) != 0) {
90 		int ret = errno;
91 		if (close(fd) != 0)
92 			libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
93 			    rootdir, fd, errno);
94 		return (ret);
95 	}
96 
97 	if (!S_ISDIR(st.st_mode)) {
98 		if (close(fd) != 0)
99 			libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
100 			    rootdir, fd, errno);
101 		return (EINVAL);
102 	}
103 
104 
105 	VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
106 	if (vip->vdi_persistfd != -1) {
107 		VERIFY0(rw_unlock(&vip->vdi_pfdlock));
108 		if (close(fd) != 0)
109 			libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
110 			    rootdir, fd, errno);
111 		return (EEXIST);
112 	}
113 	vip->vdi_persistfd = fd;
114 	VERIFY0(rw_unlock(&vip->vdi_pfdlock));
115 
116 	return (0);
117 }
118 
119 static int
libvarpd_persist_write(int fd,const void * buf,size_t buflen)120 libvarpd_persist_write(int fd, const void *buf, size_t buflen)
121 {
122 	ssize_t ret;
123 	off_t off = 0;
124 
125 	while (buflen > 0) {
126 		ret = write(fd, (void *)((uintptr_t)buf + off),
127 		    MIN(buflen, VARPD_PERSIST_MAXWRITE));
128 		if (ret == -1 && errno == EINTR)
129 			continue;
130 		if (ret == -1)
131 			return (errno);
132 
133 		off += ret;
134 		buflen -= ret;
135 	}
136 
137 	return (0);
138 }
139 
140 static int
libvarpd_persist_nvlist(int dirfd,uint64_t id,nvlist_t * nvl)141 libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl)
142 {
143 	int err, fd;
144 	size_t size;
145 	varpd_persist_header_t hdr;
146 	librename_atomic_t *lrap;
147 	char *buf = NULL, *name;
148 
149 	if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0)
150 		return (err);
151 
152 	if (asprintf(&name, "%llu%s", (unsigned long long)id, ".varpd") == -1) {
153 		err = errno;
154 		free(buf);
155 		return (err);
156 	}
157 
158 	if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0,
159 	    &lrap)) != 0) {
160 		free(name);
161 		free(buf);
162 		return (err);
163 	}
164 
165 	fd = librename_atomic_fd(lrap);
166 
167 	bzero(&hdr, sizeof (varpd_persist_header_t));
168 	bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic));
169 	hdr.vph_version = VARPD_PERSIST_VERSION_ONE;
170 	md5_calc(hdr.vph_md5, buf, size);
171 
172 	if ((err = libvarpd_persist_write(fd, &hdr,
173 	    sizeof (varpd_persist_header_t))) != 0) {
174 		librename_atomic_fini(lrap);
175 		free(name);
176 		free(buf);
177 		return (err);
178 	}
179 
180 	if ((err = libvarpd_persist_write(fd, buf, size)) != 0) {
181 		librename_atomic_fini(lrap);
182 		free(name);
183 		free(buf);
184 		return (err);
185 	}
186 
187 	do {
188 		err = librename_atomic_commit(lrap);
189 	} while (err == EINTR);
190 
191 	librename_atomic_fini(lrap);
192 	free(name);
193 	free(buf);
194 	return (err);
195 }
196 
197 int
libvarpd_persist_instance(varpd_impl_t * vip,varpd_instance_t * inst)198 libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst)
199 {
200 	int err = 0;
201 	nvlist_t *nvl = NULL, *cvl = NULL;
202 
203 	VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
204 	/* Check if persistence exists */
205 	if (vip->vdi_persistfd == -1)
206 		goto out;
207 
208 	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
209 		goto out;
210 
211 	if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0)
212 		goto out;
213 
214 	if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0)
215 		goto out;
216 
217 	if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0)
218 		goto out;
219 
220 	if ((err = nvlist_add_uint32(nvl, "vri_dest",
221 	    (uint32_t)inst->vri_dest)) != 0)
222 		goto out;
223 	if ((err = nvlist_add_uint32(nvl, "vri_mode",
224 	    (uint32_t)inst->vri_mode)) != 0)
225 		goto out;
226 
227 	if ((err = nvlist_add_string(nvl, "vri_plugin",
228 	    inst->vri_plugin->vpp_name)) != 0)
229 		goto out;
230 
231 	err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl);
232 	if (err != 0)
233 		goto out;
234 
235 	if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0)
236 		goto out;
237 
238 	err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl);
239 out:
240 	nvlist_free(nvl);
241 	nvlist_free(cvl);
242 	VERIFY0(rw_unlock(&vip->vdi_pfdlock));
243 	return (err);
244 }
245 
246 void
libvarpd_torch_instance(varpd_impl_t * vip,varpd_instance_t * inst)247 libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst)
248 {
249 	char buf[32];
250 	int ret;
251 
252 	VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
253 	if (vip->vdi_persistfd == -1) {
254 		VERIFY0(rw_unlock(&vip->vdi_pfdlock));
255 		return;
256 	}
257 
258 	if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32)
259 		libvarpd_panic("somehow exceeded static value for "
260 		    "libvarpd_torch_instance buffer");
261 
262 	do {
263 		ret = unlinkat(vip->vdi_persistfd, buf, 0);
264 	} while (ret == -1 && errno == EINTR);
265 	if (ret != 0) {
266 		switch (errno) {
267 		case ENOENT:
268 			break;
269 		default:
270 			libvarpd_panic("failed to unlinkat %d`%s: %s",
271 			    vip->vdi_persistfd, buf, strerror(errno));
272 		}
273 	}
274 
275 	VERIFY0(rw_unlock(&vip->vdi_pfdlock));
276 }
277 
278 static int
libvarpd_persist_restore_instance(varpd_impl_t * vip,nvlist_t * nvl)279 libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl)
280 {
281 	int err;
282 	nvlist_t *pvl;
283 	uint64_t id, flags, vid;
284 	uint32_t linkid, dest, mode;
285 	char *pluginstr;
286 	varpd_plugin_t *plugin;
287 	overlay_plugin_dest_t adest;
288 	varpd_instance_t *inst, lookup;
289 
290 	if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0)
291 		return (EINVAL);
292 
293 	if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0)
294 		return (EINVAL);
295 
296 	if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0)
297 		return (EINVAL);
298 
299 	if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0)
300 		return (EINVAL);
301 
302 	if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0)
303 		return (EINVAL);
304 
305 	if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0)
306 		return (EINVAL);
307 
308 	plugin = libvarpd_plugin_lookup(vip, pluginstr);
309 	if (plugin == NULL)
310 		return (EINVAL);
311 
312 	if (plugin->vpp_mode != mode)
313 		return (EINVAL);
314 
315 	if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0)
316 		return (EINVAL);
317 
318 	if (dest != adest)
319 		return (EINVAL);
320 
321 	inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
322 	if (inst == NULL)
323 		libvarpd_panic("failed to allocate instance for restore");
324 
325 	inst->vri_id = id_alloc_specific(vip->vdi_idspace, id);
326 	if (inst->vri_id != id) {
327 		umem_free(inst, sizeof (varpd_instance_t));
328 		return (EINVAL);
329 	}
330 
331 	inst->vri_linkid = linkid;
332 	inst->vri_vnetid = vid;
333 	inst->vri_mode = plugin->vpp_mode;
334 	inst->vri_dest = dest;
335 	inst->vri_plugin = plugin;
336 	inst->vri_impl = vip;
337 	inst->vri_flags = 0;
338 	if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst,
339 	    dest, &inst->vri_private) != 0) {
340 		id_free(vip->vdi_idspace, id);
341 		umem_free(inst, sizeof (varpd_instance_t));
342 		return (EINVAL);
343 	}
344 
345 	if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK,
346 	    NULL) != 0)
347 		libvarpd_panic("failed to create vri_lock mutex");
348 
349 	mutex_enter(&vip->vdi_lock);
350 	lookup.vri_id = inst->vri_id;
351 	if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
352 		libvarpd_panic("found duplicate instance with id %d",
353 		    lookup.vri_id);
354 	avl_add(&vip->vdi_instances, inst);
355 	lookup.vri_linkid = inst->vri_linkid;
356 	if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
357 		libvarpd_panic("found duplicate linstance with id %d",
358 		    lookup.vri_linkid);
359 	avl_add(&vip->vdi_linstances, inst);
360 	mutex_exit(&vip->vdi_lock);
361 
362 	if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) {
363 		libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
364 		return (EINVAL);
365 	}
366 
367 	if (flags & OVERLAY_TARG_INFO_F_ACTIVE)
368 		(void) libvarpd_overlay_disassociate(inst);
369 
370 	if (libvarpd_overlay_associate(inst) != 0) {
371 		libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
372 		return (EINVAL);
373 	}
374 
375 	if (flags & OVERLAY_TARG_INFO_F_DEGRADED) {
376 		if ((err = libvarpd_overlay_restore(inst)) != 0) {
377 			libvarpd_panic("failed to restore instance %p: %d\n",
378 			    inst, err);
379 		}
380 	}
381 
382 	mutex_enter(&inst->vri_lock);
383 	inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
384 	mutex_exit(&inst->vri_lock);
385 
386 	return (0);
387 }
388 
389 static int
libvarpd_persist_restore_one(varpd_impl_t * vip,int fd)390 libvarpd_persist_restore_one(varpd_impl_t *vip, int fd)
391 {
392 	int err;
393 	size_t fsize;
394 	struct stat st;
395 	void *buf, *datap;
396 	varpd_persist_header_t *hdr;
397 	uint8_t md5[16];
398 	nvlist_t *nvl;
399 
400 	if (fstat(fd, &st) != 0)
401 		return (errno);
402 
403 	if (st.st_size <= sizeof (varpd_persist_header_t))
404 		return (EINVAL);
405 	fsize = st.st_size - sizeof (varpd_persist_header_t);
406 
407 	buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
408 	if (buf == MAP_FAILED)
409 		return (errno);
410 
411 	hdr = buf;
412 	if (bcmp(varpd_persist_magic, hdr->vph_magic,
413 	    sizeof (varpd_persist_magic)) != 0) {
414 		if (munmap(buf, st.st_size) != 0)
415 			libvarpd_panic("failed to munmap %p: %d", buf, errno);
416 		return (EINVAL);
417 	}
418 
419 	if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) {
420 		if (munmap(buf, st.st_size) != 0)
421 			libvarpd_panic("failed to munmap %p: %d", buf, errno);
422 		return (EINVAL);
423 	}
424 
425 	datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t));
426 	md5_calc(md5, datap, fsize);
427 	if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) {
428 		if (munmap(buf, st.st_size) != 0)
429 			libvarpd_panic("failed to munmap %p: %d", buf, errno);
430 		return (EINVAL);
431 	}
432 
433 	err = nvlist_unpack(datap, fsize, &nvl, 0);
434 	if (munmap(buf, st.st_size) != 0)
435 		libvarpd_panic("failed to munmap %p: %d", buf, errno);
436 
437 	if (err != 0)
438 		return (EINVAL);
439 
440 	err = libvarpd_persist_restore_instance(vip, nvl);
441 	nvlist_free(nvl);
442 	return (err);
443 }
444 
445 /* ARGSUSED */
446 static int
libvarpd_check_degrade_cb(varpd_impl_t * vip,datalink_id_t linkid,void * arg)447 libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg)
448 {
449 	varpd_instance_t *inst;
450 
451 	mutex_enter(&vip->vdi_lock);
452 	for (inst = avl_first(&vip->vdi_instances); inst != NULL;
453 	    inst = AVL_NEXT(&vip->vdi_instances, inst)) {
454 		if (inst->vri_linkid == linkid) {
455 			mutex_exit(&vip->vdi_lock);
456 			return (0);
457 		}
458 	}
459 
460 	mutex_exit(&vip->vdi_lock);
461 
462 	(void) libvarpd_overlay_degrade_datalink(vip, linkid,
463 	    "no varpd instance exists");
464 	return (0);
465 }
466 
/*
 * Run libvarpd_check_degrade_cb() through libvarpd_overlay_iter(). The
 * result of the iteration is deliberately ignored.
 */
static void
libvarpd_check_degrade(varpd_impl_t *vip)
{
	(void) libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL);
}
472 
473 int
libvarpd_persist_restore(varpd_handle_t * vhp)474 libvarpd_persist_restore(varpd_handle_t *vhp)
475 {
476 	int dirfd;
477 	int ret = 0;
478 	DIR *dirp = NULL;
479 	struct dirent *dp;
480 	varpd_impl_t *vip = (varpd_impl_t *)vhp;
481 
482 	VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
483 	if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
484 		ret = errno;
485 		goto out;
486 	}
487 
488 	if ((dirp = fdopendir(dirfd)) == NULL) {
489 		ret = errno;
490 		if (close(dirfd) != 0)
491 			libvarpd_panic("failed to close dirfd %d: %d",
492 			    dirfd, errno);
493 		goto out;
494 	}
495 
496 	for (;;) {
497 		int fd;
498 		uint64_t id;
499 		char *eptr;
500 		struct stat st;
501 
502 		errno = 0;
503 		dp = readdir(dirp);
504 		if (dp == NULL) {
505 			ret = errno;
506 			break;
507 		}
508 
509 		if (strcmp(dp->d_name, ".") == 0 ||
510 		    strcmp(dp->d_name, "..") == 0)
511 			continue;
512 
513 		/*
514 		 * Leave files that we don't recognize alone. A valid file has
515 		 * the format `%llu.varpd`.
516 		 */
517 		errno = 0;
518 		id = strtoull(dp->d_name, &eptr, 10);
519 		if ((id == 0 && errno == EINVAL) ||
520 		    (id == ULLONG_MAX && errno == ERANGE))
521 			continue;
522 
523 		if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
524 			continue;
525 
526 		fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
527 		if (fd < 0) {
528 			ret = errno;
529 			break;
530 		}
531 
532 		if (fstat(fd, &st) != 0) {
533 			ret = errno;
534 			break;
535 		}
536 
537 		if (!S_ISREG(st.st_mode)) {
538 			if (close(fd) != 0)
539 				libvarpd_panic("failed to close fd (%s) %d: "
540 				    "%d\n", dp->d_name, fd, errno);
541 			continue;
542 		}
543 
544 		ret = libvarpd_persist_restore_one(vip, fd);
545 		if (close(fd) != 0)
546 			libvarpd_panic("failed to close fd (%s) %d: "
547 			    "%d\n", dp->d_name, fd, errno);
548 		/*
549 		 * This is an invalid file. We'll unlink it to save us this
550 		 * trouble in the future.
551 		 */
552 		if (ret != 0) {
553 			if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
554 				ret = errno;
555 				break;
556 			}
557 		}
558 	}
559 
560 	libvarpd_check_degrade(vip);
561 
562 out:
563 	if (dirp != NULL)
564 		(void) closedir(dirp);
565 	VERIFY0(rw_unlock(&vip->vdi_pfdlock));
566 	return (ret);
567 }
568 
569 int
libvarpd_persist_disable(varpd_handle_t * vhp)570 libvarpd_persist_disable(varpd_handle_t *vhp)
571 {
572 	varpd_impl_t *vip = (varpd_impl_t *)vhp;
573 
574 	VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
575 	if (vip->vdi_persistfd == -1) {
576 		mutex_exit(&vip->vdi_lock);
577 		VERIFY0(rw_unlock(&vip->vdi_pfdlock));
578 		return (ENOENT);
579 	}
580 	if (close(vip->vdi_persistfd) != 0)
581 		libvarpd_panic("failed to close persist fd %d: %d",
582 		    vip->vdi_persistfd, errno);
583 	vip->vdi_persistfd = -1;
584 	VERIFY0(rw_unlock(&vip->vdi_pfdlock));
585 	return (0);
586 }
587