xref: /illumos-gate/usr/src/uts/common/fs/tmpfs/tmp_subr.c (revision fd3bae1d)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5134a1f4eSCasper H.S. Dik  * Common Development and Distribution License (the "License").
6134a1f4eSCasper H.S. Dik  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22134a1f4eSCasper H.S. Dik  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23401bc9afSJoshua M. Clulow  * Copyright 2015 Joyent, Inc.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/errno.h>
287c478bd9Sstevel@tonic-gate #include <sys/param.h>
297c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
307c478bd9Sstevel@tonic-gate #include <sys/systm.h>
317c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
327c478bd9Sstevel@tonic-gate #include <sys/debug.h>
337c478bd9Sstevel@tonic-gate #include <sys/time.h>
347c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
357c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
367c478bd9Sstevel@tonic-gate #include <sys/stat.h>
377c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
387c478bd9Sstevel@tonic-gate #include <sys/cred.h>
397c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
407c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
417c478bd9Sstevel@tonic-gate #include <sys/policy.h>
427c478bd9Sstevel@tonic-gate #include <sys/fs/tmp.h>
437c478bd9Sstevel@tonic-gate #include <sys/fs/tmpnode.h>
44401bc9afSJoshua M. Clulow #include <sys/ddi.h>
45401bc9afSJoshua M. Clulow #include <sys/sunddi.h>
46401bc9afSJoshua M. Clulow 
/*
 * Byte multipliers applied by tmp_convnum() for the "k", "m" and "g"
 * magnitude suffixes.  Each is built from the previous one.
 */
#define	KILOBYTE	1024
#define	MEGABYTE	(KILOBYTE * 1024)
#define	GIGABYTE	(MEGABYTE * 1024)

/*
 * Number of bits separating the owner, group and other permission
 * triplets in a mode word; used by tmp_taccess().
 */
#define	MODESHIFT	3

/*
 * Mask of the mode bits that tmp_convmode() accepts.
 */
#define	VALIDMODEBITS	07777
54401bc9afSJoshua M. Clulow 
55*fd3bae1dSJerry Jelinek extern pgcnt_t swapfs_minfree;
56*fd3bae1dSJerry Jelinek 
577c478bd9Sstevel@tonic-gate int
tmp_taccess(void * vtp,int mode,struct cred * cred)587c478bd9Sstevel@tonic-gate tmp_taccess(void *vtp, int mode, struct cred *cred)
597c478bd9Sstevel@tonic-gate {
607c478bd9Sstevel@tonic-gate 	struct tmpnode *tp = vtp;
617c478bd9Sstevel@tonic-gate 	int shift = 0;
627c478bd9Sstevel@tonic-gate 	/*
637c478bd9Sstevel@tonic-gate 	 * Check access based on owner, group and
647c478bd9Sstevel@tonic-gate 	 * public permissions in tmpnode.
657c478bd9Sstevel@tonic-gate 	 */
667c478bd9Sstevel@tonic-gate 	if (crgetuid(cred) != tp->tn_uid) {
677c478bd9Sstevel@tonic-gate 		shift += MODESHIFT;
687c478bd9Sstevel@tonic-gate 		if (groupmember(tp->tn_gid, cred) == 0)
697c478bd9Sstevel@tonic-gate 			shift += MODESHIFT;
707c478bd9Sstevel@tonic-gate 	}
717c478bd9Sstevel@tonic-gate 
72134a1f4eSCasper H.S. Dik 	return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
73134a1f4eSCasper H.S. Dik 	    tp->tn_mode << shift, mode));
747c478bd9Sstevel@tonic-gate }
757c478bd9Sstevel@tonic-gate 
767c478bd9Sstevel@tonic-gate /*
777c478bd9Sstevel@tonic-gate  * Decide whether it is okay to remove within a sticky directory.
787c478bd9Sstevel@tonic-gate  * Two conditions need to be met:  write access to the directory
797c478bd9Sstevel@tonic-gate  * is needed.  In sticky directories, write access is not sufficient;
807c478bd9Sstevel@tonic-gate  * you can remove entries from a directory only if you own the directory,
817c478bd9Sstevel@tonic-gate  * if you are privileged, if you own the entry or if they entry is
827c478bd9Sstevel@tonic-gate  * a plain file and you have write access to that file.
837c478bd9Sstevel@tonic-gate  * Function returns 0 if remove access is granted.
847c478bd9Sstevel@tonic-gate  */
857c478bd9Sstevel@tonic-gate int
tmp_sticky_remove_access(struct tmpnode * dir,struct tmpnode * entry,struct cred * cr)867c478bd9Sstevel@tonic-gate tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
87*fd3bae1dSJerry Jelinek     struct cred *cr)
887c478bd9Sstevel@tonic-gate {
897c478bd9Sstevel@tonic-gate 	uid_t uid = crgetuid(cr);
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate 	if ((dir->tn_mode & S_ISVTX) &&
927c478bd9Sstevel@tonic-gate 	    uid != dir->tn_uid &&
937c478bd9Sstevel@tonic-gate 	    uid != entry->tn_uid &&
947c478bd9Sstevel@tonic-gate 	    (entry->tn_type != VREG ||
957c478bd9Sstevel@tonic-gate 	    tmp_taccess(entry, VWRITE, cr) != 0))
967c478bd9Sstevel@tonic-gate 		return (secpolicy_vnode_remove(cr));
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate 	return (0);
997c478bd9Sstevel@tonic-gate }
1007c478bd9Sstevel@tonic-gate 
1017c478bd9Sstevel@tonic-gate /*
1027c478bd9Sstevel@tonic-gate  * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
1037c478bd9Sstevel@tonic-gate  * or the 'musthave' flag is set.  'musthave' allocations should
1047c478bd9Sstevel@tonic-gate  * always be subordinate to normal allocations so that tmpfs_maxkmem
1057c478bd9Sstevel@tonic-gate  * can't be exceeded by more than a few KB.  Example: when creating
1067c478bd9Sstevel@tonic-gate  * a new directory, the tmpnode is a normal allocation; if that
1077c478bd9Sstevel@tonic-gate  * succeeds, the dirents for "." and ".." are 'musthave' allocations.
1087c478bd9Sstevel@tonic-gate  */
1097c478bd9Sstevel@tonic-gate void *
tmp_memalloc(size_t size,int musthave)1107c478bd9Sstevel@tonic-gate tmp_memalloc(size_t size, int musthave)
1117c478bd9Sstevel@tonic-gate {
1127c478bd9Sstevel@tonic-gate 	static time_t last_warning;
1137c478bd9Sstevel@tonic-gate 	time_t now;
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate 	if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
1167c478bd9Sstevel@tonic-gate 	    musthave)
1177c478bd9Sstevel@tonic-gate 		return (kmem_zalloc(size, KM_SLEEP));
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate 	atomic_add_long(&tmp_kmemspace, -size);
1207c478bd9Sstevel@tonic-gate 	now = gethrestime_sec();
1217c478bd9Sstevel@tonic-gate 	if (last_warning != now) {
1227c478bd9Sstevel@tonic-gate 		last_warning = now;
1237c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
1247c478bd9Sstevel@tonic-gate 	}
1257c478bd9Sstevel@tonic-gate 	return (NULL);
1267c478bd9Sstevel@tonic-gate }
1277c478bd9Sstevel@tonic-gate 
1287c478bd9Sstevel@tonic-gate void
tmp_memfree(void * cp,size_t size)1297c478bd9Sstevel@tonic-gate tmp_memfree(void *cp, size_t size)
1307c478bd9Sstevel@tonic-gate {
1317c478bd9Sstevel@tonic-gate 	kmem_free(cp, size);
1327c478bd9Sstevel@tonic-gate 	atomic_add_long(&tmp_kmemspace, -size);
1337c478bd9Sstevel@tonic-gate }
1347c478bd9Sstevel@tonic-gate 
1357c478bd9Sstevel@tonic-gate /*
1367c478bd9Sstevel@tonic-gate  * Convert a string containing a number (number of bytes) to a pgcnt_t,
1377c478bd9Sstevel@tonic-gate  * containing the corresponding number of pages. On 32-bit kernels, the
1387c478bd9Sstevel@tonic-gate  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
1397c478bd9Sstevel@tonic-gate  * returned in 'maxpg' is at most ULONG_MAX.
1407c478bd9Sstevel@tonic-gate  *
141401bc9afSJoshua M. Clulow  * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
142401bc9afSJoshua M. Clulow  * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
143401bc9afSJoshua M. Clulow  * for an arguably esoteric interpretation of multiple suffix characters:
144401bc9afSJoshua M. Clulow  * namely, they cascade.  For example, the caller may specify "2mk", which is
145401bc9afSJoshua M. Clulow  * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
146401bc9afSJoshua M. Clulow  * horse has left not only the barn but indeed the country, and possibly the
147*fd3bae1dSJerry Jelinek  * entire planetary system. Alternatively, the number may be followed by a
148*fd3bae1dSJerry Jelinek  * single '%' sign, indicating the size is a percentage of either the zone's
149*fd3bae1dSJerry Jelinek  * swap limit or the system's overall swap size.
1507c478bd9Sstevel@tonic-gate  *
1517c478bd9Sstevel@tonic-gate  * Parse and overflow errors are detected and a non-zero number returned on
1527c478bd9Sstevel@tonic-gate  * error.
1537c478bd9Sstevel@tonic-gate  */
1547c478bd9Sstevel@tonic-gate int
tmp_convnum(char * str,pgcnt_t * maxpg)1557c478bd9Sstevel@tonic-gate tmp_convnum(char *str, pgcnt_t *maxpg)
1567c478bd9Sstevel@tonic-gate {
157401bc9afSJoshua M. Clulow 	u_longlong_t num = 0;
1587c478bd9Sstevel@tonic-gate #ifdef _LP64
159401bc9afSJoshua M. Clulow 	u_longlong_t max_bytes = ULONG_MAX;
1607c478bd9Sstevel@tonic-gate #else
161401bc9afSJoshua M. Clulow 	u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
1627c478bd9Sstevel@tonic-gate #endif
1637c478bd9Sstevel@tonic-gate 	char *c;
164401bc9afSJoshua M. Clulow 	const struct convchar {
165401bc9afSJoshua M. Clulow 		char *cc_char;
166401bc9afSJoshua M. Clulow 		uint64_t cc_factor;
167401bc9afSJoshua M. Clulow 	} convchars[] = {
168401bc9afSJoshua M. Clulow 		{ "kK", KILOBYTE },
169401bc9afSJoshua M. Clulow 		{ "mM", MEGABYTE },
170401bc9afSJoshua M. Clulow 		{ "gG", GIGABYTE },
171401bc9afSJoshua M. Clulow 		{ NULL, 0 }
172401bc9afSJoshua M. Clulow 	};
173401bc9afSJoshua M. Clulow 
174401bc9afSJoshua M. Clulow 	if (str == NULL) {
1757c478bd9Sstevel@tonic-gate 		return (EINVAL);
176401bc9afSJoshua M. Clulow 	}
1777c478bd9Sstevel@tonic-gate 	c = str;
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	/*
180401bc9afSJoshua M. Clulow 	 * Convert the initial numeric portion of the input string.
1817c478bd9Sstevel@tonic-gate 	 */
182401bc9afSJoshua M. Clulow 	if (ddi_strtoull(str, &c, 10, &num) != 0) {
183401bc9afSJoshua M. Clulow 		return (EINVAL);
1847c478bd9Sstevel@tonic-gate 	}
1857c478bd9Sstevel@tonic-gate 
186*fd3bae1dSJerry Jelinek 	/*
187*fd3bae1dSJerry Jelinek 	 * Handle a size in percent. Anything other than a single percent
188*fd3bae1dSJerry Jelinek 	 * modifier is invalid. We use either the zone's swap limit or the
189*fd3bae1dSJerry Jelinek 	 * system's total available swap size as the initial value. Perform the
190*fd3bae1dSJerry Jelinek 	 * intermediate calculation in pages to avoid overflow.
191*fd3bae1dSJerry Jelinek 	 */
192*fd3bae1dSJerry Jelinek 	if (*c == '%') {
193*fd3bae1dSJerry Jelinek 		u_longlong_t cap;
194*fd3bae1dSJerry Jelinek 
195*fd3bae1dSJerry Jelinek 		if (*(c + 1) != '\0')
196*fd3bae1dSJerry Jelinek 			return (EINVAL);
197*fd3bae1dSJerry Jelinek 
198*fd3bae1dSJerry Jelinek 		if (num > 100)
199*fd3bae1dSJerry Jelinek 			return (EINVAL);
200*fd3bae1dSJerry Jelinek 
201*fd3bae1dSJerry Jelinek 		cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl;
202*fd3bae1dSJerry Jelinek 		if (cap == UINT64_MAX) {
203*fd3bae1dSJerry Jelinek 			/*
204*fd3bae1dSJerry Jelinek 			 * Use the amount of available physical and memory swap
205*fd3bae1dSJerry Jelinek 			 */
206*fd3bae1dSJerry Jelinek 			mutex_enter(&anoninfo_lock);
207*fd3bae1dSJerry Jelinek 			cap = TOTAL_AVAILABLE_SWAP;
208*fd3bae1dSJerry Jelinek 			mutex_exit(&anoninfo_lock);
209*fd3bae1dSJerry Jelinek 		} else {
210*fd3bae1dSJerry Jelinek 			cap = btop(cap);
211*fd3bae1dSJerry Jelinek 		}
212*fd3bae1dSJerry Jelinek 
213*fd3bae1dSJerry Jelinek 		num = ptob(cap * num / 100);
214*fd3bae1dSJerry Jelinek 		goto done;
215*fd3bae1dSJerry Jelinek 	}
216*fd3bae1dSJerry Jelinek 
2177c478bd9Sstevel@tonic-gate 	/*
218401bc9afSJoshua M. Clulow 	 * Apply the (potentially cascading) magnitude suffixes until an
219401bc9afSJoshua M. Clulow 	 * invalid character is found, or the string comes to an end.
2207c478bd9Sstevel@tonic-gate 	 */
221401bc9afSJoshua M. Clulow 	for (; *c != '\0'; c++) {
222401bc9afSJoshua M. Clulow 		int i;
223401bc9afSJoshua M. Clulow 
224401bc9afSJoshua M. Clulow 		for (i = 0; convchars[i].cc_char != NULL; i++) {
225401bc9afSJoshua M. Clulow 			/*
226401bc9afSJoshua M. Clulow 			 * Check if this character matches this multiplier
227401bc9afSJoshua M. Clulow 			 * class:
228401bc9afSJoshua M. Clulow 			 */
229401bc9afSJoshua M. Clulow 			if (strchr(convchars[i].cc_char, *c) != NULL) {
230401bc9afSJoshua M. Clulow 				/*
231401bc9afSJoshua M. Clulow 				 * Check for overflow:
232401bc9afSJoshua M. Clulow 				 */
233401bc9afSJoshua M. Clulow 				if (num > max_bytes / convchars[i].cc_factor) {
234401bc9afSJoshua M. Clulow 					return (EINVAL);
235401bc9afSJoshua M. Clulow 				}
236401bc9afSJoshua M. Clulow 
237401bc9afSJoshua M. Clulow 				num *= convchars[i].cc_factor;
238401bc9afSJoshua M. Clulow 				goto valid_char;
239401bc9afSJoshua M. Clulow 			}
240401bc9afSJoshua M. Clulow 		}
2417c478bd9Sstevel@tonic-gate 
2427c478bd9Sstevel@tonic-gate 		/*
243401bc9afSJoshua M. Clulow 		 * This was not a valid multiplier suffix character.
2447c478bd9Sstevel@tonic-gate 		 */
245401bc9afSJoshua M. Clulow 		return (EINVAL);
2467c478bd9Sstevel@tonic-gate 
247401bc9afSJoshua M. Clulow valid_char:
248401bc9afSJoshua M. Clulow 		continue;
2497c478bd9Sstevel@tonic-gate 	}
2507c478bd9Sstevel@tonic-gate 
251*fd3bae1dSJerry Jelinek done:
2527c478bd9Sstevel@tonic-gate 	/*
2537c478bd9Sstevel@tonic-gate 	 * Since btopr() rounds up to page granularity, this round-up can
2547c478bd9Sstevel@tonic-gate 	 * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
2557c478bd9Sstevel@tonic-gate 	 * and (max_bytes). In this case the resulting number is zero, which
2567c478bd9Sstevel@tonic-gate 	 * is what we check for below.
2577c478bd9Sstevel@tonic-gate 	 */
2587c478bd9Sstevel@tonic-gate 	if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
2597c478bd9Sstevel@tonic-gate 		return (EINVAL);
2607c478bd9Sstevel@tonic-gate 	return (0);
2617c478bd9Sstevel@tonic-gate }
262401bc9afSJoshua M. Clulow 
263401bc9afSJoshua M. Clulow /*
264401bc9afSJoshua M. Clulow  * Parse an octal mode string for use as the permissions set for the root
265401bc9afSJoshua M. Clulow  * of the tmpfs mount.
266401bc9afSJoshua M. Clulow  */
267401bc9afSJoshua M. Clulow int
tmp_convmode(char * str,mode_t * mode)268401bc9afSJoshua M. Clulow tmp_convmode(char *str, mode_t *mode)
269401bc9afSJoshua M. Clulow {
270401bc9afSJoshua M. Clulow 	ulong_t num;
271401bc9afSJoshua M. Clulow 	char *c;
272401bc9afSJoshua M. Clulow 
273401bc9afSJoshua M. Clulow 	if (str == NULL) {
274401bc9afSJoshua M. Clulow 		return (EINVAL);
275401bc9afSJoshua M. Clulow 	}
276401bc9afSJoshua M. Clulow 
277401bc9afSJoshua M. Clulow 	if (ddi_strtoul(str, &c, 8, &num) != 0) {
278401bc9afSJoshua M. Clulow 		return (EINVAL);
279401bc9afSJoshua M. Clulow 	}
280401bc9afSJoshua M. Clulow 
281401bc9afSJoshua M. Clulow 	if ((num & ~VALIDMODEBITS) != 0) {
282401bc9afSJoshua M. Clulow 		return (EINVAL);
283401bc9afSJoshua M. Clulow 	}
284401bc9afSJoshua M. Clulow 
285401bc9afSJoshua M. Clulow 	*mode = VALIDMODEBITS & num;
286401bc9afSJoshua M. Clulow 	return (0);
287401bc9afSJoshua M. Clulow }
288