1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
31/*	  All Rights Reserved	*/
32
33#include <sys/types.h>
34#include <sys/sysmacros.h>
35#include <sys/param.h>
36#include <sys/vmparam.h>
37#include <sys/systm.h>
38#include <sys/cred.h>
39#include <sys/user.h>
40#include <sys/proc.h>
41#include <sys/conf.h>
42#include <sys/tuneable.h>
43#include <sys/cpuvar.h>
44#include <sys/archsystm.h>
45#include <sys/vmem.h>
46#include <vm/seg_kmem.h>
47#include <sys/errno.h>
48#include <sys/cmn_err.h>
49#include <sys/debug.h>
50#include <sys/atomic.h>
51#include <sys/model.h>
52#include <sys/kmem.h>
53#include <sys/memlist.h>
54#include <sys/autoconf.h>
55#include <sys/ontrap.h>
56#include <sys/utsname.h>
57#include <sys/zone.h>
58
59#ifdef __sparc
60#include <sys/membar.h>
61#endif
62
63/*
64 * Routine which sets a user error; placed in
65 * illegal entries in the bdevsw and cdevsw tables.
66 */
67
68int
69nodev()
70{
71	return (curthread->t_lwp ?
72	    ttolwp(curthread)->lwp_error = ENXIO : ENXIO);
73}
74
/*
 * No-op entry point for the bdevsw and cdevsw tables; installed in
 * slots where the operation is insignificant.  Always reports
 * success by returning 0.
 */

int
nulldev()
{
	return (0);
}
85
86static kmutex_t udevlock;
87
88/*
89 * Generate an unused major device number.
90 */
91major_t
92getudev()
93{
94	static major_t next = 0;
95	major_t ret;
96
97	/*
98	 * Ensure that we start allocating major numbers above the 'devcnt'
99	 * count.  The only limit we place on the number is that it should be a
100	 * legal 32-bit SVR4 major number and be greater than or equal to devcnt
101	 * in the current system).
102	 */
103	mutex_enter(&udevlock);
104	if (next == 0)
105		next = devcnt;
106	if (next <= L_MAXMAJ32 && next >= devcnt)
107		ret = next++;
108	else {
109		/*
110		 * If we fail to allocate a major number because devcnt has
111		 * reached L_MAXMAJ32, we may be the victim of a sparsely
112		 * populated devnames array.  We scan the array backwards
113		 * looking for an empty slot;  if we find one, mark it as
114		 * DN_GETUDEV so it doesn't get taken by subsequent consumers
115		 * users of the devnames array, and issue a warning.
116		 * It is vital for this routine to take drastic measures to
117		 * succeed, since the kernel really needs it to boot.
118		 */
119		int i;
120		for (i = devcnt - 1; i >= 0; i--) {
121			LOCK_DEV_OPS(&devnamesp[i].dn_lock);
122			if (devnamesp[i].dn_name == NULL &&
123			    ((devnamesp[i].dn_flags & DN_TAKEN_GETUDEV) == 0))
124				break;
125			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
126		}
127		if (i != -1) {
128			cmn_err(CE_WARN, "Reusing device major number %d.", i);
129			ASSERT(i >= 0 && i < devcnt);
130			devnamesp[i].dn_flags |= DN_TAKEN_GETUDEV;
131			UNLOCK_DEV_OPS(&devnamesp[i].dn_lock);
132			ret = (major_t)i;
133		} else {
134			ret = DDI_MAJOR_T_NONE;
135		}
136	}
137	mutex_exit(&udevlock);
138	return (ret);
139}
140
141
142/*
143 * Compress 'long' device number encoding to 32-bit device number
144 * encoding.  If it won't fit, we return failure, but set the
145 * device number to 32-bit NODEV for the sake of our callers.
146 */
147int
148cmpldev(dev32_t *dst, dev_t dev)
149{
150#if defined(_LP64)
151	if (dev == NODEV) {
152		*dst = NODEV32;
153	} else {
154		major_t major = dev >> L_BITSMINOR;
155		minor_t minor = dev & L_MAXMIN;
156
157		if (major > L_MAXMAJ32 || minor > L_MAXMIN32) {
158			*dst = NODEV32;
159			return (0);
160		}
161
162		*dst = (dev32_t)((major << L_BITSMINOR32) | minor);
163	}
164#else
165	*dst = (dev32_t)dev;
166#endif
167	return (1);
168}
169
170/*
171 * Expand 32-bit dev_t's to long dev_t's.  Expansion always "fits"
172 * into the return type, but we're careful to expand NODEV explicitly.
173 */
174dev_t
175expldev(dev32_t dev32)
176{
177#ifdef _LP64
178	if (dev32 == NODEV32)
179		return (NODEV);
180	return (makedevice((dev32 >> L_BITSMINOR32) & L_MAXMAJ32,
181	    dev32 & L_MAXMIN32));
182#else
183	return ((dev_t)dev32);
184#endif
185}
186
187#ifndef _LP64
188/*
189 * Keep these entry points for 32-bit systems but enforce the use
190 * of MIN/MAX macros on 64-bit systems.  The DDI header files already
191 * define min/max as macros so drivers shouldn't need these functions.
192 */
193
/* Return the smaller of two signed integers. */
int
min(int a, int b)
{
	if (a < b)
		return (a);
	return (b);
}
199
/* Return the larger of two signed integers. */
int
max(int a, int b)
{
	if (a > b)
		return (a);
	return (b);
}
205
206uint_t
207umin(uint_t a, uint_t b)
208{
209	return (a < b ? a : b);
210}
211
212uint_t
213umax(uint_t a, uint_t b)
214{
215	return (a > b ? a : b);
216}
217
218#endif /* !_LP64 */
219
/*
 * Parse the next suboption from a comma-separated option string.
 * Same as getsubopt(3C).
 *
 * The string at *optionsp is modified in place: the comma ending the
 * current suboption is overwritten with a null byte, and *optionsp is
 * advanced to the following suboption (or to the terminating null byte
 * when none remains).
 *
 * Returns the index in 'tokens' of the entry whose name matches the
 * suboption; *valuep then points just past the '=' of the suboption, or
 * is NULL if it has no '='.  Returns -1 if the string is empty (*valuep
 * is NULL) or if no token matches (*valuep points at the unrecognized
 * suboption).
 */
int
getsubopt(char **optionsp, char * const *tokens, char **valuep)
{
	char *opt = *optionsp;
	char *comma, *equals;
	size_t namelen;
	int idx;

	*valuep = NULL;
	if (opt[0] == '\0')
		return (-1);

	/* Cut the current suboption off and step to the next one. */
	comma = strchr(opt, ',');
	if (comma != NULL) {
		*comma = '\0';
		*optionsp = comma + 1;
	} else {
		*optionsp = opt + strlen(opt);
	}

	/* The name runs up to the '=' if there is one. */
	equals = strchr(opt, '=');
	if (equals != NULL) {
		namelen = equals - opt;
		*valuep = equals + 1;
	} else {
		namelen = strlen(opt);
	}

	for (idx = 0; tokens[idx] != NULL; idx++) {
		if (strlen(tokens[idx]) == namelen &&
		    strncmp(opt, tokens[idx], namelen) == 0)
			return (idx);
	}

	/* no match, point value at option and return error */
	*valuep = opt;
	return (-1);
}
258
/*
 * Append the suboption string 'opt' at position 'str' within the
 * buffer described by 'buf' and 'len'.  If 'buf' is not empty, a comma
 * is written before the suboption.
 *
 * Returns a pointer to the end of the resulting string (its null
 * byte), or NULL, leaving the buffer untouched, if there isn't enough
 * space left to append 'opt'.
 */
char *
append_subopt(const char *buf, size_t len, char *str, const char *opt)
{
	const size_t optlen = strlen(opt);
	/* A separator is needed unless this is the first option. */
	const size_t seplen = (buf[0] != '\0') ? 1 : 0;

	/* Existing contents + separator + option + null byte must fit. */
	if (strlen(buf) + seplen + optlen + 1 > len)
		return (NULL);

	if (seplen != 0) {
		*str = ',';
		str++;
	}
	(void) strcpy(str, opt);
	return (str + optlen);
}
283
/*
 * Tables to convert a single byte to/from binary-coded decimal (BCD).
 *
 * byte_to_bcd[] is indexed by a binary value.  Entries 0 through 99
 * hold the packed BCD encoding of the index (e.g. 42 -> 0x42).  No
 * initializers are given for indices 100 through 255, so those entries
 * are zero.
 */
uchar_t byte_to_bcd[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
};
299
/*
 * bcd_to_byte[] is indexed by a packed BCD byte and yields its binary
 * value (e.g. 0x42 -> 42).  Each row below covers one high nibble; the
 * six trailing zeros in a row are the slots whose low nibble is 0xA
 * through 0xF, which are not valid BCD digits.  No initializers are
 * given for indices above 0x99, so those entries are zero as well.
 */
uchar_t bcd_to_byte[256] = {		/* CSTYLED */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,
	10, 11, 12, 13, 14, 15, 16, 17, 18, 19,  0,  0,  0,  0,  0,  0,
	20, 21, 22, 23, 24, 25, 26, 27, 28, 29,  0,  0,  0,  0,  0,  0,
	30, 31, 32, 33, 34, 35, 36, 37, 38, 39,  0,  0,  0,  0,  0,  0,
	40, 41, 42, 43, 44, 45, 46, 47, 48, 49,  0,  0,  0,  0,  0,  0,
	50, 51, 52, 53, 54, 55, 56, 57, 58, 59,  0,  0,  0,  0,  0,  0,
	60, 61, 62, 63, 64, 65, 66, 67, 68, 69,  0,  0,  0,  0,  0,  0,
	70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  0,  0,  0,  0,  0,  0,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89,  0,  0,  0,  0,  0,  0,
	90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
};
312
/*
 * Hot-patch a single instruction in the kernel's text.
 *
 * If you want to patch multiple instructions you must arrange to do it so that
 * all intermediate stages are sane -- we don't stop other cpus while doing
 * this.
 *
 * Size must be 1, 2, or 4 bytes with iaddr aligned accordingly.
 *
 * The instruction itself might straddle a page boundary, so we have to account
 * for that.
 *
 * Parameters:
 *	iaddr		address of the instruction to overwrite
 *	new_instr	replacement value; only the low 'size' bytes are stored
 *	size		width of the store in bytes (1, 2 or 4); any other
 *			value panics
 *
 * The store is not made through iaddr itself.  Instead the page(s) backing
 * iaddr are mapped a second time, read/write, at a temporary address taken
 * from heap_arena, and the store goes through that mapping.
 */
void
hot_patch_kernel_text(caddr_t iaddr, uint32_t new_instr, uint_t size)
{
	const uintptr_t pageoff = (uintptr_t)iaddr & PAGEOFFSET;
	/* True when the 'size' bytes at iaddr run past the end of its page. */
	const boolean_t straddles = (pageoff + size > PAGESIZE);
	const size_t mapsize = straddles ? PAGESIZE * 2 : PAGESIZE;
	/* Start of the page containing iaddr. */
	caddr_t ipageaddr = iaddr - pageoff;
	caddr_t vaddr;
	page_t **ppp;

	/* Virtual address range for the temporary writable mapping. */
	vaddr = vmem_alloc(heap_arena, mapsize, VM_SLEEP);

	/* NOTE(review): the as_pagelock() result is deliberately ignored. */
	(void) as_pagelock(&kas, &ppp, ipageaddr, mapsize, S_WRITE);

	/* Map the page frame behind ipageaddr read/write at vaddr. */
	hat_devload(kas.a_hat, vaddr, PAGESIZE,
	    hat_getpfnum(kas.a_hat, ipageaddr), PROT_READ | PROT_WRITE,
	    HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);

	/* Likewise for the following page when the instruction spans two. */
	if (straddles) {
		hat_devload(kas.a_hat, vaddr + PAGESIZE, PAGESIZE,
		    hat_getpfnum(kas.a_hat, ipageaddr + PAGESIZE),
		    PROT_READ | PROT_WRITE, HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
	}

	/*
	 * Perform the patch as one store of the requested width.  Note that
	 * 'size' is only validated here, after the mappings are in place.
	 */
	switch (size) {
	case 1:
		*(uint8_t *)(vaddr + pageoff) = new_instr;
		break;
	case 2:
		*(uint16_t *)(vaddr + pageoff) = new_instr;
		break;
	case 4:
		*(uint32_t *)(vaddr + pageoff) = new_instr;
		break;
	default:
		panic("illegal hot-patch");
	}

	/*
	 * Issue a memory barrier, then sync the instruction cache for both
	 * the temporary mapping and the original instruction address.
	 */
	membar_enter();
	sync_icache(vaddr + pageoff, size);
	sync_icache(iaddr, size);
	/* Undo the page lock, the temporary mapping and the allocation. */
	as_pageunlock(&kas, ppp, ipageaddr, mapsize, S_WRITE);
	hat_unload(kas.a_hat, vaddr, mapsize, HAT_UNLOAD_UNLOCK);
	vmem_free(heap_arena, vaddr, mapsize);
}
370
371/*
372 * Routine to report an attempt to execute non-executable data.  If the
373 * address executed lies in the stack, explicitly say so.
374 */
375void
376report_stack_exec(proc_t *p, caddr_t addr)
377{
378	if (!noexec_user_stack_log)
379		return;
380
381	if (addr < p->p_usrstack && addr >= (p->p_usrstack - p->p_stksize)) {
382		cmn_err(CE_NOTE, "%s[%d] attempt to execute code "
383		    "on stack by uid %d", p->p_user.u_comm,
384		    p->p_pid, crgetruid(p->p_cred));
385	} else {
386		cmn_err(CE_NOTE, "%s[%d] attempt to execute non-executable "
387		    "data at 0x%p by uid %d", p->p_user.u_comm,
388		    p->p_pid, (void *) addr, crgetruid(p->p_cred));
389	}
390
391	delay(hz / 50);
392}
393
394/*
395 * Determine whether the address range [addr, addr + len) is in memlist mp.
396 */
397int
398address_in_memlist(struct memlist *mp, uint64_t addr, size_t len)
399{
400	while (mp != 0)	 {
401		if ((addr >= mp->ml_address) &&
402		    (addr + len <= mp->ml_address + mp->ml_size))
403			return (1);	 /* TRUE */
404		mp = mp->ml_next;
405	}
406	return (0);	/* FALSE */
407}
408
409/*
410 * Pop the topmost element from the t_ontrap stack, removing the current set of
411 * on_trap() protections.  Refer to <sys/ontrap.h> for more info.  If the
412 * stack is already empty, no_trap() just returns.
413 */
414void
415no_trap(void)
416{
417	if (curthread->t_ontrap != NULL) {
418#ifdef __sparc
419		membar_sync(); /* deferred error barrier (see sparcv9_subr.s) */
420#endif
421		curthread->t_ontrap = curthread->t_ontrap->ot_prev;
422	}
423}
424
425/*
426 * Return utsname.nodename outside a zone, or the zone name within.
427 */
428char *
429uts_nodename(void)
430{
431	if (curproc == NULL)
432		return (utsname.nodename);
433	return (curproc->p_zone->zone_nodename);
434}
435