xref: /illumos-gate/usr/src/uts/common/sys/mman.h (revision df5cd018)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23 /*
24  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
25  *
26  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  * Copyright 2015 Joyent, Inc.  All rights reserved.
29  * Copyright 2022 Oxide Computer Company
30  */
31 
32 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
33 /*	  All Rights Reserved	*/
34 
35 /*
36  * University Copyright- Copyright (c) 1982, 1986, 1988
37  * The Regents of the University of California
38  * All Rights Reserved
39  *
40  * University Acknowledgment- Portions of this document are derived from
41  * software developed by the University of California, Berkeley, and its
42  * contributors.
43  */
44 
45 #ifndef	_SYS_MMAN_H
46 #define	_SYS_MMAN_H
47 
48 #include <sys/feature_tests.h>
49 
50 /*
51  * <sys/mman.h> has had a bit of a tortured symbol visibility history. In
52  * particular, when things were honored under __EXTENSIONS__ or not in the past
53  * wasn't very consistent. As this was not a header that was part of ISO-C it
54  * traditionally just checked around XOPEN/POSIX related feature tests. This
55  * makes the use of the standard _STRICT_POSIX something that actually is more
56  * restrictive than previously was used.
57  */
58 
59 #ifdef	__cplusplus
60 extern "C" {
61 #endif
62 
63 #if	!defined(_ASM) && !defined(_KERNEL)
64 #include <sys/types.h>
65 #endif	/* !_ASM && !_KERNEL */
66 
67 /*
68  * Protections are chosen from these bits, or-ed together.
69  * Note - not all implementations literally provide all possible
70  * combinations.  PROT_WRITE is often implemented as (PROT_READ |
 * PROT_WRITE) and PROT_EXEC as (PROT_READ | PROT_EXEC).
72  * However, no implementation will permit a write to succeed
73  * where PROT_WRITE has not been set.  Also, no implementation will
74  * allow any access to succeed where prot is specified as PROT_NONE.
75  */
/* Individual protection bits; PROT_NONE is the absence of all of them. */
#define	PROT_NONE	0x0		/* no access is permitted */
#define	PROT_READ	0x1		/* reading is permitted */
#define	PROT_WRITE	0x2		/* writing is permitted */
#define	PROT_EXEC	0x4		/* execution is permitted */

#if	defined(_KERNEL)
/* Kernel-only: the user-access bit and the masks that include it. */
#define	PROT_USER	0x8		/* user access is permitted */
#define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_USER)
#define	PROT_ZFOD	(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_USER)
#endif	/* _KERNEL */
87 
/*
 * Sharing type: exactly one of MAP_SHARED or MAP_PRIVATE must be chosen.
 * MAP_TYPE is the mask that isolates the sharing type.
 */
#define	MAP_SHARED	0x1		/* changes are shared */
#define	MAP_PRIVATE	0x2		/* changes stay private */
#define	MAP_TYPE	0xf		/* sharing type mask */

/* Remaining mmap flags; these are or-ed in with the sharing type. */
#define	MAP_FILE	0x0		/* map from a file (the default) */
#define	MAP_FIXED	0x10		/* the user assigns the address */
#define	MAP_RENAME	0x20		/* rename private pages to file */
					/* (not implemented) */
#define	MAP_NORESERVE	0x40		/* do not reserve needed swap area */
/* 0x80 is skipped here: it is _MAP_LOW32, defined further down */
#define	MAP_ANON	0x100		/* map anonymous pages directly */
#define	MAP_ANONYMOUS	MAP_ANON	/* alias, for source compatibility */
#define	MAP_ALIGN	0x200		/* addr gives the alignment */
#define	MAP_TEXT	0x400		/* map a code segment */
#define	MAP_INITDATA	0x800		/* map a data segment */
105 
/*
 * mmap flag extensions that exist only inside the kernel.
 */
#if defined(_KERNEL)
#define	_MAP_TEXTREPL	0x1000
#define	_MAP_RANDOMIZE	0x2000
#endif /* _KERNEL */

/*
 * mmap flag extensions offered by the default compilation environment.
 * A strict environment does not get them.
 */
#ifndef _STRICT_POSIX
#define	_MAP_LOW32	0x80	/* keep the mapping in the lower 4G */
#define	MAP_32BIT	_MAP_LOW32

/*
 * _MAP_NEW exists for backward object compatibility.  The C library
 * or's it in automatically on every new mmap call.  Binaries making old
 * mmap calls keep receiving 0 or -1 as the return value, whereas new
 * mmap calls receive the mapped address on success and -1 on error.
 * Unless MAP_FIXED is given, new mmap calls have the kernel pick the
 * map address by default.
 */
#define	_MAP_NEW	0x80000000	/* not meant for direct use */
#endif	/* !_STRICT_POSIX */
133 
134 #if !defined(_STRICT_POSIX)
/*
 * Flags accepted by the mmapobj syscall from outside the kernel.  Their
 * values do not collide with the MAP_* flags above.
 */
#define	MMOBJ_PADDING		0x10000
#define	MMOBJ_INTERPRET		0x20000

/* Every valid mmapobj flag or-ed together. */
#define	MMOBJ_ALL_FLAGS		(MMOBJ_PADDING | MMOBJ_INTERPRET)
140 
141 /*
142  * Values for mr_flags field of mmapobj_result_t below.
143  * The bottom 16 bits are mutually exclusive and thus only one
144  * of them can be set at a time.  Use MR_GET_TYPE below to check this value.
145  * The top 16 bits are used for flags which are not mutually exclusive and
146  * thus more than one of these flags can be set for a given mmapobj_result_t.
147  *
148  * MR_PADDING being set indicates that this memory range represents the user
149  * requested padding.
150  *
151  * MR_HDR_ELF being set indicates that the ELF header of the mapped object
152  * is mapped at mr_addr + mr_offset.
153  *
154  * MR_HDR_AOUT being set indicates that the AOUT (4.x) header of the mapped
155  * object is mapped at mr_addr + mr_offset.
156  */
157 
/*
 * mr_flags values that are visible outside the kernel.
 */
#define	MR_PADDING	0x1
#define	MR_HDR_ELF	0x2
#define	MR_HDR_AOUT	0x3

/*
 * mr_flags values that only the kernel uses.
 */
#if	defined(_KERNEL)
#define	MR_RESV	0x80000000	/* overmapped /dev/null */
#endif	/* _KERNEL */

/* The type lives in the low 16 bits of mr_flags; MR_GET_TYPE extracts it. */
#define	MR_TYPE_MASK 0x0000ffff
#define	MR_GET_TYPE(flags)	((flags) & MR_TYPE_MASK)
174 
175 #if	!defined(_ASM)
176 typedef struct mmapobj_result {
177 	caddr_t		mr_addr;	/* mapping address */
178 	size_t		mr_msize;	/* mapping size */
179 	size_t		mr_fsize;	/* file size */
180 	size_t		mr_offset;	/* offset into file */
181 	uint_t		mr_prot;	/* the protections provided */
182 	uint_t		mr_flags;	/* info on the mapping */
183 } mmapobj_result_t;
184 
#if defined(_SYSCALL32) || defined(_KERNEL)
/*
 * Variant of mmapobj_result_t with the same fields in the same order,
 * built from fixed 32-bit address and size types.
 */
struct mmapobj_result32 {
	caddr32_t	mr_addr;	/* address of the mapping */
	size32_t	mr_msize;	/* size of the mapping */
	size32_t	mr_fsize;	/* size of the file */
	size32_t	mr_offset;	/* offset into the file */
	uint_t		mr_prot;	/* protections that were provided */
	uint_t		mr_flags;	/* information about the mapping */
};
typedef struct mmapobj_result32 mmapobj_result32_t;
#endif	/* defined(_KERNEL) || defined(_SYSCALL32) */
195 #endif	/* !defined(_ASM) */
196 #endif	/* !defined(_STRICT_POSIX) */
197 
198 #if	!defined(_ASM) && !defined(_KERNEL)
199 /*
200  * large file compilation environment setup
201  *
202  * In the LP64 compilation environment, map large file interfaces
203  * back to native versions where possible.
204  */
205 
/* Not LP64 and 64-bit file offsets were requested: mmap refers to mmap64. */
#if !defined(_LP64) && _FILE_OFFSET_BITS == 64
#if defined(__PRAGMA_REDEFINE_EXTNAME)
#pragma redefine_extname	mmap	mmap64
#else
#define	mmap			mmap64
#endif	/* __PRAGMA_REDEFINE_EXTNAME */
#endif /* !_LP64 && _FILE_OFFSET_BITS == 64 */

/* LP64 with the large file source interfaces: mmap64 refers back to mmap. */
#if defined(_LP64) && defined(_LARGEFILE64_SOURCE)
#if defined(__PRAGMA_REDEFINE_EXTNAME)
#pragma	redefine_extname	mmap64	mmap
#else
#define	mmap64			mmap
#endif	/* __PRAGMA_REDEFINE_EXTNAME */
#endif	/* _LP64 && _LARGEFILE64_SOURCE */

/* getpagesizes always refers to getpagesizes2. */
#if defined(__PRAGMA_REDEFINE_EXTNAME)
#pragma redefine_extname	getpagesizes	getpagesizes2
#else
#define	getpagesizes	getpagesizes2
#endif	/* __PRAGMA_REDEFINE_EXTNAME */
227 
228 /*
229  * Except for old binaries mmap() will return the resultant address of mapping
230  * on success and (void *)-1 on error.  illumos traditionally used a 'caddr_t'
231  * instead of a void * and did not require certain addresses to be const.
232  *
233  * Note, the following group of symbols are always visible since we have always
234  * exposed them and they appear to have been defined in most relevant versions
235  * of the specifications. While these are not strictly defined in ISO C, this
236  * header isn't a part of it and it isn't our job to guard against that.
237  */
/* Unmapping, protection changes and synchronization of a mapped range. */
extern int munmap(void *, size_t);
extern int mprotect(void *, size_t, int);
extern int msync(void *, size_t, int);

/* Establishing a mapping; the last argument is the file offset. */
extern void *mmap(void *, size_t, int, int, int, off_t);

/*
 * The explicit 64-bit offset variant.  It is declared for large file
 * source builds, except when mmap has been turned into mmap64 by a plain
 * macro (64-bit file offsets without redefine_extname support).
 */
#if	defined(_LARGEFILE64_SOURCE) && ((_FILE_OFFSET_BITS != 64) || \
	    defined(__PRAGMA_REDEFINE_EXTNAME))
extern void *mmap64(void *, size_t, int, int, int, off64_t);
#endif  /* _LARGEFILE64_SOURCE... */
247 
248 /*
249  * These functions were all part of the older POSIX realtime suite and didn't
250  * make it into XPG until v5.
251  */
252 
#if (_POSIX_C_SOURCE > 2) || defined(_XPG5) || !defined(_STRICT_POSIX)
/* Locking and unlocking of an address range. */
extern int mlock(const void *, size_t);
extern int munlock(const void *, size_t);

/* Locking and unlocking of the whole address space (see MCL_* flags). */
extern int mlockall(int);
extern int munlockall(void);

/* Named shared memory objects. */
extern int shm_open(const char *, int, mode_t);
extern int shm_unlink(const char *);
#endif	/* !_STRICT_POSIX || _POSIX_C_SOURCE > 2 || _XPG5 */
261 
#if defined(_XPG6) || !defined(_STRICT_POSIX)
/* Takes one of the POSIX_MADV_* advice values as its last argument. */
extern int posix_madvise(void *, size_t, int);
#endif	/* !_STRICT_POSIX || _XPG6 */
265 
266 /*
267  * The following are extensions that we have added.
268  */
269 #if !defined(_STRICT_POSIX)
270 extern int mincore(caddr_t, size_t, char *);
271 extern int memcntl(void *, size_t, int, void *, int, int);
272 extern int madvise(void *, size_t, int);
273 extern int getpagesizes(size_t *, int);
274 extern int getpagesizes2(size_t *, int);
275 extern int mmapobj(int, uint_t, mmapobj_result_t *, uint_t *, void *);
276 /* guard visibility of uint64_t */
277 #if defined(_INT64_TYPE)
278 extern int meminfo(const uint64_t *, int, const uint_t *, int, uint64_t *,
279 	uint_t *);
280 #endif /* defined(_INT64_TYPE) */
281 #endif /* !defined(_STRICT_POSIX) */
282 
283 
/* Value that mmap returns when it fails. */
#define	MAP_FAILED	((void *)-1)
286 
287 #endif	/* !_ASM && !_KERNEL */
288 
289 #if !defined(_STRICT_POSIX)
290 #if !defined(_ASM)
291 /*
292  * structure for memcntl hat advise operations.
293  */
294 struct memcntl_mha {
295 	uint_t		mha_cmd;	/* command(s) */
296 	uint_t		mha_flags;
297 	size_t		mha_pagesize;
298 };
299 
300 #if defined(_SYSCALL32)
301 struct memcntl_mha32 {
302 	uint_t		mha_cmd;	/* command(s) */
303 	uint_t		mha_flags;
304 	size32_t	mha_pagesize;
305 };
306 #endif	/* _SYSCALL32 */
307 #endif	/* !defined(_ASM) */
308 
/*
 * Advice values for madvise.
 *
 * The advice must fit in 4 bits.  Going beyond that (i.e. 16 or more
 * values) requires changes to the struct vpage.
 */
#define	MADV_NORMAL		0x0	/* no special treatment */
#define	MADV_RANDOM		0x1	/* page references will be random */
#define	MADV_SEQUENTIAL		0x2	/* page references will be sequential */
#define	MADV_WILLNEED		0x3	/* these pages will be needed */
#define	MADV_DONTNEED		0x4	/* these pages are not needed */
#define	MADV_FREE		0x5	/* the contents can be freed */
#define	MADV_ACCESS_DEFAULT	0x6	/* default access */
#define	MADV_ACCESS_LWP		0x7	/* next LWP to access heavily */
#define	MADV_ACCESS_MANY	0x8	/* many processes to access heavily */
#define	MADV_PURGE		0x9	/* the contents will be purged */
325 
326 #endif	/* !defined(_STRICT_POSIX) */
327 
#if defined(_XPG6) || !defined(_STRICT_POSIX)
/*
 * Advice values for posix_madvise.  Each one must stay equal to the
 * MADV_* value named in its comment.
 */
#define	POSIX_MADV_NORMAL	0x0	/* MADV_NORMAL */
#define	POSIX_MADV_RANDOM	0x1	/* MADV_RANDOM */
#define	POSIX_MADV_SEQUENTIAL	0x2	/* MADV_SEQUENTIAL */
#define	POSIX_MADV_WILLNEED	0x3	/* MADV_WILLNEED */
#define	POSIX_MADV_DONTNEED	0x4	/* MADV_DONTNEED */
#endif	/* !_STRICT_POSIX || _XPG6 */
337 
/* msync flags; these are always visible, just like msync itself */
#define	MS_ASYNC	0x1		/* return immediately */
#define	MS_INVALIDATE	0x2		/* invalidate caches */
#define	MS_SYNC		0x4		/* wait for msync */
					/* (modified for UNIX98 compliance) */
#define	MS_OLDSYNC	0x0		/* old value of MS_SYNC */
344 
#if (_POSIX_C_SOURCE > 2) || defined(_XPG5) || !defined(_STRICT_POSIX)
/* mlockall flags; visible under the same conditions as mlockall itself */
#define	MCL_CURRENT	0x1		/* lock the current mappings */
#define	MCL_FUTURE	0x2		/* lock mappings made in the future */
#endif	/* !_STRICT_POSIX || _POSIX_C_SOURCE > 2 || _XPG5 */
350 
351 /*
352  * The following flags are older variants used by memcntl that if more generally
353  * visible under more generous rules basically conflict all over the place due
354  * to the use of common words. As such, these retain their original feature
355  * guards, as weird as they may be.
356  */
#if	!defined(_XPG4_2) && (_POSIX_C_SOURCE <= 2)
/* Legacy names; the MEMCNTL_-prefixed equivalents are preferred. */
#define	SHARED		0x10	/* Use MEMCNTL_SHARED */
#define	PRIVATE		0x20	/* Use MEMCNTL_PRIVATE */
#define	VALID_ATTR	(PROT_READ | PROT_WRITE | PROT_EXEC | SHARED | PRIVATE)
#endif	/* (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) */
362 
363 #if !defined(_STRICT_POSIX)
/*
 * Attribute flags used by memcntl.  PROC_TEXT and PROC_DATA are protection
 * combinations; MEMCNTL_SHARED and MEMCNTL_PRIVATE are the namespace-clean
 * replacements for the legacy SHARED and PRIVATE names, and
 * MEMCNTL_VALID_ATTR is the mask of every attribute bit that may be given.
 */
#define	PROC_TEXT		(PROT_EXEC | PROT_READ)
#define	PROC_DATA		(PROT_READ | PROT_WRITE | PROT_EXEC)
#define	MEMCNTL_SHARED		0x10
#define	MEMCNTL_PRIVATE		0x20
/*
 * MENCNTL_PRIVATE is a historical misspelling of MEMCNTL_PRIVATE.  It is
 * retained only so that existing consumers of the old name keep compiling.
 */
#define	MENCNTL_PRIVATE		MEMCNTL_PRIVATE
#define	MEMCNTL_VALID_ATTR	(PROT_READ | PROT_WRITE | PROT_EXEC | \
				    MEMCNTL_SHARED | MEMCNTL_PRIVATE)
371 
/* Function codes understood by memcntl. */
#define	MC_SYNC		0x1		/* sync with the backing store */
#define	MC_LOCK		0x2		/* lock pages in memory */
#define	MC_UNLOCK	0x3		/* unlock pages from memory */
#define	MC_ADVISE	0x4		/* give advice to management */
#define	MC_LOCKAS	0x5		/* lock the address space in memory */
#define	MC_UNLOCKAS	0x6		/* unlock the address space */
#define	MC_HAT_ADVISE	0x7		/* advise hat map size */
#define	MC_INHERIT_ZERO	0x8		/* zero out regions on fork() */
381 
/*
 * Sub-commands of MC_HAT_ADVISE.  Each one sets a preferred page size.
 */
#define	MHA_MAPSIZE_VA		0x1	/* set preferred page size */
#define	MHA_MAPSIZE_BSSBRK	0x2	/* same, for the last bss adjacent */
					/* to the brk area and for the brk */
					/* area itself */
#define	MHA_MAPSIZE_STACK	0x4	/* same, for the processes main */
					/* stack */

/* Definitions for the meminfosys syscall. */
#define	MISYS_MEMINFO		0x0
391 
392 #if !defined(_ASM)
393 
#ifdef _INT64_TYPE
/*
 * Private structure for meminfo.  It needs uint64_t and so is only
 * provided when that type is available.
 */
struct meminfo {
	const uint64_t *mi_inaddr;	/* array of input addresses */
	const uint_t *mi_info_req;	/* array of requested info types */
	uint64_t *mi_outdata;		/* array where results are placed */
	uint_t *mi_validity;		/* array of bitwise result codes */
	int mi_info_count;		/* how many pieces of info requested */
};
typedef struct meminfo meminfo_t;
#endif /* defined(_INT64_TYPE) */
404 
#ifdef _SYSCALL32
/*
 * Counterpart of meminfo_t in which every pointer field is held as a
 * caddr32_t and the count as an int32_t.
 */
struct meminfo32 {
	caddr32_t mi_inaddr;	/* array of input addresses */
	caddr32_t mi_info_req;	/* array of requested information types */
	caddr32_t mi_outdata;	/* array where results are placed */
	caddr32_t mi_validity;	/* array of bitwise result codes */
	int32_t mi_info_count;	/* how many pieces of information requested */
};
typedef struct meminfo32 meminfo32_t;
#endif /* defined(_SYSCALL32) */
414 
415 #endif /* !defined(_ASM) */
416 
/*
 * Request types (info_req) for meminfo.  The request code is stored
 * MEMINFO_SHIFT bits up and MEMINFO_MASK covers it.
 *
 * The MEMINFO_V* requests operate on Virtual addresses.  MEMINFO_PLGRP
 * operates on Physical addresses and should not be mixed with them.
 */
#define	MEMINFO_SHIFT		16
#define	MEMINFO_MASK		(0xFF << MEMINFO_SHIFT)
#define	MEMINFO_VPHYSICAL	(0x01 << MEMINFO_SHIFT)	/* physical address */
#define	MEMINFO_VLGRP		(0x02 << MEMINFO_SHIFT)	/* lgroup */
#define	MEMINFO_VPAGESIZE	(0x03 << MEMINFO_SHIFT)	/* phys page size */
#define	MEMINFO_VREPLCNT	(0x04 << MEMINFO_SHIFT)	/* no. of replica */
#define	MEMINFO_VREPL		(0x05 << MEMINFO_SHIFT)	/* physical replica */
#define	MEMINFO_VREPL_LGRP	(0x06 << MEMINFO_SHIFT)	/* lgrp of replica */
#define	MEMINFO_PLGRP		(0x07 << MEMINFO_SHIFT)	/* lgroup for paddr */

/* Upper bound on the addresses one meminfo() call can process. */
#define	MAX_MEMINFO_CNT	256

/* Upper bound on the number of request types. */
#define	MAX_MEMINFO_REQ	31
438 
439 #endif /* !defined(_STRICT_POSIX) */
440 
441 #ifdef	__cplusplus
442 }
443 #endif
444 
445 #endif	/* _SYS_MMAN_H */
446