1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2001-2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include <stdio.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <fcntl.h>
33#include <ctype.h>
34#include <string.h>
35#include <signal.h>
36#include <errno.h>
37#include <dirent.h>
38#include <limits.h>
39#include <sys/types.h>
40#include <sys/stat.h>
41#include <sys/mman.h>
42#include <sys/wait.h>
43#include <libproc.h>
44#include <sys/sysmacros.h>
45#include <libgen.h>
46#include <thread.h>
47
48#ifndef TRUE
49#define	TRUE	1
50#endif
51#ifndef FALSE
52#define	FALSE	0
53#endif
54
55static struct	ps_prochandle *Pr;
56static char	*command;
57static volatile int interrupt;
58static int	Fflag;
59static int	cflag = 1;
60
61static void	intr(int);
62static int	setpgsz(struct ps_prochandle *, int, size_t *);
63static int	setpgsz_anon(struct ps_prochandle *, size_t, int);
64static caddr_t	setup_mha(uint_t, size_t, int);
65static size_t	discover_optimal_pagesize(struct ps_prochandle *,
66		uint_t, pid_t);
67static void	usage();
68
/*
 * Sentinel for "no (valid) page size given".  3 is not a power of two, so
 * it can never pass the ISP2() validation applied to real page sizes.
 */
#define	INVPGSZ		3

/*
 * Sub-options accepted by -o.
 *
 * The enumerators are used both as the index returned by getsubopt() for
 * suboptstr[] and as the index into the per-segment page size array, so the
 * enum and the string table must list the sub-options in the same order.
 */
enum	suboptenum {
	E_HEAP = 0,
	E_STACK = 1,
	E_ANON = 2
};

static char	*suboptstr[] = {
	"heap",		/* E_HEAP */
	"stack",	/* E_STACK */
	"anon",		/* E_ANON */
	NULL		/* end of table */
};
85
86static size_t
87atosz(char *optarg)
88{
89	size_t		sz = 0;
90	char		*endptr;
91
92	if (optarg == NULL || optarg[0] == '\0')
93		return (INVPGSZ);
94
95	sz = strtoll(optarg, &endptr, 0);
96
97	switch (*endptr) {
98	case 'T':
99	case 't':
100		sz *= 1024;
101	/*FALLTHRU*/
102	case 'G':
103	case 'g':
104		sz *= 1024;
105	/*FALLTHRU*/
106	case 'M':
107	case 'm':
108		sz *= 1024;
109	/*FALLTHRU*/
110	case 'K':
111	case 'k':
112		sz *= 1024;
113	/*FALLTHRU*/
114	case 'B':
115	case 'b':
116	default:
117		break;
118	}
119	return (sz);
120}
121
122/* pgsz array sufficient for max page sizes */
123
124static size_t	pgsza[8 * sizeof (void *)];
125static int	nelem;
126
127static void
128getpgsz()
129{
130	if ((nelem = getpagesizes(NULL, 0)) == 0) {
131		(void) fprintf(stderr, "%s: cannot determine system page"
132		    " sizes\n", command);
133		exit(125);
134	}
135
136	(void) getpagesizes(pgsza, nelem);
137}
138
139static size_t
140cnvpgsz(char *optarg)
141{
142	size_t		pgsz = atosz(optarg);
143	int		i;
144
145	if (!ISP2(pgsz) || ((pgsz < pgsza[0]) && pgsz != 0)) {
146		pgsz = INVPGSZ;
147	} else {
148		for (i = nelem - 1; i >= 0; i--) {
149			if (pgsz == pgsza[i])
150				break;
151			if (pgsz > pgsza[i]) {
152				pgsz = INVPGSZ;
153				break;
154			}
155		}
156	}
157	if (pgsz == INVPGSZ) {
158		if (optarg != NULL) {
159			(void) fprintf(stderr,
160			    "%s: invalid page size specified (%s)\n",
161			    command, optarg);
162		} else {
163			usage();
164		}
165		exit(125);
166	}
167	return (pgsz);
168}
169
170static void
171usage()
172{
173	(void) fprintf(stderr,
174	    "usage:\t%s -o option[,option] [-F] cmd | -p pid ...\n"
175	    "    (set preferred page size of cmd or each process)\n"
176	    "    -o option[,option]: options are\n"
177	    "         stack=sz\n"
178	    "         heap=sz\n"
179	    "         anon=sz		(sz: valid page size or 0 (zero))\n"
180	    "    -F: force grabbing of the target process(es)\n"
181	    "    cmd: launch command\n"
182	    "    -p pid ...: process id list\n",
183	    command);
184	exit(125);
185}
186
187int
188main(int argc, char *argv[])
189{
190	int		rc, err = 0;
191	int		opt, subopt;
192	int		errflg = 0;
193	char		*options, *value;
194	size_t		pgsz[] = {INVPGSZ, INVPGSZ, INVPGSZ};
195	pid_t		pid;
196	int		status;
197
198	if ((command = strrchr(argv[0], '/')) != NULL)
199		command++;
200	else
201		command = argv[0];
202
203	getpgsz();
204
205	/* options */
206	while ((opt = getopt(argc, argv, "o:Fp")) != EOF) {
207		switch (opt) {
208		case 'o':		/* options */
209			options = optarg;
210			while (*options != '\0') {
211				subopt = getsubopt(&options, suboptstr, &value);
212				switch (subopt) {
213				case E_HEAP:
214				case E_STACK:
215				case E_ANON:
216					pgsz[subopt] = cnvpgsz(value);
217					break;
218				default:
219					errflg = 1;
220					break;
221				}
222			}
223			break;
224		case 'F':		/* force grabbing (no O_EXCL) */
225			Fflag = PGRAB_FORCE;
226			break;
227		case 'p':
228			cflag = 0;
229			break;
230		default:
231			errflg = 1;
232			break;
233		}
234	}
235
236	argc -= optind;
237	argv += optind;
238
239	if ((pgsz[E_HEAP] == INVPGSZ && pgsz[E_STACK] == INVPGSZ &&
240	    pgsz[E_ANON] == INVPGSZ) || errflg || argc <= 0) {
241		usage();
242	}
243
244	/* catch signals from terminal */
245	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
246		(void) sigset(SIGHUP, intr);
247	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
248		(void) sigset(SIGINT, intr);
249	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
250		(void) sigset(SIGQUIT, intr);
251	(void) sigset(SIGTERM, intr);
252
253	if (cflag && !interrupt) {		/* command */
254		int		err;
255		char		path[PATH_MAX];
256
257		Pr = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
258		if (Pr == NULL) {
259			switch (err) {
260			case C_PERM:
261				(void) fprintf(stderr,
262				    "%s: cannot control set-id or "
263				    "unreadable object file: %s\n",
264				    command, path);
265				break;
266			case C_LP64:
267				(void) fprintf(stderr,
268				    "%s: cannot control _LP64 "
269				    "program: %s\n", command, path);
270				break;
271			case C_NOEXEC:
272				(void) fprintf(stderr, "%s: cannot execute "
273				    "program: %s\n", command, argv[0]);
274				exit(126);
275				break;
276			case C_NOENT:
277				(void) fprintf(stderr, "%s: cannot find "
278				    "program: %s\n", command, argv[0]);
279				exit(127);
280				break;
281			case C_STRANGE:
282				break;
283			default:
284				(void) fprintf(stderr,
285				    "%s: %s\n", command, Pcreate_error(err));
286				break;
287			}
288			exit(125);
289		}
290
291		if ((rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz)) != 0) {
292			(void) fprintf(stderr, "%s: set page size "
293			    "failed for program: %s\n", command, argv[0]);
294			(void) pr_exit(Pr, 1);
295			exit(125);
296		}
297
298		/*
299		 * release the command to run, wait for it and
300		 * return it's exit status if we can.
301		 */
302		Prelease(Pr, 0);
303		do {
304			pid = wait(&status);
305		} while (pid == -1 && errno == EINTR);
306
307		if (pid == -1) {
308			(void) fprintf(stderr, "%s: wait() error: %s\n",
309			    command, strerror(errno));
310			exit(125);
311		}
312
313		/*
314		 * Pass thru the child's exit value.
315		 */
316		if (WIFEXITED(status))
317			exit(WEXITSTATUS(status));
318		exit(status | WCOREFLG);
319	}
320
321	/* process pids */
322
323	while (--argc >= 0 && !interrupt) {
324		char *arg;
325		psinfo_t psinfo;
326		int gret;
327
328		(void) fflush(stdout);	/* line-at-a-time */
329
330		/* get the specified pid and the psinfo struct */
331		arg = *argv++;
332		pid = proc_arg_psinfo(arg, PR_ARG_PIDS, &psinfo, &gret);
333
334		if (pid == -1) {
335			(void) fprintf(stderr, "%s: cannot examine pid %s:"
336			    " %s\n", command, arg, Pgrab_error(gret));
337			if (!isdigit(arg[0]) && strncmp(arg, "/proc/", 6)) {
338				(void) fprintf(stderr,
339				    "\tdo not use -p option"
340				    " to launch a command\n");
341			}
342			err++;
343		} else if ((Pr = Pgrab(pid, Fflag, &gret)) != NULL) {
344			rc = setpgsz(Pr, Pstatus(Pr)->pr_dmodel, pgsz);
345			if (rc != 0) {
346				(void) fprintf(stderr, "%s: set page size "
347				    "failed for pid: %d\n", command, (int)pid);
348				err++;
349			}
350			Prelease(Pr, 0);
351			Pr = NULL;
352		} else {
353			switch (gret) {
354			case G_SYS:
355				proc_unctrl_psinfo(&psinfo);
356				(void) fprintf(stderr, "%s: cannot set page "
357				    "size for system process: %d [ %s ]\n",
358				    command, (int)pid, psinfo.pr_psargs);
359				err++;
360				break;
361			case G_SELF:
362				/* do it to own self */
363				rc = setpgsz(NULL, psinfo.pr_dmodel, pgsz);
364				if (rc != 0) {
365					(void) fprintf(stderr, "%s: set page"
366					    "size failed for self: %d\n",
367					    command, (int)pid);
368					err++;
369				}
370				break;
371			default:
372				(void) fprintf(stderr, "%s: %s: %d\n",
373				    command, Pgrab_error(gret), (int)pid);
374				err++;
375				break;
376			}
377		}
378	}
379
380	if (interrupt || err)
381		exit(125);
382
383	return (0);
384}
385
386/* ARGSUSED */
387static void
388intr(int sig)
389{
390	interrupt = 1;
391}
392
393/* ------ begin specific code ------ */
394
395/* set process page size */
396/*ARGSUSED*/
397static int
398setpgsz(struct	ps_prochandle *Pr, int dmodel, size_t pgsz[])
399{
400	int			rc;
401	int			err = 0;
402	caddr_t			mpss;
403	int			i;
404	static uint_t	pgszcmd[] =
405	{MHA_MAPSIZE_BSSBRK, MHA_MAPSIZE_STACK, MHA_MAPSIZE_VA};
406
407	for (i = E_HEAP; i <= E_ANON; i++) {
408		if (pgsz[i] == INVPGSZ)
409			continue;
410
411		if (i == E_ANON)
412			rc = setpgsz_anon(Pr, pgsz[i], dmodel);
413		else {
414			mpss = setup_mha(pgszcmd[i], pgsz[i], dmodel);
415			rc = pr_memcntl(Pr, NULL, 0, MC_HAT_ADVISE, mpss, 0, 0);
416		}
417
418		if (rc < 0) {
419			(void) fprintf(stderr, "%s: warning: set %s page size "
420			    "failed (%s) for pid %d\n", command, suboptstr[i],
421			    strerror(errno), (int)Pstatus(Pr)->pr_pid);
422			err++;
423		}
424	}
425	return (err);
426}
427
428
429/*
430 * Walk through the process' address space segments.  Set all anonymous
431 * segments to the new page size.
432 */
433static int
434setpgsz_anon(struct ps_prochandle *Pr, size_t pgsz, int dmodel)
435{
436	caddr_t		mpss;
437	prmap_t		map;
438	uintptr_t	addr;
439	size_t		size;
440	const psinfo_t	*psinfo;
441	const pstatus_t	*pstatus;
442	int		fd;
443	int		rc;
444	char		path[PATH_MAX];
445
446	/*
447	 * Setting the page size for anonymous segments on a process before it
448	 * has run will have no effect, since it has not configured anonymous
449	 * memory and the page size setting is not "sticky" inside the kernel.
450	 * Any anonymous memory subsequently mapped will have the default page
451	 * size.
452	 */
453	if (cflag)
454		return (0);
455
456	if ((psinfo = Ppsinfo(Pr)) == NULL)
457		return (-1);
458	if ((pstatus = Pstatus(Pr)) == NULL)
459		return (-1);
460
461	if (pgsz == 0)
462		pgsz = discover_optimal_pagesize(Pr, dmodel, psinfo->pr_pid);
463
464	mpss = setup_mha(MHA_MAPSIZE_VA, pgsz, dmodel);
465
466	(void) snprintf(path, PATH_MAX, "/proc/%d/map", (int)psinfo->pr_pid);
467	if ((fd = open(path, O_RDONLY)) < 0)
468		return (-1);
469
470	while (read(fd, &map, sizeof (map)) == sizeof (map)) {
471		if ((map.pr_mflags & MA_ANON) == 0) {
472			/* Not anon. */
473			continue;
474		} else if (map.pr_mflags & MA_SHARED) {
475			/* Can't change pagesize for shared mappings. */
476			continue;
477		} else if (map.pr_vaddr + map.pr_size >
478		    pstatus->pr_brkbase &&
479		    map.pr_vaddr <
480		    pstatus->pr_brkbase + pstatus->pr_brksize) {
481			/* Heap. */
482			continue;
483		} else if (map.pr_vaddr >= pstatus->pr_stkbase &&
484		    map.pr_vaddr + map.pr_size <=
485		    pstatus->pr_stkbase + pstatus->pr_stksize) {
486			/* Stack. */
487			continue;
488		} else if (map.pr_size < pgsz) {
489			/* Too small. */
490			continue;
491		}
492
493		/*
494		 * Find the first address in the segment that is page-aligned.
495		 */
496		if (pgsz == 0 || ((map.pr_vaddr % pgsz) == 0))
497			addr = map.pr_vaddr;
498		else
499			addr = map.pr_vaddr + (pgsz - (map.pr_vaddr % pgsz));
500
501		/*
502		 * Calculate how many pages will fit in the segment.
503		 */
504		if (pgsz == 0)
505			size = map.pr_size;
506		else
507			size = map.pr_size - (addr % map.pr_vaddr) -
508			    ((map.pr_vaddr + map.pr_size) % pgsz);
509
510		/*
511		 * If no aligned pages fit in the segment, ignore it.
512		 */
513		if (size < pgsz) {
514			continue;
515		}
516
517		rc = pr_memcntl(Pr, (caddr_t)addr, size,
518		    MC_HAT_ADVISE, mpss, 0, 0);
519
520		/*
521		 * If an error occurs on any segment, report the error here and
522		 * then go on to try setting the page size for the remaining
523		 * segments.
524		 */
525		if (rc < 0) {
526			(void) fprintf(stderr, "%s: warning: set page size "
527			    "failed (%s) for pid %d for anon segment at "
528			    "address: %p\n", command, strerror(errno),
529			    (int)psinfo->pr_pid, (void *)map.pr_vaddr);
530		}
531	}
532
533	(void) close(fd);
534	return (0);
535}
536
537/*
538 * Discover the optimal page size for the process.
539 * Do this by creating a 4M segment in the target process, set its pagesize
540 * to 0, and read the map file to discover the page size selected by the system.
541 */
542static size_t
543discover_optimal_pagesize(struct ps_prochandle *Pr, uint_t dmodel, pid_t pid)
544{
545	size_t			size = 0;
546	size_t			len = pgsza[nelem - 1];
547	prxmap_t		xmap;
548	caddr_t			mha;
549	void			*addr;
550	int			fd = -1;
551	char			path[PATH_MAX];
552
553	(void) snprintf(path, PATH_MAX, "/proc/%d/xmap", (int)pid);
554	if ((fd = open(path, O_RDONLY)) < 0)
555		return (size);
556
557	if ((addr = pr_mmap(Pr, (void *)len, len, PROT_READ | PROT_WRITE,
558	    MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0)) == MAP_FAILED) {
559		goto err;
560	}
561
562	mha = setup_mha(MHA_MAPSIZE_VA, 0, dmodel);
563	if (pr_memcntl(Pr, addr, len, MC_HAT_ADVISE, mha, 0, 0) < 0) {
564		goto err;
565	}
566
567	/*
568	 * Touch a page in the segment so the hat mapping gets created.
569	 */
570	(void) Pwrite(Pr, &len, sizeof (len), (uintptr_t)addr);
571
572	/*
573	 * Read through the address map looking for our segment.
574	 */
575
576	while (read(fd, &xmap, sizeof (xmap)) == sizeof (xmap)) {
577		if (xmap.pr_vaddr == (uintptr_t)addr)
578			break;
579	}
580	if (xmap.pr_vaddr != (uintptr_t)addr)
581		goto err;
582
583	size = xmap.pr_hatpagesize;
584
585err:
586	if (addr != MAP_FAILED) {
587		if (pr_munmap(Pr, addr, len) == -1) {
588			(void) fprintf(stderr,
589			    "%s: couldn't delete segment at %p\n",
590			    command, addr);
591		}
592	}
593	if (fd != -1)
594		(void) close(fd);
595
596	return (size);
597}
598
599static struct memcntl_mha	gmha;
600#ifdef _LP64
601static struct memcntl_mha32	gmha32;
602#endif
603
604static caddr_t
605/* ARGSUSED */
606setup_mha(uint_t command, size_t pagesize, int dmodel)
607{
608#ifdef _LP64
609	if (dmodel == PR_MODEL_ILP32) {
610		gmha32.mha_cmd = command;
611		gmha32.mha_flags = 0;
612		gmha32.mha_pagesize = pagesize;
613		return ((caddr_t)&gmha32);
614	}
615#endif
616	gmha.mha_cmd = command;
617	gmha.mha_flags = 0;
618	gmha.mha_pagesize = pagesize;
619	return ((caddr_t)&gmha);
620}
621