1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
29  */
30 
31 /*
32  * pmadvise
33  *
34  * ptool wrapper for madvise(3C) to apply memory advice to running processes
35  *
 * usage:	pmadvise [-o option[,option]] [-Flv] pid ...
37  *  (Give "advice" about a process's memory)
38  *  -o option[,option]: options are
39  *      private=<advice>
40  *      shared=<advice>
41  *      heap=<advice>
42  *      stack=<advice>
43  *      <segaddr>[:<length>]=<advice>
 *     valid <advice> is one of:
 *      normal, random, sequential, willneed, dontneed,
 *      free, purge, access_lwp, access_many, access_default
47  *  -v: verbose output
48  *  -F: force grabbing of the target process(es)
49  *  -l: show unresolved dynamic linker map names
50  *  pid: process id list
51  *
52  *
53  * Advice passed to this tool are organized into various lists described here:
54  *  rawadv_list: includes all specific advice from command line (specific
55  *               advice being those given to a particular address range rather
56  *               than a type like "heap" or "stack".  In contrast, these
57  *               types are referred to as generic advice). Duplicates allowed.
58  *               List ordered by addr, then by size (largest size first).
59  *               Created once per run.
60  *  merged_list: includes all specific advice from the rawadv_list as well as
61  *               all generic advice.  This must be recreated for each process
62  *               as the generic advice will apply to different regions for
63  *               different processes. Duplicates allowed. List ordered by addr,
64  *               then by size (largest size first). Created once per pid.
65  *  chopped_list: used for verbose output only. This list parses the merged
66  *                list such that it eliminates any overlap and combines the
67  *                advice. Easiest to think of this visually: if you take all
68  *                the advice in the merged list and lay them down on a memory
69  *                range of the entire process (laying on top of each other when
70  *                necessary), then flatten them into one layer, combining advice
71  *                in the case of overlap, you get the chopped_list of advice.
72  *                Duplicate entries not allowed (since there is no overlap by
73  *                definition in this list).  List ordered by addr. Created once
74  *                per pid.
75  *
76  *                Example:
77  *                   merged_list:   |-----adv1----|---------adv3---------|
78  *                                       |--adv2--|--adv4--|-----adv5----|
79  *                                                  ||
80  *                                                  \/
81  *                   chopped_list:  |adv1|-adv1,2-|-adv3,4-|----adv3,5---|
82  *
83  *  maplist: list of memory mappings for a particular process. Used to create
84  *           generic advice entries for merged_list and for pmap like verbose
85  *           output. Created once per pid.
86  *
87  * Multiple lists are necessary because the actual advice applied given a set
88  * of generic and specific advice changes from process to process, so for each
89  * pid pmadvise is passed, it must create a new merged_list from which to apply
90  * advice (and a new chopped_list if verbose output is requested).
91  *
92  * Pseudo-code:
93  * I.	Input advice from command line
94  * II.	Create [raw advice list] of specific advice
95  * III.	Iterate through PIDs:
96  *	A.	Create [map list]
97  *	B.	Merge generic advice and [raw advice list] into [merged list]
98  *	C.	Apply advice from [merged list]; upon error:
99  *		i.	output madvise error message
100  *		ii.	remove element from [merged list]
101  *	D.	If verbose output:
102  *		i.	Create [chopped list] from [merged list]
103  *		ii.	Iterate through [map list]:
 *			a.	output advice as given by [chopped list]
105  *		iii.	Delete [chopped list]
106  *	E.	Delete [merged list]
107  *	F.	Delete [map list]
108  */
109 
110 #include <stdio.h>
111 #include <stdlib.h>
112 #include <unistd.h>
113 #include <ctype.h>
114 #include <fcntl.h>
115 #include <string.h>
116 #include <dirent.h>
117 #include <limits.h>
118 #include <link.h>
119 #include <libelf.h>
120 #include <locale.h>
121 #include <sys/types.h>
122 #include <sys/mman.h>
123 #include <sys/stat.h>
124 #include <sys/mkdev.h>
125 #include <assert.h>
126 #include <libproc.h>
127 #include <libgen.h>
128 #include <signal.h>
129 
130 #include "pmap_common.h"
131 
#ifndef	TEXT_DOMAIN			/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* use this only if it wasn't */
#endif

/* Number of bytes in a kilobyte; also the multiplier between size suffixes */
#define	KILOBYTE	1024

/*
 * Convert a byte count to kilobytes, rounding any partial kilobyte up.
 */
#define	ROUNDUP_KB(x)	(((x) + (KILOBYTE - 1)) / KILOBYTE)

/* Advice mask value meaning "no advice recorded for this range" */
#define	NO_ADVICE		0

/*
 * The following definitions are used as the third argument in insert_addr()
 *   NODUPS = duplicates are not allowed, thus if the addr being inserted
 *   already exists in the list, return without inserting again.
 *
 *   YESDUPS = duplicates are allowed, thus always insert the addr
 *   regardless of whether it already exists in the list or not.
 */
#define	NODUPS	1
#define	YESDUPS	0
155 
/*
 * Advice that can be passed to madvise fits into three groups, each of
 * which contains mutually exclusive options.  These groups are defined below:
 *   Group 1: normal, random, sequential
 *   Group 2: willneed, dontneed, free, purge
 *   Group 3: default, accesslwp, accessmany
 * Thus, advice that includes (at most) one from each group is valid.
 *
 * The following #define's are used as masks to determine which group(s) a
 * particular advice falls under.  Each MADV_* value is represented by the
 * bit (1 << MADV_*), which is also how advice is stored in saddr_t.adv.
 */

#define	GRP1_ADV	(1 << MADV_NORMAL | 1 << MADV_RANDOM | \
			1 << MADV_SEQUENTIAL)
#define	GRP2_ADV	(1 << MADV_WILLNEED | 1 << MADV_DONTNEED | \
			1 << MADV_FREE | 1 << MADV_PURGE)
#define	GRP3_ADV	(1 << MADV_ACCESS_DEFAULT | 1 << MADV_ACCESS_LWP | \
			1 << MADV_ACCESS_MANY)
174 
static	int	create_maplist(void *, const prmap_t *, const char *);
static	int	pr_madvise(struct ps_prochandle *, caddr_t, size_t, int);

static	char	*mflags(uint_t);
static	char	*advtostr(int);

/* Set by -l; handed to make_name() when labelling mappings */
static	int	lflag = 0;

/* Column widths for verbose output; chosen per target data model in main() */
static	int	addr_width, size_width;
/* basename of argv[0], used as the prefix of error messages */
static	char	*progname;
/* Handle of the process currently grabbed by main() */
static	struct ps_prochandle *Pr;

/*
 * Stack descriptors for the current process: filled in by getstack() and
 * sized in main() at two slots per LWP.
 */
static	lwpstack_t *stacks;
static	uint_t	nstacks;

/*
 * Generic (mapping type) suboption names for getsubopt(3C).  The index
 * returned by getsubopt() is used directly as an AT_* value and as the
 * index into generic_adv[], so the order here must not change.
 */
static char	*suboptstr[] = {
	"private",
	"shared",
	"heap",
	"stack",
	NULL
};


/* Advice mask requested for each generic mapping type, indexed like above */
int	generic_adv[] = {NO_ADVICE, NO_ADVICE, NO_ADVICE, NO_ADVICE};
/* Bitmask of (1 << AT_*) for every kind of advice seen on the command line */
int	at_map = 0;
201 
/*
 * One advice entry: an address range plus the advice to apply to it.
 * Used as the node type of rawadv_list, merged_list and chopped_list.
 */
typedef struct saddr_struct {
	uintptr_t	addr;		/* start address of the range */
	size_t		length;		/* length in bytes; 0 if not given */
	int		adv;		/* mask of (1 << MADV_*) bits, */
					/* NO_ADVICE, or -1 for a gap in */
					/* the chopped list */
	struct saddr_struct	*next;
} saddr_t;
static int	apply_advice(saddr_t **);
static void	set_advice(int *, int);
static void	create_choplist(saddr_t **, saddr_t *);

/*
 * The segment address advice from the command line
 */
saddr_t	*rawadv_list = NULL;
/*
 * The rawadv_list + list entries for the generic advice (if any).
 * This must be recreated for each PID as the memory maps might be different.
 */
saddr_t *merged_list = NULL;
/*
 * The merged_list cut up so as to remove all overlap
 * e.g. if merged_list contained two entries:
 *
 * [0x38000:0x3e000) = adv1
 * [0x3a000:0x3c000) = adv2
 *
 * the chopped list will contain three entries:
 *
 * [0x38000:0x3a000) = adv1
 * [0x3a000:0x3c000) = adv1,adv2
 * [0x3c000:0x3e000) = adv1
 *
 */
saddr_t *chopped_list = NULL;
236 
/*
 * One node per memory mapping of the current process, built by
 * create_maplist() and consumed by print_advice().
 */
typedef struct mapnode_struct {
	prmap_t			*pmp;	/* private copy of the mapping */
	char			label[PATH_MAX];	/* printable name */
	int			mtypes;	/* mask of (1 << AT_*) types */
	struct mapnode_struct	*next;
} mapnode_t;

/* Head and tail of the per-process mapping list (appended at the tail) */
mapnode_t *maplist_head = NULL;
mapnode_t *maplist_tail = NULL;
static void	print_advice(saddr_t *, mapnode_t *);

/* Set by -v: print a pmap-like listing annotated with the advice applied */
int	opt_verbose;
249 
/*
 * Printable name of each advice, indexed by its MADV_* value.  Both
 * apply_advice() and advtostr() index this table with every value from
 * MADV_NORMAL through MADV_PURGE, so it must have an entry for each of
 * them ("purge" was previously missing, causing an out-of-bounds read).
 * NOTE(review): the positions assume the MADV_* values are the consecutive
 * integers 0..9 in the order listed here -- confirm against <sys/mman.h>.
 */
static char	*advicestr[] = {
	"normal",
	"random",
	"sequential",
	"willneed",
	"dontneed",
	"free",
	"access_default",
	"access_lwp",
	"access_many",
	"purge"
};
261 
/*
 * How many signals caught from terminal
 * We bail out as soon as possible when interrupt is set
 *
 * The flag is written from the signal handler and polled from the main
 * flow, so it is declared volatile sig_atomic_t: that is the only object
 * type the C standard guarantees can be safely shared with a handler.
 */
static volatile sig_atomic_t	interrupt = 0;

/*
 * Interrupt handler
 */
static void	intr(int);
272 
273 /*
274  * Iterative function passed to Plwp_iter to
275  * get alt and main stacks for given lwp.
276  */
277 static int
getstack(void * data,const lwpstatus_t * lsp)278 getstack(void *data, const lwpstatus_t *lsp)
279 {
280 	int *np = (int *)data;
281 
282 	if (Plwp_alt_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
283 		stacks[*np].lwps_stack.ss_flags |= SS_ONSTACK;
284 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
285 		(*np)++;
286 	}
287 
288 	if (Plwp_main_stack(Pr, lsp->pr_lwpid, &stacks[*np].lwps_stack) == 0) {
289 		stacks[*np].lwps_lwpid = lsp->pr_lwpid;
290 		(*np)++;
291 	}
292 
293 	return (0);
294 }
295 
296 /*
297  * Prints usage and exits
298  */
299 static void
usage()300 usage()
301 {
302 	(void) fprintf(stderr,
303 	    gettext("usage:\t%s [-o option[,option]] [-Flv] pid ...\n"),
304 	    progname);
305 	(void) fprintf(stderr,
306 	    gettext("    (Give \"advice\" about a process's memory)\n"
307 	    "    -o option[,option]: options are\n"
308 	    "        private=<advice>\n"
309 	    "        shared=<advice>\n"
310 	    "        heap=<advice>\n"
311 	    "        stack=<advice>\n"
312 	    "        <segaddr>[:<length>]=<advice>\n"
313 	    "       valid <advice> is one of:\n"
314 	    "        normal, random, sequential, willneed, dontneed,\n"
315 	    "        free, access_lwp, access_many, access_default\n"
316 	    "    -v: verbose output\n"
317 	    "    -F: force grabbing of the target process(es)\n"
318 	    "    -l: show unresolved dynamic linker map names\n"
319 	    "    pid: process id list\n"));
320 	exit(2);
321 }
322 
323 /*
324  * Function to parse advice from options string
325  */
326 static int
get_advice(char * optarg)327 get_advice(char *optarg)
328 {
329 	/*
330 	 * Determine which advice is given, we use shifted values as
331 	 * multiple pieces of advice may apply for a particular region.
332 	 * (See comment above regarding GRP[1,2,3]_ADV definitions for
333 	 * breakdown of advice groups).
334 	 */
335 	if (strcmp(optarg, "access_default") == 0)
336 		return (1 << MADV_ACCESS_DEFAULT);
337 	else if (strcmp(optarg, "access_many") == 0)
338 		return (1 << MADV_ACCESS_MANY);
339 	else if (strcmp(optarg, "access_lwp") == 0)
340 		return (1 << MADV_ACCESS_LWP);
341 	else if (strcmp(optarg, "sequential") == 0)
342 		return (1 << MADV_SEQUENTIAL);
343 	else if (strcmp(optarg, "willneed") == 0)
344 		return (1 << MADV_WILLNEED);
345 	else if (strcmp(optarg, "dontneed") == 0)
346 		return (1 << MADV_DONTNEED);
347 	else if (strcmp(optarg, "random") == 0)
348 		return (1 << MADV_RANDOM);
349 	else if (strcmp(optarg, "normal") == 0)
350 		return (1 << MADV_NORMAL);
351 	else if (strcmp(optarg, "free") == 0)
352 		return (1 << MADV_FREE);
353 	else if (strcmp(optarg, "purge") == 0)
354 		return (1 << MADV_PURGE);
355 	else {
356 		(void) fprintf(stderr, gettext("%s: invalid advice: %s\n"),
357 		    progname, optarg);
358 		usage();
359 		return (-1);
360 	}
361 }
362 
363 /*
364  * Function to convert character size indicators into actual size
365  * (i.e., 123M => sz = 123 * 1024 * 1024)
366  */
367 static size_t
atosz(char * optarg,char ** endptr)368 atosz(char *optarg, char **endptr)
369 {
370 	size_t	sz = 0;
371 
372 	if (optarg == NULL || optarg[0] == '\0')
373 		return (0);
374 
375 	sz = strtoll(optarg, endptr, 0);
376 
377 	switch (**endptr) {
378 	case 'E':
379 	case 'e':
380 		sz *= KILOBYTE;
381 		/* FALLTHRU */
382 	case 'P':
383 	case 'p':
384 		sz *= KILOBYTE;
385 		/* FALLTHRU */
386 	case 'T':
387 	case 't':
388 		sz *= KILOBYTE;
389 		/* FALLTHRU */
390 	case 'G':
391 	case 'g':
392 		sz *= KILOBYTE;
393 		/* FALLTHRU */
394 	case 'M':
395 	case 'm':
396 		sz *= KILOBYTE;
397 		/* FALLTHRU */
398 	case 'K':
399 	case 'k':
400 		sz *= KILOBYTE;
401 		/* FALLTHRU */
402 	case 'B':
403 	case 'b':
404 		(*endptr)++;
405 		/* FALLTHRU */
406 	default:
407 		break;
408 	}
409 	return (sz);
410 }
411 
412 /*
413  * Inserts newaddr into list.  dups indicates whether we allow duplicate
414  * addr entries in the list (valid values are NODUPS and YESDUPS).
415  */
416 static void
insert_addr(saddr_t ** list,saddr_t * newaddr,int dups)417 insert_addr(saddr_t **list, saddr_t *newaddr, int dups)
418 {
419 	saddr_t *prev = *list;
420 	saddr_t *psaddr;
421 
422 	if (*list == NULL) {
423 		newaddr->next = *list;
424 		*list = newaddr;
425 		return;
426 	}
427 
428 	for (psaddr = (*list)->next; psaddr != NULL; psaddr = psaddr->next) {
429 		if ((dups == NODUPS) && (psaddr->addr == newaddr->addr)) {
430 			free(newaddr);
431 			return;
432 		}
433 
434 		/*
435 		 * primary level of comparison is by address; smaller addr 1st
436 		 * secondary level of comparison is by length; bigger length 1st
437 		 */
438 		if ((psaddr->addr > newaddr->addr) ||
439 		    (psaddr->addr == newaddr->addr &&
440 		    psaddr->length < newaddr->length))
441 			break;
442 
443 		prev = psaddr;
444 	}
445 
446 	prev->next = newaddr;
447 	newaddr->next = psaddr;
448 }
449 
450 /*
451  * Deletes given element from list
452  */
453 static void
delete_addr(saddr_t ** list,saddr_t * delme)454 delete_addr(saddr_t **list, saddr_t *delme)
455 {
456 	saddr_t	*prev = *list;
457 
458 	if (delme == *list) {
459 		*list = delme->next;
460 		free(delme);
461 		return;
462 	}
463 
464 	while (prev != NULL && prev->next != delme) {
465 		prev = prev->next;
466 	}
467 
468 	if (prev) {
469 		prev->next = delme->next;
470 		free(delme);
471 	}
472 }
473 
474 /*
475  * Delete entire list
476  */
477 static void
delete_list(saddr_t ** list)478 delete_list(saddr_t **list)
479 {
480 	saddr_t *psaddr = *list;
481 
482 	while (psaddr != NULL) {
483 		saddr_t *temp = psaddr;
484 
485 		psaddr = psaddr->next;
486 		free(temp);
487 	}
488 	*list = NULL;
489 }
490 
491 static saddr_t *
parse_suboptions(char * value)492 parse_suboptions(char *value)
493 {
494 	char	*endptr;
495 	saddr_t *psaddr = malloc(sizeof (saddr_t));
496 
497 	/*
498 	 * This must (better) be a segment addr
499 	 */
500 	psaddr->addr =
501 	    strtoull(value, &endptr, 16);
502 
503 	/*
504 	 * Check to make sure strtoul worked correctly (a properly formatted
505 	 * string will terminate in a ':' (if size is given) or an '=' (if size
506 	 * is not specified). Also check to make sure a 0 addr wasn't returned
507 	 * indicating strtoll was unable to convert).
508 	 */
509 	if ((psaddr->addr == 0) || (*endptr != ':' && *endptr != '=')) {
510 		free(psaddr);
511 		(void) fprintf(stderr,
512 		    gettext("%s: invalid option %s\n"),
513 		    progname, value);
514 		usage();
515 	} else {
516 		/* init other fields */
517 		psaddr->length = 0;
518 		psaddr->adv = NO_ADVICE;
519 		psaddr->next = NULL;
520 
521 		/* skip past address */
522 		value = endptr;
523 
524 		/* check for length */
525 		if (*value == ':') {
526 			/* skip the ":" */
527 			value++;
528 			psaddr->length = atosz(value, &endptr);
529 		}
530 
531 		if (*endptr != '=') {
532 			(void) fprintf(stderr,
533 			    gettext("%s: invalid option %s\n"),
534 			    progname, value);
535 			/*
536 			 * if improperly formatted, free mem, print usage, and
537 			 * exit Note: usage ends with a call to exit()
538 			 */
539 			free(psaddr);
540 			usage();
541 		}
542 		/* skip the "=" */
543 		value = endptr + 1;
544 		at_map |= (1 << AT_SEG);
545 		psaddr->adv =
546 		    get_advice(value);
547 	}
548 
549 	return (psaddr);
550 }
551 
552 /*
553  * Create linked list of mappings for current process
554  * In addition, add generic advice and raw advice
555  * entries to merged_list.
556  */
557 /* ARGSUSED */
558 static int
create_maplist(void * arg,const prmap_t * pmp,const char * object_name)559 create_maplist(void *arg, const prmap_t *pmp, const char *object_name)
560 {
561 	const pstatus_t *Psp = Pstatus(Pr);
562 	mapnode_t *newmap = malloc(sizeof (mapnode_t));
563 	saddr_t	*newaddr;
564 	saddr_t	*psaddr;
565 	char	*lname = NULL;
566 	int	i;
567 
568 	if (interrupt)
569 		return (0);
570 
571 	newmap->pmp = malloc(sizeof (prmap_t));
572 	newmap->label[0] = '\0';
573 	newmap->mtypes = 0;
574 	newmap->next = NULL;
575 	(void) memcpy(newmap->pmp, pmp, sizeof (prmap_t));
576 
577 	/*
578 	 * If the mapping is not anon or not part of the heap, make a name
579 	 * for it.  We don't want to report the heap as a.out's data.
580 	 */
581 	if (!(pmp->pr_mflags & MA_ANON) ||
582 	    (pmp->pr_vaddr + pmp->pr_size <= Psp->pr_brkbase ||
583 	    pmp->pr_vaddr >= Psp->pr_brkbase + Psp->pr_brksize)) {
584 		lname = make_name(Pr, lflag, pmp->pr_vaddr, pmp->pr_mapname,
585 		    newmap->label, sizeof (newmap->label));
586 		if (pmp->pr_mflags & MA_SHARED)
587 			newmap->mtypes |= 1 << AT_SHARED;
588 		else
589 			newmap->mtypes |= 1 << AT_PRIVM;
590 	}
591 
592 	if (lname == NULL && (pmp->pr_mflags & MA_ANON)) {
593 		lname = anon_name(newmap->label, Psp, stacks, nstacks,
594 		    pmp->pr_vaddr, pmp->pr_size, pmp->pr_mflags, pmp->pr_shmid,
595 		    &newmap->mtypes);
596 	}
597 
598 	/*
599 	 * Add raw advice that applies to this mapping to the merged_list
600 	 */
601 	psaddr = rawadv_list;
602 	/*
603 	 * Advance to point in rawadv_list that applies to this mapping
604 	 */
605 	while (psaddr && psaddr->addr < pmp->pr_vaddr)
606 		psaddr = psaddr->next;
607 	/*
608 	 * Copy over to merged_list, check to see if size needs to be filled in
609 	 */
610 	while (psaddr && psaddr->addr < (pmp->pr_vaddr + pmp->pr_size)) {
611 		newaddr = malloc(sizeof (saddr_t));
612 		(void) memcpy(newaddr, psaddr, sizeof (saddr_t));
613 		insert_addr(&merged_list, newaddr, YESDUPS);
614 		/*
615 		 * For raw advice that is given without size, try to default
616 		 * size to size of mapping (only allowed if raw adv addr is
617 		 * equal to beginning of mapping). Don't change the entry
618 		 * in rawadv_list, only in the merged_list as the mappings
619 		 * (and thus the default sizes) will be different for
620 		 * different processes.
621 		 */
622 		if ((pmp->pr_vaddr == psaddr->addr) && (psaddr->length == 0))
623 			newaddr->length = pmp->pr_size;
624 		psaddr = psaddr->next;
625 	}
626 
627 	/*
628 	 * Put mapping into merged list with no advice, then
629 	 * check to see if any generic advice applies.
630 	 */
631 	newaddr = malloc(sizeof (saddr_t));
632 	newaddr->addr = pmp->pr_vaddr;
633 	newaddr->length = pmp->pr_size;
634 	newaddr->adv = NO_ADVICE;
635 	insert_addr(&merged_list, newaddr, YESDUPS);
636 
637 	newmap->mtypes &= at_map;
638 	for (i = AT_STACK; i >= AT_PRIVM; i--) {
639 		if (newmap->mtypes & (1 << i)) {
640 			assert(generic_adv[i] != NO_ADVICE);
641 			newaddr->adv = generic_adv[i];
642 			break;
643 		}
644 	}
645 
646 	/*
647 	 * Add to linked list of mappings
648 	 */
649 	if (maplist_tail == NULL) {
650 		maplist_head = maplist_tail = newmap;
651 	} else {
652 		maplist_tail->next = newmap;
653 		maplist_tail = newmap;
654 	}
655 
656 
657 	return (0);
658 }
659 
660 /*
661  * Traverse advice list and apply all applicable advice to each region
662  */
663 static int
apply_advice(saddr_t ** advicelist)664 apply_advice(saddr_t **advicelist)
665 {
666 	saddr_t	*psaddr = *advicelist;
667 	saddr_t	*next;
668 	int	i;
669 
670 
671 	while (!interrupt && psaddr != NULL) {
672 		/*
673 		 * Save next pointer since element may be removed before
674 		 * we get a chance to advance psaddr.
675 		 */
676 		next = psaddr->next;
677 
678 		/*
679 		 * Since mappings have been added to the merged list
680 		 * even if no generic advice was given for the map,
681 		 * check to make sure advice exists before bothering
682 		 * with the for loop.
683 		 */
684 		if (psaddr->adv != NO_ADVICE) {
685 			for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
686 				if ((psaddr->adv & (1 << i)) &&
687 				    (pr_madvise(Pr, (caddr_t)psaddr->addr,
688 				    psaddr->length, i) < 0)) {
689 					/*
690 					 * madvise(3C) call failed trying to
691 					 * apply advice output error and remove
692 					 * from advice list
693 					 */
694 					(void) fprintf(stderr,
695 					    gettext("Error applying "
696 					    "advice (%s) to memory range "
697 					    "[%lx, %lx):\n"),
698 					    advicestr[i], (ulong_t)psaddr->addr,
699 					    (ulong_t)psaddr->addr +
700 					    psaddr->length);
701 					perror("madvise");
702 					/*
703 					 * Clear this advice from the advice
704 					 * mask. If no more advice is given
705 					 * for this element, remove element
706 					 * from list.
707 					 */
708 					psaddr->adv &= ~(1 << i);
709 					if (psaddr->adv == 0) {
710 						delete_addr(advicelist, psaddr);
711 						break;
712 					}
713 				}
714 			}
715 		}
716 		psaddr = next;
717 	}
718 	return (0);
719 }
720 
721 /*
722  * Set advice but keep mutual exclusive property of advice groupings
723  */
724 static void
set_advice(int * combined_adv,int new_adv)725 set_advice(int *combined_adv, int new_adv)
726 {
727 	/*
728 	 * Since advice falls in 3 groups of mutually exclusive options,
729 	 * clear previous value if new advice overwrites that group.
730 	 */
731 
732 	/*
733 	 * If this is the first advice to be applied, clear invalid value (-1)
734 	 */
735 	if (*combined_adv == -1)
736 		*combined_adv = 0;
737 
738 	if (new_adv & GRP1_ADV)
739 		*combined_adv &= ~GRP1_ADV;
740 	else if (new_adv & GRP2_ADV)
741 		*combined_adv &= ~GRP2_ADV;
742 	else
743 		*combined_adv &= ~GRP3_ADV;
744 
745 	*combined_adv |= new_adv;
746 }
747 
748 /*
749  * Create chopped list from merged list for use with verbose output
750  */
751 static void
create_choplist(saddr_t ** choppedlist,saddr_t * mergedlist)752 create_choplist(saddr_t **choppedlist, saddr_t *mergedlist)
753 {
754 	saddr_t	*mlptr, *clptr;
755 
756 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
757 		clptr = malloc(sizeof (saddr_t));
758 		clptr->addr = mlptr->addr;
759 		clptr->length = 0;
760 		/*
761 		 * Initialize the adv to -1 as an indicator for invalid
762 		 * elements in the chopped list (created from gaps between
763 		 * memory maps).
764 		 */
765 		clptr->adv = -1;
766 		clptr->next = NULL;
767 		insert_addr(choppedlist, clptr, NODUPS);
768 
769 		clptr = malloc(sizeof (saddr_t));
770 		clptr->addr = mlptr->addr + mlptr->length;
771 		clptr->length = 0;
772 		/*
773 		 * Again, initialize to -1 as an indicatorfor invalid elements
774 		 */
775 		clptr->adv = -1;
776 		clptr->next = NULL;
777 		insert_addr(choppedlist, clptr, NODUPS);
778 	}
779 
780 	for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
781 		if (clptr->next) {
782 			clptr->length = clptr->next->addr - clptr->addr;
783 		} else {
784 			/*
785 			 * must be last element, now that we've calculated
786 			 * all segment lengths, we can remove this node
787 			 */
788 			delete_addr(choppedlist, clptr);
789 			break;
790 		}
791 	}
792 
793 	for (mlptr = mergedlist; mlptr != NULL; mlptr = mlptr->next) {
794 		for (clptr = *choppedlist; clptr != NULL; clptr = clptr->next) {
795 			if (mlptr->addr <= clptr->addr &&
796 			    mlptr->addr + mlptr->length >=
797 			    clptr->addr + clptr->length)
798 				/*
799 				 * set_advice() will take care of conflicting
800 				 * advice by taking only the last advice
801 				 * applied for each of the 3 groups of advice.
802 				 */
803 				set_advice(&clptr->adv, mlptr->adv);
804 			if (mlptr->addr + mlptr->length <
805 			    clptr->addr)
806 				break;
807 		}
808 	}
809 }
810 
811 /*
812  * Print advice in pmap style for verbose output
813  */
814 static void
print_advice(saddr_t * advlist,mapnode_t * maplist)815 print_advice(saddr_t *advlist, mapnode_t *maplist)
816 {
817 	saddr_t		*psaddr = advlist;
818 	mapnode_t	*pmapnode;
819 	char		*advice;
820 
821 	pmapnode = maplist;
822 
823 	while (psaddr) {
824 		/*
825 		 * Using indicator flag from create_choppedlist, we know
826 		 * which entries in the chopped_list are gaps and should
827 		 * not be printed.
828 		 */
829 		if (psaddr->adv == -1) {
830 			psaddr = psaddr->next;
831 			continue;
832 		}
833 
834 		while (pmapnode && (pmapnode->pmp->pr_vaddr +
835 		    pmapnode->pmp->pr_size <= psaddr->addr))
836 			pmapnode = pmapnode->next;
837 
838 		advice = advtostr(psaddr->adv);
839 
840 		/*
841 		 * Print segment mapping and advice if there is any, or just a
842 		 * segment mapping.
843 		 */
844 		if (strlen(advice) > 0) {
845 			(void) printf("%.*lX %*uK %6s %s\t%s\n",
846 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
847 			    (int)ROUNDUP_KB(psaddr->length),
848 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label,
849 			    advice);
850 		} else {
851 			(void) printf("%.*lX %*uK %6s %s\n",
852 			    addr_width, (ulong_t)psaddr->addr, size_width - 1,
853 			    (int)ROUNDUP_KB(psaddr->length),
854 			    mflags(pmapnode->pmp->pr_mflags), pmapnode->label);
855 		}
856 		psaddr = psaddr->next;
857 
858 	}
859 }
860 
861 /*
862  * Call madvise(3c) in the context of the target process
863  */
864 static int
pr_madvise(struct ps_prochandle * Pr,caddr_t addr,size_t len,int advice)865 pr_madvise(struct ps_prochandle *Pr, caddr_t addr, size_t len, int advice)
866 {
867 	return (pr_memcntl(Pr, addr, len, MC_ADVISE,
868 	    (caddr_t)(uintptr_t)advice, 0, 0));
869 }
870 
871 static char *
mflags(uint_t arg)872 mflags(uint_t arg)
873 {
874 	static char code_buf[80];
875 
876 	/*
877 	 * rwxsR
878 	 *
879 	 * r - segment is readable
880 	 * w - segment is writable
881 	 * x - segment is executable
882 	 * s - segment is shared
883 	 * R - segment is mapped MAP_NORESERVE
884 	 *
885 	 */
886 	(void) snprintf(code_buf, sizeof (code_buf), "%c%c%c%c%c ",
887 	    arg & MA_READ ? 'r' : '-',
888 	    arg & MA_WRITE ? 'w' : '-',
889 	    arg & MA_EXEC ? 'x' : '-',
890 	    arg & MA_SHARED ? 's' : '-',
891 	    arg & MA_NORESERVE ? 'R' : '-');
892 
893 	return (code_buf);
894 }
895 
896 /*
897  * Convert advice to a string containing a commented list of applicable advice
898  */
899 static char *
advtostr(int adv)900 advtostr(int adv)
901 {
902 	static char buf[50];
903 	int i;
904 
905 	*buf = '\0';
906 
907 	if (adv != NO_ADVICE) {
908 		for (i = MADV_NORMAL; i <= MADV_PURGE; i++) {
909 			if (adv & (1 << i)) {
910 				/*
911 				 * check if it's the first advice entry
912 				 */
913 				if (*buf == '\0') {
914 					(void) snprintf(buf, sizeof (buf) - 1,
915 					    "<= %s", advicestr[i]);
916 				} else {
917 					(void) strlcat(buf, ",", sizeof (buf));
918 					(void) strlcat(buf, advicestr[i],
919 					    sizeof (buf));
920 				}
921 			}
922 		}
923 	}
924 
925 	return (buf);
926 }
927 
928 /*
929  * Handler for catching signals from terminal
930  */
931 /* ARGSUSED */
932 static void
intr(int sig)933 intr(int sig)
934 {
935 	interrupt++;
936 }
937 
938 int
main(int argc,char ** argv)939 main(int argc, char **argv)
940 {
941 	int Fflag = 0;
942 	int rc = 0;
943 	int opt, subopt;
944 	int tmpadv;
945 	char	*options, *value;
946 	saddr_t	*psaddr;
947 	mapnode_t *pmapnode, *tempmapnode;
948 
949 	(void) setlocale(LC_ALL, "");
950 	(void) textdomain(TEXT_DOMAIN);
951 
952 	/*
953 	 * Get name of program for error messages
954 	 */
955 	progname = basename(argv[0]);
956 
957 	/*
958 	 * Not much to do when only name of program given
959 	 */
960 	if (argc == 1)
961 		usage();
962 
963 	/*
964 	 * Catch signals from terminal, so they can be handled asynchronously
965 	 * when we're ready instead of when we're not (;-)
966 	 */
967 	if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
968 		(void) sigset(SIGHUP, intr);
969 	if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
970 		(void) sigset(SIGINT, intr);
971 	if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
972 		(void) sigset(SIGQUIT, intr);
973 	(void) sigset(SIGPIPE, intr);
974 	(void) sigset(SIGTERM, intr);
975 
976 	/*
977 	 * Parse options, record generic advice if any and create
978 	 * rawadv_list from specific address advice.
979 	 */
980 
981 	while ((opt = getopt(argc, argv, "Flo:v")) != EOF) {
982 		switch (opt) {
983 		case 'o':
984 			options = optarg;
985 			while (*options != '\0') {
986 				subopt = getsubopt(&options, suboptstr,
987 				    &value);
988 				switch (subopt) {
989 				case AT_PRIVM:
990 				case AT_HEAP:
991 				case AT_SHARED:
992 				case AT_STACK:
993 					at_map |= (1 << subopt);
994 					tmpadv = get_advice(value);
995 					set_advice(&generic_adv[subopt],
996 					    tmpadv);
997 					break;
998 				default:
999 					at_map |= (1 << AT_SEG);
1000 					psaddr = parse_suboptions(value);
1001 					if (psaddr == NULL) {
1002 						usage();
1003 					} else {
1004 						insert_addr(&rawadv_list,
1005 						    psaddr, YESDUPS);
1006 					}
1007 					break;
1008 				}
1009 			}
1010 			break;
1011 		case 'v':
1012 			opt_verbose = 1;
1013 			break;
1014 		case 'F':		/* force grabbing (no O_EXCL) */
1015 			Fflag = PGRAB_FORCE;
1016 			break;
1017 		case 'l':		/* show unresolved link map names */
1018 			lflag = 1;
1019 			break;
1020 		default:
1021 			usage();
1022 			break;
1023 		}
1024 	}
1025 
1026 	argc -= optind;
1027 	argv += optind;
1028 
1029 	if (argc <= 0) {
1030 		usage();
1031 	}
1032 
1033 	(void) proc_initstdio();
1034 
1035 	/*
1036 	 * Iterate through all pid arguments, create new merged_list, maplist,
1037 	 * (and chopped_list if using verbose output) based on each process'
1038 	 * memory map.
1039 	 */
1040 
1041 	while (!interrupt && argc-- > 0) {
1042 		char *arg;
1043 		int gcode;
1044 		psinfo_t psinfo;
1045 
1046 		(void) proc_flushstdio();
1047 
1048 		if ((Pr = proc_arg_grab(arg = *argv++, PR_ARG_PIDS,
1049 		    PGRAB_RETAIN | Fflag, &gcode)) == NULL) {
1050 			(void) fprintf(stderr,
1051 			    gettext("%s: cannot examine %s: %s\n"),
1052 			    progname, arg, Pgrab_error(gcode));
1053 			rc++;
1054 			continue;
1055 		}
1056 
1057 
1058 		addr_width =
1059 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 16 : 8;
1060 		size_width =
1061 		    (Pstatus(Pr)->pr_dmodel == PR_MODEL_LP64) ? 11 : 8;
1062 		(void) memcpy(&psinfo, Ppsinfo(Pr), sizeof (psinfo_t));
1063 
1064 		if (opt_verbose) {
1065 			proc_unctrl_psinfo(&psinfo);
1066 			(void) printf("%d:\t%.70s\n",
1067 			    (int)psinfo.pr_pid, psinfo.pr_psargs);
1068 		}
1069 
1070 		/*
1071 		 * Get mappings for a process unless it is a system process.
1072 		 */
1073 		if (!(Pstatus(Pr)->pr_flags & PR_ISSYS)) {
1074 			nstacks = psinfo.pr_nlwp * 2;
1075 			stacks = calloc(nstacks, sizeof (stacks[0]));
1076 			if (stacks != NULL) {
1077 				int n = 0;
1078 				(void) Plwp_iter(Pr, getstack, &n);
1079 				qsort(stacks, nstacks, sizeof (stacks[0]),
1080 				    cmpstacks);
1081 			}
1082 
1083 			if (Pgetauxval(Pr, AT_BASE) != -1L &&
1084 			    Prd_agent(Pr) == NULL) {
1085 				(void) fprintf(stderr,
1086 				    gettext("%s: warning: "
1087 				    "librtld_db failed to initialize; "
1088 				    "shared library information will not "
1089 				    "be available\n"),
1090 				    progname);
1091 			}
1092 
1093 			/*
1094 			 * Create linked list of mappings for current process
1095 			 * In addition, add generic advice and raw advice
1096 			 * entries to merged_list.
1097 			 * e.g. if rawadv_list contains:
1098 			 *   [0x38000,0x3a000) = adv1
1099 			 *   [0x3a000,0x3c000) = adv2
1100 			 * and there is generic advice:
1101 			 *   heap = adv3
1102 			 * where heap corresponds to 0x38000, then merged_list
1103 			 * will contain:
1104 			 *   ... (include all other mappings from process)
1105 			 *   [0x38000,0x3c000) = adv3
1106 			 *   [0x38000,0x3a000) = adv1
1107 			 *   [0x3a000,0x3c000) = adv2
1108 			 *   ... (include all other mappings from process)
1109 			 */
1110 			assert(merged_list == NULL);
1111 			maplist_head = maplist_tail = NULL;
1112 			rc += Pmapping_iter(Pr, (proc_map_f *)create_maplist,
1113 			    NULL);
1114 
1115 			/*
1116 			 * Apply advice by iterating through merged list
1117 			 */
1118 			(void) apply_advice(&merged_list);
1119 
1120 			if (opt_verbose) {
1121 				assert(chopped_list == NULL);
1122 				/*
1123 				 * Create chopped_list from merged_list
1124 				 */
1125 				create_choplist(&chopped_list, merged_list);
1126 
1127 				/*
1128 				 * Iterate through maplist and output as
1129 				 * given by chopped_list
1130 				 */
1131 				print_advice(chopped_list, maplist_head);
1132 				delete_list(&chopped_list);
1133 			}
1134 
1135 			delete_list(&merged_list);
1136 
1137 			/*
1138 			 * Clear maplist
1139 			 */
1140 			pmapnode = maplist_head;
1141 			while (pmapnode) {
1142 				tempmapnode = pmapnode;
1143 				pmapnode = pmapnode->next;
1144 				free(tempmapnode);
1145 			}
1146 
1147 			if (stacks != NULL) {
1148 				free(stacks);
1149 				stacks = NULL;
1150 			}
1151 		}
1152 
1153 		Prelease(Pr, 0);
1154 	}
1155 
1156 	(void) proc_finistdio();
1157 
1158 	return (rc);
1159 }
1160