/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <alloca.h>
#include <assert.h>
#include <dirent.h>
#include <dlfcn.h>
#include <door.h>
#include <errno.h>
#include <exacct.h>
#include <ctype.h>
#include <fcntl.h>
#include <kstat.h>
#include <libcontract.h>
#include <libintl.h>
#include <libscf.h>
#include <zonestat.h>
#include <zonestat_impl.h>
#include <limits.h>
#include <pool.h>
#include <procfs.h>
#include <rctl.h>
#include <thread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <synch.h>
#include <sys/acctctl.h>
#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <sys/fork.h>
#include <sys/param.h>
#include <sys/priocntl.h>
#include <sys/fxpriocntl.h>
#include <sys/processor.h>
#include <sys/pset.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/swap.h>
#include <sys/systeminfo.h>
#include <thread.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vm_usage.h>
#include <sys/wait.h>
#include <sys/zone.h>
#include <time.h>
#include <ucred.h>
#include <unistd.h>
#include <vm/anon.h>
#include <zone.h>
#include <zonestat.h>

#define	MAX_PSET_NAME		1024	/* Taken from PV_NAME_MAX_LEN */
#define	ZSD_PSET_UNLIMITED	UINT16_MAX
#define	ZONESTAT_EXACCT_FILE	"/var/adm/exacct/zonestat-process"

/*
 * zonestatd implements gathering cpu and memory utilization data for
 * running zones.  It has these components:
 *
 * zsd_server:
 *	Door server to respond to client connections.  Each client
 *	will connect using libzonestat.so, which will open and
 *	call /var/tmp/.zonestat_door.  Each connecting client is given
 *	a file descriptor to the stat server.
 *
 *	The zsd_server also responds to zoneadmd, which reports when a
 *	new zone is booted.  This is used to fattach the zsd_server door
 *	into the new zone.
 *
 * zsd_stat_server:
 *	Receives client requests for the current utilization data.  Each
 *	client request will cause zonestatd to update the current utilization
 *	data by kicking the stat_thread.
 *
 *	If the client is in a non-global zone, the utilization data will
 *	be filtered to only show the given zone.  The usage by all other zones
 *	will be added to the system utilization.
 *
 * stat_thread:
 *	The stat thread implements querying the system to determine the
 *	current utilization data for each running zone.  This includes
 *	inspecting the system's processor set configuration, as well as details
 *	of each zone, such as their configured limits, and which processor
 *	sets they are running in.
 *
 *	The stat_thread will only update memory utilization data as often as
 *	the configured config/sample_interval on the zones-monitoring service.
 */
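
/*
 * For illustration only: a minimal sketch of how a client would reach the
 * zsd_server door described above.  Real clients go through libzonestat.so,
 * and the request/response layout is private to libzonestat and
 * zonestat_impl.h, so the payload and buffer size below are hypothetical
 * placeholders; only the door mechanics are shown.
 *
 *	door_arg_t darg;
 *	char rbuf[8192];
 *	int fd;
 *
 *	fd = open("/var/tmp/.zonestat_door", O_RDONLY);
 *	bzero(&darg, sizeof (darg));
 *	darg.data_ptr = NULL;		(request payload would go here)
 *	darg.data_size = 0;
 *	darg.rbuf = rbuf;
 *	darg.rsize = sizeof (rbuf);
 *	if (door_call(fd, &darg) == 0) {
 *		(the descriptor(s) returned in darg.desc_ptr give the
 *		client access to the zsd_stat_server)
 *	}
 */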

/*
 * The private vmusage structure unfortunately uses size_t types, and assumes
 * the caller's bitness matches the kernel's bitness.  Since the getvmusage()
 * system call is contracted, and zonestatd is 32 bit, the following structures
 * are used to interact with a 32bit or 64 bit kernel.
 */
typedef struct zsd_vmusage32 {
	id_t	vmu_zoneid;
	uint_t	vmu_type;
	id_t	vmu_id;

	uint32_t vmu_rss_all;
	uint32_t vmu_rss_private;
	uint32_t vmu_rss_shared;
	uint32_t vmu_swap_all;
	uint32_t vmu_swap_private;
	uint32_t vmu_swap_shared;
} zsd_vmusage32_t;

typedef struct zsd_vmusage64 {
	id_t	vmu_zoneid;
	uint_t	vmu_type;
	id_t	vmu_id;
	/*
	 * An amd64 kernel will align the following uint64_t members, but a
	 * 32bit i386 process will not without help.
	 */
	int	vmu_align_next_members_on_8_bytes;
	uint64_t vmu_rss_all;
	uint64_t vmu_rss_private;
	uint64_t vmu_rss_shared;
	uint64_t vmu_swap_all;
	uint64_t vmu_swap_private;
	uint64_t vmu_swap_shared;
} zsd_vmusage64_t;
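
/*
 * Illustrative sketch of how the two layouts above are meant to be used:
 * getvmusage(2) is passed a buffer whose layout matches the kernel's
 * bitness, and the results are then normalized into a common form.  The
 * flags, error handling and bitness test (e.g. via
 * sysinfo(SI_ARCHITECTURE_64, ...)) are simplified, and the variable names
 * are hypothetical.
 *
 *	size_t nres = nzones;
 *
 *	if (kernel_is_64bit) {
 *		zsd_vmusage64_t buf64[MAX_ZONES];
 *		(void) getvmusage(VMUSAGE_ALL_ZONES, age,
 *		    (vmusage_t *)buf64, &nres);
 *	} else {
 *		zsd_vmusage32_t buf32[MAX_ZONES];
 *		(void) getvmusage(VMUSAGE_ALL_ZONES, age,
 *		    (vmusage_t *)buf32, &nres);
 *	}
 *
 * "age" is the maximum acceptable staleness, in seconds, of a previously
 * computed result.
 */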

struct zsd_zone;

/* Used to store a zone's usage of a pset */
typedef struct zsd_pset_usage {
	struct zsd_zone	*zsu_zone;
	struct zsd_pset	*zsu_pset;

	list_node_t	zsu_next;

	zoneid_t	zsu_zoneid;
	boolean_t	zsu_found;	/* zone bound at end of interval */
	boolean_t	zsu_active;	/* zone was bound during interval */
	boolean_t	zsu_new;	/* zone newly bound in this interval */
	boolean_t	zsu_deleted;	/* zone was unbound in this interval */
	boolean_t	zsu_empty;	/* no procs in pset in this interval */
	time_t		zsu_start;	/* time when zone was found in pset */
	hrtime_t	zsu_hrstart;	/* time when zone was found in pset */
	uint64_t	zsu_cpu_shares;
	uint_t		zsu_scheds;	/* schedulers found in this pass */
	timestruc_t	zsu_cpu_usage;	/* cpu time used */
} zsd_pset_usage_t;

/* Used to store a pset's utilization */
typedef struct zsd_pset {
	psetid_t	zsp_id;
	list_node_t	zsp_next;
	char		zsp_name[ZS_PSETNAME_MAX];

	uint_t		zsp_cputype;	/* default, dedicated or shared */
	boolean_t	zsp_found;	/* pset found at end of interval */
	boolean_t	zsp_new;	/* pset new in this interval */
	boolean_t	zsp_deleted;	/* pset deleted in this interval */
	boolean_t	zsp_active;	/* pset existed during interval */
	boolean_t	zsp_empty;	/* no processes in pset */
	time_t		zsp_start;
	hrtime_t	zsp_hrstart;

	uint64_t	zsp_online;	/* online cpus in interval */
	uint64_t	zsp_size;	/* size in this interval */
	uint64_t	zsp_min;	/* configured min in this interval */
	uint64_t	zsp_max;	/* configured max in this interval */
	int64_t		zsp_importance;	/* configured importance in interval */

	uint_t		zsp_scheds;	/* scheds of processes found in pset */
	uint64_t	zsp_cpu_shares;	/* total shares in this interval */

	timestruc_t	zsp_total_time;
	timestruc_t	zsp_usage_kern;
	timestruc_t	zsp_usage_zones;

	/* Individual zone usages of pset */
	list_t		zsp_usage_list;
	int		zsp_nusage;

	/* Summed kstat values from individual cpus in pset */
	timestruc_t	zsp_idle;
	timestruc_t	zsp_intr;
	timestruc_t	zsp_kern;
	timestruc_t	zsp_user;

} zsd_pset_t;

/* Used to track an individual cpu's utilization as reported by kstats */
typedef struct zsd_cpu {
	processorid_t	zsc_id;
	list_node_t	zsc_next;
	psetid_t	zsc_psetid;
	psetid_t	zsc_psetid_prev;
	zsd_pset_t	*zsc_pset;

	boolean_t	zsc_found;	/* cpu online in this interval */
	boolean_t	zsc_onlined;	/* cpu onlined during this interval */
	boolean_t	zsc_offlined;	/* cpu offlined during this interval */
	boolean_t	zsc_active;	/* cpu online during this interval */
	boolean_t	zsc_allocated;	/* True if cpu has ever been found */

	/* kstats this interval */
	uint64_t	zsc_nsec_idle;
	uint64_t	zsc_nsec_intr;
	uint64_t	zsc_nsec_kern;
	uint64_t	zsc_nsec_user;

	/* kstats in most recent interval */
	uint64_t	zsc_nsec_idle_prev;
	uint64_t	zsc_nsec_intr_prev;
	uint64_t	zsc_nsec_kern_prev;
	uint64_t	zsc_nsec_user_prev;

	/* Total kstat increases since zonestatd started reading kstats */
	timestruc_t	zsc_idle;
	timestruc_t	zsc_intr;
	timestruc_t	zsc_kern;
	timestruc_t	zsc_user;

} zsd_cpu_t;

/* Used to describe an individual zone and its utilization */
typedef struct zsd_zone {
	zoneid_t	zsz_id;
	list_node_t	zsz_next;
	char		zsz_name[ZS_ZONENAME_MAX];
	uint_t		zsz_cputype;
	uint_t		zsz_iptype;
	time_t		zsz_start;
	hrtime_t	zsz_hrstart;

	char		zsz_pool[ZS_POOLNAME_MAX];
	char		zsz_pset[ZS_PSETNAME_MAX];
	int		zsz_default_sched;
	/* These are deduced by inspecting processes */
	psetid_t	zsz_psetid;
	uint_t		zsz_scheds;

	boolean_t	zsz_new;	/* zone booted during this interval */
	boolean_t	zsz_deleted;	/* halted during this interval */
	boolean_t	zsz_active;	/* running in this interval */
	boolean_t	zsz_empty;	/* no processes in this interval */
	boolean_t	zsz_gone;	/* not installed in this interval */
	boolean_t	zsz_found;	/* Running at end of this interval */

	uint64_t	zsz_cpu_shares;
	uint64_t	zsz_cpu_cap;
	uint64_t	zsz_ram_cap;
	uint64_t	zsz_locked_cap;
	uint64_t	zsz_vm_cap;

	uint64_t	zsz_cpus_online;
	timestruc_t	zsz_cpu_usage;	/* cpu time used */
	timestruc_t	zsz_cap_time;	/* cpu time of cpu cap */
	timestruc_t	zsz_share_time;	/* cpu time of share of cpu */
	timestruc_t	zsz_pset_time;	/* time of all psets zone is bound to */

	uint64_t	zsz_usage_ram;
	uint64_t	zsz_usage_locked;
	uint64_t	zsz_usage_vm;

	uint64_t	zsz_processes_cap;
	uint64_t	zsz_lwps_cap;
	uint64_t	zsz_shm_cap;
	uint64_t	zsz_shmids_cap;
	uint64_t	zsz_semids_cap;
	uint64_t	zsz_msgids_cap;
	uint64_t	zsz_lofi_cap;

	uint64_t	zsz_processes;
	uint64_t	zsz_lwps;
	uint64_t	zsz_shm;
	uint64_t	zsz_shmids;
	uint64_t	zsz_semids;
	uint64_t	zsz_msgids;
	uint64_t	zsz_lofi;

} zsd_zone_t;

/*
 * Used to track the cpu usage of an individual process.
 *
 * zonestatd sweeps /proc each interval and charges the cpu usage of processes
 * to their zone.  As processes exit, their extended accounting records are
 * read and the difference of their total and known usage is charged to their
 * zone.
 *
 * If a process is never seen in /proc, the total usage on its extended
 * accounting record will be charged to its zone.
 */
typedef struct zsd_proc {
	list_node_t	zspr_next;
	pid_t		zspr_ppid;
	psetid_t	zspr_psetid;
	zoneid_t	zspr_zoneid;
	int		zspr_sched;
	timestruc_t	zspr_usage;
} zsd_proc_t;

/* Used to track the overall resource usage of the system */
typedef struct zsd_system {

	uint64_t	zss_ram_total;
	uint64_t	zss_ram_kern;
	uint64_t	zss_ram_zones;

	uint64_t	zss_locked_kern;
	uint64_t	zss_locked_zones;

	uint64_t	zss_vm_total;
	uint64_t	zss_vm_kern;
	uint64_t	zss_vm_zones;

	uint64_t	zss_swap_total;
	uint64_t	zss_swap_used;

	timestruc_t	zss_idle;
	timestruc_t	zss_intr;
	timestruc_t	zss_kern;
	timestruc_t	zss_user;

	timestruc_t	zss_cpu_total_time;
	timestruc_t	zss_cpu_usage_kern;
	timestruc_t	zss_cpu_usage_zones;

	uint64_t	zss_maxpid;
	uint64_t	zss_processes_max;
	uint64_t	zss_lwps_max;
	uint64_t	zss_shm_max;
	uint64_t	zss_shmids_max;
	uint64_t	zss_semids_max;
	uint64_t	zss_msgids_max;
	uint64_t	zss_lofi_max;

	uint64_t	zss_processes;
	uint64_t	zss_lwps;
	uint64_t	zss_shm;
	uint64_t	zss_shmids;
	uint64_t	zss_semids;
	uint64_t	zss_msgids;
	uint64_t	zss_lofi;

	uint64_t	zss_ncpus;
	uint64_t	zss_ncpus_online;

} zsd_system_t;

/*
 * A dumping ground for various information and structures used to compute
 * utilization.
 *
 * This structure is used to track the system while clients are connected.
 * When the first client connects, a zsd_ctl is allocated and configured by
 * zsd_open().  When all clients disconnect, the zsd_ctl is closed.
 */
typedef struct zsd_ctl {
	kstat_ctl_t	*zsctl_kstat_ctl;

	/* To track extended accounting */
	int		zsctl_proc_fd;		/* Log currently being used */
	ea_file_t	zsctl_proc_eaf;
	struct stat64	zsctl_proc_stat;
	int		zsctl_proc_open;
	int		zsctl_proc_fd_next;	/* Log file to use next */
	ea_file_t	zsctl_proc_eaf_next;
	struct stat64	zsctl_proc_stat_next;
	int		zsctl_proc_open_next;

	/* pool configuration handle */
	pool_conf_t	*zsctl_pool_conf;
	int		zsctl_pool_status;
	int		zsctl_pool_changed;

	/* The above usage tracking structures */
	zsd_system_t	*zsctl_system;
	list_t		zsctl_zones;
	list_t		zsctl_psets;
	list_t		zsctl_cpus;
	zsd_cpu_t	*zsctl_cpu_array;
	zsd_proc_t	*zsctl_proc_array;

	/* Various system info */
	uint64_t	zsctl_maxcpuid;
	uint64_t	zsctl_maxproc;
	uint64_t	zsctl_kern_bits;
	uint64_t	zsctl_pagesize;

	/* Used to track time available under a cpu cap. */
	uint64_t	zsctl_hrtime;
	uint64_t	zsctl_hrtime_prev;
	timestruc_t	zsctl_hrtime_total;

	struct timeval	zsctl_timeofday;

	/* Caches for arrays allocated for use by various system calls */
	psetid_t	*zsctl_pset_cache;
	uint_t		zsctl_pset_ncache;
	processorid_t	*zsctl_cpu_cache;
	uint_t		zsctl_cpu_ncache;
	zoneid_t	*zsctl_zone_cache;
	uint_t		zsctl_zone_ncache;
	struct swaptable *zsctl_swap_cache;
	uint64_t	zsctl_swap_cache_size;
	uint64_t	zsctl_swap_cache_num;
	zsd_vmusage64_t	*zsctl_vmusage_cache;
	uint64_t	zsctl_vmusage_cache_num;

	/* Info about procfs for scanning /proc */
	struct dirent	*zsctl_procfs_dent;
	long		zsctl_procfs_dent_size;
	pool_value_t	*zsctl_pool_vals[3];

	/* Counts on tracked entities */
	uint_t		zsctl_nzones;
	uint_t		zsctl_npsets;
	uint_t		zsctl_npset_usages;
} zsd_ctl_t;

zsd_ctl_t		*g_ctl;
boolean_t		g_open;		/* True if g_ctl is open */
int			g_hasclient;	/* True if any clients are connected */

/*
 * The usage cache is updated by the stat_thread, and copied to clients by
 * the zsd_stat_server.  Mutex and cond are to synchronize between the
 * stat_thread and the stat_server.
 */
zs_usage_cache_t	*g_usage_cache;
mutex_t			g_usage_cache_lock;
cond_t			g_usage_cache_kick;
uint_t			g_usage_cache_kickers;
cond_t			g_usage_cache_wait;
char			*g_usage_cache_buf;
uint_t			g_usage_cache_bufsz;
uint64_t		g_gen_next;

/* fds of door servers */
int			g_server_door;
int			g_stat_door;

/*
 * Starting and current time.  Used to throttle memory calculation, and to
 * mark new zones and psets with their boot and creation time.
 */
time_t			g_now;
time_t			g_start;
hrtime_t		g_hrnow;
hrtime_t		g_hrstart;
uint64_t		g_interval;

/*
 * main() thread.
 */
thread_t		g_main;

/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Warning: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
}

/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Error: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
	exit(1);
}

/* Turns on extended accounting if not configured externally */
int
zsd_enable_cpu_stats()
{
	char *path = ZONESTAT_EXACCT_FILE;
	char oldfile[MAXPATHLEN];
	int ret, state = AC_ON;
	ac_res_t res[6];

	/*
	 * Start a new accounting file if accounting not configured
	 * externally.
	 */

	res[0].ar_id = AC_PROC_PID;
	res[0].ar_state = AC_ON;
	res[1].ar_id = AC_PROC_ANCPID;
	res[1].ar_state = AC_ON;
	res[2].ar_id = AC_PROC_CPU;
	res[2].ar_state = AC_ON;
	res[3].ar_id = AC_PROC_TIME;
	res[3].ar_state = AC_ON;
	res[4].ar_id = AC_PROC_ZONENAME;
	res[4].ar_state = AC_ON;
	res[5].ar_id = AC_NONE;
	res[5].ar_state = AC_ON;
	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
		zsd_warn(gettext("Unable to set accounting resources"));
		return (-1);
	}
	/* Only set accounting file if none is configured */
	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
	if (ret < 0) {

		(void) unlink(path);
		if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
		    == -1) {
			zsd_warn(gettext("Unable to set accounting file"));
			return (-1);
		}
	}
	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
		zsd_warn(gettext("Unable to enable accounting"));
		return (-1);
	}
	return (0);
}

/* Turns off extended accounting if not configured externally */
int
zsd_disable_cpu_stats()
{
	char *path = ZONESTAT_EXACCT_FILE;
	int ret, state = AC_OFF;
	ac_res_t res[6];
	char oldfile[MAXPATHLEN];

	/* If accounting file is externally configured, leave it alone */
	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
	if (ret == 0 && strcmp(oldfile, path) != 0)
		return (0);

	res[0].ar_id = AC_PROC_PID;
	res[0].ar_state = AC_OFF;
	res[1].ar_id = AC_PROC_ANCPID;
	res[1].ar_state = AC_OFF;
	res[2].ar_id = AC_PROC_CPU;
	res[2].ar_state = AC_OFF;
	res[3].ar_id = AC_PROC_TIME;
	res[3].ar_state = AC_OFF;
	res[4].ar_id = AC_PROC_ZONENAME;
	res[4].ar_state = AC_OFF;
	res[5].ar_id = AC_NONE;
	res[5].ar_state = AC_OFF;
	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
		zsd_warn(gettext("Unable to clear accounting resources"));
		return (-1);
	}
	if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
		zsd_warn(gettext("Unable to clear accounting file"));
		return (-1);
	}
	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
		zsd_warn(gettext("Unable to disable accounting"));
		return (-1);
	}

	(void) unlink(path);
	return (0);
}

/*
 * If not configured externally, deletes the current extended accounting file
 * and starts a new one.
 *
 * Since the stat_thread holds an open handle to the accounting file, it will
 * read all remaining entries from the old file before switching to
 * read the new one.
 */
int
zsd_roll_exacct(void)
{
	int ret;
	char *path = ZONESTAT_EXACCT_FILE;
	char oldfile[MAXPATHLEN];

	/* If accounting file is externally configured, leave it alone */
	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
	if (ret == 0 && strcmp(oldfile, path) != 0)
		return (0);

	if (unlink(path) != 0)
		/* Roll it next time */
		return (0);

	if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
		zsd_warn(gettext("Unable to set accounting file"));
		return (-1);
	}
	return (0);
}
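
/*
 * For illustration only: the per-process records enabled by
 * zsd_enable_cpu_stats() (pid, ancestor pid, cpu, time, zonename) land in
 * ZONESTAT_EXACCT_FILE and can be walked with libexacct.  A minimal sketch,
 * with error handling and the per-item catalog decoding omitted:
 *
 *	ea_file_t ef;
 *	ea_object_t obj;
 *
 *	if (ea_open(&ef, ZONESTAT_EXACCT_FILE, NULL, 0, O_RDONLY, 0) == 0) {
 *		while (ea_get_object(&ef, &obj) != -1) {
 *			if (obj.eo_type == EO_GROUP) {
 *				(read the group's items with further
 *				ea_get_object() calls and match their
 *				eo_catalog values, e.g. EXD_PROC_PID)
 *			}
 *			(void) ea_free_item(&obj, EUP_ALLOC);
 *		}
 *		(void) ea_close(&ef);
 *	}
 */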

/* Contract stuff for zone_enter() */
int
init_template(void)
{
	int fd;
	int err = 0;

	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
	if (fd == -1)
		return (-1);

	/*
	 * For now, zoneadmd doesn't do anything with the contract.
	 * Deliver no events, don't inherit, and allow it to be orphaned.
	 */
	err |= ct_tmpl_set_critical(fd, 0);
	err |= ct_tmpl_set_informative(fd, 0);
	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
	if (err || ct_tmpl_activate(fd)) {
		(void) close(fd);
		return (-1);
	}

	return (fd);
}

/*
 * Contract stuff for zone_enter()
 */
int
contract_latest(ctid_t *id)
{
	int cfd, r;
	ct_stathdl_t st;
	ctid_t result;

	if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
		return (errno);

	if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
		(void) close(cfd);
		return (r);
	}

	result = ct_status_get_id(st);
	ct_status_free(st);
	(void) close(cfd);

	*id = result;
	return (0);
}

static int
close_on_exec(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);
	if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != -1))
		return (0);
	return (-1);
}

int
contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
{
	char path[PATH_MAX];
	int n, fd;

	if (type == NULL)
		type = "all";

	n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
	if (n >= sizeof (path)) {
		errno = ENAMETOOLONG;
		return (-1);
	}

	fd = open64(path, oflag);
	if (fd != -1) {
		if (close_on_exec(fd) == -1) {
			int err = errno;
			(void) close(fd);
			errno = err;
			return (-1);
		}
	}
	return (fd);
}

int
contract_abandon_id(ctid_t ctid)
{
	int fd, err;

	fd = contract_open(ctid, "all", "ctl", O_WRONLY);
	if (fd == -1)
		return (errno);

	err = ct_ctl_abandon(fd);
	(void) close(fd);

	return (err);
}
/*
 * Attach the zsd_server to a zone.  Called for each zone when zonestatd
 * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
 *
 * Zone_enter is used to avoid reaching into zone to fattach door.
 */
static void
zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
{
	char *path = ZS_DOOR_PATH;
	int fd, pid, stat, tmpl_fd;
	ctid_t ct;

	if ((tmpl_fd = init_template()) == -1) {
		zsd_warn("Unable to init template");
		return;
	}

	pid = forkx(0);
	if (pid < 0) {
		(void) ct_tmpl_clear(tmpl_fd);
		zsd_warn(gettext(
		    "Unable to fork to add zonestat to zoneid %d\n"), zid);
		return;
	}

	if (pid == 0) {
		(void) ct_tmpl_clear(tmpl_fd);
		(void) close(tmpl_fd);
		if (zid != 0 && zone_enter(zid) != 0) {
			if (errno == EINVAL) {
				_exit(0);
			}
			_exit(1);
		}
		(void) fdetach(path);
		(void) unlink(path);
		if (detach_only)
			_exit(0);
		fd = open(path, O_CREAT|O_RDWR, 0644);
		if (fd < 0)
			_exit(2);
		if (fattach(door, path) != 0)
			_exit(3);
		_exit(0);
	}
	if (contract_latest(&ct) == -1)
		ct = -1;
	(void) ct_tmpl_clear(tmpl_fd);
	(void) close(tmpl_fd);
	(void) contract_abandon_id(ct);
	while (waitpid(pid, &stat, 0) != pid)
		;
	if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
		return;

	zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);

	if (WEXITSTATUS(stat) == 1)
		zsd_warn(gettext("Cannot enter zone"));
	else if (WEXITSTATUS(stat) == 2)
		zsd_warn(gettext("Unable to create door file: %s"), path);
	else if (WEXITSTATUS(stat) == 3)
		zsd_warn(gettext("Unable to fattach file: %s"), path);

	zsd_warn(gettext("Internal error entering zone: %d"), zid);
}

/*
 * Zone lookup and allocation functions to manage list of currently running
 * zones.
 */
static zsd_zone_t *
zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
{
	zsd_zone_t *zone;

	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
	    zone = list_next(&ctl->zsctl_zones, zone)) {
		if (strcmp(zone->zsz_name, zonename) == 0) {
			if (zoneid != -1)
				zone->zsz_id = zoneid;
			return (zone);
		}
	}
	return (NULL);
}

static zsd_zone_t *
zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
{
	zsd_zone_t *zone;

	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
	    zone = list_next(&ctl->zsctl_zones, zone)) {
		if (zone->zsz_id == zoneid)
			return (zone);
	}
	return (NULL);
}

static zsd_zone_t *
zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
{
	zsd_zone_t *zone;

	if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
		return (NULL);

	(void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
	zone->zsz_id = zoneid;
	zone->zsz_found = B_FALSE;

	/*
	 * Allocate as deleted so if not found in first pass, zone is deleted
	 * from list.  This can happen if zone is returned by zone_list, but
	 * exits before first attempt to fetch zone details.
	 */
	zone->zsz_start = g_now;
	zone->zsz_hrstart = g_hrnow;
	zone->zsz_deleted = B_TRUE;

	zone->zsz_cpu_shares = ZS_LIMIT_NONE;
	zone->zsz_cpu_cap = ZS_LIMIT_NONE;
	zone->zsz_ram_cap = ZS_LIMIT_NONE;
	zone->zsz_locked_cap = ZS_LIMIT_NONE;
	zone->zsz_vm_cap = ZS_LIMIT_NONE;

	zone->zsz_processes_cap = ZS_LIMIT_NONE;
	zone->zsz_lwps_cap = ZS_LIMIT_NONE;
	zone->zsz_shm_cap = ZS_LIMIT_NONE;
	zone->zsz_shmids_cap = ZS_LIMIT_NONE;
	zone->zsz_semids_cap = ZS_LIMIT_NONE;
	zone->zsz_msgids_cap = ZS_LIMIT_NONE;
	zone->zsz_lofi_cap = ZS_LIMIT_NONE;

	ctl->zsctl_nzones++;

	return (zone);
}

static zsd_zone_t *
zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
{
	zsd_zone_t *zone, *tmp;

	if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
		return (zone);

	if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
		return (NULL);

	/* Insert sorted by zonename */
	tmp = list_head(&ctl->zsctl_zones);
	while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
		tmp = list_next(&ctl->zsctl_zones, tmp);

	list_insert_before(&ctl->zsctl_zones, tmp, zone);
	return (zone);
}

/*
 * Mark all zones as not existing.  As zones are found, they will
 * be marked as existing.  If a zone is not found, then it must have
 * halted.
 */
static void
zsd_mark_zones_start(zsd_ctl_t *ctl)
{

	zsd_zone_t *zone;

	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
	    zone = list_next(&ctl->zsctl_zones, zone)) {
		zone->zsz_found = B_FALSE;
	}
}

/*
 * Mark each zone as not using pset.  If processes are found using the
 * pset, the zone will remain bound to the pset.  If none of a zone's
 * processes are bound to the pset, the zone's usage of the pset will
 * be deleted.
 *
 */
static void
zsd_mark_pset_usage_start(zsd_pset_t *pset)
{
	zsd_pset_usage_t *usage;

	for (usage = list_head(&pset->zsp_usage_list);
	    usage != NULL;
	    usage = list_next(&pset->zsp_usage_list, usage)) {
		usage->zsu_found = B_FALSE;
		usage->zsu_empty = B_TRUE;
	}
}

/*
 * Mark each pset as not existing.  If a pset is found, it will be marked
 * as existing.  If a pset is not found, it will be deleted.
 */
static void
zsd_mark_psets_start(zsd_ctl_t *ctl)
{
	zsd_pset_t *pset;

	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {
		pset->zsp_found = B_FALSE;
		zsd_mark_pset_usage_start(pset);
	}
}

/*
 * A pset was found.  Update its information
 */
static void
zsd_mark_pset_found(zsd_pset_t *pset, uint_t type, uint64_t online,
    uint64_t size, uint64_t min, uint64_t max, int64_t importance)
{
	pset->zsp_empty = B_TRUE;
	pset->zsp_deleted = B_FALSE;

	assert(pset->zsp_found == B_FALSE);

	/* update pset flags */
	if (pset->zsp_active == B_FALSE)
		/* pset not seen on previous interval.  It is new. */
		pset->zsp_new = B_TRUE;
	else
		pset->zsp_new = B_FALSE;

	pset->zsp_found = B_TRUE;
	pset->zsp_cputype = type;
	pset->zsp_online = online;
	pset->zsp_size = size;
	pset->zsp_min = min;
	pset->zsp_max = max;
	pset->zsp_importance = importance;
	pset->zsp_cpu_shares = 0;
	pset->zsp_scheds = 0;
	pset->zsp_active = B_TRUE;
}

/*
 * A zone's process was found using a pset.  Charge the process to the pset
 * and the per-zone data for the pset.
 */
static void
zsd_mark_pset_usage_found(zsd_pset_usage_t *usage, uint_t sched)
{
	zsd_zone_t *zone = usage->zsu_zone;
	zsd_pset_t *pset = usage->zsu_pset;

	/* Nothing to do if already found */
	if (usage->zsu_found == B_TRUE)
		goto add_stats;

	usage->zsu_found = B_TRUE;
	usage->zsu_empty = B_FALSE;

	usage->zsu_deleted = B_FALSE;
	/* update usage flags */
	if (usage->zsu_active == B_FALSE)
		usage->zsu_new = B_TRUE;
	else
		usage->zsu_new = B_FALSE;

	usage->zsu_scheds = 0;
	usage->zsu_cpu_shares = ZS_LIMIT_NONE;
	usage->zsu_active = B_TRUE;
	pset->zsp_empty = B_FALSE;
	zone->zsz_empty = B_FALSE;

add_stats:
	/* Detect zone's pset id, and if it is bound to multiple psets */
	if (zone->zsz_psetid == ZS_PSET_ERROR)
		zone->zsz_psetid = pset->zsp_id;
	else if (zone->zsz_psetid != pset->zsp_id)
		zone->zsz_psetid = ZS_PSET_MULTI;

	usage->zsu_scheds |= sched;
	pset->zsp_scheds |= sched;
	zone->zsz_scheds |= sched;

	/* Record if FSS is co-habitating with conflicting scheduler */
	if ((pset->zsp_scheds & ZS_SCHED_FSS) &&
	    usage->zsu_scheds & (
	    ZS_SCHED_TS | ZS_SCHED_IA | ZS_SCHED_FX)) {
		usage->zsu_scheds |= ZS_SCHED_CONFLICT;

		pset->zsp_scheds |= ZS_SCHED_CONFLICT;
	}

}

/* Add cpu time for a process to a pset, zone, and system totals */
static void
zsd_add_usage(zsd_ctl_t *ctl, zsd_pset_usage_t *usage, timestruc_t *delta)
{
	zsd_system_t *system = ctl->zsctl_system;
	zsd_zone_t *zone = usage->zsu_zone;
	zsd_pset_t *pset = usage->zsu_pset;

	TIMESTRUC_ADD_TIMESTRUC(usage->zsu_cpu_usage, *delta);
	TIMESTRUC_ADD_TIMESTRUC(pset->zsp_usage_zones, *delta);
	TIMESTRUC_ADD_TIMESTRUC(zone->zsz_cpu_usage, *delta);
	TIMESTRUC_ADD_TIMESTRUC(system->zss_cpu_usage_zones, *delta);
}

/* Determine which processor sets have been deleted */
static void
zsd_mark_psets_end(zsd_ctl_t *ctl)
{
	zsd_pset_t *pset, *tmp;

	/*
	 * Mark the pset as not existing, and as deleted if it existed in the
	 * previous interval.
	 */
	pset = list_head(&ctl->zsctl_psets);
	while (pset != NULL) {
		if (pset->zsp_found == B_FALSE) {
			pset->zsp_empty = B_TRUE;
			if (pset->zsp_deleted == B_TRUE) {
				tmp = pset;
				pset = list_next(&ctl->zsctl_psets, pset);
				list_remove(&ctl->zsctl_psets, tmp);
				free(tmp);
				ctl->zsctl_npsets--;
				continue;
			} else {
				/* Pset vanished during this interval */
				pset->zsp_new = B_FALSE;
				pset->zsp_deleted = B_TRUE;
				pset->zsp_active = B_TRUE;
			}
		}
		pset = list_next(&ctl->zsctl_psets, pset);
	}
}

/* Determine which zones are no longer bound to processor sets */
static void
zsd_mark_pset_usages_end(zsd_ctl_t *ctl)
{
	zsd_pset_t *pset;
	zsd_zone_t *zone;
	zsd_pset_usage_t *usage, *tmp;

	/*
	 * Mark each usage as not existing, and as deleted if it existed in
	 * the previous interval.
	 */
	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {
		usage = list_head(&pset->zsp_usage_list);
		while (usage != NULL) {
			/*
			 * Mark the usage as not existing, and as deleted if
			 * it existed in the previous interval.
			 */
			if (usage->zsu_found == B_FALSE ||
			    usage->zsu_zone->zsz_deleted == B_TRUE ||
			    usage->zsu_pset->zsp_deleted == B_TRUE) {
				tmp = usage;
				usage = list_next(&pset->zsp_usage_list,
				    usage);
				list_remove(&pset->zsp_usage_list, tmp);
				free(tmp);
				pset->zsp_nusage--;
				ctl->zsctl_npset_usages--;
				continue;
			} else {
				usage->zsu_new = B_FALSE;
				usage->zsu_deleted = B_TRUE;
				usage->zsu_active = B_TRUE;
			}
			/* Add cpu shares for usages that are in FSS */
			zone = usage->zsu_zone;
			if (usage->zsu_scheds & ZS_SCHED_FSS &&
			    zone->zsz_cpu_shares != ZS_SHARES_UNLIMITED &&
			    zone->zsz_cpu_shares != 0) {
				zone = usage->zsu_zone;
				usage->zsu_cpu_shares = zone->zsz_cpu_shares;
				pset->zsp_cpu_shares += zone->zsz_cpu_shares;
			}
			usage = list_next(&pset->zsp_usage_list,
			    usage);
		}
	}
}

/* A zone has been found.  Update its information */
static void
zsd_mark_zone_found(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t cpu_shares,
    uint64_t cpu_cap, uint64_t ram_cap, uint64_t locked_cap,
    uint64_t vm_cap, uint64_t processes_cap, uint64_t processes,
    uint64_t lwps_cap, uint64_t lwps, uint64_t shm_cap, uint64_t shm,
    uint64_t shmids_cap, uint64_t shmids, uint64_t semids_cap,
    uint64_t semids, uint64_t msgids_cap, uint64_t msgids, uint64_t lofi_cap,
    uint64_t lofi, char *poolname, char *psetname, uint_t sched,
    uint_t cputype, uint_t iptype)
{
	zsd_system_t *sys = ctl->zsctl_system;

	assert(zone->zsz_found == B_FALSE);

	/*
	 * Mark zone as existing, and new if it did not exist in previous
	 * interval.
	 */
	zone->zsz_found = B_TRUE;
	zone->zsz_empty = B_TRUE;
	zone->zsz_deleted = B_FALSE;

	/*
	 * Zone is new.  Assume zone's properties are the same over entire
	 * interval.
	 */
	if (zone->zsz_active == B_FALSE)
		zone->zsz_new = B_TRUE;
	else
		zone->zsz_new = B_FALSE;

	(void) strlcpy(zone->zsz_pool, poolname, sizeof (zone->zsz_pool));
	(void) strlcpy(zone->zsz_pset, psetname, sizeof (zone->zsz_pset));
	zone->zsz_default_sched = sched;

	/* Schedulers updated later as processes are found */
	zone->zsz_scheds = 0;

	/* Cpus updated later as psets bound are identified */
	zone->zsz_cpus_online = 0;

	zone->zsz_cputype = cputype;
	zone->zsz_iptype = iptype;
	zone->zsz_psetid = ZS_PSET_ERROR;
	zone->zsz_cpu_cap = cpu_cap;
	zone->zsz_cpu_shares = cpu_shares;
	zone->zsz_ram_cap = ram_cap;
	zone->zsz_locked_cap = locked_cap;
	zone->zsz_vm_cap = vm_cap;
	zone->zsz_processes_cap = processes_cap;
	zone->zsz_processes = processes;
	zone->zsz_lwps_cap = lwps_cap;
	zone->zsz_lwps = lwps;
	zone->zsz_shm_cap = shm_cap;
	zone->zsz_shm = shm;
	zone->zsz_shmids_cap = shmids_cap;
	zone->zsz_shmids = shmids;
	zone->zsz_semids_cap = semids_cap;
	zone->zsz_semids = semids;
	zone->zsz_msgids_cap = msgids_cap;
	zone->zsz_msgids = msgids;
	zone->zsz_lofi_cap = lofi_cap;
	zone->zsz_lofi = lofi;

	sys->zss_processes += processes;
	sys->zss_lwps += lwps;
	sys->zss_shm += shm;
	sys->zss_shmids += shmids;
	sys->zss_semids += semids;
	sys->zss_msgids += msgids;
	sys->zss_lofi += lofi;
	zone->zsz_active = B_TRUE;
}


/* Determine which zones have halted */
static void
zsd_mark_zones_end(zsd_ctl_t *ctl)
{
	zsd_zone_t *zone, *tmp;

	/*
	 * Mark zone as not existing, or delete if it did not exist in
	 * previous interval.
	 */
	zone = list_head(&ctl->zsctl_zones);
	while (zone != NULL) {
		if (zone->zsz_found == B_FALSE) {
			zone->zsz_empty = B_TRUE;
			if (zone->zsz_deleted == B_TRUE) {
				/*
				 * Zone deleted in prior interval,
				 * so it no longer exists.
				 */
				tmp = zone;
				zone = list_next(&ctl->zsctl_zones, zone);
				list_remove(&ctl->zsctl_zones, tmp);
				free(tmp);
				ctl->zsctl_nzones--;
				continue;
			} else {
				zone->zsz_new = B_FALSE;
				zone->zsz_deleted = B_TRUE;
				zone->zsz_active = B_TRUE;
			}
		}
		zone = list_next(&ctl->zsctl_zones, zone);
	}
}
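
/*
 * Note on the mark/sweep pattern used by the functions above and below:
 * zones, psets, pset usages and cpus are all tracked the same way.  At the
 * start of an interval each entity's *_found flag is cleared.  As entities
 * are discovered they are marked found (and new if they were not active in
 * the previous interval).  At the end of the interval anything still not
 * found is marked deleted, and an entity that was already marked deleted is
 * removed from its list.  The *_active flag means the entity existed for at
 * least part of the interval.
 */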

/*
 * Mark cpus as not existing.  If a cpu is found, it will be updated.  If
 * a cpu is not found, then it must have gone offline, so it will be
 * deleted.
 *
 * The kstat tracking data is rolled so that the usage since the previous
 * interval can be determined.
 */
static void
zsd_mark_cpus_start(zsd_ctl_t *ctl, boolean_t roll)
{
	zsd_cpu_t *cpu;

	/*
	 * Mark all cpus as not existing.  As cpus are found, they will
	 * be marked as existing.
	 */
	for (cpu = list_head(&ctl->zsctl_cpus); cpu != NULL;
	    cpu = list_next(&ctl->zsctl_cpus, cpu)) {
		cpu->zsc_found = B_FALSE;
		if (cpu->zsc_active == B_TRUE && roll) {
			cpu->zsc_psetid_prev = cpu->zsc_psetid;
			cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
			cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
			cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
			cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
		}
	}
}

/*
 * An array the size of the maximum number of cpus is kept.  Within this array
 * a list of the online cpus is maintained.
 */
zsd_cpu_t *
zsd_lookup_insert_cpu(zsd_ctl_t *ctl, processorid_t cpuid)
{
	zsd_cpu_t *cpu;

	assert(cpuid < ctl->zsctl_maxcpuid);
	cpu = &(ctl->zsctl_cpu_array[cpuid]);
	assert(cpuid == cpu->zsc_id);

	if (cpu->zsc_allocated == B_FALSE) {
		cpu->zsc_allocated = B_TRUE;
		list_insert_tail(&ctl->zsctl_cpus, cpu);
	}
	return (cpu);
}

/* A cpu has been found.  Update its information */
static void
zsd_mark_cpu_found(zsd_cpu_t *cpu, zsd_pset_t *pset, psetid_t psetid)
{
	/*
	 * For legacy processor sets, the cpu may move while zonestatd is
	 * inspecting, causing it to be found twice.  In this case, just
	 * leave cpu in the first processor set in which it was found.
	 */
	if (cpu->zsc_found == B_TRUE)
		return;

	/* Mark cpu as online */
	cpu->zsc_found = B_TRUE;
	cpu->zsc_offlined = B_FALSE;
	cpu->zsc_pset = pset;
	/*
	 * cpu is newly online.
	 */
	if (cpu->zsc_active == B_FALSE) {
		/*
		 * Cpu is newly online.
		 */
		cpu->zsc_onlined = B_TRUE;
		cpu->zsc_psetid = psetid;
		cpu->zsc_psetid_prev = psetid;
	} else {
		/*
		 * cpu online during previous interval.  Save properties at
		 * start of interval
		 */
		cpu->zsc_onlined = B_FALSE;
		cpu->zsc_psetid = psetid;

	}
	cpu->zsc_active = B_TRUE;
}

/* Remove all offlined cpus from the list of tracked cpus */
static void
zsd_mark_cpus_end(zsd_ctl_t *ctl)
{
	zsd_cpu_t *cpu, *tmp;
	int id;

	/* Mark cpu as online or offline */
	cpu = list_head(&ctl->zsctl_cpus);
	while (cpu != NULL) {
		if (cpu->zsc_found == B_FALSE) {
			if (cpu->zsc_offlined == B_TRUE) {
				/*
				 * cpu offlined in prior interval.  It is gone.
				 */
				tmp = cpu;
				cpu = list_next(&ctl->zsctl_cpus, cpu);
				list_remove(&ctl->zsctl_cpus, tmp);
				/* Clear structure for future use */
				id = tmp->zsc_id;
				bzero(tmp, sizeof (zsd_cpu_t));
				tmp->zsc_id = id;
				tmp->zsc_allocated = B_FALSE;
				tmp->zsc_psetid = ZS_PSET_ERROR;
				tmp->zsc_psetid_prev = ZS_PSET_ERROR;

			} else {
				/*
				 * cpu online at start of interval.  Treat
				 * as still online, since it was online for
				 * some portion of the interval.
				 */
				cpu->zsc_offlined = B_TRUE;
				cpu->zsc_onlined = B_FALSE;
				cpu->zsc_active = B_TRUE;
				cpu->zsc_psetid = cpu->zsc_psetid_prev;
				cpu->zsc_pset = NULL;
			}
		}
		cpu = list_next(&ctl->zsctl_cpus, cpu);
	}
}

/* Some utility functions for managing the list of processor sets */
static zsd_pset_t *
zsd_lookup_pset_byid(zsd_ctl_t *ctl, psetid_t psetid)
{
	zsd_pset_t *pset;

	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {
		if (pset->zsp_id == psetid)
			return (pset);
	}
	return (NULL);
}

static zsd_pset_t *
zsd_lookup_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
{
	zsd_pset_t *pset;

	for (pset = list_head(&ctl->zsctl_psets); pset != NULL;
	    pset = list_next(&ctl->zsctl_psets, pset)) {
		if (strcmp(pset->zsp_name, psetname) == 0) {
			if (psetid != -1)
				pset->zsp_id = psetid;
			return (pset);
		}
	}
	return (NULL);
}

static zsd_pset_t *
zsd_allocate_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
{
	zsd_pset_t *pset;

	if ((pset = (zsd_pset_t *)calloc(1, sizeof (zsd_pset_t))) == NULL)
		return (NULL);

	(void) strlcpy(pset->zsp_name, psetname, sizeof (pset->zsp_name));
	pset->zsp_id = psetid;
	pset->zsp_found = B_FALSE;
	/*
	 * Allocate as deleted so if not found in first pass, pset is deleted
	 * from list.  This can happen if pset is returned by pset_list, but
	 * is destroyed before first attempt to fetch pset details.
	 */
	list_create(&pset->zsp_usage_list, sizeof (zsd_pset_usage_t),
	    offsetof(zsd_pset_usage_t, zsu_next));

	pset->zsp_hrstart = g_hrnow;
	pset->zsp_deleted = B_TRUE;
	pset->zsp_empty = B_TRUE;
	ctl->zsctl_npsets++;

	return (pset);
}

static zsd_pset_t *
zsd_lookup_insert_pset(zsd_ctl_t *ctl, char *psetname, psetid_t psetid)
{
	zsd_pset_t *pset, *tmp;

	if ((pset = zsd_lookup_pset(ctl, psetname, psetid)) != NULL)
		return (pset);

	if ((pset = zsd_allocate_pset(ctl, psetname, psetid)) == NULL)
		return (NULL);

	/* Insert sorted by psetname */
	tmp = list_head(&ctl->zsctl_psets);
	while (tmp != NULL && strcmp(psetname, tmp->zsp_name) > 0)
		tmp = list_next(&ctl->zsctl_psets, tmp);

	list_insert_before(&ctl->zsctl_psets, tmp, pset);
	return (pset);
}

/* Some utility functions for managing the list of zones using each pset */
static zsd_pset_usage_t *
zsd_lookup_usage(zsd_pset_t *pset, zsd_zone_t *zone)
{
	zsd_pset_usage_t *usage;

	for (usage = list_head(&pset->zsp_usage_list); usage != NULL;
	    usage = list_next(&pset->zsp_usage_list, usage))
		if (usage->zsu_zone == zone)
			return (usage);

	return (NULL);
}

static zsd_pset_usage_t *
zsd_allocate_pset_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
{
	zsd_pset_usage_t *usage;

	if ((usage = (zsd_pset_usage_t *)calloc(1, sizeof (zsd_pset_usage_t)))
	    == NULL)
		return (NULL);

	list_link_init(&usage->zsu_next);
	usage->zsu_zone = zone;
	usage->zsu_zoneid = zone->zsz_id;
	usage->zsu_pset = pset;
	usage->zsu_found = B_FALSE;
	usage->zsu_active = B_FALSE;
	usage->zsu_new = B_FALSE;
	/*
	 * Allocate as not deleted.  If a process is found in a pset for
	 * a zone, the usage will not be deleted until at least the next
	 * interval.
	 */
	usage->zsu_start = g_now;
	usage->zsu_hrstart = g_hrnow;
	usage->zsu_deleted = B_FALSE;
	usage->zsu_empty = B_TRUE;
	usage->zsu_scheds = 0;
	usage->zsu_cpu_shares = ZS_LIMIT_NONE;

	ctl->zsctl_npset_usages++;
	pset->zsp_nusage++;

	return (usage);
}

static zsd_pset_usage_t *
zsd_lookup_insert_usage(zsd_ctl_t *ctl, zsd_pset_t *pset, zsd_zone_t *zone)
{
	zsd_pset_usage_t *usage, *tmp;

	if ((usage = zsd_lookup_usage(pset, zone))
	    != NULL)
		return (usage);

	if ((usage = zsd_allocate_pset_usage(ctl, pset, zone)) == NULL)
		return (NULL);

	tmp = list_head(&pset->zsp_usage_list);
	while (tmp != NULL && strcmp(zone->zsz_name, tmp->zsu_zone->zsz_name)
	    > 0)
		tmp = list_next(&pset->zsp_usage_list, tmp);

	list_insert_before(&pset->zsp_usage_list, tmp, usage);
	return (usage);
}

static void
zsd_refresh_system(zsd_ctl_t *ctl)
{
	zsd_system_t *system = ctl->zsctl_system;

	/* Re-count these values each interval */
	system->zss_processes = 0;
	system->zss_lwps = 0;
	system->zss_shm = 0;
	system->zss_shmids = 0;
	system->zss_semids = 0;
	system->zss_msgids = 0;
	system->zss_lofi = 0;
}


/* Reads each cpu's kstats, and adds the usage to the cpu's pset */
static void
zsd_update_cpu_stats(zsd_ctl_t *ctl, zsd_cpu_t *cpu)
{
	zsd_system_t *sys;
	processorid_t cpuid;
	zsd_pset_t *pset_prev;
	zsd_pset_t *pset;
	kstat_t *kstat;
	kstat_named_t *knp;
	kid_t kid;
	uint64_t idle, intr, kern, user;

	sys = ctl->zsctl_system;
	pset = cpu->zsc_pset;
	knp = NULL;
	kid = -1;
	cpuid = cpu->zsc_id;

	/* Get the cpu time totals for this cpu */
	kstat = kstat_lookup(ctl->zsctl_kstat_ctl, "cpu", cpuid, "sys");
	if (kstat == NULL)
		return;

	kid = kstat_read(ctl->zsctl_kstat_ctl, kstat, NULL);
	if (kid == -1)
		return;

	knp = kstat_data_lookup(kstat, "cpu_nsec_idle");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	idle = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_kernel");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	kern = knp->value.ui64;

	knp = kstat_data_lookup(kstat, "cpu_nsec_user");
	if (knp == NULL || knp->data_type != KSTAT_DATA_UINT64)
		return;

	user = knp->value.ui64;

	/*
	 * Tracking intr time per cpu just exists for future enhancements.
	 * The value is presently always zero.
	 */
	intr = 0;
	cpu->zsc_nsec_idle = idle;
	cpu->zsc_nsec_intr = intr;
	cpu->zsc_nsec_kern = kern;
	cpu->zsc_nsec_user = user;

	if (cpu->zsc_onlined == B_TRUE) {
		/*
		 * cpu is newly online.  There is no reference value,
		 * so just record its current stats for comparison
		 * on next stat read.
		 */
		cpu->zsc_nsec_idle_prev = cpu->zsc_nsec_idle;
		cpu->zsc_nsec_intr_prev = cpu->zsc_nsec_intr;
		cpu->zsc_nsec_kern_prev = cpu->zsc_nsec_kern;
		cpu->zsc_nsec_user_prev = cpu->zsc_nsec_user;
		return;
	}

	/*
	 * Calculate relative time since previous refresh.
	 * Paranoia.  Don't let time go backwards.
	 */
	idle = intr = kern = user = 0;
	if (cpu->zsc_nsec_idle > cpu->zsc_nsec_idle_prev)
		idle = cpu->zsc_nsec_idle - cpu->zsc_nsec_idle_prev;

	if (cpu->zsc_nsec_intr > cpu->zsc_nsec_intr_prev)
		intr = cpu->zsc_nsec_intr - cpu->zsc_nsec_intr_prev;

	if (cpu->zsc_nsec_kern > cpu->zsc_nsec_kern_prev)
		kern = cpu->zsc_nsec_kern - cpu->zsc_nsec_kern_prev;

	if (cpu->zsc_nsec_user > cpu->zsc_nsec_user_prev)
		user = cpu->zsc_nsec_user - cpu->zsc_nsec_user_prev;

	/* Update totals for cpu usage */
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_idle, idle);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_intr, intr);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_kern, kern);
	TIMESTRUC_ADD_NANOSEC(cpu->zsc_user, user);

	/*
	 * Add cpu's stats to its pset if it is known to be in
	 * the pset since previous read.
	 */
	if (cpu->zsc_psetid == cpu->zsc_psetid_prev ||
	    cpu->zsc_psetid_prev == ZS_PSET_ERROR ||
	    (pset_prev = zsd_lookup_pset_byid(ctl,
	    cpu->zsc_psetid_prev)) == NULL) {
		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle, idle);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr, intr);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern, kern);
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user, user);
	} else {
		/*
		 * Last pset was different than current pset.
		 * Best guess is to split usage between the two.
		 */
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_idle, idle / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_intr, intr / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_kern, kern / 2);
		TIMESTRUC_ADD_NANOSEC(pset_prev->zsp_user, user / 2);

		TIMESTRUC_ADD_NANOSEC(pset->zsp_idle,
		    (idle / 2) + (idle % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_intr,
		    (intr / 2) + (intr % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_kern,
		    (kern / 2) + (kern % 2));
		TIMESTRUC_ADD_NANOSEC(pset->zsp_user,
		    (user / 2) + (user % 2));
	}
	TIMESTRUC_ADD_NANOSEC(sys->zss_idle, idle);
	TIMESTRUC_ADD_NANOSEC(sys->zss_intr, intr);
	TIMESTRUC_ADD_NANOSEC(sys->zss_kern, kern);
	TIMESTRUC_ADD_NANOSEC(sys->zss_user, user);
}

/* Determine the details of a processor set by pset_id */
static int
zsd_get_pool_pset(zsd_ctl_t *ctl, psetid_t psetid, char *psetname,
    size_t namelen, uint_t *cputype, uint64_t *online, uint64_t *size,
    uint64_t *min, uint64_t *max, int64_t *importance)
{
	uint_t old, num;

	pool_conf_t *conf = ctl->zsctl_pool_conf;
	pool_value_t **vals = ctl->zsctl_pool_vals;
	pool_resource_t **res_list = NULL;
	pool_resource_t *pset;
	pool_component_t **cpus = NULL;
	processorid_t *cache;
	const char *string;
	uint64_t uint64;
	int64_t int64;
	int i, ret, type;

	if (ctl->zsctl_pool_status == POOL_DISABLED) {

		/*
		 * Inspect legacy psets
		 */
		for (;;) {
			old = num = ctl->zsctl_cpu_ncache;
			ret = pset_info(psetid, &type, &num,
			    ctl->zsctl_cpu_cache);
			if (ret < 0) {
				/* pset is gone.  Tell caller to retry */
				errno = EINTR;
				return (-1);
			}
			if (num <= old) {
				/* Success */
				break;
			}
			if ((cache = (processorid_t *)realloc(
			    ctl->zsctl_cpu_cache, num *
			    sizeof (processorid_t))) != NULL) {
				ctl->zsctl_cpu_ncache = num;
				ctl->zsctl_cpu_cache = cache;
			} else {
				/*
				 * Could not allocate to get new cpu list.
				 */
				zsd_warn(gettext(
				    "Could not allocate for cpu list"));
				errno = ENOMEM;
				return (-1);
			}
		}
		/*
		 * Old school pset.  Just make min and max equal
		 * to its size
		 */
		if (psetid == ZS_PSET_DEFAULT) {
			*cputype = ZS_CPUTYPE_DEFAULT_PSET;
			(void) strlcpy(psetname, "pset_default", namelen);
		} else {
			*cputype = ZS_CPUTYPE_PSRSET_PSET;
			(void) snprintf(psetname, namelen,
			    "SUNWlegacy_pset_%d", psetid);
		}

		/*
		 * Just treat legacy pset as a simple pool pset
		 */
		*online = num;
		*size = num;
		*min = num;
		*max = num;
		*importance = 1;

		return (0);
	}

	/* Look up the pool pset using the pset id */
	res_list = NULL;
	pool_value_set_int64(vals[1], psetid);
	if (pool_value_set_name(vals[1], "pset.sys_id")
	    != PO_SUCCESS)
		goto err;

	if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
		goto err;
	if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
		goto err;
	if ((res_list = pool_query_resources(conf, &num, vals)) == NULL)
		goto err;
	if (num != 1)
		goto err;
	pset = res_list[0];
	free(res_list);
	res_list = NULL;
	if (pool_get_property(conf, pool_resource_to_elem(conf, pset),
	    "pset.name", vals[0]) != POC_STRING ||
	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
		goto err;

	(void) strlcpy(psetname, string, namelen);
	if (strncmp(psetname, "SUNWtmp", strlen("SUNWtmp")) == 0)
		*cputype = ZS_CPUTYPE_DEDICATED;
	else if (psetid == ZS_PSET_DEFAULT)
		*cputype = ZS_CPUTYPE_DEFAULT_PSET;
	else
		*cputype = ZS_CPUTYPE_POOL_PSET;

	/* Get size, min, max, and importance */
	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.size", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*size = uint64;
	else
		*size = 0;

	/* Get size, min, max, and importance */
	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.min", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*min = uint64;
	else
		*min = 0;
	if (*min >= ZSD_PSET_UNLIMITED)
		*min = ZS_LIMIT_NONE;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.max", vals[0]) == POC_UINT &&
	    pool_value_get_uint64(vals[0], &uint64) == PO_SUCCESS)
		*max = uint64;
	else
		*max = ZS_LIMIT_NONE;

	if (*max >= ZSD_PSET_UNLIMITED)
		*max = ZS_LIMIT_NONE;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    pset), "pset.importance", vals[0]) == POC_INT &&
	    pool_value_get_int64(vals[0], &int64) == PO_SUCCESS)
		*importance = int64;
	else
		*importance = (uint64_t)1;

	*online = 0;
	if (*size == 0)
		return (0);

	/* get cpus */
	cpus = pool_query_resource_components(conf, pset, &num, NULL);
	if (cpus == NULL)
		goto err;

	/* Make sure there is space for cpu id list */
	if (num > ctl->zsctl_cpu_ncache) {
		if ((cache = (processorid_t *)realloc(
		    ctl->zsctl_cpu_cache, num *
		    sizeof (processorid_t))) != NULL) {
			ctl->zsctl_cpu_ncache = num;
			ctl->zsctl_cpu_cache = cache;
		} else {
			/*
			 * Could not allocate to get new cpu list.
			 */
			zsd_warn(gettext(
			    "Could not allocate for cpu list"));
			goto err;
		}
	}

	/* count the online cpus */
	for (i = 0; i < num; i++) {
		if (pool_get_property(conf, pool_component_to_elem(
		    conf, cpus[i]), "cpu.status", vals[0]) != POC_STRING ||
		    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
			goto err;

		if (strcmp(string, "on-line") != 0 &&
		    strcmp(string, "no-intr") != 0)
			continue;

		if (pool_get_property(conf, pool_component_to_elem(
		    conf, cpus[i]), "cpu.sys_id", vals[0]) != POC_INT ||
		    pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
			goto err;

		(*online)++;
		ctl->zsctl_cpu_cache[i] = (psetid_t)int64;
	}
	free(cpus);
	return (0);
err:
	if (res_list != NULL)
		free(res_list);
	if (cpus != NULL)
		free(cpus);

	/*
	 * The pools operations should succeed since the conf is a consistent
	 * snapshot.  Tell caller there is no need to retry.
	 */
	errno = EINVAL;
	return (-1);
}

/*
 * Update the current list of processor sets.
 * This also updates the list of online cpus, and each cpu's pset membership.
 */
static void
zsd_refresh_psets(zsd_ctl_t *ctl)
{
	int i, j, ret, state;
	uint_t old, num;
	uint_t cputype;
	int64_t sys_id, importance;
	uint64_t online, size, min, max;
	zsd_system_t *system;
	zsd_pset_t *pset;
	zsd_cpu_t *cpu;
	psetid_t *cache;
	char psetname[ZS_PSETNAME_MAX];
	processorid_t cpuid;
	pool_value_t *pv_save = NULL;
	pool_resource_t **res_list = NULL;
	pool_resource_t *res;
	pool_value_t **vals;
	pool_conf_t *conf;
	boolean_t roll_cpus = B_TRUE;

	/* Zero cpu counters to recount them */
	system = ctl->zsctl_system;
	system->zss_ncpus = 0;
	system->zss_ncpus_online = 0;
retry:
	ret = pool_get_status(&state);
	if (ret == 0 && state == POOL_ENABLED) {

		conf = ctl->zsctl_pool_conf;
		vals = ctl->zsctl_pool_vals;
		pv_save = vals[1];
		vals[1] = NULL;

		if (ctl->zsctl_pool_status == POOL_DISABLED) {
			if (pool_conf_open(ctl->zsctl_pool_conf,
			    pool_dynamic_location(), PO_RDONLY) == 0) {
				ctl->zsctl_pool_status = POOL_ENABLED;
				ctl->zsctl_pool_changed = POU_PSET;
			}
		} else {
			ctl->zsctl_pool_changed = 0;
			ret = pool_conf_update(ctl->zsctl_pool_conf,
			    &(ctl->zsctl_pool_changed));
			if (ret < 0) {
				/* Pools must have become disabled */
				(void) pool_conf_close(ctl->zsctl_pool_conf);
				ctl->zsctl_pool_status = POOL_DISABLED;
				if (pool_error() == POE_SYSTEM && errno ==
				    ENOTACTIVE)
					goto retry;

				zsd_warn(gettext(
				    "Unable to update pool configuration"));
				/* Not able to get pool info.  Don't update. */
				goto err;
			}
		}
		/* Get the list of psets using libpool */
		if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
			goto err;

		if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
			goto err;
		if ((res_list = pool_query_resources(conf, &num, vals))
		    == NULL)
			goto err;

		if (num > ctl->zsctl_pset_ncache) {
			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
			    (num) * sizeof (psetid_t))) == NULL) {
				goto err;
			}
			ctl->zsctl_pset_ncache = num;
			ctl->zsctl_pset_cache = cache;
		}
		/* Save the pset id of each pset */
		for (i = 0; i < num; i++) {
			res = res_list[i];
			if (pool_get_property(conf, pool_resource_to_elem(conf,
			    res), "pset.sys_id", vals[0]) != POC_INT ||
			    pool_value_get_int64(vals[0], &sys_id)
			    != PO_SUCCESS)
				goto err;
			ctl->zsctl_pset_cache[i] = (int)sys_id;
		}
		vals[1] = pv_save;
		pv_save = NULL;
	} else {
		if (ctl->zsctl_pool_status == POOL_ENABLED) {
			(void) pool_conf_close(ctl->zsctl_pool_conf);
			ctl->zsctl_pool_status = POOL_DISABLED;
		}
		/* Get the pset list using legacy psets */
		for (;;) {
			old = num = ctl->zsctl_pset_ncache;
			(void) pset_list(ctl->zsctl_pset_cache, &num);
			if ((num + 1) <= old) {
				break;
			}
			if ((cache = (psetid_t *)realloc(ctl->zsctl_pset_cache,
			    (num + 1) * sizeof (psetid_t))) != NULL) {
				ctl->zsctl_pset_ncache = num + 1;
				ctl->zsctl_pset_cache = cache;
			} else {
				/*
				 * Could not allocate to get new pset list.
				 * Give up
				 */
				return;
			}
		}
		/* Add the default pset to list */
		ctl->zsctl_pset_cache[num] = ctl->zsctl_pset_cache[0];
		ctl->zsctl_pset_cache[0] = ZS_PSET_DEFAULT;
		num++;
	}
psets_changed:
	zsd_mark_cpus_start(ctl, roll_cpus);
	zsd_mark_psets_start(ctl);
	roll_cpus = B_FALSE;

	/* Refresh cpu membership of all psets */
	for (i = 0; i < num; i++) {

		/* Get pool pset information */
		sys_id = ctl->zsctl_pset_cache[i];
		if (zsd_get_pool_pset(ctl, sys_id, psetname, sizeof (psetname),
		    &cputype, &online, &size, &min, &max, &importance)
		    != 0) {
			if (errno == EINTR)
				goto psets_changed;
			zsd_warn(gettext("Failed to get info for pset %d"),
			    sys_id);
			continue;
		}

		system->zss_ncpus += size;
		system->zss_ncpus_online += online;

		pset = zsd_lookup_insert_pset(ctl, psetname,
		    ctl->zsctl_pset_cache[i]);

		/* update pset info */
		zsd_mark_pset_found(pset, cputype, online, size, min,
		    max, importance);

		/* update each cpu in pset */
		for (j = 0; j < pset->zsp_online; j++) {
			cpuid = ctl->zsctl_cpu_cache[j];
			cpu = zsd_lookup_insert_cpu(ctl, cpuid);
			zsd_mark_cpu_found(cpu, pset, sys_id);
		}
	}
err:
	if (res_list != NULL)
		free(res_list);
	if (pv_save != NULL)
		vals[1] = pv_save;
}



/*
 * Fetch the current pool and pset name for the given zone.
err:
	if (res_list != NULL)
		free(res_list);
	if (pv_save != NULL)
		vals[1] = pv_save;
}

/*
 * Fetch the current pool and pset name for the given zone.
 */
static void
zsd_get_zone_pool_pset(zsd_ctl_t *ctl, zsd_zone_t *zone,
    char *pool, int poollen, char *pset, int psetlen, uint_t *cputype)
{
	poolid_t poolid;
	pool_t **pools = NULL;
	pool_resource_t **res_list = NULL;
	char poolname[ZS_POOLNAME_MAX];
	char psetname[ZS_PSETNAME_MAX];
	pool_conf_t *conf = ctl->zsctl_pool_conf;
	pool_value_t *pv_save = NULL;
	pool_value_t **vals = ctl->zsctl_pool_vals;
	const char *string;
	int ret;
	int64_t int64;
	uint_t num;

	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_POOLID,
	    &poolid, sizeof (poolid));
	if (ret < 0)
		goto lookup_done;

	pv_save = vals[1];
	vals[1] = NULL;
	pools = NULL;
	res_list = NULL;

	/* Default values if lookup fails */
	(void) strlcpy(poolname, "pool_default", sizeof (poolname));
	(void) strlcpy(psetname, "pset_default", sizeof (psetname));
	*cputype = ZS_CPUTYPE_DEFAULT_PSET;

	/* no dedicated cpu if pools are disabled */
	if (ctl->zsctl_pool_status == POOL_DISABLED)
		goto lookup_done;

	/* Get the pool name using the id */
	pool_value_set_int64(vals[0], poolid);
	if (pool_value_set_name(vals[0], "pool.sys_id") != PO_SUCCESS)
		goto lookup_done;

	if ((pools = pool_query_pools(conf, &num, vals)) == NULL)
		goto lookup_done;

	if (num != 1)
		goto lookup_done;

	if (pool_get_property(conf, pool_to_elem(conf, pools[0]),
	    "pool.name", vals[0]) != POC_STRING ||
	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
		goto lookup_done;
	(void) strlcpy(poolname, (char *)string, sizeof (poolname));

	/* Get the name of the pset for the pool */
	if (pool_value_set_name(vals[0], "type") != PO_SUCCESS)
		goto lookup_done;

	if (pool_value_set_string(vals[0], "pset") != PO_SUCCESS)
		goto lookup_done;

	if ((res_list = pool_query_pool_resources(conf, pools[0], &num, vals))
	    == NULL)
		goto lookup_done;

	if (num != 1)
		goto lookup_done;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    res_list[0]), "pset.sys_id", vals[0]) != POC_INT ||
	    pool_value_get_int64(vals[0], &int64) != PO_SUCCESS)
		goto lookup_done;

	if (int64 == ZS_PSET_DEFAULT)
		*cputype = ZS_CPUTYPE_DEFAULT_PSET;

	if (pool_get_property(conf, pool_resource_to_elem(conf,
	    res_list[0]), "pset.name", vals[0]) != POC_STRING ||
	    pool_value_get_string(vals[0], &string) != PO_SUCCESS)
		goto lookup_done;

	(void) strlcpy(psetname, (char *)string, sizeof (psetname));

	if (strncmp(psetname, "SUNWtmp_", strlen("SUNWtmp_")) == 0)
		*cputype = ZS_CPUTYPE_DEDICATED;
	else if (strncmp(psetname, "SUNW_legacy_",
	    strlen("SUNW_legacy_")) == 0)
		*cputype = ZS_CPUTYPE_PSRSET_PSET;
	else
		*cputype = ZS_CPUTYPE_POOL_PSET;

lookup_done:

	if (pv_save != NULL)
		vals[1] = pv_save;

	if (res_list)
		free(res_list);
	if (pools)
		free(pools);

	(void) strlcpy(pool, poolname, poollen);
	(void) strlcpy(pset, psetname, psetlen);
}
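
/*
 * The ZS_SCHED_* values returned below are individual bit flags, so callers
 * can OR together every scheduling class seen in a zone and later test
 * whether more than one class was found with (sched & (sched - 1)), as
 * zsd_refresh_procs() does.
 */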
/* Convert scheduler names to ZS_* scheduler flags */
static uint_t
zsd_schedname2int(char *clname, int pri)
{
	uint_t sched = 0;

	if (strcmp(clname, "TS") == 0) {
		sched = ZS_SCHED_TS;
	} else if (strcmp(clname, "IA") == 0) {
		sched = ZS_SCHED_IA;
	} else if (strcmp(clname, "FX") == 0) {
		if (pri > 59) {
			sched = ZS_SCHED_FX_60;
		} else {
			sched = ZS_SCHED_FX;
		}
	} else if (strcmp(clname, "RT") == 0) {
		sched = ZS_SCHED_RT;

	} else if (strcmp(clname, "FSS") == 0) {
		sched = ZS_SCHED_FSS;
	}
	return (sched);
}

static uint64_t
zsd_get_zone_rctl_limit(char *name)
{
	rctlblk_t *rblk;

	rblk = (rctlblk_t *)alloca(rctlblk_size());
	if (getrctl(name, NULL, rblk, RCTL_FIRST) != 0) {
		return (ZS_LIMIT_NONE);
	}
	return (rctlblk_get_value(rblk));
}

static uint64_t
zsd_get_zone_rctl_usage(char *name)
{
	rctlblk_t *rblk;

	rblk = (rctlblk_t *)alloca(rctlblk_size());
	if (getrctl(name, NULL, rblk, RCTL_USAGE) != 0) {
		return (0);
	}
	return (rctlblk_get_value(rblk));
}

#define	ZSD_NUM_RCTL_VALS	19

/*
 * Fetch the limit information for a zone.  This uses zone_enter() as the
 * getrctl(2) system call only returns rctl information for the zone of
 * the caller.
 */
static int
zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
    uint64_t *cpu_cap, uint64_t *ram_cap, uint64_t *locked_cap,
    uint64_t *vm_cap, uint64_t *processes_cap, uint64_t *processes,
    uint64_t *lwps_cap, uint64_t *lwps, uint64_t *shm_cap, uint64_t *shm,
    uint64_t *shmids_cap, uint64_t *shmids, uint64_t *semids_cap,
    uint64_t *semids, uint64_t *msgids_cap, uint64_t *msgids,
    uint64_t *lofi_cap, uint64_t *lofi, uint_t *sched)
{
	int p[2], pid, tmpl_fd, ret;
	ctid_t ct;
	char class[PC_CLNMSZ];
	uint64_t vals[ZSD_NUM_RCTL_VALS];
	zsd_system_t *sys = ctl->zsctl_system;
	int i = 0;
	int res = 0;

	/* Treat all caps as no cap on error */
	*cpu_shares = ZS_LIMIT_NONE;
	*cpu_cap = ZS_LIMIT_NONE;
	*ram_cap = ZS_LIMIT_NONE;
	*locked_cap = ZS_LIMIT_NONE;
	*vm_cap = ZS_LIMIT_NONE;

	*processes_cap = ZS_LIMIT_NONE;
	*lwps_cap = ZS_LIMIT_NONE;
	*shm_cap = ZS_LIMIT_NONE;
	*shmids_cap = ZS_LIMIT_NONE;
	*semids_cap = ZS_LIMIT_NONE;
	*msgids_cap = ZS_LIMIT_NONE;
	*lofi_cap = ZS_LIMIT_NONE;

	*processes = 0;
	*lwps = 0;
	*shm = 0;
	*shmids = 0;
	*semids = 0;
	*msgids = 0;
	*lofi = 0;

	/* Get the ram cap first since it is a zone attr */
	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
	    ram_cap, sizeof (*ram_cap));
	if (ret < 0 || *ram_cap == 0)
		*ram_cap = ZS_LIMIT_NONE;

	/* Get the zone's default scheduling class */
	ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
	    class, sizeof (class));
	if (ret < 0)
		return (-1);

	*sched = zsd_schedname2int(class, 0);

	/* rctl caps must be fetched from within the zone */
	if (pipe(p) != 0)
		return (-1);

	if ((tmpl_fd = init_template()) == -1) {
		(void) close(p[0]);
		(void) close(p[1]);
		return (-1);
	}
	pid = forkx(0);
	if (pid < 0) {
		(void) ct_tmpl_clear(tmpl_fd);
		(void) close(p[0]);
		(void) close(p[1]);
		return (-1);
	}
	if (pid == 0) {

		(void) ct_tmpl_clear(tmpl_fd);
		(void) close(tmpl_fd);
		(void) close(p[0]);
		if (zone->zsz_id != getzoneid()) {
			if (zone_enter(zone->zsz_id) < 0) {
				(void) close(p[1]);
				_exit(0);
			}
		}

		/* Get caps for zone, and write them to zonestatd parent. */
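		/*
		 * The order of the writes below must match the order in
		 * which the parent unpacks vals[] after reading from the
		 * pipe.
		 */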
		vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-shares");
		vals[i++] = zsd_get_zone_rctl_limit("zone.cpu-cap");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-locked-memory");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-swap");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-processes");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-processes");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-lwps");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-lwps");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-memory");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-memory");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-shm-ids");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-shm-ids");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-sem-ids");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-sem-ids");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-msg-ids");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
		vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
		vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");

		if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
		    ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
			(void) close(p[1]);
			_exit(1);
		}

		(void) close(p[1]);
		_exit(0);
	}
	if (contract_latest(&ct) == -1)
		ct = -1;

	(void) ct_tmpl_clear(tmpl_fd);
	(void) close(tmpl_fd);
	(void) close(p[1]);
	while (waitpid(pid, NULL, 0) != pid)
		;

	/* Read cap from child in zone */
	if (read(p[0], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
	    ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
		res = -1;
		goto cleanup;
	}
	i = 0;
	*cpu_shares = vals[i++];
	*cpu_cap = vals[i++];
	*locked_cap = vals[i++];
	*vm_cap = vals[i++];
	*processes_cap = vals[i++];
	*processes = vals[i++];
	*lwps_cap = vals[i++];
	*lwps = vals[i++];
	*shm_cap = vals[i++];
	*shm = vals[i++];
	*shmids_cap = vals[i++];
	*shmids = vals[i++];
	*semids_cap = vals[i++];
	*semids = vals[i++];
	*msgids_cap = vals[i++];
	*msgids = vals[i++];
	*lofi_cap = vals[i++];
	*lofi = vals[i++];

	/* Interpret maximum values as no cap */
	if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
		*cpu_cap = ZS_LIMIT_NONE;
	if (*processes_cap == sys->zss_processes_max)
		*processes_cap = ZS_LIMIT_NONE;
	if (*lwps_cap == sys->zss_lwps_max)
		*lwps_cap = ZS_LIMIT_NONE;
	if (*shm_cap == sys->zss_shm_max)
		*shm_cap = ZS_LIMIT_NONE;
	if (*shmids_cap == sys->zss_shmids_max)
		*shmids_cap = ZS_LIMIT_NONE;
	if (*semids_cap == sys->zss_semids_max)
		*semids_cap = ZS_LIMIT_NONE;
	if (*msgids_cap == sys->zss_msgids_max)
		*msgids_cap = ZS_LIMIT_NONE;
	if (*lofi_cap == sys->zss_lofi_max)
		*lofi_cap = ZS_LIMIT_NONE;

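	/*
	 * Note: cleanup: is also the normal exit path.  It releases the
	 * read side of the pipe and abandons the process contract created
	 * for the short-lived child.
	 */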
cleanup:
	(void) close(p[0]);
	(void) ct_tmpl_clear(tmpl_fd);
	(void) close(tmpl_fd);
	(void) contract_abandon_id(ct);

	return (res);
}

/* Update the current list of running zones */
static void
zsd_refresh_zones(zsd_ctl_t *ctl)
{
	zsd_zone_t *zone;
	uint_t old, num;
	ushort_t flags;
	int i, ret;
	zoneid_t *cache;
	uint64_t cpu_shares;
	uint64_t cpu_cap;
	uint64_t ram_cap;
	uint64_t locked_cap;
	uint64_t vm_cap;
	uint64_t processes_cap;
	uint64_t processes;
	uint64_t lwps_cap;
	uint64_t lwps;
	uint64_t shm_cap;
	uint64_t shm;
	uint64_t shmids_cap;
	uint64_t shmids;
	uint64_t semids_cap;
	uint64_t semids;
	uint64_t msgids_cap;
	uint64_t msgids;
	uint64_t lofi_cap;
	uint64_t lofi;

	char zonename[ZS_ZONENAME_MAX];
	char poolname[ZS_POOLNAME_MAX];
	char psetname[ZS_PSETNAME_MAX];
	uint_t sched;
	uint_t cputype;
	uint_t iptype;

	/* Get the current list of running zones */
	for (;;) {
		old = num = ctl->zsctl_zone_ncache;
		(void) zone_list(ctl->zsctl_zone_cache, &num);
		if (num <= old)
			break;
		if ((cache = (zoneid_t *)realloc(ctl->zsctl_zone_cache,
		    (num) * sizeof (zoneid_t))) != NULL) {
			ctl->zsctl_zone_ncache = num;
			ctl->zsctl_zone_cache = cache;
		} else {
			/* Could not allocate to get new zone list.  Give up */
			return;
		}
	}

	zsd_mark_zones_start(ctl);

	for (i = 0; i < num; i++) {

		ret = getzonenamebyid(ctl->zsctl_zone_cache[i],
		    zonename, sizeof (zonename));
		if (ret < 0)
			continue;

		zone = zsd_lookup_insert_zone(ctl, zonename,
		    ctl->zsctl_zone_cache[i]);

		ret = zone_getattr(ctl->zsctl_zone_cache[i], ZONE_ATTR_FLAGS,
		    &flags, sizeof (flags));
		if (ret < 0)
			continue;

		if (flags & ZF_NET_EXCL)
			iptype = ZS_IPTYPE_EXCLUSIVE;
		else
			iptype = ZS_IPTYPE_SHARED;

		zsd_get_zone_pool_pset(ctl, zone, poolname, sizeof (poolname),
		    psetname, sizeof (psetname), &cputype);

		if (zsd_get_zone_caps(ctl, zone, &cpu_shares, &cpu_cap,
		    &ram_cap, &locked_cap, &vm_cap, &processes_cap, &processes,
		    &lwps_cap, &lwps, &shm_cap, &shm, &shmids_cap, &shmids,
		    &semids_cap, &semids, &msgids_cap, &msgids, &lofi_cap,
		    &lofi, &sched) != 0)
			continue;

		zsd_mark_zone_found(ctl, zone, cpu_shares, cpu_cap, ram_cap,
		    locked_cap, vm_cap, processes_cap, processes, lwps_cap,
		    lwps, shm_cap, shm, shmids_cap, shmids, semids_cap,
		    semids, msgids_cap, msgids, lofi_cap, lofi, poolname,
		    psetname, sched, cputype, iptype);
	}
}

/* Fetch the details of a process from its psinfo_t */
static void
zsd_get_proc_info(zsd_ctl_t *ctl, psinfo_t *psinfo, psetid_t *psetid,
    psetid_t *prev_psetid, zoneid_t *zoneid, zoneid_t *prev_zoneid,
    timestruc_t *delta, uint_t *sched)
{
	timestruc_t d;
	zsd_proc_t *proc;

	/* Get cached data for proc */
	proc = &(ctl->zsctl_proc_array[psinfo->pr_pid]);
	*psetid = psinfo->pr_lwp.pr_bindpset;

	if (proc->zspr_psetid == ZS_PSET_ERROR)
		*prev_psetid = *psetid;
	else
		*prev_psetid = proc->zspr_psetid;

	*zoneid = psinfo->pr_zoneid;
	if (proc->zspr_zoneid == -1)
		*prev_zoneid = *zoneid;
	else
		*prev_zoneid = proc->zspr_zoneid;

	TIMESTRUC_DELTA(d, psinfo->pr_time, proc->zspr_usage);
	*delta = d;

	*sched = zsd_schedname2int(psinfo->pr_lwp.pr_clname,
	    psinfo->pr_lwp.pr_pri);

	/* Update cached data for proc */
	proc->zspr_psetid = psinfo->pr_lwp.pr_bindpset;
	proc->zspr_zoneid = psinfo->pr_zoneid;
	proc->zspr_sched = *sched;
	proc->zspr_usage.tv_sec = psinfo->pr_time.tv_sec;
	proc->zspr_usage.tv_nsec = psinfo->pr_time.tv_nsec;
	proc->zspr_ppid = psinfo->pr_ppid;
}
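
/*
 * Note that zsd_get_proc_info() above indexes zsctl_proc_array directly by
 * pid, so each slot caches the pset, zone, scheduling class and cpu usage
 * most recently seen for that pid.  zsd_flush_proc_info() below clears the
 * cached usage once a pid is known to have exited.
 */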

/*
 * Reset the known cpu usage of a process.  This is done after a process
 * exits so that if the pid is recycled, data from its previous life is
 * not reused.
 */
static void
zsd_flush_proc_info(zsd_proc_t *proc)
{
	proc->zspr_usage.tv_sec = 0;
	proc->zspr_usage.tv_nsec = 0;
}

/*
 * Open the current extended accounting file.  On initialization, open the
 * file as the current file to be used.  Otherwise, open the file as the
 * next file to use if the current file reaches EOF.
 */
static int
zsd_open_exacct(zsd_ctl_t *ctl, boolean_t init)
{
	int ret, oret = -1, state, trys = 0, flags;
	int *fd, *open;
	ea_file_t *eaf;
	struct stat64 *stat;
	char path[MAXPATHLEN];

	/*
	 * The accounting file is first opened at the tail.  Following
	 * opens to new accounting files are opened at the head.
	 */
	if (init == B_TRUE) {
		flags = EO_NO_VALID_HDR | EO_TAIL;
		fd = &ctl->zsctl_proc_fd;
		eaf = &ctl->zsctl_proc_eaf;
		stat = &ctl->zsctl_proc_stat;
		open = &ctl->zsctl_proc_open;
	} else {
		flags = EO_NO_VALID_HDR | EO_HEAD;
		fd = &ctl->zsctl_proc_fd_next;
		eaf = &ctl->zsctl_proc_eaf_next;
		stat = &ctl->zsctl_proc_stat_next;
		open = &ctl->zsctl_proc_open_next;
	}

	*fd = -1;
	*open = 0;
retry:
	/* open accounting files for cpu consumption */
	ret = acctctl(AC_STATE_GET | AC_PROC, &state, sizeof (state));
	if (ret != 0) {
		zsd_warn(gettext("Unable to get process accounting state"));
		goto err;
	}
	if (state != AC_ON) {
		if (trys > 0) {
			zsd_warn(gettext(
			    "Unable to enable process accounting"));
			goto err;
		}
		(void) zsd_enable_cpu_stats();
		trys++;
		goto retry;
	}

	ret = acctctl(AC_FILE_GET | AC_PROC, path, sizeof (path));
	if (ret != 0) {
		zsd_warn(gettext("Unable to get process accounting file"));
		goto err;
	}

	if ((*fd = open64(path, O_RDONLY, 0)) >= 0 &&
	    (oret = ea_fdopen(eaf, *fd, NULL, flags, O_RDONLY)) == 0)
		ret = fstat64(*fd, stat);

	if (*fd < 0 || oret < 0 || ret < 0) {
		struct timespec ts;

		/*
		 * It is possible the accounting file is momentarily
		 * unavailable because it is being rolled.  Try for up to
		 * half a second.
		 *
		 * If the failure to open the accounting file persists,
		 * give up.
		 */
		if (oret == 0)
			(void) ea_close(eaf);
		else if (*fd >= 0)
			(void) close(*fd);
		if (trys > 500) {
			zsd_warn(gettext(
			    "Unable to open process accounting file"));
			goto err;
		}
		/* wait one millisecond */
		ts.tv_sec = 0;
		ts.tv_nsec = NANOSEC / 1000;
		(void) nanosleep(&ts, NULL);
		goto retry;
	}
	*open = 1;
	return (0);
err:
	if (*fd >= 0)
		(void) close(*fd);
	*open = 0;
	*fd = -1;
	return (-1);
}
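
/*
 * When called with init == B_FALSE, the descriptors opened above are the
 * "next" set; zsd_refresh_procs() promotes them to the current set once the
 * current file reaches EOF and its st_ino/st_dev show that the accounting
 * file has been rolled.
 */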

/*
 * Walk /proc and charge each process to its zone and processor set.
 * Then read exacct data for exited processes, and charge them as well.
 */
static void
zsd_refresh_procs(zsd_ctl_t *ctl, boolean_t init)
{
	DIR *dir;
	struct dirent *dent;
	psinfo_t psinfo;
	int fd, ret;
	zsd_proc_t *proc, *pproc, *tmp, *next;
	list_t pplist, plist;
	zsd_zone_t *zone, *prev_zone;
	zsd_pset_t *pset, *prev_pset;
	psetid_t psetid, prev_psetid;
	zoneid_t zoneid, prev_zoneid;
	zsd_pset_usage_t *usage, *prev_usage;
	char path[MAXPATHLEN];

	ea_object_t object;
	ea_object_t pobject;
	boolean_t hrtime_expired = B_FALSE;
	struct timeval interval_end;

	timestruc_t delta, d1, d2;
	uint_t sched = 0;

	/*
	 * Get the current accounting file.  The current accounting file
	 * may be different than the file in use, as the accounting file
	 * may have been rolled, or manually changed by an admin.
	 */
	ret = zsd_open_exacct(ctl, init);
	if (ret != 0) {
		zsd_warn(gettext("Unable to track process accounting"));
		return;
	}

	/*
	 * Mark the current time as the interval end time.  Don't track
	 * processes that exit after this time.
	 */
	(void) gettimeofday(&interval_end, NULL);

	dir = opendir("/proc");
	if (dir == NULL) {
		zsd_warn(gettext("Unable to open /proc"));
		return;
	}

	dent = ctl->zsctl_procfs_dent;

	(void) memset(dent, 0, ctl->zsctl_procfs_dent_size);

	/* Walk all processes and compute each zone's usage on each pset. */
	while (readdir_r(dir, dent) != 0) {

		if (strcmp(dent->d_name, ".") == 0 ||
		    strcmp(dent->d_name, "..") == 0)
			continue;

		(void) snprintf(path, sizeof (path), "/proc/%s/psinfo",
		    dent->d_name);

		fd = open(path, O_RDONLY);
		if (fd < 0)
			continue;

		if (read(fd, &psinfo, sizeof (psinfo)) != sizeof (psinfo)) {
			(void) close(fd);
			continue;
		}
		(void) close(fd);

		zsd_get_proc_info(ctl, &psinfo, &psetid, &prev_psetid,
		    &zoneid, &prev_zoneid, &delta, &sched);

		d1.tv_sec = delta.tv_sec / 2;
		d1.tv_nsec = delta.tv_nsec / 2;
		d2.tv_sec = (delta.tv_sec / 2) + (delta.tv_sec % 2);
		d2.tv_nsec = (delta.tv_nsec / 2) + (delta.tv_nsec % 2);

		/* Get the zone and pset this process is running in */
		zone = zsd_lookup_zone_byid(ctl, zoneid);
		if (zone == NULL)
			continue;
		pset = zsd_lookup_pset_byid(ctl, psetid);
		if (pset == NULL)
			continue;
		usage = zsd_lookup_insert_usage(ctl, pset, zone);
		if (usage == NULL)
			continue;

		/*
		 * Get the usage of the previous zone and pset if they were
		 * different.
		 */
		if (zoneid != prev_zoneid)
			prev_zone = zsd_lookup_zone_byid(ctl, prev_zoneid);
		else
			prev_zone = NULL;

		if (psetid != prev_psetid)
			prev_pset = zsd_lookup_pset_byid(ctl, prev_psetid);
		else
			prev_pset = NULL;

		prev_usage = NULL;
		if (prev_zone != NULL || prev_pset != NULL) {
			if (prev_zone == NULL)
				prev_zone = zone;
			if (prev_pset == NULL)
				prev_pset = pset;

			prev_usage = zsd_lookup_insert_usage(ctl, prev_pset,
			    prev_zone);
		}

		/* Update the usage with the process's info */
		if (prev_usage == NULL) {
			zsd_mark_pset_usage_found(usage, sched);
		} else {
			zsd_mark_pset_usage_found(usage, sched);
			zsd_mark_pset_usage_found(prev_usage, sched);
		}

		/*
		 * First time around is just to get a starting point.  All
		 * usages will be zero.
		 */
		if (init == B_TRUE)
			continue;

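		/*
		 * If the process moved to a different zone or pset since the
		 * last scan, split its cpu delta in half (d1 and d2 above)
		 * and charge both the old and the new usage records, since
		 * the exact time of the move is unknown.
		 */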
		if (prev_usage == NULL) {
			zsd_add_usage(ctl, usage, &delta);
		} else {
			zsd_add_usage(ctl, usage, &d1);
			zsd_add_usage(ctl, prev_usage, &d2);
		}
	}
	(void) closedir(dir);

	/*
	 * No need to collect exited proc data on initialization.  Just
	 * caching the usage of the known processes to get a zero starting
	 * point.
	 */
	if (init == B_TRUE)
		return;

	/*
	 * Add accounting records to account for processes which have
	 * exited.
	 */
	list_create(&plist, sizeof (zsd_proc_t),
	    offsetof(zsd_proc_t, zspr_next));
	list_create(&pplist, sizeof (zsd_proc_t),
	    offsetof(zsd_proc_t, zspr_next));

	for (;;) {
		pid_t pid;
		pid_t ppid;
		timestruc_t user, sys, proc_usage;
		timestruc_t finish;
		int numfound = 0;

		bzero(&object, sizeof (object));
		proc = NULL;
		zone = NULL;
		pset = NULL;
		usage = NULL;
		ret = ea_get_object(&ctl->zsctl_proc_eaf, &object);
		if (ret == EO_ERROR) {
			if (ea_error() == EXR_EOF) {

				struct stat64 *stat;
				struct stat64 *stat_next;

				/*
				 * See if the next accounting file is the
				 * same as the current accounting file.
				 */
				stat = &(ctl->zsctl_proc_stat);
				stat_next = &(ctl->zsctl_proc_stat_next);
				if (stat->st_ino == stat_next->st_ino &&
				    stat->st_dev == stat_next->st_dev) {
					/*
					 * End of current accounting file is
					 * reached, so finished.  Clear EOF
					 * bit for next time around.
					 */
					ea_clear(&ctl->zsctl_proc_eaf);
					break;
				} else {
					/*
					 * Accounting file has changed.  Move
					 * to current accounting file.
					 */
					(void) ea_close(&ctl->zsctl_proc_eaf);

					ctl->zsctl_proc_fd =
					    ctl->zsctl_proc_fd_next;
					ctl->zsctl_proc_eaf =
					    ctl->zsctl_proc_eaf_next;
					ctl->zsctl_proc_stat =
					    ctl->zsctl_proc_stat_next;

					ctl->zsctl_proc_fd_next = -1;
					ctl->zsctl_proc_open_next = 0;
					continue;
				}
			} else {
				/*
				 * Other accounting error.  Give up on
				 * accounting.
				 */
				goto ea_err;
			}
		}
		/* Skip if not a process group */
		if ((object.eo_catalog & EXT_TYPE_MASK) != EXT_GROUP ||
		    (object.eo_catalog & EXD_DATA_MASK) != EXD_GROUP_PROC) {
			(void) ea_free_item(&object, EUP_ALLOC);
			continue;
		}

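		/*
		 * Each exited process is described by a group object whose
		 * items supply the pid, ancestor pid, zone name, user and
		 * system cpu time, and finish time; all nine items are
		 * required before the record can be charged.
		 */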
		/* The process group entry should be complete */
		while (numfound < 9) {
			bzero(&pobject, sizeof (pobject));
			ret = ea_get_object(&ctl->zsctl_proc_eaf,
			    &pobject);
			if (ret < 0) {
				(void) ea_free_item(&object, EUP_ALLOC);
				zsd_warn(
				    "unable to get process accounting data");
				goto ea_err;
			}
			/* Next entries should be process data */
			if ((pobject.eo_catalog & EXT_TYPE_MASK) ==
			    EXT_GROUP) {
				(void) ea_free_item(&object, EUP_ALLOC);
				(void) ea_free_item(&pobject, EUP_ALLOC);
				zsd_warn(
				    "process data of wrong type");
				goto ea_err;
			}
			switch (pobject.eo_catalog & EXD_DATA_MASK) {
			case EXD_PROC_PID:
				pid = pobject.eo_item.ei_uint32;
				proc = &(ctl->zsctl_proc_array[pid]);
				/*
				 * This process should not be currently in
				 * the list of processes to process.
				 */
				assert(!list_link_active(&proc->zspr_next));
				numfound++;
				break;
			case EXD_PROC_ANCPID:
				ppid = pobject.eo_item.ei_uint32;
				pproc = &(ctl->zsctl_proc_array[ppid]);
				numfound++;
				break;
			case EXD_PROC_ZONENAME:
				zone = zsd_lookup_zone(ctl,
				    pobject.eo_item.ei_string, -1);
				numfound++;
				break;
			case EXD_PROC_CPU_USER_SEC:
				user.tv_sec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			case EXD_PROC_CPU_USER_NSEC:
				user.tv_nsec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			case EXD_PROC_CPU_SYS_SEC:
				sys.tv_sec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			case EXD_PROC_CPU_SYS_NSEC:
				sys.tv_nsec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			case EXD_PROC_FINISH_SEC:
				finish.tv_sec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			case EXD_PROC_FINISH_NSEC:
				finish.tv_nsec = pobject.eo_item.ei_uint64;
				numfound++;
				break;
			}
			(void) ea_free_item(&pobject, EUP_ALLOC);
		}
		(void) ea_free_item(&object, EUP_ALLOC);
		if (numfound != 9) {
			zsd_warn(gettext(
			    "Malformed process accounting entry found"));
			goto proc_done;
		}

		if (finish.tv_sec > interval_end.tv_sec ||
		    (finish.tv_sec == interval_end.tv_sec &&
		    finish.tv_nsec > (interval_end.tv_usec * 1000)))
			hrtime_expired = B_TRUE;

		/*
		 * Try to identify the zone and pset to which this
		 * exited process belongs.
		 */
		if (zone == NULL)
			goto proc_done;

		/* Save proc info */
		proc->zspr_ppid = ppid;
		proc->zspr_zoneid = zone->zsz_id;

		prev_psetid = ZS_PSET_ERROR;
		sched = 0;

		/*
		 * The following tries to deduce the process's pset.
		 *
		 * First choose the pset and sched using the cached value
		 * from the most recent time the process was seen.
		 *
		 * pset and sched can change across zone_enter, so make sure
		 * the most recent sighting of this process was in the same
		 * zone before using the most recent known value.
		 *
		 * If there is no known value, use the value of the process's
		 * parent.  If the parent is unknown, walk parents until a
		 * known parent is found.
		 *
		 * If no parent in the zone is found, use the zone's default
		 * pset and scheduling class.
		 */
		if (proc->zspr_psetid != ZS_PSET_ERROR) {
			prev_psetid = proc->zspr_psetid;
			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
			sched = proc->zspr_sched;
		} else if (pproc->zspr_zoneid == zone->zsz_id &&
		    pproc->zspr_psetid != ZS_PSET_ERROR) {
			prev_psetid = pproc->zspr_psetid;
			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
			sched = pproc->zspr_sched;
		}

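		/*
		 * If neither this process nor its parent has a cached pset,
		 * pset is still NULL here; the record is queued on plist and
		 * resolved by walking the parent chain once the accounting
		 * file has been drained.
		 */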
		if (pset == NULL) {
			/*
			 * The process or its parent has never been seen.
			 * Save to deduce a known parent later.
			 */
			proc_usage = sys;
			TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
			TIMESTRUC_DELTA(delta, proc_usage, proc->zspr_usage);
			proc->zspr_usage = delta;
			list_insert_tail(&plist, proc);
			continue;
		}

		/* Add the zone's usage to the pset */
		usage = zsd_lookup_insert_usage(ctl, pset, zone);
		if (usage == NULL)
			goto proc_done;

		zsd_mark_pset_usage_found(usage, sched);

		/* compute the usage to add for the exited proc */
		proc_usage = sys;
		TIMESTRUC_ADD_TIMESTRUC(proc_usage, user);
		TIMESTRUC_DELTA(delta, proc_usage, proc->zspr_usage);

		zsd_add_usage(ctl, usage, &delta);
proc_done:
		zsd_flush_proc_info(proc);

		if (hrtime_expired == B_TRUE)
			break;
	}
	/*
	 * Close the next accounting file.
	 */
	if (ctl->zsctl_proc_open_next) {
		(void) ea_close(&ctl->zsctl_proc_eaf_next);
		ctl->zsctl_proc_open_next = 0;
		ctl->zsctl_proc_fd_next = -1;
	}

	/* For the remaining processes, use pset and sched of a known parent */
	proc = list_head(&plist);
	while (proc != NULL) {
		next = proc;
		for (;;) {
			if (next->zspr_ppid == 0 || next->zspr_ppid == -1) {
				/*
				 * Kernel process, or parent is unknown.  Skip
				 * the process and remove it from the process
				 * list.
				 */
				tmp = proc;
				proc = list_next(&plist, proc);
				list_link_init(&tmp->zspr_next);
				break;
			}
			pproc = &(ctl->zsctl_proc_array[next->zspr_ppid]);
			if (pproc->zspr_zoneid != proc->zspr_zoneid) {
				/*
				 * Parent in different zone.  Save process and
				 * use the zone's default pset and sched below.
				 */
				tmp = proc;
				proc = list_next(&plist, proc);
				list_remove(&plist, tmp);
				list_insert_tail(&pplist, tmp);
				break;
			}
			/* Parent has unknown pset.  Search parent's parent. */
			if (pproc->zspr_psetid == ZS_PSET_ERROR) {
				next = pproc;
				continue;
			}
			/* Found parent with known pset.  Use its info */
			proc->zspr_psetid = pproc->zspr_psetid;
			proc->zspr_sched = pproc->zspr_sched;
			next->zspr_psetid = pproc->zspr_psetid;
			next->zspr_sched = pproc->zspr_sched;
			zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
			if (zone == NULL) {
				tmp = proc;
				proc = list_next(&plist, proc);
				list_remove(&plist, tmp);
				list_link_init(&tmp->zspr_next);
				break;
			}
			pset = zsd_lookup_pset_byid(ctl, proc->zspr_psetid);
			if (pset == NULL) {
				tmp = proc;
				proc = list_next(&plist, proc);
				list_remove(&plist, tmp);
				list_link_init(&tmp->zspr_next);
				break;
			}
			/* Add the zone's usage to the pset */
			usage = zsd_lookup_insert_usage(ctl, pset, zone);
			if (usage == NULL) {
				tmp = proc;
				proc = list_next(&plist, proc);
				list_remove(&plist, tmp);
				list_link_init(&tmp->zspr_next);
				break;
			}
			zsd_mark_pset_usage_found(usage, proc->zspr_sched);
			zsd_add_usage(ctl, usage, &proc->zspr_usage);
			zsd_flush_proc_info(proc);
			tmp = proc;
			proc = list_next(&plist, proc);
			list_remove(&plist, tmp);
			list_link_init(&tmp->zspr_next);
			break;
		}
	}
	/*
	 * The process has never been seen.  Use zone info to
	 * determine pset and scheduling class.
	 */
	proc = list_head(&pplist);
	while (proc != NULL) {

		zone = zsd_lookup_zone_byid(ctl, proc->zspr_zoneid);
		if (zone == NULL)
			goto next;
		if (zone->zsz_psetid != ZS_PSET_ERROR &&
		    zone->zsz_psetid != ZS_PSET_MULTI) {
			prev_psetid = zone->zsz_psetid;
			pset = zsd_lookup_pset_byid(ctl, prev_psetid);
		} else {
			pset = zsd_lookup_pset(ctl, zone->zsz_pset, -1);
			if (pset != NULL)
				prev_psetid = pset->zsp_id;
		}
		if (pset == NULL)
			goto next;

		sched = zone->zsz_scheds;
		/*
		 * Ignore the FX high scheduling class if it is not the
		 * only scheduling class in the zone.
		 */
		if (sched != ZS_SCHED_FX_60)
			sched &= (~ZS_SCHED_FX_60);
		/*
		 * If more than one scheduling class has been found
		 * in the zone, use the zone's default scheduling class for
		 * this process.
		 */
		if ((sched & (sched - 1)) != 0)
			sched = zone->zsz_default_sched;

		/* Add the zone's usage to the pset */
		usage = zsd_lookup_insert_usage(ctl, pset, zone);
		if (usage == NULL)
			goto next;

		zsd_mark_pset_usage_found(usage, sched);
		zsd_add_usage(ctl, usage, &proc->zspr_usage);
next:
		tmp = proc;
		proc = list_next(&pplist, proc);
		zsd_flush_proc_info(tmp);
		list_link_init(&tmp->zspr_next);
	}
	return;
ea_err:
	/*
	 * Close the next accounting file if we have not transitioned to it
	 * yet.
	 */
	if (ctl->zsctl_proc_open_next) {
		(void) ea_close(&ctl->zsctl_proc_eaf_next);
		ctl->zsctl_proc_open_next = 0;
		ctl->zsctl_proc_fd_next = -1;
	}
}

/*
 * getvmusage(2) uses size_t's in the passed data structure, which differ
 * in size for 32 bit and 64 bit kernels.  Since this is a contracted
 * interface, and zonestatd does not necessarily match the kernel's bitness,
 * marshal results appropriately.
 */
static int
zsd_getvmusage(zsd_ctl_t *ctl, uint_t flags, time_t age, zsd_vmusage64_t *buf,
    uint64_t *nres)
{
	zsd_vmusage32_t *vmu32;
	zsd_vmusage64_t *vmu64;
	uint32_t nres32;
	int i;
	int ret;

	if (ctl->zsctl_kern_bits == 32) {
		nres32 = *nres;
		ret = syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
		    flags, age, (uintptr_t)buf, (uintptr_t)&nres32);
		*nres = nres32;
		if (ret == 0 && buf != NULL) {
			/*
			 * An array of vmusage32_t's has been returned.
			 * Convert it to an array of vmusage64_t's.
			 */
			vmu32 = (zsd_vmusage32_t *)buf;
			vmu64 = (zsd_vmusage64_t *)buf;
			for (i = nres32 - 1; i >= 0; i--) {

				vmu64[i].vmu_zoneid = vmu32[i].vmu_zoneid;
				vmu64[i].vmu_type = vmu32[i].vmu_type;
				vmu64[i].vmu_rss_all = vmu32[i].vmu_rss_all;
				vmu64[i].vmu_rss_private =
				    vmu32[i].vmu_rss_private;
				vmu64[i].vmu_rss_shared =
				    vmu32[i].vmu_rss_shared;
				vmu64[i].vmu_swap_all = vmu32[i].vmu_swap_all;
				vmu64[i].vmu_swap_private =
				    vmu32[i].vmu_swap_private;
				vmu64[i].vmu_swap_shared =
				    vmu32[i].vmu_swap_shared;
			}
		}
		return (ret);
	} else {
		/*
		 * kernel is 64 bit, so use 64 bit structures as zonestat
		 * expects.
		 */
		return (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE,
		    flags, age, (uintptr_t)buf, (