/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Joyent, Inc.
 */

/*
 * lgroup system calls
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sunddi.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
#include <sys/promif.h>		/* for prom_printf() */
#include <sys/sysmacros.h>
#include <sys/policy.h>

#include <vm/as.h>


/* definitions for mi_validity */
#define	VALID_ADDR	1
#define	VALID_REQ	2

/*
 * run through the given number of addresses and requests and return the
 * corresponding memory information for each address
 */
static int
meminfo(int addr_count, struct meminfo *mip)
{
	size_t		in_size, out_size, req_size, val_size;
	struct as	*as;
	struct hat	*hat;
	int		i, j, out_idx, info_count;
	lgrp_t		*lgrp;
	pfn_t		pfn;
	ssize_t		pgsz;
	int		*req_array, *val_array;
	uint64_t	*in_array, *out_array;
	uint64_t	addr, paddr;
	uintptr_t	vaddr;
	int		ret = 0;
	struct meminfo minfo;
#if defined(_SYSCALL32_IMPL)
	struct meminfo32 minfo32;
#endif

	/*
	 * Make sure that there is at least one address to translate and
	 * limit how many virtual addresses the kernel can do per call
	 */
	if (addr_count < 1)
		return (set_errno(EINVAL));
	else if (addr_count > MAX_MEMINFO_CNT)
		addr_count = MAX_MEMINFO_CNT;

	if (get_udatamodel() == DATAMODEL_NATIVE) {
		if (copyin(mip, &minfo, sizeof (struct meminfo)))
			return (set_errno(EFAULT));
	}
#if defined(_SYSCALL32_IMPL)
	else {
		bzero(&minfo, sizeof (minfo));
		if (copyin(mip, &minfo32, sizeof (struct meminfo32)))
			return (set_errno(EFAULT));
		minfo.mi_inaddr = (const uint64_t *)(uintptr_t)
		    minfo32.mi_inaddr;
		minfo.mi_info_req = (const uint_t *)(uintptr_t)
		    minfo32.mi_info_req;
		minfo.mi_info_count = minfo32.mi_info_count;
		minfo.mi_outdata = (uint64_t *)(uintptr_t)
		    minfo32.mi_outdata;
		minfo.mi_validity = (uint_t *)(uintptr_t)
		    minfo32.mi_validity;
	}
#endif
	/*
	 * All the input parameters have been copied in:
	 * addr_count - number of input addresses
	 * minfo.mi_inaddr - array of input addresses
	 * minfo.mi_info_req - array of types of information requested
	 * minfo.mi_info_count - number of pieces of info requested per addr
	 * minfo.mi_outdata - array into which the results are placed
	 * minfo.mi_validity - array containing bitwise result codes; 0th bit
	 *			evaluates validity of corresponding input
	 *			address, 1st bit validity of response to first
	 *			member of info_req, etc.
	 */
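	/*
	 * Worked example (illustrative): with info_count == 2, a validity
	 * word of (VALID_ADDR | (VALID_REQ << 1)) == 0x5 means the input
	 * address was mapped and the second request was answered, while
	 * bit 1 being clear means the first request could not be satisfied.
	 */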

	/* make sure mi_info_count is within limit */
	info_count = minfo.mi_info_count;
	if (info_count < 1 || info_count > MAX_MEMINFO_REQ)
		return (set_errno(EINVAL));

	/*
	 * allocate buffer in_array for the input addresses and copy them in
	 */
	in_size = sizeof (uint64_t) * addr_count;
	in_array = kmem_alloc(in_size, KM_SLEEP);
	if (copyin(minfo.mi_inaddr, in_array, in_size)) {
		kmem_free(in_array, in_size);
		return (set_errno(EFAULT));
	}

	/*
	 * allocate buffer req_array for the input info_reqs and copy them in
	 */
	req_size = sizeof (uint_t) * info_count;
	req_array = kmem_alloc(req_size, KM_SLEEP);
	if (copyin(minfo.mi_info_req, req_array, req_size)) {
		kmem_free(req_array, req_size);
		kmem_free(in_array, in_size);
		return (set_errno(EFAULT));
	}

	/*
	 * Validate privs for each req.
	 */
	for (i = 0; i < info_count; i++) {
		switch (req_array[i] & MEMINFO_MASK) {
		case MEMINFO_VLGRP:
		case MEMINFO_VPAGESIZE:
			break;
		default:
			if (secpolicy_meminfo(CRED()) != 0) {
				kmem_free(req_array, req_size);
				kmem_free(in_array, in_size);
				return (set_errno(EPERM));
			}
			break;
		}
	}

	/*
	 * allocate buffer out_array which holds the results and will have
	 * to be copied out later
	 */
	out_size = sizeof (uint64_t) * addr_count * info_count;
	out_array = kmem_alloc(out_size, KM_SLEEP);

	/*
	 * allocate buffer val_array which holds the validity bits and will
	 * have to be copied out later
	 */
	val_size = sizeof (uint_t) * addr_count;
	val_array = kmem_alloc(val_size, KM_SLEEP);

	if ((req_array[0] & MEMINFO_MASK) == MEMINFO_PLGRP) {
		/* find the corresponding lgroup for each physical address */
		for (i = 0; i < addr_count; i++) {
			paddr = in_array[i];
			pfn = btop(paddr);
			lgrp = lgrp_pfn_to_lgrp(pfn);
			if (lgrp) {
				out_array[i] = lgrp->lgrp_id;
				val_array[i] = VALID_ADDR | VALID_REQ;
			} else {
				out_array[i] = 0;
				val_array[i] = 0;
			}
		}
	} else {
		/* get the corresponding memory info for each virtual address */
		as = curproc->p_as;

		AS_LOCK_ENTER(as, RW_READER);
		hat = as->a_hat;
		for (i = out_idx = 0; i < addr_count; i++, out_idx +=
		    info_count) {
			addr = in_array[i];
			vaddr = (uintptr_t)(addr & ~PAGEOFFSET);
			if (!as_segat(as, (caddr_t)vaddr)) {
				val_array[i] = 0;
				continue;
			}
			val_array[i] = VALID_ADDR;
			pfn = hat_getpfnum(hat, (caddr_t)vaddr);
			if (pfn != PFN_INVALID) {
				paddr = (uint64_t)((pfn << PAGESHIFT) |
				    (addr & PAGEOFFSET));
				for (j = 0; j < info_count; j++) {
					switch (req_array[j] & MEMINFO_MASK) {
					case MEMINFO_VPHYSICAL:
						/*
						 * return the physical address
						 * corresponding to the input
						 * virtual address
						 */
						out_array[out_idx + j] = paddr;
						val_array[i] |= VALID_REQ << j;
						break;
					case MEMINFO_VLGRP:
						/*
						 * return the lgroup of physical
						 * page corresponding to the
						 * input virtual address
						 */
						lgrp = lgrp_pfn_to_lgrp(pfn);
						if (lgrp) {
							out_array[out_idx + j] =
							    lgrp->lgrp_id;
							val_array[i] |=
							    VALID_REQ << j;
						}
						break;
					case MEMINFO_VPAGESIZE:
						/*
						 * return the size of physical
						 * page corresponding to the
						 * input virtual address
						 */
						pgsz = hat_getpagesize(hat,
						    (caddr_t)vaddr);
						if (pgsz != -1) {
							out_array[out_idx + j] =
							    pgsz;
							val_array[i] |=
							    VALID_REQ << j;
						}
						break;
					case MEMINFO_VREPLCNT:
						/*
						 * for future use:
						 * return the number of
						 * replicated physical pages
						 * corresponding to the input
						 * virtual address, so it is
						 * always 0 at the moment
						 */
						out_array[out_idx + j] = 0;
						val_array[i] |= VALID_REQ << j;
						break;
					case MEMINFO_VREPL:
						/*
						 * for future use:
						 * return the nth physical
						 * replica of the specified
						 * virtual address
						 */
						break;
					case MEMINFO_VREPL_LGRP:
						/*
						 * for future use:
						 * return the lgroup of nth
						 * physical replica of the
						 * specified virtual address
						 */
						break;
					case MEMINFO_PLGRP:
						/*
						 * this is for physical address
						 * only, shouldn't mix with
						 * virtual address
						 */
						break;
					default:
						break;
					}
				}
			}
		}
		AS_LOCK_EXIT(as);
	}

	/* copy out the results and validity bits and free the buffers */
	if ((copyout(out_array, minfo.mi_outdata, out_size) != 0) ||
	    (copyout(val_array, minfo.mi_validity, val_size) != 0))
		ret = set_errno(EFAULT);

	kmem_free(in_array, in_size);
	kmem_free(out_array, out_size);
	kmem_free(req_array, req_size);
	kmem_free(val_array, val_size);

	return (ret);
}
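
/*
 * Usage sketch (userland, not part of this kernel file): the meminfo(2)
 * libc wrapper is believed to take the input addresses, the request array,
 * and the output/validity arrays shown below; treat the exact signature as
 * an assumption and see meminfo(2) for the authoritative interface.
 *
 *	uint64_t addrs[] = { (uintptr_t)buf };
 *	uint_t reqs[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE };
 *	uint64_t out[2];
 *	uint_t valid[1];
 *
 *	if (meminfo(addrs, 1, reqs, 2, out, valid) == 0 &&
 *	    (valid[0] & 0x1) != 0) {
 *		// bit 0 set: address was mapped
 *		// bit 1 set: lgroup answer in out[0]
 *		// bit 2 set: page size answer in out[1]
 *	}
 */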


/*
 * Initialize lgroup affinities for thread
 */
void
lgrp_affinity_init(lgrp_affinity_t **bufaddr)
{
	if (bufaddr)
		*bufaddr = NULL;
}


/*
 * Free lgroup affinities for thread and set to NULL
 * just in case thread gets recycled
 */
void
lgrp_affinity_free(lgrp_affinity_t **bufaddr)
{
	if (bufaddr && *bufaddr) {
		kmem_free(*bufaddr, nlgrpsmax * sizeof (lgrp_affinity_t));
		*bufaddr = NULL;
	}
}


#define	P_ANY	-2	/* cookie specifying any ID */


/*
 * Find LWP with given ID in specified process and get its affinity for
 * specified lgroup
 */
lgrp_affinity_t
lgrp_affinity_get_thread(proc_t *p, id_t lwpid, lgrp_id_t lgrp)
{
	lgrp_affinity_t aff;
	int		found;
	kthread_t	*t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	aff = LGRP_AFF_NONE;
	found = 0;
	t = p->p_tlist;
	/*
	 * The process may be executing in proc_exit() and its p->p_tlist may
	 * already be NULL.
	 */
	if (t == NULL)
		return (set_errno(ESRCH));

	do {
		if (t->t_tid == lwpid || lwpid == P_ANY) {
			thread_lock(t);
			/*
			 * Check to see whether caller has permission to get
			 * affinity for LWP
			 */
			if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
				thread_unlock(t);
				return (set_errno(EPERM));
			}

			if (t->t_lgrp_affinity)
				aff = t->t_lgrp_affinity[lgrp];
			thread_unlock(t);
			found = 1;
			break;
		}
	} while ((t = t->t_forw) != p->p_tlist);
	if (!found)
		aff = set_errno(ESRCH);

	return (aff);
}


/*
 * Get lgroup affinity for given LWP
 */
lgrp_affinity_t
lgrp_affinity_get(lgrp_affinity_args_t *ap)
{
	lgrp_affinity_t		aff;
	lgrp_affinity_args_t	args;
	id_t			id;
	idtype_t		idtype;
	lgrp_id_t		lgrp;
	proc_t			*p;
	kthread_t		*t;

	/*
	 * Copyin arguments
	 */
	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
		return (set_errno(EFAULT));

	id = args.id;
	idtype = args.idtype;
	lgrp = args.lgrp;

	/*
	 * Check for invalid lgroup
	 */
	if (lgrp < 0 || lgrp == LGRP_NONE)
		return (set_errno(EINVAL));

	/*
	 * Check for existing lgroup
	 */
	if (lgrp > lgrp_alloc_max)
		return (set_errno(ESRCH));

	/*
	 * Get lgroup affinity for given LWP or process
	 */
	switch (idtype) {

	case P_LWPID:
		/*
		 * LWP in current process
		 */
		p = curproc;
		mutex_enter(&p->p_lock);
		if (id != P_MYID)	/* different thread */
			aff = lgrp_affinity_get_thread(p, id, lgrp);
		else {			/* current thread */
			aff = LGRP_AFF_NONE;
			t = curthread;
			thread_lock(t);
			if (t->t_lgrp_affinity)
				aff = t->t_lgrp_affinity[lgrp];
			thread_unlock(t);
		}
		mutex_exit(&p->p_lock);
		break;

	case P_PID:
		/*
		 * Process
		 */
		mutex_enter(&pidlock);

		if (id == P_MYID)
			p = curproc;
		else {
			p = prfind(id);
			if (p == NULL) {
				mutex_exit(&pidlock);
				return (set_errno(ESRCH));
			}
		}

		mutex_enter(&p->p_lock);
		aff = lgrp_affinity_get_thread(p, P_ANY, lgrp);
		mutex_exit(&p->p_lock);

		mutex_exit(&pidlock);
		break;

	default:
		aff = set_errno(EINVAL);
		break;
	}

	return (aff);
}


/*
 * Find lgroup for which this thread has most affinity in specified partition
 * starting from home lgroup unless specified starting lgroup is preferred
 */
lpl_t *
lgrp_affinity_best(kthread_t *t, struct cpupart *cpupart, lgrp_id_t start,
    boolean_t prefer_start)
{
	lgrp_affinity_t	*affs;
	lgrp_affinity_t	best_aff;
	lpl_t		*best_lpl;
	lgrp_id_t	finish;
	lgrp_id_t	home;
	lgrp_id_t	lgrpid;
	lpl_t		*lpl;

	ASSERT(t != NULL);
	ASSERT((MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0) ||
	    (MUTEX_HELD(&ttoproc(t)->p_lock) && THREAD_LOCK_HELD(t)));
	ASSERT(cpupart != NULL);

	if (t->t_lgrp_affinity == NULL)
		return (NULL);

	affs = t->t_lgrp_affinity;

	/*
	 * Thread bound to CPU
	 */
	if (t->t_bind_cpu != PBIND_NONE) {
		cpu_t	*cp;

		/*
		 * Find which lpl has most affinity among leaf lpl directly
		 * containing CPU and its ancestor lpls
		 */
		cp = cpu[t->t_bind_cpu];

		best_lpl = lpl = cp->cpu_lpl;
		best_aff = affs[best_lpl->lpl_lgrpid];
		while (lpl->lpl_parent != NULL) {
			lpl = lpl->lpl_parent;
			lgrpid = lpl->lpl_lgrpid;
			if (affs[lgrpid] > best_aff) {
				best_lpl = lpl;
				best_aff = affs[lgrpid];
			}
		}
		return (best_lpl);
	}

	/*
	 * Start searching from home lgroup unless given starting lgroup is
	 * preferred or home lgroup isn't in given pset.  Use root lgroup as
	 * starting point if both home and starting lgroups aren't in given
	 * pset.
	 */
	ASSERT(start >= 0 && start <= lgrp_alloc_max);
	home = t->t_lpl->lpl_lgrpid;
	if (!prefer_start && LGRP_CPUS_IN_PART(home, cpupart))
		lgrpid = home;
	else if (start != LGRP_NONE && LGRP_CPUS_IN_PART(start, cpupart))
		lgrpid = start;
	else
		lgrpid = LGRP_ROOTID;

	best_lpl = &cpupart->cp_lgrploads[lgrpid];
	best_aff = affs[lgrpid];
	finish = lgrpid;
	do {
		/*
		 * Skip any lgroups that don't have CPU resources
		 * in this processor set.
		 */
		if (!LGRP_CPUS_IN_PART(lgrpid, cpupart)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;	/* wrap the search */
			continue;
		}

		/*
		 * Find lgroup with most affinity
		 */
		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (affs[lgrpid] > best_aff) {
			best_aff = affs[lgrpid];
			best_lpl = lpl;
		}

		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */

	} while (lgrpid != finish);

	/*
	 * No lgroup (in this pset) with any affinity
	 */
	if (best_aff == LGRP_AFF_NONE)
		return (NULL);

	lgrpid = best_lpl->lpl_lgrpid;
	ASSERT(LGRP_CPUS_IN_PART(lgrpid, cpupart) && best_lpl->lpl_ncpu > 0);

	return (best_lpl);
}


/*
 * Set thread's affinity for given lgroup
 */
int
lgrp_affinity_set_thread(kthread_t *t, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf)
{
	lgrp_affinity_t	*affs;
	lgrp_id_t	best;
	lpl_t		*best_lpl;
	lgrp_id_t	home;
	int		retval;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	retval = 0;

	thread_lock(t);

	/*
	 * Check to see whether caller has permission to set affinity for
	 * thread
	 */
	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
		thread_unlock(t);
		return (set_errno(EPERM));
	}

	if (t->t_lgrp_affinity == NULL) {
		if (aff == LGRP_AFF_NONE) {
			thread_unlock(t);
			return (0);
		}
		ASSERT(aff_buf != NULL && *aff_buf != NULL);
		t->t_lgrp_affinity = *aff_buf;
		*aff_buf = NULL;
	}

	affs = t->t_lgrp_affinity;
	affs[lgrp] = aff;

	/*
	 * Find lgroup for which thread has most affinity,
	 * starting with lgroup for which affinity is being set
	 */
	best_lpl = lgrp_affinity_best(t, t->t_cpupart, lgrp, B_TRUE);

	/*
	 * Rehome thread if we found an lgroup with more affinity than home,
	 * or if the lgroup for which affinity is being set has the same
	 * affinity as home
	 */
	home = t->t_lpl->lpl_lgrpid;
	if (best_lpl != NULL && best_lpl != t->t_lpl) {
		best = best_lpl->lpl_lgrpid;
		if (affs[best] > affs[home] || (affs[best] == affs[home] &&
		    best == lgrp))
			lgrp_move_thread(t, best_lpl, 1);
	}

	thread_unlock(t);

	return (retval);
}


/*
 * Set process' affinity for specified lgroup
 */
int
lgrp_affinity_set_proc(proc_t *p, lgrp_id_t lgrp, lgrp_affinity_t aff,
    lgrp_affinity_t **aff_buf_array)
{
	lgrp_affinity_t	*buf;
	int		err = 0;
	int		i;
	int		retval;
	kthread_t	*t;

	ASSERT(MUTEX_HELD(&pidlock) && MUTEX_HELD(&p->p_lock));
	ASSERT(aff_buf_array != NULL);

	i = 0;
	t = p->p_tlist;
	if (t != NULL) {
		do {
			/*
			 * Set lgroup affinity for thread
			 */
			buf = aff_buf_array[i];
			retval = lgrp_affinity_set_thread(t, lgrp, aff, &buf);

			if (err == 0 && retval != 0)
				err = retval;

			/*
			 * Advance pointer to next buffer
			 */
			if (buf == NULL) {
				ASSERT(i < p->p_lwpcnt);
				aff_buf_array[i] = NULL;
				i++;
			}

		} while ((t = t->t_forw) != p->p_tlist);
	}
	return (err);
}


/*
 * Set LWP's or process' affinity for specified lgroup
 *
 * When setting affinities, pidlock, process p_lock, and thread_lock()
 * need to be held in that order to protect target thread's pset, process,
 * process contents, and thread contents.  thread_lock() does splhigh(),
 * so it ends up having a similar effect to kpreempt_disable(), so it will
 * protect calls to lgrp_move_thread() and lgrp_choose() from pset changes.
 */
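/*
 * Sketch of that nesting (illustrative only, no error handling):
 *
 *	mutex_enter(&pidlock);
 *	mutex_enter(&p->p_lock);
 *	thread_lock(t);
 *	... update t->t_lgrp_affinity, possibly lgrp_move_thread(t, ...) ...
 *	thread_unlock(t);
 *	mutex_exit(&p->p_lock);
 *	mutex_exit(&pidlock);
 */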
int
lgrp_affinity_set(lgrp_affinity_args_t *ap)
{
	lgrp_affinity_t		aff;
	lgrp_affinity_t		*aff_buf;
	lgrp_affinity_args_t	args;
	id_t			id;
	idtype_t		idtype;
	lgrp_id_t		lgrp;
	int			nthreads;
	proc_t			*p;
	int			retval;

	/*
	 * Copyin arguments
	 */
	if (copyin(ap, &args, sizeof (lgrp_affinity_args_t)) != 0)
		return (set_errno(EFAULT));

	idtype = args.idtype;
	id = args.id;
	lgrp = args.lgrp;
	aff = args.aff;

	/*
	 * Check for invalid lgroup
	 */
	if (lgrp < 0 || lgrp == LGRP_NONE)
		return (set_errno(EINVAL));

	/*
	 * Check for existing lgroup
	 */
	if (lgrp > lgrp_alloc_max)
		return (set_errno(ESRCH));

	/*
	 * Check for legal affinity
	 */
	if (aff != LGRP_AFF_NONE && aff != LGRP_AFF_WEAK &&
	    aff != LGRP_AFF_STRONG)
		return (set_errno(EINVAL));

	/*
	 * Must be process or LWP ID
	 */
	if (idtype != P_LWPID && idtype != P_PID)
		return (set_errno(EINVAL));

	retval = EINVAL;
	/*
	 * Set given LWP's or process' affinity for specified lgroup
	 */
	switch (idtype) {

	case P_LWPID:
		/*
		 * Allocate memory for thread's lgroup affinities
		 * ahead of time w/o holding locks
		 */
		aff_buf = kmem_zalloc(nlgrpsmax * sizeof (lgrp_affinity_t),
		    KM_SLEEP);

		p = curproc;

		/*
		 * Set affinity for thread
		 */
		mutex_enter(&p->p_lock);
		if (id == P_MYID) {		/* current thread */
			retval = lgrp_affinity_set_thread(curthread, lgrp, aff,
			    &aff_buf);
		} else if (p->p_tlist == NULL) {
			retval = set_errno(ESRCH);
		} else {			/* other thread */
			int		found = 0;
			kthread_t	*t;

			t = p->p_tlist;
			do {
				if (t->t_tid == id) {
					retval = lgrp_affinity_set_thread(t,
					    lgrp, aff, &aff_buf);
					found = 1;
					break;
				}
			} while ((t = t->t_forw) != p->p_tlist);
			if (!found)
				retval = set_errno(ESRCH);
		}
		mutex_exit(&p->p_lock);

		/*
		 * Free memory for lgroup affinities,
		 * since thread didn't need it
		 */
		if (aff_buf)
			kmem_free(aff_buf,
			    nlgrpsmax * sizeof (lgrp_affinity_t));

		break;

	case P_PID:

		do {
			lgrp_affinity_t	**aff_buf_array;
			int		i;
			size_t		size;

			/*
			 * Get process
			 */
			mutex_enter(&pidlock);

			if (id == P_MYID)
				p = curproc;
			else
				p = prfind(id);

			if (p == NULL) {
				mutex_exit(&pidlock);
				return (set_errno(ESRCH));
			}

			/*
			 * Get number of threads in process
			 *
			 * NOTE: Only care about user processes,
			 *	 so p_lwpcnt should be number of threads.
			 */
			mutex_enter(&p->p_lock);
			nthreads = p->p_lwpcnt;
			mutex_exit(&p->p_lock);

			mutex_exit(&pidlock);

			if (nthreads < 1)
				return (set_errno(ESRCH));

			/*
			 * Preallocate memory for lgroup affinities for
			 * each thread in process now to avoid holding
			 * any locks.  Allocate an array to hold a buffer
			 * for each thread.
			 */
			aff_buf_array = kmem_zalloc(nthreads *
			    sizeof (lgrp_affinity_t *), KM_SLEEP);

			size = nlgrpsmax * sizeof (lgrp_affinity_t);
			for (i = 0; i < nthreads; i++)
				aff_buf_array[i] = kmem_zalloc(size, KM_SLEEP);

			mutex_enter(&pidlock);

			/*
			 * Get process again since dropped locks to allocate
			 * memory (except current process)
			 */
			if (id != P_MYID)
				p = prfind(id);

			/*
			 * Process went away after we dropped locks and before
			 * reacquiring them, so drop locks, free memory, and
			 * return.
			 */
			if (p == NULL) {
				mutex_exit(&pidlock);
				for (i = 0; i < nthreads; i++)
					kmem_free(aff_buf_array[i], size);
				kmem_free(aff_buf_array,
				    nthreads * sizeof (lgrp_affinity_t *));
				return (set_errno(ESRCH));
			}

			mutex_enter(&p->p_lock);

			/*
			 * See whether number of threads is same
			 * If not, drop locks, free memory, and try again
			 */
			if (nthreads != p->p_lwpcnt) {
				mutex_exit(&p->p_lock);
				mutex_exit(&pidlock);
				for (i = 0; i < nthreads; i++)
					kmem_free(aff_buf_array[i], size);
				kmem_free(aff_buf_array,
				    nthreads * sizeof (lgrp_affinity_t *));
				continue;
			}

			/*
			 * Set lgroup affinity for threads in process
			 */
			retval = lgrp_affinity_set_proc(p, lgrp, aff,
			    aff_buf_array);

			mutex_exit(&p->p_lock);
			mutex_exit(&pidlock);

			/*
			 * Free any leftover memory, since some threads may
			 * have already allocated memory and set lgroup
			 * affinities before
			 */
			for (i = 0; i < nthreads; i++)
				if (aff_buf_array[i] != NULL)
					kmem_free(aff_buf_array[i], size);
			kmem_free(aff_buf_array,
			    nthreads * sizeof (lgrp_affinity_t *));

			break;

		} while (nthreads != p->p_lwpcnt);

		break;

	default:
		retval = set_errno(EINVAL);
		break;
	}

	return (retval);
}
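
/*
 * Usage sketch (userland, not part of this kernel file): this syscall backs
 * the liblgrp lgrp_affinity_set(3LGRP) interface; the snippet below is a
 * plausible illustration rather than authoritative documentation.
 *
 *	#include <sys/lgrp_user.h>
 *
 *	// Give the calling thread strong affinity for lgroup 1
 *	if (lgrp_affinity_set(P_LWPID, P_MYID, 1, LGRP_AFF_STRONG) != 0)
 *		perror("lgrp_affinity_set");
 */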


/*
 * Return the latest generation number for the lgroup hierarchy
 * with the given view
 */
lgrp_gen_t
lgrp_generation(lgrp_view_t view)
{
	cpupart_t	*cpupart;
	uint_t		gen;

	kpreempt_disable();

	/*
	 * Determine generation number for given view
	 */
	if (view == LGRP_VIEW_OS)
		/*
		 * Return generation number of lgroup hierarchy for OS view
		 */
		gen = lgrp_gen;
	else {
		/*
		 * For caller's view, use generation numbers for lgroup
		 * hierarchy and caller's pset
		 * NOTE: Caller needs to check for change in pset ID
		 */
		cpupart = curthread->t_cpupart;
		ASSERT(cpupart);
		gen = lgrp_gen + cpupart->cp_gen;
	}

	kpreempt_enable();

	return (gen);
}


lgrp_id_t
lgrp_home_thread(kthread_t *t)
{
	lgrp_id_t	home;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	thread_lock(t);

	/*
	 * Check to see whether caller has permission to examine this
	 * thread
	 */
	if (t->t_cid == 0 || !hasprocperm(t->t_cred, CRED())) {
		thread_unlock(t);
		return (set_errno(EPERM));
	}

	home = lgrp_home_id(t);

	thread_unlock(t);
	return (home);
}


/*
 * Get home lgroup of given process or thread
 */
lgrp_id_t
lgrp_home_get(idtype_t idtype, id_t id)
{
	proc_t		*p;
	lgrp_id_t	retval;
	kthread_t	*t;

	/*
	 * Get home lgroup of given LWP or process
	 */
	switch (idtype) {

	case P_LWPID:
		p = curproc;

		/*
		 * Get home lgroup of thread
		 */
		mutex_enter(&p->p_lock);
		if (id == P_MYID) {		/* current thread */
			retval = lgrp_home_thread(curthread);
		} else if (p->p_tlist == NULL) {
			retval = set_errno(ESRCH);
		} else {			/* other thread */
			int	found = 0;

			t = p->p_tlist;
			do {
				if (t->t_tid == id) {
					retval = lgrp_home_thread(t);
					found = 1;
					break;
				}
			} while ((t = t->t_forw) != p->p_tlist);
			if (!found)
				retval = set_errno(ESRCH);
		}
		mutex_exit(&p->p_lock);
		break;

	case P_PID:
		/*
		 * Get process
		 */
		mutex_enter(&pidlock);

		if (id == P_MYID)
			p = curproc;
		else
			p = prfind(id);

		if (p == NULL) {
			mutex_exit(&pidlock);
			return (set_errno(ESRCH));
		}

		mutex_enter(&p->p_lock);
		t = p->p_tlist;
		if (t == NULL)
			retval = set_errno(ESRCH);
		else
			retval = lgrp_home_thread(t);
		mutex_exit(&p->p_lock);

		mutex_exit(&pidlock);

		break;

	default:
		retval = set_errno(EINVAL);
		break;
	}

	return (retval);
}


/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system; it cannot be used across platforms
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if
 * the number was at least proportional to make comparisons more meaningful
 * though.
 */
int
lgrp_latency(lgrp_id_t from, lgrp_id_t to)
{
	lgrp_t		*from_lgrp;
	int		i;
	int		latency;
	int		latency_max;
	lgrp_t		*to_lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (from < 0 || to < 0)
		return (set_errno(EINVAL));

	if (from > lgrp_alloc_max || to > lgrp_alloc_max)
		return (set_errno(ESRCH));

	from_lgrp = lgrp_table[from];
	to_lgrp = lgrp_table[to];

	if (!LGRP_EXISTS(from_lgrp) || !LGRP_EXISTS(to_lgrp)) {
		return (set_errno(ESRCH));
	}

	/*
	 * Get latency for same lgroup
	 */
	if (from == to) {
		latency = from_lgrp->lgrp_latency;
		return (latency);
	}

	/*
	 * Get latency between leaf lgroups
	 */
	if (from_lgrp->lgrp_childcnt == 0 && to_lgrp->lgrp_childcnt == 0)
		return (lgrp_plat_latency(from_lgrp->lgrp_plathand,
		    to_lgrp->lgrp_plathand));

	/*
	 * Determine max latency between resources in two lgroups
	 */
	latency_max = 0;
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_t	*from_rsrc;
		int	j;
		lgrp_t	*to_rsrc;

		from_rsrc = lgrp_table[i];
		if (!LGRP_EXISTS(from_rsrc) ||
		    !klgrpset_ismember(from_lgrp->lgrp_set[LGRP_RSRC_CPU], i))
			continue;

		for (j = 0; j <= lgrp_alloc_max; j++) {
			to_rsrc = lgrp_table[j];
			if (!LGRP_EXISTS(to_rsrc) ||
			    klgrpset_ismember(to_lgrp->lgrp_set[LGRP_RSRC_MEM],
			    j) == 0)
				continue;
			latency = lgrp_plat_latency(from_rsrc->lgrp_plathand,
			    to_rsrc->lgrp_plathand);
			if (latency > latency_max)
				latency_max = latency;
		}
	}
	return (latency_max);
}
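
/*
 * Usage sketch (userland, not part of this kernel file): liblgrp exposes
 * this via lgrp_latency(3LGRP); the identifiers "from" and "to" below are
 * placeholders for lgroup IDs obtained elsewhere (e.g. from a snapshot).
 *
 *	int lat = lgrp_latency(from, to);
 *
 * Per the block comment above, lat is only meaningful relative to other
 * latencies reported on the same running system.
 */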


/*
 * Return lgroup interface version number
 * 0 - none
 * 1 - original
 * 2 - lgrp_latency_cookie() and lgrp_resources() added
 */
int
lgrp_version(int version)
{
	/*
	 * Return LGRP_VER_NONE when requested version isn't supported
	 */
	if (version < LGRP_VER_NONE || version > LGRP_VER_CURRENT)
		return (LGRP_VER_NONE);

	/*
	 * Return current version when LGRP_VER_NONE passed in
	 */
	if (version == LGRP_VER_NONE)
		return (LGRP_VER_CURRENT);

	/*
	 * Otherwise, return supported version.
	 */
	return (version);
}
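
/*
 * For example, both of the following hold by construction:
 *
 *	lgrp_version(LGRP_VER_NONE) == LGRP_VER_CURRENT
 *	lgrp_version(LGRP_VER_CURRENT + 1) == LGRP_VER_NONE
 */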


/*
 * Snapshot of lgroup hierarchy
 *
 * One snapshot is kept and is based on the kernel's native data model, so
 * a 32-bit snapshot is kept for the 32-bit kernel and a 64-bit one for the
 * 64-bit kernel.  If a 32-bit user wants a snapshot from the 64-bit kernel,
 * the kernel generates a 32-bit snapshot from the data in its 64-bit snapshot.
 *
 * The format is defined by the lgroup snapshot header and the layout of
 * the snapshot in memory is as follows:
 * 1) lgroup snapshot header
 *    - specifies format of snapshot
 *    - defined by lgrp_snapshot_header_t
 * 2) lgroup info array
 *    - contains information about each lgroup
 *    - one element for each lgroup
 *    - each element is defined by lgrp_info_t
 * 3) lgroup CPU ID array
 *    - contains list (array) of CPU IDs for each lgroup
 *    - lgrp_info_t points into array and specifies how many CPUs belong to
 *      given lgroup
 * 4) lgroup parents array
 *    - contains lgroup bitmask of parents for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 5) lgroup children array
 *    - contains lgroup bitmask of children for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 6) lgroup resources array
 *    - contains lgroup bitmask of resources for each lgroup
 *    - bitmask is an array of unsigned longs and its size depends on nlgrpsmax
 * 7) lgroup latency table
 *    - contains latency from each lgroup to each of the other lgroups
 *
 * NOTE:  Must use nlgrpsmax for per lgroup data structures because lgroups
 *	  may be sparsely allocated.
 */
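
/*
 * Rough picture of the native snapshot buffer (a sketch of the layout
 * described above with sizes rounded up for alignment, not an exact
 * byte map):
 *
 *	+------------------------------------+  lgrp_snap
 *	| lgrp_snapshot_header_t             |
 *	+------------------------------------+  ss_info
 *	| lgrp_info_t[nlgrpsmax]             |
 *	+------------------------------------+  ss_cpuids
 *	| processorid_t[ncpus]               |
 *	+------------------------------------+  ss_lgrpset
 *	| one lgroup bitmask (pset)          |
 *	+------------------------------------+  ss_parents
 *	| bitmask[nlgrpsmax]                 |
 *	+------------------------------------+  ss_children
 *	| bitmask[nlgrpsmax]                 |
 *	+------------------------------------+  ss_rsets
 *	| bitmask[LGRP_RSRC_COUNT*nlgrpsmax] |
 *	+------------------------------------+  ss_latencies
 *	| int *[nlgrpsmax] row pointers,     |
 *	| then int[nlgrpsmax][nlgrpsmax]     |
 *	+------------------------------------+
 */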
lgrp_snapshot_header_t	*lgrp_snap = NULL;	/* lgroup snapshot */
static kmutex_t		lgrp_snap_lock;		/* snapshot lock */


/*
 * Take a snapshot of lgroup hierarchy and return size of buffer
 * needed to hold snapshot
 */
static int
lgrp_snapshot(void)
{
	size_t		bitmask_size;
	size_t		bitmasks_size;
	size_t		bufsize;
	int		cpu_index;
	size_t		cpuids_size;
	int		i;
	int		j;
	size_t		info_size;
	size_t		lats_size;
	ulong_t		*lgrp_children;
	processorid_t	*lgrp_cpuids;
	lgrp_info_t	*lgrp_info;
	int		**lgrp_lats;
	ulong_t		*lgrp_parents;
	ulong_t		*lgrp_rsets;
	ulong_t		*lgrpset;
	int		snap_ncpus;
	int		snap_nlgrps;
	int		snap_nlgrpsmax;
	size_t		snap_hdr_size;
#ifdef	_SYSCALL32_IMPL
	model_t		model = DATAMODEL_NATIVE;

	/*
	 * If we have an up-to-date snapshot, check to see whether the caller
	 * is a 32-bit program and we need to return the size of the 32-bit
	 * snapshot now.
	 */
	model = get_udatamodel();
	if (model == DATAMODEL_ILP32 && lgrp_snap &&
	    lgrp_snap->ss_gen == lgrp_gen) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		/*
		 * lgroup bitmasks needed for parents, children, and resources
		 * for each lgroup and pset lgroup set
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (caddr32_t) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	/*
	 * Check whether snapshot is up-to-date
	 * Free it and take another one if not
	 */
	if (lgrp_snap) {
		if (lgrp_snap->ss_gen == lgrp_gen)
			return (lgrp_snap->ss_size);

		kmem_free(lgrp_snap, lgrp_snap->ss_size);
		lgrp_snap = NULL;
	}

	/*
	 * Allocate memory for snapshot
	 * w/o holding cpu_lock while waiting for memory
	 */
	while (lgrp_snap == NULL) {
		int	old_generation;

		/*
		 * Take snapshot of lgroup generation number
		 * and configuration size dependent information
		 * NOTE: Only count number of online CPUs,
		 * since only online CPUs appear in lgroups.
		 */
		mutex_enter(&cpu_lock);
		old_generation = lgrp_gen;
		snap_ncpus = ncpus_online;
		snap_nlgrps = nlgrps;
		snap_nlgrpsmax = nlgrpsmax;
		mutex_exit(&cpu_lock);

		/*
		 * Calculate size of buffer needed for snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
		    sizeof (void *));
		info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
		    sizeof (processorid_t));
		cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));
		/*
		 * lgroup bitmasks needed for pset lgroup set and parents,
		 * children, and resource sets for each lgroup
		 */
		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) *
		    snap_nlgrpsmax) + 1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = snap_nlgrpsmax * sizeof (int *) +
		    snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int);

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;

		/*
		 * Allocate memory for buffer
		 */
		lgrp_snap = kmem_zalloc(bufsize, KM_NOSLEEP);
		if (lgrp_snap == NULL)
			return (set_errno(ENOMEM));

		/*
		 * Check whether generation number has changed
		 */
		mutex_enter(&cpu_lock);
		if (lgrp_gen == old_generation)
			break;		/* hasn't changed, so done. */

		/*
		 * Generation number changed, so free memory and try again.
		 */
		mutex_exit(&cpu_lock);
		kmem_free(lgrp_snap, bufsize);
		lgrp_snap = NULL;
	}

	/*
	 * Fill in lgroup snapshot header
	 * (including pointers to tables of lgroup info, CPU IDs, and parents
	 * and children)
	 */
	lgrp_snap->ss_version = LGRP_VER_CURRENT;

	/*
	 * XXX For now, liblgrp only needs to know whether the hierarchy
	 * XXX only has one level or not
	 */
	if (snap_nlgrps == 1)
		lgrp_snap->ss_levels = 1;
	else
		lgrp_snap->ss_levels = 2;

	lgrp_snap->ss_root = LGRP_ROOTID;

	lgrp_snap->ss_nlgrps = lgrp_snap->ss_nlgrps_os = snap_nlgrps;
	lgrp_snap->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap->ss_ncpus = snap_ncpus;
	lgrp_snap->ss_gen = lgrp_gen;
	lgrp_snap->ss_view = LGRP_VIEW_OS;
	lgrp_snap->ss_pset = 0;		/* NOTE: caller should set if needed */
	lgrp_snap->ss_size = bufsize;
	lgrp_snap->ss_magic = (uintptr_t)lgrp_snap;

	lgrp_snap->ss_info = lgrp_info =
	    (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);

	lgrp_snap->ss_cpuids = lgrp_cpuids =
	    (processorid_t *)((uintptr_t)lgrp_info + info_size);

	lgrp_snap->ss_lgrpset = lgrpset =
	    (ulong_t *)((uintptr_t)lgrp_cpuids + cpuids_size);

	lgrp_snap->ss_parents = lgrp_parents =
	    (ulong_t *)((uintptr_t)lgrpset + bitmask_size);

	lgrp_snap->ss_children = lgrp_children =
	    (ulong_t *)((uintptr_t)lgrp_parents + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_rsets = lgrp_rsets =
	    (ulong_t *)((uintptr_t)lgrp_children + (snap_nlgrpsmax *
	    bitmask_size));

	lgrp_snap->ss_latencies = lgrp_lats =
	    (int **)((uintptr_t)lgrp_rsets + (LGRP_RSRC_COUNT *
	    snap_nlgrpsmax * bitmask_size));

	/*
	 * Fill in lgroup information
	 */
	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		struct cpu	*cp;
		int		cpu_count;
		struct cpu	*head;
		int		k;
		lgrp_t		*lgrp;

		lgrp = lgrp_table[i];
		if (!LGRP_EXISTS(lgrp)) {
			bzero(&lgrp_info[i], sizeof (lgrp_info[i]));
			lgrp_info[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		lgrp_info[i].info_lgrpid = i;
		lgrp_info[i].info_latency = lgrp->lgrp_latency;

		/*
		 * Fill in parents, children, and lgroup resources
		 */
		lgrp_info[i].info_parents =
		    (ulong_t *)((uintptr_t)lgrp_parents + (i * bitmask_size));

		if (lgrp->lgrp_parent)
			BT_SET(lgrp_info[i].info_parents,
			    lgrp->lgrp_parent->lgrp_id);

		lgrp_info[i].info_children =
		    (ulong_t *)((uintptr_t)lgrp_children + (i * bitmask_size));

		for (j = 0; j < snap_nlgrpsmax; j++)
			if (klgrpset_ismember(lgrp->lgrp_children, j))
				BT_SET(lgrp_info[i].info_children, j);

		lgrp_info[i].info_rset =
		    (ulong_t *)((uintptr_t)lgrp_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));

		for (j = 0; j < LGRP_RSRC_COUNT; j++) {
			ulong_t	*rset;

			rset = (ulong_t *)((uintptr_t)lgrp_info[i].info_rset +
			    (j * bitmask_size));
			for (k = 0; k < snap_nlgrpsmax; k++)
				if (klgrpset_ismember(lgrp->lgrp_set[j], k))
					BT_SET(rset, k);
		}

		/*
		 * Fill in CPU IDs
		 */
		cpu_count = 0;
		lgrp_info[i].info_cpuids = NULL;
		cp = head = lgrp->lgrp_cpu;
		if (head != NULL) {
			lgrp_info[i].info_cpuids = &lgrp_cpuids[cpu_index];
			do {
				lgrp_cpuids[cpu_index] = cp->cpu_id;
				cpu_index++;
				cpu_count++;
				cp = cp->cpu_next_lgrp;
			} while (cp != head);
		}
		ASSERT(cpu_count == lgrp->lgrp_cpucnt);
		lgrp_info[i].info_ncpus = cpu_count;

		/*
		 * Fill in memory sizes for lgroups that directly contain
		 * memory
		 */
		if (klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], i)) {
			lgrp_info[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);
			lgrp_info[i].info_mem_install =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_INSTALL);
		}

		/*
		 * Fill in latency table and buffer
		 */
		lgrp_lats[i] = (int *)((uintptr_t)lgrp_lats + snap_nlgrpsmax *
		    sizeof (int *) + i * snap_nlgrpsmax * sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			lgrp_t	*to;

			to = lgrp_table[j];
			if (!LGRP_EXISTS(to))
				continue;
			lgrp_lats[i][j] = lgrp_latency(lgrp->lgrp_id,
			    to->lgrp_id);
		}
	}
	ASSERT(cpu_index == snap_ncpus);


	mutex_exit(&cpu_lock);

#ifdef	_SYSCALL32_IMPL
	/*
	 * Check to see whether the caller is a 32-bit program and we need to
	 * return the size of the 32-bit snapshot now that the snapshot has
	 * been taken/updated.  May not have been able to do this earlier if
	 * the snapshot was out of date or didn't exist yet.
	 */
	if (model == DATAMODEL_ILP32) {

		snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

		/*
		 * Calculate size of buffer needed for 32-bit snapshot,
		 * rounding up size of each object to allow for alignment
		 * of next object in buffer.
		 */
		snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
		    sizeof (caddr32_t));
		info_size =
		    P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
		    sizeof (processorid_t));
		cpuids_size =
		    P2ROUNDUP(lgrp_snap->ss_ncpus * sizeof (processorid_t),
		    sizeof (ulong_t));

		bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);
		bitmasks_size = (((2 + LGRP_RSRC_COUNT) * snap_nlgrpsmax) +
		    1) * bitmask_size;

		/*
		 * Size of latency table and buffer
		 */
		lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
		    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

		bufsize = snap_hdr_size + info_size + cpuids_size +
		    bitmasks_size + lats_size;
		return (bufsize);
	}
#endif	/* _SYSCALL32_IMPL */

	return (lgrp_snap->ss_size);
}


/*
 * Copy snapshot into given user buffer, fix up any pointers in buffer to point
 * into user instead of kernel address space, and return size of buffer
 * needed to hold snapshot
 */
static int
lgrp_snapshot_copy(char *buf, size_t bufsize)
{
	size_t			bitmask_size;
	int			cpu_index;
	size_t			cpuids_size;
	int			i;
	size_t			info_size;
	lgrp_info_t		*lgrp_info;
	int			retval;
	size_t			snap_hdr_size;
	int			snap_ncpus;
	int			snap_nlgrpsmax;
	lgrp_snapshot_header_t	*user_snap;
	lgrp_info_t		*user_info;
	lgrp_info_t		*user_info_buffer;
	processorid_t		*user_cpuids;
	ulong_t			*user_lgrpset;
	ulong_t			*user_parents;
	ulong_t			*user_children;
	int			**user_lats;
	int			**user_lats_buffer;
	ulong_t			*user_rsets;

	if (lgrp_snap == NULL)
		return (0);

	if (buf == NULL || bufsize <= 0)
		return (lgrp_snap->ss_size);

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < lgrp_snap->ss_size)
		return (set_errno(EAGAIN));

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET(lgrp_snap->ss_lgrpset, i);
		}
	}
	kpreempt_enable();

	/*
	 * Copy lgroup snapshot (snapshot header, lgroup info, and CPU IDs)
	 * into user buffer all at once
	 */
	if (copyout(lgrp_snap, buf, lgrp_snap->ss_size) != 0)
		return (set_errno(EFAULT));

	/*
	 * Round up sizes of lgroup snapshot header and info for alignment
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header_t),
	    sizeof (void *));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP(snap_nlgrpsmax);

	/*
	 * Calculate pointers into user buffer for lgroup snapshot header,
	 * info, and CPU IDs
	 */
	user_snap = (lgrp_snapshot_header_t *)buf;
	user_info = (lgrp_info_t *)((uintptr_t)user_snap + snap_hdr_size);
	user_cpuids = (processorid_t *)((uintptr_t)user_info + info_size);
	user_lgrpset = (ulong_t *)((uintptr_t)user_cpuids + cpuids_size);
	user_parents = (ulong_t *)((uintptr_t)user_lgrpset + bitmask_size);
	user_children = (ulong_t *)((uintptr_t)user_parents +
	    (snap_nlgrpsmax * bitmask_size));
	user_rsets = (ulong_t *)((uintptr_t)user_children +
	    (snap_nlgrpsmax * bitmask_size));
	user_lats = (int **)((uintptr_t)user_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size));

	/*
	 * Copyout magic number (ie. pointer to beginning of buffer)
	 */
	if (copyout(&buf, &user_snap->ss_magic, sizeof (buf)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Fix up pointers in user buffer to point into user buffer
	 * not kernel snapshot
	 */
	if (copyout(&user_info, &user_snap->ss_info, sizeof (user_info)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_cpuids, &user_snap->ss_cpuids,
	    sizeof (user_cpuids)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lgrpset, &user_snap->ss_lgrpset,
	    sizeof (user_lgrpset)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_parents, &user_snap->ss_parents,
	    sizeof (user_parents)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_children, &user_snap->ss_children,
	    sizeof (user_children)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_rsets, &user_snap->ss_rsets,
	    sizeof (user_rsets)) != 0)
		return (set_errno(EFAULT));

	if (copyout(&user_lats, &user_snap->ss_latencies,
	    sizeof (user_lats)) != 0)
		return (set_errno(EFAULT));

	/*
	 * Make copies of lgroup info and latency table, fix up pointers,
	 * and then copy them into user buffer
	 */
	user_info_buffer = kmem_zalloc(info_size, KM_NOSLEEP);
	if (user_info_buffer == NULL)
		return (set_errno(ENOMEM));

	user_lats_buffer = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (user_lats_buffer == NULL) {
		kmem_free(user_info_buffer, info_size);
		return (set_errno(ENOMEM));
	}

	lgrp_info = (lgrp_info_t *)((uintptr_t)lgrp_snap + snap_hdr_size);
	bcopy(lgrp_info, user_info_buffer, info_size);

	cpu_index = 0;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (user_info_buffer[i].info_lgrpid == LGRP_NONE)
			continue;

		/*
		 * Update free memory size since it changes frequently
		 * Only do so for lgroups directly containing memory
		 *
		 * NOTE: This must be done before changing the pointers to
		 *	 point into user space since we need to dereference
		 *	 lgroup resource set
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i))
			user_info_buffer[i].info_mem_free =
			    lgrp_mem_size(i, LGRP_MEM_SIZE_FREE);

		/*
		 * Fix up pointers to parents, children, resources, and
		 * latencies
		 */
		user_info_buffer[i].info_parents =
		    (ulong_t *)((uintptr_t)user_parents + (i * bitmask_size));
		user_info_buffer[i].info_children =
		    (ulong_t *)((uintptr_t)user_children + (i * bitmask_size));
		user_info_buffer[i].info_rset =
		    (ulong_t *)((uintptr_t)user_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		user_lats_buffer[i] = (int *)((uintptr_t)user_lats +
		    (snap_nlgrpsmax * sizeof (int *)) + (i * snap_nlgrpsmax *
		    sizeof (int)));

		/*
		 * Fix up pointer to CPU IDs
		 */
		if (user_info_buffer[i].info_ncpus == 0) {
			user_info_buffer[i].info_cpuids = NULL;
			continue;
		}
		user_info_buffer[i].info_cpuids = &user_cpuids[cpu_index];
		cpu_index += user_info_buffer[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup info and latency table with pointers fixed up to point
	 * into user buffer out to user buffer now
	 */
	retval = lgrp_snap->ss_size;
	if (copyout(user_info_buffer, user_info, info_size) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_info_buffer, info_size);

	if (copyout(user_lats_buffer, user_lats, snap_nlgrpsmax *
	    sizeof (int *)) != 0)
		retval = set_errno(EFAULT);
	kmem_free(user_lats_buffer, snap_nlgrpsmax * sizeof (int *));

	return (retval);
}


#ifdef	_SYSCALL32_IMPL
/*
 * Make 32-bit copy of snapshot, fix up any pointers in buffer to point
 * into user instead of kernel address space, copy 32-bit snapshot into
 * given user buffer, and return size of buffer needed to hold snapshot
 */
static int
lgrp_snapshot_copy32(caddr32_t buf, size32_t bufsize)
{
	size32_t			bitmask_size;
	size32_t			bitmasks_size;
	size32_t			children_size;
	int				cpu_index;
	size32_t			cpuids_size;
	int				i;
	int				j;
	size32_t			info_size;
	size32_t			lats_size;
	lgrp_info_t			*lgrp_info;
	lgrp_snapshot_header32_t	*lgrp_snap32;
	lgrp_info32_t			*lgrp_info32;
	processorid_t			*lgrp_cpuids32;
	caddr32_t			*lgrp_lats32;
	int				**lgrp_lats32_kernel;
	uint_t				*lgrp_set32;
	uint_t				*lgrp_parents32;
	uint_t				*lgrp_children32;
	uint_t				*lgrp_rsets32;
	size32_t			parents_size;
	size32_t			rsets_size;
	size32_t			set_size;
	size32_t			snap_hdr_size;
	int				snap_ncpus;
	int				snap_nlgrpsmax;
	size32_t			snap_size;

	if (lgrp_snap == NULL)
		return (0);

	snap_ncpus = lgrp_snap->ss_ncpus;
	snap_nlgrpsmax = lgrp_snap->ss_nlgrps_max;

	/*
	 * Calculate size of buffer needed for 32-bit snapshot,
	 * rounding up size of each object to allow for alignment
	 * of next object in buffer.
	 */
	snap_hdr_size = P2ROUNDUP(sizeof (lgrp_snapshot_header32_t),
	    sizeof (caddr32_t));
	info_size = P2ROUNDUP(snap_nlgrpsmax * sizeof (lgrp_info32_t),
	    sizeof (processorid_t));
	cpuids_size = P2ROUNDUP(snap_ncpus * sizeof (processorid_t),
	    sizeof (ulong_t));

	bitmask_size = BT_SIZEOFMAP32(snap_nlgrpsmax);

	set_size = bitmask_size;
	parents_size = snap_nlgrpsmax * bitmask_size;
	children_size = snap_nlgrpsmax * bitmask_size;
	rsets_size = P2ROUNDUP(LGRP_RSRC_COUNT * snap_nlgrpsmax *
	    (int)bitmask_size, sizeof (caddr32_t));

	bitmasks_size = set_size + parents_size + children_size + rsets_size;

	/*
	 * Size of latency table and buffer
	 */
	lats_size = (snap_nlgrpsmax * sizeof (caddr32_t)) +
	    (snap_nlgrpsmax * snap_nlgrpsmax * sizeof (int));

	snap_size = snap_hdr_size + info_size + cpuids_size + bitmasks_size +
	    lats_size;

	if (buf == 0 || bufsize <= 0) {
		return (snap_size);
	}

	/*
	 * User needs to try getting size of buffer again
	 * because given buffer size is too small.
	 * The lgroup hierarchy may have changed after they asked for the size
	 * but before the snapshot was taken.
	 */
	if (bufsize < snap_size)
		return (set_errno(EAGAIN));

	/*
	 * Make 32-bit copy of snapshot, fix up pointers to point into user
	 * buffer not kernel, and then copy whole thing into user buffer
	 */
	lgrp_snap32 = kmem_zalloc(snap_size, KM_NOSLEEP);
	if (lgrp_snap32 == NULL)
		return (set_errno(ENOMEM));

	/*
	 * Calculate pointers into 32-bit copy of snapshot
	 * for lgroup info, CPU IDs, pset lgroup bitmask, parents, children,
	 * resources, and latency table and buffer
	 */
	lgrp_info32 = (lgrp_info32_t *)((uintptr_t)lgrp_snap32 +
	    snap_hdr_size);
	lgrp_cpuids32 = (processorid_t *)((uintptr_t)lgrp_info32 + info_size);
	lgrp_set32 = (uint_t *)((uintptr_t)lgrp_cpuids32 + cpuids_size);
	lgrp_parents32 = (uint_t *)((uintptr_t)lgrp_set32 + set_size);
	lgrp_children32 = (uint_t *)((uintptr_t)lgrp_parents32 + parents_size);
	lgrp_rsets32 = (uint_t *)((uintptr_t)lgrp_children32 + children_size);
	lgrp_lats32 = (caddr32_t *)((uintptr_t)lgrp_rsets32 + rsets_size);

	/*
	 * Make temporary lgroup latency table of pointers for kernel to use
	 * to fill in rows of table with latencies from each lgroup
	 */
	lgrp_lats32_kernel = kmem_zalloc(snap_nlgrpsmax * sizeof (int *),
	    KM_NOSLEEP);
	if (lgrp_lats32_kernel == NULL) {
		kmem_free(lgrp_snap32, snap_size);
		return (set_errno(ENOMEM));
	}

	/*
	 * Fill in 32-bit lgroup snapshot header
	 * (with pointers into user's buffer for lgroup info, CPU IDs,
	 * bit masks, and latencies)
	 */
	lgrp_snap32->ss_version = lgrp_snap->ss_version;
	lgrp_snap32->ss_levels = lgrp_snap->ss_levels;
	lgrp_snap32->ss_nlgrps = lgrp_snap32->ss_nlgrps_os =
	    lgrp_snap->ss_nlgrps;
	lgrp_snap32->ss_nlgrps_max = snap_nlgrpsmax;
	lgrp_snap32->ss_root = lgrp_snap->ss_root;
	lgrp_snap32->ss_ncpus = lgrp_snap->ss_ncpus;
	lgrp_snap32->ss_gen = lgrp_snap->ss_gen;
	lgrp_snap32->ss_view = LGRP_VIEW_OS;
	lgrp_snap32->ss_size = snap_size;
	lgrp_snap32->ss_magic = buf;
	lgrp_snap32->ss_info = buf + snap_hdr_size;
	lgrp_snap32->ss_cpuids = lgrp_snap32->ss_info + info_size;
	lgrp_snap32->ss_lgrpset = lgrp_snap32->ss_cpuids + cpuids_size;
	lgrp_snap32->ss_parents = lgrp_snap32->ss_lgrpset + bitmask_size;
	lgrp_snap32->ss_children = lgrp_snap32->ss_parents +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_rsets = lgrp_snap32->ss_children +
	    (snap_nlgrpsmax * bitmask_size);
	lgrp_snap32->ss_latencies = lgrp_snap32->ss_rsets +
	    (LGRP_RSRC_COUNT * snap_nlgrpsmax * bitmask_size);

	/*
	 * Fill in lgrpset now because caller may have changed psets
	 */
	kpreempt_disable();
	for (i = 0; i < snap_nlgrpsmax; i++) {
		if (klgrpset_ismember(curthread->t_cpupart->cp_lgrpset,
		    i)) {
			BT_SET32(lgrp_set32, i);
		}
	}
	kpreempt_enable();

	/*
	 * Fill in 32-bit copy of lgroup info and fix up pointers
	 * to point into user's buffer instead of kernel's
	 */
	cpu_index = 0;
	lgrp_info = lgrp_snap->ss_info;
	for (i = 0; i < snap_nlgrpsmax; i++) {
		uint_t	*children;
		uint_t	*lgrp_rset;
		uint_t	*parents;
		ulong_t	*snap_rset;

		/*
		 * Skip non-existent lgroups
		 */
		if (lgrp_info[i].info_lgrpid == LGRP_NONE) {
			bzero(&lgrp_info32[i], sizeof (lgrp_info32[i]));
			lgrp_info32[i].info_lgrpid = LGRP_NONE;
			continue;
		}

		/*
		 * Fill in parents, children, lgroup resource set, and
		 * latencies from snapshot
		 */
		parents = (uint_t *)((uintptr_t)lgrp_parents32 +
		    i * bitmask_size);
		children = (uint_t *)((uintptr_t)lgrp_children32 +
		    i * bitmask_size);
		snap_rset = (ulong_t *)((uintptr_t)lgrp_snap->ss_rsets +
		    (i * LGRP_RSRC_COUNT * BT_SIZEOFMAP(snap_nlgrpsmax)));
		lgrp_rset = (uint_t *)((uintptr_t)lgrp_rsets32 +
		    (i * LGRP_RSRC_COUNT * bitmask_size));
		lgrp_lats32_kernel[i] = (int *)((uintptr_t)lgrp_lats32 +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int));
		for (j = 0; j < snap_nlgrpsmax; j++) {
			int	k;
			uint_t	*rset;

			if (BT_TEST(&lgrp_snap->ss_parents[i], j))
				BT_SET32(parents, j);

			if (BT_TEST(&lgrp_snap->ss_children[i], j))
				BT_SET32(children, j);

			for (k = 0; k < LGRP_RSRC_COUNT; k++) {
				rset = (uint_t *)((uintptr_t)lgrp_rset +
				    k * bitmask_size);
				if (BT_TEST(&snap_rset[k], j))
					BT_SET32(rset, j);
			}

			lgrp_lats32_kernel[i][j] =
			    lgrp_snap->ss_latencies[i][j];
		}

		/*
		 * Fix up pointer to latency buffer
		 */
		lgrp_lats32[i] = lgrp_snap32->ss_latencies +
		    snap_nlgrpsmax * sizeof (caddr32_t) + i * snap_nlgrpsmax *
		    sizeof (int);

		/*
		 * Fix up pointers for parents, children, and resources
		 */
		lgrp_info32[i].info_parents = lgrp_snap32->ss_parents +
		    (i * bitmask_size);
		lgrp_info32[i].info_children = lgrp_snap32->ss_children +
		    (i * bitmask_size);
		lgrp_info32[i].info_rset = lgrp_snap32->ss_rsets +
		    (i * LGRP_RSRC_COUNT * bitmask_size);

		/*
		 * Fill in memory and CPU info
		 * Only fill in memory for lgroups directly containing memory
		 */
		snap_rset = &lgrp_info[i].info_rset[LGRP_RSRC_MEM *
		    BT_BITOUL(snap_nlgrpsmax)];
		if (BT_TEST(snap_rset, i)) {
			lgrp_info32[i].info_mem_free = lgrp_mem_size(i,
			    LGRP_MEM_SIZE_FREE);
			lgrp_info32[i].info_mem_install =
			    lgrp_info[i].info_mem_install;
		}

		lgrp_info32[i].info_ncpus = lgrp_info[i].info_ncpus;

		lgrp_info32[i].info_lgrpid = lgrp_info[i].info_lgrpid;
		lgrp_info32[i].info_latency = lgrp_info[i].info_latency;

		if (lgrp_info32[i].info_ncpus == 0) {
			lgrp_info32[i].info_cpuids = 0;
			continue;
		}

		/*
		 * Fix up pointer for CPU IDs
		 */
		lgrp_info32[i].info_cpuids = lgrp_snap32->ss_cpuids +
		    (cpu_index * sizeof (processorid_t));
		cpu_index += lgrp_info32[i].info_ncpus;
	}
	ASSERT(cpu_index == snap_ncpus);

	/*
	 * Copy lgroup CPU IDs into 32-bit snapshot
	 * before copying it out into user's buffer
	 */
	bcopy(lgrp_snap->ss_cpuids, lgrp_cpuids32, cpuids_size);

	/*
	 * Copy 32-bit lgroup snapshot into user's buffer all at once
	 */
	if (copyout(lgrp_snap32, (void *)(uintptr_t)buf, snap_size) != 0) {
		kmem_free(lgrp_snap32, snap_size);
		kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));
		return (set_errno(EFAULT));
	}

	kmem_free(lgrp_snap32, snap_size);
	kmem_free(lgrp_lats32_kernel, snap_nlgrpsmax * sizeof (int *));

	return (snap_size);
}
#endif	/* _SYSCALL32_IMPL */


int
lgrpsys(int subcode, long ia, void *ap)
{
	size_t	bufsize;
	int	latency;

	switch (subcode) {

	case LGRP_SYS_AFFINITY_GET:
		return (lgrp_affinity_get((lgrp_affinity_args_t *)ap));

	case LGRP_SYS_AFFINITY_SET:
		return (lgrp_affinity_set((lgrp_affinity_args_t *)ap));

	case LGRP_SYS_GENERATION:
		return (lgrp_generation(ia));

	case LGRP_SYS_HOME:
		return (lgrp_home_get((idtype_t)ia, (id_t)(uintptr_t)ap));

	case LGRP_SYS_LATENCY:
		mutex_enter(&cpu_lock);
		latency = lgrp_latency(ia, (lgrp_id_t)(uintptr_t)ap);
		mutex_exit(&cpu_lock);
		return (latency);

	case LGRP_SYS_MEMINFO:
		return (meminfo(ia, (struct meminfo *)ap));

	case LGRP_SYS_VERSION:
		return (lgrp_version(ia));

	case LGRP_SYS_SNAPSHOT:
		mutex_enter(&lgrp_snap_lock);
		bufsize = lgrp_snapshot();
		if (ap && ia > 0) {
			if (get_udatamodel() == DATAMODEL_NATIVE)
				bufsize = lgrp_snapshot_copy(ap, ia);
#ifdef	_SYSCALL32_IMPL
			else
				bufsize = lgrp_snapshot_copy32(
				    (caddr32_t)(uintptr_t)ap, ia);
#endif	/* _SYSCALL32_IMPL */
		}
		mutex_exit(&lgrp_snap_lock);
		return (bufsize);

	default:
		break;

	}

	return (set_errno(EINVAL));
}