1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
27 */
28
29/*
30 * sunpm.c builds sunpm.o	"power management framework"
31 *	kernel-resident power management code.  Implements power management
32 *	policy
33 *	Assumes: all backwards compat. device components wake up on &
34 *		 the pm_info pointer in dev_info is initially NULL
35 *
36 * PM - (device) Power Management
37 *
38 * Each device may have 0 or more components.  If a device has no components,
39 * then it can't be power managed.  Each component has 2 or more
40 * power states.
41 *
42 * "Backwards Compatible" (bc) devices:
43 * There are two different types of devices from the point of view of this
44 * code.  The original type, left over from the original PM implementation on
45 * the voyager platform are known in this code as "backwards compatible"
46 * devices (PM_ISBC(dip) returns true).
47 * They are recognized by the pm code by the lack of a pm-components property
48 * and a call made by the driver to pm_create_components(9F).
49 * For these devices, component 0 is special, and represents the power state
50 * of the device.  If component 0 is to be set to power level 0 (off), then
51 * the framework must first call into the driver's detach(9E) routine with
52 * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
53 * After setting component 0 from 0 to a non-zero power level, a call must be
54 * made into the driver's attach(9E) routine with DDI_PM_RESUME.
55 *
56 * Currently, the only way to get a bc device power managed is via a set of
57 * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
58 *
59 * For non-bc devices, the driver describes the components by exporting a
60 * pm-components(9P) property that tells how many components there are,
61 * tells what each component's power state values are, and provides human
62 * readable strings (currently unused) for each component name and power state.
63 * Devices which export pm-components(9P) are automatically power managed
64 * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
65 * after parsing power.conf(4)). The exception to this rule is that power
66 * manageable CPU devices may be automatically managed independently of autopm
67 * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
68 * ioctls) cpupm. If the CPU devices are not managed independently, then they
69 * are managed by autopm. In either case, for automatically power managed
70 * devices, all components are considered independent of each other, and it is
71 * up to the driver to decide when a transition requires saving or restoring
72 * hardware state.
73 *
74 * Each device component also has a threshold time associated with each power
75 * transition (see power.conf(4)), and a busy/idle state maintained by the
76 * driver calling pm_idle_component(9F) and pm_busy_component(9F).
77 * Components are created idle.
78 *
79 * The PM framework provides several functions:
80 * -implement PM policy as described in power.conf(4)
81 *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
82 *  Policies consist of:
83 *    -set threshold values (defaults if none provided by pmconfig)
84 *    -set dependencies among devices
85 *    -enable/disable autopm
86 *    -enable/disable cpupm
87 *    -turn down idle components based on thresholds (if autopm or cpupm is
88 *     enabled) (aka scanning)
89 *    -maintain power states based on dependencies among devices
90 *    -upon request, or when the frame buffer powers off, attempt to turn off
91 *     all components that are idle or become idle over the next (10 sec)
92 *     period in an attempt to get down to an EnergyStar compliant state
93 *    -prevent powering off of a device which exported the
94 *     pm-no-involuntary-power-cycles property without active involvement of
95 *     the device's driver (so no removing power when the device driver is
96 *     not attached)
97 * -provide a mechanism for a device driver to request that a device's component
98 *  be brought back to the power level necessary for the use of the device
99 * -allow a process to directly control the power levels of device components
100 *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
101 * -ensure that the console frame buffer is powered up before being referenced
102 *  via prom_printf() or other prom calls that might generate console output
103 * -maintain implicit dependencies (e.g. parent must be powered up if child is)
104 * -provide "backwards compatible" behavior for devices without pm-components
105 *  property
106 *
107 * Scanning:
 * Whenever autopm or cpupm is enabled, the framework attempts to bring each
109 * component of each managed device to its lowest power based on the threshold
110 * of idleness associated with each transition and the busy/idle state of the
111 * component.
112 *
113 * The actual work of this is done by pm_scan_dev(), which cycles through each
114 * component of a device, checking its idleness against its current threshold,
115 * and calling pm_set_power() as appropriate to change the power level.
116 * This function also indicates when it would next be profitable to scan the
117 * device again, and a new scan is scheduled after that time.
118 *
119 * Dependencies:
120 * It is possible to establish a dependency between the power states of two
121 * otherwise unrelated devices.  This is currently done to ensure that the
122 * cdrom is always up whenever the console framebuffer is up, so that the user
123 * can insert a cdrom and see a popup as a result.
124 *
125 * The dependency terminology used in power.conf(4) is not easy to understand,
126 * so we've adopted a different terminology in the implementation.  We write
127 * of a "keeps up" and a "kept up" device.  A relationship can be established
128 * where one device keeps up another.  That means that if the keepsup device
129 * has any component that is at a non-zero power level, all components of the
130 * "kept up" device must be brought to full power.  This relationship is
131 * asynchronous.  When the keeping device is powered up, a request is queued
132 * to a worker thread to bring up the kept device.  The caller does not wait.
133 * Scan will not turn down a kept up device.
134 *
135 * Direct PM:
136 * A device may be directly power managed by a process.  If a device is
137 * directly pm'd, then it will not be scanned, and dependencies will not be
 * enforced.  If a directly pm'd device's driver requests a power change (via
139 * pm_raise_power(9F)), then the request is blocked and notification is sent
140 * to the controlling process, which must issue the requested power change for
141 * the driver to proceed.
142 *
143 */
144
145#include <sys/types.h>
146#include <sys/errno.h>
147#include <sys/callb.h>		/* callback registration during CPR */
148#include <sys/conf.h>		/* driver flags and functions */
149#include <sys/open.h>		/* OTYP_CHR definition */
150#include <sys/stat.h>		/* S_IFCHR definition */
151#include <sys/pathname.h>	/* name -> dev_info xlation */
152#include <sys/ddi_impldefs.h>	/* dev_info node fields */
153#include <sys/kmem.h>		/* memory alloc stuff */
154#include <sys/debug.h>
155#include <sys/archsystm.h>
156#include <sys/pm.h>
157#include <sys/ddi.h>
158#include <sys/sunddi.h>
159#include <sys/sunndi.h>
160#include <sys/sunpm.h>
161#include <sys/epm.h>
162#include <sys/vfs.h>
163#include <sys/mode.h>
164#include <sys/mkdev.h>
165#include <sys/promif.h>
166#include <sys/consdev.h>
167#include <sys/esunddi.h>
168#include <sys/modctl.h>
169#include <sys/fs/ufs_fs.h>
170#include <sys/note.h>
171#include <sys/taskq.h>
172#include <sys/bootconf.h>
173#include <sys/reboot.h>
174#include <sys/spl.h>
175#include <sys/disp.h>
176#include <sys/sobject.h>
177#include <sys/sunmdi.h>
178#include <sys/systm.h>
179#include <sys/cpuvar.h>
180#include <sys/cyclic.h>
181#include <sys/uadmin.h>
182#include <sys/srn.h>
183
184
185/*
186 * PM LOCKING
187 *	The list of locks:
188 * Global pm mutex locks.
189 *
190 * pm_scan_lock:
191 *		It protects the timeout id of the scan thread, and the value
192 *		of autopm_enabled and cpupm.  This lock is not held
193 *		concurrently with any other PM locks.
194 *
195 * pm_clone_lock:	Protects the clone list and count of poll events
196 *		pending for the pm driver.
197 *		Lock ordering:
198 *			pm_clone_lock -> pm_pscc_interest_rwlock,
199 *			pm_clone_lock -> pm_pscc_direct_rwlock.
200 *
201 * pm_rsvp_lock:
202 *		Used to synchronize the data structures used for processes
203 *		to rendezvous with state change information when doing
204 *		direct PM.
205 *		Lock ordering:
206 *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
207 *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
208 *			pm_rsvp_lock -> pm_clone_lock.
209 *
210 * ppm_lock:	protects the list of registered ppm drivers
211 *		Lock ordering:
212 *			ppm_lock -> ppm driver unit_lock
213 *
214 * pm_compcnt_lock:
215 *		Protects count of components that are not at their lowest
216 *		power level.
217 *		Lock ordering:
218 *			pm_compcnt_lock -> ppm_lock.
219 *
220 * pm_dep_thread_lock:
221 *		Protects work list for pm_dep_thread.  Not taken concurrently
222 *		with any other pm lock.
223 *
224 * pm_remdrv_lock:
225 *		Serializes the operation of removing noinvol data structure
226 *		entries for a branch of the tree when a driver has been
227 *		removed from the system (modctl_rem_major).
228 *		Lock ordering:
229 *			pm_remdrv_lock -> pm_noinvol_rwlock.
230 *
231 * pm_cfb_lock: (High level spin lock)
 *		Protects the count of how many components of the console
 *		frame buffer are off (so we know if we have to bring up the
 *		console as a result of a prom_printf, etc.).
235 *		No other locks are taken while holding this lock.
236 *
237 * pm_loan_lock:
238 *		Protects the lock_loan list.  List is used to record that one
239 *		thread has acquired a power lock but has launched another thread
240 *		to complete its processing.  An entry in the list indicates that
241 *		the worker thread can borrow the lock held by the other thread,
242 *		which must block on the completion of the worker.  Use is
243 *		specific to module loading.
244 *		No other locks are taken while holding this lock.
245 *
246 * Global PM rwlocks
247 *
248 * pm_thresh_rwlock:
249 *		Protects the list of thresholds recorded for future use (when
250 *		devices attach).
251 *		Lock ordering:
252 *			pm_thresh_rwlock -> devi_pm_lock
253 *
254 * pm_noinvol_rwlock:
255 *		Protects list of detached nodes that had noinvol registered.
256 *		No other PM locks are taken while holding pm_noinvol_rwlock.
257 *
258 * pm_pscc_direct_rwlock:
259 *		Protects the list that maps devices being directly power
260 *		managed to the processes that manage them.
261 *		Lock ordering:
262 *			pm_pscc_direct_rwlock -> psce_lock
263 *
 * pm_pscc_interest_rwlock:
265 *		Protects the list that maps state change events to processes
266 *		that want to know about them.
267 *		Lock ordering:
268 *			pm_pscc_interest_rwlock -> psce_lock
269 *
270 * per-dip locks:
271 *
272 * Each node has these per-dip locks, which are only used if the device is
273 * a candidate for power management (e.g. has pm components)
274 *
275 * devi_pm_lock:
276 *		Protects all power management state of the node except for
277 *		power level, which is protected by ndi_devi_enter().
278 *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
279 *		Lock ordering:
280 *			devi_pm_lock -> pm_rsvp_lock,
281 *			devi_pm_lock -> pm_dep_thread_lock,
282 *			devi_pm_lock -> pm_noinvol_rwlock,
283 *			devi_pm_lock -> power lock
284 *
285 * power lock (ndi_devi_enter()):
286 *		Since changing power level is possibly a slow operation (30
287 *		seconds to spin up a disk drive), this is locked separately.
288 *		Since a call into the driver to change the power level of one
289 *		component may result in a call back into the framework to change
290 *		the power level of another, this lock allows re-entrancy by
 *		the same thread (ndi_devi_enter is used for this because
 *		the USB framework uses ndi_devi_enter in its power entry point,
 *		and use of any other lock would produce a deadlock).
294 *
295 * devi_pm_busy_lock:
296 *		This lock protects the integrity of the busy count.  It is
 *		only taken by pm_busy_component() and pm_idle_component() and
 *		some code that adjusts the busy time after the timer gets set
299 *		up or after a CPR operation.  It is per-dip to keep from
300 *		single-threading all the disk drivers on a system.
301 *		It could be per component instead, but most devices have
302 *		only one component.
303 *		No other PM locks are taken while holding this lock.
304 *
305 */
306
307static int stdout_is_framebuffer;
308static kmutex_t	e_pm_power_lock;
309static kmutex_t pm_loan_lock;
310kmutex_t	pm_scan_lock;
311callb_id_t	pm_cpr_cb_id;
312callb_id_t	pm_panic_cb_id;
313callb_id_t	pm_halt_cb_id;
314int		pm_comps_notlowest;	/* no. of comps not at lowest power */
315int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
316
317clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
318clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
319clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
320
321#define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
322				    pm_default_min_scan)
323
324static int pm_busop_set_power(dev_info_t *,
325    void *, pm_bus_power_op_t, void *, void *);
326static int pm_busop_match_request(dev_info_t *, void *);
327static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
328static void e_pm_set_max_power(dev_info_t *, int, int);
329static int e_pm_get_max_power(dev_info_t *, int);
330
331/*
332 * Dependency Processing is done thru a seperate thread.
333 */
334kmutex_t	pm_dep_thread_lock;
335kcondvar_t	pm_dep_thread_cv;
336pm_dep_wk_t	*pm_dep_thread_workq = NULL;
337pm_dep_wk_t	*pm_dep_thread_tail = NULL;
338
339/*
340 * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
341 * power managing things in single user mode that have been suppressed via
342 * power.conf entries.  Protected by pm_scan_lock.
343 */
344int		autopm_enabled;
345
346/*
347 * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
348 * to define the power management behavior of CPU devices separate from
349 * autopm. Protected by pm_scan_lock.
350 */
351pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
352
353/*
354 * Defines the default mode of operation for CPU power management,
355 * either the polling implementation, or the event based dispatcher driven
356 * implementation.
357 */
358pm_cpupm_t	cpupm_default_mode = PM_CPUPM_EVENT;
359
360/*
361 * AutoS3 depends on autopm being enabled, and must be enabled by
362 * PM_START_AUTOS3 command.
363 */
364int		autoS3_enabled;
365
366#if !defined(__sparc)
367/*
368 * on sparc these live in fillsysinfo.c
369 *
370 * If this variable is non-zero, cpr should return "not supported" when
371 * it is queried even though it would normally be supported on this platform.
372 */
373int cpr_supported_override;
374
375/*
376 * Some platforms may need to support CPR even in the absence of
377 * having the correct platform id information.  If this
378 * variable is non-zero, cpr should proceed even in the absence
379 * of otherwise being qualified.
380 */
381int cpr_platform_enable = 0;
382
383#endif
384
385/*
386 * pm_S3_enabled indicates that we believe the platform can support S3,
387 * which we get from pmconfig(1M)
388 */
389int		pm_S3_enabled;
390
391/*
392 * This flag is true while processes are stopped for a checkpoint/resume.
393 * Controlling processes of direct pm'd devices are not available to
394 * participate in power level changes, so we bypass them when this is set.
395 */
396static int	pm_processes_stopped;
397
#ifdef	DEBUG

/*
 * Debug mask; see common/sys/epm.h for PMD_* values.
 */
uint_t		pm_debug = 0;

/*
 * While pm_divertdebug is set, PMD() makes no prom_printf calls, so debug
 * output cannot bring up the console frame buffer.  Clear it before
 * setting pm_debug to have PMD output go to the console.
 *
 * pm_set_power() increments pm_divertdebug when dip == cfb_dip (to avoid
 * deadlocks) and decrements it again on its way out.
 */
uint_t		pm_divertdebug = 1;
volatile uint_t	pm_debug_to_console = 0;
kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */

void		prdeps(char *);
#endif
421
422/* Globals */
423
424/*
425 * List of recorded thresholds and dependencies
426 */
427pm_thresh_rec_t *pm_thresh_head;
428krwlock_t pm_thresh_rwlock;
429
430pm_pdr_t *pm_dep_head;
431static int pm_unresolved_deps = 0;
432static int pm_prop_deps = 0;
433
434/*
435 * List of devices that exported no-involuntary-power-cycles property
436 */
437pm_noinvol_t *pm_noinvol_head;
438
439/*
440 * Locks used in noinvol processing
441 */
442krwlock_t pm_noinvol_rwlock;
443kmutex_t pm_remdrv_lock;
444
445int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
446int pm_system_idle_threshold;
447int pm_cpu_idle_threshold;
448
449/*
450 * By default nexus has 0 threshold, and depends on its children to keep it up
451 */
452int pm_default_nexus_threshold = 0;
453
454/*
455 * Data structures shared with common/io/pm.c
456 */
457kmutex_t	pm_clone_lock;
458kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
459uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
460unsigned char	pm_interest[PM_MAX_CLONE];
461struct pollhead	pm_pollhead;
462
463/*
464 * Data structures shared with common/io/srn.c
465 */
466kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
467void (*srn_signal)(int type, int event);
468int srn_inuse;				/* stop srn detach */
469
470extern int	hz;
471extern char	*platform_module_list[];
472
473/*
474 * Wrappers for use in ddi_walk_devs
475 */
476
477static int		pm_set_dev_thr_walk(dev_info_t *, void *);
478static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
479static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
480static int		pm_discard_dep_walk(dev_info_t *, void *);
481#ifdef DEBUG
482static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
483#endif
484
485/*
486 * Routines for managing noinvol devices
487 */
488int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
489void			pm_noinvol_update_node(dev_info_t *,
490			    pm_bp_noinvol_t *req);
491
492kmutex_t pm_rsvp_lock;
493kmutex_t pm_compcnt_lock;
494krwlock_t pm_pscc_direct_rwlock;
495krwlock_t pm_pscc_interest_rwlock;
496
497#define	PSC_INTEREST	0	/* belongs to interest psc list */
498#define	PSC_DIRECT	1	/* belongs to direct psc list */
499
500pscc_t *pm_pscc_interest;
501pscc_t *pm_pscc_direct;
502
/* Major number of the driver bound to dip. */
#define	PM_MAJOR(dip) ddi_driver_major(dip)

/*
 * Evaluates to NEXUS_DRV() of the node's driver ops, or to 0 when the
 * node has no major number (DDI_MAJOR_T_NONE).
 */
#define	PM_IS_NEXUS(dip) ((PM_MAJOR(dip) != DDI_MAJOR_T_NONE) ? \
	NEXUS_DRV(devopsp[PM_MAJOR(dip)]) : 0)

/* Power level transitions from zero to non-zero and back. */
#define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
#define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)

/*
 * A node takes part in the pm_comps_notlowest accounting when it is not a
 * nexus, or when it has PMC_DEV_THRESH or PMC_COMP_THRESH set in its flags.
 */
#define	PM_NOTLOWEST_COUNTED(dip)					\
	(!PM_IS_NEXUS(dip) ||						\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH)))

/*
 * Bump pm_comps_notlowest under pm_compcnt_lock.  When the count is about
 * to leave zero, the ppm drivers are told PM_NOT_ALL_LOWEST first.
 * The expansion site must have "pmf" in scope for the PMD() call.
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (PM_NOTLOWEST_COUNTED(dip)) {				\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}

/*
 * Drop pm_comps_notlowest under pm_compcnt_lock.  When the count reaches
 * zero, the ppm drivers are told PM_ALL_LOWEST.
 * The expansion site must have "pmf" in scope for the PMD() call.
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (PM_NOTLOWEST_COUNTED(dip)) {				\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
		    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))	\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
534
535/*
536 * console frame-buffer power-management is not enabled when
537 * debugging services are present.  to override, set pm_cfb_override
538 * to non-zero.
539 */
540uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
541kmutex_t pm_cfb_lock;
542int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
543#ifdef DEBUG
544int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
545#else
546int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
547#endif
548
549static dev_info_t *cfb_dip = 0;
550static dev_info_t *cfb_dip_detaching = 0;
551uint_t cfb_inuse = 0;
552static ddi_softintr_t pm_soft_id;
553static boolean_t pm_soft_pending;
554int	pm_scans_disabled = 0;
555
556/*
557 * A structure to record the fact that one thread has borrowed a lock held
558 * by another thread.  The context requires that the lender block on the
559 * completion of the borrower.
560 */
561typedef struct lock_loan {
562	struct lock_loan	*pmlk_next;
563	kthread_t		*pmlk_borrower;
564	kthread_t		*pmlk_lender;
565	dev_info_t		*pmlk_dip;
566} lock_loan_t;
567static lock_loan_t lock_loan_head;	/* list head is a dummy element */
568
/*
 * PMD_FUNC declares a local string holding the function name for use in
 * PMD() output.  It only produces a declaration when both DEBUG and
 * PMDDEBUG are defined; otherwise it expands to nothing.
 */
#if defined(DEBUG) && defined(PMDDEBUG)
#define	PMD_FUNC(func, name)	char *(func) = (name);
#else	/* !DEBUG || !PMDDEBUG */
#define	PMD_FUNC(func, name)
#endif	/* DEBUG && PMDDEBUG */
578
579
580/*
581 * Must be called before first device (including pseudo) attach
582 */
583void
584pm_init_locks(void)
585{
586	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
587	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
588	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
589	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
590	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
591	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
592	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
593	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
594	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
595}
596
597static int pm_reset_timestamps(dev_info_t *, void *);
598
599static boolean_t
600pm_cpr_callb(void *arg, int code)
601{
602	_NOTE(ARGUNUSED(arg))
603	static int auto_save;
604	static pm_cpupm_t cpupm_save;
605
606	switch (code) {
607	case CB_CODE_CPR_CHKPT:
608		/*
609		 * Cancel scan or wait for scan in progress to finish
610		 * Other threads may be trying to restart the scan, so we
611		 * have to keep at it unil it sticks
612		 */
613		mutex_enter(&pm_scan_lock);
614		ASSERT(!pm_scans_disabled);
615		pm_scans_disabled = 1;
616		auto_save = autopm_enabled;
617		autopm_enabled = 0;
618		cpupm_save = cpupm;
619		cpupm = PM_CPUPM_NOTSET;
620		mutex_exit(&pm_scan_lock);
621		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
622		break;
623
624	case CB_CODE_CPR_RESUME:
625		ASSERT(!autopm_enabled);
626		ASSERT(cpupm == PM_CPUPM_NOTSET);
627		ASSERT(pm_scans_disabled);
628		pm_scans_disabled = 0;
629		/*
630		 * Call pm_reset_timestamps to reset timestamps of each
631		 * device to the time when the system is resumed so that their
632		 * idleness can be re-calculated. That's to avoid devices from
633		 * being powered down right after resume if the system was in
634		 * suspended mode long enough.
635		 */
636		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
637
638		autopm_enabled = auto_save;
639		cpupm = cpupm_save;
640		/*
641		 * If there is any auto-pm device, get the scanning
642		 * going. Otherwise don't bother.
643		 */
644		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
645		break;
646	}
647	return (B_TRUE);
648}
649
650/*
651 * This callback routine is called when there is a system panic.  This function
652 * exists for prototype matching.
653 */
654static boolean_t
655pm_panic_callb(void *arg, int code)
656{
657	_NOTE(ARGUNUSED(arg, code))
658	void pm_cfb_check_and_powerup(void);
659	PMD(PMD_CFB, ("pm_panic_callb\n"))
660	pm_cfb_check_and_powerup();
661	return (B_TRUE);
662}
663
664static boolean_t
665pm_halt_callb(void *arg, int code)
666{
667	_NOTE(ARGUNUSED(arg, code))
668	return (B_TRUE);
669}
670
671static void pm_dep_thread(void);
672
673/*
674 * This needs to be called after the root and platform drivers are loaded
675 * and be single-threaded with respect to driver attach/detach
676 */
677void
678pm_init(void)
679{
680	PMD_FUNC(pmf, "pm_init")
681	char **mod;
682	extern pri_t minclsyspri;
683
684	pm_comps_notlowest = 0;
685	pm_system_idle_threshold = pm_default_idle_threshold;
686	pm_cpu_idle_threshold = 0;
687
688	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
689	    CB_CL_CPR_PM, "pm_cpr");
690	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
691	    CB_CL_PANIC, "pm_panic");
692	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
693	    CB_CL_HALT, "pm_halt");
694
695	/*
696	 * Create a thread to do dependency processing.
697	 */
698	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
699	    TS_RUN, minclsyspri);
700
701	/*
702	 * loadrootmodules already loaded these ppm drivers, now get them
703	 * attached so they can claim the root drivers as they attach
704	 */
705	for (mod = platform_module_list; *mod; mod++) {
706		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
707			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
708			    *mod);
709		} else {
710			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
711			    ddi_major_to_name(ddi_name_to_major(*mod))))
712		}
713	}
714}
715
716/*
717 * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
718 * enabled) when device becomes power managed or after a failed detach and
719 * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
720 * a CPR resume to get all the devices scanning again.
721 */
722void
723pm_scan_init(dev_info_t *dip)
724{
725	PMD_FUNC(pmf, "scan_init")
726	pm_scan_t	*scanp;
727
728	ASSERT(!PM_ISBC(dip));
729
730	PM_LOCK_DIP(dip);
731	scanp = PM_GET_PM_SCAN(dip);
732	if (!scanp) {
733		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
734		    pmf, PM_DEVICE(dip)))
735		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
736		DEVI(dip)->devi_pm_scan = scanp;
737	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
738		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
739		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
740		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
741	}
742	PM_UNLOCK_DIP(dip);
743}
744
745/*
746 * pm_scan_fini - remove pm scan data structure when stopping pm on the device
747 */
748void
749pm_scan_fini(dev_info_t *dip)
750{
751	PMD_FUNC(pmf, "scan_fini")
752	pm_scan_t	*scanp;
753
754	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
755	ASSERT(!PM_ISBC(dip));
756	PM_LOCK_DIP(dip);
757	scanp = PM_GET_PM_SCAN(dip);
758	if (!scanp) {
759		PM_UNLOCK_DIP(dip);
760		return;
761	}
762
763	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
764	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
765
766	kmem_free(scanp, sizeof (pm_scan_t));
767	DEVI(dip)->devi_pm_scan = NULL;
768	PM_UNLOCK_DIP(dip);
769}
770
771/*
772 * Given a pointer to a component struct, return the current power level
773 * (struct contains index unless it is a continuous level).
774 * Located here in hopes of getting both this and dev_is_needed into the
775 * cache together
776 */
777static int
778cur_power(pm_component_t *cp)
779{
780	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
781		return (cp->pmc_cur_pwr);
782
783	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
784}
785
786static char *
787pm_decode_direction(int direction)
788{
789	switch (direction) {
790	case PM_LEVEL_UPONLY:
791		return ("up");
792
793	case PM_LEVEL_EXACT:
794		return ("exact");
795
796	case PM_LEVEL_DOWNONLY:
797		return ("down");
798
799	default:
800		return ("INVALID DIRECTION");
801	}
802}
803
804char *
805pm_decode_op(pm_bus_power_op_t op)
806{
807	switch (op) {
808	case BUS_POWER_CHILD_PWRCHG:
809		return ("CHILD_PWRCHG");
810	case BUS_POWER_NEXUS_PWRUP:
811		return ("NEXUS_PWRUP");
812	case BUS_POWER_PRE_NOTIFICATION:
813		return ("PRE_NOTIFICATION");
814	case BUS_POWER_POST_NOTIFICATION:
815		return ("POST_NOTIFICATION");
816	case BUS_POWER_HAS_CHANGED:
817		return ("HAS_CHANGED");
818	case BUS_POWER_NOINVOL:
819		return ("NOINVOL");
820	default:
821		return ("UNKNOWN OP");
822	}
823}
824
825/*
826 * Returns true if level is a possible (valid) power level for component
827 */
828int
829e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
830{
831	PMD_FUNC(pmf, "e_pm_valid_power")
832	pm_component_t *cp = PM_CP(dip, cmpt);
833	int i;
834	int *ip = cp->pmc_comp.pmc_lvals;
835	int limit = cp->pmc_comp.pmc_numlevels;
836
837	if (level < 0)
838		return (0);
839	for (i = 0; i < limit; i++) {
840		if (level == *ip++)
841			return (1);
842	}
843#ifdef DEBUG
844	if (pm_debug & PMD_FAIL) {
845		ip = cp->pmc_comp.pmc_lvals;
846
847		for (i = 0; i < limit; i++)
848			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
849			    pmf, i, *ip++))
850	}
851#endif
852	return (0);
853}
854
855static int pm_start(dev_info_t *dip);
856/*
857 * Returns true if device is pm'd (after calling pm_start if need be)
858 */
859int
860e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
861{
862	pm_info_t *info;
863
864	/*
865	 * Check if the device is power managed if not.
866	 * To make the common case (device is power managed already)
867	 * fast, we check without the lock.  If device is not already
868	 * power managed, then we take the lock and the long route through
869	 * go get it managed.  Devices never go unmanaged until they
870	 * detach.
871	 */
872	info = PM_GET_PM_INFO(dip);
873	if (!info) {
874		if (!DEVI_IS_ATTACHING(dip)) {
875			return (0);
876		}
877		if (pm_start(dip) != DDI_SUCCESS) {
878			return (0);
879		}
880		info = PM_GET_PM_INFO(dip);
881	}
882	ASSERT(info);
883	if (infop != NULL)
884		*infop = info;
885	return (1);
886}
887
888int
889e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
890{
891	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
892		if (cpp != NULL)
893			*cpp = PM_CP(dip, cmpt);
894		return (1);
895	} else {
896		return (0);
897	}
898}
899
900/*
901 * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
902 */
903static int
904dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
905{
906	PMD_FUNC(pmf, "din")
907	pm_component_t *cp;
908	char *pathbuf;
909	int result;
910
911	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
912	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
913	    !e_pm_valid_power(dip, cmpt, level))
914		return (DDI_FAILURE);
915
916	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
917	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
918	    level, cur_power(cp)))
919
920	if (pm_set_power(dip, cmpt, level,  direction,
921	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
922		if (direction == PM_LEVEL_UPONLY) {
923			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
924			(void) ddi_pathname(dip, pathbuf);
925			cmn_err(CE_WARN, "Device %s failed to power up.",
926			    pathbuf);
927			kmem_free(pathbuf, MAXPATHLEN);
928		}
929		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
930		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
931		    pm_decode_direction(direction), level, result))
932		return (DDI_FAILURE);
933	}
934
935	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
936	    PM_DEVICE(dip)))
937	pm_rescan(dip);
938	return (DDI_SUCCESS);
939}
940
/*
 * We can get multiple pm_rescan() threads, if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t of the device to rescan; it is a void * because
 * this function is also installed as a timeout() callback (see below and
 * pm_scan()).  Nothing is done when scans are globally disabled, the device
 * is not scanable, it has no pm info or scan state, or PM_SCAN_STOP is set.
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress: just ask it to go around again */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it since we are about
		 * to scan now.  The id is cleared under the lock and the lock
		 * is dropped around untimeout() (this function is itself the
		 * timeout callback and takes the same lock).
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP) ==
	    TASKQID_INVALID) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		if (scanp->ps_scan_id) {
			/*
			 * Another timeout was scheduled while the lock was
			 * dropped above; cancel it, again without the lock.
			 */
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			if (scanp->ps_scan_id) {
				/* yet another one appeared: let it stand */
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/*
		 * Retry later from a timeout: after pm_id_ticks when idle-down
		 * is in effect, otherwise after PM_MIN_SCAN(dip) seconds.
		 */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (PM_MIN_SCAN(dip) * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* taskq_dispatch() succeeded; pm_scan() clears this flag */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
1030
/*
 * Taskq entry point dispatched by pm_rescan(): runs pm_scan_dev() on the
 * device passed in arg (a dev_info_t *), repeating while PM_SCAN_AGAIN is
 * raised during the scan, then schedules the next pm_rescan() timeout from
 * the interval (in seconds) that pm_scan_dev() returned.
 *
 * PM_SCANNING marks the scan as in progress so that only one thread runs
 * pm_scan_dev() for a device at a time; PM_SCAN_DISPATCHED is cleared on
 * every path so pm_rescan() may dispatch again.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/* scanning not allowed right now: drop any pending requests */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* hand the work to the thread that is already scanning */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * Scan with the dip lock dropped; PM_SCAN_AGAIN is cleared before each
	 * pass so that a request arriving during the pass causes another one.
	 */
	do {
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow a long */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* a rescan timeout is already pending; keep it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1110
1111void
1112pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1113{
1114	int components = PM_NUMCMPTS(dip);
1115	int i;
1116
1117	ASSERT(components > 0);
1118	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1119	for (i = 0; i < components; i++) {
1120		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1121	}
1122	PM_UNLOCK_BUSY(dip);
1123}
1124
1125/*
1126 * Returns true if device needs to be kept up because it exported the
1127 * "no-involuntary-power-cycles" property or we're pretending it did (console
1128 * fb case) or it is an ancestor of such a device and has used up the "one
1129 * free cycle" allowed when all such leaf nodes have voluntarily powered down
1130 * upon detach
1131 */
1132int
1133pm_noinvol(dev_info_t *dip)
1134{
1135	PMD_FUNC(pmf, "noinvol")
1136
1137	/*
1138	 * This doesn't change over the life of a driver, so no locking needed
1139	 */
1140	if (PM_IS_CFB(dip)) {
1141		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1142		    pmf, PM_DEVICE(dip)))
1143		return (1);
1144	}
1145	/*
1146	 * Not an issue if no such kids
1147	 */
1148	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1149#ifdef DEBUG
1150		if (DEVI(dip)->devi_pm_volpmd != 0) {
1151			dev_info_t *pdip = dip;
1152			do {
1153				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1154				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1155				    DEVI(pdip)->devi_pm_noinvolpm,
1156				    DEVI(pdip)->devi_pm_volpmd))
1157				pdip = ddi_get_parent(pdip);
1158			} while (pdip);
1159		}
1160#endif
1161		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1162		return (0);
1163	}
1164
1165	/*
1166	 * Since we now maintain the counts correct at every node, we no longer
1167	 * need to look up the tree.  An ancestor cannot use up the free cycle
1168	 * without the children getting their counts adjusted.
1169	 */
1170
1171#ifdef	DEBUG
1172	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1173		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1174		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1175		    PM_DEVICE(dip)))
1176#endif
1177	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1178}
1179
1180static int	cur_threshold(dev_info_t *, int);
1181static int	pm_next_lower_power(pm_component_t *, int);
1182
1183/*
1184 * This function performs the actual scanning of the device.
1185 * It attempts to power off the indicated device's components if they have
1186 * been idle and other restrictions are met.
1187 * pm_scan_dev calculates and returns when the next scan should happen for
1188 * this device.
1189 */
1190time_t
1191pm_scan_dev(dev_info_t *dip)
1192{
1193	PMD_FUNC(pmf, "scan_dev")
1194	pm_scan_t	*scanp;
1195	time_t		*timestamp, idletime, now, thresh;
1196	time_t		timeleft = 0;
1197#ifdef PMDDEBUG
1198	int		curpwr;
1199#endif
1200	int		i, nxtpwr, pwrndx, unused;
1201	size_t		size;
1202	pm_component_t	 *cp;
1203	dev_info_t	*pdip = ddi_get_parent(dip);
1204	int		circ;
1205	clock_t		min_scan = pm_default_min_scan;
1206
1207	/*
1208	 * skip attaching device
1209	 */
1210	if (DEVI_IS_ATTACHING(dip)) {
1211		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1212		    pmf, PM_DEVICE(dip), min_scan))
1213		return (min_scan);
1214	}
1215
1216	PM_LOCK_DIP(dip);
1217	scanp = PM_GET_PM_SCAN(dip);
1218	min_scan = PM_MIN_SCAN(dip);
1219	ASSERT(scanp && PM_GET_PM_INFO(dip));
1220
1221	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1222	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1223	    PM_KUC(dip)))
1224
1225	/* no scan under the following conditions */
1226	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1227	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1228	    (PM_KUC(dip) != 0) ||
1229	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1230		PM_UNLOCK_DIP(dip);
1231		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1232		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1233		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1234		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1235		    cpupm, PM_KUC(dip),
1236		    PM_ISDIRECT(dip) ? "is" : "is not",
1237		    pm_noinvol(dip) ? "is" : "is not"))
1238		return (LONG_MAX);
1239	}
1240	PM_UNLOCK_DIP(dip);
1241
1242	if (!ndi_devi_tryenter(pdip, &circ)) {
1243		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1244		    pmf, PM_DEVICE(pdip)))
1245		return ((time_t)1);
1246	}
1247	now = gethrestime_sec();
1248	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1249	timestamp = kmem_alloc(size, KM_SLEEP);
1250	pm_get_timestamps(dip, timestamp);
1251
1252	/*
1253	 * Since we removed support for backwards compatible devices,
1254	 * (see big comment at top of file)
1255	 * it is no longer required to deal with component 0 last.
1256	 */
1257	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1258		/*
1259		 * If already off (an optimization, perhaps)
1260		 */
1261		cp = PM_CP(dip, i);
1262		pwrndx = cp->pmc_cur_pwr;
1263#ifdef PMDDEBUG
1264		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1265		    PM_LEVEL_UNKNOWN :
1266		    cp->pmc_comp.pmc_lvals[pwrndx];
1267#endif
1268
1269		if (pwrndx == 0) {
1270			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1271			    "lowest\n", pmf, PM_DEVICE(dip), i))
1272			/* skip device if off or at its lowest */
1273			continue;
1274		}
1275
1276		thresh = cur_threshold(dip, i);		/* comp i threshold */
1277		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1278			/* were busy or newly became busy by another thread */
1279			if (timeleft == 0)
1280				timeleft = max(thresh, min_scan);
1281			else
1282				timeleft = min(
1283				    timeleft, max(thresh, min_scan));
1284			continue;
1285		}
1286
1287		idletime = now - timestamp[i];		/* idle time */
1288		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1289		    pmf, PM_DEVICE(dip), i, idletime))
1290		if (idletime >= thresh || PM_IS_PID(dip)) {
1291			nxtpwr = pm_next_lower_power(cp, pwrndx);
1292			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1293			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1294			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1295			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1296			    PM_CURPOWER(dip, i) != nxtpwr) {
1297				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1298				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1299				    i, curpwr, nxtpwr))
1300				timeleft = min_scan;
1301				continue;
1302			} else {
1303				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1304				    "%d->%d, GOOD curpwr %d\n", pmf,
1305				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1306				    cur_power(cp)))
1307
1308				if (nxtpwr == 0)	/* component went off */
1309					continue;
1310
1311				/*
1312				 * scan to next lower level
1313				 */
1314				if (timeleft == 0)
1315					timeleft = max(
1316					    1, cur_threshold(dip, i));
1317				else
1318					timeleft = min(timeleft,
1319					    max(1, cur_threshold(dip, i)));
1320				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1321				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1322				    i, timeleft))
1323			}
1324		} else {	/* comp not idle long enough */
1325			if (timeleft == 0)
1326				timeleft = thresh - idletime;
1327			else
1328				timeleft = min(timeleft, (thresh - idletime));
1329			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1330			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1331		}
1332	}
1333	ndi_devi_exit(pdip, circ);
1334	kmem_free(timestamp, size);
1335	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1336	    PM_DEVICE(dip), timeleft))
1337
1338	/*
1339	 * if components are already at lowest level, timeleft is left 0
1340	 */
1341	return ((timeleft == 0) ? LONG_MAX : timeleft);
1342}
1343
1344/*
1345 * pm_scan_stop - cancel scheduled pm_rescan,
1346 *                wait for termination of dispatched pm_scan thread
1347 *                     and active pm_scan_dev thread.
1348 */
1349void
1350pm_scan_stop(dev_info_t *dip)
1351{
1352	PMD_FUNC(pmf, "scan_stop")
1353	pm_scan_t	*scanp;
1354	timeout_id_t	scanid;
1355
1356	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1357	PM_LOCK_DIP(dip);
1358	scanp = PM_GET_PM_SCAN(dip);
1359	if (!scanp) {
1360		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1361		    pmf, PM_DEVICE(dip)))
1362		PM_UNLOCK_DIP(dip);
1363		return;
1364	}
1365	scanp->ps_scan_flags |= PM_SCAN_STOP;
1366
1367	/* cancel scheduled scan taskq */
1368	while (scanp->ps_scan_id) {
1369		scanid = scanp->ps_scan_id;
1370		scanp->ps_scan_id = 0;
1371		PM_UNLOCK_DIP(dip);
1372		(void) untimeout(scanid);
1373		PM_LOCK_DIP(dip);
1374	}
1375
1376	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1377		PM_UNLOCK_DIP(dip);
1378		delay(1);
1379		PM_LOCK_DIP(dip);
1380	}
1381	PM_UNLOCK_DIP(dip);
1382	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1383}
1384
1385int
1386pm_scan_stop_walk(dev_info_t *dip, void *arg)
1387{
1388	_NOTE(ARGUNUSED(arg))
1389
1390	if (!PM_GET_PM_SCAN(dip))
1391		return (DDI_WALK_CONTINUE);
1392	ASSERT(!PM_ISBC(dip));
1393	pm_scan_stop(dip);
1394	return (DDI_WALK_CONTINUE);
1395}
1396
1397/*
1398 * Converts a power level value to its index
1399 */
1400static int
1401power_val_to_index(pm_component_t *cp, int val)
1402{
1403	int limit, i, *ip;
1404
1405	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1406	    val != PM_LEVEL_EXACT);
1407	/*  convert power value into index (i) */
1408	limit = cp->pmc_comp.pmc_numlevels;
1409	ip = cp->pmc_comp.pmc_lvals;
1410	for (i = 0; i < limit; i++)
1411		if (val == *ip++)
1412			return (i);
1413	return (-1);
1414}
1415
1416/*
1417 * Converts a numeric power level to a printable string
1418 */
1419static char *
1420power_val_to_string(pm_component_t *cp, int val)
1421{
1422	int index;
1423
1424	if (val == PM_LEVEL_UPONLY)
1425		return ("<UPONLY>");
1426
1427	if (val == PM_LEVEL_UNKNOWN ||
1428	    (index = power_val_to_index(cp, val)) == -1)
1429		return ("<LEVEL_UNKNOWN>");
1430
1431	return (cp->pmc_comp.pmc_lnames[index]);
1432}
1433
1434/*
1435 * Return true if this node has been claimed by a ppm.
1436 */
1437static int
1438pm_ppm_claimed(dev_info_t *dip)
1439{
1440	return (PPM(dip) != NULL);
1441}
1442
1443/*
1444 * A node which was voluntarily power managed has just used up its "free cycle"
1445 * and need is volpmd field cleared, and the same done to all its descendents
1446 */
1447static void
1448pm_clear_volpm_dip(dev_info_t *dip)
1449{
1450	PMD_FUNC(pmf, "clear_volpm_dip")
1451
1452	if (dip == NULL)
1453		return;
1454	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1455	    PM_DEVICE(dip)))
1456	DEVI(dip)->devi_pm_volpmd = 0;
1457	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1458		pm_clear_volpm_dip(dip);
1459	}
1460}
1461
1462/*
1463 * A node which was voluntarily power managed has used up the "free cycles"
1464 * for the subtree that it is the root of.  Scan through the list of detached
1465 * nodes and adjust the counts of any that are descendents of the node.
1466 */
1467static void
1468pm_clear_volpm_list(dev_info_t *dip)
1469{
1470	PMD_FUNC(pmf, "clear_volpm_list")
1471	char	*pathbuf;
1472	size_t	len;
1473	pm_noinvol_t *ip;
1474
1475	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1476	(void) ddi_pathname(dip, pathbuf);
1477	len = strlen(pathbuf);
1478	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1479	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1480	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1481		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1482		    ip->ni_path))
1483		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1484		    ip->ni_path[len] == '/') {
1485			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1486			    ip->ni_path))
1487			ip->ni_volpmd = 0;
1488			ip->ni_wasvolpmd = 0;
1489		}
1490	}
1491	kmem_free(pathbuf, MAXPATHLEN);
1492	rw_exit(&pm_noinvol_rwlock);
1493}
1494
1495/*
1496 * Powers a device, suspending or resuming the driver if it is a backward
1497 * compatible device, calling into ppm to change power level.
1498 * Called with the component's power lock held.
1499 */
1500static int
1501power_dev(dev_info_t *dip, int comp, int level, int old_level,
1502    pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1503{
1504	PMD_FUNC(pmf, "power_dev")
1505	power_req_t power_req;
1506	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1507	int		resume_needed = 0;
1508	int		suspended = 0;
1509	int		result;
1510#ifdef PMDDEBUG
1511	struct pm_component *cp = PM_CP(dip, comp);
1512#endif
1513	int		bc = PM_ISBC(dip);
1514	int pm_all_components_off(dev_info_t *);
1515	int		clearvolpmd = 0;
1516	char		pathbuf[MAXNAMELEN];
1517#ifdef PMDDEBUG
1518	char *ppmname, *ppmaddr;
1519#endif
1520	/*
1521	 * If this is comp 0 of a backwards compat device and we are
1522	 * going to take the power away, we need to detach it with
1523	 * DDI_PM_SUSPEND command.
1524	 */
1525	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1526		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1527			/* We could not suspend before turning cmpt zero off */
1528			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1529			    pmf, PM_DEVICE(dip)))
1530			return (DDI_FAILURE);
1531		} else {
1532			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1533			suspended++;
1534		}
1535	}
1536	power_req.request_type = PMR_PPM_SET_POWER;
1537	power_req.req.ppm_set_power_req.who = dip;
1538	power_req.req.ppm_set_power_req.cmpt = comp;
1539	power_req.req.ppm_set_power_req.old_level = old_level;
1540	power_req.req.ppm_set_power_req.new_level = level;
1541	power_req.req.ppm_set_power_req.canblock = canblock;
1542	power_req.req.ppm_set_power_req.cookie = NULL;
1543#ifdef PMDDEBUG
1544	if (pm_ppm_claimed(dip)) {
1545		ppmname = PM_NAME(PPM(dip));
1546		ppmaddr = PM_ADDR(PPM(dip));
1547
1548	} else {
1549		ppmname = "noppm";
1550		ppmaddr = "0";
1551	}
1552	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1553	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1554	    power_val_to_string(cp, old_level), old_level,
1555	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1556#endif
1557	/*
1558	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1559	 * bc device comp 0 is powering on, then we count it as a power cycle
1560	 * against its voluntary count.
1561	 */
1562	if (DEVI(dip)->devi_pm_volpmd &&
1563	    (!bc && pm_all_components_off(dip) && level != 0) ||
1564	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1565		clearvolpmd = 1;
1566	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1567	    &power_req, &result)) == DDI_SUCCESS) {
1568		/*
1569		 * Now do involuntary pm accounting;  If we've just cycled power
1570		 * on a voluntarily pm'd node, and by inference on its entire
1571		 * subtree, we need to set the subtree (including those nodes
1572		 * already detached) volpmd counts to 0, and subtract out the
1573		 * value of the current node's volpmd count from the ancestors
1574		 */
1575		if (clearvolpmd) {
1576			int volpmd = DEVI(dip)->devi_pm_volpmd;
1577			pm_clear_volpm_dip(dip);
1578			pm_clear_volpm_list(dip);
1579			if (volpmd) {
1580				(void) ddi_pathname(dip, pathbuf);
1581				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1582				    volpmd, 0, pathbuf, dip);
1583			}
1584		}
1585	} else {
1586		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1587		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1588		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1589	}
1590	/*
1591	 * If some other devices were also powered up (e.g. other cpus in
1592	 * the same domain) return a pointer to that list
1593	 */
1594	if (devlist) {
1595		*devlist = (pm_ppm_devlist_t *)
1596		    power_req.req.ppm_set_power_req.cookie;
1597	}
1598	/*
1599	 * We will have to resume the device if the device is backwards compat
1600	 * device and either of the following is true:
1601	 * -This is comp 0 and we have successfully powered it up
1602	 * -This is comp 0 and we have failed to power it down. Resume is
1603	 *  needed because we have suspended it above
1604	 */
1605
1606	if (bc && comp == 0) {
1607		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1608		if (power_op_ret == DDI_SUCCESS) {
1609			if (POWERING_ON(old_level, level)) {
1610				/*
1611				 * It must be either suspended or resumed
1612				 * via pm_power_has_changed path
1613				 */
1614				ASSERT((DEVI(dip)->devi_pm_flags &
1615				    PMC_SUSPENDED) ||
1616				    (PM_CP(dip, comp)->pmc_flags &
1617				    PM_PHC_WHILE_SET_POWER));
1618
1619					resume_needed = suspended;
1620			}
1621		} else {
1622			if (POWERING_OFF(old_level, level)) {
1623				/*
1624				 * It must be either suspended or resumed
1625				 * via pm_power_has_changed path
1626				 */
1627				ASSERT((DEVI(dip)->devi_pm_flags &
1628				    PMC_SUSPENDED) ||
1629				    (PM_CP(dip, comp)->pmc_flags &
1630				    PM_PHC_WHILE_SET_POWER));
1631
1632					resume_needed = suspended;
1633			}
1634		}
1635	}
1636	if (resume_needed) {
1637		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1638		/* ppm is not interested in DDI_PM_RESUME */
1639		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1640		    DDI_SUCCESS) {
1641			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1642		} else
1643			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1644			    PM_DEVICE(dip));
1645	}
1646	return (power_op_ret);
1647}
1648
1649/*
1650 * Return true if we are the owner or a borrower of the devi lock.  See
1651 * pm_lock_power_single() about borrowing the lock.
1652 */
1653static int
1654pm_devi_lock_held(dev_info_t *dip)
1655{
1656	lock_loan_t *cur;
1657
1658	if (DEVI_BUSY_OWNED(dip))
1659		return (1);
1660
1661	/* return false if no locks borrowed */
1662	if (lock_loan_head.pmlk_next == NULL)
1663		return (0);
1664
1665	mutex_enter(&pm_loan_lock);
1666	/* see if our thread is registered as a lock borrower. */
1667	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1668		if (cur->pmlk_borrower == curthread)
1669			break;
1670	mutex_exit(&pm_loan_lock);
1671
1672	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1673}
1674
1675/*
1676 * pm_set_power: adjusts power level of device.	 Assumes device is power
1677 * manageable & component exists.
1678 *
1679 * Cases which require us to bring up devices we keep up ("wekeepups") for
1680 * backwards compatible devices:
1681 *	component 0 is off and we're bringing it up from 0
1682 *		bring up wekeepup first
1683 *	and recursively when component 0 is off and we bring some other
1684 *	component up from 0
1685 * For devices which are not backward compatible, our dependency notion is much
1686 * simpler.  Unless all components are off, then wekeeps must be on.
1687 * We don't treat component 0 differently.
1688 * Canblock tells how to deal with a direct pm'd device.
1689 * Scan arg tells us if we were called from scan, in which case we don't need
1690 * to go back to the root node and walk down to change power.
1691 */
1692int
1693pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1694    pm_canblock_t canblock, int scan, int *retp)
1695{
1696	PMD_FUNC(pmf, "set_power")
1697	char		*pathbuf;
1698	pm_bp_child_pwrchg_t bpc;
1699	pm_sp_misc_t	pspm;
1700	int		ret = DDI_SUCCESS;
1701	int		unused = DDI_SUCCESS;
1702	dev_info_t	*pdip = ddi_get_parent(dip);
1703
1704#ifdef DEBUG
1705	int		diverted = 0;
1706
1707	/*
1708	 * This prevents operations on the console from calling prom_printf and
1709	 * either deadlocking or bringing up the console because of debug
1710	 * output
1711	 */
1712	if (dip == cfb_dip) {
1713		diverted++;
1714		mutex_enter(&pm_debug_lock);
1715		pm_divertdebug++;
1716		mutex_exit(&pm_debug_lock);
1717	}
1718#endif
1719	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1720	    direction == PM_LEVEL_EXACT);
1721	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1722	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1723	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1724	(void) ddi_pathname(dip, pathbuf);
1725	bpc.bpc_dip = dip;
1726	bpc.bpc_path = pathbuf;
1727	bpc.bpc_comp = comp;
1728	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1729	bpc.bpc_nlevel = level;
1730	pspm.pspm_direction = direction;
1731	pspm.pspm_errnop = retp;
1732	pspm.pspm_canblock = canblock;
1733	pspm.pspm_scan = scan;
1734	bpc.bpc_private = &pspm;
1735
1736	/*
1737	 * If a config operation is being done (we've locked the parent) or
1738	 * we already hold the power lock (we've locked the node)
1739	 * then we can operate directly on the node because we have already
1740	 * brought up all the ancestors, otherwise, we have to go back to the
1741	 * top of the tree.
1742	 */
1743	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1744		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1745		    (void *)&bpc, (void *)&unused);
1746	else
1747		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1748		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1749#ifdef DEBUG
1750	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1751		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1752		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1753	}
1754	if (diverted) {
1755		mutex_enter(&pm_debug_lock);
1756		pm_divertdebug--;
1757		mutex_exit(&pm_debug_lock);
1758	}
1759#endif
1760	kmem_free(pathbuf, MAXPATHLEN);
1761	return (ret);
1762}
1763
1764/*
1765 * If holddip is set, then if a dip is found we return with the node held.
1766 *
1767 * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1768 * (resolve_pathname), but it does not drive attach.
1769 */
1770dev_info_t *
1771pm_name_to_dip(char *pathname, int holddip)
1772{
1773	struct pathname pn;
1774	char		*component;
1775	dev_info_t	*parent, *child;
1776	int		circ;
1777
1778	if ((pathname == NULL) || (*pathname != '/'))
1779		return (NULL);
1780
1781	/* setup pathname and allocate component */
1782	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1783		return (NULL);
1784	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1785
1786	/* start at top, process '/' component */
1787	parent = child = ddi_root_node();
1788	ndi_hold_devi(parent);
1789	pn_skipslash(&pn);
1790	ASSERT(i_ddi_devi_attached(parent));
1791
1792	/* process components of pathname */
1793	while (pn_pathleft(&pn)) {
1794		(void) pn_getcomponent(&pn, component);
1795
1796		/* enter parent and search for component child */
1797		ndi_devi_enter(parent, &circ);
1798		child = ndi_devi_findchild(parent, component);
1799		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1800			child = NULL;
1801			ndi_devi_exit(parent, circ);
1802			ndi_rele_devi(parent);
1803			goto out;
1804		}
1805
1806		/* attached child found, hold child and release parent */
1807		ndi_hold_devi(child);
1808		ndi_devi_exit(parent, circ);
1809		ndi_rele_devi(parent);
1810
1811		/* child becomes parent, and process next component */
1812		parent = child;
1813		pn_skipslash(&pn);
1814
1815		/* loop with active ndi_devi_hold of child->parent */
1816	}
1817
1818out:
1819	pn_free(&pn);
1820	kmem_free(component, MAXNAMELEN);
1821
1822	/* if we are not asked to return with hold, drop current hold */
1823	if (child && !holddip)
1824		ndi_rele_devi(child);
1825	return (child);
1826}
1827
1828/*
1829 * Search for a dependency and mark it unsatisfied
1830 */
1831static void
1832pm_unsatisfy(char *keeper, char *kept)
1833{
1834	PMD_FUNC(pmf, "unsatisfy")
1835	pm_pdr_t *dp;
1836
1837	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1838	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1839		if (!dp->pdr_isprop) {
1840			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1841			    (dp->pdr_kept_count > 0) &&
1842			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1843				if (dp->pdr_satisfied) {
1844					dp->pdr_satisfied = 0;
1845					pm_unresolved_deps++;
1846					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1847					    "pm_unresolved_deps now %d\n", pmf,
1848					    pm_unresolved_deps))
1849				}
1850			}
1851		}
1852	}
1853}
1854
/*
 * The keeper device is being un power managed; it keeps up "count" other
 * devices, named by keptpaths[].  Release any power hold we have on each
 * kept device (when one was actually taken) and mark each dependency as
 * no longer satisfied.
 *
 *	count		number of entries in keptpaths
 *	keeper		path name of the keeper device
 *	keptpaths	path names of the kept devices
 *	pwr		stands in for the keeper's power state when the keeper
 *			cannot be looked up or has no pm info; nonzero is
 *			treated as "keeper was on"
 */
static void
pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
{
	PMD_FUNC(pmf, "unkeeps")
	int i, j;
	dev_info_t *kept;
	dev_info_t *dip;
	struct pm_component *cp;
	int keeper_on = 0, circ;

	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
	    keeper, (void *)keptpaths))
	/*
	 * Try to grab keeper. Keeper may have gone away by now,
	 * in this case, use the passed in value pwr
	 */
	dip = pm_name_to_dip(keeper, 1);
	for (i = 0; i < count; i++) {
		/* Release power hold */
		kept = pm_name_to_dip(keptpaths[i], 1);
		if (kept) {
			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
			    PM_DEVICE(kept), i))
			/*
			 * We need to check if we skipped a bringup here
			 * because we could have failed the bringup
			 * (ie DIRECT PM device) and have
			 * not incremented the count.
			 */
			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
				/*
				 * Keeper is still around and power managed:
				 * it counts as "on" if any one of its
				 * components reports a nonzero cur_power().
				 * The check and the release are both done
				 * under the keeper's power lock.
				 */
				keeper_on = 0;
				PM_LOCK_POWER(dip, &circ);
				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
					cp = &DEVI(dip)->devi_pm_components[j];
					if (cur_power(cp)) {
						keeper_on++;
						break;
					}
				}
				if (keeper_on && (PM_SKBU(kept) == 0)) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
				PM_UNLOCK_POWER(dip, circ);
			} else if (pwr) {
				/*
				 * Keeper is gone (or no longer has pm info);
				 * fall back on the caller supplied state.
				 */
				if (PM_SKBU(kept) == 0) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
			}
			/* drop the reference from pm_name_to_dip() above */
			ddi_release_devi(kept);
		}
		/*
		 * mark this dependency not satisfied (done even when the
		 * kept device could not be looked up)
		 */
		pm_unsatisfy(keeper, keptpaths[i]);
	}
	/* drop the keeper reference, if the lookup found it */
	if (dip)
		ddi_release_devi(dip);
}
1922
/*
 * Device kept is being un power managed, it is kept up by keeper.
 * We need to mark the dependency no longer satisfied.
 *
 *	kept	path name of the kept device
 *	keeper	path name of its keeper; must not be NULL
 *
 * No power hold is released here; this only hands the pair to
 * pm_unsatisfy().
 */
static void
pm_unkepts(char *kept, char *keeper)
{
	PMD_FUNC(pmf, "unkepts")
	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
	ASSERT(keeper != NULL);
	/*
	 * mark this dependency not satisfied
	 */
	pm_unsatisfy(keeper, kept);
}
1938
1939/*
1940 * Removes dependency information and hold on the kepts, if the path is a
1941 * path of a keeper.
1942 */
1943static void
1944pm_free_keeper(char *path, int pwr)
1945{
1946	pm_pdr_t *dp;
1947	int i;
1948	size_t length;
1949
1950	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1951		if (strcmp(dp->pdr_keeper, path) != 0)
1952			continue;
1953		/*
1954		 * Remove all our kept holds and the dependency records,
1955		 * then free up the kept lists.
1956		 */
1957		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1958		if (dp->pdr_kept_count)  {
1959			for (i = 0; i < dp->pdr_kept_count; i++) {
1960				length = strlen(dp->pdr_kept_paths[i]);
1961				kmem_free(dp->pdr_kept_paths[i], length + 1);
1962			}
1963			kmem_free(dp->pdr_kept_paths,
1964			    dp->pdr_kept_count * sizeof (char **));
1965			dp->pdr_kept_paths = NULL;
1966			dp->pdr_kept_count = 0;
1967		}
1968	}
1969}
1970
1971/*
1972 * Removes the device represented by path from the list of kepts, if the
1973 * path is a path of a kept
1974 */
1975static void
1976pm_free_kept(char *path)
1977{
1978	pm_pdr_t *dp;
1979	int i;
1980	int j, count;
1981	size_t length;
1982	char **paths;
1983
1984	paths = NULL;
1985	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1986		if (dp->pdr_kept_count == 0)
1987			continue;
1988		count = dp->pdr_kept_count;
1989		/* Remove this device from the kept path lists */
1990		for (i = 0; i < count; i++) {
1991			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1992				pm_unkepts(path, dp->pdr_keeper);
1993				length = strlen(dp->pdr_kept_paths[i]) + 1;
1994				kmem_free(dp->pdr_kept_paths[i], length);
1995				dp->pdr_kept_paths[i] = NULL;
1996				dp->pdr_kept_count--;
1997			}
1998		}
1999		/* Compact the kept paths array */
2000		if (dp->pdr_kept_count) {
2001			length = dp->pdr_kept_count * sizeof (char **);
2002			paths = kmem_zalloc(length, KM_SLEEP);
2003			j = 0;
2004			for (i = 0; i < count; i++) {
2005				if (dp->pdr_kept_paths[i] != NULL) {
2006					paths[j] = dp->pdr_kept_paths[i];
2007					j++;
2008				}
2009			}
2010			ASSERT(j == dp->pdr_kept_count);
2011		}
2012		/* Now free the old array and point to the new one */
2013		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2014		dp->pdr_kept_paths = paths;
2015	}
2016}
2017
/*
 * Free the dependency information for a device.
 *
 *	path	path name of the device
 *	pwr	passed through to pm_free_keeper()
 *
 * The device is treated first as a keeper and then as a kept device, so
 * records in which it plays either role are cleaned up.  In DEBUG kernels
 * with PMD_KEEPS set in pm_debug, prdeps() is called before and after.
 */
void
pm_free_keeps(char *path, int pwr)
{
	PMD_FUNC(pmf, "free_keeps")

#ifdef DEBUG
	int doprdeps = 0;
	void prdeps(char *);

	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
	if (pm_debug & PMD_KEEPS) {
		doprdeps = 1;
		prdeps("pm_free_keeps before");
	}
#endif
	/*
	 * First assume we are a keeper and remove all our kepts.
	 */
	pm_free_keeper(path, pwr);
	/*
	 * Now assume we are a kept device, and remove all our records.
	 */
	pm_free_kept(path);
#ifdef	DEBUG
	if (doprdeps) {
		prdeps("pm_free_keeps after");
	}
#endif
}
2050
2051static int
2052pm_is_kept(char *path)
2053{
2054	pm_pdr_t *dp;
2055	int i;
2056
2057	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2058		if (dp->pdr_kept_count == 0)
2059			continue;
2060		for (i = 0; i < dp->pdr_kept_count; i++) {
2061			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2062				return (1);
2063		}
2064	}
2065	return (0);
2066}
2067
/*
 * Adjust the kidsupcnt (PM_KUC) of dip by cnt, which may be negative, while
 * holding dip's power lock.
 *
 * Nothing is done if dip is NULL, has no pm info, or is a backwards
 * compatible (BC) device.  When a release (cnt < 0) leaves the count at
 * zero, pm_rescan() is called on dip.
 */
static void
e_pm_hold_rele_power(dev_info_t *dip, int cnt)
{
	PMD_FUNC(pmf, "hold_rele_power")
	int circ;

	if ((dip == NULL) ||
	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
		return;

	PM_LOCK_POWER(dip, &circ);
	/* a release is only legal while the count is still positive */
	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))

	PM_KUC(dip) += cnt;

	ASSERT(PM_KUC(dip) >= 0);
	PM_UNLOCK_POWER(dip, circ);

	/*
	 * NOTE(review): PM_KUC is re-read here after the power lock has
	 * been dropped - confirm that is intended.
	 */
	if (cnt < 0 && PM_KUC(dip) == 0)
		pm_rescan(dip);
}
2091
/*
 * Sizes the ppm_callbacks table below (presumably the maximum number of
 * ppm handlers that can be registered - confirm against the registration
 * code).
 */
#define	MAX_PPM_HANDLERS	4

/* held by pm_ppm_notify_all_lowest() while it walks ppm_callbacks */
kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of ppm callbacks.  pm_ppm_notify_all_lowest() walks it from the
 * start until it reaches an entry whose ppmc_func is NULL, so the extra
 * (+ 1) slot presumably guarantees that such a terminating entry always
 * exists.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);
	dev_info_t	*ppmc_dip;
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2100
2101
/*
 * This routine calls into all the registered ppms to notify them
 * that either all components of power-managed devices are at their
 * lowest levels or no longer all are at their lowest levels.
 *
 *	dip	passed to pm_ctlops() along with each registered ppm's dip
 *	mode	stored in the request; PM_ALL_LOWEST additionally triggers
 *		the autoS3 signal below
 *
 * Each ppm is sent a PMR_PPM_ALL_LOWEST request while ppm_lock is held;
 * the return values and results of those requests are ignored.
 */
static void
pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
{
	struct ppm_callbacks *ppmcp;
	power_req_t power_req;
	int result = 0;

	power_req.request_type = PMR_PPM_ALL_LOWEST;
	power_req.req.ppm_all_lowest_req.mode = mode;
	mutex_enter(&ppm_lock);
	/* the table is terminated by an entry with a NULL ppmc_func */
	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
		    DDI_CTLOPS_POWER, &power_req, &result);
	mutex_exit(&ppm_lock);
	if (mode == PM_ALL_LOWEST) {
		if (autoS3_enabled) {
			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
			    "autos3\n"))
			/*
			 * Call the srn_signal hook, if one is set, under
			 * srn_clone_lock; srn_inuse is raised for the
			 * duration of the call.
			 */
			mutex_enter(&srn_clone_lock);
			if (srn_signal) {
				srn_inuse++;
				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
				srn_inuse--;
			} else {
				PMD(PMD_SX, ("srn_signal NULL\n"))
			}
			mutex_exit(&srn_clone_lock);
		} else {
			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
			    "disabled\n"));
		}
	}
}
2141
/*
 * Store value as the devi_pm_info pointer of dip.  No locking is done
 * here.
 */
static void
pm_set_pm_info(dev_info_t *dip, void *value)
{
	DEVI(dip)->devi_pm_info = value;
}
2147
/*
 * Head of the blocked list, linked through pr_next.  It is walked by
 * pm_rsvp_lookup() and pm_proceed(), both with pm_rsvp_lock held.
 */
pm_rsvp_t *pm_blocked_list;
2149
2150/*
2151 * Look up an entry in the blocked list by dip and component
2152 */
2153static pm_rsvp_t *
2154pm_rsvp_lookup(dev_info_t *dip, int comp)
2155{
2156	pm_rsvp_t *p;
2157	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2158	for (p = pm_blocked_list; p; p = p->pr_next)
2159		if (p->pr_dip == dip && p->pr_comp == comp) {
2160			return (p);
2161		}
2162	return (NULL);
2163}
2164
/*
 * Called when a device which is direct power managed (or the parent or
 * dependent of such a device) changes power, or when a pm clone is closed
 * that was direct power managing a device.  This call results in pm_blocked()
 * (below) returning.
 *
 *	dip		device whose blocked requests are to be woken
 *	cmd		PMP_RELEASE or PMP_SETPOWER; anything else panics
 *	comp		component number, used only for PMP_SETPOWER
 *	newlevel	level that was set, used only for PMP_SETPOWER
 *
 * The verdict is stored in the pr_retval of each affected blocked list
 * entry and the entry's pr_cv is signalled, all under pm_rsvp_lock.
 */
void
pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
{
	PMD_FUNC(pmf, "proceed")
	pm_rsvp_t *found = NULL;
	pm_rsvp_t *p;

	mutex_enter(&pm_rsvp_lock);
	switch (cmd) {
	/*
	 * we're giving up control, let any pending op continue
	 */
	case PMP_RELEASE:
		/* every entry for this dip is released, whatever its comp */
		for (p = pm_blocked_list; p; p = p->pr_next) {
			if (dip == p->pr_dip) {
				p->pr_retval = PMP_RELEASE;
				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(dip)))
				cv_signal(&p->pr_cv);
			}
		}
		break;

	/*
	 * process has done PM_SET_CURRENT_POWER; let a matching request
	 * succeed and a non-matching request for the same device fail
	 */
	case PMP_SETPOWER:
		found = pm_rsvp_lookup(dip, comp);
		if (!found)	/* if driver not waiting */
			break;
		/*
		 * This cannot be pm_lower_power, since that can only happen
		 * during detach or probe
		 */
		/*
		 * The waiter succeeds if the level just set is at least the
		 * level it asked for, and fails otherwise.
		 */
		if (found->pr_newlevel <= newlevel) {
			found->pr_retval = PMP_SUCCEED;
			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(dip)))
		} else {
			found->pr_retval = PMP_FAIL;
			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(dip)))
		}
		cv_signal(&found->pr_cv);
		break;

	default:
		panic("pm_proceed unknown cmd %d", cmd);
	}
	mutex_exit(&pm_rsvp_lock);
}
2223
/*
 * This routine dispatches new work to the dependency thread. Caller must
 * be prepared to block for memory if necessary.
 *
 *	cmd		work type, stored in pdw_type
 *	keeper		keeper path, or NULL; a private copy is queued
 *	kept		kept path, or NULL; a private copy is queued
 *	wait		if nonzero, block until the request is marked done
 *	res		if not NULL and wait is nonzero, receives pdw_ret
 *	cached_pwr	stored in pdw_pwr
 *
 * The request is appended to the tail of pm_dep_thread_workq under
 * pm_dep_thread_lock and pm_dep_thread_cv is signalled.  When wait is
 * nonzero the request and its path copies are freed here once done; when
 * it is zero nothing is freed here, so the consumer of the queue is
 * presumably responsible for that (confirm against the dependency thread).
 */
void
pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
    int *res, int cached_pwr)
{
	pm_dep_wk_t	*new_work;

	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
	new_work->pdw_type = cmd;
	new_work->pdw_wait = wait;
	new_work->pdw_done = 0;
	new_work->pdw_ret = 0;
	new_work->pdw_pwr = cached_pwr;
	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
	/* the caller's strings are copied, so it may free them on return */
	if (keeper != NULL) {
		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
		    KM_SLEEP);
		(void) strcpy(new_work->pdw_keeper, keeper);
	}
	if (kept != NULL) {
		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
		(void) strcpy(new_work->pdw_kept, kept);
	}
	mutex_enter(&pm_dep_thread_lock);
	/* append to the tail of the work queue */
	if (pm_dep_thread_workq == NULL) {
		pm_dep_thread_workq = new_work;
		pm_dep_thread_tail = new_work;
		new_work->pdw_next = NULL;
	} else {
		pm_dep_thread_tail->pdw_next = new_work;
		pm_dep_thread_tail = new_work;
		new_work->pdw_next = NULL;
	}
	cv_signal(&pm_dep_thread_cv);
	/* If caller asked for it, wait till it is done. */
	if (wait)  {
		while (!new_work->pdw_done)
			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
		/*
		 * Pass return status, if any, back.
		 */
		if (res != NULL)
			*res = new_work->pdw_ret;
		/*
		 * If we asked to wait, it is our job to free the request
		 * structure.
		 *
		 * NOTE(review): pdw_cv is not cv_destroy()ed before the
		 * structure is freed - confirm whether that is intentional.
		 */
		if (new_work->pdw_keeper)
			kmem_free(new_work->pdw_keeper,
			    strlen(new_work->pdw_keeper) + 1);
		if (new_work->pdw_kept)
			kmem_free(new_work->pdw_kept,
			    strlen(new_work->pdw_kept) + 1);
		kmem_free(new_work, sizeof (pm_dep_wk_t));
	}
	mutex_exit(&pm_dep_thread_lock);
}
2284
/*
 * Release the pm resource for this device.
 *
 * dip must have pm info and must not have its power lock held by the
 * caller (both are asserted).  In order, this routine:
 *	- ends direct power management of the device, if any, and releases
 *	  waiters via pm_proceed(PMP_RELEASE)
 *	- gives back to the parent the kidsupcnt holds that correspond to
 *	  the components still at nonzero power
 *	- queues a PM_DEP_WK_DETACH request (without waiting) so that the
 *	  dependency thread cleans up the device's dependency records
 *	- adjusts the count of components not at their lowest level
 *	- clears the device's pm info pointer and frees the pm_info_t
 */
void
pm_rem_info(dev_info_t *dip)
{
	PMD_FUNC(pmf, "rem_info")
	int		i, count = 0;
	pm_info_t	*info = PM_GET_PM_INFO(dip);
	dev_info_t	*pdip = ddi_get_parent(dip);
	char		*pathbuf;
	int		work_type = PM_DEP_WK_DETACH;

	ASSERT(info);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	if (PM_ISDIRECT(dip)) {
		info->pmi_dev_pm_state &= ~PM_DIRECT;
		ASSERT(info->pmi_clone);
		info->pmi_clone = 0;
		pm_proceed(dip, PMP_RELEASE, -1, -1);
	}
	ASSERT(!PM_GET_PM_SCAN(dip));

	/*
	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
	 * Others we check all components.  BC node that has already
	 * called pm_destroy_components() has zero component count.
	 * Parents that get notification are not adjusted because their
	 * kidsupcnt is always 0 (or 1 during configuration).
	 */
	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))

	/* node is detached, so we can examine power without locking */
	if (PM_ISBC(dip)) {
		count = (PM_CURPOWER(dip, 0) != 0);
	} else {
		for (i = 0; i < PM_NUMCMPTS(dip); i++)
			count += (PM_CURPOWER(dip, i) != 0);
	}

	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
		e_pm_hold_rele_power(pdip, -count);

	/*
	 * Schedule a request to clean up dependency records.  The device's
	 * own path is passed as both keeper and kept, and (count > 0) is
	 * passed as the cached power state.  The path buffer can be freed
	 * right away because the dispatch routine copies its strings.
	 */
	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);
	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
	    PM_DEP_NOWAIT, NULL, (count > 0));
	kmem_free(pathbuf, MAXPATHLEN);

	/*
	 * Adjust the pm_comps_notlowest count since this device is
	 * not being power-managed anymore.
	 */
	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
		pm_component_t *cp = PM_CP(dip, i);
		if (cp->pmc_cur_pwr != 0)
			PM_DECR_NOTLOWEST(dip)
	}
	/*
	 * Once we clear the info pointer, it looks like it is not power
	 * managed to everybody else.
	 */
	pm_set_pm_info(dip, NULL);
	kmem_free(info, sizeof (pm_info_t));
}
2353
2354int
2355pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2356{
2357	int components = PM_NUMCMPTS(dip);
2358	int *bufp;
2359	size_t size;
2360	int i;
2361
2362	if (components <= 0) {
2363		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2364		    "can't get normal power values\n", PM_DEVICE(dip));
2365		return (DDI_FAILURE);
2366	} else {
2367		size = components * sizeof (int);
2368		bufp = kmem_alloc(size, KM_SLEEP);
2369		for (i = 0; i < components; i++) {
2370			bufp[i] = pm_get_normal_power(dip, i);
2371		}
2372	}
2373	*length = size;
2374	*valuep = bufp;
2375	return (DDI_SUCCESS);
2376}
2377
2378static int
2379pm_reset_timestamps(dev_info_t *dip, void *arg)
2380{
2381	_NOTE(ARGUNUSED(arg))
2382
2383	int components;
2384	int	i;
2385
2386	if (!PM_GET_PM_INFO(dip))
2387		return (DDI_WALK_CONTINUE);
2388	components = PM_NUMCMPTS(dip);
2389	ASSERT(components > 0);
2390	PM_LOCK_BUSY(dip);
2391	for (i = 0; i < components; i++) {
2392		struct pm_component *cp;
2393		/*
2394		 * If the component was not marked as busy,
2395		 * reset its timestamp to now.
2396		 */
2397		cp = PM_CP(dip, i);
2398		if (cp->pmc_timestamp)
2399			cp->pmc_timestamp = gethrestime_sec();
2400	}
2401	PM_UNLOCK_BUSY(dip);
2402	return (DDI_WALK_CONTINUE);
2403}
2404
2405/*
2406 * Convert a power level to an index into the levels array (or
2407 * just PM_LEVEL_UNKNOWN in that special case).
2408 */
2409static int
2410pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2411{
2412	PMD_FUNC(pmf, "level_to_index")
2413	int i;
2414	int limit = cp->pmc_comp.pmc_numlevels;
2415	int *ip = cp->pmc_comp.pmc_lvals;
2416
2417	if (level == PM_LEVEL_UNKNOWN)
2418		return (level);
2419
2420	for (i = 0; i < limit; i++) {
2421		if (level == *ip++) {
2422			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2423			    pmf, PM_DEVICE(dip),
2424			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2425			return (i);
2426		}
2427	}
2428	panic("pm_level_to_index: level %d not found for device "
2429	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2430	/*NOTREACHED*/
2431}
2432
/*
 * Internal function to set current power level
 *
 *	dip	device owning the component
 *	cp	component whose current power is being recorded
 *	level	new power level (a level value, not an index)
 *
 * The component's pmc_cur_pwr is stored as an index into pmc_lvals, so the
 * level is converted with pm_level_to_index().  The count of components
 * that are not at their lowest level is kept up to date, and any power
 * index stashed in pmc_phc_pwr is discarded.
 */
static void
e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
{
	PMD_FUNC(pmf, "set_cur_pwr")
	/*
	 * While PM_PHC_WHILE_SET_POWER is set, the index stashed in
	 * pmc_phc_pwr is taken as the previous power instead of
	 * pmc_cur_pwr.
	 */
	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);

	/*
	 * Nothing to adjust if current & new levels are the same.
	 */
	if (curpwr != PM_LEVEL_UNKNOWN &&
	    level == cp->pmc_comp.pmc_lvals[curpwr])
		return;

	/*
	 * Keep the count for comps doing transition to/from lowest
	 * level.
	 */
	if (curpwr == 0) {
		/* leaving index 0, the lowest level */
		PM_INCR_NOTLOWEST(dip);
	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
		/* arriving at the lowest level from somewhere else */
		PM_DECR_NOTLOWEST(dip);
	}
	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
}
2462
2463static int pm_phc_impl(dev_info_t *, int, int, int);
2464
2465/*
2466 * This is the default method of setting the power of a device if no ppm
2467 * driver has claimed it.
2468 */
2469int
2470pm_power(dev_info_t *dip, int comp, int level)
2471{
2472	PMD_FUNC(pmf, "power")
2473	struct dev_ops	*ops;
2474	int		(*fn)(dev_info_t *, int, int);
2475	struct pm_component *cp = PM_CP(dip, comp);
2476	int retval;
2477	pm_info_t *info = PM_GET_PM_INFO(dip);
2478
2479	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2480	    PM_DEVICE(dip), comp, level))
2481	if (!(ops = ddi_get_driver(dip))) {
2482		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2483		    PM_DEVICE(dip)))
2484		return (DDI_FAILURE);
2485	}
2486	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2487		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2488		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2489		    (!fn ? " devo_power NULL" : "")))
2490		return (DDI_FAILURE);
2491	}
2492	cp->pmc_flags |= PM_POWER_OP;
2493	retval = (*fn)(dip, comp, level);
2494	cp->pmc_flags &= ~PM_POWER_OP;
2495	if (retval == DDI_SUCCESS) {
2496		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2497		return (DDI_SUCCESS);
2498	}
2499
2500	/*
2501	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2502	 * updated only the power level of the component.  If our attempt to
2503	 * set the device new to a power level above has failed we sync the
2504	 * total power state via phc code now.
2505	 */
2506	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2507		int phc_lvl =
2508		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2509
2510		ASSERT(info);
2511		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2512		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2513		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2514	}
2515
2516	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2517	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2518	    level, power_val_to_string(cp, level)));
2519	return (DDI_FAILURE);
2520}
2521
/*
 * Stop power managing dip: send a PMR_PPM_UNMANAGE request to the ppm
 * that has claimed the device, if any, and then release the device's pm
 * resources with pm_rem_info().
 *
 * The caller must not hold dip's power lock (asserted).  Returns the
 * result of the ppm request, or 0 if no request was made.  In DEBUG
 * kernels the request is sent even when no ppm has claimed the device.
 */
int
pm_unmanage(dev_info_t *dip)
{
	PMD_FUNC(pmf, "unmanage")
	power_req_t power_req;
	int result, retval = 0;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
	    PM_DEVICE(dip)))
	power_req.request_type = PMR_PPM_UNMANAGE;
	power_req.req.ppm_config_req.who = dip;
	if (pm_ppm_claimed(dip))
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#ifdef DEBUG
	else
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#endif
	/* the request is not expected to fail */
	ASSERT(retval == DDI_SUCCESS);
	pm_rem_info(dip);
	return (retval);
}
2546
2547int
2548pm_raise_power(dev_info_t *dip, int comp, int level)
2549{
2550	if (level < 0)
2551		return (DDI_FAILURE);
2552	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2553	    !e_pm_valid_power(dip, comp, level))
2554		return (DDI_FAILURE);
2555
2556	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2557}
2558
/*
 * Request that component comp of dip be lowered to level.
 *
 * Returns DDI_FAILURE if the device, component or level fails validation,
 * if the device is not detaching, or if dev_is_needed() fails the
 * PM_LEVEL_DOWNONLY request.  Returns DDI_SUCCESS without changing power
 * when the device is not scanable or pm_noinvol() is true for it.
 */
int
pm_lower_power(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_lower_power")

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
	    !e_pm_valid_power(dip, comp, level)) {
		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_FAILURE);
	}

	/* lowering power this way is only allowed while detaching */
	if (!DEVI_IS_DETACHING(dip)) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
		    pmf, PM_DEVICE(dip)))
		return (DDI_FAILURE);
	}

	/*
	 * If we don't care about saving power, or we're treating this node
	 * specially, then this is a no-op
	 */
	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
		    pmf, PM_DEVICE(dip),
		    !autopm_enabled ? "!autopm_enabled " : "",
		    !PM_POLLING_CPUPM ? "!cpupm_polling " : "",
		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
		return (DDI_SUCCESS);
	}

	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
		    PM_DEVICE(dip)))
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
2598
2599/*
2600 * Find the entries struct for a given dip in the blocked list, return it locked
2601 */
2602static psce_t *
2603pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2604{
2605	pscc_t *p;
2606	psce_t *psce;
2607
2608	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2609	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2610		if (p->pscc_dip == dip) {
2611			*psccp = p;
2612			psce = p->pscc_entries;
2613			mutex_enter(&psce->psce_lock);
2614			ASSERT(psce);
2615			rw_exit(&pm_pscc_direct_rwlock);
2616			return (psce);
2617		}
2618	}
2619	rw_exit(&pm_pscc_direct_rwlock);
2620	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2621	/*NOTREACHED*/
2622}
2623
/*
 * Write an entry indicating a power level change (to be passed to a process
 * later) in the given psce.
 * If we were called in the path that brings up the console fb in the
 * case of entering the prom, we don't want to sleep.  If the alloc fails, then
 * we create a record that has a size of -1, a physpath of NULL, and that
 * has the overflow flag set.
 *
 *	event		event code stored in the entry
 *	psce		entries struct to write to; psce_lock must be held
 *			on entry and is RELEASED before returning
 *	dip		device that changed; its path name is recorded
 *	comp		component number
 *	new, old	new and old power levels
 *	which		PSC_INTEREST makes the entry also carry the
 *			PSC_ALL_LOWEST state
 *	canblock	PM_CANBLOCK_BYPASS selects a non-sleeping allocation
 *
 * Returns nonzero if an entry that was still in use had to be overwritten
 * (overrun), zero otherwise.
 */
static int
psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
    int old, int which, pm_canblock_t canblock)
{
	/*
	 * NOTE(review): buf is MAXNAMELEN bytes, while pm_notify_parent()
	 * hands ddi_pathname() a MAXPATHLEN buffer - confirm that a device
	 * path cannot exceed MAXNAMELEN here.
	 */
	char	buf[MAXNAMELEN];
	pm_state_change_t *p;
	size_t	size;
	caddr_t physpath = NULL;
	int	overrun = 0;

	ASSERT(MUTEX_HELD(&psce->psce_lock));
	(void) ddi_pathname(dip, buf);
	size = strlen(buf) + 1;
	/* p is the slot that the next entry goes into */
	p = psce->psce_in;
	if (canblock == PM_CANBLOCK_BYPASS) {
		physpath = kmem_alloc(size, KM_NOSLEEP);
		if (physpath == NULL) {
			/*
			 * mark current entry as overrun
			 */
			p->flags |= PSC_EVENT_LOST;
			size = (size_t)-1;
		}
	} else
		physpath = kmem_alloc(size, KM_SLEEP);
	/*
	 * A nonzero size means the slot still holds an entry, which is
	 * about to be overwritten.
	 */
	if (p->size) {	/* overflow; mark the next entry */
		/* a size of -1 marks an entry that has no path to free */
		if (p->size != (size_t)-1)
			kmem_free(p->physpath, p->size);
		ASSERT(psce->psce_out == p);
		/* advance the out pointer past the slot, with wrap around */
		if (p == psce->psce_last) {
			psce->psce_first->flags |= PSC_EVENT_LOST;
			psce->psce_out = psce->psce_first;
		} else {
			(p + 1)->flags |= PSC_EVENT_LOST;
			psce->psce_out = (p + 1);
		}
		overrun++;
	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
		p->flags |= PSC_EVENT_LOST;
		p->size = 0;
		p->physpath = NULL;
	}
	if (which == PSC_INTEREST) {
		/* record whether every component is at its lowest level */
		mutex_enter(&pm_compcnt_lock);
		if (pm_comps_notlowest == 0)
			p->flags |= PSC_ALL_LOWEST;
		else
			p->flags &= ~PSC_ALL_LOWEST;
		mutex_exit(&pm_compcnt_lock);
	}
	p->event = event;
	p->timestamp = gethrestime_sec();
	p->component = comp;
	p->old_level = old;
	p->new_level = new;
	p->physpath = physpath;
	p->size = size;
	if (physpath != NULL)
		(void) strcpy(p->physpath, buf);
	/* advance the in pointer, with wrap around */
	if (p == psce->psce_last)
		psce->psce_in = psce->psce_first;
	else
		psce->psce_in = ++p;
	mutex_exit(&psce->psce_lock);
	return (overrun);
}
2698
2699/*
2700 * Find the next entry on the interest list.  We keep a pointer to the item we
2701 * last returned in the user's cooke.  Returns a locked entries struct.
2702 */
2703static psce_t *
2704psc_interest(void **cookie, pscc_t **psccp)
2705{
2706	pscc_t *pscc;
2707	pscc_t **cookiep = (pscc_t **)cookie;
2708
2709	if (*cookiep == NULL)
2710		pscc = pm_pscc_interest;
2711	else
2712		pscc = (*cookiep)->pscc_next;
2713	if (pscc) {
2714		*cookiep = pscc;
2715		*psccp = pscc;
2716		mutex_enter(&pscc->pscc_entries->psce_lock);
2717		return (pscc->pscc_entries);
2718	} else {
2719		return (NULL);
2720	}
2721}
2722
/*
 * Create an entry for a process to pick up indicating a power level change.
 *
 *	cmd		PSC_PENDING_CHANGE or PSC_HAS_CHANGED
 *	dip, comp	device and component that the change applies to
 *	newlevel	level being (or that has been) changed to
 *	oldlevel	level before the change
 *	canblock	passed on to psc_entry()
 *
 * The caller must hold pm_rsvp_lock.  PSC_PENDING_CHANGE is queued only
 * for the process directly power managing dip.  PSC_HAS_CHANGED is queued
 * for that process too (when dip is direct and canblock is not
 * PM_CANBLOCK_BYPASS) and then for every element of the interest list.
 */
static void
pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
    int newlevel, int oldlevel, pm_canblock_t canblock)
{
	PMD_FUNC(pmf, "enqueue_notify")
	pscc_t	*pscc;
	psce_t	*psce;
	void		*cookie = NULL;
	int	overrun;

	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
	switch (cmd) {
	case PSC_PENDING_CHANGE:	/* only for controlling process */
		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* psce comes back locked; psc_entry() unlocks it */
		psce = pm_psc_dip_to_direct(dip, &pscc);
		ASSERT(psce);
		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
		    pm_poll_cnt[pscc->pscc_clone]))
		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
		    PSC_DIRECT, canblock);
		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
		mutex_enter(&pm_clone_lock);
		/*
		 * An overrun replaced an existing entry, so the number of
		 * entries pending for the clone did not grow.
		 */
		if (!overrun)
			pm_poll_cnt[pscc->pscc_clone]++;
		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
		mutex_exit(&pm_clone_lock);
		break;
	case PSC_HAS_CHANGED:
		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* first tell the controlling process, if there is one */
		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
			psce = pm_psc_dip_to_direct(dip, &pscc);
			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
			    pm_poll_cnt[pscc->pscc_clone]))
			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_DIRECT, canblock);
			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
			mutex_enter(&pm_clone_lock);
			if (!overrun)
				pm_poll_cnt[pscc->pscc_clone]++;
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
			mutex_exit(&pm_clone_lock);
		}
		/*
		 * Then queue an entry for every element of the interest
		 * list.  Each psce is returned locked by psc_interest()
		 * and unlocked by psc_entry().  pm_poll_cnt is not touched
		 * on this path.
		 */
		mutex_enter(&pm_clone_lock);
		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
			(void) psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_INTEREST, canblock);
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		}
		rw_exit(&pm_pscc_interest_rwlock);
		mutex_exit(&pm_clone_lock);
		break;
#ifdef DEBUG
	default:
		ASSERT(0);
#endif
	}
}
2790
2791static void
2792pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2793{
2794	if (listp) {
2795		pm_ppm_devlist_t *p, *next = NULL;
2796
2797		for (p = *listp; p; p = next) {
2798			next = p->ppd_next;
2799			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2800			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2801			    canblock);
2802			kmem_free(p, sizeof (pm_ppm_devlist_t));
2803		}
2804		*listp = NULL;
2805	}
2806}
2807
2808/*
2809 * Try to get the power locks of the parent node and target (child)
2810 * node.  Return true if successful (with both locks held) or false
2811 * (with no locks held).
2812 */
2813static int
2814pm_try_parent_child_locks(dev_info_t *pdip,
2815    dev_info_t *dip, int *pcircp, int *circp)
2816{
2817	if (ndi_devi_tryenter(pdip, pcircp))
2818		if (PM_TRY_LOCK_POWER(dip, circp)) {
2819			return (1);
2820		} else {
2821			ndi_devi_exit(pdip, *pcircp);
2822		}
2823	return (0);
2824}
2825
/*
 * Determine if the power lock owner is blocked by current thread.
 * returns :
 * 	1 - If the thread owning the effective power lock (the first lock on
 *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
 *          a mutex held by the current thread.
 *
 *	0 - otherwise
 *
 * Note : This function is called by pm_power_has_changed to determine whether
 * it is executing in parallel with pm_set_power.
 *
 * The owner is obtained by sending a PMR_PPM_POWER_LOCK_OWNER request for
 * dip to the device's ppm; failure of that request is fatal (panic).
 */
static int
pm_blocked_by_us(dev_info_t *dip)
{
	power_req_t power_req;
	kthread_t *owner;
	int result;
	kmutex_t *mp;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;

	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
	power_req.req.ppm_power_lock_owner_req.who = dip;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
	    DDI_SUCCESS) {
		/*
		 * It is assumed that if the device is claimed by ppm, ppm
		 * will always implement this request type and it'll always
		 * return success. We panic here, if it fails.
		 */
		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
		    PM_DEVICE(dip));
		/*NOTREACHED*/
	}

	/*
	 * We are blocking the owner only if there is an owner, it is
	 * asleep on a synchronization object of type mutex, and that
	 * mutex is owned by the current thread.
	 */
	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
	    owner->t_state == TS_SLEEP &&
	    owner->t_sobj_ops &&
	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
	    (mp = (kmutex_t *)owner->t_wchan) &&
	    mutex_owner(mp) == curthread)
		return (1);

	return (0);
}
2871
2872/*
2873 * Notify parent which wants to hear about a child's power changes.
2874 */
2875static void
2876pm_notify_parent(dev_info_t *dip,
2877    dev_info_t *pdip, int comp, int old_level, int level)
2878{
2879	pm_bp_has_changed_t bphc;
2880	pm_sp_misc_t pspm;
2881	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2882	int result = DDI_SUCCESS;
2883
2884	bphc.bphc_dip = dip;
2885	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2886	bphc.bphc_comp = comp;
2887	bphc.bphc_olevel = old_level;
2888	bphc.bphc_nlevel = level;
2889	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2890	pspm.pspm_scan = 0;
2891	bphc.bphc_private = &pspm;
2892	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2893	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2894	kmem_free(pathbuf, MAXPATHLEN);
2895}
2896
2897/*
2898 * Check if we need to resume a BC device, and make the attach call as required.
2899 */
2900static int
2901pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2902{
2903	int ret = DDI_SUCCESS;
2904
2905	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2906		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2907		/* ppm is not interested in DDI_PM_RESUME */
2908		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2909			/* XXX Should we mark it resumed, */
2910			/* even though it failed? */
2911			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2912			    PM_NAME(dip), PM_ADDR(dip));
2913		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2914	}
2915
2916	return (ret);
2917}
2918
2919/*
2920 * Tests outside the lock to see if we should bother to enqueue an entry
2921 * for any watching process.  If yes, then caller will take the lock and
2922 * do the full protocol
2923 */
2924static int
2925pm_watchers()
2926{
2927	if (pm_processes_stopped)
2928		return (0);
2929	return (pm_pscc_direct || pm_pscc_interest);
2930}
2931
2932static int pm_phc_impl(dev_info_t *, int, int, int);
2933
/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 *	dip	device whose component changed
 *	comp	component number within dip
 *	level	new power level; must be >= 0 and valid for the component
 *
 * Returns DDI_FAILURE if level is negative or if dip, comp or level fail
 * the e_pm_valid_*() checks.  Otherwise returns DDI_SUCCESS from the
 * "deadlock on the same component" path, or whatever pm_phc_impl() returns.
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		/*
		 * Case 1: we are blocking the lock holder and this very
		 * component has a power operation in flight.
		 */
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			/*
			 * Do the notifications by hand, using the level
			 * currently recorded for the component as the old
			 * level, since pm_phc_impl() is not called here.
			 */
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			/*
			 * Case 2: we are blocking the lock holder, but the
			 * power operation is not on this component.
			 */
			if (blocked) {	/* blocked, but different cmpt? */
				/*
				 * Try for the parent's lock alone.  If even
				 * that is unavailable, record the change on
				 * the child without adjusting the parent.
				 */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
					    comp, level, 1));
				}
				/*
				 * Same hold/release bookkeeping as the
				 * non-deadlock case below, but with only the
				 * parent's lock held.
				 */
				old_level = cur_power(cp);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		/* Not blocking anyone: wait a tick and retry both locks. */
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	old_level = cur_power(cp);
	/*
	 * Hold the parent before recording a power-on, and release it after
	 * recording a drop to level 0 from a known level.  Neither is done
	 * when the parent wants notification, nor for components other than
	 * 0 of a backwards compatible device.
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
3050
3051/*
3052 * Account for power changes to a component of the the console frame buffer.
3053 * If lowering power from full (or "unkown", which is treatd as full)
3054 * we will increment the "components off" count of the fb device.
3055 * Subsequent lowering of the same component doesn't affect the count.  If
3056 * raising a component back to full power, we will decrement the count.
3057 *
3058 * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3059 */
3060static int
3061calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3062{
3063	struct pm_component *cp = PM_CP(dip, cmpt);
3064	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3065	int want_normal = (new == cp->pmc_norm_pwr);
3066	int incr = 0;
3067
3068	if (on && !want_normal)
3069		incr = 1;
3070	else if (!on && want_normal)
3071		incr = -1;
3072	return (incr);
3073}
3074
3075/*
3076 * Adjust the count of console frame buffer components < full power.
3077 */
3078static void
3079update_comps_off(int incr, dev_info_t *dip)
3080{
3081		mutex_enter(&pm_cfb_lock);
3082		pm_cfb_comps_off += incr;
3083		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3084		mutex_exit(&pm_cfb_lock);
3085}
3086
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 *
 * NOTE(review): pm_power_has_changed() also calls this from its
 * deadlock-avoidance paths, where it could not take the child's lock.
 *
 * Returns DDI_SUCCESS if the component is already recorded at 'level' or
 * once the change has been recorded, and DDI_FAILURE if the ppm fails the
 * PMR_PPM_POWER_CHANGE_NOTIFY request (nothing is updated in that case).
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	/*
	 * If an earlier pm_power_has_changed() stashed a level while a set
	 * power operation was pending, that stashed index is the official
	 * one; otherwise use the current index.  Indices are converted to
	 * levels through pmc_lvals unless the level is unknown.
	 */
	cp = PM_CP(dip, comp);
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Keep the console frame buffer "components off" count in step. */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	/* result is not read again below; the function returns DDI_SUCCESS. */
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	if (POWERING_OFF(old_level, level)) {
		/* Only when every component now reports power 0. */
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * don't increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/*
	 * Hand the device's path to the dependency thread without waiting
	 * (PM_DEP_NOWAIT).  The buffer is freed as soon as the dispatch
	 * call returns.
	 */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	/*
	 * level != old_level always holds here because of the early return
	 * above, so this reduces to "notify and somebody is watching".
	 */
	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3210
/*
 * This function is called at startup time to notify pm of the existence
 * of any platform power managers for this platform.  As a result of
 * this registration, each function provided will be called each time
 * a device node is attached, until one returns true, and it must claim the
 * device node (by returning non-zero) if it wants to be involved in the
 * node's power management.  If it does claim the node, then it will
 * subsequently be notified of attach and detach events.
 *
 *	func	claim function of the platform power manager
 *	dip	devinfo node of the platform power manager itself
 *
 * Returns DDI_FAILURE if no free slot is left in ppm_callbacks, otherwise
 * DDI_SUCCESS.  Failures to notify the ppm of an ancestor's current power
 * level are only logged.
 *
 * NOTE(review): pm_ppm_claim() walks ppm_callbacks until it finds a NULL
 * ppmc_func, while the loop below may fill all MAX_PPM_HANDLERS entries.
 * This presumably relies on the array having a spare terminating entry;
 * confirm against the declaration of ppm_callbacks.
 */

int
pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
{
	PMD_FUNC(pmf, "register_ppm")
	struct ppm_callbacks *ppmcp;
	pm_component_t *cp;
	int i, pwr, result, circ;
	power_req_t power_req;
	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
	void pm_ppm_claim(dev_info_t *);

	/* Record the callback in the first unused slot. */
	mutex_enter(&ppm_lock);
	ppmcp = ppm_callbacks;
	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
		if (ppmcp->ppmc_func == NULL) {
			ppmcp->ppmc_func = func;
			ppmcp->ppmc_dip = dip;
			break;
		}
	}
	mutex_exit(&ppm_lock);

	if (i >= MAX_PPM_HANDLERS)
		return (DDI_FAILURE);
	/*
	 * Walk the ancestors of the ppm's own node, giving the registered
	 * ppms a chance to claim each one.  Ancestors other than the root
	 * that have no pm info are skipped.
	 */
	while ((dip = ddi_get_parent(dip)) != NULL) {
		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
			continue;
		pm_ppm_claim(dip);
		/* don't bother with the not power-manageable nodes */
		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
			/*
			 * Tell ppm about this.
			 */
			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
			p->old_level = PM_LEVEL_UNKNOWN;
			p->who = dip;
			PM_LOCK_POWER(dip, &circ);
			/*
			 * Report every component whose power is known, as a
			 * change from "unknown" to its current level.
			 */
			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
				cp = PM_CP(dip, i);
				/* pwr is the level index, not the level */
				pwr = cp->pmc_cur_pwr;
				if (pwr != PM_LEVEL_UNKNOWN) {
					p->cmpt = i;
					p->new_level = cur_power(cp);
					p->old_level = PM_LEVEL_UNKNOWN;
					if (pm_ctlops(PPM(dip), dip,
					    DDI_CTLOPS_POWER, &power_req,
					    &result) == DDI_FAILURE) {
						PMD(PMD_FAIL, ("%s: pc "
						    "%s@%s(%s#%d) to %d "
						    "fails\n", pmf,
						    PM_DEVICE(dip), pwr))
					}
				}
			}
			PM_UNLOCK_POWER(dip, circ);
		}
	}
	return (DDI_SUCCESS);
}
3281
3282/*
3283 * Call the ppm's that have registered and adjust the devinfo struct as
3284 * appropriate.  First one to claim it gets it.  The sets of devices claimed
3285 * by each ppm are assumed to be disjoint.
3286 */
3287void
3288pm_ppm_claim(dev_info_t *dip)
3289{
3290	struct ppm_callbacks *ppmcp;
3291
3292	if (PPM(dip)) {
3293		return;
3294	}
3295	mutex_enter(&ppm_lock);
3296	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3297		if ((*ppmcp->ppmc_func)(dip)) {
3298			DEVI(dip)->devi_pm_ppm =
3299			    (struct dev_info *)ppmcp->ppmc_dip;
3300			mutex_exit(&ppm_lock);
3301			return;
3302		}
3303	}
3304	mutex_exit(&ppm_lock);
3305}
3306
/*
 * Node is being detached so stop autopm until we see if it succeeds, in which
 * case pm_stop will be called.  For backwards compatible devices we bring the
 * device up to full power on the assumption the detach will succeed.
 *
 * Does nothing beyond debug logging if the node has no pm info.  The console
 * frame buffer is also brought to normal power; once cfb_inuse is clear its
 * dip is moved from cfb_dip to cfb_dip_detaching.
 */
void
pm_detaching(dev_info_t *dip)
{
	PMD_FUNC(pmf, "detaching")
	pm_info_t *info = PM_GET_PM_INFO(dip);
	int iscons;

	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
	    PM_NUMCMPTS(dip)))
	if (info == NULL)
		return;
	ASSERT(DEVI_IS_DETACHING(dip));
	/* Mark the node as detaching under the dip lock. */
	PM_LOCK_DIP(dip);
	info->pmi_dev_pm_state |= PM_DETACHING;
	PM_UNLOCK_DIP(dip);
	if (!PM_ISBC(dip))
		pm_scan_stop(dip);

	/*
	 * console and old-style devices get brought up when detaching.
	 */
	iscons = PM_IS_CFB(dip);
	if (iscons || PM_ISBC(dip)) {
		/* Best effort: the result of pm_all_to_normal() is ignored. */
		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
		if (iscons) {
			/*
			 * Poll, one tick at a time and without holding
			 * pm_cfb_lock across the delay, until the frame
			 * buffer is no longer in use.
			 */
			mutex_enter(&pm_cfb_lock);
			while (cfb_inuse) {
				mutex_exit(&pm_cfb_lock);
				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
				delay(1);
				mutex_enter(&pm_cfb_lock);
			}
			ASSERT(cfb_dip_detaching == NULL);
			ASSERT(cfb_dip);
			cfb_dip_detaching = cfb_dip;	/* case detach fails */
			cfb_dip = NULL;
			mutex_exit(&pm_cfb_lock);
		}
	}
}
3352
/*
 * Node failed to detach.  If it used to be autopm'd, make it so again.
 *
 * Clears PM_DETACHING, carries out a deferred "all to normal" request if
 * one was recorded while detaching and is still needed, and restarts
 * scanning for devices that are not backwards compatible.  Does nothing if
 * the node has no pm info.
 */
void
pm_detach_failed(dev_info_t *dip)
{
	PMD_FUNC(pmf, "detach_failed")
	pm_info_t *info = PM_GET_PM_INFO(dip);
	int pm_all_at_normal(dev_info_t *);

	if (info == NULL)
		return;
	ASSERT(DEVI_IS_DETACHING(dip));
	if (info->pmi_dev_pm_state & PM_DETACHING) {
		info->pmi_dev_pm_state &= ~PM_DETACHING;
		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
			/* Make sure the operation is still needed */
			if (!pm_all_at_normal(dip)) {
				/* A failure here is only logged. */
				if (pm_all_to_normal(dip,
				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
					PMD(PMD_ERROR, ("%s: could not bring "
					    "%s@%s(%s#%d) to normal\n", pmf,
					    PM_DEVICE(dip)))
				}
			}
			/* The deferred request is dropped either way. */
			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
		}
	}
	if (!PM_ISBC(dip)) {
		/* Scan state is set up under pm_scan_lock; rescan outside it. */
		mutex_enter(&pm_scan_lock);
		if (PM_SCANABLE(dip))
			pm_scan_init(dip);
		mutex_exit(&pm_scan_lock);
		pm_rescan(dip);
	}
}
3389
/*
 * generic Backwards Compatible component
 *
 * bc_comp is the template that e_pm_default_components() copies by struct
 * assignment into every component of a backwards compatible device, before
 * it gives each component its own pmc_lvals and pmc_thresh arrays.
 * bc_names supplies the two level names.
 * NOTE(review): the initializer is positional; the 2 presumably fills
 * pmc_numlevels -- confirm against the pm_comp_t declaration.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3394
3395static void
3396e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3397{
3398	pm_comp_t *pmc;
3399	pmc = &cp->pmc_comp;
3400	pmc->pmc_numlevels = 2;
3401	pmc->pmc_lvals[0] = 0;
3402	pmc->pmc_lvals[1] = norm;
3403	e_pm_set_cur_pwr(dip, cp, norm);
3404}
3405
3406static void
3407e_pm_default_components(dev_info_t *dip, int cmpts)
3408{
3409	int i;
3410	pm_component_t *p = DEVI(dip)->devi_pm_components;
3411
3412	p = DEVI(dip)->devi_pm_components;
3413	for (i = 0; i < cmpts; i++, p++) {
3414		p->pmc_comp = bc_comp;	/* struct assignment */
3415		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3416		    KM_SLEEP);
3417		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3418		    KM_SLEEP);
3419		p->pmc_comp.pmc_numlevels = 2;
3420		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3421		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3422	}
3423}
3424
/*
 * Called from functions that require components to exist already to allow
 * for their creation by parsing the pm-components property.
 * Device will not be power managed as a result of this call
 * No locking needed because we're single threaded by the ndi_devi_enter
 * done while attaching, and the device isn't visible until after it has
 * attached
 *
 *	dip	device to prepare
 *	style	PM_STYLE_NEW makes a missing pm-components property a
 *		failure; any other value permits the backwards compatible
 *		fallback
 *
 * Returns DDI_SUCCESS if components are (or already were) set up.  Returns
 * DDI_FAILURE if an earlier attempt failed, if the property is malformed,
 * if it is missing and style is PM_STYLE_NEW, if the device has no
 * components to fall back on, or if a fallback component has no normal
 * power set.
 */
int
pm_premanage(dev_info_t *dip, int style)
{
	PMD_FUNC(pmf, "premanage")
	pm_comp_t	*pcp, *compp;
	int		cmpts, i, norm, error;
	pm_component_t *p = DEVI(dip)->devi_pm_components;
	pm_comp_t *pm_autoconfig(dev_info_t *, int *);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	/*
	 * If this dip has already been processed, don't mess with it
	 */
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
		return (DDI_SUCCESS);
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
		return (DDI_FAILURE);
	}
	/*
	 * Look up pm-components property and create components accordingly
	 * If that fails, fall back to backwards compatibility
	 */
	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
		/*
		 * If error is set, the property existed but was not well formed
		 */
		if (error || (style == PM_STYLE_NEW)) {
			/* Remember the failure so later calls fail fast. */
			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
			return (DDI_FAILURE);
		}
		/*
		 * If they don't have the pm-components property, then we
		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
		 * behavior driver must have called pm_create_components, and
		 * we need to flesh out dummy components
		 */
		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
			/*
			 * Not really failure, but we don't want the
			 * caller to treat it as success
			 */
			return (DDI_FAILURE);
		}
		DEVI(dip)->devi_pm_flags |= PMC_BC;
		e_pm_default_components(dip, cmpts);
		for (i = 0; i < cmpts; i++) {
			/*
			 * if normal power not set yet, we don't really know
			 * what *ANY* of the power values are.  If normal
			 * power is set, then we assume for this backwards
			 * compatible case that the values are 0, normal power.
			 */
			norm = pm_get_normal_power(dip, i);
			/*
			 * (uint_t)-1 is the value e_pm_create_components()
			 * stores in pmc_norm_pwr for a new component.
			 */
			if (norm == (uint_t)-1) {
				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
				    PM_DEVICE(dip), i))
				return (DDI_FAILURE);
			}
			/*
			 * Components of BC devices start at their normal power,
			 * so count them to be not at their lowest power.
			 */
			PM_INCR_NOTLOWEST(dip);
			e_pm_default_levels(dip, PM_CP(dip, i), norm);
		}
	} else {
		/*
		 * e_pm_create_components was called from pm_autoconfig(), it
		 * creates components with no descriptions (or known levels)
		 */
		cmpts = PM_NUMCMPTS(dip);
		ASSERT(cmpts != 0);
		pcp = compp;
		p = DEVI(dip)->devi_pm_components;
		/*
		 * Copy each parsed description into its component and start
		 * the component off at an unknown power level.
		 */
		for (i = 0; i < cmpts; i++, p++) {
			p->pmc_comp = *pcp++;   /* struct assignment */
			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
		}
		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
			    PMC_CPU_THRESH);
		else
			pm_set_device_threshold(dip, pm_system_idle_threshold,
			    PMC_DEF_THRESH);
		/*
		 * Only the array returned by pm_autoconfig() is freed; its
		 * contents were copied into the components above.
		 */
		kmem_free(compp, cmpts * sizeof (pm_comp_t));
	}
	return (DDI_SUCCESS);
}
3522
/*
 * Called during or after the device's attach to let us know it is ready
 * to play autopm.   Look up the pm model and manage the device accordingly.
 * Returns DDI_FAILURE if pm_premanage() fails, otherwise 0.
 * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
 * a little cleaner
 *
 * Called with dip lock held, return with dip lock unheld.
 * NOTE(review): nothing in this function drops the dip lock, and
 * pm_premanage() asserts that the caller is not holding it -- confirm
 * whether the locking statement above is still accurate.
 */

int
e_pm_manage(dev_info_t *dip, int style)
{
	PMD_FUNC(pmf, "e_manage")
	pm_info_t	*info;
	dev_info_t	*pdip = ddi_get_parent(dip);
	int	pm_thresh_specd(dev_info_t *);
	int	count;
	char	*pathbuf;

	if (pm_premanage(dip, style) != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}
	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	ASSERT(PM_GET_PM_INFO(dip) == NULL);
	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);

	/*
	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
	 * out at their normal power, so they are "up", others start out
	 * unknown, which is effectively "up".  Parent which want notification
	 * get kidsupcnt of 0 always.
	 */
	/* BC devices count once; others count once per component. */
	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
		e_pm_hold_rele_power(pdip, count);

	pm_set_pm_info(dip, info);
	/*
	 * Apply any recorded thresholds
	 */
	(void) pm_thresh_specd(dip);

	/*
	 * Do dependency processing.
	 */
	/*
	 * The device's own path is passed for both path arguments, the
	 * dispatch does not wait (PM_DEP_NOWAIT), and the buffer is freed
	 * as soon as the call returns.
	 */
	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);
	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
	    PM_DEP_NOWAIT, NULL, 0);
	kmem_free(pathbuf, MAXPATHLEN);

	/* Start scanning devices that are not backwards compatible. */
	if (!PM_ISBC(dip)) {
		mutex_enter(&pm_scan_lock);
		if (PM_SCANABLE(dip)) {
			pm_scan_init(dip);
			mutex_exit(&pm_scan_lock);
			pm_rescan(dip);
		} else {
			mutex_exit(&pm_scan_lock);
		}
	}
	return (0);
}
3587
3588/*
3589 * This is the obsolete exported interface for a driver to find out its
3590 * "normal" (max) power.
3591 * We only get components destroyed while no power management is
3592 * going on (and the device is detached), so we don't need a mutex here
3593 */
3594int
3595pm_get_normal_power(dev_info_t *dip, int comp)
3596{
3597
3598	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3599		return (PM_CP(dip, comp)->pmc_norm_pwr);
3600	}
3601	return (DDI_FAILURE);
3602}
3603
3604/*
3605 * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3606 */
3607int
3608pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3609{
3610	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3611		*levelp = PM_CURPOWER(dip, comp);
3612		return (DDI_SUCCESS);
3613	}
3614	return (DDI_FAILURE);
3615}
3616
3617/*
3618 * Returns current threshold of indicated component
3619 */
3620static int
3621cur_threshold(dev_info_t *dip, int comp)
3622{
3623	pm_component_t *cp = PM_CP(dip, comp);
3624	int pwr;
3625
3626	if (PM_ISBC(dip)) {
3627		/*
3628		 * backwards compatible nodes only have one threshold
3629		 */
3630		return (cp->pmc_comp.pmc_thresh[1]);
3631	}
3632	pwr = cp->pmc_cur_pwr;
3633	if (pwr == PM_LEVEL_UNKNOWN) {
3634		int thresh;
3635		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3636			thresh = pm_default_nexus_threshold;
3637		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3638			thresh = pm_cpu_idle_threshold;
3639		else
3640			thresh = pm_system_idle_threshold;
3641		return (thresh);
3642	}
3643	ASSERT(cp->pmc_comp.pmc_thresh);
3644	return (cp->pmc_comp.pmc_thresh[pwr]);
3645}
3646
3647/*
3648 * Compute next lower component power level given power index.
3649 */
3650static int
3651pm_next_lower_power(pm_component_t *cp, int pwrndx)
3652{
3653	int nxt_pwr;
3654
3655	if (pwrndx == PM_LEVEL_UNKNOWN) {
3656		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3657	} else {
3658		pwrndx--;
3659		ASSERT(pwrndx >= 0);
3660		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3661	}
3662	return (nxt_pwr);
3663}
3664
3665/*
3666 * Update the maxpower (normal) power of a component. Note that the
3667 * component's power level is only changed if it's current power level
3668 * is higher than the new max power.
3669 */
3670int
3671pm_update_maxpower(dev_info_t *dip, int comp, int level)
3672{
3673	PMD_FUNC(pmf, "update_maxpower")
3674	int old;
3675	int result;
3676
3677	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3678	    !e_pm_valid_power(dip, comp, level)) {
3679		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3680		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3681		return (DDI_FAILURE);
3682	}
3683	old = e_pm_get_max_power(dip, comp);
3684	e_pm_set_max_power(dip, comp, level);
3685
3686	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3687	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3688		e_pm_set_max_power(dip, comp, old);
3689		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3690		    PM_DEVICE(dip)))
3691		return (DDI_FAILURE);
3692	}
3693	return (DDI_SUCCESS);
3694}
3695
3696/*
3697 * Bring all components of device to normal power
3698 */
3699int
3700pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3701{
3702	PMD_FUNC(pmf, "all_to_normal")
3703	int		*normal;
3704	int		i, ncomps, result;
3705	size_t		size;
3706	int		changefailed = 0;
3707
3708	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3709	ASSERT(PM_GET_PM_INFO(dip));
3710	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3711		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3712		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3713		return (DDI_FAILURE);
3714	}
3715	ncomps = PM_NUMCMPTS(dip);
3716	for (i = 0; i < ncomps; i++) {
3717		if (pm_set_power(dip, i, normal[i],
3718		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3719			changefailed++;
3720			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3721			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3722			    PM_DEVICE(dip), i, normal[i], result))
3723		}
3724	}
3725	kmem_free(normal, size);
3726	if (changefailed) {
3727		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3728		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3729		return (DDI_FAILURE);
3730	}
3731	return (DDI_SUCCESS);
3732}
3733
3734/*
3735 * Returns true if all components of device are at normal power
3736 */
3737int
3738pm_all_at_normal(dev_info_t *dip)
3739{
3740	PMD_FUNC(pmf, "all_at_normal")
3741	int		*normal;
3742	int		i;
3743	size_t		size;
3744
3745	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3746	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3747		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3748		return (DDI_FAILURE);
3749	}
3750	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3751		int current = PM_CURPOWER(dip, i);
3752		if (normal[i] > current) {
3753			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3754			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3755			    normal[i], current))
3756			break;
3757		}
3758	}
3759	kmem_free(normal, size);
3760	if (i != PM_NUMCMPTS(dip)) {
3761		return (0);
3762	}
3763	return (1);
3764}
3765
3766static void bring_pmdep_up(dev_info_t *, int);
3767
3768static void
3769bring_wekeeps_up(char *keeper)
3770{
3771	PMD_FUNC(pmf, "bring_wekeeps_up")
3772	int i;
3773	pm_pdr_t *dp;
3774	pm_info_t *wku_info;
3775	char *kept_path;
3776	dev_info_t *kept;
3777
3778	if (panicstr) {
3779		return;
3780	}
3781	/*
3782	 * We process the request even if the keeper detaches because
3783	 * detach processing expects this to increment kidsupcnt of kept.
3784	 */
3785	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3786	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3787		if (strcmp(dp->pdr_keeper, keeper) != 0)
3788			continue;
3789		for (i = 0; i < dp->pdr_kept_count; i++) {
3790			kept_path = dp->pdr_kept_paths[i];
3791			if (kept_path == NULL)
3792				continue;
3793			ASSERT(kept_path[0] != '\0');
3794			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3795				continue;
3796			wku_info = PM_GET_PM_INFO(kept);
3797			if (wku_info == NULL) {
3798				if (kept)
3799					ddi_release_devi(kept);
3800				continue;
3801			}
3802			/*
3803			 * Don't mess with it if it is being detached, it isn't
3804			 * safe to call its power entry point
3805			 */
3806			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3807				if (kept)
3808					ddi_release_devi(kept);
3809				continue;
3810			}
3811			bring_pmdep_up(kept, 1);
3812			ddi_release_devi(kept);
3813		}
3814	}
3815}
3816
/*
 * Bring up the 'kept' device passed as argument
 *
 *	kept_dip	the kept device
 *	hold		if non-zero, call pm_hold_power() on kept_dip
 *			before bringing it up
 *
 * Nothing is done for a device with no pm info.  A directly power managed
 * device that is not already at normal power is skipped and flagged with
 * PMC_SKIP_BRINGUP, unless processes are stopped.
 */
static void
bring_pmdep_up(dev_info_t *kept_dip, int hold)
{
	PMD_FUNC(pmf, "bring_pmdep_up")
	int is_all_at_normal = 0;

	/*
	 * If the kept device has been unmanaged, do nothing.
	 */
	if (!PM_GET_PM_INFO(kept_dip))
		return;

	/* Just ignore DIRECT PM device till they are released. */
	/*
	 * is_all_at_normal is only assigned when the first two conditions
	 * hold (short-circuit evaluation); otherwise it stays 0 and
	 * pm_all_to_normal() is called below.
	 */
	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
		    "controlling process did something else\n", pmf,
		    PM_DEVICE(kept_dip)))
		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
		return;
	}
	/* if we got here the keeper had a transition from OFF->ON */
	if (hold)
		pm_hold_power(kept_dip);

	/* Best effort: the result of pm_all_to_normal() is ignored. */
	if (!is_all_at_normal)
		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
}
3848
/*
 * A bunch of stuff that belongs only to the next routine (or two)
 */

/* "NAME=" prefix and its length without the terminating NUL */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* name of the property that describes a device's components */
static const char pmcompstr[] = "pm-components";

/* Node of a singly linked list of component descriptions */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};

/*
 * Local ASCII-only character classification.  Both macros evaluate their
 * argument more than once, so it must not have side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3866
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * Finishes the component description *compp from the level data gathered
 * so far: it allocates the level-name buffer and the name, value and
 * threshold arrays, then copies the 'level' collected entries into them.
 * Every threshold is initialised to INT_MAX.
 *
 * The macro is not self-contained.  It uses these variables from the scope
 * in which it is expanded: compp, size, level, tp, j, lvals, lnames and
 * lszs.  Each name is copied with strcpy() and tp is advanced by lszs[j],
 * so 'size' must cover the sum of those sizes.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3890
3891/*
3892 * Create (empty) component data structures.
3893 */
3894static void
3895e_pm_create_components(dev_info_t *dip, int num_components)
3896{
3897	struct pm_component *compp, *ocompp;
3898	int i, size = 0;
3899
3900	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3901	ASSERT(!DEVI(dip)->devi_pm_components);
3902	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3903	size = sizeof (struct pm_component) * num_components;
3904
3905	compp = kmem_zalloc(size, KM_SLEEP);
3906	ocompp = compp;
3907	DEVI(dip)->devi_pm_comp_size = size;
3908	DEVI(dip)->devi_pm_num_components = num_components;
3909	PM_LOCK_BUSY(dip);
3910	for (i = 0; i < num_components;  i++) {
3911		compp->pmc_timestamp = gethrestime_sec();
3912		compp->pmc_norm_pwr = (uint_t)-1;
3913		compp++;
3914	}
3915	PM_UNLOCK_BUSY(dip);
3916	DEVI(dip)->devi_pm_components = ocompp;
3917	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3918}
3919
/*
 * Parse hex or decimal value from char string
 *
 * The string must start with a decimal digit.  A leading "0x" or "0X"
 * introduces a hexadecimal number; anything else is read as decimal.
 *
 *	cp	string to parse
 *	valp	where the parsed value is stored on success
 *
 * Returns a pointer to the first character after the number, or NULL
 * (leaving *valp untouched) if the string does not start with a digit.
 * "0x" with no hex digits after it parses as 0.
 *
 * The digits are accumulated in place, so a digit run of any length is
 * handled without a fixed-size scratch buffer.  The accumulator is unsigned
 * so that a value too large for an int wraps modulo 2^N (N being the width
 * of unsigned int) instead of invoking undefined signed overflow.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	int ch, digit;

	ch = *cp++;
	if (!isdigit(ch))
		return (NULL);

	if (ch == '0' && ((ch = *cp++) == 'x' || ch == 'X')) {
		/* hexadecimal: consume digits following the "0x" prefix */
		for (ch = *cp++; isxdigit(ch); ch = *cp++) {
			if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else
				digit = ch - '0';
			value = value * 16 + (unsigned int)digit;
		}
	} else {
		/*
		 * decimal: ch is either the first (non-zero) digit, or the
		 * character following a leading '0'
		 */
		for (; isdigit(ch); ch = *cp++)
			value = value * 10 + (unsigned int)(ch - '0');
	}

	/* cp is one past the terminating character; step back onto it */
	cp--;
	*valp = (int)value;
	return (cp);
}
3983
3984/*
3985 * Set max (previously documented as "normal") power.
3986 */
3987static void
3988e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3989{
3990	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3991}
3992
3993/*
3994 * Get max (previously documented as "normal") power.
3995 */
3996static int
3997e_pm_get_max_power(dev_info_t *dip, int component_number)
3998{
3999	return (PM_CP(dip, component_number)->pmc_norm_pwr);
4000}
4001
4002/*
4003 * Internal routine for destroying components
4004 * It is called even when there might not be any, so it must be forgiving.
4005 */
4006static void
4007e_pm_destroy_components(dev_info_t *dip)
4008{
4009	int i;
4010	struct pm_component *cp;
4011
4012	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4013	if (PM_NUMCMPTS(dip) == 0)
4014		return;
4015	cp = DEVI(dip)->devi_pm_components;
4016	ASSERT(cp);
4017	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4018		int nlevels = cp->pmc_comp.pmc_numlevels;
4019		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4020		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4021		/*
4022		 * For BC nodes, the rest is static in bc_comp, so skip it
4023		 */
4024		if (PM_ISBC(dip))
4025			continue;
4026		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4027		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4028		kmem_free(cp->pmc_comp.pmc_lname_buf,
4029		    cp->pmc_comp.pmc_lnames_sz);
4030	}
4031	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4032	DEVI(dip)->devi_pm_components = NULL;
4033	DEVI(dip)->devi_pm_num_components = 0;
4034	DEVI(dip)->devi_pm_flags &=
4035	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4036}
4037
4038/*
4039 * Read the pm-components property (if there is one) and use it to set up
4040 * components.  Returns a pointer to an array of component structures if
4041 * pm-components found and successfully parsed, else returns NULL.
4042 * Sets error return *errp to true to indicate a failure (as opposed to no
4043 * property being present).
4044 */
4045pm_comp_t *
4046pm_autoconfig(dev_info_t *dip, int *errp)
4047{
4048	PMD_FUNC(pmf, "autoconfig")
4049	uint_t nelems;
4050	char **pp;
4051	pm_comp_t *compp = NULL;
4052	int i, j, level, components = 0;
4053	size_t size = 0;
4054	struct pm_comp_pkg *p, *ptail;
4055	struct pm_comp_pkg *phead = NULL;
4056	int *lvals = NULL;
4057	int *lszs = NULL;
4058	int *np = NULL;
4059	int npi = 0;
4060	char **lnames = NULL;
4061	char *cp, *tp;
4062	pm_comp_t *ret = NULL;
4063
4064	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4065	*errp = 0;	/* assume success */
4066	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4067	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4068		return (NULL);
4069	}
4070
4071	if (nelems < 3) {	/* need at least one name and two levels */
4072		goto errout;
4073	}
4074
4075	/*
4076	 * pm_create_components is no longer allowed
4077	 */
4078	if (PM_NUMCMPTS(dip) != 0) {
4079		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4080		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4081		goto errout;
4082	}
4083
4084	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4085	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4086	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4087	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4088
4089	level = 0;
4090	phead = NULL;
4091	for (i = 0; i < nelems; i++) {
4092		cp = pp[i];
4093		if (!isdigit(*cp)) {	/*  must be name */
4094			if (strncmp(cp, namestr, nameln) != 0) {
4095				goto errout;
4096			}
4097			if (i != 0) {
4098				if (level == 0) {	/* no level spec'd */
4099					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4100					    pmf))
4101					goto errout;
4102				}
4103				np[npi++] = lvals[level - 1];
4104				/* finish up previous component levels */
4105				FINISH_COMP;
4106			}
4107			cp += nameln;
4108			if (!*cp) {
4109				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4110				goto errout;
4111			}
4112			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4113			if (phead == NULL) {
4114				phead = ptail = p;
4115			} else {
4116				ptail->next = p;
4117				ptail = p;
4118			}
4119			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4120			    KM_SLEEP);
4121			compp->pmc_name_sz = strlen(cp) + 1;
4122			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4123			    KM_SLEEP);
4124			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4125			components++;
4126			level = 0;
4127		} else {	/* better be power level <num>=<name> */
4128#ifdef DEBUG
4129			tp = cp;
4130#endif
4131			if (i == 0 ||
4132			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4133				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4134				goto errout;
4135			}
4136#ifdef DEBUG
4137			tp = cp;
4138#endif
4139			if (*cp++ != '=' || !*cp) {
4140				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4141				goto errout;
4142			}
4143
4144			lszs[level] = strlen(cp) + 1;
4145			size += lszs[level];
4146			lnames[level] = cp;	/* points into prop string */
4147			level++;
4148		}
4149	}
4150	np[npi++] = lvals[level - 1];
4151	if (level == 0) {	/* ended with a name */
4152		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4153		goto errout;
4154	}
4155	FINISH_COMP;
4156
4157
4158	/*
4159	 * Now we have a list of components--we have to return instead an
4160	 * array of them, but we can just copy the top level and leave
4161	 * the rest as is
4162	 */
4163	(void) e_pm_create_components(dip, components);
4164	for (i = 0; i < components; i++)
4165		e_pm_set_max_power(dip, i, np[i]);
4166
4167	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4168	for (i = 0, p = phead; i < components; i++) {
4169		ASSERT(p);
4170		/*
4171		 * Now sanity-check values:  levels must be monotonically
4172		 * increasing
4173		 */
4174		if (p->comp->pmc_numlevels < 2) {
4175			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4176			    "levels\n", pmf,
4177			    p->comp->pmc_name, PM_DEVICE(dip),
4178			    p->comp->pmc_numlevels))
4179			goto errout;
4180		}
4181		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4182			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4183			    (p->comp->pmc_lvals[j] <=
4184			    p->comp->pmc_lvals[j - 1]))) {
4185				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4186				    "not mono. incr, %d follows %d\n", pmf,
4187				    p->comp->pmc_name, PM_DEVICE(dip),
4188				    p->comp->pmc_lvals[j],
4189				    p->comp->pmc_lvals[j - 1]))
4190				goto errout;
4191			}
4192		}
4193		ret[i] = *p->comp;	/* struct assignment */
4194		for (j = 0; j < i; j++) {
4195			/*
4196			 * Test for unique component names
4197			 */
4198			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4199				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4200				    "unique\n", pmf, ret[j].pmc_name,
4201				    PM_DEVICE(dip)))
4202				goto errout;
4203			}
4204		}
4205		ptail = p;
4206		p = p->next;
4207		phead = p;	/* errout depends on phead making sense */
4208		kmem_free(ptail->comp, sizeof (*ptail->comp));
4209		kmem_free(ptail, sizeof (*ptail));
4210	}
4211out:
4212	ddi_prop_free(pp);
4213	if (lvals)
4214		kmem_free(lvals, nelems * sizeof (int));
4215	if (lszs)
4216		kmem_free(lszs, nelems * sizeof (int));
4217	if (lnames)
4218		kmem_free(lnames, nelems * sizeof (char *));
4219	if (np)
4220		kmem_free(np, nelems * sizeof (int));
4221	return (ret);
4222
4223errout:
4224	e_pm_destroy_components(dip);
4225	*errp = 1;	/* signal failure */
4226	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4227	for (i = 0; i < nelems - 1; i++)
4228		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4229	if (nelems != 0)
4230		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4231	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4232	for (p = phead; p; ) {
4233		pm_comp_t *pp;
4234		int n;
4235
4236		ptail = p;
4237		/*
4238		 * Free component data structures
4239		 */
4240		pp = p->comp;
4241		n = pp->pmc_numlevels;
4242		if (pp->pmc_name_sz) {
4243			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4244		}
4245		if (pp->pmc_lnames_sz) {
4246			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4247		}
4248		if (pp->pmc_lnames) {
4249			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4250		}
4251		if (pp->pmc_thresh) {
4252			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4253		}
4254		if (pp->pmc_lvals) {
4255			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4256		}
4257		p = ptail->next;
4258		kmem_free(ptail, sizeof (*ptail));
4259	}
4260	if (ret != NULL)
4261		kmem_free(ret, components * sizeof (pm_comp_t));
4262	ret = NULL;
4263	goto out;
4264}
4265
4266/*
4267 * Set threshold values for a devices components by dividing the target
4268 * threshold (base) by the number of transitions and assign each transition
4269 * that threshold.  This will get the entire device down in the target time if
4270 * all components are idle and even if there are dependencies among components.
4271 *
4272 * Devices may well get powered all the way down before the target time, but
4273 * at least the EPA will be happy.
4274 */
4275void
4276pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4277{
4278	PMD_FUNC(pmf, "set_device_threshold")
4279	int target_threshold = (base * 95) / 100;
4280	int level, comp;		/* loop counters */
4281	int transitions = 0;
4282	int ncomp = PM_NUMCMPTS(dip);
4283	int thresh;
4284	int remainder;
4285	pm_comp_t *pmc;
4286	int i, circ;
4287
4288	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4289	PM_LOCK_DIP(dip);
4290	/*
4291	 * First we handle the easy one.  If we're setting the default
4292	 * threshold for a node with children, then we set it to the
4293	 * default nexus threshold (currently 0) and mark it as default
4294	 * nexus threshold instead
4295	 */
4296	if (PM_IS_NEXUS(dip)) {
4297		if (flag == PMC_DEF_THRESH) {
4298			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4299			    PM_DEVICE(dip)))
4300			thresh = pm_default_nexus_threshold;
4301			for (comp = 0; comp < ncomp; comp++) {
4302				pmc = &PM_CP(dip, comp)->pmc_comp;
4303				for (level = 1; level < pmc->pmc_numlevels;
4304				    level++) {
4305					pmc->pmc_thresh[level] = thresh;
4306				}
4307			}
4308			DEVI(dip)->devi_pm_dev_thresh =
4309			    pm_default_nexus_threshold;
4310			/*
4311			 * If the nexus node is being reconfigured back to
4312			 * the default threshold, adjust the notlowest count.
4313			 */
4314			if (DEVI(dip)->devi_pm_flags &
4315			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4316				PM_LOCK_POWER(dip, &circ);
4317				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4318					if (PM_CURPOWER(dip, i) == 0)
4319						continue;
4320					mutex_enter(&pm_compcnt_lock);
4321					ASSERT(pm_comps_notlowest);
4322					pm_comps_notlowest--;
4323					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4324					    "notlowest to %d\n", pmf,
4325					    PM_DEVICE(dip), pm_comps_notlowest))
4326					if (pm_comps_notlowest == 0)
4327						pm_ppm_notify_all_lowest(dip,
4328						    PM_ALL_LOWEST);
4329					mutex_exit(&pm_compcnt_lock);
4330				}
4331				PM_UNLOCK_POWER(dip, circ);
4332			}
4333			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4334			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4335			PM_UNLOCK_DIP(dip);
4336			return;
4337		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4338			/*
4339			 * If the nexus node is being configured for a
4340			 * non-default threshold, include that node in
4341			 * the notlowest accounting.
4342			 */
4343			PM_LOCK_POWER(dip, &circ);
4344			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4345				if (PM_CURPOWER(dip, i) == 0)
4346					continue;
4347				mutex_enter(&pm_compcnt_lock);
4348				if (pm_comps_notlowest == 0)
4349					pm_ppm_notify_all_lowest(dip,
4350					    PM_NOT_ALL_LOWEST);
4351				pm_comps_notlowest++;
4352				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4353				    "notlowest to %d\n", pmf,
4354				    PM_DEVICE(dip), pm_comps_notlowest))
4355				mutex_exit(&pm_compcnt_lock);
4356			}
4357			PM_UNLOCK_POWER(dip, circ);
4358		}
4359	}
4360	/*
4361	 * Compute the total number of transitions for all components
4362	 * of the device.  Distribute the threshold evenly over them
4363	 */
4364	for (comp = 0; comp < ncomp; comp++) {
4365		pmc = &PM_CP(dip, comp)->pmc_comp;
4366		ASSERT(pmc->pmc_numlevels > 1);
4367		transitions += pmc->pmc_numlevels - 1;
4368	}
4369	ASSERT(transitions);
4370	thresh = target_threshold / transitions;
4371
4372	for (comp = 0; comp < ncomp; comp++) {
4373		pmc = &PM_CP(dip, comp)->pmc_comp;
4374		for (level = 1; level < pmc->pmc_numlevels; level++) {
4375			pmc->pmc_thresh[level] = thresh;
4376		}
4377	}
4378
4379#ifdef DEBUG
4380	for (comp = 0; comp < ncomp; comp++) {
4381		pmc = &PM_CP(dip, comp)->pmc_comp;
4382		for (level = 1; level < pmc->pmc_numlevels; level++) {
4383			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4384			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4385			    comp, level, pmc->pmc_thresh[level]))
4386		}
4387	}
4388#endif
4389	/*
4390	 * Distribute any remainder till they are all gone
4391	 */
4392	remainder = target_threshold - thresh * transitions;
4393	level = 1;
4394#ifdef DEBUG
4395	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4396	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4397	    transitions))
4398#endif
4399	while (remainder > 0) {
4400		comp = 0;
4401		while (remainder && (comp < ncomp)) {
4402			pmc = &PM_CP(dip, comp)->pmc_comp;
4403			if (level < pmc->pmc_numlevels) {
4404				pmc->pmc_thresh[level] += 1;
4405				remainder--;
4406			}
4407			comp++;
4408		}
4409		level++;
4410	}
4411#ifdef DEBUG
4412	for (comp = 0; comp < ncomp; comp++) {
4413		pmc = &PM_CP(dip, comp)->pmc_comp;
4414		for (level = 1; level < pmc->pmc_numlevels; level++) {
4415			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4416			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4417			    comp, level, pmc->pmc_thresh[level]))
4418		}
4419	}
4420#endif
4421	ASSERT(PM_IAM_LOCKING_DIP(dip));
4422	DEVI(dip)->devi_pm_dev_thresh = base;
4423	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4424	DEVI(dip)->devi_pm_flags |= flag;
4425	PM_UNLOCK_DIP(dip);
4426}
4427
4428/*
4429 * Called when there is no old-style platform power management driver
4430 */
4431static int
4432ddi_no_platform_power(power_req_t *req)
4433{
4434	_NOTE(ARGUNUSED(req))
4435	return (DDI_FAILURE);
4436}
4437
/*
 * Entry point supplied by the platform-specific pm driver to bring the
 * device component 'pm_cmpt' to power level 'pm_level'.  It defaults to
 * ddi_no_platform_power(), which fails every request.
 *
 * Using a global to obtain the function from the platform-specific pm
 * driver is not ideal, but it is simple and efficient.  The previous
 * property lookup was being done in the idle loop on swift systems without
 * pmc chips and hurt deskbench performance as well as violating scheduler
 * locking rules.
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4448
4449/*
4450 * Old obsolete interface for a device to request a power change (but only
4451 * an increase in power)
4452 */
4453int
4454ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4455{
4456	return (pm_raise_power(dip, cmpt, level));
4457}
4458
4459/*
4460 * The old obsolete interface to platform power management.  Only used by
4461 * Gypsy platform and APM on X86.
4462 */
4463int
4464ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4465{
4466	power_req_t	request;
4467
4468	request.request_type = PMR_SET_POWER;
4469	request.req.set_power_req.who = dip;
4470	request.req.set_power_req.cmpt = pm_cmpt;
4471	request.req.set_power_req.level = pm_level;
4472	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4473}
4474
4475/*
4476 * A driver can invoke this from its detach routine when DDI_SUSPEND is
4477 * passed.  Returns true if subsequent processing could result in power being
4478 * removed from the device.  The arg is not currently used because it is
4479 * implicit in the operation of cpr/DR.
4480 */
4481int
4482ddi_removing_power(dev_info_t *dip)
4483{
4484	_NOTE(ARGUNUSED(dip))
4485	return (pm_powering_down);
4486}
4487
4488/*
4489 * Returns true if a device indicates that its parent handles suspend/resume
4490 * processing for it.
4491 */
4492int
4493e_ddi_parental_suspend_resume(dev_info_t *dip)
4494{
4495	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4496}
4497
4498/*
4499 * Called for devices which indicate that their parent does suspend/resume
4500 * handling for them
4501 */
4502int
4503e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4504{
4505	power_req_t	request;
4506	request.request_type = PMR_SUSPEND;
4507	request.req.suspend_req.who = dip;
4508	request.req.suspend_req.cmd = cmd;
4509	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4510}
4511
4512/*
4513 * Called for devices which indicate that their parent does suspend/resume
4514 * handling for them
4515 */
4516int
4517e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4518{
4519	power_req_t	request;
4520	request.request_type = PMR_RESUME;
4521	request.req.resume_req.who = dip;
4522	request.req.resume_req.cmd = cmd;
4523	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4524}
4525
4526/*
4527 * Old obsolete exported interface for drivers to create components.
4528 * This is now handled by exporting the pm-components property.
4529 */
4530int
4531pm_create_components(dev_info_t *dip, int num_components)
4532{
4533	PMD_FUNC(pmf, "pm_create_components")
4534
4535	if (num_components < 1)
4536		return (DDI_FAILURE);
4537
4538	if (!DEVI_IS_ATTACHING(dip)) {
4539		return (DDI_FAILURE);
4540	}
4541
4542	/* don't need to lock dip because attach is single threaded */
4543	if (DEVI(dip)->devi_pm_components) {
4544		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4545		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4546		return (DDI_FAILURE);
4547	}
4548	e_pm_create_components(dip, num_components);
4549	DEVI(dip)->devi_pm_flags |= PMC_BC;
4550	e_pm_default_components(dip, num_components);
4551	return (DDI_SUCCESS);
4552}
4553
4554/*
4555 * Obsolete interface previously called by drivers to destroy their components
4556 * at detach time.  This is now done automatically.  However, we need to keep
4557 * this for the old drivers.
4558 */
4559void
4560pm_destroy_components(dev_info_t *dip)
4561{
4562	PMD_FUNC(pmf, "pm_destroy_components")
4563	dev_info_t *pdip = ddi_get_parent(dip);
4564
4565	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4566	    PM_DEVICE(dip)))
4567	ASSERT(DEVI_IS_DETACHING(dip));
4568#ifdef DEBUG
4569	if (!PM_ISBC(dip))
4570		cmn_err(CE_WARN, "!driver exporting pm-components property "
4571		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4572		    PM_ADDR(dip));
4573#endif
4574	/*
4575	 * We ignore this unless this is an old-style driver, except for
4576	 * printing the message above
4577	 */
4578	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4579		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4580		    PM_DEVICE(dip)))
4581		return;
4582	}
4583	ASSERT(PM_GET_PM_INFO(dip));
4584
4585	/*
4586	 * pm_unmanage will clear info pointer later, after dealing with
4587	 * dependencies
4588	 */
4589	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4590	/*
4591	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4592	 * Parents that get notification are not adjusted because their
4593	 * kidsupcnt is always 0 (or 1 during probe and attach).
4594	 */
4595	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4596		pm_rele_power(pdip);
4597#ifdef DEBUG
4598	else {
4599		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4600		    pmf, PM_DEVICE(dip)))
4601	}
4602#endif
4603	e_pm_destroy_components(dip);
4604	/*
4605	 * Forget we ever knew anything about the components of this  device
4606	 */
4607	DEVI(dip)->devi_pm_flags &=
4608	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4609}
4610
4611/*
4612 * Exported interface for a driver to set a component busy.
4613 */
4614int
4615pm_busy_component(dev_info_t *dip, int cmpt)
4616{
4617	struct pm_component *cp;
4618
4619	ASSERT(dip != NULL);
4620	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4621		return (DDI_FAILURE);
4622	PM_LOCK_BUSY(dip);
4623	cp->pmc_busycount++;
4624	cp->pmc_timestamp = 0;
4625	PM_UNLOCK_BUSY(dip);
4626	return (DDI_SUCCESS);
4627}
4628
4629/*
4630 * Exported interface for a driver to set a component idle.
4631 */
4632int
4633pm_idle_component(dev_info_t *dip, int cmpt)
4634{
4635	PMD_FUNC(pmf, "pm_idle_component")
4636	struct pm_component *cp;
4637	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4638
4639	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4640		return (DDI_FAILURE);
4641
4642	PM_LOCK_BUSY(dip);
4643	if (cp->pmc_busycount) {
4644		if (--(cp->pmc_busycount) == 0)
4645			cp->pmc_timestamp = gethrestime_sec();
4646	} else {
4647		cp->pmc_timestamp = gethrestime_sec();
4648	}
4649
4650	PM_UNLOCK_BUSY(dip);
4651
4652	/*
4653	 * if device becomes idle during idle down period, try scan it down
4654	 */
4655	if (scanp && PM_IS_PID(dip)) {
4656		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4657		    PM_DEVICE(dip)))
4658		pm_rescan(dip);
4659		return (DDI_SUCCESS);
4660	}
4661
4662	/*
4663	 * handle scan not running with nexus threshold == 0
4664	 */
4665
4666	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4667		pm_rescan(dip);
4668	}
4669
4670	return (DDI_SUCCESS);
4671}
4672
4673/*
4674 * This is the old  obsolete interface called by drivers to set their normal
4675 * power.  Thus we can't fix its behavior or return a value.
4676 * This functionality is replaced by the pm-component property.
4677 * We'll only get components destroyed while no power management is
4678 * going on (and the device is detached), so we don't need a mutex here
4679 */
4680void
4681pm_set_normal_power(dev_info_t *dip, int comp, int level)
4682{
4683	PMD_FUNC(pmf, "set_normal_power")
4684#ifdef DEBUG
4685	if (!PM_ISBC(dip))
4686		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4687		    "(driver exporting pm-components property) ignored",
4688		    PM_NAME(dip), PM_ADDR(dip));
4689#endif
4690	if (PM_ISBC(dip)) {
4691		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4692		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4693		e_pm_set_max_power(dip, comp, level);
4694		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4695	}
4696}
4697
4698/*
4699 * Called on a successfully detached driver to free pm resources
4700 */
4701static void
4702pm_stop(dev_info_t *dip)
4703{
4704	PMD_FUNC(pmf, "stop")
4705	dev_info_t *pdip = ddi_get_parent(dip);
4706
4707	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4708	/* stopping scan, destroy scan data structure */
4709	if (!PM_ISBC(dip)) {
4710		pm_scan_stop(dip);
4711		pm_scan_fini(dip);
4712	}
4713
4714	if (PM_GET_PM_INFO(dip) != NULL) {
4715		if (pm_unmanage(dip) == DDI_SUCCESS) {
4716			/*
4717			 * Old style driver may have called
4718			 * pm_destroy_components already, but just in case ...
4719			 */
4720			e_pm_destroy_components(dip);
4721		} else {
4722			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4723			    pmf, PM_DEVICE(dip)))
4724		}
4725	} else {
4726		if (PM_NUMCMPTS(dip))
4727			e_pm_destroy_components(dip);
4728		else {
4729			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4730				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4731				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4732					pm_rele_power(pdip);
4733				} else if (pdip &&
4734				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4735					(void) mdi_power(pdip,
4736					    MDI_PM_RELE_POWER,
4737					    (void *)dip, NULL, 0);
4738				}
4739			}
4740		}
4741	}
4742}
4743
4744/*
4745 * The node is the subject of a reparse pm props ioctl. Throw away the old
4746 * info and start over.
4747 */
4748int
4749e_new_pm_props(dev_info_t *dip)
4750{
4751	if (PM_GET_PM_INFO(dip) != NULL) {
4752		pm_stop(dip);
4753
4754		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4755			return (DDI_FAILURE);
4756		}
4757	}
4758	e_pm_props(dip);
4759	return (DDI_SUCCESS);
4760}
4761
4762/*
4763 * Device has been attached, so process its pm properties
4764 */
4765void
4766e_pm_props(dev_info_t *dip)
4767{
4768	char *pp;
4769	int len;
4770	int flags = 0;
4771	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4772
4773	/*
4774	 * It doesn't matter if we do this more than once, we should always
4775	 * get the same answers, and if not, then the last one in is the
4776	 * best one.
4777	 */
4778	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4779	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4780		if (strcmp(pp, "needs-suspend-resume") == 0) {
4781			flags = PMC_NEEDS_SR;
4782		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4783			flags = PMC_NO_SR;
4784		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4785			flags = PMC_PARENTAL_SR;
4786		} else {
4787			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4788			    "%s property value '%s'", PM_NAME(dip),
4789			    PM_ADDR(dip), "pm-hardware-state", pp);
4790		}
4791		kmem_free(pp, len);
4792	}
4793	/*
4794	 * This next segment (PMC_WANTS_NOTIFY) is in
4795	 * support of nexus drivers which will want to be involved in
4796	 * (or at least notified of) their child node's power level transitions.
4797	 * "pm-want-child-notification?" is defined by the parent.
4798	 */
4799	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4800	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4801		flags |= PMC_WANTS_NOTIFY;
4802	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4803	    dip, propflag, "pm-want-child-notification?"));
4804	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4805	    "no-involuntary-power-cycles"))
4806		flags |= PMC_NO_INVOL;
4807	/*
4808	 * Is the device a CPU device?
4809	 */
4810	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4811	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4812		if (strcmp(pp, "CPU") == 0) {
4813			flags |= PMC_CPU_DEVICE;
4814		} else {
4815			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4816			    "%s property value '%s'", PM_NAME(dip),
4817			    PM_ADDR(dip), "pm-class", pp);
4818		}
4819		kmem_free(pp, len);
4820	}
4821	/* devfs single threads us */
4822	DEVI(dip)->devi_pm_flags |= flags;
4823}
4824
/*
 * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
 * driver which has claimed a node.
 * Sets old_power in arg struct.
 *
 * dip is expected to be NULL (asserted below); rdip is the node the request
 * is about.  arg points to the power_req_t describing the request and
 * result is only written for PMR_PPM_TRY_LOCK_POWER.
 *
 * Returns DDI_SUCCESS for the request types handled here (for
 * PMR_PPM_SET_POWER, whatever pm_power() returns) and DDI_FAILURE for any
 * other request type or ctlop.
 */
static int
pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result)
{
	_NOTE(ARGUNUSED(dip))
	PMD_FUNC(pmf, "ctlops")
	power_req_t *reqp = (power_req_t *)arg;
	int retval;
	dev_info_t *target_dip;
	int new_level, old_level, cmpt;
#ifdef PMDDEBUG
	char *format;
#endif

	/*
	 * The interface for doing the actual power level changes is now
	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
	 * different platform-specific power control drivers.
	 *
	 * This driver implements the "default" version of this interface.
	 * If no ppm driver has been installed then this interface is called
	 * instead.
	 */
	ASSERT(dip == NULL);
	switch (ctlop) {
	case DDI_CTLOPS_POWER:
		switch (reqp->request_type) {
		case PMR_PPM_SET_POWER:
		{
			/*
			 * Record the current level in the request, then ask
			 * pm_power() to make the change.
			 */
			target_dip = reqp->req.ppm_set_power_req.who;
			ASSERT(target_dip == rdip);
			new_level = reqp->req.ppm_set_power_req.new_level;
			cmpt = reqp->req.ppm_set_power_req.cmpt;
			/* pass back old power for the PM_LEVEL_UNKNOWN case */
			old_level = PM_CURPOWER(target_dip, cmpt);
			reqp->req.ppm_set_power_req.old_level = old_level;
			retval = pm_power(target_dip, cmpt, new_level);
			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
			    old_level, new_level, (retval == DDI_SUCCESS ?
			    "chd" : "no chg")))
			return (retval);
		}

		/*
		 * Configuration notifications: nothing to do here beyond
		 * (optionally) logging them.
		 */
		case PMR_PPM_PRE_DETACH:
		case PMR_PPM_POST_DETACH:
		case PMR_PPM_PRE_ATTACH:
		case PMR_PPM_POST_ATTACH:
		case PMR_PPM_PRE_PROBE:
		case PMR_PPM_POST_PROBE:
		case PMR_PPM_PRE_RESUME:
		case PMR_PPM_INIT_CHILD:
		case PMR_PPM_UNINIT_CHILD:
#ifdef PMDDEBUG
			/* pick the debug format matching the request type */
			switch (reqp->request_type) {
				case PMR_PPM_PRE_DETACH:
					format = "%s: PMR_PPM_PRE_DETACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_DETACH:
					format = "%s: PMR_PPM_POST_DETACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_ATTACH:
					format = "%s: PMR_PPM_PRE_ATTACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_ATTACH:
					format = "%s: PMR_PPM_POST_ATTACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_PROBE:
					format = "%s: PMR_PPM_PRE_PROBE "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_PROBE:
					format = "%s: PMR_PPM_POST_PROBE "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_RESUME:
					format = "%s: PMR_PPM_PRE_RESUME "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_INIT_CHILD:
					format = "%s: PMR_PPM_INIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_UNINIT_CHILD:
					format = "%s: PMR_PPM_UNINIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				default:
					break;
			}
			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
			    reqp->req.ppm_config_req.result))
#endif
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_CHANGE_NOTIFY:
			/*
			 * Nothing for us to do
			 */
			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
			    reqp->req.ppm_notify_level_req.cmpt,
			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
			    reqp->req.ppm_notify_level_req.cmpt),
			    reqp->req.ppm_notify_level_req.new_level))
			return (DDI_SUCCESS);

		case PMR_PPM_UNMANAGE:
			/* only logged; no action taken */
			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(rdip)))
			return (DDI_SUCCESS);

		/*
		 * Power lock requests are handed to the single-node lock
		 * routines.
		 */
		case PMR_PPM_LOCK_POWER:
			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_UNLOCK_POWER:
			pm_unlock_power_single(
			    reqp->req.ppm_unlock_power_req.who,
			    reqp->req.ppm_unlock_power_req.circ);
			return (DDI_SUCCESS);

		case PMR_PPM_TRY_LOCK_POWER:
			/* outcome of the attempt is passed back via result */
			*(int *)result = pm_try_locking_power_single(
			    reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_LOCK_OWNER:
			/* report the node's devi_busy_thread as the owner */
			target_dip = reqp->req.ppm_power_lock_owner_req.who;
			ASSERT(target_dip == rdip);
			reqp->req.ppm_power_lock_owner_req.owner =
			    DEVI(rdip)->devi_busy_thread;
			return (DDI_SUCCESS);
		default:
			PMD(PMD_ERROR, ("%s: default!\n", pmf))
			return (DDI_FAILURE);
		}

	default:
		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
		return (DDI_FAILURE);
	}
}
4981
4982/*
4983 * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4984 * power_ops struct for this functionality instead?
4985 * However, we only ever do this on a ppm driver.
4986 */
4987int
4988pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4989{
4990	int (*fp)();
4991
4992	/* if no ppm handler, call the default routine */
4993	if (d == NULL) {
4994		return (pm_default_ctlops(d, r, op, a, v));
4995	}
4996	if (!d || !r)
4997		return (DDI_FAILURE);
4998	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4999	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
5000
5001	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
5002	return ((*fp)(d, r, op, a, v));
5003}
5004
5005/*
5006 * Called on a node when attach completes or the driver makes its first pm
5007 * call (whichever comes first).
5008 * In the attach case, device may not be power manageable at all.
5009 * Don't need to lock the dip because we're single threaded by the devfs code
5010 */
5011static int
5012pm_start(dev_info_t *dip)
5013{
5014	PMD_FUNC(pmf, "start")
5015	int ret;
5016	dev_info_t *pdip = ddi_get_parent(dip);
5017	int e_pm_manage(dev_info_t *, int);
5018	void pm_noinvol_specd(dev_info_t *dip);
5019
5020	e_pm_props(dip);
5021	pm_noinvol_specd(dip);
5022	/*
5023	 * If this dip has already been processed, don't mess with it
5024	 * (but decrement the speculative count we did above, as whatever
5025	 * code put it under pm already will have dealt with it)
5026	 */
5027	if (PM_GET_PM_INFO(dip)) {
5028		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5029		    pmf, PM_DEVICE(dip)))
5030		return (0);
5031	}
5032	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5033
5034	if (PM_GET_PM_INFO(dip) == NULL) {
5035		/*
5036		 * keep the kidsupcount increment as is
5037		 */
5038		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5039		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5040			pm_hold_power(pdip);
5041		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5042			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5043			    (void *)dip, NULL, 0);
5044		}
5045
5046		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5047		    "left up\n", pmf, PM_DEVICE(dip)))
5048	}
5049
5050	return (ret);
5051}
5052
5053/*
5054 * Keep a list of recorded thresholds.  For now we just keep a list and
5055 * search it linearly.  We don't expect too many entries.  Can always hash it
5056 * later if we need to.
5057 */
5058void
5059pm_record_thresh(pm_thresh_rec_t *rp)
5060{
5061	pm_thresh_rec_t *pptr, *ptr;
5062
5063	ASSERT(*rp->ptr_physpath);
5064	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5065	for (pptr = NULL, ptr = pm_thresh_head;
5066	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5067		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5068			/* replace this one */
5069			rp->ptr_next = ptr->ptr_next;
5070			if (pptr) {
5071				pptr->ptr_next = rp;
5072			} else {
5073				pm_thresh_head = rp;
5074			}
5075			rw_exit(&pm_thresh_rwlock);
5076			kmem_free(ptr, ptr->ptr_size);
5077			return;
5078		}
5079		continue;
5080	}
5081	/*
5082	 * There was not a match in the list, insert this one in front
5083	 */
5084	if (pm_thresh_head) {
5085		rp->ptr_next = pm_thresh_head;
5086		pm_thresh_head = rp;
5087	} else {
5088		rp->ptr_next = NULL;
5089		pm_thresh_head = rp;
5090	}
5091	rw_exit(&pm_thresh_rwlock);
5092}
5093
5094/*
5095 * Create a new dependency record and hang a new dependency entry off of it
5096 */
5097pm_pdr_t *
5098newpdr(char *kept, char *keeps, int isprop)
5099{
5100	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5101	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5102	p->pdr_size = size;
5103	p->pdr_isprop = isprop;
5104	p->pdr_kept_paths = NULL;
5105	p->pdr_kept_count = 0;
5106	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5107	(void) strcpy(p->pdr_kept, kept);
5108	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5109	(void) strcpy(p->pdr_keeper, keeps);
5110	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5111	    (intptr_t)p + size);
5112	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5113	    (intptr_t)p + size);
5114	return (p);
5115}
5116
5117/*
5118 * Keep a list of recorded dependencies.  We only keep the
5119 * keeper -> kept list for simplification. At this point We do not
5120 * care about whether the devices are attached or not yet,
5121 * this would be done in pm_keeper() and pm_kept().
5122 * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5123 * and it is up to the user to issue the ioctl again if they want it
5124 * (e.g. pmconfig)
5125 * Returns true if dependency already exists in the list.
5126 */
5127int
5128pm_record_keeper(char *kept, char *keeper, int isprop)
5129{
5130	PMD_FUNC(pmf, "record_keeper")
5131	pm_pdr_t *npdr, *ppdr, *pdr;
5132
5133	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5134	ASSERT(kept && keeper);
5135#ifdef DEBUG
5136	if (pm_debug & PMD_KEEPS)
5137		prdeps("pm_record_keeper entry");
5138#endif
5139	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5140	    ppdr = pdr, pdr = pdr->pdr_next) {
5141		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5142		    pdr->pdr_keeper))
5143		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5144		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5145			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5146			return (1);
5147		}
5148	}
5149	/*
5150	 * We did not find any match, so we have to make an entry
5151	 */
5152	npdr = newpdr(kept, keeper, isprop);
5153	if (ppdr) {
5154		ASSERT(ppdr->pdr_next == NULL);
5155		ppdr->pdr_next = npdr;
5156	} else {
5157		ASSERT(pm_dep_head == NULL);
5158		pm_dep_head = npdr;
5159	}
5160#ifdef DEBUG
5161	if (pm_debug & PMD_KEEPS)
5162		prdeps("pm_record_keeper after new record");
5163#endif
5164	if (!isprop)
5165		pm_unresolved_deps++;
5166	else
5167		pm_prop_deps++;
5168	return (0);
5169}
5170
5171/*
5172 * Look up this device in the set of devices we've seen ioctls for
5173 * to see if we are holding a threshold spec for it.  If so, make it so.
5174 * At ioctl time, we were given the physical path of the device.
5175 */
5176int
5177pm_thresh_specd(dev_info_t *dip)
5178{
5179	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5180	char *path = 0;
5181	char pathbuf[MAXNAMELEN];
5182	pm_thresh_rec_t *rp;
5183
5184	path = ddi_pathname(dip, pathbuf);
5185
5186	rw_enter(&pm_thresh_rwlock, RW_READER);
5187	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5188		if (strcmp(rp->ptr_physpath, path) != 0)
5189			continue;
5190		pm_apply_recorded_thresh(dip, rp);
5191		rw_exit(&pm_thresh_rwlock);
5192		return (1);
5193	}
5194	rw_exit(&pm_thresh_rwlock);
5195	return (0);
5196}
5197
5198static int
5199pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5200{
5201	PMD_FUNC(pmf, "set_keeping")
5202	int j, up = 0, circ;
5203	void prdeps(char *);
5204
5205	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5206	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5207#ifdef DEBUG
5208	if (pm_debug & PMD_KEEPS)
5209		prdeps("Before PAD\n");
5210#endif
5211	ASSERT(keeper != kept);
5212	if (PM_GET_PM_INFO(keeper) == NULL) {
5213		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5214		    "%s@%s(%s#%d), but the former is not power managed",
5215		    PM_DEVICE(keeper), PM_DEVICE(kept));
5216		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5217		    "power managed\n", pmf, PM_DEVICE(keeper)))
5218		return (0);
5219	}
5220	if (PM_GET_PM_INFO(kept) == NULL) {
5221		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5222		    "%s@%s(%s#%d), but the latter is not power managed",
5223		    PM_DEVICE(keeper), PM_DEVICE(kept));
5224		PMD((PMD_FAIL | PMD_KEEPS), ("%s: kept %s@%s(%s#%d) is not"
5225		    "power managed\n", pmf, PM_DEVICE(kept)))
5226		return (0);
5227	}
5228
5229	PM_LOCK_POWER(keeper, &circ);
5230	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5231		if (PM_CURPOWER(keeper, j)) {
5232			up++;
5233			break;
5234		}
5235	}
5236	if (up) {
5237		/* Bringup and maintain a hold on the kept */
5238		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5239		    PM_DEVICE(kept)))
5240		bring_pmdep_up(kept, 1);
5241	}
5242	PM_UNLOCK_POWER(keeper, circ);
5243#ifdef DEBUG
5244	if (pm_debug & PMD_KEEPS)
5245		prdeps("After PAD\n");
5246#endif
5247	return (1);
5248}
5249
/*
 * Should this device keep up another device?
 * Look up this device in the set of devices we've seen ioctls for
 * to see if we are holding a dependency spec for it.  If so, make it so.
 * Because we require the kept device to be attached already in order to
 * make the list entry (and hold it), we only need to look for keepers.
 * At ioctl time, we were given the physical path of the device.
 *
 * keeper is the path of the candidate keeper device.  Returns the sum of
 * the values returned by pm_apply_recorded_dep() (device dependencies) and
 * pm_set_keeping() (property dependencies) for the records that name this
 * keeper; 0 if nothing is recorded or the path does not resolve to a dip.
 */
int
pm_keeper(char *keeper)
{
	PMD_FUNC(pmf, "keeper")
	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
	dev_info_t *dip;
	pm_pdr_t *dp;
	dev_info_t *kept = NULL;
	int ret = 0;
	int i;

	/* nothing recorded at all: skip the path lookup */
	if (!pm_unresolved_deps && !pm_prop_deps)
		return (0);
	ASSERT(keeper != NULL);
	/* resolved dip is released with ddi_release_devi() before returning */
	dip = pm_name_to_dip(keeper, 1);
	if (dip == NULL)
		return (0);
	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
		if (!dp->pdr_isprop) {
			/* device-to-device dependency record */
			if (!pm_unresolved_deps)
				continue;
			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
			if (dp->pdr_satisfied) {
				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
				continue;
			}
			if (strcmp(dp->pdr_keeper, keeper) == 0) {
				ret += pm_apply_recorded_dep(dip, dp);
			}
		} else {
			/* property dependency record: may list many kepts */
			if (strcmp(dp->pdr_keeper, keeper) != 0)
				continue;
			for (i = 0; i < dp->pdr_kept_count; i++) {
				/* slots in the kept path array may be empty */
				if (dp->pdr_kept_paths[i] == NULL)
					continue;
				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
				if (kept == NULL)
					continue;
				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
				    DDI_PROP_DONTPASS, dp->pdr_kept));
				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
				    dp->pdr_kept_count))
				/* a device never keeps itself up */
				if (kept != dip) {
					ret += pm_set_keeping(dip, kept);
				}
				ddi_release_devi(kept);
			}

		}
	}
	ddi_release_devi(dip);
	return (ret);
}
5314
5315/*
5316 * Should this device be kept up by another device?
5317 * Look up all dependency recorded from PM_ADD_DEPENDENT and
5318 * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5319 * kept device lists.
5320 */
5321static int
5322pm_kept(char *keptp)
5323{
5324	PMD_FUNC(pmf, "kept")
5325	pm_pdr_t *dp;
5326	int found = 0;
5327	int ret = 0;
5328	dev_info_t *keeper;
5329	dev_info_t *kept;
5330	size_t length;
5331	int i;
5332	char **paths;
5333	char *path;
5334
5335	ASSERT(keptp != NULL);
5336	kept = pm_name_to_dip(keptp, 1);
5337	if (kept == NULL)
5338		return (0);
5339	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5340	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5341		if (dp->pdr_isprop) {
5342			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5343			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5344			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5345				/*
5346				 * Dont allow self dependency.
5347				 */
5348				if (strcmp(dp->pdr_keeper, keptp) == 0)
5349					continue;
5350				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5351				if (keeper == NULL)
5352					continue;
5353				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5354				    "%p\n", pmf, (void *)kept))
5355#ifdef DEBUG
5356				if (pm_debug & PMD_DEP)
5357					prdeps("Before Adding from pm_kept\n");
5358#endif
5359				/*
5360				 * Add ourselves to the dip list.
5361				 */
5362				if (dp->pdr_kept_count == 0) {
5363					length = strlen(keptp) + 1;
5364					path =
5365					    kmem_alloc(length, KM_SLEEP);
5366					paths = kmem_alloc(sizeof (char **),
5367					    KM_SLEEP);
5368					(void) strcpy(path, keptp);
5369					paths[0] = path;
5370					dp->pdr_kept_paths = paths;
5371					dp->pdr_kept_count++;
5372				} else {
5373					/* Check to see if already on list */
5374					for (i = 0; i < dp->pdr_kept_count;
5375					    i++) {
5376						if (strcmp(keptp,
5377						    dp->pdr_kept_paths[i])
5378						    == 0) {
5379							found++;
5380							break;
5381						}
5382					}
5383					if (found) {
5384						ddi_release_devi(keeper);
5385						continue;
5386					}
5387					length = dp->pdr_kept_count *
5388					    sizeof (char **);
5389					paths = kmem_alloc(
5390					    length + sizeof (char **),
5391					    KM_SLEEP);
5392					if (dp->pdr_kept_count) {
5393						bcopy(dp->pdr_kept_paths,
5394						    paths, length);
5395						kmem_free(dp->pdr_kept_paths,
5396						    length);
5397					}
5398					dp->pdr_kept_paths = paths;
5399					length = strlen(keptp) + 1;
5400					path =
5401					    kmem_alloc(length, KM_SLEEP);
5402					(void) strcpy(path, keptp);
5403					dp->pdr_kept_paths[i] = path;
5404					dp->pdr_kept_count++;
5405				}
5406#ifdef DEBUG
5407				if (pm_debug & PMD_DEP)
5408					prdeps("After from pm_kept\n");
5409#endif
5410				if (keeper) {
5411					ret += pm_set_keeping(keeper, kept);
5412					ddi_release_devi(keeper);
5413				}
5414			}
5415		} else {
5416			/*
5417			 * pm_keeper would be called later to do
5418			 * the actual pm_set_keeping.
5419			 */
5420			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5421			    pmf, (void *)kept))
5422#ifdef DEBUG
5423			if (pm_debug & PMD_DEP)
5424				prdeps("Before Adding from pm_kept\n");
5425#endif
5426			if (strcmp(keptp, dp->pdr_kept) == 0) {
5427				if (dp->pdr_kept_paths == NULL) {
5428					length = strlen(keptp) + 1;
5429					path =
5430					    kmem_alloc(length, KM_SLEEP);
5431					paths = kmem_alloc(sizeof (char **),
5432					    KM_SLEEP);
5433					(void) strcpy(path, keptp);
5434					paths[0] = path;
5435					dp->pdr_kept_paths = paths;
5436					dp->pdr_kept_count++;
5437				}
5438			}
5439#ifdef DEBUG
5440			if (pm_debug & PMD_DEP)
5441				prdeps("After from pm_kept\n");
5442#endif
5443		}
5444	}
5445	ddi_release_devi(kept);
5446	return (ret);
5447}
5448
5449/*
5450 * Apply a recorded dependency.  dp specifies the dependency, and
5451 * keeper is already known to be the device that keeps up the other (kept) one.
5452 * We have to the whole tree for the "kept" device, then apply
5453 * the dependency (which may already be applied).
5454 */
5455int
5456pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5457{
5458	PMD_FUNC(pmf, "apply_recorded_dep")
5459	dev_info_t *kept = NULL;
5460	int ret = 0;
5461	char *keptp = NULL;
5462
5463	/*
5464	 * Device to Device dependency can only be 1 to 1.
5465	 */
5466	if (dp->pdr_kept_paths == NULL)
5467		return (0);
5468	keptp = dp->pdr_kept_paths[0];
5469	if (keptp == NULL)
5470		return (0);
5471	ASSERT(*keptp != '\0');
5472	kept = pm_name_to_dip(keptp, 1);
5473	if (kept == NULL)
5474		return (0);
5475	if (kept) {
5476		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5477		    dp->pdr_keeper, keptp))
5478		if (pm_set_keeping(keeper, kept)) {
5479			ASSERT(dp->pdr_satisfied == 0);
5480			dp->pdr_satisfied = 1;
5481			ASSERT(pm_unresolved_deps);
5482			pm_unresolved_deps--;
5483			ret++;
5484		}
5485	}
5486	ddi_release_devi(kept);
5487
5488	return (ret);
5489}
5490
/*
 * Called from common/io/pm.c
 *
 * Externally visible wrapper that returns cur_power(cp) for the given
 * component.
 */
int
pm_cur_power(pm_component_t *cp)
{
	return (cur_power(cp));
}
5499
5500/*
5501 * External interface to sanity-check a power level.
5502 */
5503int
5504pm_valid_power(dev_info_t *dip, int comp, int level)
5505{
5506	PMD_FUNC(pmf, "valid_power")
5507
5508	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5509		return (e_pm_valid_power(dip, comp, level));
5510	else {
5511		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5512		    pmf, comp, PM_NUMCMPTS(dip), level))
5513		return (0);
5514	}
5515}
5516
5517/*
5518 * Called when a device that is direct power managed needs to change state.
5519 * This routine arranges to block the request until the process managing
5520 * the device makes the change (or some other incompatible change) or
5521 * the process closes /dev/pm.
5522 */
5523static int
5524pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5525{
5526	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5527	int ret = 0;
5528	void pm_dequeue_blocked(pm_rsvp_t *);
5529	void pm_enqueue_blocked(pm_rsvp_t *);
5530
5531	ASSERT(!pm_processes_stopped);
5532	ASSERT(PM_IAM_LOCKING_DIP(dip));
5533	new->pr_dip = dip;
5534	new->pr_comp = comp;
5535	new->pr_newlevel = newpower;
5536	new->pr_oldlevel = oldpower;
5537	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5538	mutex_enter(&pm_rsvp_lock);
5539	pm_enqueue_blocked(new);
5540	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5541	    PM_CANBLOCK_BLOCK);
5542	PM_UNLOCK_DIP(dip);
5543	/*
5544	 * truss may make the cv_wait_sig return prematurely
5545	 */
5546	while (ret == 0) {
5547		/*
5548		 * Normally there will be no user context involved, but if
5549		 * there is (e.g. we are here via an ioctl call to a driver)
5550		 * then we should allow the process to abort the request,
5551		 * or we get an unkillable process if the same thread does
5552		 * PM_DIRECT_PM and pm_raise_power
5553		 */
5554		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5555			ret = PMP_FAIL;
5556		} else {
5557			ret = new->pr_retval;
5558		}
5559	}
5560	pm_dequeue_blocked(new);
5561	mutex_exit(&pm_rsvp_lock);
5562	cv_destroy(&new->pr_cv);
5563	kmem_free(new, sizeof (*new));
5564	return (ret);
5565}
5566
/*
 * Returns true if the process is interested in power level changes (has issued
 * PM_GET_STATE_CHANGE ioctl).
 *
 * clone indexes the pm_interest[] array; the value stored there is
 * returned as is.
 */
int
pm_interest_registered(int clone)
{
	/* debug-only range check on the array index */
	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
	return (pm_interest[clone]);
}
5577
5578static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5579
5580/*
5581 * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5582 * watch all state transitions (dip == NULL).  Set up data
5583 * structs to communicate with process about state changes.
5584 */
5585void
5586pm_register_watcher(int clone, dev_info_t *dip)
5587{
5588	pscc_t	*p;
5589	psce_t	*psce;
5590
5591	/*
5592	 * We definitely need a control struct, then we have to search to see
5593	 * there is already an entries struct (in the dip != NULL case).
5594	 */
5595	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5596	pscc->pscc_clone = clone;
5597	pscc->pscc_dip = dip;
5598
5599	if (dip) {
5600		int found = 0;
5601		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5602		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5603			/*
5604			 * Already an entry for this clone, so just use it
5605			 * for the new one (for the case where a single
5606			 * process is watching multiple devices)
5607			 */
5608			if (p->pscc_clone == clone) {
5609				pscc->pscc_entries = p->pscc_entries;
5610				pscc->pscc_entries->psce_references++;
5611				found++;
5612				break;
5613			}
5614		}
5615		if (!found) {		/* create a new one */
5616			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5617			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5618			psce->psce_first =
5619			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5620			    KM_SLEEP);
5621			psce->psce_in = psce->psce_out = psce->psce_first;
5622			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5623			psce->psce_references = 1;
5624			pscc->pscc_entries = psce;
5625		}
5626		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5627		rw_exit(&pm_pscc_direct_rwlock);
5628	} else {
5629		ASSERT(!pm_interest_registered(clone));
5630		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5631#ifdef DEBUG
5632		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5633			/*
5634			 * Should not be an entry for this clone!
5635			 */
5636			ASSERT(p->pscc_clone != clone);
5637		}
5638#endif
5639		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5640		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5641		    PSCCOUNT, KM_SLEEP);
5642		psce->psce_in = psce->psce_out = psce->psce_first;
5643		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5644		psce->psce_references = 1;
5645		pscc->pscc_entries = psce;
5646		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5647		pm_interest[clone] = 1;
5648		rw_exit(&pm_pscc_interest_rwlock);
5649	}
5650}
5651
5652/*
5653 * Remove the given entry from the blocked list
5654 */
5655void
5656pm_dequeue_blocked(pm_rsvp_t *p)
5657{
5658	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5659	if (pm_blocked_list == p) {
5660		ASSERT(p->pr_prev == NULL);
5661		if (p->pr_next != NULL)
5662			p->pr_next->pr_prev = NULL;
5663		pm_blocked_list = p->pr_next;
5664	} else {
5665		ASSERT(p->pr_prev != NULL);
5666		p->pr_prev->pr_next = p->pr_next;
5667		if (p->pr_next != NULL)
5668			p->pr_next->pr_prev = p->pr_prev;
5669	}
5670}
5671
5672/*
5673 * Remove the given control struct from the given list
5674 */
5675static void
5676pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5677{
5678	if (*list == p) {
5679		ASSERT(p->pscc_prev == NULL);
5680		if (p->pscc_next != NULL)
5681			p->pscc_next->pscc_prev = NULL;
5682		*list = p->pscc_next;
5683	} else {
5684		ASSERT(p->pscc_prev != NULL);
5685		p->pscc_prev->pscc_next = p->pscc_next;
5686		if (p->pscc_next != NULL)
5687			p->pscc_next->pscc_prev = p->pscc_prev;
5688	}
5689}
5690
5691/*
5692 * Stick the control struct specified on the front of the list
5693 */
5694static void
5695pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5696{
5697	pscc_t *h;	/* entry at head of list */
5698	if ((h = *list) == NULL) {
5699		*list = p;
5700		ASSERT(p->pscc_next == NULL);
5701		ASSERT(p->pscc_prev == NULL);
5702	} else {
5703		p->pscc_next = h;
5704		ASSERT(h->pscc_prev == NULL);
5705		h->pscc_prev = p;
5706		ASSERT(p->pscc_prev == NULL);
5707		*list = p;
5708	}
5709}
5710
5711/*
5712 * If dip is NULL, process is closing "clone" clean up all its registrations.
5713 * Otherwise only clean up those for dip because process is just giving up
5714 * control of a direct device.
5715 */
5716void
5717pm_deregister_watcher(int clone, dev_info_t *dip)
5718{
5719	pscc_t	*p, *pn;
5720	psce_t	*psce;
5721	int found = 0;
5722
5723	if (dip == NULL) {
5724		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5725		for (p = pm_pscc_interest; p; p = pn) {
5726			pn = p->pscc_next;
5727			if (p->pscc_clone == clone) {
5728				pm_dequeue_pscc(p, &pm_pscc_interest);
5729				psce = p->pscc_entries;
5730				ASSERT(psce->psce_references == 1);
5731				mutex_destroy(&psce->psce_lock);
5732				kmem_free(psce->psce_first,
5733				    sizeof (pm_state_change_t) * PSCCOUNT);
5734				kmem_free(psce, sizeof (*psce));
5735				kmem_free(p, sizeof (*p));
5736			}
5737		}
5738		pm_interest[clone] = 0;
5739		rw_exit(&pm_pscc_interest_rwlock);
5740	}
5741	found = 0;
5742	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5743	for (p = pm_pscc_direct; p; p = pn) {
5744		pn = p->pscc_next;
5745		if ((dip && p->pscc_dip == dip) ||
5746		    (dip == NULL && clone == p->pscc_clone)) {
5747			ASSERT(clone == p->pscc_clone);
5748			found++;
5749			/*
5750			 * Remove from control list
5751			 */
5752			pm_dequeue_pscc(p, &pm_pscc_direct);
5753			/*
5754			 * If we're the last reference, free the
5755			 * entries struct.
5756			 */
5757			psce = p->pscc_entries;
5758			ASSERT(psce);
5759			if (psce->psce_references == 1) {
5760				kmem_free(psce->psce_first,
5761				    PSCCOUNT * sizeof (pm_state_change_t));
5762				kmem_free(psce, sizeof (*psce));
5763			} else {
5764				psce->psce_references--;
5765			}
5766			kmem_free(p, sizeof (*p));
5767		}
5768	}
5769	ASSERT(dip == NULL || found);
5770	rw_exit(&pm_pscc_direct_rwlock);
5771}
5772
5773/*
5774 * Search the indicated list for an entry that matches clone, and return a
5775 * pointer to it.  To be interesting, the entry must have something ready to
5776 * be passed up to the controlling process.
5777 * The returned entry will be locked upon return from this call.
5778 */
5779static psce_t *
5780pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5781{
5782	pscc_t	*p;
5783	psce_t	*psce;
5784	rw_enter(lock, RW_READER);
5785	for (p = *list; p; p = p->pscc_next) {
5786		if (clone == p->pscc_clone) {
5787			psce = p->pscc_entries;
5788			mutex_enter(&psce->psce_lock);
5789			if (psce->psce_out->size) {
5790				rw_exit(lock);
5791				return (psce);
5792			} else {
5793				mutex_exit(&psce->psce_lock);
5794			}
5795		}
5796	}
5797	rw_exit(lock);
5798	return (NULL);
5799}
5800
5801static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
/*
 * Find an entry for a particular clone in the direct list.
 *
 * Returns whatever pm_psc_find_clone() returns for the direct list and
 * its rwlock: an entries struct for clone, or NULL if none was found.
 */
psce_t *
pm_psc_clone_to_direct(int clone)
{
	return (pm_psc_find_clone(clone, &pm_pscc_direct,
	    &pm_pscc_direct_rwlock));
}
5811
/*
 * Find an entry for a particular clone in the interest list.
 *
 * Returns whatever pm_psc_find_clone() returns for the interest list and
 * its rwlock: an entries struct for clone, or NULL if none was found.
 */
psce_t *
pm_psc_clone_to_interest(int clone)
{
	return (pm_psc_find_clone(clone, &pm_pscc_interest,
	    &pm_pscc_interest_rwlock));
}
5821
5822/*
5823 * Put the given entry at the head of the blocked list
5824 */
5825void
5826pm_enqueue_blocked(pm_rsvp_t *p)
5827{
5828	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5829	ASSERT(p->pr_next == NULL);
5830	ASSERT(p->pr_prev == NULL);
5831	if (pm_blocked_list != NULL) {
5832		p->pr_next = pm_blocked_list;
5833		ASSERT(pm_blocked_list->pr_prev == NULL);
5834		pm_blocked_list->pr_prev = p;
5835		pm_blocked_list = p;
5836	} else {
5837		pm_blocked_list = p;
5838	}
5839}
5840
/*
 * Sets every power managed device back to its default threshold
 *
 * Walks the device tree from the root node, calling pm_set_dev_thr_walk()
 * on each node with a pointer to pm_system_idle_threshold as its argument.
 */
void
pm_all_to_default_thresholds(void)
{
	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
	    (void *) &pm_system_idle_threshold);
}
5850
5851static int
5852pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5853{
5854	int thr = (int)(*(int *)arg);
5855
5856	if (!PM_GET_PM_INFO(dip))
5857		return (DDI_WALK_CONTINUE);
5858	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5859	return (DDI_WALK_CONTINUE);
5860}
5861
5862/*
5863 * Returns the current threshold value (in seconds) for the indicated component
5864 */
5865int
5866pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5867{
5868	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5869		return (DDI_FAILURE);
5870	} else {
5871		*threshp = cur_threshold(dip, comp);
5872		return (DDI_SUCCESS);
5873	}
5874}
5875
/*
 * To be called when changing the power level of a component of a device.
 * On some platforms, changing power on one device may require that power
 * be changed on other, related devices in the same transaction.  Thus, we
 * always pass this request to the platform power manager so that all the
 * affected devices will be locked.
 *
 * circp is handed to the lock implementation through the request; the
 * value stored there must later be passed to pm_unlock_power().
 */
void
pm_lock_power(dev_info_t *dip, int *circp)
{
	power_req_t power_req;
	int result;

	/* build a PMR_PPM_LOCK_POWER request for this dip */
	power_req.request_type = PMR_PPM_LOCK_POWER;
	power_req.req.ppm_lock_power_req.who = dip;
	power_req.req.ppm_lock_power_req.circp = circp;
	/* the ctlops return value and result are deliberately ignored */
	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
}
5894
/*
 * Release the lock (or locks) acquired to change the power of a device.
 * See comments for pm_lock_power.
 *
 * circ is the value that was stored through circp when the lock was taken.
 */
void
pm_unlock_power(dev_info_t *dip, int circ)
{
	power_req_t power_req;
	int result;

	/* build a PMR_PPM_UNLOCK_POWER request for this dip */
	power_req.request_type = PMR_PPM_UNLOCK_POWER;
	power_req.req.ppm_unlock_power_req.who = dip;
	power_req.req.ppm_unlock_power_req.circ = circ;
	/* the ctlops return value and result are deliberately ignored */
	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
}
5910
5911
5912/*
5913 * Attempt (without blocking) to acquire the lock(s) needed to change the
5914 * power of a component of a device.  See comments for pm_lock_power.
5915 *
5916 * Return: 1 if lock(s) acquired, 0 if not.
5917 */
5918int
5919pm_try_locking_power(dev_info_t *dip, int *circp)
5920{
5921	power_req_t power_req;
5922	int result;
5923
5924	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5925	power_req.req.ppm_lock_power_req.who = dip;
5926	power_req.req.ppm_lock_power_req.circp = circp;
5927	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5928	return (result);
5929}
5930
5931
5932/*
5933 * Lock power state of a device.
5934 *
5935 * The implementation handles a special case where another thread may have
5936 * acquired the lock and created/launched this thread to do the work.  If
5937 * the lock cannot be acquired immediately, we check to see if this thread
5938 * is registered as a borrower of the lock.  If so, we may proceed without
5939 * the lock.  This assumes that the lending thread blocks on the completion
5940 * of this thread.
5941 *
5942 * Note 1: for use by ppm only.
5943 *
5944 * Note 2: On failing to get the lock immediately, we search lock_loan list
5945 * for curthread (as borrower of the lock).  On a hit, we check that the
5946 * lending thread already owns the lock we want.  It is safe to compare
5947 * devi_busy_thread and thread id of the lender because in the == case (the
5948 * only one we care about) we know that the owner is blocked.  Similarly,
5949 * If we find that curthread isn't registered as a lock borrower, it is safe
5950 * to use the blocking call (ndi_devi_enter) because we know that if we
5951 * weren't already listed as a borrower (upstream on the call stack) we won't
5952 * become one.
5953 */
5954void
5955pm_lock_power_single(dev_info_t *dip, int *circp)
5956{
5957	lock_loan_t *cur;
5958
5959	/* if the lock is available, we are done. */
5960	if (ndi_devi_tryenter(dip, circp))
5961		return;
5962
5963	mutex_enter(&pm_loan_lock);
5964	/* see if our thread is registered as a lock borrower. */
5965	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5966		if (cur->pmlk_borrower == curthread)
5967			break;
5968	mutex_exit(&pm_loan_lock);
5969
5970	/* if this thread not already registered, it is safe to block */
5971	if (cur == NULL)
5972		ndi_devi_enter(dip, circp);
5973	else {
5974		/* registered: does lender own the lock we want? */
5975		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5976			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5977			cur->pmlk_dip = dip;
5978		} else /* no: just block for it */
5979			ndi_devi_enter(dip, circp);
5980
5981	}
5982}
5983
5984/*
5985 * Drop the lock on the device's power state.  See comment for
5986 * pm_lock_power_single() for special implementation considerations.
5987 *
5988 * Note: for use by ppm only.
5989 */
5990void
5991pm_unlock_power_single(dev_info_t *dip, int circ)
5992{
5993	lock_loan_t *cur;
5994
5995	/* optimization: mutex not needed to check empty list */
5996	if (lock_loan_head.pmlk_next == NULL) {
5997		ndi_devi_exit(dip, circ);
5998		return;
5999	}
6000
6001	mutex_enter(&pm_loan_lock);
6002	/* see if our thread is registered as a lock borrower. */
6003	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
6004		if (cur->pmlk_borrower == curthread)
6005			break;
6006	mutex_exit(&pm_loan_lock);
6007
6008	if (cur == NULL || cur->pmlk_dip != dip)
6009		/* we acquired the lock directly, so return it */
6010		ndi_devi_exit(dip, circ);
6011}
6012
/*
 * Try to take the lock for changing the power level of a component.
 *
 * Returns the result of ndi_devi_tryenter() on dip; unlike
 * pm_lock_power_single() no lock-loan lookup is done here.
 *
 * Note: for use by ppm only.
 */
int
pm_try_locking_power_single(dev_info_t *dip, int *circp)
{
	return (ndi_devi_tryenter(dip, circp));
}
6023
6024#ifdef	DEBUG
6025/*
6026 * The following are used only to print out data structures for debugging
6027 */
6028void
6029prdeps(char *msg)
6030{
6031
6032	pm_pdr_t *rp;
6033	int i;
6034
6035	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6036	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6037		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6038		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6039		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6040		    (void *)rp->pdr_next);
6041		if (rp->pdr_kept_count != 0) {
6042			pm_log("kept list = ");
6043			i = 0;
6044			while (i < rp->pdr_kept_count) {
6045				pm_log("%s ", rp->pdr_kept_paths[i]);
6046				i++;
6047			}
6048			pm_log("\n");
6049		}
6050	}
6051}
6052
6053void
6054pr_noinvol(char *hdr)
6055{
6056	pm_noinvol_t *ip;
6057
6058	pm_log("%s\n", hdr);
6059	rw_enter(&pm_noinvol_rwlock, RW_READER);
6060	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6061		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6062		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6063	rw_exit(&pm_noinvol_rwlock);
6064}
6065#endif
6066
/*
 * Attempt to apply the thresholds indicated by rp to the node specified by
 * dip.
 *
 * Nothing is applied if dip has no pm info, is a backwards compatible
 * device (PM_ISBC), or pm_valid_thresh() rejects the record.  A record with
 * ptr_numcomps == 0 carries a single device threshold; otherwise it carries
 * one pm_pte_t per component.  After a successful update the device is
 * rescanned if it is scanable.
 */
void
pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
{
	PMD_FUNC(pmf, "apply_recorded_thresh")
	int i, j;
	int comps = PM_NUMCMPTS(dip);
	struct pm_component *cp;
	pm_pte_t *ep;
	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);

	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
	PM_LOCK_DIP(dip);
	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
		/* log each of the three conditions so the cause is visible */
		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
		PM_UNLOCK_DIP(dip);
		return;
	}

	ep = rp->ptr_entries;
	/*
	 * Here we do the special case of a device threshold
	 */
	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
		ASSERT(ep && ep->pte_numthresh == 1);
		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
		/* the dip lock is dropped before pm_set_device_threshold() */
		PM_UNLOCK_DIP(dip);
		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
		if (PM_SCANABLE(dip))
			pm_rescan(dip);
		return;
	}
	/* per-component thresholds: one pm_pte_t entry per component */
	for (i = 0; i < comps; i++) {
		cp = PM_CP(dip, i);
		for (j = 0; j < ep->pte_numthresh; j++) {
			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
			    "to %x\n", pmf, j, PM_DEVICE(dip),
			    i, ep->pte_thresh[j]))
			/* recorded threshold j is stored at index j + 1 */
			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
		}
		ep++;
	}
	/*
	 * Mask the flags with PMC_THRESH_NONE, then mark the device as
	 * using component thresholds.
	 * NOTE(review): presumably PMC_THRESH_NONE is the complement of all
	 * threshold-type bits, so this clears the old type -- confirm.
	 */
	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
	PM_UNLOCK_DIP(dip);

	if (PM_SCANABLE(dip))
		pm_rescan(dip);
}
6126
6127/*
6128 * Returns true if the threshold specified by rp could be applied to dip
6129 * (that is, the number of components and transitions are the same)
6130 */
6131int
6132pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6133{
6134	PMD_FUNC(pmf, "valid_thresh")
6135	int comps, i;
6136	pm_component_t *cp;
6137	pm_pte_t *ep;
6138
6139	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6140		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6141		    rp->ptr_physpath))
6142		return (0);
6143	}
6144	/*
6145	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6146	 * an entry with numcomps == 0, (since we don't know how many
6147	 * components there are in advance).  This is always a valid
6148	 * spec.
6149	 */
6150	if (rp->ptr_numcomps == 0) {
6151		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6152		return (1);
6153	}
6154	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6155		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6156		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6157		return (0);
6158	}
6159	ep = rp->ptr_entries;
6160	for (i = 0; i < comps; i++) {
6161		cp = PM_CP(dip, i);
6162		if ((ep + i)->pte_numthresh !=
6163		    cp->pmc_comp.pmc_numlevels - 1) {
6164			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6165			    pmf, rp->ptr_physpath, i,
6166			    cp->pmc_comp.pmc_numlevels - 1,
6167			    (ep + i)->pte_numthresh))
6168			return (0);
6169		}
6170	}
6171	return (1);
6172}
6173
6174/*
6175 * Remove any recorded threshold for device physpath
6176 * We know there will be at most one.
6177 */
6178void
6179pm_unrecord_threshold(char *physpath)
6180{
6181	pm_thresh_rec_t *pptr, *ptr;
6182
6183	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6184	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6185		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6186			if (pptr) {
6187				pptr->ptr_next = ptr->ptr_next;
6188			} else {
6189				ASSERT(pm_thresh_head == ptr);
6190				pm_thresh_head = ptr->ptr_next;
6191			}
6192			kmem_free(ptr, ptr->ptr_size);
6193			break;
6194		}
6195		pptr = ptr;
6196	}
6197	rw_exit(&pm_thresh_rwlock);
6198}
6199
6200/*
6201 * Discard all recorded thresholds.  We are returning to the default pm state.
6202 */
6203void
6204pm_discard_thresholds(void)
6205{
6206	pm_thresh_rec_t *rp;
6207	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6208	while (pm_thresh_head) {
6209		rp = pm_thresh_head;
6210		pm_thresh_head = rp->ptr_next;
6211		kmem_free(rp, rp->ptr_size);
6212	}
6213	rw_exit(&pm_thresh_rwlock);
6214}
6215
6216/*
6217 * Discard all recorded dependencies.  We are returning to the default pm state.
6218 */
6219void
6220pm_discard_dependencies(void)
6221{
6222	pm_pdr_t *rp;
6223	int i;
6224	size_t length;
6225
6226#ifdef DEBUG
6227	if (pm_debug & PMD_DEP)
6228		prdeps("Before discard\n");
6229#endif
6230	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6231
6232#ifdef DEBUG
6233	if (pm_debug & PMD_DEP)
6234		prdeps("After discard\n");
6235#endif
6236	while (pm_dep_head) {
6237		rp = pm_dep_head;
6238		if (!rp->pdr_isprop) {
6239			ASSERT(rp->pdr_satisfied == 0);
6240			ASSERT(pm_unresolved_deps);
6241			pm_unresolved_deps--;
6242		} else {
6243			ASSERT(pm_prop_deps);
6244			pm_prop_deps--;
6245		}
6246		pm_dep_head = rp->pdr_next;
6247		if (rp->pdr_kept_count)  {
6248			for (i = 0; i < rp->pdr_kept_count; i++) {
6249				length = strlen(rp->pdr_kept_paths[i]) + 1;
6250				kmem_free(rp->pdr_kept_paths[i], length);
6251			}
6252			kmem_free(rp->pdr_kept_paths,
6253			    rp->pdr_kept_count * sizeof (char **));
6254		}
6255		kmem_free(rp, rp->pdr_size);
6256	}
6257}
6258
6259
6260static int
6261pm_discard_dep_walk(dev_info_t *dip, void *arg)
6262{
6263	_NOTE(ARGUNUSED(arg))
6264	char *pathbuf;
6265
6266	if (PM_GET_PM_INFO(dip) == NULL)
6267		return (DDI_WALK_CONTINUE);
6268	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6269	(void) ddi_pathname(dip, pathbuf);
6270	pm_free_keeper(pathbuf, 0);
6271	kmem_free(pathbuf, MAXPATHLEN);
6272	return (DDI_WALK_CONTINUE);
6273}
6274
6275static int
6276pm_kept_walk(dev_info_t *dip, void *arg)
6277{
6278	_NOTE(ARGUNUSED(arg))
6279	char *pathbuf;
6280
6281	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6282	(void) ddi_pathname(dip, pathbuf);
6283	(void) pm_kept(pathbuf);
6284	kmem_free(pathbuf, MAXPATHLEN);
6285
6286	return (DDI_WALK_CONTINUE);
6287}
6288
6289static int
6290pm_keeper_walk(dev_info_t *dip, void *arg)
6291{
6292	_NOTE(ARGUNUSED(arg))
6293	char *pathbuf;
6294
6295	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6296	(void) ddi_pathname(dip, pathbuf);
6297	(void) pm_keeper(pathbuf);
6298	kmem_free(pathbuf, MAXPATHLEN);
6299
6300	return (DDI_WALK_CONTINUE);
6301}
6302
6303static char *
6304pdw_type_decode(int type)
6305{
6306	switch (type) {
6307	case PM_DEP_WK_POWER_ON:
6308		return ("power on");
6309	case PM_DEP_WK_POWER_OFF:
6310		return ("power off");
6311	case PM_DEP_WK_DETACH:
6312		return ("detach");
6313	case PM_DEP_WK_REMOVE_DEP:
6314		return ("remove dep");
6315	case PM_DEP_WK_BRINGUP_SELF:
6316		return ("bringup self");
6317	case PM_DEP_WK_RECORD_KEEPER:
6318		return ("add dependent");
6319	case PM_DEP_WK_RECORD_KEEPER_PROP:
6320		return ("add dependent property");
6321	case PM_DEP_WK_