xref: /illumos-gate/usr/src/uts/common/os/sunpm.c (revision c6f039c7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
27  */
28 
29 /*
30  * sunpm.c builds sunpm.o	"power management framework"
31  *	kernel-resident power management code.  Implements power management
32  *	policy
33  *	Assumes: all backwards compat. device components wake up on &
34  *		 the pm_info pointer in dev_info is initially NULL
35  *
36  * PM - (device) Power Management
37  *
38  * Each device may have 0 or more components.  If a device has no components,
39  * then it can't be power managed.  Each component has 2 or more
40  * power states.
41  *
42  * "Backwards Compatible" (bc) devices:
43  * There are two different types of devices from the point of view of this
44  * code.  The original type, left over from the original PM implementation on
45  * the voyager platform are known in this code as "backwards compatible"
46  * devices (PM_ISBC(dip) returns true).
47  * They are recognized by the pm code by the lack of a pm-components property
48  * and a call made by the driver to pm_create_components(9F).
49  * For these devices, component 0 is special, and represents the power state
50  * of the device.  If component 0 is to be set to power level 0 (off), then
51  * the framework must first call into the driver's detach(9E) routine with
52  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
53  * After setting component 0 from 0 to a non-zero power level, a call must be
54  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
55  *
56  * Currently, the only way to get a bc device power managed is via a set of
57  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
58  *
59  * For non-bc devices, the driver describes the components by exporting a
60  * pm-components(9P) property that tells how many components there are,
61  * tells what each component's power state values are, and provides human
62  * readable strings (currently unused) for each component name and power state.
63  * Devices which export pm-components(9P) are automatically power managed
64  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
65  * after parsing power.conf(4)). The exception to this rule is that power
66  * manageable CPU devices may be automatically managed independently of autopm
67  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
68  * ioctls) cpupm. If the CPU devices are not managed independently, then they
69  * are managed by autopm. In either case, for automatically power managed
70  * devices, all components are considered independent of each other, and it is
71  * up to the driver to decide when a transition requires saving or restoring
72  * hardware state.
73  *
74  * Each device component also has a threshold time associated with each power
75  * transition (see power.conf(4)), and a busy/idle state maintained by the
76  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
77  * Components are created idle.
78  *
79  * The PM framework provides several functions:
80  * -implement PM policy as described in power.conf(4)
81  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
82  *  Policies consist of:
83  *    -set threshold values (defaults if none provided by pmconfig)
84  *    -set dependencies among devices
85  *    -enable/disable autopm
86  *    -enable/disable cpupm
87  *    -turn down idle components based on thresholds (if autopm or cpupm is
88  *     enabled) (aka scanning)
89  *    -maintain power states based on dependencies among devices
90  *    -upon request, or when the frame buffer powers off, attempt to turn off
91  *     all components that are idle or become idle over the next (10 sec)
92  *     period in an attempt to get down to an EnergyStar compliant state
93  *    -prevent powering off of a device which exported the
94  *     pm-no-involuntary-power-cycles property without active involvement of
95  *     the device's driver (so no removing power when the device driver is
96  *     not attached)
97  * -provide a mechanism for a device driver to request that a device's component
98  *  be brought back to the power level necessary for the use of the device
99  * -allow a process to directly control the power levels of device components
100  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
101  * -ensure that the console frame buffer is powered up before being referenced
102  *  via prom_printf() or other prom calls that might generate console output
103  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
104  * -provide "backwards compatible" behavior for devices without pm-components
105  *  property
106  *
107  * Scanning:
108  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
109  * component of each managed device to its lowest power based on the threshold
110  * of idleness associated with each transition and the busy/idle state of the
111  * component.
112  *
113  * The actual work of this is done by pm_scan_dev(), which cycles through each
114  * component of a device, checking its idleness against its current threshold,
115  * and calling pm_set_power() as appropriate to change the power level.
116  * This function also indicates when it would next be profitable to scan the
117  * device again, and a new scan is scheduled after that time.
118  *
119  * Dependencies:
120  * It is possible to establish a dependency between the power states of two
121  * otherwise unrelated devices.  This is currently done to ensure that the
122  * cdrom is always up whenever the console framebuffer is up, so that the user
123  * can insert a cdrom and see a popup as a result.
124  *
125  * The dependency terminology used in power.conf(4) is not easy to understand,
126  * so we've adopted a different terminology in the implementation.  We write
127  * of a "keeps up" and a "kept up" device.  A relationship can be established
128  * where one device keeps up another.  That means that if the keepsup device
129  * has any component that is at a non-zero power level, all components of the
130  * "kept up" device must be brought to full power.  This relationship is
131  * asynchronous.  When the keeping device is powered up, a request is queued
132  * to a worker thread to bring up the kept device.  The caller does not wait.
133  * Scan will not turn down a kept up device.
134  *
 * Direct PM:
 * A device may be directly power managed by a process.  If a device is
 * directly pm'd, then it will not be scanned, and dependencies will not be
 * enforced.  If a directly pm'd device's driver requests a power change (via
 * pm_raise_power(9F)), then the request is blocked and notification is sent
 * to the controlling process, which must issue the requested power change for
 * the driver to proceed.
142  *
143  */
144 
145 #include <sys/types.h>
146 #include <sys/errno.h>
147 #include <sys/callb.h>		/* callback registration during CPR */
148 #include <sys/conf.h>		/* driver flags and functions */
149 #include <sys/open.h>		/* OTYP_CHR definition */
150 #include <sys/stat.h>		/* S_IFCHR definition */
151 #include <sys/pathname.h>	/* name -> dev_info xlation */
152 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
153 #include <sys/kmem.h>		/* memory alloc stuff */
154 #include <sys/debug.h>
155 #include <sys/archsystm.h>
156 #include <sys/pm.h>
157 #include <sys/ddi.h>
158 #include <sys/sunddi.h>
159 #include <sys/sunndi.h>
160 #include <sys/sunpm.h>
161 #include <sys/epm.h>
162 #include <sys/vfs.h>
163 #include <sys/mode.h>
164 #include <sys/mkdev.h>
165 #include <sys/promif.h>
166 #include <sys/consdev.h>
167 #include <sys/esunddi.h>
168 #include <sys/modctl.h>
169 #include <sys/fs/ufs_fs.h>
170 #include <sys/note.h>
171 #include <sys/taskq.h>
172 #include <sys/bootconf.h>
173 #include <sys/reboot.h>
174 #include <sys/spl.h>
175 #include <sys/disp.h>
176 #include <sys/sobject.h>
177 #include <sys/sunmdi.h>
178 #include <sys/systm.h>
179 #include <sys/cpuvar.h>
180 #include <sys/cyclic.h>
181 #include <sys/uadmin.h>
182 #include <sys/srn.h>
183 
184 
185 /*
186  * PM LOCKING
187  *	The list of locks:
188  * Global pm mutex locks.
189  *
190  * pm_scan_lock:
191  *		It protects the timeout id of the scan thread, and the value
192  *		of autopm_enabled and cpupm.  This lock is not held
193  *		concurrently with any other PM locks.
194  *
195  * pm_clone_lock:	Protects the clone list and count of poll events
196  *		pending for the pm driver.
197  *		Lock ordering:
198  *			pm_clone_lock -> pm_pscc_interest_rwlock,
199  *			pm_clone_lock -> pm_pscc_direct_rwlock.
200  *
201  * pm_rsvp_lock:
202  *		Used to synchronize the data structures used for processes
203  *		to rendezvous with state change information when doing
204  *		direct PM.
205  *		Lock ordering:
206  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
207  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
208  *			pm_rsvp_lock -> pm_clone_lock.
209  *
210  * ppm_lock:	protects the list of registered ppm drivers
211  *		Lock ordering:
212  *			ppm_lock -> ppm driver unit_lock
213  *
214  * pm_compcnt_lock:
215  *		Protects count of components that are not at their lowest
216  *		power level.
217  *		Lock ordering:
218  *			pm_compcnt_lock -> ppm_lock.
219  *
220  * pm_dep_thread_lock:
221  *		Protects work list for pm_dep_thread.  Not taken concurrently
222  *		with any other pm lock.
223  *
224  * pm_remdrv_lock:
225  *		Serializes the operation of removing noinvol data structure
226  *		entries for a branch of the tree when a driver has been
227  *		removed from the system (modctl_rem_major).
228  *		Lock ordering:
229  *			pm_remdrv_lock -> pm_noinvol_rwlock.
230  *
231  * pm_cfb_lock: (High level spin lock)
232  *		Protects the count of how many components of the console
233  *		frame buffer are off (so we know if we have to bring up the
234  *		console as a result of a prom_printf, etc.
235  *		No other locks are taken while holding this lock.
236  *
237  * pm_loan_lock:
238  *		Protects the lock_loan list.  List is used to record that one
239  *		thread has acquired a power lock but has launched another thread
240  *		to complete its processing.  An entry in the list indicates that
241  *		the worker thread can borrow the lock held by the other thread,
242  *		which must block on the completion of the worker.  Use is
243  *		specific to module loading.
244  *		No other locks are taken while holding this lock.
245  *
246  * Global PM rwlocks
247  *
248  * pm_thresh_rwlock:
249  *		Protects the list of thresholds recorded for future use (when
250  *		devices attach).
251  *		Lock ordering:
252  *			pm_thresh_rwlock -> devi_pm_lock
253  *
254  * pm_noinvol_rwlock:
255  *		Protects list of detached nodes that had noinvol registered.
256  *		No other PM locks are taken while holding pm_noinvol_rwlock.
257  *
258  * pm_pscc_direct_rwlock:
259  *		Protects the list that maps devices being directly power
260  *		managed to the processes that manage them.
261  *		Lock ordering:
262  *			pm_pscc_direct_rwlock -> psce_lock
263  *
 * pm_pscc_interest_rwlock:
 *		Protects the list that maps state change events to processes
 *		that want to know about them.
 *		Lock ordering:
 *			pm_pscc_interest_rwlock -> psce_lock
269  *
270  * per-dip locks:
271  *
272  * Each node has these per-dip locks, which are only used if the device is
273  * a candidate for power management (e.g. has pm components)
274  *
275  * devi_pm_lock:
276  *		Protects all power management state of the node except for
277  *		power level, which is protected by ndi_devi_enter().
278  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
279  *		Lock ordering:
280  *			devi_pm_lock -> pm_rsvp_lock,
281  *			devi_pm_lock -> pm_dep_thread_lock,
282  *			devi_pm_lock -> pm_noinvol_rwlock,
283  *			devi_pm_lock -> power lock
284  *
285  * power lock (ndi_devi_enter()):
286  *		Since changing power level is possibly a slow operation (30
287  *		seconds to spin up a disk drive), this is locked separately.
288  *		Since a call into the driver to change the power level of one
289  *		component may result in a call back into the framework to change
290  *		the power level of another, this lock allows re-entrancy by
291  *		the same thread (ndi_devi_enter is used for this because
292  *		the USB framework uses ndi_devi_enter in its power entry point,
293  *		and use of any other lock would produce a deadlock.
294  *
295  * devi_pm_busy_lock:
296  *		This lock protects the integrity of the busy count.  It is
297  *		only taken by pm_busy_component() and pm_idle_component and
298  *		some code that adjust the busy time after the timer gets set
299  *		up or after a CPR operation.  It is per-dip to keep from
300  *		single-threading all the disk drivers on a system.
301  *		It could be per component instead, but most devices have
302  *		only one component.
303  *		No other PM locks are taken while holding this lock.
304  *
305  */
306 
307 static int stdout_is_framebuffer;
308 static kmutex_t	e_pm_power_lock;
309 static kmutex_t pm_loan_lock;
310 kmutex_t	pm_scan_lock;
311 callb_id_t	pm_cpr_cb_id;
312 callb_id_t	pm_panic_cb_id;
313 callb_id_t	pm_halt_cb_id;
314 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
315 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
316 
317 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
318 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
319 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
320 
321 #define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
322 				    pm_default_min_scan)
323 
324 static int pm_busop_set_power(dev_info_t *,
325     void *, pm_bus_power_op_t, void *, void *);
326 static int pm_busop_match_request(dev_info_t *, void *);
327 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
328 static void e_pm_set_max_power(dev_info_t *, int, int);
329 static int e_pm_get_max_power(dev_info_t *, int);
330 
331 /*
332  * Dependency Processing is done thru a seperate thread.
333  */
334 kmutex_t	pm_dep_thread_lock;
335 kcondvar_t	pm_dep_thread_cv;
336 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
337 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
338 
339 /*
340  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
341  * power managing things in single user mode that have been suppressed via
342  * power.conf entries.  Protected by pm_scan_lock.
343  */
344 int		autopm_enabled;
345 
346 /*
347  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
348  * to define the power management behavior of CPU devices separate from
349  * autopm. Protected by pm_scan_lock.
350  */
351 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
352 
353 /*
354  * Defines the default mode of operation for CPU power management,
355  * either the polling implementation, or the event based dispatcher driven
356  * implementation.
357  */
358 pm_cpupm_t	cpupm_default_mode = PM_CPUPM_EVENT;
359 
360 /*
361  * AutoS3 depends on autopm being enabled, and must be enabled by
362  * PM_START_AUTOS3 command.
363  */
364 int		autoS3_enabled;
365 
366 #if !defined(__sparc)
367 /*
368  * on sparc these live in fillsysinfo.c
369  *
370  * If this variable is non-zero, cpr should return "not supported" when
371  * it is queried even though it would normally be supported on this platform.
372  */
373 int cpr_supported_override;
374 
375 /*
376  * Some platforms may need to support CPR even in the absence of
377  * having the correct platform id information.  If this
378  * variable is non-zero, cpr should proceed even in the absence
379  * of otherwise being qualified.
380  */
381 int cpr_platform_enable = 0;
382 
383 #endif
384 
385 /*
386  * pm_S3_enabled indicates that we believe the platform can support S3,
387  * which we get from pmconfig(1M)
388  */
389 int		pm_S3_enabled;
390 
391 /*
392  * This flag is true while processes are stopped for a checkpoint/resume.
393  * Controlling processes of direct pm'd devices are not available to
394  * participate in power level changes, so we bypass them when this is set.
395  */
396 static int	pm_processes_stopped;
397 
#ifdef	DEBUG

/*
 * Debug mask; the PMD_* bit values are defined in common/sys/epm.h.
 */
uint_t		pm_debug = 0;

/*
 * While pm_divertdebug is set, PMD() makes no prom_printf calls, which keeps
 * debug output from bringing up the console frame buffer.  Clear this
 * variable before setting pm_debug to send PMD output to the console.
 *
 * pm_set_power() increments pm_divertdebug when dip == cfb_dip (to avoid
 * deadlocks) and decrements it again on the way out.
 */
uint_t		pm_divertdebug = 1;
volatile uint_t	pm_debug_to_console = 0;
kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */

void		prdeps(char *);

#endif	/* DEBUG */
421 
422 /* Globals */
423 
424 /*
425  * List of recorded thresholds and dependencies
426  */
427 pm_thresh_rec_t *pm_thresh_head;
428 krwlock_t pm_thresh_rwlock;
429 
430 pm_pdr_t *pm_dep_head;
431 static int pm_unresolved_deps = 0;
432 static int pm_prop_deps = 0;
433 
434 /*
435  * List of devices that exported no-involuntary-power-cycles property
436  */
437 pm_noinvol_t *pm_noinvol_head;
438 
439 /*
440  * Locks used in noinvol processing
441  */
442 krwlock_t pm_noinvol_rwlock;
443 kmutex_t pm_remdrv_lock;
444 
445 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
446 int pm_system_idle_threshold;
447 int pm_cpu_idle_threshold;
448 
449 /*
450  * By default nexus has 0 threshold, and depends on its children to keep it up
451  */
452 int pm_default_nexus_threshold = 0;
453 
454 /*
455  * Data structures shared with common/io/pm.c
456  */
457 kmutex_t	pm_clone_lock;
458 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
459 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
460 unsigned char	pm_interest[PM_MAX_CLONE];
461 struct pollhead	pm_pollhead;
462 
463 /*
464  * Data structures shared with common/io/srn.c
465  */
466 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
467 void (*srn_signal)(int type, int event);
468 int srn_inuse;				/* stop srn detach */
469 
470 extern int	hz;
471 extern char	*platform_module_list[];
472 
473 /*
474  * Wrappers for use in ddi_walk_devs
475  */
476 
477 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
478 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
479 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
480 static int		pm_discard_dep_walk(dev_info_t *, void *);
481 #ifdef DEBUG
482 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
483 #endif
484 
485 /*
486  * Routines for managing noinvol devices
487  */
488 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
489 void			pm_noinvol_update_node(dev_info_t *,
490 			    pm_bp_noinvol_t *req);
491 
492 kmutex_t pm_rsvp_lock;
493 kmutex_t pm_compcnt_lock;
494 krwlock_t pm_pscc_direct_rwlock;
495 krwlock_t pm_pscc_interest_rwlock;
496 
497 #define	PSC_INTEREST	0	/* belongs to interest psc list */
498 #define	PSC_DIRECT	1	/* belongs to direct psc list */
499 
500 pscc_t *pm_pscc_interest;
501 pscc_t *pm_pscc_direct;
502 
#define	PM_MAJOR(dip)	ddi_driver_major(dip)

/* Zero when the node has no major number, otherwise NEXUS_DRV() of its ops */
#define	PM_IS_NEXUS(dip)						\
	((PM_MAJOR(dip) == DDI_MAJOR_T_NONE) ? 0 :			\
	NEXUS_DRV(devopsp[PM_MAJOR(dip)]))

#define	POWERING_ON(old, new)	((old) == 0 && (new) != 0)
#define	POWERING_OFF(old, new)	((old) != 0 && (new) == 0)

/*
 * Bump pm_comps_notlowest under pm_compcnt_lock.  Only non-nexus nodes, or
 * nodes with PMC_DEV_THRESH/PMC_COMP_THRESH set, are counted.  On the
 * 0 -> 1 transition the ppm drivers are told PM_NOT_ALL_LOWEST.
 * Expands to a brace block and expects "pmf" to be in scope for PMD().
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags &					\
	    (PMC_DEV_THRESH|PMC_COMP_THRESH))) {			\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip,			\
			    PM_NOT_ALL_LOWEST);				\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL,						\
		    ("%s: %s@%s(%s#%d) incr notlowest->%d\n",		\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}

/*
 * Counterpart of PM_INCR_NOTLOWEST: drop pm_comps_notlowest under
 * pm_compcnt_lock for the same class of nodes, and tell the ppm drivers
 * PM_ALL_LOWEST when the count reaches zero.
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags &					\
	    (PMC_DEV_THRESH|PMC_COMP_THRESH))) {			\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL,						\
		    ("%s: %s@%s(%s#%d) decr notlowest to "		\
		    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))	\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
534 
535 /*
536  * console frame-buffer power-management is not enabled when
537  * debugging services are present.  to override, set pm_cfb_override
538  * to non-zero.
539  */
540 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
541 kmutex_t pm_cfb_lock;
542 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
543 #ifdef DEBUG
544 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
545 #else
546 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
547 #endif
548 
549 static dev_info_t *cfb_dip = 0;
550 static dev_info_t *cfb_dip_detaching = 0;
551 uint_t cfb_inuse = 0;
552 static ddi_softintr_t pm_soft_id;
553 static boolean_t pm_soft_pending;
554 int	pm_scans_disabled = 0;
555 
556 /*
557  * A structure to record the fact that one thread has borrowed a lock held
558  * by another thread.  The context requires that the lender block on the
559  * completion of the borrower.
560  */
561 typedef struct lock_loan {
562 	struct lock_loan	*pmlk_next;
563 	kthread_t		*pmlk_borrower;
564 	kthread_t		*pmlk_lender;
565 	dev_info_t		*pmlk_dip;
566 } lock_loan_t;
567 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
568 
/*
 * PMD_FUNC(func, name) declares a local string holding the function name
 * for use in PMD() output.  It expands to nothing unless both DEBUG and
 * PMDDEBUG are defined.
 */
#if defined(DEBUG) && defined(PMDDEBUG)
#define	PMD_FUNC(func, name)	char *(func) = (name);
#else	/* !(DEBUG && PMDDEBUG) */
#define	PMD_FUNC(func, name)
#endif	/* DEBUG && PMDDEBUG */
578 
579 
580 /*
581  * Must be called before first device (including pseudo) attach
582  */
583 void
pm_init_locks(void)584 pm_init_locks(void)
585 {
586 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
587 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
588 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
589 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
590 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
591 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
592 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
593 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
594 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
595 }
596 
597 static int pm_reset_timestamps(dev_info_t *, void *);
598 
599 static boolean_t
pm_cpr_callb(void * arg,int code)600 pm_cpr_callb(void *arg, int code)
601 {
602 	_NOTE(ARGUNUSED(arg))
603 	static int auto_save;
604 	static pm_cpupm_t cpupm_save;
605 
606 	switch (code) {
607 	case CB_CODE_CPR_CHKPT:
608 		/*
609 		 * Cancel scan or wait for scan in progress to finish
610 		 * Other threads may be trying to restart the scan, so we
611 		 * have to keep at it unil it sticks
612 		 */
613 		mutex_enter(&pm_scan_lock);
614 		ASSERT(!pm_scans_disabled);
615 		pm_scans_disabled = 1;
616 		auto_save = autopm_enabled;
617 		autopm_enabled = 0;
618 		cpupm_save = cpupm;
619 		cpupm = PM_CPUPM_NOTSET;
620 		mutex_exit(&pm_scan_lock);
621 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
622 		break;
623 
624 	case CB_CODE_CPR_RESUME:
625 		ASSERT(!autopm_enabled);
626 		ASSERT(cpupm == PM_CPUPM_NOTSET);
627 		ASSERT(pm_scans_disabled);
628 		pm_scans_disabled = 0;
629 		/*
630 		 * Call pm_reset_timestamps to reset timestamps of each
631 		 * device to the time when the system is resumed so that their
632 		 * idleness can be re-calculated. That's to avoid devices from
633 		 * being powered down right after resume if the system was in
634 		 * suspended mode long enough.
635 		 */
636 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
637 
638 		autopm_enabled = auto_save;
639 		cpupm = cpupm_save;
640 		/*
641 		 * If there is any auto-pm device, get the scanning
642 		 * going. Otherwise don't bother.
643 		 */
644 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
645 		break;
646 	}
647 	return (B_TRUE);
648 }
649 
650 /*
651  * This callback routine is called when there is a system panic.  This function
652  * exists for prototype matching.
653  */
654 static boolean_t
pm_panic_callb(void * arg,int code)655 pm_panic_callb(void *arg, int code)
656 {
657 	_NOTE(ARGUNUSED(arg, code))
658 	void pm_cfb_check_and_powerup(void);
659 	PMD(PMD_CFB, ("pm_panic_callb\n"))
660 	pm_cfb_check_and_powerup();
661 	return (B_TRUE);
662 }
663 
664 static boolean_t
pm_halt_callb(void * arg,int code)665 pm_halt_callb(void *arg, int code)
666 {
667 	_NOTE(ARGUNUSED(arg, code))
668 	return (B_TRUE);
669 }
670 
671 static void pm_dep_thread(void);
672 
673 /*
674  * This needs to be called after the root and platform drivers are loaded
675  * and be single-threaded with respect to driver attach/detach
676  */
677 void
pm_init(void)678 pm_init(void)
679 {
680 	PMD_FUNC(pmf, "pm_init")
681 	char **mod;
682 	extern pri_t minclsyspri;
683 
684 	pm_comps_notlowest = 0;
685 	pm_system_idle_threshold = pm_default_idle_threshold;
686 	pm_cpu_idle_threshold = 0;
687 
688 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
689 	    CB_CL_CPR_PM, "pm_cpr");
690 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
691 	    CB_CL_PANIC, "pm_panic");
692 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
693 	    CB_CL_HALT, "pm_halt");
694 
695 	/*
696 	 * Create a thread to do dependency processing.
697 	 */
698 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
699 	    TS_RUN, minclsyspri);
700 
701 	/*
702 	 * loadrootmodules already loaded these ppm drivers, now get them
703 	 * attached so they can claim the root drivers as they attach
704 	 */
705 	for (mod = platform_module_list; *mod; mod++) {
706 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
707 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
708 			    *mod);
709 		} else {
710 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
711 			    ddi_major_to_name(ddi_name_to_major(*mod))))
712 		}
713 	}
714 }
715 
716 /*
717  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
718  * enabled) when device becomes power managed or after a failed detach and
719  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
720  * a CPR resume to get all the devices scanning again.
721  */
722 void
pm_scan_init(dev_info_t * dip)723 pm_scan_init(dev_info_t *dip)
724 {
725 	PMD_FUNC(pmf, "scan_init")
726 	pm_scan_t	*scanp;
727 
728 	ASSERT(!PM_ISBC(dip));
729 
730 	PM_LOCK_DIP(dip);
731 	scanp = PM_GET_PM_SCAN(dip);
732 	if (!scanp) {
733 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
734 		    pmf, PM_DEVICE(dip)))
735 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
736 		DEVI(dip)->devi_pm_scan = scanp;
737 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
738 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
739 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
740 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
741 	}
742 	PM_UNLOCK_DIP(dip);
743 }
744 
745 /*
746  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
747  */
748 void
pm_scan_fini(dev_info_t * dip)749 pm_scan_fini(dev_info_t *dip)
750 {
751 	PMD_FUNC(pmf, "scan_fini")
752 	pm_scan_t	*scanp;
753 
754 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
755 	ASSERT(!PM_ISBC(dip));
756 	PM_LOCK_DIP(dip);
757 	scanp = PM_GET_PM_SCAN(dip);
758 	if (!scanp) {
759 		PM_UNLOCK_DIP(dip);
760 		return;
761 	}
762 
763 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
764 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
765 
766 	kmem_free(scanp, sizeof (pm_scan_t));
767 	DEVI(dip)->devi_pm_scan = NULL;
768 	PM_UNLOCK_DIP(dip);
769 }
770 
771 /*
772  * Given a pointer to a component struct, return the current power level
773  * (struct contains index unless it is a continuous level).
774  * Located here in hopes of getting both this and dev_is_needed into the
775  * cache together
776  */
777 static int
cur_power(pm_component_t * cp)778 cur_power(pm_component_t *cp)
779 {
780 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
781 		return (cp->pmc_cur_pwr);
782 
783 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
784 }
785 
786 static char *
pm_decode_direction(int direction)787 pm_decode_direction(int direction)
788 {
789 	switch (direction) {
790 	case PM_LEVEL_UPONLY:
791 		return ("up");
792 
793 	case PM_LEVEL_EXACT:
794 		return ("exact");
795 
796 	case PM_LEVEL_DOWNONLY:
797 		return ("down");
798 
799 	default:
800 		return ("INVALID DIRECTION");
801 	}
802 }
803 
804 char *
pm_decode_op(pm_bus_power_op_t op)805 pm_decode_op(pm_bus_power_op_t op)
806 {
807 	switch (op) {
808 	case BUS_POWER_CHILD_PWRCHG:
809 		return ("CHILD_PWRCHG");
810 	case BUS_POWER_NEXUS_PWRUP:
811 		return ("NEXUS_PWRUP");
812 	case BUS_POWER_PRE_NOTIFICATION:
813 		return ("PRE_NOTIFICATION");
814 	case BUS_POWER_POST_NOTIFICATION:
815 		return ("POST_NOTIFICATION");
816 	case BUS_POWER_HAS_CHANGED:
817 		return ("HAS_CHANGED");
818 	case BUS_POWER_NOINVOL:
819 		return ("NOINVOL");
820 	default:
821 		return ("UNKNOWN OP");
822 	}
823 }
824 
825 /*
826  * Returns true if level is a possible (valid) power level for component
827  */
828 int
e_pm_valid_power(dev_info_t * dip,int cmpt,int level)829 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
830 {
831 	PMD_FUNC(pmf, "e_pm_valid_power")
832 	pm_component_t *cp = PM_CP(dip, cmpt);
833 	int i;
834 	int *ip = cp->pmc_comp.pmc_lvals;
835 	int limit = cp->pmc_comp.pmc_numlevels;
836 
837 	if (level < 0)
838 		return (0);
839 	for (i = 0; i < limit; i++) {
840 		if (level == *ip++)
841 			return (1);
842 	}
843 #ifdef DEBUG
844 	if (pm_debug & PMD_FAIL) {
845 		ip = cp->pmc_comp.pmc_lvals;
846 
847 		for (i = 0; i < limit; i++)
848 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
849 			    pmf, i, *ip++))
850 	}
851 #endif
852 	return (0);
853 }
854 
855 static int pm_start(dev_info_t *dip);
856 /*
857  * Returns true if device is pm'd (after calling pm_start if need be)
858  */
859 int
e_pm_valid_info(dev_info_t * dip,pm_info_t ** infop)860 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
861 {
862 	pm_info_t *info;
863 
864 	/*
865 	 * Check if the device is power managed if not.
866 	 * To make the common case (device is power managed already)
867 	 * fast, we check without the lock.  If device is not already
868 	 * power managed, then we take the lock and the long route through
869 	 * go get it managed.  Devices never go unmanaged until they
870 	 * detach.
871 	 */
872 	info = PM_GET_PM_INFO(dip);
873 	if (!info) {
874 		if (!DEVI_IS_ATTACHING(dip)) {
875 			return (0);
876 		}
877 		if (pm_start(dip) != DDI_SUCCESS) {
878 			return (0);
879 		}
880 		info = PM_GET_PM_INFO(dip);
881 	}
882 	ASSERT(info);
883 	if (infop != NULL)
884 		*infop = info;
885 	return (1);
886 }
887 
888 int
e_pm_valid_comp(dev_info_t * dip,int cmpt,pm_component_t ** cpp)889 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
890 {
891 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
892 		if (cpp != NULL)
893 			*cpp = PM_CP(dip, cmpt);
894 		return (1);
895 	} else {
896 		return (0);
897 	}
898 }
899 
900 /*
901  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
902  */
903 static int
dev_is_needed(dev_info_t * dip,int cmpt,int level,int direction)904 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
905 {
906 	PMD_FUNC(pmf, "din")
907 	pm_component_t *cp;
908 	char *pathbuf;
909 	int result;
910 
911 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
912 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
913 	    !e_pm_valid_power(dip, cmpt, level))
914 		return (DDI_FAILURE);
915 
916 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
917 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
918 	    level, cur_power(cp)))
919 
920 	if (pm_set_power(dip, cmpt, level,  direction,
921 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
922 		if (direction == PM_LEVEL_UPONLY) {
923 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
924 			(void) ddi_pathname(dip, pathbuf);
925 			cmn_err(CE_WARN, "Device %s failed to power up.",
926 			    pathbuf);
927 			kmem_free(pathbuf, MAXPATHLEN);
928 		}
929 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
930 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
931 		    pm_decode_direction(direction), level, result))
932 		return (DDI_FAILURE);
933 	}
934 
935 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
936 	    PM_DEVICE(dip)))
937 	pm_rescan(dip);
938 	return (DDI_SUCCESS);
939 }
940 
941 /*
942  * We can get multiple pm_rescan() threads, if one of them discovers
943  * that no scan is running at the moment, it kicks it into action.
944  * Otherwise, it tells the current scanning thread to scan again when
945  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
946  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
947  * thread at a time runs the pm_scan_dev() code.
948  */
949 void
pm_rescan(void * arg)950 pm_rescan(void *arg)
951 {
952 	PMD_FUNC(pmf, "rescan")
953 	dev_info_t	*dip = (dev_info_t *)arg;
954 	pm_info_t	*info;
955 	pm_scan_t	*scanp;
956 	timeout_id_t	scanid;
957 
958 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
959 	PM_LOCK_DIP(dip);
960 	info = PM_GET_PM_INFO(dip);
961 	scanp = PM_GET_PM_SCAN(dip);
962 	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
963 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
964 		PM_UNLOCK_DIP(dip);
965 		return;
966 	}
967 	if (scanp->ps_scan_flags & PM_SCANNING) {
968 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
969 		PM_UNLOCK_DIP(dip);
970 		return;
971 	} else if (scanp->ps_scan_id) {
972 		scanid = scanp->ps_scan_id;
973 		scanp->ps_scan_id = 0;
974 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
975 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
976 		PM_UNLOCK_DIP(dip);
977 		(void) untimeout(scanid);
978 		PM_LOCK_DIP(dip);
979 	}
980 
981 	/*
982 	 * Dispatching pm_scan during attach time is risky due to the fact that
983 	 * attach might soon fail and dip dissolved, and panic may happen while
984 	 * attempting to stop scan. So schedule a pm_rescan instead.
985 	 * (Note that if either of the first two terms are true, taskq_dispatch
986 	 * will not be invoked).
987 	 *
988 	 * Multiple pm_scan dispatching is unecessary and costly to keep track
989 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
990 	 * to regulate the dispatching.
991 	 *
992 	 * Scan is stopped before the device is detached (in pm_detaching())
993 	 * but it may get re-started during the post_detach processing if the
994 	 * driver fails to detach.
995 	 */
996 	if (DEVI_IS_ATTACHING(dip) ||
997 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
998 	    taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP) ==
999 	    TASKQID_INVALID) {
1000 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
1001 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
1002 		if (scanp->ps_scan_id) {
1003 			scanid = scanp->ps_scan_id;
1004 			scanp->ps_scan_id = 0;
1005 			PM_UNLOCK_DIP(dip);
1006 			(void) untimeout(scanid);
1007 			PM_LOCK_DIP(dip);
1008 			if (scanp->ps_scan_id) {
1009 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
1010 				    "thread scheduled pm_rescan, scanid %lx\n",
1011 				    pmf, PM_DEVICE(dip),
1012 				    (ulong_t)scanp->ps_scan_id))
1013 				PM_UNLOCK_DIP(dip);
1014 				return;
1015 			}
1016 		}
1017 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1018 		    (scanp->ps_idle_down ? pm_id_ticks :
1019 		    (PM_MIN_SCAN(dip) * hz)));
1020 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
1021 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
1022 		    (ulong_t)scanp->ps_scan_id))
1023 	} else {
1024 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
1025 		    pmf, PM_DEVICE(dip)))
1026 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
1027 	}
1028 	PM_UNLOCK_DIP(dip);
1029 }
1030 
1031 void
pm_scan(void * arg)1032 pm_scan(void *arg)
1033 {
1034 	PMD_FUNC(pmf, "scan")
1035 	dev_info_t	*dip = (dev_info_t *)arg;
1036 	pm_scan_t	*scanp;
1037 	time_t		nextscan;
1038 
1039 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
1040 
1041 	PM_LOCK_DIP(dip);
1042 	scanp = PM_GET_PM_SCAN(dip);
1043 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1044 
1045 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1046 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
1047 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
1048 		PM_UNLOCK_DIP(dip);
1049 		return;
1050 	}
1051 
1052 	if (scanp->ps_idle_down) {
1053 		/*
1054 		 * make sure we remember idledown was in affect until
1055 		 * we've completed the scan
1056 		 */
1057 		PMID_SET_SCANS(scanp->ps_idle_down)
1058 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
1059 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1060 	}
1061 
1062 	/* possible having two threads running pm_scan() */
1063 	if (scanp->ps_scan_flags & PM_SCANNING) {
1064 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
1065 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
1066 		    pmf, PM_DEVICE(dip)))
1067 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1068 		PM_UNLOCK_DIP(dip);
1069 		return;
1070 	}
1071 
1072 	scanp->ps_scan_flags |= PM_SCANNING;
1073 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1074 	do {
1075 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
1076 		PM_UNLOCK_DIP(dip);
1077 		nextscan = pm_scan_dev(dip);
1078 		PM_LOCK_DIP(dip);
1079 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
1080 
1081 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1082 	scanp->ps_scan_flags &= ~PM_SCANNING;
1083 
1084 	if (scanp->ps_idle_down) {
1085 		scanp->ps_idle_down &= ~PMID_SCANS;
1086 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1087 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1088 	}
1089 
1090 	/* schedule for next idle check */
1091 	if (nextscan != LONG_MAX) {
1092 		if (nextscan > (LONG_MAX / hz))
1093 			nextscan = (LONG_MAX - 1) / hz;
1094 		if (scanp->ps_scan_id) {
1095 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1096 			    "another rescan scheduled scanid(%lx)\n", pmf,
1097 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1098 			PM_UNLOCK_DIP(dip);
1099 			return;
1100 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1101 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1102 			    (clock_t)(nextscan * hz));
1103 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1104 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1105 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1106 		}
1107 	}
1108 	PM_UNLOCK_DIP(dip);
1109 }
1110 
1111 void
pm_get_timestamps(dev_info_t * dip,time_t * valuep)1112 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1113 {
1114 	int components = PM_NUMCMPTS(dip);
1115 	int i;
1116 
1117 	ASSERT(components > 0);
1118 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1119 	for (i = 0; i < components; i++) {
1120 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1121 	}
1122 	PM_UNLOCK_BUSY(dip);
1123 }
1124 
1125 /*
1126  * Returns true if device needs to be kept up because it exported the
1127  * "no-involuntary-power-cycles" property or we're pretending it did (console
1128  * fb case) or it is an ancestor of such a device and has used up the "one
1129  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1130  * upon detach
1131  */
1132 int
pm_noinvol(dev_info_t * dip)1133 pm_noinvol(dev_info_t *dip)
1134 {
1135 	PMD_FUNC(pmf, "noinvol")
1136 
1137 	/*
1138 	 * This doesn't change over the life of a driver, so no locking needed
1139 	 */
1140 	if (PM_IS_CFB(dip)) {
1141 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1142 		    pmf, PM_DEVICE(dip)))
1143 		return (1);
1144 	}
1145 	/*
1146 	 * Not an issue if no such kids
1147 	 */
1148 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1149 #ifdef DEBUG
1150 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1151 			dev_info_t *pdip = dip;
1152 			do {
1153 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1154 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1155 				    DEVI(pdip)->devi_pm_noinvolpm,
1156 				    DEVI(pdip)->devi_pm_volpmd))
1157 				pdip = ddi_get_parent(pdip);
1158 			} while (pdip);
1159 		}
1160 #endif
1161 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1162 		return (0);
1163 	}
1164 
1165 	/*
1166 	 * Since we now maintain the counts correct at every node, we no longer
1167 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1168 	 * without the children getting their counts adjusted.
1169 	 */
1170 
1171 #ifdef	DEBUG
1172 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1173 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1174 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1175 		    PM_DEVICE(dip)))
1176 #endif
1177 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1178 }
1179 
1180 static int	cur_threshold(dev_info_t *, int);
1181 static int	pm_next_lower_power(pm_component_t *, int);
1182 
1183 /*
1184  * This function performs the actual scanning of the device.
1185  * It attempts to power off the indicated device's components if they have
1186  * been idle and other restrictions are met.
1187  * pm_scan_dev calculates and returns when the next scan should happen for
1188  * this device.
1189  */
1190 time_t
pm_scan_dev(dev_info_t * dip)1191 pm_scan_dev(dev_info_t *dip)
1192 {
1193 	PMD_FUNC(pmf, "scan_dev")
1194 	pm_scan_t	*scanp;
1195 	time_t		*timestamp, idletime, now, thresh;
1196 	time_t		timeleft = 0;
1197 #ifdef PMDDEBUG
1198 	int		curpwr;
1199 #endif
1200 	int		i, nxtpwr, pwrndx, unused;
1201 	size_t		size;
1202 	pm_component_t	 *cp;
1203 	dev_info_t	*pdip = ddi_get_parent(dip);
1204 	int		circ;
1205 	clock_t		min_scan = pm_default_min_scan;
1206 
1207 	/*
1208 	 * skip attaching device
1209 	 */
1210 	if (DEVI_IS_ATTACHING(dip)) {
1211 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1212 		    pmf, PM_DEVICE(dip), min_scan))
1213 		return (min_scan);
1214 	}
1215 
1216 	PM_LOCK_DIP(dip);
1217 	scanp = PM_GET_PM_SCAN(dip);
1218 	min_scan = PM_MIN_SCAN(dip);
1219 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1220 
1221 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1222 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1223 	    PM_KUC(dip)))
1224 
1225 	/* no scan under the following conditions */
1226 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1227 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1228 	    (PM_KUC(dip) != 0) ||
1229 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1230 		PM_UNLOCK_DIP(dip);
1231 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1232 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1233 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1234 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1235 		    cpupm, PM_KUC(dip),
1236 		    PM_ISDIRECT(dip) ? "is" : "is not",
1237 		    pm_noinvol(dip) ? "is" : "is not"))
1238 		return (LONG_MAX);
1239 	}
1240 	PM_UNLOCK_DIP(dip);
1241 
1242 	if (!ndi_devi_tryenter(pdip, &circ)) {
1243 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1244 		    pmf, PM_DEVICE(pdip)))
1245 		return ((time_t)1);
1246 	}
1247 	now = gethrestime_sec();
1248 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1249 	timestamp = kmem_alloc(size, KM_SLEEP);
1250 	pm_get_timestamps(dip, timestamp);
1251 
1252 	/*
1253 	 * Since we removed support for backwards compatible devices,
1254 	 * (see big comment at top of file)
1255 	 * it is no longer required to deal with component 0 last.
1256 	 */
1257 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1258 		/*
1259 		 * If already off (an optimization, perhaps)
1260 		 */
1261 		cp = PM_CP(dip, i);
1262 		pwrndx = cp->pmc_cur_pwr;
1263 #ifdef PMDDEBUG
1264 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1265 		    PM_LEVEL_UNKNOWN :
1266 		    cp->pmc_comp.pmc_lvals[pwrndx];
1267 #endif
1268 
1269 		if (pwrndx == 0) {
1270 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1271 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1272 			/* skip device if off or at its lowest */
1273 			continue;
1274 		}
1275 
1276 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1277 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1278 			/* were busy or newly became busy by another thread */
1279 			if (timeleft == 0)
1280 				timeleft = max(thresh, min_scan);
1281 			else
1282 				timeleft = min(
1283 				    timeleft, max(thresh, min_scan));
1284 			continue;
1285 		}
1286 
1287 		idletime = now - timestamp[i];		/* idle time */
1288 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1289 		    pmf, PM_DEVICE(dip), i, idletime))
1290 		if (idletime >= thresh || PM_IS_PID(dip)) {
1291 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1292 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1293 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1294 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1295 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1296 			    PM_CURPOWER(dip, i) != nxtpwr) {
1297 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1298 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1299 				    i, curpwr, nxtpwr))
1300 				timeleft = min_scan;
1301 				continue;
1302 			} else {
1303 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1304 				    "%d->%d, GOOD curpwr %d\n", pmf,
1305 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1306 				    cur_power(cp)))
1307 
1308 				if (nxtpwr == 0)	/* component went off */
1309 					continue;
1310 
1311 				/*
1312 				 * scan to next lower level
1313 				 */
1314 				if (timeleft == 0)
1315 					timeleft = max(
1316 					    1, cur_threshold(dip, i));
1317 				else
1318 					timeleft = min(timeleft,
1319 					    max(1, cur_threshold(dip, i)));
1320 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1321 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1322 				    i, timeleft))
1323 			}
1324 		} else {	/* comp not idle long enough */
1325 			if (timeleft == 0)
1326 				timeleft = thresh - idletime;
1327 			else
1328 				timeleft = min(timeleft, (thresh - idletime));
1329 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1330 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1331 		}
1332 	}
1333 	ndi_devi_exit(pdip, circ);
1334 	kmem_free(timestamp, size);
1335 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1336 	    PM_DEVICE(dip), timeleft))
1337 
1338 	/*
1339 	 * if components are already at lowest level, timeleft is left 0
1340 	 */
1341 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1342 }
1343 
1344 /*
1345  * pm_scan_stop - cancel scheduled pm_rescan,
1346  *                wait for termination of dispatched pm_scan thread
1347  *                     and active pm_scan_dev thread.
1348  */
1349 void
pm_scan_stop(dev_info_t * dip)1350 pm_scan_stop(dev_info_t *dip)
1351 {
1352 	PMD_FUNC(pmf, "scan_stop")
1353 	pm_scan_t	*scanp;
1354 	timeout_id_t	scanid;
1355 
1356 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1357 	PM_LOCK_DIP(dip);
1358 	scanp = PM_GET_PM_SCAN(dip);
1359 	if (!scanp) {
1360 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1361 		    pmf, PM_DEVICE(dip)))
1362 		PM_UNLOCK_DIP(dip);
1363 		return;
1364 	}
1365 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1366 
1367 	/* cancel scheduled scan taskq */
1368 	while (scanp->ps_scan_id) {
1369 		scanid = scanp->ps_scan_id;
1370 		scanp->ps_scan_id = 0;
1371 		PM_UNLOCK_DIP(dip);
1372 		(void) untimeout(scanid);
1373 		PM_LOCK_DIP(dip);
1374 	}
1375 
1376 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1377 		PM_UNLOCK_DIP(dip);
1378 		delay(1);
1379 		PM_LOCK_DIP(dip);
1380 	}
1381 	PM_UNLOCK_DIP(dip);
1382 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1383 }
1384 
1385 int
pm_scan_stop_walk(dev_info_t * dip,void * arg)1386 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1387 {
1388 	_NOTE(ARGUNUSED(arg))
1389 
1390 	if (!PM_GET_PM_SCAN(dip))
1391 		return (DDI_WALK_CONTINUE);
1392 	ASSERT(!PM_ISBC(dip));
1393 	pm_scan_stop(dip);
1394 	return (DDI_WALK_CONTINUE);
1395 }
1396 
1397 /*
1398  * Converts a power level value to its index
1399  */
1400 static int
power_val_to_index(pm_component_t * cp,int val)1401 power_val_to_index(pm_component_t *cp, int val)
1402 {
1403 	int limit, i, *ip;
1404 
1405 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1406 	    val != PM_LEVEL_EXACT);
1407 	/*  convert power value into index (i) */
1408 	limit = cp->pmc_comp.pmc_numlevels;
1409 	ip = cp->pmc_comp.pmc_lvals;
1410 	for (i = 0; i < limit; i++)
1411 		if (val == *ip++)
1412 			return (i);
1413 	return (-1);
1414 }
1415 
1416 /*
1417  * Converts a numeric power level to a printable string
1418  */
1419 static char *
power_val_to_string(pm_component_t * cp,int val)1420 power_val_to_string(pm_component_t *cp, int val)
1421 {
1422 	int index;
1423 
1424 	if (val == PM_LEVEL_UPONLY)
1425 		return ("<UPONLY>");
1426 
1427 	if (val == PM_LEVEL_UNKNOWN ||
1428 	    (index = power_val_to_index(cp, val)) == -1)
1429 		return ("<LEVEL_UNKNOWN>");
1430 
1431 	return (cp->pmc_comp.pmc_lnames[index]);
1432 }
1433 
1434 /*
1435  * Return true if this node has been claimed by a ppm.
1436  */
1437 static int
pm_ppm_claimed(dev_info_t * dip)1438 pm_ppm_claimed(dev_info_t *dip)
1439 {
1440 	return (PPM(dip) != NULL);
1441 }
1442 
1443 /*
1444  * A node which was voluntarily power managed has just used up its "free cycle"
1445  * and need is volpmd field cleared, and the same done to all its descendents
1446  */
1447 static void
pm_clear_volpm_dip(dev_info_t * dip)1448 pm_clear_volpm_dip(dev_info_t *dip)
1449 {
1450 	PMD_FUNC(pmf, "clear_volpm_dip")
1451 
1452 	if (dip == NULL)
1453 		return;
1454 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1455 	    PM_DEVICE(dip)))
1456 	DEVI(dip)->devi_pm_volpmd = 0;
1457 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1458 		pm_clear_volpm_dip(dip);
1459 	}
1460 }
1461 
1462 /*
1463  * A node which was voluntarily power managed has used up the "free cycles"
1464  * for the subtree that it is the root of.  Scan through the list of detached
1465  * nodes and adjust the counts of any that are descendents of the node.
1466  */
1467 static void
pm_clear_volpm_list(dev_info_t * dip)1468 pm_clear_volpm_list(dev_info_t *dip)
1469 {
1470 	PMD_FUNC(pmf, "clear_volpm_list")
1471 	char	*pathbuf;
1472 	size_t	len;
1473 	pm_noinvol_t *ip;
1474 
1475 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1476 	(void) ddi_pathname(dip, pathbuf);
1477 	len = strlen(pathbuf);
1478 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1479 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1480 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1481 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1482 		    ip->ni_path))
1483 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1484 		    ip->ni_path[len] == '/') {
1485 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1486 			    ip->ni_path))
1487 			ip->ni_volpmd = 0;
1488 			ip->ni_wasvolpmd = 0;
1489 		}
1490 	}
1491 	kmem_free(pathbuf, MAXPATHLEN);
1492 	rw_exit(&pm_noinvol_rwlock);
1493 }
1494 
1495 /*
1496  * Powers a device, suspending or resuming the driver if it is a backward
1497  * compatible device, calling into ppm to change power level.
1498  * Called with the component's power lock held.
1499  */
1500 static int
power_dev(dev_info_t * dip,int comp,int level,int old_level,pm_canblock_t canblock,pm_ppm_devlist_t ** devlist)1501 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1502     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1503 {
1504 	PMD_FUNC(pmf, "power_dev")
1505 	power_req_t power_req;
1506 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1507 	int		resume_needed = 0;
1508 	int		suspended = 0;
1509 	int		result;
1510 #ifdef PMDDEBUG
1511 	struct pm_component *cp = PM_CP(dip, comp);
1512 #endif
1513 	int		bc = PM_ISBC(dip);
1514 	int pm_all_components_off(dev_info_t *);
1515 	int		clearvolpmd = 0;
1516 	char		pathbuf[MAXNAMELEN];
1517 #ifdef PMDDEBUG
1518 	char *ppmname, *ppmaddr;
1519 #endif
1520 	/*
1521 	 * If this is comp 0 of a backwards compat device and we are
1522 	 * going to take the power away, we need to detach it with
1523 	 * DDI_PM_SUSPEND command.
1524 	 */
1525 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1526 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1527 			/* We could not suspend before turning cmpt zero off */
1528 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1529 			    pmf, PM_DEVICE(dip)))
1530 			return (DDI_FAILURE);
1531 		} else {
1532 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1533 			suspended++;
1534 		}
1535 	}
1536 	power_req.request_type = PMR_PPM_SET_POWER;
1537 	power_req.req.ppm_set_power_req.who = dip;
1538 	power_req.req.ppm_set_power_req.cmpt = comp;
1539 	power_req.req.ppm_set_power_req.old_level = old_level;
1540 	power_req.req.ppm_set_power_req.new_level = level;
1541 	power_req.req.ppm_set_power_req.canblock = canblock;
1542 	power_req.req.ppm_set_power_req.cookie = NULL;
1543 #ifdef PMDDEBUG
1544 	if (pm_ppm_claimed(dip)) {
1545 		ppmname = PM_NAME(PPM(dip));
1546 		ppmaddr = PM_ADDR(PPM(dip));
1547 
1548 	} else {
1549 		ppmname = "noppm";
1550 		ppmaddr = "0";
1551 	}
1552 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1553 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1554 	    power_val_to_string(cp, old_level), old_level,
1555 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1556 #endif
1557 	/*
1558 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1559 	 * bc device comp 0 is powering on, then we count it as a power cycle
1560 	 * against its voluntary count.
1561 	 */
1562 	if (DEVI(dip)->devi_pm_volpmd &&
1563 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1564 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1565 		clearvolpmd = 1;
1566 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1567 	    &power_req, &result)) == DDI_SUCCESS) {
1568 		/*
1569 		 * Now do involuntary pm accounting;  If we've just cycled power
1570 		 * on a voluntarily pm'd node, and by inference on its entire
1571 		 * subtree, we need to set the subtree (including those nodes
1572 		 * already detached) volpmd counts to 0, and subtract out the
1573 		 * value of the current node's volpmd count from the ancestors
1574 		 */
1575 		if (clearvolpmd) {
1576 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1577 			pm_clear_volpm_dip(dip);
1578 			pm_clear_volpm_list(dip);
1579 			if (volpmd) {
1580 				(void) ddi_pathname(dip, pathbuf);
1581 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1582 				    volpmd, 0, pathbuf, dip);
1583 			}
1584 		}
1585 	} else {
1586 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1587 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1588 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1589 	}
1590 	/*
1591 	 * If some other devices were also powered up (e.g. other cpus in
1592 	 * the same domain) return a pointer to that list
1593 	 */
1594 	if (devlist) {
1595 		*devlist = (pm_ppm_devlist_t *)
1596 		    power_req.req.ppm_set_power_req.cookie;
1597 	}
1598 	/*
1599 	 * We will have to resume the device if the device is backwards compat
1600 	 * device and either of the following is true:
1601 	 * -This is comp 0 and we have successfully powered it up
1602 	 * -This is comp 0 and we have failed to power it down. Resume is
1603 	 *  needed because we have suspended it above
1604 	 */
1605 
1606 	if (bc && comp == 0) {
1607 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1608 		if (power_op_ret == DDI_SUCCESS) {
1609 			if (POWERING_ON(old_level, level)) {
1610 				/*
1611 				 * It must be either suspended or resumed
1612 				 * via pm_power_has_changed path
1613 				 */
1614 				ASSERT((DEVI(dip)->devi_pm_flags &
1615 				    PMC_SUSPENDED) ||
1616 				    (PM_CP(dip, comp)->pmc_flags &
1617 				    PM_PHC_WHILE_SET_POWER));
1618 
1619 					resume_needed = suspended;
1620 			}
1621 		} else {
1622 			if (POWERING_OFF(old_level, level)) {
1623 				/*
1624 				 * It must be either suspended or resumed
1625 				 * via pm_power_has_changed path
1626 				 */
1627 				ASSERT((DEVI(dip)->devi_pm_flags &
1628 				    PMC_SUSPENDED) ||
1629 				    (PM_CP(dip, comp)->pmc_flags &
1630 				    PM_PHC_WHILE_SET_POWER));
1631 
1632 					resume_needed = suspended;
1633 			}
1634 		}
1635 	}
1636 	if (resume_needed) {
1637 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1638 		/* ppm is not interested in DDI_PM_RESUME */
1639 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1640 		    DDI_SUCCESS) {
1641 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1642 		} else
1643 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1644 			    PM_DEVICE(dip));
1645 	}
1646 	return (power_op_ret);
1647 }
1648 
1649 /*
1650  * Return true if we are the owner or a borrower of the devi lock.  See
1651  * pm_lock_power_single() about borrowing the lock.
1652  */
1653 static int
pm_devi_lock_held(dev_info_t * dip)1654 pm_devi_lock_held(dev_info_t *dip)
1655 {
1656 	lock_loan_t *cur;
1657 
1658 	if (DEVI_BUSY_OWNED(dip))
1659 		return (1);
1660 
1661 	/* return false if no locks borrowed */
1662 	if (lock_loan_head.pmlk_next == NULL)
1663 		return (0);
1664 
1665 	mutex_enter(&pm_loan_lock);
1666 	/* see if our thread is registered as a lock borrower. */
1667 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1668 		if (cur->pmlk_borrower == curthread)
1669 			break;
1670 	mutex_exit(&pm_loan_lock);
1671 
1672 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1673 }
1674 
1675 /*
1676  * pm_set_power: adjusts power level of device.	 Assumes device is power
1677  * manageable & component exists.
1678  *
1679  * Cases which require us to bring up devices we keep up ("wekeepups") for
1680  * backwards compatible devices:
1681  *	component 0 is off and we're bringing it up from 0
1682  *		bring up wekeepup first
1683  *	and recursively when component 0 is off and we bring some other
1684  *	component up from 0
1685  * For devices which are not backward compatible, our dependency notion is much
1686  * simpler.  Unless all components are off, then wekeeps must be on.
1687  * We don't treat component 0 differently.
1688  * Canblock tells how to deal with a direct pm'd device.
1689  * Scan arg tells us if we were called from scan, in which case we don't need
1690  * to go back to the root node and walk down to change power.
1691  */
1692 int
pm_set_power(dev_info_t * dip,int comp,int level,int direction,pm_canblock_t canblock,int scan,int * retp)1693 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1694     pm_canblock_t canblock, int scan, int *retp)
1695 {
1696 	PMD_FUNC(pmf, "set_power")
1697 	char		*pathbuf;
1698 	pm_bp_child_pwrchg_t bpc;
1699 	pm_sp_misc_t	pspm;
1700 	int		ret = DDI_SUCCESS;
1701 	int		unused = DDI_SUCCESS;
1702 	dev_info_t	*pdip = ddi_get_parent(dip);
1703 
1704 #ifdef DEBUG
1705 	int		diverted = 0;
1706 
1707 	/*
1708 	 * This prevents operations on the console from calling prom_printf and
1709 	 * either deadlocking or bringing up the console because of debug
1710 	 * output
1711 	 */
1712 	if (dip == cfb_dip) {
1713 		diverted++;
1714 		mutex_enter(&pm_debug_lock);
1715 		pm_divertdebug++;
1716 		mutex_exit(&pm_debug_lock);
1717 	}
1718 #endif
1719 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1720 	    direction == PM_LEVEL_EXACT);
1721 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1722 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1723 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1724 	(void) ddi_pathname(dip, pathbuf);
1725 	bpc.bpc_dip = dip;
1726 	bpc.bpc_path = pathbuf;
1727 	bpc.bpc_comp = comp;
1728 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1729 	bpc.bpc_nlevel = level;
1730 	pspm.pspm_direction = direction;
1731 	pspm.pspm_errnop = retp;
1732 	pspm.pspm_canblock = canblock;
1733 	pspm.pspm_scan = scan;
1734 	bpc.bpc_private = &pspm;
1735 
1736 	/*
1737 	 * If a config operation is being done (we've locked the parent) or
1738 	 * we already hold the power lock (we've locked the node)
1739 	 * then we can operate directly on the node because we have already
1740 	 * brought up all the ancestors, otherwise, we have to go back to the
1741 	 * top of the tree.
1742 	 */
1743 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1744 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1745 		    (void *)&bpc, (void *)&unused);
1746 	else
1747 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1748 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1749 #ifdef DEBUG
1750 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1751 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1752 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1753 	}
1754 	if (diverted) {
1755 		mutex_enter(&pm_debug_lock);
1756 		pm_divertdebug--;
1757 		mutex_exit(&pm_debug_lock);
1758 	}
1759 #endif
1760 	kmem_free(pathbuf, MAXPATHLEN);
1761 	return (ret);
1762 }
1763 
1764 /*
1765  * If holddip is set, then if a dip is found we return with the node held.
1766  *
1767  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1768  * (resolve_pathname), but it does not drive attach.
1769  */
1770 dev_info_t *
pm_name_to_dip(char * pathname,int holddip)1771 pm_name_to_dip(char *pathname, int holddip)
1772 {
1773 	struct pathname pn;
1774 	char		*component;
1775 	dev_info_t	*parent, *child;
1776 	int		circ;
1777 
1778 	if ((pathname == NULL) || (*pathname != '/'))
1779 		return (NULL);
1780 
1781 	/* setup pathname and allocate component */
1782 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1783 		return (NULL);
1784 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1785 
1786 	/* start at top, process '/' component */
1787 	parent = child = ddi_root_node();
1788 	ndi_hold_devi(parent);
1789 	pn_skipslash(&pn);
1790 	ASSERT(i_ddi_devi_attached(parent));
1791 
1792 	/* process components of pathname */
1793 	while (pn_pathleft(&pn)) {
1794 		(void) pn_getcomponent(&pn, component);
1795 
1796 		/* enter parent and search for component child */
1797 		ndi_devi_enter(parent, &circ);
1798 		child = ndi_devi_findchild(parent, component);
1799 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1800 			child = NULL;
1801 			ndi_devi_exit(parent, circ);
1802 			ndi_rele_devi(parent);
1803 			goto out;
1804 		}
1805 
1806 		/* attached child found, hold child and release parent */
1807 		ndi_hold_devi(child);
1808 		ndi_devi_exit(parent, circ);
1809 		ndi_rele_devi(parent);
1810 
1811 		/* child becomes parent, and process next component */
1812 		parent = child;
1813 		pn_skipslash(&pn);
1814 
1815 		/* loop with active ndi_devi_hold of child->parent */
1816 	}
1817 
1818 out:
1819 	pn_free(&pn);
1820 	kmem_free(component, MAXNAMELEN);
1821 
1822 	/* if we are not asked to return with hold, drop current hold */
1823 	if (child && !holddip)
1824 		ndi_rele_devi(child);
1825 	return (child);
1826 }
1827 
1828 /*
1829  * Search for a dependency and mark it unsatisfied
1830  */
1831 static void
pm_unsatisfy(char * keeper,char * kept)1832 pm_unsatisfy(char *keeper, char *kept)
1833 {
1834 	PMD_FUNC(pmf, "unsatisfy")
1835 	pm_pdr_t *dp;
1836 
1837 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1838 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1839 		if (!dp->pdr_isprop) {
1840 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1841 			    (dp->pdr_kept_count > 0) &&
1842 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1843 				if (dp->pdr_satisfied) {
1844 					dp->pdr_satisfied = 0;
1845 					pm_unresolved_deps++;
1846 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1847 					    "pm_unresolved_deps now %d\n", pmf,
1848 					    pm_unresolved_deps))
1849 				}
1850 			}
1851 		}
1852 	}
1853 }
1854 
1855 /*
1856  * Device dip is being un power managed, it keeps up count other devices.
1857  * We need to release any hold we have on the kept devices, and also
1858  * mark the dependency no longer satisfied.
1859  */
1860 static void
pm_unkeeps(int count,char * keeper,char ** keptpaths,int pwr)1861 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1862 {
1863 	PMD_FUNC(pmf, "unkeeps")
1864 	int i, j;
1865 	dev_info_t *kept;
1866 	dev_info_t *dip;
1867 	struct pm_component *cp;
1868 	int keeper_on = 0, circ;
1869 
1870 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1871 	    keeper, (void *)keptpaths))
1872 	/*
1873 	 * Try to grab keeper. Keeper may have gone away by now,
1874 	 * in this case, used the passed in value pwr
1875 	 */
1876 	dip = pm_name_to_dip(keeper, 1);
1877 	for (i = 0; i < count; i++) {
1878 		/* Release power hold */
1879 		kept = pm_name_to_dip(keptpaths[i], 1);
1880 		if (kept) {
1881 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1882 			    PM_DEVICE(kept), i))
1883 			/*
1884 			 * We need to check if we skipped a bringup here
1885 			 * because we could have failed the bringup
1886 			 * (ie DIRECT PM device) and have
1887 			 * not increment the count.
1888 			 */
1889 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1890 				keeper_on = 0;
1891 				PM_LOCK_POWER(dip, &circ);
1892 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1893 					cp = &DEVI(dip)->devi_pm_components[j];
1894 					if (cur_power(cp)) {
1895 						keeper_on++;
1896 						break;
1897 					}
1898 				}
1899 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1900 					pm_rele_power(kept);
1901 					DEVI(kept)->devi_pm_flags
1902 					    &= ~PMC_SKIP_BRINGUP;
1903 				}
1904 				PM_UNLOCK_POWER(dip, circ);
1905 			} else if (pwr) {
1906 				if (PM_SKBU(kept) == 0) {
1907 					pm_rele_power(kept);
1908 					DEVI(kept)->devi_pm_flags
1909 					    &= ~PMC_SKIP_BRINGUP;
1910 				}
1911 			}
1912 			ddi_release_devi(kept);
1913 		}
1914 		/*
1915 		 * mark this dependency not satisfied
1916 		 */
1917 		pm_unsatisfy(keeper, keptpaths[i]);
1918 	}
1919 	if (dip)
1920 		ddi_release_devi(dip);
1921 }
1922 
1923 /*
1924  * Device kept is being un power managed, it is kept up by keeper.
1925  * We need to mark the dependency no longer satisfied.
1926  */
1927 static void
pm_unkepts(char * kept,char * keeper)1928 pm_unkepts(char *kept, char *keeper)
1929 {
1930 	PMD_FUNC(pmf, "unkepts")
1931 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1932 	ASSERT(keeper != NULL);
1933 	/*
1934 	 * mark this dependency not satisfied
1935 	 */
1936 	pm_unsatisfy(keeper, kept);
1937 }
1938 
1939 /*
1940  * Removes dependency information and hold on the kepts, if the path is a
1941  * path of a keeper.
1942  */
1943 static void
pm_free_keeper(char * path,int pwr)1944 pm_free_keeper(char *path, int pwr)
1945 {
1946 	pm_pdr_t *dp;
1947 	int i;
1948 	size_t length;
1949 
1950 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1951 		if (strcmp(dp->pdr_keeper, path) != 0)
1952 			continue;
1953 		/*
1954 		 * Remove all our kept holds and the dependency records,
1955 		 * then free up the kept lists.
1956 		 */
1957 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1958 		if (dp->pdr_kept_count)  {
1959 			for (i = 0; i < dp->pdr_kept_count; i++) {
1960 				length = strlen(dp->pdr_kept_paths[i]);
1961 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1962 			}
1963 			kmem_free(dp->pdr_kept_paths,
1964 			    dp->pdr_kept_count * sizeof (char **));
1965 			dp->pdr_kept_paths = NULL;
1966 			dp->pdr_kept_count = 0;
1967 		}
1968 	}
1969 }
1970 
1971 /*
1972  * Removes the device represented by path from the list of kepts, if the
1973  * path is a path of a kept
1974  */
1975 static void
pm_free_kept(char * path)1976 pm_free_kept(char *path)
1977 {
1978 	pm_pdr_t *dp;
1979 	int i;
1980 	int j, count;
1981 	size_t length;
1982 	char **paths;
1983 
1984 	paths = NULL;
1985 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1986 		if (dp->pdr_kept_count == 0)
1987 			continue;
1988 		count = dp->pdr_kept_count;
1989 		/* Remove this device from the kept path lists */
1990 		for (i = 0; i < count; i++) {
1991 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1992 				pm_unkepts(path, dp->pdr_keeper);
1993 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1994 				kmem_free(dp->pdr_kept_paths[i], length);
1995 				dp->pdr_kept_paths[i] = NULL;
1996 				dp->pdr_kept_count--;
1997 			}
1998 		}
1999 		/* Compact the kept paths array */
2000 		if (dp->pdr_kept_count) {
2001 			length = dp->pdr_kept_count * sizeof (char **);
2002 			paths = kmem_zalloc(length, KM_SLEEP);
2003 			j = 0;
2004 			for (i = 0; i < count; i++) {
2005 				if (dp->pdr_kept_paths[i] != NULL) {
2006 					paths[j] = dp->pdr_kept_paths[i];
2007 					j++;
2008 				}
2009 			}
2010 			ASSERT(j == dp->pdr_kept_count);
2011 		}
2012 		/* Now free the old array and point to the new one */
2013 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2014 		dp->pdr_kept_paths = paths;
2015 	}
2016 }
2017 
2018 /*
2019  * Free the dependency information for a device.
2020  */
2021 void
pm_free_keeps(char * path,int pwr)2022 pm_free_keeps(char *path, int pwr)
2023 {
2024 	PMD_FUNC(pmf, "free_keeps")
2025 
2026 #ifdef DEBUG
2027 	int doprdeps = 0;
2028 	void prdeps(char *);
2029 
2030 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2031 	if (pm_debug & PMD_KEEPS) {
2032 		doprdeps = 1;
2033 		prdeps("pm_free_keeps before");
2034 	}
2035 #endif
2036 	/*
2037 	 * First assume we are a keeper and remove all our kepts.
2038 	 */
2039 	pm_free_keeper(path, pwr);
2040 	/*
2041 	 * Now assume we a kept device, and remove all our records.
2042 	 */
2043 	pm_free_kept(path);
2044 #ifdef	DEBUG
2045 	if (doprdeps) {
2046 		prdeps("pm_free_keeps after");
2047 	}
2048 #endif
2049 }
2050 
2051 static int
pm_is_kept(char * path)2052 pm_is_kept(char *path)
2053 {
2054 	pm_pdr_t *dp;
2055 	int i;
2056 
2057 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2058 		if (dp->pdr_kept_count == 0)
2059 			continue;
2060 		for (i = 0; i < dp->pdr_kept_count; i++) {
2061 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2062 				return (1);
2063 		}
2064 	}
2065 	return (0);
2066 }
2067 
2068 static void
e_pm_hold_rele_power(dev_info_t * dip,int cnt)2069 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2070 {
2071 	PMD_FUNC(pmf, "hold_rele_power")
2072 	int circ;
2073 
2074 	if ((dip == NULL) ||
2075 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2076 		return;
2077 
2078 	PM_LOCK_POWER(dip, &circ);
2079 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2080 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2081 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2082 
2083 	PM_KUC(dip) += cnt;
2084 
2085 	ASSERT(PM_KUC(dip) >= 0);
2086 	PM_UNLOCK_POWER(dip, circ);
2087 
2088 	if (cnt < 0 && PM_KUC(dip) == 0)
2089 		pm_rescan(dip);
2090 }
2091 
2092 #define	MAX_PPM_HANDLERS	4
2093 
2094 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2095 
2096 struct	ppm_callbacks {
2097 	int (*ppmc_func)(dev_info_t *);
2098 	dev_info_t	*ppmc_dip;
2099 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2100 
2101 
2102 /*
2103  * This routine calls into all the registered ppms to notify them
2104  * that either all components of power-managed devices are at their
2105  * lowest levels or no longer all are at their lowest levels.
2106  */
2107 static void
pm_ppm_notify_all_lowest(dev_info_t * dip,int mode)2108 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2109 {
2110 	struct ppm_callbacks *ppmcp;
2111 	power_req_t power_req;
2112 	int result = 0;
2113 
2114 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2115 	power_req.req.ppm_all_lowest_req.mode = mode;
2116 	mutex_enter(&ppm_lock);
2117 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2118 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2119 		    DDI_CTLOPS_POWER, &power_req, &result);
2120 	mutex_exit(&ppm_lock);
2121 	if (mode == PM_ALL_LOWEST) {
2122 		if (autoS3_enabled) {
2123 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2124 			    "autos3\n"))
2125 			mutex_enter(&srn_clone_lock);
2126 			if (srn_signal) {
2127 				srn_inuse++;
2128 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2129 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2130 				srn_inuse--;
2131 			} else {
2132 				PMD(PMD_SX, ("srn_signal NULL\n"))
2133 			}
2134 			mutex_exit(&srn_clone_lock);
2135 		} else {
2136 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2137 			    "disabled\n"));
2138 		}
2139 	}
2140 }
2141 
2142 static void
pm_set_pm_info(dev_info_t * dip,void * value)2143 pm_set_pm_info(dev_info_t *dip, void *value)
2144 {
2145 	DEVI(dip)->devi_pm_info = value;
2146 }
2147 
2148 pm_rsvp_t *pm_blocked_list;
2149 
2150 /*
2151  * Look up an entry in the blocked list by dip and component
2152  */
2153 static pm_rsvp_t *
pm_rsvp_lookup(dev_info_t * dip,int comp)2154 pm_rsvp_lookup(dev_info_t *dip, int comp)
2155 {
2156 	pm_rsvp_t *p;
2157 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2158 	for (p = pm_blocked_list; p; p = p->pr_next)
2159 		if (p->pr_dip == dip && p->pr_comp == comp) {
2160 			return (p);
2161 		}
2162 	return (NULL);
2163 }
2164 
2165 /*
2166  * Called when a device which is direct power managed (or the parent or
2167  * dependent of such a device) changes power, or when a pm clone is closed
2168  * that was direct power managing a device.  This call results in pm_blocked()
2169  * (below) returning.
2170  */
2171 void
pm_proceed(dev_info_t * dip,int cmd,int comp,int newlevel)2172 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2173 {
2174 	PMD_FUNC(pmf, "proceed")
2175 	pm_rsvp_t *found = NULL;
2176 	pm_rsvp_t *p;
2177 
2178 	mutex_enter(&pm_rsvp_lock);
2179 	switch (cmd) {
2180 	/*
2181 	 * we're giving up control, let any pending op continue
2182 	 */
2183 	case PMP_RELEASE:
2184 		for (p = pm_blocked_list; p; p = p->pr_next) {
2185 			if (dip == p->pr_dip) {
2186 				p->pr_retval = PMP_RELEASE;
2187 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2188 				    pmf, PM_DEVICE(dip)))
2189 				cv_signal(&p->pr_cv);
2190 			}
2191 		}
2192 		break;
2193 
2194 	/*
2195 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2196 	 * succeed and a non-matching request for the same device fail
2197 	 */
2198 	case PMP_SETPOWER:
2199 		found = pm_rsvp_lookup(dip, comp);
2200 		if (!found)	/* if driver not waiting */
2201 			break;
2202 		/*
2203 		 * This cannot be pm_lower_power, since that can only happen
2204 		 * during detach or probe
2205 		 */
2206 		if (found->pr_newlevel <= newlevel) {
2207 			found->pr_retval = PMP_SUCCEED;
2208 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2209 			    PM_DEVICE(dip)))
2210 		} else {
2211 			found->pr_retval = PMP_FAIL;
2212 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2213 			    PM_DEVICE(dip)))
2214 		}
2215 		cv_signal(&found->pr_cv);
2216 		break;
2217 
2218 	default:
2219 		panic("pm_proceed unknown cmd %d", cmd);
2220 	}
2221 	mutex_exit(&pm_rsvp_lock);
2222 }
2223 
2224 /*
2225  * This routine dispatches new work to the dependency thread. Caller must
2226  * be prepared to block for memory if necessary.
2227  */
2228 void
pm_dispatch_to_dep_thread(int cmd,char * keeper,char * kept,int wait,int * res,int cached_pwr)2229 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2230     int *res, int cached_pwr)
2231 {
2232 	pm_dep_wk_t	*new_work;
2233 
2234 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2235 	new_work->pdw_type = cmd;
2236 	new_work->pdw_wait = wait;
2237 	new_work->pdw_done = 0;
2238 	new_work->pdw_ret = 0;
2239 	new_work->pdw_pwr = cached_pwr;
2240 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2241 	if (keeper != NULL) {
2242 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2243 		    KM_SLEEP);
2244 		(void) strcpy(new_work->pdw_keeper, keeper);
2245 	}
2246 	if (kept != NULL) {
2247 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2248 		(void) strcpy(new_work->pdw_kept, kept);
2249 	}
2250 	mutex_enter(&pm_dep_thread_lock);
2251 	if (pm_dep_thread_workq == NULL) {
2252 		pm_dep_thread_workq = new_work;
2253 		pm_dep_thread_tail = new_work;
2254 		new_work->pdw_next = NULL;
2255 	} else {
2256 		pm_dep_thread_tail->pdw_next = new_work;
2257 		pm_dep_thread_tail = new_work;
2258 		new_work->pdw_next = NULL;
2259 	}
2260 	cv_signal(&pm_dep_thread_cv);
2261 	/* If caller asked for it, wait till it is done. */
2262 	if (wait)  {
2263 		while (!new_work->pdw_done)
2264 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2265 		/*
2266 		 * Pass return status, if any, back.
2267 		 */
2268 		if (res != NULL)
2269 			*res = new_work->pdw_ret;
2270 		/*
2271 		 * If we asked to wait, it is our job to free the request
2272 		 * structure.
2273 		 */
2274 		if (new_work->pdw_keeper)
2275 			kmem_free(new_work->pdw_keeper,
2276 			    strlen(new_work->pdw_keeper) + 1);
2277 		if (new_work->pdw_kept)
2278 			kmem_free(new_work->pdw_kept,
2279 			    strlen(new_work->pdw_kept) + 1);
2280 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2281 	}
2282 	mutex_exit(&pm_dep_thread_lock);
2283 }
2284 
2285 /*
2286  * Release the pm resource for this device.
2287  */
2288 void
pm_rem_info(dev_info_t * dip)2289 pm_rem_info(dev_info_t *dip)
2290 {
2291 	PMD_FUNC(pmf, "rem_info")
2292 	int		i, count = 0;
2293 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2294 	dev_info_t	*pdip = ddi_get_parent(dip);
2295 	char		*pathbuf;
2296 	int		work_type = PM_DEP_WK_DETACH;
2297 
2298 	ASSERT(info);
2299 
2300 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2301 	if (PM_ISDIRECT(dip)) {
2302 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2303 		ASSERT(info->pmi_clone);
2304 		info->pmi_clone = 0;
2305 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2306 	}
2307 	ASSERT(!PM_GET_PM_SCAN(dip));
2308 
2309 	/*
2310 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2311 	 * Others we check all components.  BC node that has already
2312 	 * called pm_destroy_components() has zero component count.
2313 	 * Parents that get notification are not adjusted because their
2314 	 * kidsupcnt is always 0 (or 1 during configuration).
2315 	 */
2316 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2317 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2318 
2319 	/* node is detached, so we can examine power without locking */
2320 	if (PM_ISBC(dip)) {
2321 		count = (PM_CURPOWER(dip, 0) != 0);
2322 	} else {
2323 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2324 			count += (PM_CURPOWER(dip, i) != 0);
2325 	}
2326 
2327 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2328 		e_pm_hold_rele_power(pdip, -count);
2329 
2330 	/* Schedule a request to clean up dependency records */
2331 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2332 	(void) ddi_pathname(dip, pathbuf);
2333 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2334 	    PM_DEP_NOWAIT, NULL, (count > 0));
2335 	kmem_free(pathbuf, MAXPATHLEN);
2336 
2337 	/*
2338 	 * Adjust the pm_comps_notlowest count since this device is
2339 	 * not being power-managed anymore.
2340 	 */
2341 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2342 		pm_component_t *cp = PM_CP(dip, i);
2343 		if (cp->pmc_cur_pwr != 0)
2344 			PM_DECR_NOTLOWEST(dip)
2345 	}
2346 	/*
2347 	 * Once we clear the info pointer, it looks like it is not power
2348 	 * managed to everybody else.
2349 	 */
2350 	pm_set_pm_info(dip, NULL);
2351 	kmem_free(info, sizeof (pm_info_t));
2352 }
2353 
2354 int
pm_get_norm_pwrs(dev_info_t * dip,int ** valuep,size_t * length)2355 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2356 {
2357 	int components = PM_NUMCMPTS(dip);
2358 	int *bufp;
2359 	size_t size;
2360 	int i;
2361 
2362 	if (components <= 0) {
2363 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2364 		    "can't get normal power values\n", PM_DEVICE(dip));
2365 		return (DDI_FAILURE);
2366 	} else {
2367 		size = components * sizeof (int);
2368 		bufp = kmem_alloc(size, KM_SLEEP);
2369 		for (i = 0; i < components; i++) {
2370 			bufp[i] = pm_get_normal_power(dip, i);
2371 		}
2372 	}
2373 	*length = size;
2374 	*valuep = bufp;
2375 	return (DDI_SUCCESS);
2376 }
2377 
2378 static int
pm_reset_timestamps(dev_info_t * dip,void * arg)2379 pm_reset_timestamps(dev_info_t *dip, void *arg)
2380 {
2381 	_NOTE(ARGUNUSED(arg))
2382 
2383 	int components;
2384 	int	i;
2385 
2386 	if (!PM_GET_PM_INFO(dip))
2387 		return (DDI_WALK_CONTINUE);
2388 	components = PM_NUMCMPTS(dip);
2389 	ASSERT(components > 0);
2390 	PM_LOCK_BUSY(dip);
2391 	for (i = 0; i < components; i++) {
2392 		struct pm_component *cp;
2393 		/*
2394 		 * If the component was not marked as busy,
2395 		 * reset its timestamp to now.
2396 		 */
2397 		cp = PM_CP(dip, i);
2398 		if (cp->pmc_timestamp)
2399 			cp->pmc_timestamp = gethrestime_sec();
2400 	}
2401 	PM_UNLOCK_BUSY(dip);
2402 	return (DDI_WALK_CONTINUE);
2403 }
2404 
2405 /*
2406  * Convert a power level to an index into the levels array (or
2407  * just PM_LEVEL_UNKNOWN in that special case).
2408  */
2409 static int
pm_level_to_index(dev_info_t * dip,pm_component_t * cp,int level)2410 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2411 {
2412 	PMD_FUNC(pmf, "level_to_index")
2413 	int i;
2414 	int limit = cp->pmc_comp.pmc_numlevels;
2415 	int *ip = cp->pmc_comp.pmc_lvals;
2416 
2417 	if (level == PM_LEVEL_UNKNOWN)
2418 		return (level);
2419 
2420 	for (i = 0; i < limit; i++) {
2421 		if (level == *ip++) {
2422 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2423 			    pmf, PM_DEVICE(dip),
2424 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2425 			return (i);
2426 		}
2427 	}
2428 	panic("pm_level_to_index: level %d not found for device "
2429 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2430 	/*NOTREACHED*/
2431 }
2432 
2433 /*
2434  * Internal function to set current power level
2435  */
2436 static void
e_pm_set_cur_pwr(dev_info_t * dip,pm_component_t * cp,int level)2437 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2438 {
2439 	PMD_FUNC(pmf, "set_cur_pwr")
2440 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2441 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2442 
2443 	/*
2444 	 * Nothing to adjust if current & new levels are the same.
2445 	 */
2446 	if (curpwr != PM_LEVEL_UNKNOWN &&
2447 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2448 		return;
2449 
2450 	/*
2451 	 * Keep the count for comps doing transition to/from lowest
2452 	 * level.
2453 	 */
2454 	if (curpwr == 0) {
2455 		PM_INCR_NOTLOWEST(dip);
2456 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2457 		PM_DECR_NOTLOWEST(dip);
2458 	}
2459 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2460 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2461 }
2462 
2463 static int pm_phc_impl(dev_info_t *, int, int, int);
2464 
2465 /*
2466  * This is the default method of setting the power of a device if no ppm
2467  * driver has claimed it.
2468  */
2469 int
pm_power(dev_info_t * dip,int comp,int level)2470 pm_power(dev_info_t *dip, int comp, int level)
2471 {
2472 	PMD_FUNC(pmf, "power")
2473 	struct dev_ops	*ops;
2474 	int		(*fn)(dev_info_t *, int, int);
2475 	struct pm_component *cp = PM_CP(dip, comp);
2476 	int retval;
2477 	pm_info_t *info = PM_GET_PM_INFO(dip);
2478 
2479 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2480 	    PM_DEVICE(dip), comp, level))
2481 	if (!(ops = ddi_get_driver(dip))) {
2482 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2483 		    PM_DEVICE(dip)))
2484 		return (DDI_FAILURE);
2485 	}
2486 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2487 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2488 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2489 		    (!fn ? " devo_power NULL" : "")))
2490 		return (DDI_FAILURE);
2491 	}
2492 	cp->pmc_flags |= PM_POWER_OP;
2493 	retval = (*fn)(dip, comp, level);
2494 	cp->pmc_flags &= ~PM_POWER_OP;
2495 	if (retval == DDI_SUCCESS) {
2496 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2497 		return (DDI_SUCCESS);
2498 	}
2499 
2500 	/*
2501 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2502 	 * updated only the power level of the component.  If our attempt to
2503 	 * set the device new to a power level above has failed we sync the
2504 	 * total power state via phc code now.
2505 	 */
2506 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2507 		int phc_lvl =
2508 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2509 
2510 		ASSERT(info);
2511 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2512 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2513 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2514 	}
2515 
2516 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2517 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2518 	    level, power_val_to_string(cp, level)));
2519 	return (DDI_FAILURE);
2520 }
2521 
2522 int
pm_unmanage(dev_info_t * dip)2523 pm_unmanage(dev_info_t *dip)
2524 {
2525 	PMD_FUNC(pmf, "unmanage")
2526 	power_req_t power_req;
2527 	int result, retval = 0;
2528 
2529 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2530 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2531 	    PM_DEVICE(dip)))
2532 	power_req.request_type = PMR_PPM_UNMANAGE;
2533 	power_req.req.ppm_config_req.who = dip;
2534 	if (pm_ppm_claimed(dip))
2535 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2536 		    &power_req, &result);
2537 #ifdef DEBUG
2538 	else
2539 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2540 		    &power_req, &result);
2541 #endif
2542 	ASSERT(retval == DDI_SUCCESS);
2543 	pm_rem_info(dip);
2544 	return (retval);
2545 }
2546 
2547 int
pm_raise_power(dev_info_t * dip,int comp,int level)2548 pm_raise_power(dev_info_t *dip, int comp, int level)
2549 {
2550 	if (level < 0)
2551 		return (DDI_FAILURE);
2552 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2553 	    !e_pm_valid_power(dip, comp, level))
2554 		return (DDI_FAILURE);
2555 
2556 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2557 }
2558 
2559 int
pm_lower_power(dev_info_t * dip,int comp,int level)2560 pm_lower_power(dev_info_t *dip, int comp, int level)
2561 {
2562 	PMD_FUNC(pmf, "pm_lower_power")
2563 
2564 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2565 	    !e_pm_valid_power(dip, comp, level)) {
2566 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2567 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2568 		return (DDI_FAILURE);
2569 	}
2570 
2571 	if (!DEVI_IS_DETACHING(dip)) {
2572 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2573 		    pmf, PM_DEVICE(dip)))
2574 		return (DDI_FAILURE);
2575 	}
2576 
2577 	/*
2578 	 * If we don't care about saving power, or we're treating this node
2579 	 * specially, then this is a no-op
2580 	 */
2581 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2582 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2583 		    pmf, PM_DEVICE(dip),
2584 		    !autopm_enabled ? "!autopm_enabled " : "",
2585 		    !PM_POLLING_CPUPM ? "!cpupm_polling " : "",
2586 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2587 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2588 		return (DDI_SUCCESS);
2589 	}
2590 
2591 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2592 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2593 		    PM_DEVICE(dip)))
2594 		return (DDI_FAILURE);
2595 	}
2596 	return (DDI_SUCCESS);
2597 }
2598 
2599 /*
2600  * Find the entries struct for a given dip in the blocked list, return it locked
2601  */
2602 static psce_t *
pm_psc_dip_to_direct(dev_info_t * dip,pscc_t ** psccp)2603 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2604 {
2605 	pscc_t *p;
2606 	psce_t *psce;
2607 
2608 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2609 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2610 		if (p->pscc_dip == dip) {
2611 			*psccp = p;
2612 			psce = p->pscc_entries;
2613 			mutex_enter(&psce->psce_lock);
2614 			ASSERT(psce);
2615 			rw_exit(&pm_pscc_direct_rwlock);
2616 			return (psce);
2617 		}
2618 	}
2619 	rw_exit(&pm_pscc_direct_rwlock);
2620 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2621 	/*NOTREACHED*/
2622 }
2623 
2624 /*
2625  * Write an entry indicating a power level change (to be passed to a process
2626  * later) in the given psce.
2627  * If we were called in the path that brings up the console fb in the
2628  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2629  * we create a record that has a size of -1, a physaddr of NULL, and that
2630  * has the overflow flag set.
2631  */
2632 static int
psc_entry(ushort_t event,psce_t * psce,dev_info_t * dip,int comp,int new,int old,int which,pm_canblock_t canblock)2633 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2634     int old, int which, pm_canblock_t canblock)
2635 {
2636 	char	buf[MAXNAMELEN];
2637 	pm_state_change_t *p;
2638 	size_t	size;
2639 	caddr_t physpath = NULL;
2640 	int	overrun = 0;
2641 
2642 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2643 	(void) ddi_pathname(dip, buf);
2644 	size = strlen(buf) + 1;
2645 	p = psce->psce_in;
2646 	if (canblock == PM_CANBLOCK_BYPASS) {
2647 		physpath = kmem_alloc(size, KM_NOSLEEP);
2648 		if (physpath == NULL) {
2649 			/*
2650 			 * mark current entry as overrun
2651 			 */
2652 			p->flags |= PSC_EVENT_LOST;
2653 			size = (size_t)-1;
2654 		}
2655 	} else
2656 		physpath = kmem_alloc(size, KM_SLEEP);
2657 	if (p->size) {	/* overflow; mark the next entry */
2658 		if (p->size != (size_t)-1)
2659 			kmem_free(p->physpath, p->size);
2660 		ASSERT(psce->psce_out == p);
2661 		if (p == psce->psce_last) {
2662 			psce->psce_first->flags |= PSC_EVENT_LOST;
2663 			psce->psce_out = psce->psce_first;
2664 		} else {
2665 			(p + 1)->flags |= PSC_EVENT_LOST;
2666 			psce->psce_out = (p + 1);
2667 		}
2668 		overrun++;
2669 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2670 		p->flags |= PSC_EVENT_LOST;
2671 		p->size = 0;
2672 		p->physpath = NULL;
2673 	}
2674 	if (which == PSC_INTEREST) {
2675 		mutex_enter(&pm_compcnt_lock);
2676 		if (pm_comps_notlowest == 0)
2677 			p->flags |= PSC_ALL_LOWEST;
2678 		else
2679 			p->flags &= ~PSC_ALL_LOWEST;
2680 		mutex_exit(&pm_compcnt_lock);
2681 	}
2682 	p->event = event;
2683 	p->timestamp = gethrestime_sec();
2684 	p->component = comp;
2685 	p->old_level = old;
2686 	p->new_level = new;
2687 	p->physpath = physpath;
2688 	p->size = size;
2689 	if (physpath != NULL)
2690 		(void) strcpy(p->physpath, buf);
2691 	if (p == psce->psce_last)
2692 		psce->psce_in = psce->psce_first;
2693 	else
2694 		psce->psce_in = ++p;
2695 	mutex_exit(&psce->psce_lock);
2696 	return (overrun);
2697 }
2698 
2699 /*
2700  * Find the next entry on the interest list.  We keep a pointer to the item we
2701  * last returned in the user's cooke.  Returns a locked entries struct.
2702  */
2703 static psce_t *
psc_interest(void ** cookie,pscc_t ** psccp)2704 psc_interest(void **cookie, pscc_t **psccp)
2705 {
2706 	pscc_t *pscc;
2707 	pscc_t **cookiep = (pscc_t **)cookie;
2708 
2709 	if (*cookiep == NULL)
2710 		pscc = pm_pscc_interest;
2711 	else
2712 		pscc = (*cookiep)->pscc_next;
2713 	if (pscc) {
2714 		*cookiep = pscc;
2715 		*psccp = pscc;
2716 		mutex_enter(&pscc->pscc_entries->psce_lock);
2717 		return (pscc->pscc_entries);
2718 	} else {
2719 		return (NULL);
2720 	}
2721 }
2722 
2723 /*
2724  * Create an entry for a process to pick up indicating a power level change.
2725  */
2726 static void
pm_enqueue_notify(ushort_t cmd,dev_info_t * dip,int comp,int newlevel,int oldlevel,pm_canblock_t canblock)2727 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2728     int newlevel, int oldlevel, pm_canblock_t canblock)
2729 {
2730 	PMD_FUNC(pmf, "enqueue_notify")
2731 	pscc_t	*pscc;
2732 	psce_t	*psce;
2733 	void		*cookie = NULL;
2734 	int	overrun;
2735 
2736 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2737 	switch (cmd) {
2738 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2739 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2740 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2741 		psce = pm_psc_dip_to_direct(dip, &pscc);
2742 		ASSERT(psce);
2743 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2744 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2745 		    pm_poll_cnt[pscc->pscc_clone]))
2746 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2747 		    PSC_DIRECT, canblock);
2748 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2749 		mutex_enter(&pm_clone_lock);
2750 		if (!overrun)
2751 			pm_poll_cnt[pscc->pscc_clone]++;
2752 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2753 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2754 		mutex_exit(&pm_clone_lock);
2755 		break;
2756 	case PSC_HAS_CHANGED:
2757 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2758 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2759 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2760 			psce = pm_psc_dip_to_direct(dip, &pscc);
2761 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2762 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2763 			    pm_poll_cnt[pscc->pscc_clone]))
2764 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2765 			    oldlevel, PSC_DIRECT, canblock);
2766 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2767 			mutex_enter(&pm_clone_lock);
2768 			if (!overrun)
2769 				pm_poll_cnt[pscc->pscc_clone]++;
2770 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2771 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2772 			mutex_exit(&pm_clone_lock);
2773 		}
2774 		mutex_enter(&pm_clone_lock);
2775 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2776 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2777 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2778 			    oldlevel, PSC_INTEREST, canblock);
2779 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2780 		}
2781 		rw_exit(&pm_pscc_interest_rwlock);
2782 		mutex_exit(&pm_clone_lock);
2783 		break;
2784 #ifdef DEBUG
2785 	default:
2786 		ASSERT(0);
2787 #endif
2788 	}
2789 }
2790 
2791 static void
pm_enqueue_notify_others(pm_ppm_devlist_t ** listp,pm_canblock_t canblock)2792 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2793 {
2794 	if (listp) {
2795 		pm_ppm_devlist_t *p, *next = NULL;
2796 
2797 		for (p = *listp; p; p = next) {
2798 			next = p->ppd_next;
2799 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2800 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2801 			    canblock);
2802 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2803 		}
2804 		*listp = NULL;
2805 	}
2806 }
2807 
2808 /*
2809  * Try to get the power locks of the parent node and target (child)
2810  * node.  Return true if successful (with both locks held) or false
2811  * (with no locks held).
2812  */
2813 static int
pm_try_parent_child_locks(dev_info_t * pdip,dev_info_t * dip,int * pcircp,int * circp)2814 pm_try_parent_child_locks(dev_info_t *pdip,
2815     dev_info_t *dip, int *pcircp, int *circp)
2816 {
2817 	if (ndi_devi_tryenter(pdip, pcircp))
2818 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2819 			return (1);
2820 		} else {
2821 			ndi_devi_exit(pdip, *pcircp);
2822 		}
2823 	return (0);
2824 }
2825 
2826 /*
2827  * Determine if the power lock owner is blocked by current thread.
2828  * returns :
2829  * 	1 - If the thread owning the effective power lock (the first lock on
2830  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2831  *          a mutex held by the current thread.
2832  *
2833  *	0 - otherwise
2834  *
2835  * Note : This function is called by pm_power_has_changed to determine whether
2836  * it is executing in parallel with pm_set_power.
2837  */
2838 static int
pm_blocked_by_us(dev_info_t * dip)2839 pm_blocked_by_us(dev_info_t *dip)
2840 {
2841 	power_req_t power_req;
2842 	kthread_t *owner;
2843 	int result;
2844 	kmutex_t *mp;
2845 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2846 
2847 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2848 	power_req.req.ppm_power_lock_owner_req.who = dip;
2849 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2850 	    DDI_SUCCESS) {
2851 		/*
2852 		 * It is assumed that if the device is claimed by ppm, ppm
2853 		 * will always implement this request type and it'll always
2854 		 * return success. We panic here, if it fails.
2855 		 */
2856 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2857 		    PM_DEVICE(dip));
2858 		/*NOTREACHED*/
2859 	}
2860 
2861 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2862 	    owner->t_state == TS_SLEEP &&
2863 	    owner->t_sobj_ops &&
2864 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2865 	    (mp = (kmutex_t *)owner->t_wchan) &&
2866 	    mutex_owner(mp) == curthread)
2867 		return (1);
2868 
2869 	return (0);
2870 }
2871 
2872 /*
2873  * Notify parent which wants to hear about a child's power changes.
2874  */
2875 static void
pm_notify_parent(dev_info_t * dip,dev_info_t * pdip,int comp,int old_level,int level)2876 pm_notify_parent(dev_info_t *dip,
2877     dev_info_t *pdip, int comp, int old_level, int level)
2878 {
2879 	pm_bp_has_changed_t bphc;
2880 	pm_sp_misc_t pspm;
2881 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2882 	int result = DDI_SUCCESS;
2883 
2884 	bphc.bphc_dip = dip;
2885 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2886 	bphc.bphc_comp = comp;
2887 	bphc.bphc_olevel = old_level;
2888 	bphc.bphc_nlevel = level;
2889 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2890 	pspm.pspm_scan = 0;
2891 	bphc.bphc_private = &pspm;
2892 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2893 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2894 	kmem_free(pathbuf, MAXPATHLEN);
2895 }
2896 
2897 /*
2898  * Check if we need to resume a BC device, and make the attach call as required.
2899  */
2900 static int
pm_check_and_resume(dev_info_t * dip,int comp,int old_level,int level)2901 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2902 {
2903 	int ret = DDI_SUCCESS;
2904 
2905 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2906 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2907 		/* ppm is not interested in DDI_PM_RESUME */
2908 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2909 			/* XXX Should we mark it resumed, */
2910 			/* even though it failed? */
2911 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2912 			    PM_NAME(dip), PM_ADDR(dip));
2913 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2914 	}
2915 
2916 	return (ret);
2917 }
2918 
2919 /*
2920  * Tests outside the lock to see if we should bother to enqueue an entry
2921  * for any watching process.  If yes, then caller will take the lock and
2922  * do the full protocol
2923  */
2924 static int
pm_watchers()2925 pm_watchers()
2926 {
2927 	if (pm_processes_stopped)
2928 		return (0);
2929 	return (pm_pscc_direct || pm_pscc_interest);
2930 }
2931 
2932 static int pm_phc_impl(dev_info_t *, int, int, int);
2933 
2934 /*
2935  * A driver is reporting that the power of one of its device's components
2936  * has changed.  Update the power state accordingly.
2937  */
2938 int
pm_power_has_changed(dev_info_t * dip,int comp,int level)2939 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2940 {
2941 	PMD_FUNC(pmf, "pm_power_has_changed")
2942 	int ret;
2943 	dev_info_t *pdip = ddi_get_parent(dip);
2944 	struct pm_component *cp;
2945 	int blocked, circ, pcirc, old_level;
2946 
2947 	if (level < 0) {
2948 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2949 		    PM_DEVICE(dip), level))
2950 		return (DDI_FAILURE);
2951 	}
2952 
2953 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2954 	    PM_DEVICE(dip), comp, level))
2955 
2956 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2957 	    !e_pm_valid_power(dip, comp, level))
2958 		return (DDI_FAILURE);
2959 
2960 	/*
2961 	 * A driver thread calling pm_power_has_changed and another thread
2962 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2963 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2964 	 * this specific deadlock.  If we can't get the lock immediately
2965 	 * and we are deadlocked, just update the component's level, do
2966 	 * notifications, and return.  We intend to update the total power
2967 	 * state later (if the other thread fails to set power to the
2968 	 * desired level).  If we were called because of a power change on a
2969 	 * component that isn't involved in a set_power op, update all state
2970 	 * immediately.
2971 	 */
2972 	cp = PM_CP(dip, comp);
2973 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2974 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2975 		    (cp->pmc_flags & PM_POWER_OP)) {
2976 			if (pm_watchers()) {
2977 				mutex_enter(&pm_rsvp_lock);
2978 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2979 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2980 				mutex_exit(&pm_rsvp_lock);
2981 			}
2982 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2983 				pm_notify_parent(dip,
2984 				    pdip, comp, cur_power(cp), level);
2985 			(void) pm_check_and_resume(dip,
2986 			    comp, cur_power(cp), level);
2987 
2988 			/*
2989 			 * Stash the old power index, update curpwr, and flag
2990 			 * that the total power state needs to be synched.
2991 			 */
2992 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2993 			/*
2994 			 * Several pm_power_has_changed calls could arrive
2995 			 * while the set power path remains blocked.  Keep the
2996 			 * oldest old power and the newest new power of any
2997 			 * sequence of phc calls which arrive during deadlock.
2998 			 */
2999 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
3000 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
3001 			cp->pmc_cur_pwr =
3002 			    pm_level_to_index(dip, cp, level);
3003 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
3004 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3005 			return (DDI_SUCCESS);
3006 		} else
3007 			if (blocked) {	/* blocked, but different cmpt? */
3008 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
3009 					cmn_err(CE_NOTE,
3010 					    "!pm: parent kuc not updated due "
3011 					    "to possible deadlock.\n");
3012 					return (pm_phc_impl(dip,
3013 					    comp, level, 1));
3014 				}
3015 				old_level = cur_power(cp);
3016 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3017 				    (!PM_ISBC(dip) || comp == 0) &&
3018 				    POWERING_ON(old_level, level))
3019 					pm_hold_power(pdip);
3020 				ret = pm_phc_impl(dip, comp, level, 1);
3021 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3022 					if ((!PM_ISBC(dip) ||
3023 					    comp == 0) && level == 0 &&
3024 					    old_level != PM_LEVEL_UNKNOWN)
3025 						pm_rele_power(pdip);
3026 				}
3027 				ndi_devi_exit(pdip, pcirc);
3028 				/* child lock not held: deadlock */
3029 				return (ret);
3030 			}
3031 		delay(1);
3032 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
3033 	}
3034 
3035 	/* non-deadlock case */
3036 	old_level = cur_power(cp);
3037 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3038 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
3039 		pm_hold_power(pdip);
3040 	ret = pm_phc_impl(dip, comp, level, 1);
3041 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3042 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
3043 		    old_level != PM_LEVEL_UNKNOWN)
3044 			pm_rele_power(pdip);
3045 	}
3046 	PM_UNLOCK_POWER(dip, circ);
3047 	ndi_devi_exit(pdip, pcirc);
3048 	return (ret);
3049 }
3050 
3051 /*
3052  * Account for power changes to a component of the the console frame buffer.
3053  * If lowering power from full (or "unkown", which is treatd as full)
3054  * we will increment the "components off" count of the fb device.
3055  * Subsequent lowering of the same component doesn't affect the count.  If
3056  * raising a component back to full power, we will decrement the count.
3057  *
3058  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3059  */
3060 static int
calc_cfb_comps_incr(dev_info_t * dip,int cmpt,int old,int new)3061 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3062 {
3063 	struct pm_component *cp = PM_CP(dip, cmpt);
3064 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3065 	int want_normal = (new == cp->pmc_norm_pwr);
3066 	int incr = 0;
3067 
3068 	if (on && !want_normal)
3069 		incr = 1;
3070 	else if (!on && want_normal)
3071 		incr = -1;
3072 	return (incr);
3073 }
3074 
3075 /*
3076  * Adjust the count of console frame buffer components < full power.
3077  */
3078 static void
update_comps_off(int incr,dev_info_t * dip)3079 update_comps_off(int incr, dev_info_t *dip)
3080 {
3081 		mutex_enter(&pm_cfb_lock);
3082 		pm_cfb_comps_off += incr;
3083 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3084 		mutex_exit(&pm_cfb_lock);
3085 }
3086 
3087 /*
3088  * Update the power state in the framework (via the ppm).  The 'notify'
3089  * argument tells whether to notify watchers.  Power lock is already held.
3090  */
3091 static int
pm_phc_impl(dev_info_t * dip,int comp,int level,int notify)3092 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3093 {
3094 	PMD_FUNC(pmf, "phc_impl")
3095 	power_req_t power_req;
3096 	int i, dodeps = 0;
3097 	dev_info_t *pdip = ddi_get_parent(dip);
3098 	int result;
3099 	int old_level;
3100 	struct pm_component *cp;
3101 	int incr = 0;
3102 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3103 	int work_type = 0;
3104 	char *pathbuf;
3105 
3106 	/* Must use "official" power level for this test. */
3107 	cp = PM_CP(dip, comp);
3108 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3109 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3110 	if (old_level != PM_LEVEL_UNKNOWN)
3111 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3112 
3113 	if (level == old_level) {
3114 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3115 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3116 		return (DDI_SUCCESS);
3117 	}
3118 
3119 	/*
3120 	 * Tell ppm about this.
3121 	 */
3122 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3123 	power_req.req.ppm_notify_level_req.who = dip;
3124 	power_req.req.ppm_notify_level_req.cmpt = comp;
3125 	power_req.req.ppm_notify_level_req.new_level = level;
3126 	power_req.req.ppm_notify_level_req.old_level = old_level;
3127 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3128 	    &result) == DDI_FAILURE) {
3129 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3130 		    pmf, PM_DEVICE(dip), level))
3131 		return (DDI_FAILURE);
3132 	}
3133 
3134 	if (PM_IS_CFB(dip)) {
3135 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3136 
3137 		if (incr) {
3138 			update_comps_off(incr, dip);
3139 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3140 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3141 			    comp, old_level, level, pm_cfb_comps_off))
3142 		}
3143 	}
3144 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3145 	result = DDI_SUCCESS;
3146 
3147 	if (notify) {
3148 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3149 			pm_notify_parent(dip, pdip, comp, old_level, level);
3150 		(void) pm_check_and_resume(dip, comp, old_level, level);
3151 	}
3152 
3153 	/*
3154 	 * Decrement the dependency kidsup count if we turn a device
3155 	 * off.
3156 	 */
3157 	if (POWERING_OFF(old_level, level)) {
3158 		dodeps = 1;
3159 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3160 			cp = PM_CP(dip, i);
3161 			if (cur_power(cp)) {
3162 				dodeps = 0;
3163 				break;
3164 			}
3165 		}
3166 		if (dodeps)
3167 			work_type = PM_DEP_WK_POWER_OFF;
3168 	}
3169 
3170 	/*
3171 	 * Increment if we turn it on. Check to see
3172 	 * if other comps are already on, if so,
3173 	 * dont increment.
3174 	 */
3175 	if (POWERING_ON(old_level, level)) {
3176 		dodeps = 1;
3177 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3178 			cp = PM_CP(dip, i);
3179 			if (comp == i)
3180 				continue;
3181 			/* -1 also treated as 0 in this case */
3182 			if (cur_power(cp) > 0) {
3183 				dodeps = 0;
3184 				break;
3185 			}
3186 		}
3187 		if (dodeps)
3188 			work_type = PM_DEP_WK_POWER_ON;
3189 	}
3190 
3191 	if (dodeps) {
3192 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3193 		(void) ddi_pathname(dip, pathbuf);
3194 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3195 		    PM_DEP_NOWAIT, NULL, 0);
3196 		kmem_free(pathbuf, MAXPATHLEN);
3197 	}
3198 
3199 	if (notify && (level != old_level) && pm_watchers()) {
3200 		mutex_enter(&pm_rsvp_lock);
3201 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3202 		    PM_CANBLOCK_BLOCK);
3203 		mutex_exit(&pm_rsvp_lock);
3204 	}
3205 
3206 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3207 	pm_rescan(dip);
3208 	return (DDI_SUCCESS);
3209 }
3210 
3211 /*
3212  * This function is called at startup time to notify pm of the existence
3213  * of any platform power managers for this platform.  As a result of
3214  * this registration, each function provided will be called each time
3215  * a device node is attached, until one returns true, and it must claim the
3216  * device node (by returning non-zero) if it wants to be involved in the
3217  * node's power management.  If it does claim the node, then it will
3218  * subsequently be notified of attach and detach events.
3219  *
3220  */
3221 
3222 int
pm_register_ppm(int (* func)(dev_info_t *),dev_info_t * dip)3223 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3224 {
3225 	PMD_FUNC(pmf, "register_ppm")
3226 	struct ppm_callbacks *ppmcp;
3227 	pm_component_t *cp;
3228 	int i, pwr, result, circ;
3229 	power_req_t power_req;
3230 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3231 	void pm_ppm_claim(dev_info_t *);
3232 
3233 	mutex_enter(&ppm_lock);
3234 	ppmcp = ppm_callbacks;
3235 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3236 		if (ppmcp->ppmc_func == NULL) {
3237 			ppmcp->ppmc_func = func;
3238 			ppmcp->ppmc_dip = dip;
3239 			break;
3240 		}
3241 	}
3242 	mutex_exit(&ppm_lock);
3243 
3244 	if (i >= MAX_PPM_HANDLERS)
3245 		return (DDI_FAILURE);
3246 	while ((dip = ddi_get_parent(dip)) != NULL) {
3247 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3248 			continue;
3249 		pm_ppm_claim(dip);
3250 		/* don't bother with the not power-manageable nodes */
3251 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3252 			/*
3253 			 * Tell ppm about this.
3254 			 */
3255 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3256 			p->old_level = PM_LEVEL_UNKNOWN;
3257 			p->who = dip;
3258 			PM_LOCK_POWER(dip, &circ);
3259 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3260 				cp = PM_CP(dip, i);
3261 				pwr = cp->pmc_cur_pwr;
3262 				if (pwr != PM_LEVEL_UNKNOWN) {
3263 					p->cmpt = i;
3264 					p->new_level = cur_power(cp);
3265 					p->old_level = PM_LEVEL_UNKNOWN;
3266 					if (pm_ctlops(PPM(dip), dip,
3267 					    DDI_CTLOPS_POWER, &power_req,
3268 					    &result) == DDI_FAILURE) {
3269 						PMD(PMD_FAIL, ("%s: pc "
3270 						    "%s@%s(%s#%d) to %d "
3271 						    "fails\n", pmf,
3272 						    PM_DEVICE(dip), pwr))
3273 					}
3274 				}
3275 			}
3276 			PM_UNLOCK_POWER(dip, circ);
3277 		}
3278 	}
3279 	return (DDI_SUCCESS);
3280 }
3281 
3282 /*
3283  * Call the ppm's that have registered and adjust the devinfo struct as
3284  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3285  * by each ppm are assumed to be disjoint.
3286  */
3287 void
pm_ppm_claim(dev_info_t * dip)3288 pm_ppm_claim(dev_info_t *dip)
3289 {
3290 	struct ppm_callbacks *ppmcp;
3291 
3292 	if (PPM(dip)) {
3293 		return;
3294 	}
3295 	mutex_enter(&ppm_lock);
3296 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3297 		if ((*ppmcp->ppmc_func)(dip)) {
3298 			DEVI(dip)->devi_pm_ppm =
3299 			    (struct dev_info *)ppmcp->ppmc_dip;
3300 			mutex_exit(&ppm_lock);
3301 			return;
3302 		}
3303 	}
3304 	mutex_exit(&ppm_lock);
3305 }
3306 
3307 /*
3308  * Node is being detached so stop autopm until we see if it succeeds, in which
3309  * case pm_stop will be called.  For backwards compatible devices we bring the
3310  * device up to full power on the assumption the detach will succeed.
3311  */
3312 void
pm_detaching(dev_info_t * dip)3313 pm_detaching(dev_info_t *dip)
3314 {
3315 	PMD_FUNC(pmf, "detaching")
3316 	pm_info_t *info = PM_GET_PM_INFO(dip);
3317 	int iscons;
3318 
3319 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3320 	    PM_NUMCMPTS(dip)))
3321 	if (info == NULL)
3322 		return;
3323 	ASSERT(DEVI_IS_DETACHING(dip));
3324 	PM_LOCK_DIP(dip);
3325 	info->pmi_dev_pm_state |= PM_DETACHING;
3326 	PM_UNLOCK_DIP(dip);
3327 	if (!PM_ISBC(dip))
3328 		pm_scan_stop(dip);
3329 
3330 	/*
3331 	 * console and old-style devices get brought up when detaching.
3332 	 */
3333 	iscons = PM_IS_CFB(dip);
3334 	if (iscons || PM_ISBC(dip)) {
3335 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3336 		if (iscons) {
3337 			mutex_enter(&pm_cfb_lock);
3338 			while (cfb_inuse) {
3339 				mutex_exit(&pm_cfb_lock);
3340 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3341 				delay(1);
3342 				mutex_enter(&pm_cfb_lock);
3343 			}
3344 			ASSERT(cfb_dip_detaching == NULL);
3345 			ASSERT(cfb_dip);
3346 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3347 			cfb_dip = NULL;
3348 			mutex_exit(&pm_cfb_lock);
3349 		}
3350 	}
3351 }
3352 
3353 /*
3354  * Node failed to detach.  If it used to be autopm'd, make it so again.
3355  */
3356 void
pm_detach_failed(dev_info_t * dip)3357 pm_detach_failed(dev_info_t *dip)
3358 {
3359 	PMD_FUNC(pmf, "detach_failed")
3360 	pm_info_t *info = PM_GET_PM_INFO(dip);
3361 	int pm_all_at_normal(dev_info_t *);
3362 
3363 	if (info == NULL)
3364 		return;
3365 	ASSERT(DEVI_IS_DETACHING(dip));
3366 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3367 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3368 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3369 			/* Make sure the operation is still needed */
3370 			if (!pm_all_at_normal(dip)) {
3371 				if (pm_all_to_normal(dip,
3372 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3373 					PMD(PMD_ERROR, ("%s: could not bring "
3374 					    "%s@%s(%s#%d) to normal\n", pmf,
3375 					    PM_DEVICE(dip)))
3376 				}
3377 			}
3378 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3379 		}
3380 	}
3381 	if (!PM_ISBC(dip)) {
3382 		mutex_enter(&pm_scan_lock);
3383 		if (PM_SCANABLE(dip))
3384 			pm_scan_init(dip);
3385 		mutex_exit(&pm_scan_lock);
3386 		pm_rescan(dip);
3387 	}
3388 }
3389 
3390 /* generic Backwards Compatible component */
3391 static char *bc_names[] = {"off", "on"};
3392 
3393 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3394 
3395 static void
e_pm_default_levels(dev_info_t * dip,pm_component_t * cp,int norm)3396 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3397 {
3398 	pm_comp_t *pmc;
3399 	pmc = &cp->pmc_comp;
3400 	pmc->pmc_numlevels = 2;
3401 	pmc->pmc_lvals[0] = 0;
3402 	pmc->pmc_lvals[1] = norm;
3403 	e_pm_set_cur_pwr(dip, cp, norm);
3404 }
3405 
3406 static void
e_pm_default_components(dev_info_t * dip,int cmpts)3407 e_pm_default_components(dev_info_t *dip, int cmpts)
3408 {
3409 	int i;
3410 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3411 
3412 	p = DEVI(dip)->devi_pm_components;
3413 	for (