/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/mutex.h>
#include <sys/cpuvar.h>
#include <sys/cyclic.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/wdt.h>
#include <sys/callb.h>
#include <sys/cmn_err.h>
#include <sys/hypervisor_api.h>
#include <sys/membar.h>
#include <sys/x_call.h>
#include <sys/promif.h>
#include <sys/systm.h>
#include <sys/mach_descrip.h>
#include <sys/cpu_module.h>
#include <sys/pg.h>
#include <sys/lgrp.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/cpupart.h>
#include <sys/hsvc.h>
#include <sys/mpo.h>
#include <vm/hat_sfmmu.h>
#include <sys/time.h>
#include <sys/clock.h>

/*
 * Sun4v OS Suspend
 *
 * Provides a means to suspend a sun4v guest domain by pausing CPUs and then
 * calling into the HV to initiate a suspension. Suspension is sequenced
 * externally by calling suspend_pre, suspend_start, and suspend_post.
 * suspend_pre and suspend_post are meant to perform any special operations
 * that should be done before or after a suspend/resume operation, e.g.,
 * callbacks to cluster software to disable heartbeat monitoring before the
 * system is suspended. suspend_start prepares kernel services to be suspended
 * and then suspends the domain by calling hv_guest_suspend.
 *
 * Special Handling for %tick and %stick Registers
 *
 * After a suspend/resume operation, the %tick and %stick registers may have
 * jumped forwards or backwards. The delta is assumed to be consistent across
 * all CPUs, within the negligible level of %tick and %stick variation
 * acceptable on a cold boot. In order to maintain increasing %tick and %stick
 * counter values without exposing large positive or negative jumps to kernel
 * or user code, a %tick and %stick offset is used. Kernel reads of these
 * counters return the sum of the hardware register counter and offset
 * variable. After a suspend/resume operation, user reads of %tick or %stick
 * are emulated. Suspend code enables emulation by setting the
 * %{tick,stick}.NPT fields which trigger a privileged instruction access
 * trap whenever the registers are read from user mode. If emulation has been
 * enabled, the trap handler emulates the instruction. Emulation is only
 * enabled during a successful suspend/resume operation. When emulation is
 * enabled, CPUs that are DR'd into the system will have their
 * %{tick,stick}.NPT bits set to 1 as well.
 */
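
/*
 * Illustrative sketch only (hypothetical caller, not a code path in this
 * file): the external sequencer described above would drive a suspend
 * roughly as follows, always pairing suspend_start with suspend_post:
 *
 *	char		err[SC_FAIL_STR_MAX];
 *	boolean_t	recovered;
 *
 *	if (suspend_supported() &&
 *	    suspend_pre(err, sizeof (err), &recovered) == 0) {
 *		(void) suspend_start(err, sizeof (err));
 *		(void) suspend_post(err, sizeof (err));
 *	}
 */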

extern u_longlong_t gettick(void);	/* returns %stick */
extern uint64_t gettick_counter(void);	/* returns %tick */
extern uint64_t gettick_npt(void);
extern uint64_t getstick_npt(void);
extern int mach_descrip_update(void);
extern cpuset_t cpu_ready_set;
extern uint64_t native_tick_offset;
extern uint64_t native_stick_offset;
extern uint64_t sys_tick_freq;

/*
 * Global Sun Cluster pre/post callbacks.
 */
const char *(*cl_suspend_error_decode)(int);
int (*cl_suspend_pre_callback)(void);
int (*cl_suspend_post_callback)(void);
#define	SC_PRE_FAIL_STR_FMT	"Sun Cluster pre-suspend failure: %d"
#define	SC_POST_FAIL_STR_FMT	"Sun Cluster post-suspend failure: %d"
#define	SC_FAIL_STR_MAX		256

/*
 * The minimum major and minor version of the HSVC_GROUP_CORE API group
 * required in order to use OS suspend.
 */
#define	SUSPEND_CORE_MAJOR	1
#define	SUSPEND_CORE_MINOR	2
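
/*
 * For example, a negotiated core API version of 1.2, 1.3, or 2.0 satisfies
 * this requirement, while 1.1 does not (see suspend_supported() below).
 */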

/*
 * By default, sun4v OS suspend is supported if the required HV version
 * is present. suspend_disabled should be set on platforms that do not
 * allow OS suspend regardless of whether or not the HV supports it.
 * It can also be set in /etc/system.
 */
static int suspend_disabled = 0;

/*
 * Controls whether or not user-land tick and stick register emulation
 * will be enabled following a successful suspend operation.
 */
static int enable_user_tick_stick_emulation = 1;

/*
 * Indicates whether or not tick and stick emulation is currently active.
 * After a successful suspend operation, if emulation is enabled, this
 * variable is set to B_TRUE. Global scope to allow emulation code to
 * check if emulation is active.
 */
boolean_t tick_stick_emulation_active = B_FALSE;

/*
 * When non-zero, after a successful suspend and resume, cpunodes, CPU HW
 * sharing data structures, and processor groups will be updated using
 * information from the updated MD.
 */
static int suspend_update_cpu_mappings = 1;

/*
 * The maximum number of microseconds by which the %tick or %stick register
 * can vary between any two CPUs in the system. To calculate the
 * native_stick_offset and native_tick_offset, we measure the change in these
 * registers on one CPU over a suspend/resume. Other CPUs may experience
 * slightly larger or smaller changes. %tick and %stick should be synchronized
 * between CPUs, but there may be some variation. So we add an additional value
 * derived from this variable to ensure that these registers always increase
 * over a suspend/resume operation, assuming all %tick and %stick registers
 * are synchronized (within a certain limit) across CPUs in the system. The
 * delta between %sticks on different CPUs should be a small number of cycles,
 * not perceptible to readers of %stick that migrate between CPUs. We set this
 * value to 1 millisecond, which means that over a suspend/resume operation,
 * all CPUs' %tick and %stick will advance forwards as long as, across all
 * CPUs, the %tick and %stick are synchronized to within 1 ms. This applies to
 * CPUs before the suspend and CPUs after the resume. 1 ms is conservative,
 * but small enough to not trigger TOD faults.
 */
static uint64_t suspend_tick_stick_max_delta = 1000; /* microseconds */
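
/*
 * For example, assuming a hypothetical %stick frequency of 1 GHz, the
 * padding added to native_stick_offset in set_tick_offsets() below is
 * 1,000,000,000 * 1000 * 2 / MICROSEC = 2,000,000 %stick cycles (2 ms).
 */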

/*
 * The number of times the system has been suspended and resumed.
 */
static uint64_t suspend_count = 0;

/*
 * DBG() and DBG_PROM() macros.
 */
#ifdef	DEBUG

static int suspend_debug_flag = 0;

#define	DBG_PROM		\
if (suspend_debug_flag)		\
	prom_printf

#define	DBG			\
if (suspend_debug_flag)		\
	suspend_debug

static void
suspend_debug(const char *fmt, ...)
{
	char	buf[512];
	va_list	ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
	va_end(ap);

	cmn_err(CE_NOTE, "%s", buf);
}

#else /* DEBUG */

#define	DBG_PROM
#define	DBG

#endif /* DEBUG */
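
/*
 * For example, with DEBUG defined, a call such as
 *
 *	DBG("suspend: rv: %d", rv);
 *
 * expands to
 *
 *	if (suspend_debug_flag)
 *		suspend_debug("suspend: rv: %d", rv);
 *
 * and when DEBUG is not defined, the same call reduces to the side-effect
 * free expression statement ("suspend: rv: %d", rv);.
 */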

/*
 * Return true if the HV supports OS suspend and if suspend has not been
 * disabled on this platform.
 */
boolean_t
suspend_supported(void)
{
	uint64_t major, minor;

	if (suspend_disabled)
		return (B_FALSE);

	if (hsvc_version(HSVC_GROUP_CORE, &major, &minor) != 0)
		return (B_FALSE);

	return ((major == SUSPEND_CORE_MAJOR && minor >= SUSPEND_CORE_MINOR) ||
	    (major > SUSPEND_CORE_MAJOR));
}

/*
 * Memory DR is not permitted if the system has been suspended and resumed.
 * It is the responsibility of the caller of suspend_start and the DR
 * subsystem to serialize DR operations and suspend_memdr_allowed() checks.
 */
boolean_t
suspend_memdr_allowed(void)
{
	return (suspend_count == 0);
}

/*
 * Given a source tick, stick, and tod value, set the tick and stick offsets
 * such that the (current physical register value) + offset == (source value)
 * and in addition account for some variation between the %tick/%stick on
 * different CPUs. We account for this variation by adding in double the value
 * of suspend_tick_stick_max_delta. The following is an explanation of why
 * suspend_tick_stick_max_delta must be multiplied by two and added to
 * native_stick_offset.
 *
 * Consider a guest instance that is yet to be suspended with CPUs p0 and p1
 * with physical "source" %stick values s0 and s1 respectively. When the guest
 * is first resumed, the physical "target" %stick values are t0 and t1
 * respectively. The virtual %stick values after the resume are v0 and v1
 * respectively. Let x be the maximum difference between any two CPUs' %stick
 * registers at a given point in time and let the %stick values be assigned
 * such that
 *
 *     s1 = s0 + x and
 *     t1 = t0 - x
 *
 * Let us assume that p0 is driving the suspend and resume. Then, we will
 * calculate the stick offset f and the virtual %stick on p0 after the
 * resume as follows.
 *
 *      f = s0 - t0 and
 *     v0 = t0 + f
 *
 * We calculate the virtual %stick v1 on p1 after the resume as
 *
 *     v1 = t1 + f
 *
 * Substitution yields
 *
 *     v1 = t1 + (s0 - t0)
 *     v1 = (t0 - x) + (s0 - t0)
 *     v1 = -x + s0
 *     v1 = s0 - x
 *     v1 = (s1 - x) - x
 *     v1 = s1 - 2x
 *
 * Therefore, in this scenario, without accounting for %stick variation in
 * the calculation of the native_stick_offset f, the virtual %stick on p1
 * is less than the value of the %stick on p1 before the suspend, which is
 * unacceptable. By adding 2x to v1, we guarantee it will be equal to s1,
 * which means the %stick on p1 after the resume will always be greater
 * than or equal to the %stick on p1 before the suspend. Since v1 = t1 + f
 * at any point in time, we can accomplish this by adding 2x to f. This
 * guarantees any processes bound to CPU p0 or p1 will not see a %stick
 * decrease across a suspend/resume. Hence, in the code below, we multiply
 * suspend_tick_stick_max_delta by two in the calculation for
 * native_stick_offset, native_tick_offset, and target_hrtime.
 */
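/*
 * A worked example with made-up numbers: let s0 = 1000 and x = 10, so
 * s1 = 1010; let t0 = 500, so t1 = 490. Then f = s0 - t0 = 500 and
 * v1 = t1 + f = 990, which is 2x = 20 less than s1 = 1010. With the padded
 * offset f' = f + 2x = 520, v1 = t1 + f' = 1010 = s1, and the %stick
 * observed on p1 does not move backwards.
 */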
static void
set_tick_offsets(uint64_t source_tick, uint64_t source_stick, timestruc_t *tsp)
{
	uint64_t target_tick;
	uint64_t target_stick;
	hrtime_t source_hrtime;
	hrtime_t target_hrtime;

	/*
	 * Temporarily set the offsets to zero so that the following reads
	 * of the registers will yield physical unadjusted counter values.
	 */
	native_tick_offset = 0;
	native_stick_offset = 0;

	target_tick = gettick_counter();	/* returns %tick */
	target_stick = gettick();		/* returns %stick */

	/*
	 * Calculate the new offsets. In addition to the delta observed on
	 * this CPU, add an additional value. Multiply the %tick/%stick
	 * frequency by suspend_tick_stick_max_delta (us). Then, multiply by 2
	 * to account for a delta between CPUs before the suspend and a
	 * delta between CPUs after the resume.
	 */
	native_tick_offset = (source_tick - target_tick) +
	    (CPU->cpu_curr_clock * suspend_tick_stick_max_delta * 2 / MICROSEC);
	native_stick_offset = (source_stick - target_stick) +
	    (sys_tick_freq * suspend_tick_stick_max_delta * 2 / MICROSEC);

	/*
	 * We've effectively increased %stick and %tick by twice the value
	 * of suspend_tick_stick_max_delta to account for variation across
	 * CPUs. Now adjust the preserved TOD by the same amount.
	 */
	source_hrtime = ts2hrt(tsp);
	target_hrtime = source_hrtime +
	    (suspend_tick_stick_max_delta * 2 * (NANOSEC/MICROSEC));
	hrt2ts(target_hrtime, tsp);
}

/*
 * Set the {tick,stick}.NPT field to 1 on this CPU.
 */
static void
enable_tick_stick_npt(void)
{
	(void) hv_stick_set_npt(1);
	(void) hv_tick_set_npt(1);
}

/*
 * Synchronize a CPU's {tick,stick}.NPT fields with the current state
 * of the system. This is used when a CPU is DR'd into the system.
 */
void
suspend_sync_tick_stick_npt(void)
{
	if (tick_stick_emulation_active) {
		DBG("enabling {%%tick/%%stick}.NPT on CPU 0x%x", CPU->cpu_id);
		(void) hv_stick_set_npt(1);
		(void) hv_tick_set_npt(1);
	} else {
		ASSERT(gettick_npt() == 0);
		ASSERT(getstick_npt() == 0);
	}
}

/*
 * Obtain an updated MD from the hypervisor and update cpunodes, CPU HW
 * sharing data structures, and processor groups.
 */
static void
update_cpu_mappings(void)
{
	md_t		*mdp;
	processorid_t	id;
	cpu_t		*cp;
	cpu_pg_t	*pgps[NCPU];

	if ((mdp = md_get_handle()) == NULL) {
		DBG("suspend: md_get_handle failed");
		return;
	}

	DBG("suspend: updating CPU mappings");

	mutex_enter(&cpu_lock);

	setup_chip_mappings(mdp);
	setup_exec_unit_mappings(mdp);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		cpu_map_exec_units(cp);
	}

	/*
	 * Re-calculate processor groups.
	 *
	 * First tear down all PG information before adding any new PG
	 * information derived from the MD we just downloaded. We must
	 * call pg_cpu_inactive and pg_cpu_active with CPUs paused and
	 * we want to minimize the number of times pause_cpus is called.
	 * Inactivating all CPUs would leave PGs without any active CPUs,
	 * so while CPUs are paused, call pg_cpu_inactive and swap in the
	 * bootstrap PG structure saving the original PG structure to be
	 * fini'd afterwards. This prevents the dispatcher from encountering
	 * PGs in which all CPUs are inactive. Offline CPUs are already
	 * inactive in their PGs and shouldn't be reactivated, so we must
	 * not call pg_cpu_inactive or pg_cpu_active for those CPUs.
	 */
	pause_cpus(NULL, NULL);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
			pg_cpu_inactive(cp);
		pgps[id] = cp->cpu_pg;
		pg_cpu_bootstrap(cp);
	}
	start_cpus();

	/*
	 * pg_cpu_fini* and pg_cpu_init* must be called while CPUs are
	 * not paused. Use two separate loops here so that we do not
	 * initialize PG data for CPUs until all the old PG data structures
	 * are torn down.
	 */
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		pg_cpu_fini(cp, pgps[id]);
		mpo_cpu_remove(id);
	}

	/*
	 * Initialize PG data for each CPU, but leave the bootstrapped
	 * PG structure in place to avoid running with any PGs containing
	 * nothing but inactive CPUs.
	 */
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		mpo_cpu_add(mdp, id);
		pgps[id] = pg_cpu_init(cp, B_TRUE);
	}

	/*
	 * Now that PG data has been initialized for all CPUs in the
	 * system, replace the bootstrapped PG structure with the
	 * initialized PG structure and call pg_cpu_active for each CPU.
	 */
	pause_cpus(NULL, NULL);
	for (id = 0; id < NCPU; id++) {
		if ((cp = cpu_get(id)) == NULL)
			continue;
		cp->cpu_pg = pgps[id];
		if ((cp->cpu_flags & CPU_OFFLINE) == 0)
			pg_cpu_active(cp);
	}
	start_cpus();

	mutex_exit(&cpu_lock);

	(void) md_fini_handle(mdp);
}

/*
 * Wrapper for the Sun Cluster error decoding function.
 */
static int
cluster_error_decode(int error, char *error_reason, size_t max_reason_len)
{
	const char	*decoded;
	size_t		decoded_len;

	ASSERT(error_reason != NULL);
	ASSERT(max_reason_len > 0);

	max_reason_len = MIN(max_reason_len, SC_FAIL_STR_MAX);

	if (cl_suspend_error_decode == NULL)
		return (-1);

	if ((decoded = (*cl_suspend_error_decode)(error)) == NULL)
		return (-1);

	/* Get number of non-NULL bytes */
	if ((decoded_len = strnlen(decoded, max_reason_len - 1)) == 0)
		return (-1);

	bcopy(decoded, error_reason, decoded_len);

	/*
	 * The error string returned from cl_suspend_error_decode
	 * should be NULL-terminated, but set the terminator here
	 * because we only copied non-NULL bytes. If the decoded
	 * string was not NULL-terminated, this guarantees that
	 * error_reason will be.
	 */
	error_reason[decoded_len] = '\0';

	return (0);
}

/*
 * Wrapper for the Sun Cluster pre-suspend callback.
 */
static int
cluster_pre_wrapper(char *error_reason, size_t max_reason_len)
{
	int rv = 0;

	if (cl_suspend_pre_callback != NULL) {
		rv = (*cl_suspend_pre_callback)();
		DBG("suspend: cl_suspend_pre_callback returned %d", rv);
		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
			if (cluster_error_decode(rv, error_reason,
			    max_reason_len)) {
				(void) snprintf(error_reason, max_reason_len,
				    SC_PRE_FAIL_STR_FMT, rv);
			}
		}
	}

	return (rv);
}

/*
 * Wrapper for the Sun Cluster post-suspend callback.
 */
static int
cluster_post_wrapper(char *error_reason, size_t max_reason_len)
{
	int rv = 0;

	if (cl_suspend_post_callback != NULL) {
		rv = (*cl_suspend_post_callback)();
		DBG("suspend: cl_suspend_post_callback returned %d", rv);
		if (rv != 0 && error_reason != NULL && max_reason_len > 0) {
			if (cluster_error_decode(rv, error_reason,
			    max_reason_len)) {
				(void) snprintf(error_reason,
				    max_reason_len, SC_POST_FAIL_STR_FMT, rv);
			}
		}
	}

	return (rv);
}

/*
 * Execute pre-suspend callbacks preparing the system for a suspend operation.
 * Returns zero on success, non-zero on failure. Sets the recovered argument
 * to indicate whether or not callbacks could be undone in the event of a
 * failure--if callbacks were successfully undone, *recovered is set to B_TRUE,
 * otherwise *recovered is set to B_FALSE. Must be called successfully before
 * suspend_start can be called. Callers should first call suspend_supported to
 * determine if OS suspend is supported.
 */
int
suspend_pre(char *error_reason, size_t max_reason_len, boolean_t *recovered)
{
	int rv;

	ASSERT(recovered != NULL);

	/*
	 * Return an error if suspend_pre is erroneously called
	 * when OS suspend is not supported.
	 */
	ASSERT(suspend_supported());
	if (!suspend_supported()) {
		DBG("suspend: suspend_pre called without suspend support");
		*recovered = B_TRUE;
		return (ENOTSUP);
	}
	DBG("suspend: %s", __func__);

	rv = cluster_pre_wrapper(error_reason, max_reason_len);

	/*
	 * At present, only one pre-suspend operation exists.
	 * If it fails, no recovery needs to be done.
	 */
	if (rv != 0 && recovered != NULL)
		*recovered = B_TRUE;

	return (rv);
}

/*
 * Execute post-suspend callbacks. Returns zero on success, non-zero on
 * failure. Must be called after suspend_start is called, regardless of
 * whether or not suspend_start is successful.
 */
int
suspend_post(char *error_reason, size_t max_reason_len)
{
	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);
	return (cluster_post_wrapper(error_reason, max_reason_len));
}

/*
 * Suspends the OS by pausing CPUs and calling into the HV to initiate
 * the suspend. When the HV routine hv_guest_suspend returns, the system
 * will be resumed. Must be called after a successful call to suspend_pre.
 * suspend_post must be called after suspend_start, whether or not
 * suspend_start returns an error.
 */
/*ARGSUSED*/
int
suspend_start(char *error_reason, size_t max_reason_len)
{
	uint64_t	source_tick;
	uint64_t	source_stick;
	uint64_t	rv;
	timestruc_t	source_tod;
	int		spl;

	ASSERT(suspend_supported());
	DBG("suspend: %s", __func__);

	sfmmu_ctxdoms_lock();

	mutex_enter(&cpu_lock);

	/* Suspend the watchdog */
	watchdog_suspend();

	/* Record the TOD */
	mutex_enter(&tod_lock);
	source_tod = tod_get();
	mutex_exit(&tod_lock);

	/* Pause all other CPUs */
	pause_cpus(NULL, NULL);
	DBG_PROM("suspend: CPUs paused\n");

	/* Suspend cyclics */
	cyclic_suspend();
	DBG_PROM("suspend: cyclics suspended\n");

	/* Disable interrupts */
	spl = spl8();
	DBG_PROM("suspend: spl8()\n");

	source_tick = gettick_counter();
	source_stick = gettick();
	DBG_PROM("suspend: source_tick: 0x%lx\n", source_tick);
	DBG_PROM("suspend: source_stick: 0x%lx\n", source_stick);

	/*
	 * Call into the HV to initiate the suspend. hv_guest_suspend()
	 * returns after the guest has been resumed or if the suspend
	 * operation failed or was cancelled. After a successful suspend,
	 * the %tick and %stick registers may have changed by an amount
	 * that is not proportional to the amount of time that has passed.
	 * They may have jumped forwards or backwards. Some variation is
	 * allowed and accounted for using suspend_tick_stick_max_delta,
	 * but otherwise this jump must be uniform across all CPUs and we
	 * operate under the assumption that it is (maintaining two global
	 * offset variables--one for %tick and one for %stick.)
	 */
	DBG_PROM("suspend: suspending... \n");
	rv = hv_guest_suspend();
	if (rv != 0) {
		splx(spl);
		cyclic_resume();
		start_cpus();
		watchdog_resume();
		mutex_exit(&cpu_lock);
		sfmmu_ctxdoms_unlock();
		DBG("suspend: failed, rv: %ld\n", rv);
		return (rv);
	}

	suspend_count++;

	/* Update the global tick and stick offsets and the preserved TOD */
	set_tick_offsets(source_tick, source_stick, &source_tod);

	/* Ensure new offsets are globally visible before resuming CPUs */
	membar_sync();

	/* Enable interrupts */
	splx(spl);

	/* Set the {%tick,%stick}.NPT bits on all CPUs */
	if (enable_user_tick_stick_emulation) {
		xc_all((xcfunc_t *)enable_tick_stick_npt, 0, 0);
		xt_sync(cpu_ready_set);
		ASSERT(gettick_npt() != 0);
		ASSERT(getstick_npt() != 0);
	}

	/* If emulation is enabled, but not currently active, enable it */
	if (enable_user_tick_stick_emulation && !tick_stick_emulation_active) {
		tick_stick_emulation_active = B_TRUE;
	}

	sfmmu_ctxdoms_remove();

	/* Resume cyclics, unpause CPUs */
	cyclic_resume();
	start_cpus();

	/* Set the TOD */
	mutex_enter(&tod_lock);
	tod_set(source_tod);
	mutex_exit(&tod_lock);

	/* Re-enable the watchdog */
	watchdog_resume();

	mutex_exit(&cpu_lock);

	/* Download the latest MD */
	if ((rv = mach_descrip_update()) != 0)
		cmn_err(CE_PANIC, "suspend: mach_descrip_update failed: %ld",
		    rv);

	sfmmu_ctxdoms_update();
	sfmmu_ctxdoms_unlock();

	/* Get new MD, update CPU mappings/relationships */
	if (suspend_update_cpu_mappings)
		update_cpu_mappings();

	DBG("suspend: target tick: 0x%lx", gettick_counter());
	DBG("suspend: target stick: 0x%llx", gettick());
	DBG("suspend: user %%tick/%%stick emulation is %d",
	    tick_stick_emulation_active);
	DBG("suspend: finished");

	return (0);
}