/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25
26 #include <sys/types.h>
27 #include <sys/thread.h>
28 #include <sys/conf.h>
29 #include <sys/cpuvar.h>
30 #include <sys/cpr.h>
31 #include <sys/user.h>
32 #include <sys/cmn_err.h>
33 #include <sys/callb.h>
34
35 extern void utstop_init(void);
36 extern void add_one_utstop(void);
37 extern void utstop_timedwait(long ticks);
38
39 static void cpr_stop_user(int);
40 static int cpr_check_user_threads(void);
41
42 /*
43 * CPR user thread related support routines
44 */
45 void
cpr_signal_user(int sig)46 cpr_signal_user(int sig)
47 {
48 /*
49 * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet
50 * since openwin is catching every signal and default action is to exit.
51 * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
52 */
53 struct proc *p;
54
55 mutex_enter(&pidlock);
56
57 for (p = practive; p; p = p->p_next) {
58 /* only user threads */
59 if (p->p_exec == NULL || p->p_stat == SZOMB ||
60 p == proc_init || p == ttoproc(curthread))
61 continue;
62
63 mutex_enter(&p->p_lock);
64 sigtoproc(p, NULL, sig);
65 mutex_exit(&p->p_lock);
66 }
67 mutex_exit(&pidlock);
68
69 DELAY(MICROSEC);
70 }
71
72 /* max wait time for user thread stop */
73 #define CPR_UTSTOP_WAIT hz
74 #define CPR_UTSTOP_RETRY 4
75 static int count;
76
77 int
cpr_stop_user_threads()78 cpr_stop_user_threads()
79 {
80 utstop_init();
81
82 count = 0;
83 do {
84 if (++count > CPR_UTSTOP_RETRY)
85 return (ESRCH);
86 cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
87 } while (cpr_check_user_threads() &&
88 (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));
89
90 return (0);
91 }
92
93 /*
94 * This routine tries to stop all user threads before we get rid of all
95 * its pages.It goes through allthreads list and set the TP_CHKPT flag
96 * for all user threads and make them runnable. If all of the threads
97 * can be stopped within the max wait time, CPR will proceed. Otherwise
98 * CPR is aborted after a few of similiar retries.
99 */
100 static void
cpr_stop_user(int wait)101 cpr_stop_user(int wait)
102 {
103 kthread_id_t tp;
104 proc_t *p;
105
106 /* The whole loop below needs to be atomic */
107 mutex_enter(&pidlock);
108
109 /* faster this way */
110 tp = curthread->t_next;
111 do {
112 /* kernel threads will be handled later */
113 p = ttoproc(tp);
114 if (p->p_as == &kas || p->p_stat == SZOMB)
115 continue;
116
117 /*
118 * If the thread is stopped (by CPR) already, do nothing;
119 * if running, mark TP_CHKPT;
120 * if sleeping normally, mark TP_CHKPT and setrun;
121 * if sleeping non-interruptable, mark TP_CHKPT only for now;
122 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
123 * do nothing.
124 */
125
126 /* p_lock is needed for modifying t_proc_flag */
127 mutex_enter(&p->p_lock);
128 thread_lock(tp); /* needed to check CPR_ISTOPPED */
129
130 if (tp->t_state == TS_STOPPED) {
131 /*
132 * if already stopped by other reasons, add this new
133 * reason to it.
134 */
135 if (tp->t_schedflag & TS_RESUME)
136 tp->t_schedflag &= ~TS_RESUME;
137 } else {
138
139 tp->t_proc_flag |= TP_CHKPT;
140
141 thread_unlock(tp);
142 mutex_exit(&p->p_lock);
143 add_one_utstop();
144 mutex_enter(&p->p_lock);
145 thread_lock(tp);
146
147 aston(tp);
148
149 if (ISWAKEABLE(tp) || ISWAITING(tp)) {
150 setrun_locked(tp);
151 }
152 }
153 /*
154 * force the thread into the kernel if it is not already there.
155 */
156 if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
157 poke_cpu(tp->t_cpu->cpu_id);
158 thread_unlock(tp);
159 mutex_exit(&p->p_lock);
160
161 } while ((tp = tp->t_next) != curthread);
162 mutex_exit(&pidlock);
163
164 utstop_timedwait(wait);
165 }
166
167 /*
168 * Checks and makes sure all user threads are stopped
169 */
170 static int
cpr_check_user_threads()171 cpr_check_user_threads()
172 {
173 kthread_id_t tp;
174 int rc = 0;
175
176 mutex_enter(&pidlock);
177 tp = curthread->t_next;
178 do {
179 if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
180 continue;
181
182 thread_lock(tp);
183 /*
184 * make sure that we are off all the queues and in a stopped
185 * state.
186 */
187 if (!CPR_ISTOPPED(tp)) {
188 thread_unlock(tp);
189 mutex_exit(&pidlock);
190
191 if (count == CPR_UTSTOP_RETRY) {
192 CPR_DEBUG(CPR_DEBUG1, "Suspend failed: "
193 "cannot stop uthread\n");
194 cpr_err(CE_WARN, "Suspend cannot stop "
195 "process %s (%p:%x).",
196 ttoproc(tp)->p_user.u_psargs, (void *)tp,
197 tp->t_state);
198 cpr_err(CE_WARN, "Process may be waiting for"
199 " network request, please try again.");
200 }
201
202 CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x "
203 "sched=%x\n", (void *)tp, tp->t_state,
204 tp->t_proc_flag, tp->t_schedflag);
205 CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n",
206 (void *)ttoproc(tp), ttoproc(tp)->p_stat,
207 ttoproc(tp)->p_pidp->pid_id);
208 return (1);
209 }
210 thread_unlock(tp);
211
212 } while ((tp = tp->t_next) != curthread && rc == 0);
213
214 mutex_exit(&pidlock);
215 return (0);
216 }
217
218
219 /*
220 * start all threads that were stopped for checkpoint.
221 */
222 void
cpr_start_user_threads()223 cpr_start_user_threads()
224 {
225 kthread_id_t tp;
226 proc_t *p;
227
228 mutex_enter(&pidlock);
229 tp = curthread->t_next;
230 do {
231 p = ttoproc(tp);
232 /*
233 * kernel threads are callback'ed rather than setrun.
234 */
235 if (ttoproc(tp)->p_as == &kas) continue;
236 /*
237 * t_proc_flag should have been cleared. Just to make sure here
238 */
239 mutex_enter(&p->p_lock);
240 tp->t_proc_flag &= ~TP_CHKPT;
241 mutex_exit(&p->p_lock);
242
243 thread_lock(tp);
244 if (CPR_ISTOPPED(tp)) {
245
246 /*
247 * put it back on the runq
248 */
249 tp->t_schedflag |= TS_RESUME;
250 setrun_locked(tp);
251 }
252 thread_unlock(tp);
253 /*
254 * DEBUG - Keep track of current and next thread pointer.
255 */
256 } while ((tp = tp->t_next) != curthread);
257
258 mutex_exit(&pidlock);
259 }
260
261
262 /*
263 * re/start kernel threads
264 */
265 void
cpr_start_kernel_threads(void)266 cpr_start_kernel_threads(void)
267 {
268 CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons...");
269 (void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
270 CPR_DEBUG(CPR_DEBUG1, "done\n");
271
272 /* see table lock below */
273 callb_unlock_table();
274 }
275
276
277 /*
278 * Stop kernel threads by using the callback mechanism. If any thread
279 * cannot be stopped, return failure.
280 */
281 int
cpr_stop_kernel_threads(void)282 cpr_stop_kernel_threads(void)
283 {
284 caddr_t name;
285
286 callb_lock_table(); /* Note: we unlock the table in resume. */
287
288 CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons...");
289 if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
290 CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
291 cpr_err(CE_WARN,
292 "Could not stop \"%s\" kernel thread. "
293 "Please try again later.", name);
294 return (EBUSY);
295 }
296
297 CPR_DEBUG(CPR_DEBUG1, ("done\n"));
298 return (0);
299 }
300
301 /*
302 * Check to see that kernel threads are stopped.
303 * This should be called while CPU's are paused, and the caller is
304 * effectively running single user, or else we are virtually guaranteed
305 * to fail. The routine should not ASSERT on the paused state or spl
306 * level, as there may be a use for this to verify that things are running
307 * again.
308 */
309 int
cpr_threads_are_stopped(void)310 cpr_threads_are_stopped(void)
311 {
312 caddr_t name;
313 kthread_id_t tp;
314 proc_t *p;
315
316 /*
317 * We think we stopped all the kernel threads. Just in case
318 * someone is not playing by the rules, take a spin through
319 * the threadlist and see if we can account for everybody.
320 */
321 mutex_enter(&pidlock);
322 tp = curthread->t_next;
323 do {
324 p = ttoproc(tp);
325 if (p->p_as != &kas)
326 continue;
327
328 if (tp->t_flag & T_INTR_THREAD)
329 continue;
330
331 if (! callb_is_stopped(tp, &name)) {
332 mutex_exit(&pidlock);
333 cpr_err(CE_WARN,
334 "\"%s\" kernel thread not stopped.", name);
335 return (EBUSY);
336 }
337 } while ((tp = tp->t_next) != curthread);
338
339 mutex_exit(&pidlock);
340 return (0);
341 }
342