/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2018 Joyent, Inc.
 */

/*
 * The main CPU-control loops, used to control masters and slaves.
 */

#include <sys/types.h>

#include <kmdb/kaif.h>
#include <kmdb/kaif_start.h>
#include <kmdb/kmdb_asmutil.h>
#include <kmdb/kmdb_dpi_impl.h>
#include <kmdb/kmdb_kdi.h>

#define	KAIF_SLAVE_CMD_SPIN	0
#define	KAIF_SLAVE_CMD_SWITCH	1
#define	KAIF_SLAVE_CMD_RESUME	2
#define	KAIF_SLAVE_CMD_FLUSH	3
#define	KAIF_SLAVE_CMD_REBOOT	4
#if defined(__sparc)
#define	KAIF_SLAVE_CMD_ACK	5
#endif

/*
 * Used to synchronize attempts to set kaif_master_cpuid.  kaif_master_cpuid may
 * be read without kaif_master_lock, and may be written by the current master
 * CPU.
 */
int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
static uintptr_t kaif_master_lock = 0;

/*
 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
 * be held to write kaif_looping, but need not be held to read it.
 */
static volatile uint_t kaif_looping;
static uintptr_t kaif_loop_lock;

static volatile int kaif_slave_cmd;
static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */

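/*
 * A simple cas()-based spin lock.  kaif_lock_enter() spins until it can
 * atomically change *lock from 0 to 1; the membar_producer() ensures that
 * the acquisition is globally visible before any stores made while the
 * lock is held.
 */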
static void
kaif_lock_enter(uintptr_t *lock)
{
	while (cas(lock, 0, 1) != 0)
		continue;
	membar_producer();
}

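/*
 * Release the lock by storing 0, followed by a membar_producer() so that
 * the release is pushed out ahead of any later stores.
 */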
static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = 0;
	membar_producer();
}

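/*
 * Post a command for the slave CPUs and release them from
 * kmdb_kdi_slave_wait().
 */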
static void
kaif_start_slaves(int cmd)
{
	kaif_slave_cmd = cmd;
	kmdb_kdi_start_slaves();
}

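/*
 * The master CPU runs this loop on the debugger's trap table, processing
 * commands from the DPI layer until it is told to resume the world, hand
 * mastership to another CPU, or (on x86) reboot.
 */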
static int
kaif_master_loop(kaif_cpusave_t *cpusave)
{
	int notflushed, i;

#if defined(__sparc)
	kaif_prom_rearm();
#endif
	kaif_trap_set_debugger();

	/*
	 * If we re-entered due to a ::switch, we need to tell the slave CPUs
	 * to sleep again.
	 */
	kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 0);

master_loop:
	switch (kmdb_dpi_reenter()) {
	case KMDB_DPI_CMD_SWITCH_CPU:
		/*
		 * We assume that the target CPU is a valid slave.  There's no
		 * easy way to complain here, so we'll assume that the caller
		 * has done the proper checking.
		 */
		if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
			break;

		kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();

		/*
		 * Switch back to the saved trap table before we switch CPUs --
		 * we need to make sure that only one CPU is on the debugger's
		 * table at a time.
		 */
		kaif_trap_set_saved(cpusave);

		kaif_start_slaves(KAIF_SLAVE_CMD_SWITCH);

		/* The new master is now awake */
		return (KAIF_CPU_CMD_SWITCH);

	case KMDB_DPI_CMD_RESUME_ALL:
	case KMDB_DPI_CMD_RESUME_UNLOAD:
		/*
		 * Resume everyone, clean up for next entry.
		 */
		kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
		membar_producer();
		kaif_start_slaves(KAIF_SLAVE_CMD_RESUME);

		if (kmdb_dpi_work_required())
			kmdb_dpi_wrintr_fire();

		kaif_trap_set_saved(cpusave);

		return (KAIF_CPU_CMD_RESUME);

	case KMDB_DPI_CMD_RESUME_MASTER:
		/*
		 * Single-CPU resume, which is performed on the debugger's
		 * trap table (so no need to switch back).
		 */
		return (KAIF_CPU_CMD_RESUME_MASTER);

	case KMDB_DPI_CMD_FLUSH_CACHES:
		kaif_start_slaves(KAIF_SLAVE_CMD_FLUSH);
		/*
		 * Wait for the other CPUs to finish flushing their caches.
		 */
		do {
			notflushed = 0;
			for (i = 0; i < kaif_ncpusave; i++) {
				kaif_cpusave_t *save = &kaif_cpusave[i];

				if (save->krs_cpu_state ==
				    KAIF_CPU_STATE_SLAVE &&
				    !save->krs_cpu_flushed) {
					notflushed++;
					break;
				}
			}
		} while (notflushed > 0);

		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
		break;

#if defined(__i386) || defined(__amd64)
	case KMDB_DPI_CMD_REBOOT:
		/*
		 * Reboot must be initiated by CPU 0.  I could ask why, but I'm
		 * afraid that I don't want to know the answer.
		 */
		if (cpusave->krs_cpu_id == 0)
			kmdb_kdi_reboot();

		kaif_start_slaves(KAIF_SLAVE_CMD_REBOOT);

		/*
		 * Spin forever, waiting for CPU 0 (apparently a slave) to
		 * reboot the system.
		 */
		for (;;)
			continue;

		/*NOTREACHED*/
		break;
#endif
	}

	goto master_loop;
}

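/*
 * Slave CPUs spin in this loop waiting for a command from the master:
 * become the new master, flush caches, resume, or (on x86, CPU 0 only)
 * reboot the system.
 */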
static int
kaif_slave_loop(kaif_cpusave_t *cpusave)
{
	int slavecmd, rv;

#if defined(__sparc)
	/*
	 * If the user elects to drop to OBP from the debugger, some OBP
	 * implementations will cross-call the slaves.  We have to turn
	 * IE back on so we can receive the cross-calls.  If we don't,
	 * some OBP implementations will wait forever.
	 */
	interrupts_on();
#endif

	/* Wait for duty to call */
	for (;;) {
		slavecmd = kaif_slave_cmd;

		if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
		    kaif_slave_tgt == cpusave->krs_cpu_id) {
			kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
			cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
			rv = KAIF_CPU_CMD_SWITCH;
			break;

		} else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
			kmdb_kdi_flush_caches();
			cpusave->krs_cpu_flushed = 1;
			continue;

#if defined(__i386) || defined(__amd64)
		} else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
		    cpusave->krs_cpu_id == 0) {
			rv = 0;
			kmdb_kdi_reboot();
			break;
#endif

		} else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
			rv = KAIF_CPU_CMD_RESUME;
			break;
#if defined(__sparc)
		} else if (slavecmd == KAIF_SLAVE_CMD_ACK) {
			cpusave->krs_cpu_acked = 1;
		} else if (cpusave->krs_cpu_acked &&
		    slavecmd == KAIF_SLAVE_CMD_SPIN) {
			cpusave->krs_cpu_acked = 0;
#endif
		}

		kmdb_kdi_slave_wait();
	}

#if defined(__sparc)
	interrupts_off();
#endif

	return (rv);
}

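/*
 * The first CPU to enter with kaif_master_cpuid unset becomes the master
 * and stops the remaining CPUs; everyone else becomes a slave.
 */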
static void
kaif_select_master(kaif_cpusave_t *cpusave)
{
	kaif_lock_enter(&kaif_master_lock);

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		/* This is the master. */
		kaif_master_cpuid = cpusave->krs_cpu_id;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;

		membar_producer();

		kmdb_kdi_stop_slaves(cpusave->krs_cpu_id, 1);
	} else {
		/* The master was already chosen - go be a slave */
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();
	}

	kaif_lock_exit(&kaif_master_lock);
}

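/*
 * The common entry point for all CPUs entering the debugger.  We select a
 * master if one hasn't been chosen, send each CPU to the appropriate loop
 * (repeating if a ::switch moves mastership), and, on resume, spin on
 * kaif_looping so that all CPUs leave the debugger together.
 */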
int
kaif_main_loop(kaif_cpusave_t *cpusave)
{
	int cmd;

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {

		/*
		 * Special case: Unload requested before first debugger entry.
		 * Don't stop the world, as there's nothing to clean up that
		 * can't be handled by the running kernel.
		 */
		if (!kmdb_dpi_resume_requested &&
		    kmdb_kdi_get_unload_request()) {
			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
			return (KAIF_CPU_CMD_RESUME);
		}

		/*
		 * We're a slave with no master, so just resume.  This can
		 * happen if, prior to this, two CPUs both raced through
		 * kdi_cmnint() - for example, a breakpoint on a frequently
		 * called function.  The loser will be redirected to the slave
		 * loop; note that the event itself is lost at this point.
		 *
		 * The winner will then cross-call that slave, but it won't
		 * actually be received until the slave returns to the kernel
		 * and enables interrupts.  We'll then come back in via
		 * kdi_slave_entry() and hit this path.
		 */
		if (cpusave->krs_cpu_state == KAIF_CPU_STATE_SLAVE) {
			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
			return (KAIF_CPU_CMD_RESUME);
		}

		kaif_select_master(cpusave);

#ifdef __sparc
		if (kaif_master_cpuid == cpusave->krs_cpu_id) {
			/*
			 * Everyone has arrived, so we can disarm the post-PROM
			 * entry point.
			 */
			*kaif_promexitarmp = 0;
			membar_producer();
		}
#endif
	} else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
	} else {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
	}

	cpusave->krs_cpu_flushed = 0;

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping++;
	kaif_lock_exit(&kaif_loop_lock);

	/*
	 * We know who the master and slaves are, so now they can go off
	 * to their respective loops.
	 */
	do {
		if (kaif_master_cpuid == cpusave->krs_cpu_id)
			cmd = kaif_master_loop(cpusave);
		else
			cmd = kaif_slave_loop(cpusave);
	} while (cmd == KAIF_CPU_CMD_SWITCH);

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping--;
	kaif_lock_exit(&kaif_loop_lock);

	cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;

	if (cmd == KAIF_CPU_CMD_RESUME) {
		/*
		 * By this point, the master has directed the slaves to resume,
		 * and everyone is making their way to this point.  We're going
		 * to block here until all CPUs leave the master and slave
		 * loops.  When all have arrived, we'll turn them all loose.
		 * This barrier is required for two reasons:
		 *
		 * 1. There exists a race condition whereby a CPU could reenter
		 *    the debugger while another CPU is still in the slave loop
		 *    from this debugger entry.  This usually happens when the
		 *    current master releases the slaves, and makes it back to
		 *    the world before the slaves notice the release.  The
		 *    former master then triggers a debugger entry, and attempts
		 *    to stop the slaves for this entry before they've even
		 *    resumed from the last one.  When the slaves arrive here,
		 *    they'll have re-disabled interrupts, and will thus ignore
		 *    cross-calls until they finish resuming.
		 *
		 * 2. At the time of this writing, there exists a SPARC bug that
		 *    causes an apparently unsolicited interrupt vector trap
		 *    from OBP to one of the slaves.  This wouldn't normally be
		 *    a problem but for the fact that the cross-called CPU
		 *    encounters some sort of failure while in OBP.  OBP
		 *    recovers by executing the debugger-hook word, which sends
		 *    the slave back into the debugger, triggering a debugger
		 *    fault.  This problem seems to only happen during resume,
		 *    the result being that all CPUs save for the cross-called
		 *    one make it back into the world, while the cross-called
		 *    one is stuck at the debugger fault prompt.  Leave the
		 *    world in that state too long, and you'll get a mondo
		 *    timeout panic.  If we hold everyone here, we can give
		 *    the user a chance to trigger a panic for further
		 *    analysis.  To trigger the bug, "pool_unlock:b :c" and
		 *    "while : ; do psrset -p ; done".
		 *
		 * When the second item is fixed, the barrier can move into
		 * kaif_select_master(), immediately prior to the setting of
		 * kaif_master_cpuid.
		 */
		while (kaif_looping != 0)
			continue;
	}

	return (cmd);
}

#if defined(__sparc)

static int slave_loop_barrier_failures = 0;	/* for debug */

/*
 * There exists a race condition, observed on some platforms, where the
 * kmdb master CPU exits to OBP via prom_enter_mon (e.g. the "$q"
 * command) and then later re-enters kmdb (via "go") while the slaves
 * are still making their way from the OBP idle loop back to the kmdb
 * slave loop.  The problem arises when the master CPU, now back in
 * kmdb, re-enters OBP (e.g. via a prom_read() from the kmdb main
 * loop) while the slaves are still trying to get out of (the previous
 * trip into) OBP and into the safety of the kmdb slave loop.  This
 * routine forces the slaves to explicitly acknowledge that they are
 * back in the slave loop.  The master CPU can call this routine to
 * ensure that all slave CPUs are back in the slave loop before
 * proceeding.
 */
void
kaif_slave_loop_barrier(void)
{
	extern void kdi_usecwait(clock_t);
	int i;
	int not_acked;
	int timeout_count = 0;

	kaif_start_slaves(KAIF_SLAVE_CMD_ACK);

	/*
	 * Wait for the slave CPUs to explicitly acknowledge that they are
	 * spinning in the slave loop.
	 */
	do {
		not_acked = 0;
		for (i = 0; i < kaif_ncpusave; i++) {
			kaif_cpusave_t *save = &kaif_cpusave[i];

			if (save->krs_cpu_state ==
			    KAIF_CPU_STATE_SLAVE &&
			    !save->krs_cpu_acked) {
				not_acked++;
				break;
			}
		}

		if (not_acked == 0)
			break;

		/*
		 * Play it safe and do a timeout delay.
		 * We will do at most kaif_ncpusave delays before
		 * bailing out of this barrier.
		 */
		kdi_usecwait(200);

	} while (++timeout_count < kaif_ncpusave);

	if (not_acked > 0)
		/*
		 * We cannot establish a barrier with all of the slave
		 * CPUs coming back from OBP.  Record this fact for
		 * future debugging.
		 */
		slave_loop_barrier_failures++;

	kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
}
#endif