ao_poll.c: a307a255 vs. 8a40a695
(unified view; lines marked '-' appear only in a307a255, lines marked '+' appear only in 8a40a695, unmarked lines are common to both)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 41 unchanged lines hidden (view full) ---


static uint_t ao_mca_poll_trace_nent = 100;
#ifdef DEBUG
static uint_t ao_mca_poll_trace_always = 1;
#else
static uint_t ao_mca_poll_trace_always = 0;
#endif

-static cyclic_id_t ao_mca_poll_cycid;
-static hrtime_t ao_mca_poll_interval = NANOSEC * 10ULL;
+cyclic_id_t ao_mca_poll_cycid;
+hrtime_t ao_mca_poll_interval = NANOSEC * 10ULL;

static void
ao_mca_poll_trace(ao_mca_t *mca, uint32_t what, uint32_t nerr)
{
        uint_t next;
        ao_mca_poll_trace_t *pt;

        ASSERT(MUTEX_HELD(&mca->ao_mca_poll_lock));

--- 6 unchanged lines hidden (view full) ---

        pt = &mca->ao_mca_poll_trace[next];

        pt->mpt_when = 0;
        pt->mpt_what = what;

        if (what == AO_MPT_WHAT_CYC_ERR)
                pt->mpt_nerr = MIN(nerr, UINT8_MAX);

-        pt->mpt_when = gethrtime();
+        pt->mpt_when = gethrtime_waitfree();
        mca->ao_mca_poll_curtrace = next;
}

+/*
+ * Once aos_nb_poll_lock is acquired the caller must not block. The
+ * ao_mca_trap code also requires that once we take the aos_nb_poll_lock
+ * that we do not get preempted so that it can check whether the
+ * thread it has interrupted is the lock owner.
+ */
static void
-ao_mca_poll_common(ao_mca_t *mca, int what)
+ao_mca_poll_common(ao_data_t *ao, int what, int pollnb)
{
+        ao_mca_t *mca = &ao->ao_mca;
        ao_cpu_logout_t *acl = &mca->ao_mca_logout[AO_MCA_LOGOUT_POLLER];
        int i, n, fatal;

        if (mca->ao_mca_flags & AO_MCA_F_UNFAULTING) {
                mca->ao_mca_flags &= ~AO_MCA_F_UNFAULTING;
                ao_mca_poll_trace(mca, AO_MPT_WHAT_UNFAULTING, 0);

                /*
                 * On the first poll after re-enabling a faulty CPU we clear
                 * the status registers; see ao_faulted_exit() for more info.
                 */
                if (what == AO_MPT_WHAT_CYC_ERR) {
                        for (i = 0; i < AMD_MCA_BANK_COUNT; i++)
                                wrmsr(ao_bank_regs[i].abr_status, 0);
                        return;
                }
        }

-        fatal = ao_mca_logout(acl, NULL, &n);
+        fatal = ao_mca_logout(acl, NULL, &n, !pollnb,
+            ao->ao_shared->aos_chiprev);
        ao_mca_poll_trace(mca, what, n);

        if (fatal && cmi_panic_on_uncorrectable_error)
                fm_panic("Unrecoverable Machine-Check Error (polled)");
}

+/*
+ * Decide whether the caller should poll the NB. The decision is made
+ * and any poll is performed under protection of the chip-wide aos_nb_poll_lock,
+ * so that assures that no two cores poll the NB at once. To avoid the
+ * NB poll ping-ponging between different detectors we'll normally stick
+ * with the first winner.
+ */
+static int
+ao_mca_nb_pollowner(ao_data_t *ao)
+{
+        uint64_t last = ao->ao_shared->aos_nb_poll_timestamp;
+        uint64_t now = gethrtime_waitfree();
+        int rv = 0;
+
+        ASSERT(MUTEX_HELD(&ao->ao_shared->aos_nb_poll_lock));
+
+        if (now - last > 2 * ao_mca_poll_interval || last == 0) {
+                /* Nominal owner making little progress - we'll take over */
+                ao->ao_shared->aos_nb_poll_owner = CPU->cpu_id;
+                rv = 1;
+        } else if (CPU->cpu_id == ao->ao_shared->aos_nb_poll_owner) {
+                rv = 1;
+        }
+
+        if (rv == 1)
+                ao->ao_shared->aos_nb_poll_timestamp = now;
+
+        return (rv);
+}
+
+/*
+ * Wrapper called from cyclic handler or from an injector poke.
+ * In the former case we are a CYC_LOW_LEVEL handler while in the
+ * latter we're in user context so in both cases we are allowed
+ * to block. Once we acquire the shared and adaptive aos_nb_poll_lock, however,
+ * we must not block or be preempted (see ao_mca_trap).
+ */
+static void
+ao_mca_poll_wrapper(void *arg, int what)
+{
+        ao_data_t *ao = arg;
+        int pollnb;
+
+        if (ao == NULL)
+                return;
+
+        mutex_enter(&ao->ao_mca.ao_mca_poll_lock);
+        kpreempt_disable();
+        mutex_enter(&ao->ao_shared->aos_nb_poll_lock);
+
+        if ((pollnb = ao_mca_nb_pollowner(ao)) == 0) {
+                mutex_exit(&ao->ao_shared->aos_nb_poll_lock);
+                kpreempt_enable();
+        }
+
+        ao_mca_poll_common(ao, what, pollnb);
+
+        if (pollnb) {
+                mutex_exit(&ao->ao_shared->aos_nb_poll_lock);
+                kpreempt_enable();
+        }
+        mutex_exit(&ao->ao_mca.ao_mca_poll_lock);
+}
+
static void
ao_mca_poll_cyclic(void *arg)
{
-        ao_data_t *ao = arg;
-
-        if (ao != NULL && mutex_tryenter(&ao->ao_mca.ao_mca_poll_lock)) {
-                ao_mca_poll_common(&ao->ao_mca, AO_MPT_WHAT_CYC_ERR);
-                mutex_exit(&ao->ao_mca.ao_mca_poll_lock);
-        }
+        ao_mca_poll_wrapper(arg, AO_MPT_WHAT_CYC_ERR);
}

void
ao_mca_poke(void *arg)
{
-        ao_data_t *ao = arg;
-
-        mutex_enter(&ao->ao_mca.ao_mca_poll_lock);
-        ao_mca_poll_common(&ao->ao_mca, AO_MPT_WHAT_POKE_ERR);
-        mutex_exit(&ao->ao_mca.ao_mca_poll_lock);
+        ao_mca_poll_wrapper(arg, AO_MPT_WHAT_POKE_ERR);
}

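The "must not block or be preempted" rule called out in the comments above exists for the benefit of the machine-check trap handler: ao_mca_trap() (not part of this file) has to decide whether the thread it interrupted was mid-way through an NB poll, and that test is only meaningful if the poller stayed pinned to its CPU. As a hedged illustration of such an ownership test, using the standard illumos mutex_owner() and curthread facilities (the surrounding logic is hypothetical, not the actual ao_mca_trap() code):

        /* Hypothetical sketch only -- the real check lives in ao_mca_trap(). */
        if (mutex_owner(&ao->ao_shared->aos_nb_poll_lock) == curthread) {
                /*
                 * The interrupted thread took aos_nb_poll_lock after
                 * kpreempt_disable(), so it cannot have migrated off this
                 * CPU; any in-flight NB poll state here belongs to it.
                 */
        }
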
/*ARGSUSED*/
static void
ao_mca_poll_online(void *arg, cpu_t *cpu, cyc_handler_t *cyh, cyc_time_t *cyt)
{
        cyt->cyt_when = 0;
        cyh->cyh_level = CY_LOW_LEVEL;

        /*

--- 11 unchanged lines hidden (view full) ---

                cyh->cyh_arg = cpu->cpu_m.mcpu_cmidata;
        }
}

/*ARGSUSED*/
static void
ao_mca_poll_offline(void *arg, cpu_t *cpu, void *cyh_arg)
{
-        /* nothing to do here */
+        ao_data_t *ao = cpu->cpu_m.mcpu_cmidata;
+
+        /*
+         * Any sibling core may begin to poll NB MCA registers
+         */
+        if (cpu->cpu_id == ao->ao_shared->aos_nb_poll_owner)
+                ao->ao_shared->aos_nb_poll_timestamp = 0;
}

void
-ao_mca_poll_init(ao_mca_t *mca)
+ao_mca_poll_init(ao_data_t *ao, int donb)
{
+        ao_mca_t *mca = &ao->ao_mca;
+
        mutex_init(&mca->ao_mca_poll_lock, NULL, MUTEX_DRIVER, NULL);

+        if (donb)
+                mutex_init(&ao->ao_shared->aos_nb_poll_lock, NULL, MUTEX_DRIVER,
+                    NULL);
+
        if (ao_mca_poll_trace_always) {
                mca->ao_mca_poll_trace =
                    kmem_zalloc(sizeof (ao_mca_poll_trace_t) *
                    ao_mca_poll_trace_nent, KM_SLEEP);
                mca->ao_mca_poll_curtrace = 0;
        }
}


--- 16 unchanged lines hidden ---
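The viewer hides the final 16 lines of both revisions. For context only: wiring per-CPU online/offline handlers such as ao_mca_poll_online()/ao_mca_poll_offline() into the cyclic subsystem conventionally uses cyc_omni_handler_t and cyclic_add_omni() under cpu_lock, all standard illumos interfaces. The sketch below is an assumption about what such a registration routine typically looks like, not a quote of the hidden code:

        cyc_omni_handler_t cyo;

        cyo.cyo_online = ao_mca_poll_online;
        cyo.cyo_offline = ao_mca_poll_offline;
        cyo.cyo_arg = NULL;

        /* cyclic_add_omni() must be called with cpu_lock held. */
        mutex_enter(&cpu_lock);
        ao_mca_poll_cycid = cyclic_add_omni(&cyo);
        mutex_exit(&cpu_lock);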