1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Page retirement can be an extended process due to the fact that a retirement
29 * may not be possible when the original request is made.  The kernel will
30 * repeatedly attempt to retire a given page, but will not let us know when the
31 * page has been retired.  We therefore have to poll to see if the retirement
32 * has been completed.  This poll is implemented with a bounded exponential
33 * backoff to reduce the burden which we impose upon the system.
34 *
35 * To reduce the burden on fmd in the face of retirement storms, we schedule
36 * all retries as a group.  In the simplest case, we attempt to retire a single
37 * page.  When forced to retry, we initially schedule a retry at a configurable
38 * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
39 * until t reaches the maximum interval (also configurable).  Future retries
40 * for that page will occur with t equal to the maximum interval value.  We
41 * will never give up on a retirement.
42 *
43 * With multiple retirements, the situation gets slightly more complicated.  As
44 * indicated above, we schedule retries as a group.  We don't want to deny new
45 * pages their short retry intervals, so we'll (re)set the retry interval to the
46 * value appropriate for the newest page.
47 */
48
49#include <cma.h>
50
51#include <time.h>
52#include <errno.h>
53#include <unistd.h>
54#include <strings.h>
55#include <fm/fmd_api.h>
56#include <fm/libtopo.h>
57#include <fm/fmd_fmri.h>
58#include <fm/fmd_agent.h>
59#include <sys/fm/protocol.h>
60
61static void
62cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
63{
64	nvlist_free(page->pg_asru);
65	nvlist_free(page->pg_rsrc);
66	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
67}
68
69/*
70 * Retire the specified ASRU, referring to a memory page by PA or by DIMM
71 * offset (i.e. the encoded coordinates internal bank, row, and column).
72 * In the initial FMA implementation, fault.memory.page exported an ASRU
73 * with an explicit physical address, which is valid at the initial time of
74 * diagnosis but may not be later following DR, DIMM removal, or interleave
75 * changes.  On SPARC, this issue was solved by exporting the DIMM offset
76 * and pushing the entire FMRI to the platform memory controller through
77 * /dev/fm so it can derive the current PA from the DIMM and offset.
78 * On x86, we also encode DIMM and offset in hc-specific, which is then used
79 * by the x64 memory controller driver.
80 * At some point these three approaches need to be rationalized: all platforms
81 * should use the same scheme, either with decoding in the kernel or decoding
82 * in userland (i.e. with a libtopo method to compute and update the PA).
83 */
84/*ARGSUSED*/
85int
86cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
87    const char *uuid, boolean_t repair)
88{
89	cma_page_t *page;
90	uint64_t pageaddr;
91	const char *action = repair ? "unretire" : "retire";
92	int rc;
93	nvlist_t *rsrc = NULL, *asrucp = NULL, *hcsp;
94
95	(void) nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc);
96
97	if (nvlist_dup(asru, &asrucp, 0) != 0) {
98		fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
99		return (CMA_RA_FAILURE);
100	}
101
102	/* It should already be expanded, but we'll do it again anyway */
103	if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
104		fmd_hdl_debug(hdl, "failed to expand page asru\n");
105		cma_stats.bad_flts.fmds_value.ui64++;
106		nvlist_free(asrucp);
107		return (CMA_RA_FAILURE);
108	}
109
110	if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
111		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
112		cma_stats.page_nonent.fmds_value.ui64++;
113		nvlist_free(asrucp);
114		return (CMA_RA_SUCCESS);
115	}
116
117	/* Figure out physaddr from resource or asru */
118	if (rsrc == NULL ||
119	    nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcsp) != 0 ||
120	    (nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR,
121	    &pageaddr) != 0 && nvlist_lookup_uint64(hcsp,
122	    FM_FMRI_HC_SPECIFIC_PHYSADDR, &pageaddr) != 0)) {
123		if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR,
124		    &pageaddr) != 0) {
125			fmd_hdl_debug(hdl, "mem fault missing 'physaddr'\n");
126			cma_stats.bad_flts.fmds_value.ui64++;
127			nvlist_free(asrucp);
128			return (CMA_RA_FAILURE);
129		}
130	}
131
132	if (repair) {
133		if (!cma.cma_page_dounretire) {
134			fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
135			    (u_longlong_t)pageaddr);
136			cma_stats.page_supp.fmds_value.ui64++;
137			nvlist_free(asrucp);
138			return (CMA_RA_SUCCESS);
139		}
140		/* If unretire via topo fails, we fall back to legacy way */
141		if (rsrc == NULL || (rc = fmd_nvl_fmri_unretire(hdl, rsrc)) < 0)
142			rc = cma_fmri_page_unretire(hdl, asrucp);
143	} else {
144		if (!cma.cma_page_doretire) {
145			fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
146			    (u_longlong_t)pageaddr);
147			cma_stats.page_supp.fmds_value.ui64++;
148			nvlist_free(asrucp);
149			return (CMA_RA_FAILURE);
150		}
151		/* If retire via topo fails, we fall back to legacy way */
152		if (rsrc == NULL || (rc = fmd_nvl_fmri_retire(hdl, rsrc)) < 0)
153			rc = cma_fmri_page_retire(hdl, asrucp);
154	}
155
156	if (rc == FMD_AGENT_RETIRE_DONE) {
157		fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
158		    action, (u_longlong_t)pageaddr);
159		if (repair)
160			cma_stats.page_repairs.fmds_value.ui64++;
161		else
162			cma_stats.page_flts.fmds_value.ui64++;
163		nvlist_free(asrucp);
164		return (CMA_RA_SUCCESS);
165	} else if (repair || rc != FMD_AGENT_RETIRE_ASYNC) {
166		fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
167		    "retry: %s\n", action, (u_longlong_t)pageaddr,
168		    strerror(errno));
169
170		cma_stats.page_fails.fmds_value.ui64++;
171		nvlist_free(asrucp);
172		return (CMA_RA_FAILURE);
173	}
174
175	/*
176	 * The page didn't immediately retire.  We'll need to periodically
177	 * check to see if it has been retired.
178	 */
179	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");
180
181	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
182	page->pg_addr = pageaddr;
183	if (rsrc != NULL)
184		(void) nvlist_dup(rsrc, &page->pg_rsrc, 0);
185	page->pg_asru = asrucp;
186	if (uuid != NULL)
187		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);
188
189	page->pg_next = cma.cma_pages;
190	cma.cma_pages = page;
191
192	if (cma.cma_page_timerid != 0)
193		fmd_timer_remove(hdl, cma.cma_page_timerid);
194
195	cma.cma_page_curdelay = cma.cma_page_mindelay;
196
197	cma.cma_page_timerid =
198	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
199
200	/* Don't free asrucp here.  This FMRI will be needed for retry. */
201	return (CMA_RA_FAILURE);
202}
203
204static int
205page_retry(fmd_hdl_t *hdl, cma_page_t *page)
206{
207	int rc;
208
209	if (page->pg_asru != NULL &&
210	    !fmd_nvl_fmri_present(hdl, page->pg_asru)) {
211		fmd_hdl_debug(hdl, "page retire overtaken by events");
212		cma_stats.page_nonent.fmds_value.ui64++;
213
214		if (page->pg_uuid != NULL)
215			fmd_case_uuclose(hdl, page->pg_uuid);
216		return (1); /* no longer a page to retire */
217	}
218
219	if (page->pg_rsrc == NULL ||
220	    (rc = fmd_nvl_fmri_service_state(hdl, page->pg_rsrc)) < 0)
221		rc = cma_fmri_page_service_state(hdl, page->pg_asru);
222
223	if (rc == FMD_SERVICE_STATE_UNUSABLE) {
224		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
225		    page->pg_addr, page->pg_nretries);
226		cma_stats.page_flts.fmds_value.ui64++;
227
228		if (page->pg_uuid != NULL)
229			fmd_case_uuclose(hdl, page->pg_uuid);
230		return (1); /* page retired */
231	}
232
233	if (rc == FMD_SERVICE_STATE_ISOLATE_PENDING) {
234		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
235		    page->pg_addr);
236		return (0); /* schedule another retry */
237	} else {
238		fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
239		    "retirement: %s\n", page->pg_addr,
240		    strerror(errno));
241
242		cma_stats.page_fails.fmds_value.ui64++;
243		return (1); /* give up */
244	}
245}
246
247void
248cma_page_retry(fmd_hdl_t *hdl)
249{
250	cma_page_t **pagep;
251
252	cma.cma_page_timerid = 0;
253
254	fmd_hdl_debug(hdl, "page_retry: timer fired\n");
255
256	pagep = &cma.cma_pages;
257	while (*pagep != NULL) {
258		cma_page_t *page = *pagep;
259
260		if (page_retry(hdl, page)) {
261			/*
262			 * Successful retry or we're giving up - remove from
263			 * the list
264			 */
265			*pagep = page->pg_next;
266
267			if (page->pg_uuid != NULL)
268				fmd_hdl_strfree(hdl, page->pg_uuid);
269
270			cma_page_free(hdl, page);
271		} else {
272			page->pg_nretries++;
273			pagep = &page->pg_next;
274		}
275	}
276
277	if (cma.cma_pages == NULL)
278		return; /* no more retirements */
279
280	/*
281	 * We still have retirements that haven't completed.  Back the delay
282	 * off, and schedule a retry.
283	 */
284	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
285	    cma.cma_page_maxdelay);
286
287	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
288	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));
289
290	cma.cma_page_timerid =
291	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
292}
293
294void
295cma_page_fini(fmd_hdl_t *hdl)
296{
297	cma_page_t *page;
298
299	while ((page = cma.cma_pages) != NULL) {
300		cma.cma_pages = page->pg_next;
301		if (page->pg_uuid != NULL)
302			fmd_hdl_strfree(hdl, page->pg_uuid);
303		cma_page_free(hdl, page);
304	}
305}
306