/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * i86pc Memory Scrubbing
 *
 * On detection of a correctable memory ECC error, the i86pc hardware
 * returns the corrected data to the requester and may re-write it
 * to memory (DRAM or NVRAM).  Machines which do not re-write this to
 * memory should add an NMI handler to correct and rewrite.
 *
 * Scrubbing thus reduces the likelihood that multiple transient errors
 * will occur in the same memory word, making uncorrectable errors due
 * to transients less likely.
 *
 * Thus is born the desire that every memory location be periodically
 * accessed.
 *
 * This file implements a memory scrubbing thread.  This scrubber
 * guarantees that all of physical memory is accessed periodically
 * (memscrub_period_sec -- 12 hours).
 *
 * It attempts to do this as unobtrusively as possible.  The thread
 * schedules itself to wake up at an interval such that if it reads
 * memscrub_span_pages (4MB) on each wakeup, it will read all of physical
 * memory in memscrub_period_sec (12 hours).
 *
 * The scrubber uses REP LODS, so it reads 4MB in 0.15 secs (on a
 * P5-200).  When it completes a span, if all the CPUs are idle, it
 * reads another span.  Typically it soaks up idle time this way to
 * reach its deadline early -- and sleeps until the next period begins.
 *
 * Maximal Cost Estimate:  8GB @ xxMB/s = xxx seconds spent in 2048
 * wakeups that run for 0.15 seconds at intervals of 21 seconds.
 *
 * In practice, the scrubber finds enough idle time to finish in a few
 * minutes, and sleeps until its 12 hour deadline.
 *
 * The scrubber maintains a private copy of the phys_install memory list
 * to keep track of what memory should be scrubbed.
 *
 * The following parameters can be set via /etc/system
 *
 *	memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (4MB)
 *	memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
 *	memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (0)
 *	memscrub_delay_start_sec = (10 seconds)
 *	disable_memscrub = (0)
 *
 * the scrubber will exit (or never be started) if it finds the variable
 * "disable_memscrub" set.
 *
 * MEMSCRUB_DFL_SPAN_PAGES is based on the guess that 0.15 sec
 * is a "good" amount of minimum time for the thread to run at a time.
 *
 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
 * twice the frequency the hardware folk estimated would be necessary.
 *
 * MEMSCRUB_DFL_THREAD_PRI (0) is based on the assumption that nearly
 * any other use of the system should be higher priority than scrubbing.
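 *
 * For example (illustrative values, not the defaults), an administrator
 * who wanted a 6 hour period and 8MB spans could add the following
 * lines to /etc/system:
 *
 *	set memscrub_span_pages = 2048
 *	set memscrub_period_sec = 21600
 *
 * and "set disable_memscrub = 1" would prevent the thread from ever
 * starting.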
 */

#include <sys/types.h>
#include <sys/systm.h>		/* timeout, types, t_lock */
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>	/* MIN */
#include <sys/memlist.h>	/* memlist */
#include <sys/kmem.h>		/* KMEM_NOSLEEP */
#include <sys/cpuvar.h>		/* ncpus_online */
#include <sys/debug.h>		/* ASSERTs */
#include <sys/vmem.h>
#include <sys/mman.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat_i86.h>
#include <sys/callb.h>		/* CPR callback */

static caddr_t	memscrub_window;
static hat_mempte_t memscrub_pte;

/*
 * Global Data:
 */

/*
 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
 */
#define	MEMSCRUB_DFL_PERIOD_SEC	(12 * 60 * 60)	/* 12 hours */

/*
 * start only if at least MEMSCRUB_MIN_PAGES in system
 */
#define	MEMSCRUB_MIN_PAGES	((32 * 1024 * 1024) / PAGESIZE)

/*
 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
 */
#define	MEMSCRUB_DFL_SPAN_PAGES	((4 * 1024 * 1024) / PAGESIZE)

/*
 * almost anything is higher priority than scrubbing
 */
#define	MEMSCRUB_DFL_THREAD_PRI	0

/*
 * we can patch these defaults in /etc/system if necessary
 */
uint_t disable_memscrub = 0;
static uint_t disable_memscrub_quietly = 0;
pgcnt_t memscrub_min_pages = MEMSCRUB_MIN_PAGES;
pgcnt_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
time_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
time_t memscrub_delay_start_sec = 10;

/*
 * Static Routines
 */
static void memscrubber(void);
static int system_is_idle(void);
static int memscrub_add_span(uint64_t, uint64_t);

/*
 * Static Data
 */
static struct memlist *memscrub_memlist;
static uint_t memscrub_phys_pages;

static kcondvar_t memscrub_cv;
static kmutex_t memscrub_lock;

/*
 * memscrub_lock protects memscrub_memlist
 */
uint_t memscrub_scans_done;

uint_t memscrub_done_early;
uint_t memscrub_early_sec;

uint_t memscrub_done_late;
time_t memscrub_late_sec;

/*
 * create memscrub_memlist from phys_install list
 * initialize locks, set memscrub_phys_pages.
 */
void
memscrub_init()
{
	struct memlist *src;

	if (physmem < memscrub_min_pages)
		return;

	if (!kpm_enable) {
		memscrub_window = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
		memscrub_pte = hat_mempte_setup(memscrub_window);
	}

	/*
	 * copy phys_install to memscrub_memlist
	 */
	for (src = phys_install; src; src = src->ml_next) {
		if (memscrub_add_span(src->ml_address, src->ml_size)) {
			cmn_err(CE_WARN,
			    "Software memory scrubber failed to initialize\n");
			return;
		}
	}

	mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);

	/*
	 * create memscrubber thread
	 */
	(void) thread_create(NULL, 0, (void (*)())memscrubber,
	    NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
}

/*
 * Function to cause the software memscrubber to exit quietly if the
 * platform support has located a hardware scrubber and enabled it.
 */
void
memscrub_disable(void)
{
	disable_memscrub_quietly = 1;
}

#ifdef MEMSCRUB_DEBUG
static void
memscrub_printmemlist(char *title, struct memlist *listp)
{
	struct memlist *list;

	cmn_err(CE_CONT, "%s:\n", title);

	for (list = listp; list; list = list->ml_next) {
		cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
		    list->ml_address, list->ml_size);
	}
}
#endif /* MEMSCRUB_DEBUG */

/* ARGSUSED */
static void
memscrub_wakeup(void *c)
{
	/*
	 * grab mutex to guarantee that our wakeup call
	 * arrives after we go to sleep -- so we can't sleep forever.
	 */
	mutex_enter(&memscrub_lock);
	cv_signal(&memscrub_cv);
	mutex_exit(&memscrub_lock);
}

/*
 * this calculation doesn't account for the time that the actual scan
 * consumes -- so we'd fall slightly behind schedule with this
 * interval_sec.  but the idle loop optimization below usually makes us
 * come in way ahead of schedule.
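 *
 * As a worked example (illustrative figures): with 8GB of physical
 * memory there are 0x200000 4K pages, so with the default 4MB
 * (1024 page) span the full scan takes 2048 iterations, and
 * interval_sec = 43200 / 2048 = 21 seconds between wakeups.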
 */
static int
compute_interval_sec()
{
	if (memscrub_phys_pages <= memscrub_span_pages)
		return (memscrub_period_sec);
	else
		return (memscrub_period_sec /
		    (memscrub_phys_pages / memscrub_span_pages));
}

static void
memscrubber()
{
	time_t deadline;
	uint64_t mlp_last_addr;
	uint64_t mlp_next_addr;
	int reached_end = 1;
	time_t interval_sec = 0;
	struct memlist *mlp;

	extern void scan_memory(caddr_t, size_t);
	callb_cpr_t cprinfo;

	/*
	 * notify CPR of our existence
	 */
	CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");

	if (memscrub_memlist == NULL) {
		cmn_err(CE_WARN, "memscrub_memlist not initialized.");
		goto memscrub_exit;
	}

	mlp = memscrub_memlist;
	mlp_next_addr = mlp->ml_address;
	mlp_last_addr = mlp->ml_address + mlp->ml_size;

	deadline = gethrestime_sec() + memscrub_delay_start_sec;

	for (;;) {
		if (disable_memscrub || disable_memscrub_quietly)
			break;

		mutex_enter(&memscrub_lock);

		/*
		 * did we just reach the end of memory?
		 */
		if (reached_end) {
			time_t now = gethrestime_sec();

			if (now >= deadline) {
				memscrub_done_late++;
				memscrub_late_sec += (now - deadline);
				/*
				 * past deadline, start right away
				 */
				interval_sec = 0;

				deadline = now + memscrub_period_sec;
			} else {
				/*
				 * we finished ahead of schedule.
				 * wait till previous deadline before re-start.
				 */
				interval_sec = deadline - now;
				memscrub_done_early++;
				memscrub_early_sec += interval_sec;

				deadline += memscrub_period_sec;
			}
		} else {
			interval_sec = compute_interval_sec();
		}

		/*
		 * it is safe from our standpoint for CPR to
		 * suspend the system
		 */
		CALLB_CPR_SAFE_BEGIN(&cprinfo);

		/*
		 * hit the snooze bar
		 */
		(void) timeout(memscrub_wakeup, NULL, interval_sec * hz);

		/*
		 * go to sleep
		 */
		cv_wait(&memscrub_cv, &memscrub_lock);

		/* we need to go to work */
		CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);

		mutex_exit(&memscrub_lock);

		do {
			pgcnt_t pages = memscrub_span_pages;
			uint64_t address = mlp_next_addr;

			if (disable_memscrub || disable_memscrub_quietly)
				break;

			mutex_enter(&memscrub_lock);

			/*
			 * Make sure we don't try to scan beyond the end of
			 * the current memlist.  If we would, then resize
			 * our scan target for this iteration, and prepare
			 * to read the next memlist entry on the next
			 * iteration.
			 */
			reached_end = 0;
			if (address + mmu_ptob(pages) >= mlp_last_addr) {
				pages = mmu_btop(mlp_last_addr - address);
				mlp = mlp->ml_next;
				if (mlp == NULL) {
					reached_end = 1;
					mlp = memscrub_memlist;
				}
				mlp_next_addr = mlp->ml_address;
				mlp_last_addr = mlp->ml_address + mlp->ml_size;
			} else {
				mlp_next_addr += mmu_ptob(pages);
			}

			mutex_exit(&memscrub_lock);

			while (pages--) {
				pfn_t pfn = btop(address);

				/*
				 * Without segkpm, the memscrubber cannot
				 * be allowed to migrate across CPUs, as
				 * the CPU-specific mapping of
				 * memscrub_window would be incorrect.
				 * With segkpm, switching CPUs is legal, but
				 * inefficient.  We don't use
				 * kpreempt_disable as it might hold a
				 * higher priority thread (eg, RT) too long
				 * off CPU.
				 */
				thread_affinity_set(curthread, CPU_CURRENT);
				if (kpm_enable)
					memscrub_window = hat_kpm_pfn2va(pfn);
				else
					hat_mempte_remap(pfn, memscrub_window,
					    memscrub_pte,
					    PROT_READ, HAT_LOAD_NOCONSIST);

				scan_memory(memscrub_window, PAGESIZE);

				thread_affinity_clear(curthread);
				address += MMU_PAGESIZE;
			}

			memscrub_scans_done++;
		} while (!reached_end && system_is_idle());
	}

memscrub_exit:
	if (!disable_memscrub_quietly)
		cmn_err(CE_NOTE, "Software memory scrubber exiting.");
	/*
	 * We are about to bail, but don't have the memscrub_lock,
	 * and it is needed for CALLB_CPR_EXIT.
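	 * Note that CALLB_CPR_EXIT drops the lock itself, which is
	 * why there is no matching mutex_exit below.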
	 */
	mutex_enter(&memscrub_lock);
	CALLB_CPR_EXIT(&cprinfo);

	cv_destroy(&memscrub_cv);
	thread_exit();
}

/*
 * return 1 if we're MP and all the other CPUs are idle
 */
static int
system_is_idle()
{
	int cpu_id;
	int found = 0;

	if (1 == ncpus_online)
		return (0);

	for (cpu_id = 0; cpu_id < NCPU; ++cpu_id) {
		if (!cpu[cpu_id])
			continue;

		found++;

		if (cpu[cpu_id]->cpu_thread != cpu[cpu_id]->cpu_idle_thread) {
			if (CPU->cpu_id == cpu_id &&
			    CPU->cpu_disp->disp_nrunnable == 0)
				continue;
			return (0);
		}

		if (found == ncpus)
			break;
	}
	return (1);
}

/*
 * add a span to the memscrub list
 */
static int
memscrub_add_span(uint64_t start, uint64_t bytes)
{
	struct memlist *dst;
	struct memlist *prev, *next;
	uint64_t end = start + bytes - 1;
	int retval = 0;

	mutex_enter(&memscrub_lock);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
	cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
	    " size: 0x%llx\n", start, bytes);
#endif /* MEMSCRUB_DEBUG */

	/*
	 * Scan through the list to find the proper place to install it.
	 */
	prev = NULL;
	next = memscrub_memlist;
	while (next) {
		uint64_t ns = next->ml_address;
		uint64_t ne = next->ml_address + next->ml_size - 1;

		/*
		 * If this span overlaps with an existing span, then
		 * something has gone horribly wrong with the phys_install
		 * list.  In fact, I'm surprised we made it this far.
		 */
		if ((start >= ns && start <= ne) ||
		    (end >= ns && end <= ne) ||
		    (start < ns && end > ne))
			panic("memscrub found overlapping memory ranges "
			    "(0x%p-0x%p) and (0x%p-0x%p)",
			    (void *)(uintptr_t)start, (void *)(uintptr_t)end,
			    (void *)(uintptr_t)ns, (void *)(uintptr_t)ne);

		/*
		 * New span can be appended to an existing one.
		 */
		if (start == ne + 1) {
			next->ml_size += bytes;
			goto add_done;
		}

		/*
		 * New span can be prepended to an existing one.
		 */
		if (end + 1 == ns) {
			next->ml_size += bytes;
			next->ml_address = start;
			goto add_done;
		}

		/*
		 * If the next span has a higher start address than the new
		 * one, then we have found the right spot for our
		 * insertion.
		 */
		if (ns > start)
			break;

		prev = next;
		next = next->ml_next;
	}

	/*
	 * allocate a new struct memlist
	 */
	dst = kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
	if (dst == NULL) {
		retval = -1;
		goto add_done;
	}

	dst->ml_address = start;
	dst->ml_size = bytes;
	dst->ml_prev = prev;
	dst->ml_next = next;

	if (prev)
		prev->ml_next = dst;
	else
		memscrub_memlist = dst;

	if (next)
		next->ml_prev = dst;

add_done:

	if (retval != -1)
		memscrub_phys_pages += mmu_btop(bytes);

#ifdef MEMSCRUB_DEBUG
	memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
	cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
#endif /* MEMSCRUB_DEBUG */

	mutex_exit(&memscrub_lock);

	return (retval);
}
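
/*
 * memscrub_add_span example (illustrative addresses): if the list
 * already holds the span [0x100000000, 0x140000000), then
 * memscrub_add_span(0x140000000, 0x40000000) hits the "appended to an
 * existing one" case above and simply grows that entry's ml_size,
 * while memscrub_add_span(0x200000000, 0x40000000) falls through the
 * scan loop and allocates a fresh memlist entry linked in after the
 * existing one.
 */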