/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 * The Regents of the University of California * All Rights Reserved * * University Acknowledgment- Portions of this document are derived from * software developed by the University of California, Berkeley, and its * contributors. */ #ifndef _VM_SEG_MAP_H #define _VM_SEG_MAP_H #ifdef __cplusplus extern "C" { #endif /* * When segmap is created it is possible to program its behavior, * using the create args [needed for performance reasons]. * Segmap creates n lists of pages. * For VAC machines, there will be at least one free list * per color. If more than one free list per color is needed, * set nfreelist as needed. * * For PAC machines, it will be treated as VAC with only one * color- every page is of the same color. Again, set nfreelist * to get more than one free list. */ struct segmap_crargs { uint_t prot; uint_t shmsize; /* shm_alignment for VAC, 0 for PAC. */ uint_t nfreelist; /* number of freelist per color, >= 1 */ }; #include #include /* * Each smap struct represents a MAXBSIZE sized mapping to the * given in the structure. The location of the * the structure in the array gives the virtual address of the * mapping. Structure rearranged for 64bit sm_off. */ struct smap { kmutex_t sm_mtx; /* protect non-list fields */ struct vnode *sm_vp; /* vnode pointer (if mapped) */ struct smap *sm_hash; /* hash pointer */ struct smap *sm_next; /* next pointer */ struct smap *sm_prev; /* previous pointer */ u_offset_t sm_off; /* file offset for mapping */ ushort_t sm_bitmap; /* bit map for locked translations */ ushort_t sm_refcnt; /* reference count for uses */ ushort_t sm_flags; /* smap flags */ ushort_t sm_free_ndx; /* freelist */ #ifdef SEGKPM_SUPPORT struct kpme sm_kpme; /* segkpm */ #endif }; #ifdef SEGKPM_SUPPORT #define GET_KPME(smp) (&(smp)->sm_kpme) #define sm_kpme_next sm_kpme.kpe_next #define sm_kpme_prev sm_kpme.kpe_prev #define sm_kpme_page sm_kpme.kpe_page #else #define GET_KPME(smp) ((struct kpme *)NULL) #endif /* sm_flags */ #define SM_KPM_NEWPAGE 0x00000001 /* page created in segmap_getmapft */ #define SM_NOTKPM_RELEASED 0x00000002 /* released smap not in segkpm mode */ #define SM_QNDX_ZERO 0x00000004 /* on the index 0 freelist */ #define SM_READ_DATA 0x00000010 /* page created for read */ #define SM_WRITE_DATA 0x00000020 /* page created for write */ /* * Multiple smap free lists are maintained so that allocations * will scale with cpu count. Each free list is made up of 2 queues * so that allocations and deallocations can proceed concurrently. * Each queue structure is padded to 64 bytes to avoid false sharing. */ #define SM_FREEQ_PAD (64 - sizeof (struct smap *) - sizeof (kmutex_t)) struct sm_freeq { struct smap *smq_free; /* points into freelist */ kmutex_t smq_mtx; /* protects smq_free */ char smq_pad[SM_FREEQ_PAD]; }; struct smfree { struct sm_freeq sm_freeq[2]; /* alloc and release queues */ struct sm_freeq *sm_allocq; /* current allocq */ struct sm_freeq *sm_releq; /* current releq */ kcondvar_t sm_free_cv; ushort_t sm_want; /* someone wants a slot of this color */ }; /* * Cached smaps are kept on hash chains to enable fast reclaim lookups. */ struct smaphash { kmutex_t sh_mtx; /* protects this hash chain */ struct smap *sh_hash_list; /* start of hash chain */ }; /* * (Semi) private data maintained by the segmap driver per SEGMENT mapping * All fields in segmap_data are read-only after the segment is created. * */ struct segmap_data { struct smap *smd_sm; /* array of smap structures */ long smd_npages; /* size of smap array */ struct smfree *smd_free; /* ptr to freelist header array */ struct smaphash *smd_hash; /* ptr to hash header array */ int smd_nfree; /* number of free lists */ uchar_t smd_prot; /* protections for all smap's */ }; /* * Statistics for segmap operations. * * No explicit locking to protect these stats. */ struct segmapcnt { kstat_named_t smp_fault; /* number of segmap_faults */ kstat_named_t smp_faulta; /* number of segmap_faultas */ kstat_named_t smp_getmap; /* number of segmap_getmaps */ kstat_named_t smp_get_use; /* getmaps that reuse existing map */ kstat_named_t smp_get_reclaim; /* getmaps that do a reclaim */ kstat_named_t smp_get_reuse; /* getmaps that reuse a slot */ kstat_named_t smp_get_unused; /* getmaps that reuse existing map */ kstat_named_t smp_get_nofree; /* getmaps with no free slots */ kstat_named_t smp_rel_async; /* releases that are async */ kstat_named_t smp_rel_write; /* releases that write */ kstat_named_t smp_rel_free; /* releases that free */ kstat_named_t smp_rel_abort; /* releases that abort */ kstat_named_t smp_rel_dontneed; /* releases with dontneed set */ kstat_named_t smp_release; /* releases with no other action */ kstat_named_t smp_pagecreate; /* pagecreates */ kstat_named_t smp_free_notfree; /* pages not freed in */ /* segmap_pagefree */ kstat_named_t smp_free_dirty; /* dirty pages freeed */ /* in segmap_pagefree */ kstat_named_t smp_free; /* clean pages freeed in */ /* segmap_pagefree */ kstat_named_t smp_stolen; /* segmap_getmapflt() stole */ /* from get_free_smp() */ kstat_named_t smp_get_nomtx; /* free smaps but no mutex */ }; /* * These are flags used on release. Some of these might get handled * by segment operations needed for msync (when we figure them out). * SM_ASYNC modifies SM_WRITE. SM_DONTNEED modifies SM_FREE. SM_FREE * and SM_INVAL as well as SM_FREE and SM_DESTROY are mutually exclusive. * SM_DESTROY behaves like SM_INVAL but also forces the pages to be * destroyed -- this prevents them from being written to the backing * store. */ #define SM_WRITE 0x01 /* write back the pages upon release */ #define SM_ASYNC 0x02 /* do the write asynchronously */ #define SM_FREE 0x04 /* put pages back on free list */ #define SM_INVAL 0x08 /* invalidate page (no caching) */ #define SM_DONTNEED 0x10 /* less likely to be needed soon */ #define SM_DESTROY 0x20 /* invalidate page, don't write back */ /* * These are the forcefault flags used on getmapflt. * * The orginal semantic was extended to allow using the segkpm mapping * scheme w/o a major segmap interface change for MAXBSIZE == PAGESIZE * (which is required to enable segkpm for MAXBSIZE > PAGESIZE). * Most segmap consumers needn't to be changed at all or only need to * be changed slightly to take advantage of segkpm. Because the segkpm * virtual address is based on the physical address of a page, a page is * required to determine the virtual address (return value). Pages mapped * with segkpm are always at least read locked and are hence protected * from pageout or fsflush from segmap_getmap until segmap_release. This * implies, that the segkpm mappings are locked within this period too. * No trap driven segmap_fault's are possible in segkpm mode. * * The following combinations of "forcefault" and "rw" allow segkpm mode. * (1) SM_FAULT, S_READ * (2) SM_FAULT, S_WRITE * (3) SM_PAGECREATE, S_WRITE * (4) SM_LOCKPROTO, {S_READ, S_WRITE, S_OTHER} * * The regular additional operations (come in pairs in most of the cases): * . segmap_pagecreate/segmap_pageunlock * . segmap_fault(F_SOFTLOCK)/segmap_fault(F_SOFTUNLOCK) * * are mostly a no-op in segkpm mode with the following exceptions: * . The "newpage" return value of segmap_pagecreate is still supported * for zeroout operations needed on newly created pages. * * . segmap_fault() must follow when a error could be expected in * the VOP_GETPAGE. In segkpm mode this error is recognized in * segmap_getmapflt and returned from the following segmap_fault() * call. The "hole" optimization (read only after first VOP_GETPAGE * mapping in segmap_getmapflt followed by a trap driven protection * fault and a second VOP_GETPAGE via segmap_fault) cannot be used. * * . segmap_fault(F_SOFTUNLOCK) must follow when segmap_getmapflt was * called w/ (SM_LOCKPROTO, S_OTHER). S_WRITE has to be applied, when * the page should be marked "dirty". Otherwise the page is not * written to the backing store later (as mentioned above, no page * or protection faults are possible in segkpm mode). Caller cannot * use only S_OTHER and rely on a protection fault to force the page * to become dirty. * * . The segmap_pagecreate parameter softlock is ignored, pages and * mappings are locked anyway. * * SM_LOCKPROTO is used in the fbio layer and some special segmap consumers. */ #define SM_PAGECREATE 0x00 /* create page in segkpm mode, no I/O */ #define SM_FAULT 0x01 /* fault in page if necessary */ #define SM_LOCKPROTO 0x02 /* lock/unlock protocol used */ #define MAXBSHIFT 13 /* log2(MAXBSIZE) */ #define MAXBOFFSET (MAXBSIZE - 1) #define MAXBMASK (~MAXBOFFSET) /* * SMAP_HASHAVELEN is the average length desired for this chain, from * which the size of the smd_hash table is derived at segment create time. * SMAP_HASHVPSHIFT is defined so that 1 << SMAP_HASHVPSHIFT is the * approximate size of a vnode struct. */ #define SMAP_HASHAVELEN 4 #define SMAP_HASHVPSHIFT 6 #ifdef _KERNEL /* * The kernel generic mapping segment. */ extern struct seg *segkmap; /* * Public seg_map segment operations. */ extern int segmap_create(struct seg *, void *); extern int segmap_pagecreate(struct seg *, caddr_t, size_t, int); extern void segmap_pageunlock(struct seg *, caddr_t, size_t, enum seg_rw); extern faultcode_t segmap_fault(struct hat *, struct seg *, caddr_t, size_t, enum fault_type, enum seg_rw); extern caddr_t segmap_getmap(struct seg *, struct vnode *, u_offset_t); extern caddr_t segmap_getmapflt(struct seg *, struct vnode *, u_offset_t, size_t, int, enum seg_rw); extern int segmap_release(struct seg *, caddr_t, uint_t); extern void segmap_flush(struct seg *, struct vnode *); extern void segmap_inval(struct seg *, struct vnode *, u_offset_t); #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _VM_SEG_MAP_H */