xref: /illumos-gate/usr/src/uts/common/vm/vpm.h (revision 183971ba)
1a5652762Spraks /*
2a5652762Spraks  * CDDL HEADER START
3a5652762Spraks  *
4a5652762Spraks  * The contents of this file are subject to the terms of the
5a5652762Spraks  * Common Development and Distribution License (the "License").
6a5652762Spraks  * You may not use this file except in compliance with the License.
7a5652762Spraks  *
8a5652762Spraks  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9a5652762Spraks  * or http://www.opensolaris.org/os/licensing.
10a5652762Spraks  * See the License for the specific language governing permissions
11a5652762Spraks  * and limitations under the License.
12a5652762Spraks  *
13a5652762Spraks  * When distributing Covered Code, include this CDDL HEADER in each
14a5652762Spraks  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15a5652762Spraks  * If applicable, add the following below this CDDL HEADER, with the
16a5652762Spraks  * fields enclosed by brackets "[]" replaced with your own identifying
17a5652762Spraks  * information: Portions Copyright [yyyy] [name of copyright owner]
18a5652762Spraks  *
19a5652762Spraks  * CDDL HEADER END
20a5652762Spraks  */
21a5652762Spraks /*
22*183971baSPrakash Sangappa  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23a5652762Spraks  * Use is subject to license terms.
24a5652762Spraks  */
25a5652762Spraks 
26a5652762Spraks #ifndef	_VM_VPM_H
27a5652762Spraks #define	_VM_VPM_H
28a5652762Spraks 
29a5652762Spraks 
30a5652762Spraks #ifdef	__cplusplus
31a5652762Spraks extern "C" {
32a5652762Spraks #endif
33a5652762Spraks 
34a5652762Spraks /*
35a5652762Spraks  * The vnode page mappings(VPM) interfaces.
36a5652762Spraks  * "Commitment level - Consolidation private". They are subject
37a5652762Spraks  * to change without notice. Use them at your own risk.
38a5652762Spraks  *
39a5652762Spraks  * At this stage these interfaces are provided only to utilize the
40*183971baSPrakash Sangappa  * segkpm mappings. Therefore these interfaces have to be used under
41*183971baSPrakash Sangappa  * the 'vpm_enable' check as an alternative to segmap interfaces where
42*183971baSPrakash Sangappa  * applicable.
43a5652762Spraks  *
44a5652762Spraks  * The VPM interfaces provide temporary mappings to file pages. They
45a5652762Spraks  * return the mappings in a scatter gather list(SGL).
46a5652762Spraks  * The SGL elements are the structure 'vmap_t'.
47a5652762Spraks  *
48a5652762Spraks  *	typedef struct vmap {
49*183971baSPrakash Sangappa  *		caddr_t	vs_addr;        / public - mapped address /
50*183971baSPrakash Sangappa  *		size_t	vs_len;         / public - length of mapping /
51a5652762Spraks  *		void	*vs_data;	/ opaque - private data /
52a5652762Spraks  *	} vmap_t;
53a5652762Spraks  *
54a5652762Spraks  * An array of this structure has to be passed to the interface routines
55a5652762Spraks  * along with the size(# of elements) of the SGL array. Depending on the
56a5652762Spraks  * requested length and mapped chunk sizes(PAGESIZE here), the number of
57a5652762Spraks  * valid mappings returned can be less than the actual size of the SGL array.
58a5652762Spraks  * There is always an element in the SGL with 'vs_addr' set to NULL, which
59a5652762Spraks  * marks the end of the valid entries in the SGL.
60a5652762Spraks  *
61a5652762Spraks  * The vmap_t structure members are populated with the mapped address
62a5652762Spraks  * in 'vs_addr' and length of the mapping in 'vs_len'. Currently the
63a5652762Spraks  * mapping length is fixed at PAGESIZE. The 'vs_data' member is private
64a5652762Spraks  * and the caller should not access or modify it.
65a5652762Spraks  *
66a5652762Spraks  * Using a scatter gather list to return the mappings and length makes it
67*183971baSPrakash Sangappa  * possible to provide mappings of variable length. Mapping lengths up to
68*183971baSPrakash Sangappa  * VPMMAXLEN are supported.  The scatter gather list array size needs to
69*183971baSPrakash Sangappa  * be a minimum of MINVMAPS elements.
70a5652762Spraks  *
71a5652762Spraks  * Interfaces:
72a5652762Spraks  *
73a5652762Spraks  * int vpm_map_pages( struct vnode *vp, u_offset_t off, size_t len,
74a5652762Spraks  *			int fetchpage, vmap_t *vml, int vmlsz,
75a5652762Spraks  *			int *newpagecreated, enum seg_rw rw);
76a5652762Spraks  *
77a5652762Spraks  * This function returns mappings to vnode pages.
78a5652762Spraks  *
79a5652762Spraks  * It takes a vnode, offset and length and returns mappings to the  pages
80*183971baSPrakash Sangappa  * covering the range [off, off + len) in the vmap_t SGL array 'vml'.
81*183971baSPrakash Sangappa  * The length passed in should satisfy the following criteria
82*183971baSPrakash Sangappa  * '(off + len)  <= ((off & PAGEMASK) + VPMMAXLEN)'
83*183971baSPrakash Sangappa  * The mapped address returned in 'vs_addr' of the first vml[] entry
84*183971baSPrakash Sangappa  * is at the beginning of the page containing 'off'.
85a5652762Spraks  *
86a5652762Spraks  * The 'vmlsz' is the size(# elements) of the 'vml' array.
87a5652762Spraks  *
88a5652762Spraks  * When the 'fetchpage' flag is set, the vnode(file) pages will be fetched
89a5652762Spraks  * (calls VOP_GETPAGE) from the backing store(disk) if not found in the
90a5652762Spraks  * system page cache. If 'fetchpage == 0', the vnode(file) pages for the
91a5652762Spraks  * given offset will be just created if they are not already present in the
92a5652762Spraks  * system page cache. The 'newpagecreated' flag is set on return if new pages
93a5652762Spraks  * are created when 'fetchpage == 0'(requested to just create new pages).
94a5652762Spraks  *
95a5652762Spraks  * The 'seg_rw rw' indicates the intended operation on these mappings
96a5652762Spraks  * (S_WRITE or S_READ).
97a5652762Spraks  *
98*183971baSPrakash Sangappa  * Currently these interfaces only return segkpm mappings. The vnode pages
99*183971baSPrakash Sangappa  * that are being accessed will be locked(at least SHARED locked) for the
100*183971baSPrakash Sangappa  * duration these mappings are in use. After use, the  unmap function,
101*183971baSPrakash Sangappa  * vpm_unmap_pages(), has to be called and the same SGL array
102a5652762Spraks  * needs to be passed to the unmap function.
103a5652762Spraks  *
104a5652762Spraks  *
105a5652762Spraks  * void vpm_unmap_pages(vmap_t *vml, enum seg_rw rw);
106a5652762Spraks  *
107a5652762Spraks  * This function unmaps the pages that were mapped by vpm_map_pages().
108a5652762Spraks  * The SGL array 'vml' has to be the one that was passed to vpm_map_pages().
109a5652762Spraks  *
110a5652762Spraks  *
111a5652762Spraks  * ex:
112a5652762Spraks  * To copy data from a kernel buffer 'buf' into the pages of vnode(file) 'vp'
113a5652762Spraks  * at offset 'off', the following snippet shows how to use the above two
114a5652762Spraks  * interfaces. The copy length here runs up to the MAXBSIZE boundary. This code
115a5652762Spraks  * can be executed repeatedly, in a loop, to copy more than MAXBSIZE of data.
116a5652762Spraks  *
117a5652762Spraks  *	vmap_t  vml[MINVMAPS];
118a5652762Spraks  *	int err, i, newpage, len;
119a5652762Spraks  *	int pon;
120a5652762Spraks  *
121a5652762Spraks  *	pon = (off & PAGEOFFSET);
122a5652762Spraks  *	len = MAXBSIZE - pon;
123a5652762Spraks  *
124a5652762Spraks  *	if (vpm_enable) {
125a5652762Spraks  *             err = vpm_map_pages(vp, off, len, 0, vml, MINVMAPS,
126a5652762Spraks  *				 &newpage, S_WRITE);
127a5652762Spraks  *
128a5652762Spraks  *		if (err)
129a5652762Spraks  *			return;
130a5652762Spraks  *
131a5652762Spraks  *		for (i = 0; vml[i].vs_addr != NULL; i++) {
132a5652762Spraks  *			bcopy (buf, vml[i].vs_addr + pon,
133a5652762Spraks  *				 PAGESIZE - pon);
134a5652762Spraks  *			buf += (PAGESIZE - pon);
135a5652762Spraks  *			pon = 0;
136a5652762Spraks  *		}
137a5652762Spraks  *
138a5652762Spraks  *		if (newpage) {
139a5652762Spraks  *			pon = (off & PAGEOFFSET);
140a5652762Spraks  *			bzero(vml[i-1].vs_addr + pon, PAGESIZE - pon);
141a5652762Spraks  *		}
142a5652762Spraks  *
143a5652762Spraks  *		vpm_unmap_pages(vml, S_WRITE);
144a5652762Spraks  *	}
145a5652762Spraks  *
146a5652762Spraks  *
147a5652762Spraks  *
148a5652762Spraks  *
149a5652762Spraks  * int vpm_data_copy(struct vnode *vp, u_offset_t off, size_t len,
150a5652762Spraks  *		struct uio *uio, int fetchpage, int *newpagecreated,
151a5652762Spraks  *		int zerostart, enum seg_rw rw);
152a5652762Spraks  *
153a5652762Spraks  * This function can be called if the need is to just transfer data to/from
154a5652762Spraks  * the vnode pages. It takes a 'uio' structure and  calls 'uiomove()' to
155a5652762Spraks  * do the data transfer. It can be used in the context of read and write
156a5652762Spraks  * system calls to transfer data between a user buffer, which is specified
157a5652762Spraks  * in the uio structure, and the vnode pages. If the data needs to be
158a5652762Spraks  * transferred between a kernel buffer and the pages, like in the above
159a5652762Spraks  * example, a uio structure can be set up accordingly and passed. The 'rw'
160a5652762Spraks  * parameter will determine the direction of the data transfer.
161a5652762Spraks  *
162a5652762Spraks  * The 'fetchpage' and 'newpagecreated' are same as explained before.
163a5652762Spraks  * The 'zerostart' flag when set will zero fill start of the page till the
164a5652762Spraks  * offset 'off' in the first page. i.e  from 'off & PAGEMASK' to 'off'.
165a5652762Spraks  *
166a5652762Spraks  *
167a5652762Spraks  * int vpm_sync_pages(struct vnode *vp, u_offset_t off,
168a5652762Spraks  *					 size_t len, uint_t flags)
169a5652762Spraks  *
170a5652762Spraks  * This function can be called to flush or sync the vnode(file) pages that
171a5652762Spraks  * have been accessed. It will call VOP_PUTPAGE().
172a5652762Spraks  *
173a5652762Spraks  * For the given vnode, off and len the pages covering the range
174a5652762Spraks  * [off, off + len) are flushed. Currently it uses the same flags that
175a5652762Spraks  * are used with the segmap_release() interface. Refer to vm/seg_map.h.
176a5652762Spraks  * (SM_DONTNEED, SM_ASYNC, SM_FREE, SM_INVAL, SM_DESTROY)
177a5652762Spraks  *
178a5652762Spraks  */
179a5652762Spraks 
180a5652762Spraks 
/*
 * vpm cache related definitions: the smallest and largest cache sizes.
 * NOTE(review): presumably expressed in bytes; the consumers of these
 * limits are not visible in this header - confirm.
 */
#define	VPMAP_MINCACHE		(64 * 1024 * 1024)		/* 64M */
/*
 * 256G is 2^38, which does not fit in a 32-bit long; this expression is
 * only well defined where 'long' is at least 64 bits wide.
 */
#define	VPMAP_MAXCACHE		(256L * 1024L * 1024L * 1024L)	/* 256G */
186*183971baSPrakash Sangappa 
187a5652762Spraks 
/*
 * vpm caching modes. How a mode is selected and applied is decided by
 * the implementation, not by this header.
 */
#define	VPMCACHE_LRU		0
#define	VPMCACHE_RANDOM		1
193a5652762Spraks /*
194a5652762Spraks  * Data structures to manage the cache of pages referenced by
195a5652762Spraks  * the vpm interfaces. There is one vpmap struct per page in the cache.
196a5652762Spraks  */
197a5652762Spraks struct vpmap {
198a5652762Spraks 	kmutex_t	vpm_mtx;	/* protects non list fields */
199a5652762Spraks 	struct vnode	*vpm_vp;	/* pointer to vnode of cached page */
200a5652762Spraks 	struct vpmap	*vpm_next;	/* free list pointers */
201a5652762Spraks 	struct vpmap	*vpm_prev;
202a5652762Spraks 	u_offset_t	vpm_off;	/* offset of the page */
203a5652762Spraks 	page_t		*vpm_pp;	/* page pointer */
204a5652762Spraks 	ushort_t	vpm_refcnt;	/* Number active references */
205a5652762Spraks 	ushort_t	vpm_ndxflg;	/* indicates which queue */
206a5652762Spraks 	ushort_t	vpm_free_ndx;	/* freelist it belongs to */
207a5652762Spraks };
208a5652762Spraks 
209a5652762Spraks /*
210a5652762Spraks  * Multiple vpmap free lists are maintaned so that allocations
211a5652762Spraks  * scale with cpu count. To further reduce contentions between
212a5652762Spraks  * allocation and deallocations, each list is made up of two queues.
213a5652762Spraks  */
214a5652762Spraks #define	VPM_FREEQ_PAD	64
215a5652762Spraks union vpm_freeq {
216a5652762Spraks 	struct {
217a5652762Spraks 		struct vpmap	*vpmsq_free;
218a5652762Spraks 		kmutex_t	vpmsq_mtx;
219a5652762Spraks 	} vpmfq;
220a5652762Spraks 	char vpmq_pad[VPM_FREEQ_PAD];
221a5652762Spraks };
222a5652762Spraks 
223a5652762Spraks #define	vpmq_free	vpmfq.vpmsq_free
224a5652762Spraks #define	vpmq_mtx	vpmfq.vpmsq_mtx
225a5652762Spraks 
226a5652762Spraks struct vpmfree {
227a5652762Spraks 	union vpm_freeq vpm_freeq[2];	/* alloc and release queue */
228a5652762Spraks 	union vpm_freeq *vpm_allocq;	/* current alloc queue */
229a5652762Spraks 	union vpm_freeq *vpm_releq;	/* current release queue */
230a5652762Spraks 	kcondvar_t	vpm_free_cv;
231a5652762Spraks 	ushort_t	vpm_want;
232a5652762Spraks };
233a5652762Spraks 
/*
 * Queue selectors.
 * NOTE(review): presumably indices into vpmfree.vpm_freeq[] - confirm.
 */
#define	VPMALLOCQ	0
#define	VPMRELEQ	1
236a5652762Spraks 
237a5652762Spraks /*
238a5652762Spraks  * VPM Interface definitions.
239a5652762Spraks  */
240a5652762Spraks 
241a5652762Spraks /*
242a5652762Spraks  * This structure is the scatter gather list element. The page
243a5652762Spraks  * mappings will be returned in this structure. A pointer to an
244a5652762Spraks  * array of this structure is passed to the interface routines.
245a5652762Spraks  */
246a5652762Spraks typedef struct vmap {
247a5652762Spraks 	caddr_t	vs_addr;	/* mapped address */
248a5652762Spraks 	size_t	vs_len;		/* length, currently fixed at PAGESIZE */
249a5652762Spraks 	void	*vs_data;	/* opaque - private data */
250a5652762Spraks } vmap_t;
251a5652762Spraks 
/*
 * Flag bit: fault in pages.
 * NOTE(review): presumably passed as the 'fetchpage' argument of the
 * vpm interfaces - confirm.
 */
#define	VPM_FETCHPAGE 0x01
253*183971baSPrakash Sangappa 
/*
 * Max request length - Needs to be a multiple of
 * 8192 (PAGESIZE on sparc) so it works properly on both
 * x86 & sparc systems. Max set to 128k.
 */
#define	VPMMAXLEN	(128 * 1024)

/*
 * The minimum and maximum number of array elements in the scatter
 * gather list.
 *
 * MINVMAPS is (MAXBSIZE/4096 + 1) mappings.
 *
 * VPMMAXPGS is the max number of pages handled at a time: VPMMAXLEN
 * divided by the page size (8192 on sparc, 4096 otherwise).
 *
 * MAXVMAPS is the max number of elements in the scatter gather list:
 * VPMMAXPGS plus one element to mark the end of the list of valid
 * mappings.
 */
#define	MINVMAPS	3
#if defined(__sparc)
#define	VPMMAXPGS	(VPMMAXLEN / 8192)
#else
#define	VPMMAXPGS	(VPMMAXLEN / 4096)
#endif
#define	MAXVMAPS	(VPMMAXPGS + 1)
276a5652762Spraks 
277a5652762Spraks #ifdef _KERNEL
278a5652762Spraks 
279a5652762Spraks extern int	vpm_enable;
280a5652762Spraks /*
281a5652762Spraks  * vpm page mapping operations.
282a5652762Spraks  */
283a5652762Spraks extern void	vpm_init(void);
284a5652762Spraks extern int	vpm_map_pages(struct vnode *, u_offset_t, size_t, int,
285a5652762Spraks 		vmap_t *, int, int  *, enum seg_rw);
286a5652762Spraks 
287a5652762Spraks extern void	vpm_unmap_pages(vmap_t *, enum seg_rw);
288a5652762Spraks extern int	vpm_sync_pages(struct vnode *, u_offset_t, size_t, uint_t);
289a5652762Spraks extern int	vpm_data_copy(struct vnode *, u_offset_t, size_t,
290a5652762Spraks 		struct uio *, int, int *, int, enum seg_rw rw);
291a5652762Spraks #endif	/* _KERNEL */
292a5652762Spraks 
293a5652762Spraks #ifdef	__cplusplus
294a5652762Spraks }
295a5652762Spraks #endif
296a5652762Spraks 
297a5652762Spraks #endif	/* _VM_VPM_H */
298