17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 56f84fed5Scth * Common Development and Distribution License (the "License"). 66f84fed5Scth * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22d3d50737SRafael Vanoni * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 249468939eSJerry Jelinek * Copyright 2011 Joyent, Inc. All rights reserved. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 273f11de9dSSara Hartse /* 283f11de9dSSara Hartse * Copyright (c) 2016 by Delphix. All rights reserved. 
293f11de9dSSara Hartse */ 303f11de9dSSara Hartse 317c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 327c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 337c478bd9Sstevel@tonic-gate 347c478bd9Sstevel@tonic-gate /* 357c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 367c478bd9Sstevel@tonic-gate * The Regents of the University of California 377c478bd9Sstevel@tonic-gate * All Rights Reserved 387c478bd9Sstevel@tonic-gate * 397c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 407c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 417c478bd9Sstevel@tonic-gate * contributors. 427c478bd9Sstevel@tonic-gate */ 437c478bd9Sstevel@tonic-gate 447c478bd9Sstevel@tonic-gate #include <sys/types.h> 457c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 467c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 477c478bd9Sstevel@tonic-gate #include <sys/conf.h> 487c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 497c478bd9Sstevel@tonic-gate #include <sys/errno.h> 507c478bd9Sstevel@tonic-gate #include <sys/debug.h> 517c478bd9Sstevel@tonic-gate #include <sys/buf.h> 527c478bd9Sstevel@tonic-gate #include <sys/var.h> 537c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 547c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 557c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 567c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 577c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 587c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 597c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 607c478bd9Sstevel@tonic-gate #include <vm/page.h> 617c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 627c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 637c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 647c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h> 657c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h> 667c478bd9Sstevel@tonic-gate #include 
<sys/fs/ufs_log.h> 677c478bd9Sstevel@tonic-gate #include <sys/systm.h> 687c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 697c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate /* Locks */ 727c478bd9Sstevel@tonic-gate static kmutex_t blist_lock; /* protects b_list */ 737c478bd9Sstevel@tonic-gate static kmutex_t bhdr_lock; /* protects the bhdrlist */ 747c478bd9Sstevel@tonic-gate static kmutex_t bfree_lock; /* protects the bfreelist structure */ 757c478bd9Sstevel@tonic-gate 767c478bd9Sstevel@tonic-gate struct hbuf *hbuf; /* Hash buckets */ 777c478bd9Sstevel@tonic-gate struct dwbuf *dwbuf; /* Delayed write buckets */ 787c478bd9Sstevel@tonic-gate static struct buf *bhdrlist; /* buf header free list */ 797c478bd9Sstevel@tonic-gate static int nbuf; /* number of buffer headers allocated */ 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate static int lastindex; /* Reference point on where to start */ 827c478bd9Sstevel@tonic-gate /* when looking for free buffers */ 837c478bd9Sstevel@tonic-gate 847c478bd9Sstevel@tonic-gate #define bio_bhash(dev, bn) (hash2ints((dev), (int)(bn)) & v.v_hmask) 857c478bd9Sstevel@tonic-gate #define EMPTY_LIST ((struct buf *)-1) 867c478bd9Sstevel@tonic-gate 877c478bd9Sstevel@tonic-gate static kcondvar_t bio_mem_cv; /* Condition variables */ 887c478bd9Sstevel@tonic-gate static kcondvar_t bio_flushinval_cv; 897c478bd9Sstevel@tonic-gate static int bio_doingflush; /* flush in progress */ 907c478bd9Sstevel@tonic-gate static int bio_doinginval; /* inval in progress */ 917c478bd9Sstevel@tonic-gate static int bio_flinv_cv_wanted; /* someone waiting for cv */ 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate /* 947c478bd9Sstevel@tonic-gate * Statistics on the buffer cache 957c478bd9Sstevel@tonic-gate */ 967c478bd9Sstevel@tonic-gate struct biostats biostats = { 977c478bd9Sstevel@tonic-gate { "buffer_cache_lookups", KSTAT_DATA_UINT32 }, 987c478bd9Sstevel@tonic-gate { "buffer_cache_hits", 
KSTAT_DATA_UINT32 }, 997c478bd9Sstevel@tonic-gate { "new_buffer_requests", KSTAT_DATA_UINT32 }, 1007c478bd9Sstevel@tonic-gate { "waits_for_buffer_allocs", KSTAT_DATA_UINT32 }, 1017c478bd9Sstevel@tonic-gate { "buffers_locked_by_someone", KSTAT_DATA_UINT32 }, 1027c478bd9Sstevel@tonic-gate { "duplicate_buffers_found", KSTAT_DATA_UINT32 } 1037c478bd9Sstevel@tonic-gate }; 1047c478bd9Sstevel@tonic-gate 1057c478bd9Sstevel@tonic-gate /* 1067c478bd9Sstevel@tonic-gate * kstat data 1077c478bd9Sstevel@tonic-gate */ 1087c478bd9Sstevel@tonic-gate kstat_named_t *biostats_ptr = (kstat_named_t *)&biostats; 1097c478bd9Sstevel@tonic-gate uint_t biostats_ndata = (uint_t)(sizeof (biostats) / 1107c478bd9Sstevel@tonic-gate sizeof (kstat_named_t)); 1117c478bd9Sstevel@tonic-gate 1127c478bd9Sstevel@tonic-gate /* 1137c478bd9Sstevel@tonic-gate * Statistics on ufs buffer cache 1147c478bd9Sstevel@tonic-gate * Not protected by locks 1157c478bd9Sstevel@tonic-gate */ 1167c478bd9Sstevel@tonic-gate struct ufsbiostats ub = { 1177c478bd9Sstevel@tonic-gate { "breads", KSTAT_DATA_UINT32 }, 1187c478bd9Sstevel@tonic-gate { "bwrites", KSTAT_DATA_UINT32 }, 1197c478bd9Sstevel@tonic-gate { "fbiwrites", KSTAT_DATA_UINT32 }, 1207c478bd9Sstevel@tonic-gate { "getpages", KSTAT_DATA_UINT32 }, 1217c478bd9Sstevel@tonic-gate { "getras", KSTAT_DATA_UINT32 }, 1227c478bd9Sstevel@tonic-gate { "putsyncs", KSTAT_DATA_UINT32 }, 1237c478bd9Sstevel@tonic-gate { "putasyncs", KSTAT_DATA_UINT32 }, 1247c478bd9Sstevel@tonic-gate { "putpageios", KSTAT_DATA_UINT32 }, 1257c478bd9Sstevel@tonic-gate }; 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate /* 1287c478bd9Sstevel@tonic-gate * more UFS Logging eccentricities... 1297c478bd9Sstevel@tonic-gate * 1307c478bd9Sstevel@tonic-gate * required since "#pragma weak ..." doesn't work in reverse order. 
1317c478bd9Sstevel@tonic-gate * i.e.: genunix (bio.c) is loaded before the ufs modules and pointers 1327c478bd9Sstevel@tonic-gate * to ufs routines don't get plugged into bio.c calls so 1337c478bd9Sstevel@tonic-gate * we initialize it when setting up the "lufsops" table 1347c478bd9Sstevel@tonic-gate * in "lufs.c:_init()" 1357c478bd9Sstevel@tonic-gate */ 1367c478bd9Sstevel@tonic-gate void (*bio_lufs_strategy)(void *, buf_t *); 1377c478bd9Sstevel@tonic-gate void (*bio_snapshot_strategy)(void *, buf_t *); 1387c478bd9Sstevel@tonic-gate 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate /* Private routines */ 1417c478bd9Sstevel@tonic-gate static struct buf *bio_getfreeblk(long); 1427c478bd9Sstevel@tonic-gate static void bio_mem_get(long); 1437c478bd9Sstevel@tonic-gate static void bio_bhdr_free(struct buf *); 1447c478bd9Sstevel@tonic-gate static struct buf *bio_bhdr_alloc(void); 1457c478bd9Sstevel@tonic-gate static void bio_recycle(int, long); 1467c478bd9Sstevel@tonic-gate static void bio_pageio_done(struct buf *); 1477c478bd9Sstevel@tonic-gate static int bio_incore(dev_t, daddr_t); 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate /* 1507c478bd9Sstevel@tonic-gate * Buffer cache constants 1517c478bd9Sstevel@tonic-gate */ 1527c478bd9Sstevel@tonic-gate #define BIO_BUF_PERCENT (100/2) /* default: 2% of memory */ 1537c478bd9Sstevel@tonic-gate #define BIO_MAX_PERCENT (100/20) /* max is 20% of real memory */ 1547c478bd9Sstevel@tonic-gate #define BIO_BHDR_POOL 100 /* Default bhdr pool size */ 1557c478bd9Sstevel@tonic-gate #define BIO_MIN_HDR 10 /* Minimum number of buffer headers */ 1567c478bd9Sstevel@tonic-gate #define BIO_MIN_HWM (BIO_MIN_HDR * MAXBSIZE / 1024) 1577c478bd9Sstevel@tonic-gate #define BIO_HASHLEN 4 /* Target length of hash chains */ 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate 1607c478bd9Sstevel@tonic-gate /* Flags for bio_recycle() */ 1617c478bd9Sstevel@tonic-gate #define BIO_HEADER 0x01 1627c478bd9Sstevel@tonic-gate #define 
BIO_MEM 0x02 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate extern int bufhwm; /* User tunable - high water mark for mem */ 1657c478bd9Sstevel@tonic-gate extern int bufhwm_pct; /* ditto - given in % of physmem */ 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate /* 1687c478bd9Sstevel@tonic-gate * The following routines allocate and free 1697c478bd9Sstevel@tonic-gate * buffers with various side effects. In general the 1707c478bd9Sstevel@tonic-gate * arguments to an allocate routine are a device and 1717c478bd9Sstevel@tonic-gate * a block number, and the value is a pointer to 1727c478bd9Sstevel@tonic-gate * to the buffer header; the buffer returned is locked with a 1737c478bd9Sstevel@tonic-gate * binary semaphore so that no one else can touch it. If the block was 1747c478bd9Sstevel@tonic-gate * already in core, no I/O need be done; if it is 1757c478bd9Sstevel@tonic-gate * already locked, the process waits until it becomes free. 1767c478bd9Sstevel@tonic-gate * The following routines allocate a buffer: 1777c478bd9Sstevel@tonic-gate * getblk 1787c478bd9Sstevel@tonic-gate * bread/BREAD 1797c478bd9Sstevel@tonic-gate * breada 1807c478bd9Sstevel@tonic-gate * Eventually the buffer must be released, possibly with the 1817c478bd9Sstevel@tonic-gate * side effect of writing it out, by using one of 1827c478bd9Sstevel@tonic-gate * bwrite/BWRITE/brwrite 1837c478bd9Sstevel@tonic-gate * bdwrite/bdrwrite 1847c478bd9Sstevel@tonic-gate * bawrite 1857c478bd9Sstevel@tonic-gate * brelse 1867c478bd9Sstevel@tonic-gate * 1877c478bd9Sstevel@tonic-gate * The B_WANTED/B_BUSY bits are NOT used by these routines for synchronization. 1887c478bd9Sstevel@tonic-gate * Instead, a binary semaphore, b_sem is used to gain exclusive access to 1897c478bd9Sstevel@tonic-gate * a buffer and a binary semaphore, b_io is used for I/O synchronization. 1907c478bd9Sstevel@tonic-gate * B_DONE is still used to denote a buffer with I/O complete on it. 
1917c478bd9Sstevel@tonic-gate * 1927c478bd9Sstevel@tonic-gate * The bfreelist.b_bcount field is computed everytime fsflush runs. It is 1937c478bd9Sstevel@tonic-gate * should not be used where a very accurate count of the free buffers is 1947c478bd9Sstevel@tonic-gate * needed. 1957c478bd9Sstevel@tonic-gate */ 1967c478bd9Sstevel@tonic-gate 1977c478bd9Sstevel@tonic-gate /* 1987c478bd9Sstevel@tonic-gate * Read in (if necessary) the block and return a buffer pointer. 1997c478bd9Sstevel@tonic-gate * 2007c478bd9Sstevel@tonic-gate * This interface is provided for binary compatibility. Using 2017c478bd9Sstevel@tonic-gate * BREAD() directly avoids the extra function call overhead invoked 2027c478bd9Sstevel@tonic-gate * by calling this routine. 2037c478bd9Sstevel@tonic-gate */ 2047c478bd9Sstevel@tonic-gate struct buf * 2057c478bd9Sstevel@tonic-gate bread(dev_t dev, daddr_t blkno, long bsize) 2067c478bd9Sstevel@tonic-gate { 2077c478bd9Sstevel@tonic-gate return (BREAD(dev, blkno, bsize)); 2087c478bd9Sstevel@tonic-gate } 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate /* 2117c478bd9Sstevel@tonic-gate * Common code for reading a buffer with various options 2127c478bd9Sstevel@tonic-gate * 2137c478bd9Sstevel@tonic-gate * Read in (if necessary) the block and return a buffer pointer. 
2147c478bd9Sstevel@tonic-gate */ 2157c478bd9Sstevel@tonic-gate struct buf * 2167c478bd9Sstevel@tonic-gate bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize) 2177c478bd9Sstevel@tonic-gate { 2187c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg; 2197c478bd9Sstevel@tonic-gate struct buf *bp; 2207c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 2217c478bd9Sstevel@tonic-gate 2227c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, lread, 1); 2237c478bd9Sstevel@tonic-gate bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1); 2247c478bd9Sstevel@tonic-gate if (bp->b_flags & B_DONE) 2257c478bd9Sstevel@tonic-gate return (bp); 2267c478bd9Sstevel@tonic-gate bp->b_flags |= B_READ; 2277c478bd9Sstevel@tonic-gate ASSERT(bp->b_bcount == bsize); 2287c478bd9Sstevel@tonic-gate if (ufsvfsp == NULL) { /* !ufs */ 2297c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 2307c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) { 2317c478bd9Sstevel@tonic-gate /* ufs && logging */ 2327c478bd9Sstevel@tonic-gate (*bio_lufs_strategy)(ufsvfsp->vfs_log, bp); 2337c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) { 2347c478bd9Sstevel@tonic-gate /* ufs && snapshots */ 2357c478bd9Sstevel@tonic-gate (*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp); 2367c478bd9Sstevel@tonic-gate } else { 237d3d50737SRafael Vanoni ufsvfsp->vfs_iotstamp = ddi_get_lbolt(); 2387c478bd9Sstevel@tonic-gate ub.ub_breads.value.ul++; /* ufs && !logging */ 2397c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 2407c478bd9Sstevel@tonic-gate } 2417c478bd9Sstevel@tonic-gate if (lwp != NULL) 2427c478bd9Sstevel@tonic-gate lwp->lwp_ru.inblock++; 2437c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, bread, 1); 2447c478bd9Sstevel@tonic-gate (void) biowait(bp); 2457c478bd9Sstevel@tonic-gate return (bp); 2467c478bd9Sstevel@tonic-gate } 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate /* 
2497c478bd9Sstevel@tonic-gate * Read in the block, like bread, but also start I/O on the 2507c478bd9Sstevel@tonic-gate * read-ahead block (which is not allocated to the caller). 2517c478bd9Sstevel@tonic-gate */ 2527c478bd9Sstevel@tonic-gate struct buf * 2537c478bd9Sstevel@tonic-gate breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize) 2547c478bd9Sstevel@tonic-gate { 2557c478bd9Sstevel@tonic-gate struct buf *bp, *rabp; 2567c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 2577c478bd9Sstevel@tonic-gate 2587c478bd9Sstevel@tonic-gate bp = NULL; 2597c478bd9Sstevel@tonic-gate if (!bio_incore(dev, blkno)) { 2607c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, lread, 1); 2617c478bd9Sstevel@tonic-gate bp = GETBLK(dev, blkno, bsize); 2627c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_DONE) == 0) { 2637c478bd9Sstevel@tonic-gate bp->b_flags |= B_READ; 2647c478bd9Sstevel@tonic-gate bp->b_bcount = bsize; 2657c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 2667c478bd9Sstevel@tonic-gate if (lwp != NULL) 2677c478bd9Sstevel@tonic-gate lwp->lwp_ru.inblock++; 2687c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, bread, 1); 2697c478bd9Sstevel@tonic-gate } 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate if (rablkno && bfreelist.b_bcount > 1 && 2727c478bd9Sstevel@tonic-gate !bio_incore(dev, rablkno)) { 2737c478bd9Sstevel@tonic-gate rabp = GETBLK(dev, rablkno, bsize); 2747c478bd9Sstevel@tonic-gate if (rabp->b_flags & B_DONE) 2757c478bd9Sstevel@tonic-gate brelse(rabp); 2767c478bd9Sstevel@tonic-gate else { 2777c478bd9Sstevel@tonic-gate rabp->b_flags |= B_READ|B_ASYNC; 2787c478bd9Sstevel@tonic-gate rabp->b_bcount = bsize; 2797c478bd9Sstevel@tonic-gate (void) bdev_strategy(rabp); 2807c478bd9Sstevel@tonic-gate if (lwp != NULL) 2817c478bd9Sstevel@tonic-gate lwp->lwp_ru.inblock++; 2827c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, bread, 1); 2837c478bd9Sstevel@tonic-gate } 2847c478bd9Sstevel@tonic-gate } 2857c478bd9Sstevel@tonic-gate if (bp == NULL) 
2867c478bd9Sstevel@tonic-gate return (BREAD(dev, blkno, bsize)); 2877c478bd9Sstevel@tonic-gate (void) biowait(bp); 2887c478bd9Sstevel@tonic-gate return (bp); 2897c478bd9Sstevel@tonic-gate } 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate /* 2927c478bd9Sstevel@tonic-gate * Common code for writing a buffer with various options. 2937c478bd9Sstevel@tonic-gate * 2947c478bd9Sstevel@tonic-gate * force_wait - wait for write completion regardless of B_ASYNC flag 2957c478bd9Sstevel@tonic-gate * do_relse - release the buffer when we are done 2967c478bd9Sstevel@tonic-gate * clear_flags - flags to clear from the buffer 2977c478bd9Sstevel@tonic-gate */ 2987c478bd9Sstevel@tonic-gate void 2997c478bd9Sstevel@tonic-gate bwrite_common(void *arg, struct buf *bp, int force_wait, 3003f11de9dSSara Hartse int do_relse, int clear_flags) 3017c478bd9Sstevel@tonic-gate { 3027c478bd9Sstevel@tonic-gate register int do_wait; 3037c478bd9Sstevel@tonic-gate struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg; 3047c478bd9Sstevel@tonic-gate int flag; 3057c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 3067c478bd9Sstevel@tonic-gate struct cpu *cpup; 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 3097c478bd9Sstevel@tonic-gate flag = bp->b_flags; 3107c478bd9Sstevel@tonic-gate bp->b_flags &= ~clear_flags; 3117c478bd9Sstevel@tonic-gate if (lwp != NULL) 3127c478bd9Sstevel@tonic-gate lwp->lwp_ru.oublock++; 3137c478bd9Sstevel@tonic-gate CPU_STATS_ENTER_K(); 3147c478bd9Sstevel@tonic-gate cpup = CPU; /* get pointer AFTER preemption is disabled */ 3157c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, sys, lwrite, 1); 3167c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, sys, bwrite, 1); 3177c478bd9Sstevel@tonic-gate do_wait = ((flag & B_ASYNC) == 0 || force_wait); 3187c478bd9Sstevel@tonic-gate if (do_wait == 0) 3197c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cpup, sys, bawrite, 1); 3207c478bd9Sstevel@tonic-gate CPU_STATS_EXIT_K(); 3217c478bd9Sstevel@tonic-gate 
if (ufsvfsp == NULL) { 3227c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 3237c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) { 3247c478bd9Sstevel@tonic-gate /* ufs && logging */ 3257c478bd9Sstevel@tonic-gate (*bio_lufs_strategy)(ufsvfsp->vfs_log, bp); 3267c478bd9Sstevel@tonic-gate } else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) { 3277c478bd9Sstevel@tonic-gate /* ufs && snapshots */ 3287c478bd9Sstevel@tonic-gate (*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp); 3297c478bd9Sstevel@tonic-gate } else { 3307c478bd9Sstevel@tonic-gate ub.ub_bwrites.value.ul++; /* ufs && !logging */ 3317c478bd9Sstevel@tonic-gate (void) bdev_strategy(bp); 3327c478bd9Sstevel@tonic-gate } 3337c478bd9Sstevel@tonic-gate if (do_wait) { 3347c478bd9Sstevel@tonic-gate (void) biowait(bp); 3357c478bd9Sstevel@tonic-gate if (do_relse) { 3367c478bd9Sstevel@tonic-gate brelse(bp); 3377c478bd9Sstevel@tonic-gate } 3387c478bd9Sstevel@tonic-gate } 3397c478bd9Sstevel@tonic-gate } 3407c478bd9Sstevel@tonic-gate 3417c478bd9Sstevel@tonic-gate /* 3427c478bd9Sstevel@tonic-gate * Write the buffer, waiting for completion (unless B_ASYNC is set). 3437c478bd9Sstevel@tonic-gate * Then release the buffer. 3447c478bd9Sstevel@tonic-gate * This interface is provided for binary compatibility. Using 3457c478bd9Sstevel@tonic-gate * BWRITE() directly avoids the extra function call overhead invoked 3467c478bd9Sstevel@tonic-gate * by calling this routine. 3477c478bd9Sstevel@tonic-gate */ 3487c478bd9Sstevel@tonic-gate void 3497c478bd9Sstevel@tonic-gate bwrite(struct buf *bp) 3507c478bd9Sstevel@tonic-gate { 3517c478bd9Sstevel@tonic-gate BWRITE(bp); 3527c478bd9Sstevel@tonic-gate } 3537c478bd9Sstevel@tonic-gate 3547c478bd9Sstevel@tonic-gate /* 3557c478bd9Sstevel@tonic-gate * Write the buffer, waiting for completion. 3567c478bd9Sstevel@tonic-gate * But don't release the buffer afterwards. 3577c478bd9Sstevel@tonic-gate * This interface is provided for binary compatibility. 
Using 3587c478bd9Sstevel@tonic-gate * BWRITE2() directly avoids the extra function call overhead. 3597c478bd9Sstevel@tonic-gate */ 3607c478bd9Sstevel@tonic-gate void 3617c478bd9Sstevel@tonic-gate bwrite2(struct buf *bp) 3627c478bd9Sstevel@tonic-gate { 3637c478bd9Sstevel@tonic-gate BWRITE2(bp); 3647c478bd9Sstevel@tonic-gate } 3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate /* 3677c478bd9Sstevel@tonic-gate * Release the buffer, marking it so that if it is grabbed 3687c478bd9Sstevel@tonic-gate * for another purpose it will be written out before being 3697c478bd9Sstevel@tonic-gate * given up (e.g. when writing a partial block where it is 3707c478bd9Sstevel@tonic-gate * assumed that another write for the same block will soon follow). 3717c478bd9Sstevel@tonic-gate * Also save the time that the block is first marked as delayed 3727c478bd9Sstevel@tonic-gate * so that it will be written in a reasonable time. 3737c478bd9Sstevel@tonic-gate */ 3747c478bd9Sstevel@tonic-gate void 3757c478bd9Sstevel@tonic-gate bdwrite(struct buf *bp) 3767c478bd9Sstevel@tonic-gate { 3777c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 3787c478bd9Sstevel@tonic-gate CPU_STATS_ADD_K(sys, lwrite, 1); 3797c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_DELWRI) == 0) 380d3d50737SRafael Vanoni bp->b_start = ddi_get_lbolt(); 3817c478bd9Sstevel@tonic-gate /* 3827c478bd9Sstevel@tonic-gate * B_DONE allows others to use the buffer, B_DELWRI causes the 3837c478bd9Sstevel@tonic-gate * buffer to be written before being reused, and setting b_resid 3847c478bd9Sstevel@tonic-gate * to zero says the buffer is complete. 3857c478bd9Sstevel@tonic-gate */ 3867c478bd9Sstevel@tonic-gate bp->b_flags |= B_DELWRI | B_DONE; 3877c478bd9Sstevel@tonic-gate bp->b_resid = 0; 3887c478bd9Sstevel@tonic-gate brelse(bp); 3897c478bd9Sstevel@tonic-gate } 3907c478bd9Sstevel@tonic-gate 3917c478bd9Sstevel@tonic-gate /* 3927c478bd9Sstevel@tonic-gate * Release the buffer, start I/O on it, but don't wait for completion. 
3937c478bd9Sstevel@tonic-gate */ 3947c478bd9Sstevel@tonic-gate void 3957c478bd9Sstevel@tonic-gate bawrite(struct buf *bp) 3967c478bd9Sstevel@tonic-gate { 3977c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 3987c478bd9Sstevel@tonic-gate 3997c478bd9Sstevel@tonic-gate /* Use bfreelist.b_bcount as a weird-ass heuristic */ 4007c478bd9Sstevel@tonic-gate if (bfreelist.b_bcount > 4) 4017c478bd9Sstevel@tonic-gate bp->b_flags |= B_ASYNC; 4027c478bd9Sstevel@tonic-gate BWRITE(bp); 4037c478bd9Sstevel@tonic-gate } 4047c478bd9Sstevel@tonic-gate 4057c478bd9Sstevel@tonic-gate /* 4067c478bd9Sstevel@tonic-gate * Release the buffer, with no I/O implied. 4077c478bd9Sstevel@tonic-gate */ 4087c478bd9Sstevel@tonic-gate void 4097c478bd9Sstevel@tonic-gate brelse(struct buf *bp) 4107c478bd9Sstevel@tonic-gate { 4117c478bd9Sstevel@tonic-gate struct buf **backp; 4127c478bd9Sstevel@tonic-gate uint_t index; 4137c478bd9Sstevel@tonic-gate kmutex_t *hmp; 4147c478bd9Sstevel@tonic-gate struct buf *dp; 4157c478bd9Sstevel@tonic-gate struct hbuf *hp; 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate 4187c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate /* 4217c478bd9Sstevel@tonic-gate * Clear the retry write flag if the buffer was written without 4227c478bd9Sstevel@tonic-gate * error. The presence of B_DELWRI means the buffer has not yet 4237c478bd9Sstevel@tonic-gate * been written and the presence of B_ERROR means that an error 4247c478bd9Sstevel@tonic-gate * is still occurring. 
4257c478bd9Sstevel@tonic-gate */ 4267c478bd9Sstevel@tonic-gate if ((bp->b_flags & (B_ERROR | B_DELWRI | B_RETRYWRI)) == B_RETRYWRI) { 4277c478bd9Sstevel@tonic-gate bp->b_flags &= ~B_RETRYWRI; 4287c478bd9Sstevel@tonic-gate } 4297c478bd9Sstevel@tonic-gate 4307c478bd9Sstevel@tonic-gate /* Check for anomalous conditions */ 4317c478bd9Sstevel@tonic-gate if (bp->b_flags & (B_ERROR|B_NOCACHE)) { 4327c478bd9Sstevel@tonic-gate if (bp->b_flags & B_NOCACHE) { 4337c478bd9Sstevel@tonic-gate /* Don't add to the freelist. Destroy it now */ 4347c478bd9Sstevel@tonic-gate kmem_free(bp->b_un.b_addr, bp->b_bufsize); 4357c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_sem); 4367c478bd9Sstevel@tonic-gate sema_destroy(&bp->b_io); 4377c478bd9Sstevel@tonic-gate kmem_free(bp, sizeof (struct buf)); 4387c478bd9Sstevel@tonic-gate return; 4397c478bd9Sstevel@tonic-gate } 4407c478bd9Sstevel@tonic-gate /* 4417c478bd9Sstevel@tonic-gate * If a write failed and we are supposed to retry write, 4427c478bd9Sstevel@tonic-gate * don't toss the buffer. Keep it around and mark it 4437c478bd9Sstevel@tonic-gate * delayed write in the hopes that it will eventually 4447c478bd9Sstevel@tonic-gate * get flushed (and still keep the system running.) 4457c478bd9Sstevel@tonic-gate */ 4467c478bd9Sstevel@tonic-gate if ((bp->b_flags & (B_READ | B_RETRYWRI)) == B_RETRYWRI) { 4477c478bd9Sstevel@tonic-gate bp->b_flags |= B_DELWRI; 4487c478bd9Sstevel@tonic-gate /* keep fsflush from trying continuously to flush */ 449d3d50737SRafael Vanoni bp->b_start = ddi_get_lbolt(); 4507c478bd9Sstevel@tonic-gate } else 4517c478bd9Sstevel@tonic-gate bp->b_flags |= B_AGE|B_STALE; 4527c478bd9Sstevel@tonic-gate bp->b_flags &= ~B_ERROR; 4537c478bd9Sstevel@tonic-gate bp->b_error = 0; 4547c478bd9Sstevel@tonic-gate } 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate /* 4577c478bd9Sstevel@tonic-gate * If delayed write is set then put in on the delayed 4587c478bd9Sstevel@tonic-gate * write list instead of the free buffer list. 
4597c478bd9Sstevel@tonic-gate */ 4607c478bd9Sstevel@tonic-gate index = bio_bhash(bp->b_edev, bp->b_blkno); 4617c478bd9Sstevel@tonic-gate hmp = &hbuf[index].b_lock; 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate mutex_enter(hmp); 4647c478bd9Sstevel@tonic-gate hp = &hbuf[index]; 4657c478bd9Sstevel@tonic-gate dp = (struct buf *)hp; 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate /* 4687c478bd9Sstevel@tonic-gate * Make sure that the number of entries on this list are 4697c478bd9Sstevel@tonic-gate * Zero <= count <= total # buffers 4707c478bd9Sstevel@tonic-gate */ 4717c478bd9Sstevel@tonic-gate ASSERT(hp->b_length >= 0); 4727c478bd9Sstevel@tonic-gate ASSERT(hp->b_length < nbuf); 4737c478bd9Sstevel@tonic-gate 4747c478bd9Sstevel@tonic-gate hp->b_length++; /* We are adding this buffer */ 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate if (bp->b_flags & B_DELWRI) { 4777c478bd9Sstevel@tonic-gate /* 4787c478bd9Sstevel@tonic-gate * This buffer goes on the delayed write buffer list 4797c478bd9Sstevel@tonic-gate */ 4807c478bd9Sstevel@tonic-gate dp = (struct buf *)&dwbuf[index]; 4817c478bd9Sstevel@tonic-gate } 4827c478bd9Sstevel@tonic-gate ASSERT(bp->b_bufsize > 0); 4837c478bd9Sstevel@tonic-gate ASSERT(bp->b_bcount > 0); 4847c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr != NULL); 4857c478bd9Sstevel@tonic-gate 4867c478bd9Sstevel@tonic-gate if (bp->b_flags & B_AGE) { 4877c478bd9Sstevel@tonic-gate backp = &dp->av_forw; 4887c478bd9Sstevel@tonic-gate (*backp)->av_back = bp; 4897c478bd9Sstevel@tonic-gate bp->av_forw = *backp; 4907c478bd9Sstevel@tonic-gate *backp = bp; 4917c478bd9Sstevel@tonic-gate bp->av_back = dp; 4927c478bd9Sstevel@tonic-gate } else { 4937c478bd9Sstevel@tonic-gate backp = &dp->av_back; 4947c478bd9Sstevel@tonic-gate (*backp)->av_forw = bp; 4957c478bd9Sstevel@tonic-gate bp->av_back = *backp; 4967c478bd9Sstevel@tonic-gate *backp = bp; 4977c478bd9Sstevel@tonic-gate bp->av_forw = dp; 4987c478bd9Sstevel@tonic-gate } 
4997c478bd9Sstevel@tonic-gate mutex_exit(hmp); 5007c478bd9Sstevel@tonic-gate 5017c478bd9Sstevel@tonic-gate if (bfreelist.b_flags & B_WANTED) { 5027c478bd9Sstevel@tonic-gate /* 5037c478bd9Sstevel@tonic-gate * Should come here very very rarely. 5047c478bd9Sstevel@tonic-gate */ 5057c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock); 5067c478bd9Sstevel@tonic-gate if (bfreelist.b_flags & B_WANTED) { 5077c478bd9Sstevel@tonic-gate bfreelist.b_flags &= ~B_WANTED; 5087c478bd9Sstevel@tonic-gate cv_broadcast(&bio_mem_cv); 5097c478bd9Sstevel@tonic-gate } 5107c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock); 5117c478bd9Sstevel@tonic-gate } 5127c478bd9Sstevel@tonic-gate 5137c478bd9Sstevel@tonic-gate bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC); 5147c478bd9Sstevel@tonic-gate /* 5157c478bd9Sstevel@tonic-gate * Don't let anyone get the buffer off the freelist before we 5167c478bd9Sstevel@tonic-gate * release our hold on it. 5177c478bd9Sstevel@tonic-gate */ 5187c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 5197c478bd9Sstevel@tonic-gate } 5207c478bd9Sstevel@tonic-gate 5217c478bd9Sstevel@tonic-gate /* 5227c478bd9Sstevel@tonic-gate * Return a count of the number of B_BUSY buffers in the system 5237c478bd9Sstevel@tonic-gate * Can only be used as a good estimate. If 'cleanit' is set, 5247c478bd9Sstevel@tonic-gate * try to flush all bufs. 
5257c478bd9Sstevel@tonic-gate */ 5267c478bd9Sstevel@tonic-gate int 5277c478bd9Sstevel@tonic-gate bio_busy(int cleanit) 5287c478bd9Sstevel@tonic-gate { 5297c478bd9Sstevel@tonic-gate struct buf *bp, *dp; 5307c478bd9Sstevel@tonic-gate int busy = 0; 5317c478bd9Sstevel@tonic-gate int i; 5327c478bd9Sstevel@tonic-gate kmutex_t *hmp; 5337c478bd9Sstevel@tonic-gate 5347c478bd9Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) { 5357c478bd9Sstevel@tonic-gate dp = (struct buf *)&hbuf[i]; 5367c478bd9Sstevel@tonic-gate hmp = &hbuf[i].b_lock; 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate mutex_enter(hmp); 5397c478bd9Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 5407c478bd9Sstevel@tonic-gate if (bp->b_flags & B_BUSY) 5417c478bd9Sstevel@tonic-gate busy++; 5427c478bd9Sstevel@tonic-gate } 5437c478bd9Sstevel@tonic-gate mutex_exit(hmp); 5447c478bd9Sstevel@tonic-gate } 5457c478bd9Sstevel@tonic-gate 5467c478bd9Sstevel@tonic-gate if (cleanit && busy != 0) { 5477c478bd9Sstevel@tonic-gate bflush(NODEV); 5487c478bd9Sstevel@tonic-gate } 5497c478bd9Sstevel@tonic-gate 5507c478bd9Sstevel@tonic-gate return (busy); 5517c478bd9Sstevel@tonic-gate } 5527c478bd9Sstevel@tonic-gate 5537c478bd9Sstevel@tonic-gate /* 5547c478bd9Sstevel@tonic-gate * this interface is provided for binary compatibility. 5557c478bd9Sstevel@tonic-gate * 5567c478bd9Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate 5577c478bd9Sstevel@tonic-gate * block is already associated, return it; otherwise search 5587c478bd9Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it. 
5597c478bd9Sstevel@tonic-gate */ 5607c478bd9Sstevel@tonic-gate struct buf * 5617c478bd9Sstevel@tonic-gate getblk(dev_t dev, daddr_t blkno, long bsize) 5627c478bd9Sstevel@tonic-gate { 5637c478bd9Sstevel@tonic-gate return (getblk_common(/* ufsvfsp */ NULL, dev, 564d3d50737SRafael Vanoni blkno, bsize, /* errflg */ 0)); 5657c478bd9Sstevel@tonic-gate } 5667c478bd9Sstevel@tonic-gate 5677c478bd9Sstevel@tonic-gate /* 5687c478bd9Sstevel@tonic-gate * Assign a buffer for the given block. If the appropriate 5697c478bd9Sstevel@tonic-gate * block is already associated, return it; otherwise search 5707c478bd9Sstevel@tonic-gate * for the oldest non-busy buffer and reassign it. 5717c478bd9Sstevel@tonic-gate */ 5727c478bd9Sstevel@tonic-gate struct buf * 5737c478bd9Sstevel@tonic-gate getblk_common(void * arg, dev_t dev, daddr_t blkno, long bsize, int errflg) 5747c478bd9Sstevel@tonic-gate { 5757c478bd9Sstevel@tonic-gate ufsvfs_t *ufsvfsp = (struct ufsvfs *)arg; 5767c478bd9Sstevel@tonic-gate struct buf *bp; 5777c478bd9Sstevel@tonic-gate struct buf *dp; 5787c478bd9Sstevel@tonic-gate struct buf *nbp = NULL; 5797c478bd9Sstevel@tonic-gate struct buf *errbp; 5807c478bd9Sstevel@tonic-gate uint_t index; 5817c478bd9Sstevel@tonic-gate kmutex_t *hmp; 5827c478bd9Sstevel@tonic-gate struct hbuf *hp; 5837c478bd9Sstevel@tonic-gate 5847c478bd9Sstevel@tonic-gate if (getmajor(dev) >= devcnt) 5857c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "blkdev"); 5867c478bd9Sstevel@tonic-gate 5877c478bd9Sstevel@tonic-gate biostats.bio_lookup.value.ui32++; 5887c478bd9Sstevel@tonic-gate 5897c478bd9Sstevel@tonic-gate index = bio_bhash(dev, blkno); 5907c478bd9Sstevel@tonic-gate hp = &hbuf[index]; 5917c478bd9Sstevel@tonic-gate dp = (struct buf *)hp; 5927c478bd9Sstevel@tonic-gate hmp = &hp->b_lock; 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate mutex_enter(hmp); 5957c478bd9Sstevel@tonic-gate loop: 5967c478bd9Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 5977c478bd9Sstevel@tonic-gate if 
(bp->b_blkno != blkno || bp->b_edev != dev || 5987c478bd9Sstevel@tonic-gate (bp->b_flags & B_STALE)) 5997c478bd9Sstevel@tonic-gate continue; 6007c478bd9Sstevel@tonic-gate /* 6017c478bd9Sstevel@tonic-gate * Avoid holding the hash lock in the event that 6027c478bd9Sstevel@tonic-gate * the buffer is locked by someone. Since the hash chain 6037c478bd9Sstevel@tonic-gate * may change when we drop the hash lock 6047c478bd9Sstevel@tonic-gate * we have to start at the beginning of the chain if the 6057c478bd9Sstevel@tonic-gate * buffer identity/contents aren't valid. 6067c478bd9Sstevel@tonic-gate */ 6077c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) { 6087c478bd9Sstevel@tonic-gate biostats.bio_bufbusy.value.ui32++; 6097c478bd9Sstevel@tonic-gate mutex_exit(hmp); 6107c478bd9Sstevel@tonic-gate /* 6117c478bd9Sstevel@tonic-gate * OK, we are dealing with a busy buffer. 6127c478bd9Sstevel@tonic-gate * In the case that we are panicking and we 6137c478bd9Sstevel@tonic-gate * got called from bread(), we have some chance 6147c478bd9Sstevel@tonic-gate * for error recovery. So better bail out from 6157c478bd9Sstevel@tonic-gate * here since sema_p() won't block. If we got 6167c478bd9Sstevel@tonic-gate * called directly from ufs routines, there is 6177c478bd9Sstevel@tonic-gate * no way to report an error yet. 6187c478bd9Sstevel@tonic-gate */ 6197c478bd9Sstevel@tonic-gate if (panicstr && errflg) 6207c478bd9Sstevel@tonic-gate goto errout; 6217c478bd9Sstevel@tonic-gate /* 6227c478bd9Sstevel@tonic-gate * For the following line of code to work 6237c478bd9Sstevel@tonic-gate * correctly never kmem_free the buffer "header". 
6247c478bd9Sstevel@tonic-gate */ 6257c478bd9Sstevel@tonic-gate sema_p(&bp->b_sem); 6267c478bd9Sstevel@tonic-gate if (bp->b_blkno != blkno || bp->b_edev != dev || 6277c478bd9Sstevel@tonic-gate (bp->b_flags & B_STALE)) { 6287c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 6297c478bd9Sstevel@tonic-gate mutex_enter(hmp); 6307c478bd9Sstevel@tonic-gate goto loop; /* start over */ 6317c478bd9Sstevel@tonic-gate } 6327c478bd9Sstevel@tonic-gate mutex_enter(hmp); 6337c478bd9Sstevel@tonic-gate } 6347c478bd9Sstevel@tonic-gate /* Found */ 6357c478bd9Sstevel@tonic-gate biostats.bio_hit.value.ui32++; 6367c478bd9Sstevel@tonic-gate bp->b_flags &= ~B_AGE; 6377c478bd9Sstevel@tonic-gate 6387c478bd9Sstevel@tonic-gate /* 6397c478bd9Sstevel@tonic-gate * Yank it off the free/delayed write lists 6407c478bd9Sstevel@tonic-gate */ 6417c478bd9Sstevel@tonic-gate hp->b_length--; 6427c478bd9Sstevel@tonic-gate notavail(bp); 6437c478bd9Sstevel@tonic-gate mutex_exit(hmp); 6447c478bd9Sstevel@tonic-gate 6457c478bd9Sstevel@tonic-gate ASSERT((bp->b_flags & B_NOCACHE) == NULL); 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate if (nbp == NULL) { 6487c478bd9Sstevel@tonic-gate /* 6497c478bd9Sstevel@tonic-gate * Make the common path short. 6507c478bd9Sstevel@tonic-gate */ 6517c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 6527c478bd9Sstevel@tonic-gate return (bp); 6537c478bd9Sstevel@tonic-gate } 6547c478bd9Sstevel@tonic-gate 6557c478bd9Sstevel@tonic-gate biostats.bio_bufdup.value.ui32++; 6567c478bd9Sstevel@tonic-gate 6577c478bd9Sstevel@tonic-gate /* 6587c478bd9Sstevel@tonic-gate * The buffer must have entered during the lock upgrade 6597c478bd9Sstevel@tonic-gate * so free the new buffer we allocated and return the 6607c478bd9Sstevel@tonic-gate * found buffer. 
6617c478bd9Sstevel@tonic-gate */ 6627c478bd9Sstevel@tonic-gate kmem_free(nbp->b_un.b_addr, nbp->b_bufsize); 6637c478bd9Sstevel@tonic-gate nbp->b_un.b_addr = NULL; 6647c478bd9Sstevel@tonic-gate 6657c478bd9Sstevel@tonic-gate /* 6667c478bd9Sstevel@tonic-gate * Account for the memory 6677c478bd9Sstevel@tonic-gate */ 6687c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock); 6697c478bd9Sstevel@tonic-gate bfreelist.b_bufsize += nbp->b_bufsize; 6707c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock); 6717c478bd9Sstevel@tonic-gate 6727c478bd9Sstevel@tonic-gate /* 6737c478bd9Sstevel@tonic-gate * Destroy buf identity, and place on avail list 6747c478bd9Sstevel@tonic-gate */ 6757c478bd9Sstevel@tonic-gate nbp->b_dev = (o_dev_t)NODEV; 6767c478bd9Sstevel@tonic-gate nbp->b_edev = NODEV; 6777c478bd9Sstevel@tonic-gate nbp->b_flags = 0; 6787c478bd9Sstevel@tonic-gate nbp->b_file = NULL; 6797c478bd9Sstevel@tonic-gate nbp->b_offset = -1; 6807c478bd9Sstevel@tonic-gate 6817c478bd9Sstevel@tonic-gate sema_v(&nbp->b_sem); 6827c478bd9Sstevel@tonic-gate bio_bhdr_free(nbp); 6837c478bd9Sstevel@tonic-gate 6847c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&bp->b_sem)); 6857c478bd9Sstevel@tonic-gate return (bp); 6867c478bd9Sstevel@tonic-gate } 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate /* 6897c478bd9Sstevel@tonic-gate * bio_getfreeblk may block so check the hash chain again. 6907c478bd9Sstevel@tonic-gate */ 6917c478bd9Sstevel@tonic-gate if (nbp == NULL) { 6927c478bd9Sstevel@tonic-gate mutex_exit(hmp); 6937c478bd9Sstevel@tonic-gate nbp = bio_getfreeblk(bsize); 6947c478bd9Sstevel@tonic-gate mutex_enter(hmp); 6957c478bd9Sstevel@tonic-gate goto loop; 6967c478bd9Sstevel@tonic-gate } 6977c478bd9Sstevel@tonic-gate 6987c478bd9Sstevel@tonic-gate /* 6997c478bd9Sstevel@tonic-gate * New buffer. Assign nbp and stick it on the hash. 
7007c478bd9Sstevel@tonic-gate */ 7017c478bd9Sstevel@tonic-gate nbp->b_flags = B_BUSY; 7027c478bd9Sstevel@tonic-gate nbp->b_edev = dev; 7037c478bd9Sstevel@tonic-gate nbp->b_dev = (o_dev_t)cmpdev(dev); 7047c478bd9Sstevel@tonic-gate nbp->b_blkno = blkno; 7057c478bd9Sstevel@tonic-gate nbp->b_iodone = NULL; 7067c478bd9Sstevel@tonic-gate nbp->b_bcount = bsize; 7077c478bd9Sstevel@tonic-gate /* 7087c478bd9Sstevel@tonic-gate * If we are given a ufsvfsp and the vfs_root field is NULL 7097c478bd9Sstevel@tonic-gate * then this must be I/O for a superblock. A superblock's 7107c478bd9Sstevel@tonic-gate * buffer is set up in mountfs() and there is no root vnode 7117c478bd9Sstevel@tonic-gate * at that point. 7127c478bd9Sstevel@tonic-gate */ 7137c478bd9Sstevel@tonic-gate if (ufsvfsp && ufsvfsp->vfs_root) { 7147c478bd9Sstevel@tonic-gate nbp->b_vp = ufsvfsp->vfs_root; 7157c478bd9Sstevel@tonic-gate } else { 7167c478bd9Sstevel@tonic-gate nbp->b_vp = NULL; 7177c478bd9Sstevel@tonic-gate } 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate ASSERT((nbp->b_flags & B_NOCACHE) == NULL); 7207c478bd9Sstevel@tonic-gate 7217c478bd9Sstevel@tonic-gate binshash(nbp, dp); 7227c478bd9Sstevel@tonic-gate mutex_exit(hmp); 7237c478bd9Sstevel@tonic-gate 7247c478bd9Sstevel@tonic-gate ASSERT(SEMA_HELD(&nbp->b_sem)); 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate return (nbp); 7277c478bd9Sstevel@tonic-gate 7287c478bd9Sstevel@tonic-gate 7297c478bd9Sstevel@tonic-gate /* 7307c478bd9Sstevel@tonic-gate * Come here in case of an internal error. At this point we couldn't 731*48bbca81SDaniel Hoffman * get a buffer, but we have to return one. Hence we allocate some 7327c478bd9Sstevel@tonic-gate * kind of error reply buffer on the fly. This buffer is marked as 7337c478bd9Sstevel@tonic-gate * B_NOCACHE | B_AGE | B_ERROR | B_DONE to assure the following: 7347c478bd9Sstevel@tonic-gate * - B_ERROR will indicate error to the caller. 
7357c478bd9Sstevel@tonic-gate * - B_DONE will prevent us from reading the buffer from 7367c478bd9Sstevel@tonic-gate * the device. 7377c478bd9Sstevel@tonic-gate * - B_NOCACHE will cause that this buffer gets free'd in 7387c478bd9Sstevel@tonic-gate * brelse(). 7397c478bd9Sstevel@tonic-gate */ 7407c478bd9Sstevel@tonic-gate 7417c478bd9Sstevel@tonic-gate errout: 7427c478bd9Sstevel@tonic-gate errbp = geteblk(); 7437c478bd9Sstevel@tonic-gate sema_p(&errbp->b_sem); 7447c478bd9Sstevel@tonic-gate errbp->b_flags &= ~B_BUSY; 7457c478bd9Sstevel@tonic-gate errbp->b_flags |= (B_ERROR | B_DONE); 7467c478bd9Sstevel@tonic-gate return (errbp); 7477c478bd9Sstevel@tonic-gate } 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate /* 7507c478bd9Sstevel@tonic-gate * Get an empty block, not assigned to any particular device. 7517c478bd9Sstevel@tonic-gate * Returns a locked buffer that is not on any hash or free list. 7527c478bd9Sstevel@tonic-gate */ 7537c478bd9Sstevel@tonic-gate struct buf * 7547c478bd9Sstevel@tonic-gate ngeteblk(long bsize) 7557c478bd9Sstevel@tonic-gate { 7567c478bd9Sstevel@tonic-gate struct buf *bp; 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate bp = kmem_alloc(sizeof (struct buf), KM_SLEEP); 7597c478bd9Sstevel@tonic-gate bioinit(bp); 7607c478bd9Sstevel@tonic-gate bp->av_forw = bp->av_back = NULL; 7617c478bd9Sstevel@tonic-gate bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP); 7627c478bd9Sstevel@tonic-gate bp->b_bufsize = bsize; 7637c478bd9Sstevel@tonic-gate bp->b_flags = B_BUSY | B_NOCACHE | B_AGE; 7647c478bd9Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV; 7657c478bd9Sstevel@tonic-gate bp->b_edev = NODEV; 7667c478bd9Sstevel@tonic-gate bp->b_lblkno = 0; 7677c478bd9Sstevel@tonic-gate bp->b_bcount = bsize; 7687c478bd9Sstevel@tonic-gate bp->b_iodone = NULL; 7697c478bd9Sstevel@tonic-gate return (bp); 7707c478bd9Sstevel@tonic-gate } 7717c478bd9Sstevel@tonic-gate 7727c478bd9Sstevel@tonic-gate /* 7737c478bd9Sstevel@tonic-gate * Interface of geteblk() is kept 
/*
 * Interface of geteblk() is kept intact to maintain driver compatibility.
 * Use ngeteblk() to allocate block size other than 1 KB.
 */
struct buf *
geteblk(void)
{
	return (ngeteblk((long)1024));
}

/*
 * Return a buffer w/o sleeping
 *
 * Look up (dev, blkno) in the buffer hash; return the buffer with b_sem
 * held and removed from the free list only if it can be acquired without
 * blocking AND its contents are valid (B_DONE set).  Returns NULL in
 * every other case: hash lock busy, no matching buffer, buffer busy, or
 * buffer found but not B_DONE.
 */
struct buf *
trygetblk(dev_t dev, daddr_t blkno)
{
	struct buf	*bp;
	struct buf	*dp;
	struct hbuf	*hp;
	kmutex_t	*hmp;
	uint_t		index;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	hmp = &hp->b_lock;

	/* never block: if the hash chain is locked, give up immediately */
	if (!mutex_tryenter(hmp))
		return (NULL);

	dp = (struct buf *)hp;
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		/*
		 * Get access to a valid buffer without sleeping
		 */
		if (sema_tryp(&bp->b_sem)) {
			if (bp->b_flags & B_DONE) {
				/* valid contents: claim it off the freelist */
				hp->b_length--;
				notavail(bp);
				mutex_exit(hmp);
				return (bp);
			} else {
				/* identity matches but no valid data */
				sema_v(&bp->b_sem);
				break;
			}
		}
		/* only the first identity match is ever considered */
		break;
	}
	mutex_exit(hmp);
	return (NULL);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.  Thin wrapper over biowait() that asserts the
 * caller already holds b_sem.
 */
int
iowait(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (biowait(bp));
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.  Thin wrapper over biodone()
 * that asserts the caller already holds b_sem.
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}
/*
 * Zero the core associated with a buffer.
 * Caller must hold b_sem; clears b_bcount bytes and resets b_resid.
 */
void
clrbuf(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	bzero(bp->b_un.b_addr, bp->b_bcount);
	bp->b_resid = 0;
}


/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 *
 * Two-phase algorithm: first collect candidates from the per-bucket
 * delayed-write lists onto a private delwri_list (threaded through
 * b_list), then, with no hash locks held, acquire each buffer's b_sem
 * and write it back if it is still B_DELWRI.
 */
void
bflush(dev_t dev)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *delwri_list = EMPTY_LIST;
	int i, index;
	kmutex_t *hmp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any invalidates or flushes ahead of us to finish.
	 * We really could split blist_lock up per device for better
	 * parallelism here.
	 */
	while (bio_doinginval || bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doingflush++;
	/*
	 * Gather all B_DELWRI buffer for device.
	 * Lock ordering is b_sem > hash lock (brelse).
	 * Since we are finding the buffer via the delayed write list,
	 * it may be busy and we would block trying to get the
	 * b_sem lock while holding hash lock. So transfer all the
	 * candidates on the delwri_list and then drop the hash locks.
	 */
	for (i = 0; i < v.v_hbuf; i++) {
		hmp = &hbuf[i].b_lock;
		dp = (struct buf *)&dwbuf[i];
		mutex_enter(hmp);
		for (bp = dp->av_forw; bp != dp; bp = bp->av_forw) {
			if (dev == NODEV || bp->b_edev == dev) {
				/* b_list == NULL means not already queued */
				if (bp->b_list == NULL) {
					bp->b_list = delwri_list;
					delwri_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/*
	 * Now that the hash locks have been dropped grab the semaphores
	 * and write back all the buffers that have B_DELWRI set.
	 */
	while (delwri_list != EMPTY_LIST) {
		bp = delwri_list;

		sema_p(&bp->b_sem);	/* may block */
		if ((dev != bp->b_edev && dev != NODEV) ||
		    (panicstr && bp->b_flags & B_BUSY)) {
			sema_v(&bp->b_sem);
			delwri_list = bp->b_list;
			bp->b_list = NULL;
			continue;	/* No longer a candidate */
		}
		if (bp->b_flags & B_DELWRI) {
			index = bio_bhash(bp->b_edev, bp->b_blkno);
			hp = &hbuf[index];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			bp->b_flags |= B_ASYNC;
			mutex_enter(hmp);
			hp->b_length--;
			notavail(bp);
			mutex_exit(hmp);
			if (bp->b_vp == NULL) {		/* !ufs */
				BWRITE(bp);
			} else {			/* ufs */
				UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp);
			}
		} else {
			sema_v(&bp->b_sem);
		}
		delwri_list = bp->b_list;
		bp->b_list = NULL;
	}
	/* done: wake anyone blocked waiting for flushes/invalidates */
	mutex_enter(&blist_lock);
	bio_doingflush--;
	if (bio_flinv_cv_wanted) {
		bio_flinv_cv_wanted = 0;
		cv_broadcast(&bio_flushinval_cv);
	}
	mutex_exit(&blist_lock);
}
/*
 * Ensure that a specified block is up-to-date on disk.
 *
 * Finds the cached buffer for (dev, blkno), then re-validates its
 * identity after taking b_sem (the hash lock is dropped in between)
 * and writes it out synchronously if it is still delayed-write.
 */
void
blkflush(dev_t dev, daddr_t blkno)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	struct buf *sbp = NULL;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	hp = &hbuf[index];
	dp = (struct buf *)hp;
	hmp = &hp->b_lock;

	/*
	 * Identify the buffer in the cache belonging to
	 * this device and blkno (if any).
	 */
	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_edev != dev ||
		    (bp->b_flags & B_STALE))
			continue;
		sbp = bp;
		break;
	}
	mutex_exit(hmp);
	if (sbp == NULL)
		return;
	/*
	 * Now check the buffer we have identified and
	 * make sure it still belongs to the device and is B_DELWRI
	 */
	sema_p(&sbp->b_sem);
	if (sbp->b_blkno == blkno && sbp->b_edev == dev &&
	    (sbp->b_flags & (B_DELWRI|B_STALE)) == B_DELWRI) {
		mutex_enter(hmp);
		hp->b_length--;
		notavail(sbp);
		mutex_exit(hmp);
		/*
		 * XXX - There is nothing to guarantee a synchronous
		 * write here if the B_ASYNC flag is set. This needs
		 * some investigation.
		 */
		if (sbp->b_vp == NULL) {		/* !ufs */
			BWRITE(sbp);	/* synchronous write */
		} else {			/* ufs */
			UFS_BWRITE(VTOI(sbp->b_vp)->i_ufsvfs, sbp);
		}
	} else {
		/* identity changed or already clean: just release it */
		sema_v(&sbp->b_sem);
	}
}
/*
 * Same as binval, except can force-invalidate delayed-write buffers
 * (which are not be already flushed because of device errors).  Also
 * makes sure that the retry write flag is cleared.
 *
 * Returns 0 on success, or EIO if a delayed-write buffer could not be
 * invalidated (only possible when force == 0).
 */
int
bfinval(dev_t dev, int force)
{
	struct buf *dp;
	struct buf *bp;
	struct buf *binval_list = EMPTY_LIST;
	int i, error = 0;
	kmutex_t *hmp;
	uint_t index;
	struct buf **backp;

	mutex_enter(&blist_lock);
	/*
	 * Wait for any flushes ahead of us to finish, it's ok to
	 * do invalidates in parallel.
	 */
	while (bio_doingflush) {
		bio_flinv_cv_wanted = 1;
		cv_wait(&bio_flushinval_cv, &blist_lock);
	}
	bio_doinginval++;

	/* Gather bp's: collect all this device's buffers on a private list */
	for (i = 0; i < v.v_hbuf; i++) {
		dp = (struct buf *)&hbuf[i];
		hmp = &hbuf[i].b_lock;

		mutex_enter(hmp);
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
			if (bp->b_edev == dev) {
				/* b_list == NULL means not already queued */
				if (bp->b_list == NULL) {
					bp->b_list = binval_list;
					binval_list = bp;
				}
			}
		}
		mutex_exit(hmp);
	}
	mutex_exit(&blist_lock);

	/* Invalidate all bp's found */
	while (binval_list != EMPTY_LIST) {
		bp = binval_list;

		sema_p(&bp->b_sem);
		if (bp->b_edev == dev) {
			if (force && (bp->b_flags & B_DELWRI)) {
				/* clear B_DELWRI, move to non-dw freelist */
				index = bio_bhash(bp->b_edev, bp->b_blkno);
				hmp = &hbuf[index].b_lock;
				dp = (struct buf *)&hbuf[index];
				mutex_enter(hmp);

				/* remove from delayed write freelist */
				notavail(bp);

				/* add to B_AGE side of non-dw freelist */
				backp = &dp->av_forw;
				(*backp)->av_back = bp;
				bp->av_forw = *backp;
				*backp = bp;
				bp->av_back = dp;

				/*
				 * make sure write retries and busy are cleared
				 */
				bp->b_flags &=
				    ~(B_BUSY | B_DELWRI | B_RETRYWRI);
				mutex_exit(hmp);
			}
			if ((bp->b_flags & B_DELWRI) == 0)
				bp->b_flags |= B_STALE|B_AGE;
			else
				error = EIO;	/* unflushed dirty buffer */
		}
		sema_v(&bp->b_sem);
		binval_list = bp->b_list;
		bp->b_list = NULL;
	}
	/* done: wake anyone waiting on flush/invalidate completion */
	mutex_enter(&blist_lock);
	bio_doinginval--;
	if (bio_flinv_cv_wanted) {
		cv_broadcast(&bio_flushinval_cv);
		bio_flinv_cv_wanted = 0;
	}
	mutex_exit(&blist_lock);
	return (error);
}

/*
 * If possible, invalidate blocks for a dev on demand.
 * Non-forcing variant of bfinval(); the return value is discarded.
 */
void
binval(dev_t dev)
{
	(void) bfinval(dev, 0);
}
/*
 * Initialize the buffer I/O system by freeing
 * all buffers and setting all device hash buffer lists to empty.
 *
 * Computes the buffer-cache high-water mark (v.v_bufhwm, in KB) from
 * the bufhwm / bufhwm_pct tunables, sizes the hash table, and sets up
 * the per-bucket hash chains and delayed-write lists as empty
 * self-referencing rings.
 */
void
binit(void)
{
	struct buf *bp;
	unsigned int i, pct;
	ulong_t	bio_max_hwm, bio_default_hwm;

	/*
	 * Maximum/Default values for bufhwm are set to the smallest of:
	 *	- BIO_MAX_PERCENT resp. BIO_BUF_PERCENT of real memory
	 *	- 1/4 of kernel virtual memory
	 *	- INT32_MAX to prevent overflows of v.v_bufhwm (which is int).
	 * Additionally, in order to allow simple tuning by percentage of
	 * physical memory, bufhwm_pct is used to calculate the default if
	 * the value of this tunable is between 0 and BIO_MAX_PERCENT.
	 *
	 * Since the unit for v.v_bufhwm is kilobytes, this allows for
	 * a maximum of 1024 * 2GB == 2TB memory usage by buffer headers.
	 */
	bio_max_hwm = MIN(physmem / BIO_MAX_PERCENT,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_max_hwm = MIN(INT32_MAX, bio_max_hwm);

	pct = BIO_BUF_PERCENT;
	/*
	 * NOTE: pct here is a divisor (cache = physmem / pct), so a
	 * smaller pct means a larger cache; a user-specified bufhwm_pct
	 * that maps below BIO_MAX_PERCENT is rejected as too large.
	 */
	if (bufhwm_pct != 0 &&
	    ((pct = 100 / bufhwm_pct) < BIO_MAX_PERCENT)) {
		pct = BIO_BUF_PERCENT;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN, "binit: bufhwm_pct(%d) out of \
range(1..%d). Using %d as default.",
		    bufhwm_pct,
		    100 / BIO_MAX_PERCENT, 100 / BIO_BUF_PERCENT);
	}

	bio_default_hwm = MIN(physmem / pct,
	    btop(vmem_size(heap_arena, VMEM_FREE)) / 4) * (PAGESIZE / 1024);
	bio_default_hwm = MIN(INT32_MAX, bio_default_hwm);

	if ((v.v_bufhwm = bufhwm) == 0)
		v.v_bufhwm = bio_default_hwm;

	if (v.v_bufhwm < BIO_MIN_HWM || v.v_bufhwm > bio_max_hwm) {
		v.v_bufhwm = (int)bio_max_hwm;
		/*
		 * Invalid user specified value, emit a warning.
		 */
		cmn_err(CE_WARN,
		    "binit: bufhwm(%d) out \
of range(%d..%lu). Using %lu as default",
		    bufhwm,
		    BIO_MIN_HWM, bio_max_hwm, bio_max_hwm);
	}

	/*
	 * Determine the number of hash buckets. Default is to
	 * create ~BIO_HASHLEN entries per chain based on MAXBSIZE buffers.
	 * Round up number to the next power of 2.
	 */
	v.v_hbuf = 1 << highbit((((ulong_t)v.v_bufhwm * 1024) / MAXBSIZE) /
	    BIO_HASHLEN);
	v.v_hmask = v.v_hbuf - 1;
	v.v_buf = BIO_BHDR_POOL;

	hbuf = kmem_zalloc(v.v_hbuf * sizeof (struct hbuf), KM_SLEEP);

	dwbuf = kmem_zalloc(v.v_hbuf * sizeof (struct dwbuf), KM_SLEEP);

	bfreelist.b_bufsize = (size_t)v.v_bufhwm * 1024;
	bp = &bfreelist;
	bp->b_forw = bp->b_back = bp->av_forw = bp->av_back = bp;

	/* make every hash chain and free list an empty ring */
	for (i = 0; i < v.v_hbuf; i++) {
		hbuf[i].b_forw = hbuf[i].b_back = (struct buf *)&hbuf[i];
		hbuf[i].av_forw = hbuf[i].av_back = (struct buf *)&hbuf[i];

		/*
		 * Initialize the delayed write buffer list.
		 */
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}
		 */
		dwbuf[i].b_forw = dwbuf[i].b_back = (struct buf *)&dwbuf[i];
		dwbuf[i].av_forw = dwbuf[i].av_back = (struct buf *)&dwbuf[i];
	}
}

/*
 * Wait for I/O completion on the buffer; return error code.
 * If bp was for synchronous I/O, bp is invalid and associated
 * resources are freed on return.
 *
 * Caller must hold b_sem (asserted below); for async buffers the
 * buffer is left intact so the caller can release resources itself.
 */
int
biowait(struct buf *bp)
{
	int error = 0;
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));

	/*
	 * Account this thread as waiting on I/O in the per-CPU stats
	 * for the duration of the wait.
	 */
	cpup = CPU;
	atomic_inc_64(&cpup->cpu_stats.sys.iowait);
	DTRACE_IO1(wait__start, struct buf *, bp);

	/*
	 * In case of panic, busy wait for completion: the normal
	 * sema_v() wakeup from biodone() may never be delivered.
	 */
	if (panicstr) {
		while ((bp->b_flags & B_DONE) == 0)
			drv_usecwait(10);
	} else
		sema_p(&bp->b_io);

	DTRACE_IO1(wait__done, struct buf *, bp);
	atomic_dec_64(&cpup->cpu_stats.sys.iowait);

	error = geterror(bp);
	if ((bp->b_flags & B_ASYNC) == 0) {
		/* Synchronous I/O: tear down any kernel mapping we hold. */
		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);
	}
	return (error);
}

/*
 * Fire the TNF biodone probe for bp.  Kept out of line so that
 * biodone() itself can be tail-call optimized (see comment there).
 */
static void
biodone_tnf_probe(struct buf *bp)
{
	/* Kernel probe */
	TNF_PROBE_3(biodone, "io blockio", /* CSTYLED */,
	    tnf_device, device, bp->b_edev,
	    tnf_diskaddr, block, bp->b_lblkno,
	    tnf_opaque, buf, bp);
}

/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 *
 * If the buffer has a private b_iodone callback, completion handling
 * is delegated entirely to that callback and we return immediately.
 */
void
biodone(struct buf *bp)
{
	if (bp->b_flags & B_STARTED) {
		DTRACE_IO1(done, struct buf *, bp);
		bp->b_flags &= ~B_STARTED;
	}

	/*
	 * Call the TNF probe here instead of the inline code
	 * to force our compiler to use the tail call optimization.
	 */
	biodone_tnf_probe(bp);

	if (bp->b_iodone != NULL) {
		(*(bp->b_iodone))(bp);
		return;
	}
	ASSERT((bp->b_flags & B_DONE) == 0);
	ASSERT(SEMA_HELD(&bp->b_sem));
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_ASYNC) {
		/* No waiter: release the buffer ourselves. */
		if (bp->b_flags & (B_PAGEIO|B_REMAPPED))
			bio_pageio_done(bp);
		else
			brelse(bp);	/* release bp to freelist */
	} else {
		/* Wake the thread blocked in biowait(). */
		sema_v(&bp->b_io);
	}
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized code (EIO).
 * Returns 0 when B_ERROR is not set.  Caller must hold b_sem.
 */
int
geterror(struct buf *bp)
{
	int error = 0;

	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_ERROR) {
		error = bp->b_error;
		if (!error)
			error = EIO;
	}
	return (error);
}
/*
 * Support for pageio buffers.
 *
 * This stuff should be generalized to provide a generalized bp
 * header facility that can be used for things other than pageio.
 */

/*
 * Allocate and initialize a buf struct for use with pageio.
 *
 * For B_READ requests, page-in statistics (per-CPU and per-zone) are
 * updated and trace/TNF probes fired before the buffer is built.
 * The returned buffer is B_PAGEIO|B_NOCACHE|B_BUSY with b_sem held
 * ("locked") and b_io at 0; caller sets dev & blkno (see below).
 */
struct buf *
pageio_setup(struct page *pp, size_t len, struct vnode *vp, int flags)
{
	struct buf *bp;
	struct cpu *cpup;

	if (flags & B_READ) {
		CPU_STATS_ENTER_K();
		cpup = CPU;	/* get pointer AFTER preemption is disabled */
		CPU_STATS_ADDQ(cpup, vm, pgin, 1);
		CPU_STATS_ADDQ(cpup, vm, pgpgin, btopr(len));

		/* Zone-level page-in accounting mirrors the CPU stats. */
		atomic_add_64(&curzone->zone_pgpgin, btopr(len));

		if ((flags & B_ASYNC) == 0) {
			/*
			 * A synchronous read here is a major fault on
			 * behalf of the current thread; charge its lwp.
			 */
			klwp_t *lwp = ttolwp(curthread);
			if (lwp != NULL)
				lwp->lwp_ru.majflt++;
			CPU_STATS_ADDQ(cpup, vm, maj_fault, 1);
			/* Kernel probe */
			TNF_PROBE_2(major_fault, "vm pagefault", /* CSTYLED */,
			    tnf_opaque, vnode, pp->p_vnode,
			    tnf_offset, offset, pp->p_offset);
		}
		/*
		 * Update statistics for pages being paged in,
		 * classified by backing object: anon (swapfs),
		 * executable mappings, or ordinary file data.
		 */
		if (pp != NULL && pp->p_vnode != NULL) {
			if (IS_SWAPFSVP(pp->p_vnode)) {
				CPU_STATS_ADDQ(cpup, vm, anonpgin, btopr(len));
				atomic_add_64(&curzone->zone_anonpgin,
				    btopr(len));
			} else {
				if (pp->p_vnode->v_flag & VVMEXEC) {
					CPU_STATS_ADDQ(cpup, vm, execpgin,
					    btopr(len));
					atomic_add_64(&curzone->zone_execpgin,
					    btopr(len));
				} else {
					CPU_STATS_ADDQ(cpup, vm, fspgin,
					    btopr(len));
					atomic_add_64(&curzone->zone_fspgin,
					    btopr(len));
				}
			}
		}
		CPU_STATS_EXIT_K();
		TRACE_1(TR_FAC_VM, TR_PAGE_WS_IN,
		    "page_ws_in:pp %p", pp);
		/* Kernel probe */
		TNF_PROBE_3(pagein, "vm pageio io", /* CSTYLED */,
		    tnf_opaque, vnode, pp->p_vnode,
		    tnf_offset, offset, pp->p_offset,
		    tnf_size, size, len);
	}

	bp = kmem_zalloc(sizeof (struct buf), KM_SLEEP);
	bp->b_bcount = len;
	bp->b_bufsize = len;
	bp->b_pages = pp;
	bp->b_flags = B_PAGEIO | B_NOCACHE | B_BUSY | flags;
	bp->b_offset = -1;
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	/* Initialize bp->b_sem in "locked" state */
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);

	/* Hold the vnode for the life of the buffer (released in pageio_done). */
	VN_HOLD(vp);
	bp->b_vp = vp;
	THREAD_KPRI_RELEASE_N(btopr(len));	/* release kpri from page_locks */

	/*
	 * Caller sets dev & blkno and can adjust
	 * b_addr for page offset and can use bp_mapin
	 * to make pages kernel addressable.
	 */
	return (bp);
}

/*
 * Destroy a buf obtained from pageio_setup(): unmap any kernel
 * mapping, drop the vnode hold, and free the header.  Caller must
 * hold b_sem; the buffer is invalid on return.
 */
void
pageio_done(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);
	VN_RELE(bp->b_vp);
	bp->b_vp = NULL;
	/* Only pageio_setup() buffers (always B_NOCACHE) come here. */
	ASSERT((bp->b_flags & B_NOCACHE) != 0);

	/* A sema_v(bp->b_sem) is implied if we are destroying it */
	sema_destroy(&bp->b_sem);
	sema_destroy(&bp->b_io);
	kmem_free(bp, sizeof (struct buf));
}
14137c478bd9Sstevel@tonic-gate */ 14147c478bd9Sstevel@tonic-gate int 14157c478bd9Sstevel@tonic-gate bcheck(dev_t dev, struct buf *sbp) 14167c478bd9Sstevel@tonic-gate { 14177c478bd9Sstevel@tonic-gate struct buf *bp; 14187c478bd9Sstevel@tonic-gate struct buf *dp; 14197c478bd9Sstevel@tonic-gate int i; 14207c478bd9Sstevel@tonic-gate kmutex_t *hmp; 14217c478bd9Sstevel@tonic-gate 14227c478bd9Sstevel@tonic-gate /* 14237c478bd9Sstevel@tonic-gate * check for busy bufs for this filesystem 14247c478bd9Sstevel@tonic-gate */ 14257c478bd9Sstevel@tonic-gate for (i = 0; i < v.v_hbuf; i++) { 14267c478bd9Sstevel@tonic-gate dp = (struct buf *)&hbuf[i]; 14277c478bd9Sstevel@tonic-gate hmp = &hbuf[i].b_lock; 14287c478bd9Sstevel@tonic-gate 14297c478bd9Sstevel@tonic-gate mutex_enter(hmp); 14307c478bd9Sstevel@tonic-gate for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 14317c478bd9Sstevel@tonic-gate /* 14327c478bd9Sstevel@tonic-gate * if buf is busy or dirty, then filesystem is busy 14337c478bd9Sstevel@tonic-gate */ 14347c478bd9Sstevel@tonic-gate if ((bp->b_edev == dev) && 14357c478bd9Sstevel@tonic-gate ((bp->b_flags & B_STALE) == 0) && 14367c478bd9Sstevel@tonic-gate (bp->b_flags & (B_DELWRI|B_BUSY)) && 14377c478bd9Sstevel@tonic-gate (bp != sbp)) { 14387c478bd9Sstevel@tonic-gate mutex_exit(hmp); 14397c478bd9Sstevel@tonic-gate return (1); 14407c478bd9Sstevel@tonic-gate } 14417c478bd9Sstevel@tonic-gate } 14427c478bd9Sstevel@tonic-gate mutex_exit(hmp); 14437c478bd9Sstevel@tonic-gate } 14447c478bd9Sstevel@tonic-gate return (0); 14457c478bd9Sstevel@tonic-gate } 14467c478bd9Sstevel@tonic-gate 14477c478bd9Sstevel@tonic-gate /* 14487c478bd9Sstevel@tonic-gate * Hash two 32 bit entities. 
/*
 * Hash two 32 bit entities.
 *
 * Mixes each byte of x and y into the accumulator with a
 * multiply-by-7-minus-1 step.  The multiply and add are performed in
 * unsigned arithmetic so that wraparound is well defined: the
 * historical version did `hash * 7` on a signed int, which is
 * undefined behavior on overflow.  The right shifts stay on the
 * signed operands (arithmetic shift), so the byte sequence fed into
 * the hash — and therefore the result — is bit-for-bit identical to
 * the original on two's-complement hardware.
 */
int
hash2ints(int x, int y)
{
	unsigned int hash;

	hash = (unsigned int)x - 1;
	hash = ((hash * 7) + (unsigned int)(x >> 8)) - 1;
	hash = ((hash * 7) + (unsigned int)(x >> 16)) - 1;
	hash = ((hash * 7) + (unsigned int)(x >> 24)) - 1;
	hash = ((hash * 7) + (unsigned int)y) - 1;
	hash = ((hash * 7) + (unsigned int)(y >> 8)) - 1;
	hash = ((hash * 7) + (unsigned int)(y >> 16)) - 1;
	hash = ((hash * 7) + (unsigned int)(y >> 24)) - 1;

	/* Conversion back to int wraps modulo 2^32 (two's complement). */
	return ((int)hash);
}
/*
 * Return a new buffer struct.
 *	Create a new buffer if we haven't gone over our high water
 *	mark for memory, otherwise try to get one off the freelist.
 *
 * Returns a locked buf that has no id and is not on any hash or free
 * list.
 */
static struct buf *
bio_getfreeblk(long bsize)
{
	struct buf *bp, *dp;
	struct hbuf *hp;
	kmutex_t *hmp;
	uint_t start, end;

	/*
	 * bfreelist.b_bufsize represents the amount of memory
	 * we are allowed to allocate in the cache before we hit our hwm
	 * (references to bfreelist are protected by bfree_lock).
	 */
	bio_mem_get(bsize);	/* Account for our memory request */

again:
	bp = bio_bhdr_alloc();	/* Get a buf hdr */
	sema_p(&bp->b_sem);	/* Should never fail */

	ASSERT(bp->b_un.b_addr == NULL);
	bp->b_un.b_addr = kmem_alloc(bsize, KM_NOSLEEP);
	if (bp->b_un.b_addr != NULL) {
		/*
		 * Make the common path short
		 */
		bp->b_bufsize = bsize;
		ASSERT(SEMA_HELD(&bp->b_sem));
		return (bp);
	} else {
		struct buf *save;

		save = bp;	/* Save bp we allocated */
		start = end = lastindex;

		biostats.bio_bufwant.value.ui32++;

		/*
		 * Memory isn't available from the system now. Scan
		 * the hash buckets till enough space is found: steal
		 * a free buf of exactly bsize and reuse its memory.
		 */
		do {
			hp = &hbuf[start];
			hmp = &hp->b_lock;
			dp = (struct buf *)hp;

			mutex_enter(hmp);
			bp = dp->av_forw;

			while (bp != dp) {

				ASSERT(bp != NULL);

				/* Skip bufs someone else holds locked. */
				if (!sema_tryp(&bp->b_sem)) {
					bp = bp->av_forw;
					continue;
				}

				/*
				 * Since we are going down the freelist
				 * associated with this hash bucket the
				 * B_DELWRI flag should not be set.
				 */
				ASSERT(!(bp->b_flags & B_DELWRI));

				if (bp->b_bufsize == bsize) {
					hp->b_length--;
					notavail(bp);
					bremhash(bp);
					mutex_exit(hmp);

					/*
					 * Didn't kmem_alloc any more, so don't
					 * count it twice: give back the bsize
					 * we charged in bio_mem_get() above.
					 */
					mutex_enter(&bfree_lock);
					bfreelist.b_bufsize += bsize;
					mutex_exit(&bfree_lock);

					/*
					 * Update the lastindex value so the
					 * next scan resumes where we found
					 * space.
					 */
					lastindex = start;

					/*
					 * Put our saved bp back on the list
					 */
					sema_v(&save->b_sem);
					bio_bhdr_free(save);
					ASSERT(SEMA_HELD(&bp->b_sem));
					return (bp);
				}
				sema_v(&bp->b_sem);
				bp = bp->av_forw;
			}
			mutex_exit(hmp);
			start = ((start + 1) % v.v_hbuf);
		} while (start != end);

		/*
		 * Nothing reusable anywhere: block in kmem until the
		 * system can satisfy the allocation.
		 */
		biostats.bio_bufwait.value.ui32++;
		bp = save;	/* Use original bp */
		bp->b_un.b_addr = kmem_alloc(bsize, KM_SLEEP);
	}

	bp->b_bufsize = bsize;
	ASSERT(SEMA_HELD(&bp->b_sem));
	return (bp);
}
/*
 * Allocate a buffer header. If none currently available, allocate
 * a new pool of v.v_buf headers.  Loops (recycling from the free
 * lists) until a header can be returned; never returns NULL.
 */
static struct buf *
bio_bhdr_alloc(void)
{
	struct buf *dp, *sdp;
	struct buf *bp;
	int i;

	for (;;) {
		/* Fast path: pop a header off the global bhdrlist. */
		mutex_enter(&bhdr_lock);
		if (bhdrlist != NULL) {
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);
			bp->av_forw = NULL;
			return (bp);
		}
		mutex_exit(&bhdr_lock);

		/*
		 * Need to allocate a new pool. If the system is currently
		 * out of memory, then try freeing things on the freelist.
		 */
		dp = kmem_zalloc(sizeof (struct buf) * v.v_buf, KM_NOSLEEP);
		if (dp == NULL) {
			/*
			 * System can't give us a pool of headers, try
			 * recycling from the free lists.
			 */
			bio_recycle(BIO_HEADER, 0);
		} else {
			/* Initialize the pool and chain it via av_forw. */
			sdp = dp;
			for (i = 0; i < v.v_buf; i++, dp++) {
				/*
				 * The next two lines are needed since NODEV
				 * is -1 and not NULL
				 */
				dp->b_dev = (o_dev_t)NODEV;
				dp->b_edev = NODEV;
				dp->av_forw = dp + 1;
				sema_init(&dp->b_sem, 1, NULL, SEMA_DEFAULT,
				    NULL);
				sema_init(&dp->b_io, 0, NULL, SEMA_DEFAULT,
				    NULL);
				dp->b_offset = -1;
			}
			/* Splice the new pool onto bhdrlist and take one. */
			mutex_enter(&bhdr_lock);
			(--dp)->av_forw = bhdrlist;	/* Fix last pointer */
			bhdrlist = sdp;
			nbuf += v.v_buf;
			bp = bhdrlist;
			bhdrlist = bp->av_forw;
			mutex_exit(&bhdr_lock);

			bp->av_forw = NULL;
			return (bp);
		}
	}
}

/*
 * Return a clean, unused buffer header to the global bhdrlist.
 * The asserts document the required state: unlinked, no data
 * buffer, no device identity, no flags.
 */
static void
bio_bhdr_free(struct buf *bp)
{
	ASSERT(bp->b_back == NULL);
	ASSERT(bp->b_forw == NULL);
	ASSERT(bp->av_back == NULL);
	ASSERT(bp->av_forw == NULL);
	ASSERT(bp->b_un.b_addr == NULL);
	ASSERT(bp->b_dev == (o_dev_t)NODEV);
	ASSERT(bp->b_edev == NODEV);
	ASSERT(bp->b_flags == 0);

	mutex_enter(&bhdr_lock);
	bp->av_forw = bhdrlist;
	bhdrlist = bp;
	mutex_exit(&bhdr_lock);
}

/*
 * If we haven't gone over the high water mark, it's o.k. to
 * allocate more buffer space, otherwise recycle buffers
 * from the freelist until enough memory is free for a bsize request.
 *
 * We account for this memory, even though
 * we don't allocate it here.
 */
static void
bio_mem_get(long bsize)
{
	mutex_enter(&bfree_lock);
	if (bfreelist.b_bufsize > bsize) {
		/* Under the hwm: just charge the request and return. */
		bfreelist.b_bufsize -= bsize;
		mutex_exit(&bfree_lock);
		return;
	}
	mutex_exit(&bfree_lock);
	/* Over the hwm: bio_recycle() does the accounting on success. */
	bio_recycle(BIO_MEM, bsize);
}
16907c478bd9Sstevel@tonic-gate */ 16917c478bd9Sstevel@tonic-gate static void 16927c478bd9Sstevel@tonic-gate bio_flushlist(struct buf *delwri_list) 16937c478bd9Sstevel@tonic-gate { 16947c478bd9Sstevel@tonic-gate struct buf *bp; 16957c478bd9Sstevel@tonic-gate 16967c478bd9Sstevel@tonic-gate while (delwri_list != EMPTY_LIST) { 16977c478bd9Sstevel@tonic-gate bp = delwri_list; 16987c478bd9Sstevel@tonic-gate bp->b_flags |= B_AGE | B_ASYNC; 16997c478bd9Sstevel@tonic-gate if (bp->b_vp == NULL) { /* !ufs */ 17007c478bd9Sstevel@tonic-gate BWRITE(bp); 17017c478bd9Sstevel@tonic-gate } else { /* ufs */ 17027c478bd9Sstevel@tonic-gate UFS_BWRITE(VTOI(bp->b_vp)->i_ufsvfs, bp); 17037c478bd9Sstevel@tonic-gate } 17047c478bd9Sstevel@tonic-gate delwri_list = bp->b_list; 17057c478bd9Sstevel@tonic-gate bp->b_list = NULL; 17067c478bd9Sstevel@tonic-gate } 17077c478bd9Sstevel@tonic-gate } 17087c478bd9Sstevel@tonic-gate 17097c478bd9Sstevel@tonic-gate /* 17107c478bd9Sstevel@tonic-gate * Start recycling buffers on the freelist for one of 2 reasons: 17117c478bd9Sstevel@tonic-gate * - we need a buffer header 17127c478bd9Sstevel@tonic-gate * - we need to free up memory 17137c478bd9Sstevel@tonic-gate * Once started we continue to recycle buffers until the B_AGE 17147c478bd9Sstevel@tonic-gate * buffers are gone. 17157c478bd9Sstevel@tonic-gate */ 17167c478bd9Sstevel@tonic-gate static void 17177c478bd9Sstevel@tonic-gate bio_recycle(int want, long bsize) 17187c478bd9Sstevel@tonic-gate { 17197c478bd9Sstevel@tonic-gate struct buf *bp, *dp, *dwp, *nbp; 17207c478bd9Sstevel@tonic-gate struct hbuf *hp; 17217c478bd9Sstevel@tonic-gate int found = 0; 17227c478bd9Sstevel@tonic-gate kmutex_t *hmp; 17237c478bd9Sstevel@tonic-gate int start, end; 17247c478bd9Sstevel@tonic-gate struct buf *delwri_list = EMPTY_LIST; 17257c478bd9Sstevel@tonic-gate 17267c478bd9Sstevel@tonic-gate /* 17277c478bd9Sstevel@tonic-gate * Recycle buffers. 
17287c478bd9Sstevel@tonic-gate */ 17297c478bd9Sstevel@tonic-gate top: 17307c478bd9Sstevel@tonic-gate start = end = lastindex; 17317c478bd9Sstevel@tonic-gate do { 17327c478bd9Sstevel@tonic-gate hp = &hbuf[start]; 17337c478bd9Sstevel@tonic-gate hmp = &hp->b_lock; 17347c478bd9Sstevel@tonic-gate dp = (struct buf *)hp; 17357c478bd9Sstevel@tonic-gate 17367c478bd9Sstevel@tonic-gate mutex_enter(hmp); 17377c478bd9Sstevel@tonic-gate bp = dp->av_forw; 17387c478bd9Sstevel@tonic-gate 17397c478bd9Sstevel@tonic-gate while (bp != dp) { 17407c478bd9Sstevel@tonic-gate 17417c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 17427c478bd9Sstevel@tonic-gate 17437c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) { 17447c478bd9Sstevel@tonic-gate bp = bp->av_forw; 17457c478bd9Sstevel@tonic-gate continue; 17467c478bd9Sstevel@tonic-gate } 17477c478bd9Sstevel@tonic-gate /* 17487c478bd9Sstevel@tonic-gate * Do we really want to nuke all of the B_AGE stuff?? 17497c478bd9Sstevel@tonic-gate */ 17507c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_AGE) == 0 && found) { 17517c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 17527c478bd9Sstevel@tonic-gate mutex_exit(hmp); 17537c478bd9Sstevel@tonic-gate lastindex = start; 17547c478bd9Sstevel@tonic-gate return; /* All done */ 17557c478bd9Sstevel@tonic-gate } 17567c478bd9Sstevel@tonic-gate 17577c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&hp->b_lock)); 17587c478bd9Sstevel@tonic-gate ASSERT(!(bp->b_flags & B_DELWRI)); 17597c478bd9Sstevel@tonic-gate hp->b_length--; 17607c478bd9Sstevel@tonic-gate notavail(bp); 17617c478bd9Sstevel@tonic-gate 17627c478bd9Sstevel@tonic-gate /* 17637c478bd9Sstevel@tonic-gate * Remove bhdr from cache, free up memory, 17647c478bd9Sstevel@tonic-gate * and add the hdr to the freelist. 
17657c478bd9Sstevel@tonic-gate */ 17667c478bd9Sstevel@tonic-gate bremhash(bp); 17677c478bd9Sstevel@tonic-gate mutex_exit(hmp); 17687c478bd9Sstevel@tonic-gate 17697c478bd9Sstevel@tonic-gate if (bp->b_bufsize) { 17707c478bd9Sstevel@tonic-gate kmem_free(bp->b_un.b_addr, bp->b_bufsize); 17717c478bd9Sstevel@tonic-gate bp->b_un.b_addr = NULL; 17727c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock); 17737c478bd9Sstevel@tonic-gate bfreelist.b_bufsize += bp->b_bufsize; 17747c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock); 17757c478bd9Sstevel@tonic-gate } 17767c478bd9Sstevel@tonic-gate 17777c478bd9Sstevel@tonic-gate bp->b_dev = (o_dev_t)NODEV; 17787c478bd9Sstevel@tonic-gate bp->b_edev = NODEV; 17797c478bd9Sstevel@tonic-gate bp->b_flags = 0; 17807c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 17817c478bd9Sstevel@tonic-gate bio_bhdr_free(bp); 17827c478bd9Sstevel@tonic-gate if (want == BIO_HEADER) { 17837c478bd9Sstevel@tonic-gate found = 1; 17847c478bd9Sstevel@tonic-gate } else { 17857c478bd9Sstevel@tonic-gate ASSERT(want == BIO_MEM); 17867c478bd9Sstevel@tonic-gate if (!found && bfreelist.b_bufsize >= bsize) { 17877c478bd9Sstevel@tonic-gate /* Account for the memory we want */ 17887c478bd9Sstevel@tonic-gate mutex_enter(&bfree_lock); 17897c478bd9Sstevel@tonic-gate if (bfreelist.b_bufsize >= bsize) { 17907c478bd9Sstevel@tonic-gate bfreelist.b_bufsize -= bsize; 17917c478bd9Sstevel@tonic-gate found = 1; 17927c478bd9Sstevel@tonic-gate } 17937c478bd9Sstevel@tonic-gate mutex_exit(&bfree_lock); 17947c478bd9Sstevel@tonic-gate } 17957c478bd9Sstevel@tonic-gate } 17967c478bd9Sstevel@tonic-gate 17977c478bd9Sstevel@tonic-gate /* 17987c478bd9Sstevel@tonic-gate * Since we dropped hmp start from the 17997c478bd9Sstevel@tonic-gate * begining. 
18007c478bd9Sstevel@tonic-gate */ 18017c478bd9Sstevel@tonic-gate mutex_enter(hmp); 18027c478bd9Sstevel@tonic-gate bp = dp->av_forw; 18037c478bd9Sstevel@tonic-gate } 18047c478bd9Sstevel@tonic-gate mutex_exit(hmp); 18057c478bd9Sstevel@tonic-gate 18067c478bd9Sstevel@tonic-gate /* 18077c478bd9Sstevel@tonic-gate * Look at the delayed write list. 18087c478bd9Sstevel@tonic-gate * First gather into a private list, then write them. 18097c478bd9Sstevel@tonic-gate */ 18107c478bd9Sstevel@tonic-gate dwp = (struct buf *)&dwbuf[start]; 18117c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock); 18127c478bd9Sstevel@tonic-gate bio_doingflush++; 18137c478bd9Sstevel@tonic-gate mutex_enter(hmp); 18147c478bd9Sstevel@tonic-gate for (bp = dwp->av_forw; bp != dwp; bp = nbp) { 18157c478bd9Sstevel@tonic-gate 18167c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 18177c478bd9Sstevel@tonic-gate nbp = bp->av_forw; 18187c478bd9Sstevel@tonic-gate 18197c478bd9Sstevel@tonic-gate if (!sema_tryp(&bp->b_sem)) 18207c478bd9Sstevel@tonic-gate continue; 18217c478bd9Sstevel@tonic-gate ASSERT(bp->b_flags & B_DELWRI); 18227c478bd9Sstevel@tonic-gate /* 18237c478bd9Sstevel@tonic-gate * Do we really want to nuke all of the B_AGE stuff?? 
18247c478bd9Sstevel@tonic-gate */ 18257c478bd9Sstevel@tonic-gate 18267c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_AGE) == 0 && found) { 18277c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 18287c478bd9Sstevel@tonic-gate mutex_exit(hmp); 18297c478bd9Sstevel@tonic-gate lastindex = start; 18307c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock); 18317c478bd9Sstevel@tonic-gate bio_flushlist(delwri_list); 18327c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock); 18337c478bd9Sstevel@tonic-gate bio_doingflush--; 18347c478bd9Sstevel@tonic-gate if (bio_flinv_cv_wanted) { 18357c478bd9Sstevel@tonic-gate bio_flinv_cv_wanted = 0; 18367c478bd9Sstevel@tonic-gate cv_broadcast(&bio_flushinval_cv); 18377c478bd9Sstevel@tonic-gate } 18387c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock); 18397c478bd9Sstevel@tonic-gate return; /* All done */ 18407c478bd9Sstevel@tonic-gate } 18417c478bd9Sstevel@tonic-gate 18427c478bd9Sstevel@tonic-gate /* 18437c478bd9Sstevel@tonic-gate * If the buffer is already on a flush or 18447c478bd9Sstevel@tonic-gate * invalidate list then just skip it. 18457c478bd9Sstevel@tonic-gate */ 18467c478bd9Sstevel@tonic-gate if (bp->b_list != NULL) { 18477c478bd9Sstevel@tonic-gate sema_v(&bp->b_sem); 18487c478bd9Sstevel@tonic-gate continue; 18497c478bd9Sstevel@tonic-gate } 18507c478bd9Sstevel@tonic-gate /* 18517c478bd9Sstevel@tonic-gate * We are still on the same bucket. 
18527c478bd9Sstevel@tonic-gate */ 18537c478bd9Sstevel@tonic-gate hp->b_length--; 18547c478bd9Sstevel@tonic-gate notavail(bp); 18557c478bd9Sstevel@tonic-gate bp->b_list = delwri_list; 18567c478bd9Sstevel@tonic-gate delwri_list = bp; 18577c478bd9Sstevel@tonic-gate } 18587c478bd9Sstevel@tonic-gate mutex_exit(hmp); 18597c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock); 18607c478bd9Sstevel@tonic-gate bio_flushlist(delwri_list); 18617c478bd9Sstevel@tonic-gate delwri_list = EMPTY_LIST; 18627c478bd9Sstevel@tonic-gate mutex_enter(&blist_lock); 18637c478bd9Sstevel@tonic-gate bio_doingflush--; 18647c478bd9Sstevel@tonic-gate if (bio_flinv_cv_wanted) { 18657c478bd9Sstevel@tonic-gate bio_flinv_cv_wanted = 0; 18667c478bd9Sstevel@tonic-gate cv_broadcast(&bio_flushinval_cv); 18677c478bd9Sstevel@tonic-gate } 18687c478bd9Sstevel@tonic-gate mutex_exit(&blist_lock); 18697c478bd9Sstevel@tonic-gate start = (start + 1) % v.v_hbuf; 18707c478bd9Sstevel@tonic-gate 18717c478bd9Sstevel@tonic-gate } while (start != end); 18727c478bd9Sstevel@tonic-gate 18737c478bd9Sstevel@tonic-gate if (found) 18747c478bd9Sstevel@tonic-gate return; 18757c478bd9Sstevel@tonic-gate 18767c478bd9Sstevel@tonic-gate /* 18777c478bd9Sstevel@tonic-gate * Free lists exhausted and we haven't satisfied the request. 18787c478bd9Sstevel@tonic-gate * Wait here for more entries to be added to freelist. 18797c478bd9Sstevel@tonic-gate * Because this might have just happened, make it timed. 
	 */
	mutex_enter(&bfree_lock);
	bfreelist.b_flags |= B_WANTED;
	/*
	 * Bounded wait (hz ticks, i.e. about one second) rather than an
	 * untimed cv_wait(): a buffer may have been freed between our scan
	 * and taking bfree_lock, so never risk sleeping forever on a
	 * wakeup that already happened.
	 */
	(void) cv_reltimedwait(&bio_mem_cv, &bfree_lock, hz, TR_CLOCK_TICK);
	mutex_exit(&bfree_lock);
	goto top;
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 *
 * Returns 1 if a non-stale buffer for (dev, blkno) is present in the
 * buffer hash, 0 otherwise.  Only the single hash-bucket lock is taken;
 * the answer is advisory and may be stale by the time the caller acts
 * on it.
 */
static int
bio_incore(dev_t dev, daddr_t blkno)
{
	struct buf *bp;
	struct buf *dp;
	uint_t index;
	kmutex_t *hmp;

	index = bio_bhash(dev, blkno);
	/* The hash header doubles as the list terminator sentinel. */
	dp = (struct buf *)&hbuf[index];
	hmp = &hbuf[index].b_lock;

	mutex_enter(hmp);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		/* B_STALE buffers no longer identify valid cached data. */
		if (bp->b_blkno == blkno && bp->b_edev == dev &&
		    (bp->b_flags & B_STALE) == 0) {
			mutex_exit(hmp);
			return (1);
		}
	}
	mutex_exit(hmp);
	return (0);
}

/*
 * I/O completion for a buffer obtained from pageio_setup() or bp_mapin().
 * For B_PAGEIO buffers: drop any kernel mapping, hand the page list back
 * to the VM layer (pvn_read_done/pvn_write_done), then release the pageio
 * buf itself.  Otherwise the buf must be a B_REMAPPED one: unmap it and
 * release it through brelse().
 */
static void
bio_pageio_done(struct buf *bp)
{
	if (bp->b_flags & B_PAGEIO) {

		if (bp->b_flags & B_REMAPPED)
			bp_mapout(bp);

		if (bp->b_flags & B_READ)
			pvn_read_done(bp->b_pages, bp->b_flags);
		else
			/* pvn_write_done() needs B_WRITE set explicitly. */
			pvn_write_done(bp->b_pages, B_WRITE | bp->b_flags);
		pageio_done(bp);
	} else {
		ASSERT(bp->b_flags & B_REMAPPED);
		bp_mapout(bp);
		brelse(bp);
	}
}

/*
 * bioerror(9F) - indicate error in buffer header
 * If 'error' is zero, remove the error indication.
 *
 * Caller must own the buffer (b_sem held); B_ERROR in b_flags and
 * b_error are kept consistent with each other.
 */
void
bioerror(struct buf *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);
	ASSERT(SEMA_HELD(&bp->b_sem));

	if (error != 0) {
		bp->b_flags |= B_ERROR;
	} else {
		bp->b_flags &= ~B_ERROR;
	}
	bp->b_error = error;
}

/*
 * bioreset(9F) - reuse a private buffer header after I/O is complete
 *
 * Equivalent to tearing the buf down and re-running bioinit(); any
 * state from the previous I/O is discarded.
 */
void
bioreset(struct buf *bp)
{
	ASSERT(bp != NULL);

	biofini(bp);
	bioinit(bp);
}

/*
 * biosize(9F) - return size of a buffer header
 *
 * Lets drivers allocate buf storage without compile-time dependence on
 * the size of struct buf.
 */
size_t
biosize(void)
{
	return (sizeof (struct buf));
}

/*
 * biomodified(9F) - check if buffer is modified
 *
 * Returns 1 if any page backing a B_PAGEIO buffer carries the hardware
 * modified bit, 0 if none does, and -1 if the buffer is not a paged
 * buffer (so the question cannot be answered here).
 */
int
biomodified(struct buf *bp)
{
	int npf;
	int ppattr;
	struct page *pp;

	ASSERT(bp != NULL);

	if ((bp->b_flags & B_PAGEIO) == 0) {
		return (-1);
	}
	pp = bp->b_pages;
	/*
	 * Number of pages spanned: round the byte count up, accounting for
	 * the transfer possibly starting mid-page.
	 */
	npf = btopr(bp->b_bcount + ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET));

	while (npf > 0) {
		/*
		 * Sync the HAT's per-page ref/mod state; stop early as soon
		 * as a modified page is seen.
		 */
		ppattr = hat_pagesync(pp, HAT_SYNC_DONTZERO |
		    HAT_SYNC_STOPON_MOD);
		if (ppattr & P_MOD)
			return (1);
		pp = pp->p_next;
		npf--;
	}

	return (0);
}
/*
 * bioinit(9F) - initialize a buffer structure
 *
 * Zeroes the buf and initializes its two semaphores: b_sem (buffer
 * ownership) and b_io (I/O completion), both created unavailable.
 * b_offset is set to -1, i.e. "no offset assigned yet".
 */
void
bioinit(struct buf *bp)
{
	bzero(bp, sizeof (struct buf));
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);
	bp->b_offset = -1;
}

/*
 * biofini(9F) - uninitialize a buffer structure
 *
 * Destroys the semaphores in the reverse of their bioinit() creation
 * order.  Must be called before freeing a bioinit()ed buf.
 */
void
biofini(struct buf *bp)
{
	sema_destroy(&bp->b_io);
	sema_destroy(&bp->b_sem);
}

/*
 * bioclone(9F) - clone a buffer
 *
 * Builds a buf describing a sub-range [off, off+len) of the original
 * bp's transfer, redirected to (dev, blkno) with 'iodone' as the
 * completion routine.  If bp_mem is NULL a new buf is allocated with
 * kmem_alloc(sleep) -- with KM_NOSLEEP that can fail, returning NULL;
 * otherwise the caller-supplied header is bioreset() and reused.
 */
struct buf *
bioclone(struct buf *bp, off_t off, size_t len, dev_t dev, daddr_t blkno,
	int (*iodone)(struct buf *), struct buf *bp_mem, int sleep)
{
	struct buf *bufp;

	ASSERT(bp);
	if (bp_mem == NULL) {
		bufp = kmem_alloc(sizeof (struct buf), sleep);
		if (bufp == NULL) {
			return (NULL);
		}
		bioinit(bufp);
	} else {
		bufp = bp_mem;
		bioreset(bufp);
	}

#define	BUF_CLONE_FLAGS	(B_READ|B_WRITE|B_SHADOW|B_PHYS|B_PAGEIO|B_FAILFAST|\
	B_ABRWRITE)

	/*
	 * The cloned buffer does not inherit the B_REMAPPED flag.
	 * Only the transfer-describing flags above are copied; the clone
	 * is always marked B_BUSY (owned, in flight).
	 */
	bufp->b_flags = (bp->b_flags & BUF_CLONE_FLAGS) | B_BUSY;
	bufp->b_bcount = len;
	bufp->b_blkno = blkno;
	bufp->b_iodone = iodone;
	bufp->b_proc = bp->b_proc;
	bufp->b_edev = dev;
	bufp->b_file = bp->b_file;
	bufp->b_offset = bp->b_offset;

	if (bp->b_flags & B_SHADOW) {
		ASSERT(bp->b_shadow);
		ASSERT(bp->b_flags & B_PHYS);

		/*
		 * Advance the shadow page-frame list by the number of whole
		 * pages that 'off' (plus the original sub-page offset)
		 * skips, and slide the virtual address by 'off'.
		 */
		bufp->b_shadow = bp->b_shadow +
		    btop(((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off);
		bufp->b_un.b_addr = (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
		/*
		 * If the original was remapped into kernel space, its
		 * b_proc no longer identifies a usable user context for
		 * the clone's address -- drop it.
		 */
		if (bp->b_flags & B_REMAPPED)
			bufp->b_proc = NULL;
	} else {
		if (bp->b_flags & B_PAGEIO) {
			struct page *pp;
			off_t o;
			int i;

			/*
			 * Walk the page list forward to the page containing
			 * the clone's starting offset; b_un.b_addr holds
			 * only the residual offset within that page.
			 */
			pp = bp->b_pages;
			o = ((uintptr_t)bp->b_un.b_addr & PAGEOFFSET) + off;
			for (i = btop(o); i > 0; i--) {
				pp = pp->p_next;
			}
			bufp->b_pages = pp;
			bufp->b_un.b_addr = (caddr_t)(o & PAGEOFFSET);
		} else {
			bufp->b_un.b_addr =
			    (caddr_t)((uintptr_t)bp->b_un.b_addr + off);
			/* See the B_SHADOW case above for why b_proc drops. */
			if (bp->b_flags & B_REMAPPED)
				bufp->b_proc = NULL;
		}
	}
	return (bufp);
}