17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5d216dff5SRobert Mastors * Common Development and Distribution License (the "License").
6d216dff5SRobert Mastors * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
210dfe541eSEvan Layton
227c478bd9Sstevel@tonic-gate /*
23422d9515SGerald Thornbrugh * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate */
257c478bd9Sstevel@tonic-gate
260dfe541eSEvan Layton /*
270dfe541eSEvan Layton * Copyright 2018 Nexenta Systems, Inc.
280dfe541eSEvan Layton */
290dfe541eSEvan Layton
307c478bd9Sstevel@tonic-gate #include <sys/systm.h>
317c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
327c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
337c478bd9Sstevel@tonic-gate #include <sys/disp.h>
347c478bd9Sstevel@tonic-gate #include <sys/id_space.h>
357c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
367c478bd9Sstevel@tonic-gate #include <rpc/rpc.h>
377c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
387c478bd9Sstevel@tonic-gate #include <nfs/nfs4_db_impl.h>
39f6cf9e50SRick Mesta #include <sys/sdt.h>
407c478bd9Sstevel@tonic-gate
417c478bd9Sstevel@tonic-gate static int rfs4_reap_interval = RFS4_REAP_INTERVAL;
427c478bd9Sstevel@tonic-gate
437c478bd9Sstevel@tonic-gate static void rfs4_dbe_reap(rfs4_table_t *, time_t, uint32_t);
447c478bd9Sstevel@tonic-gate static void rfs4_dbe_destroy(rfs4_dbe_t *);
45d216dff5SRobert Mastors static rfs4_dbe_t *rfs4_dbe_create(rfs4_table_t *, id_t, rfs4_entry_t);
467c478bd9Sstevel@tonic-gate static void rfs4_start_reaper(rfs4_table_t *);
477c478bd9Sstevel@tonic-gate
48f6cf9e50SRick Mesta /*
49f6cf9e50SRick Mesta * t_lowat - integer percentage of table entries /etc/system only
50f6cf9e50SRick Mesta * t_hiwat - integer percentage of table entries /etc/system only
51f6cf9e50SRick Mesta * t_lreap - integer percentage of table reap time mdb or /etc/system
52f6cf9e50SRick Mesta * t_hreap - integer percentage of table reap time mdb or /etc/system
53f6cf9e50SRick Mesta */
54f6cf9e50SRick Mesta uint32_t t_lowat = 50; /* reap at t_lreap when id's in use hit 50% */
55f6cf9e50SRick Mesta uint32_t t_hiwat = 75; /* reap at t_hreap when id's in use hit 75% */
56f6cf9e50SRick Mesta time_t t_lreap = 50; /* default to 50% of table's reap interval */
57f6cf9e50SRick Mesta time_t t_hreap = 10; /* default to 10% of table's reap interval */
58f6cf9e50SRick Mesta
597c478bd9Sstevel@tonic-gate id_t
rfs4_dbe_getid(rfs4_dbe_t * entry)60d216dff5SRobert Mastors rfs4_dbe_getid(rfs4_dbe_t *entry)
617c478bd9Sstevel@tonic-gate {
62d216dff5SRobert Mastors return (entry->dbe_id);
637c478bd9Sstevel@tonic-gate }
647c478bd9Sstevel@tonic-gate
657c478bd9Sstevel@tonic-gate void
rfs4_dbe_hold(rfs4_dbe_t * entry)66d216dff5SRobert Mastors rfs4_dbe_hold(rfs4_dbe_t *entry)
677c478bd9Sstevel@tonic-gate {
681a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&entry->dbe_refcnt);
697c478bd9Sstevel@tonic-gate }
707c478bd9Sstevel@tonic-gate
717c478bd9Sstevel@tonic-gate /*
727c478bd9Sstevel@tonic-gate * rfs4_dbe_rele_nolock only decrements the reference count of the entry.
737c478bd9Sstevel@tonic-gate */
747c478bd9Sstevel@tonic-gate void
rfs4_dbe_rele_nolock(rfs4_dbe_t * entry)75d216dff5SRobert Mastors rfs4_dbe_rele_nolock(rfs4_dbe_t *entry)
767c478bd9Sstevel@tonic-gate {
771a5e258fSJosef 'Jeff' Sipek atomic_dec_32(&entry->dbe_refcnt);
787c478bd9Sstevel@tonic-gate }
797c478bd9Sstevel@tonic-gate
807c478bd9Sstevel@tonic-gate
817c478bd9Sstevel@tonic-gate uint32_t
rfs4_dbe_refcnt(rfs4_dbe_t * entry)82d216dff5SRobert Mastors rfs4_dbe_refcnt(rfs4_dbe_t *entry)
837c478bd9Sstevel@tonic-gate {
84d216dff5SRobert Mastors return (entry->dbe_refcnt);
857c478bd9Sstevel@tonic-gate }
867c478bd9Sstevel@tonic-gate
877c478bd9Sstevel@tonic-gate /*
887c478bd9Sstevel@tonic-gate * Mark an entry such that the dbsearch will skip it.
897c478bd9Sstevel@tonic-gate * Caller does not want this entry to be found any longer
907c478bd9Sstevel@tonic-gate */
917c478bd9Sstevel@tonic-gate void
rfs4_dbe_invalidate(rfs4_dbe_t * entry)92d216dff5SRobert Mastors rfs4_dbe_invalidate(rfs4_dbe_t *entry)
937c478bd9Sstevel@tonic-gate {
94d216dff5SRobert Mastors entry->dbe_invalid = TRUE;
95d216dff5SRobert Mastors entry->dbe_skipsearch = TRUE;
967c478bd9Sstevel@tonic-gate }
977c478bd9Sstevel@tonic-gate
987c478bd9Sstevel@tonic-gate /*
997c478bd9Sstevel@tonic-gate * Is this entry invalid?
1007c478bd9Sstevel@tonic-gate */
1017c478bd9Sstevel@tonic-gate bool_t
rfs4_dbe_is_invalid(rfs4_dbe_t * entry)102d216dff5SRobert Mastors rfs4_dbe_is_invalid(rfs4_dbe_t *entry)
1037c478bd9Sstevel@tonic-gate {
104d216dff5SRobert Mastors return (entry->dbe_invalid);
1057c478bd9Sstevel@tonic-gate }
1067c478bd9Sstevel@tonic-gate
1077c478bd9Sstevel@tonic-gate time_t
rfs4_dbe_get_timerele(rfs4_dbe_t * entry)108d216dff5SRobert Mastors rfs4_dbe_get_timerele(rfs4_dbe_t *entry)
1097c478bd9Sstevel@tonic-gate {
110d216dff5SRobert Mastors return (entry->dbe_time_rele);
1117c478bd9Sstevel@tonic-gate }
1127c478bd9Sstevel@tonic-gate
1137c478bd9Sstevel@tonic-gate /*
1147c478bd9Sstevel@tonic-gate * Use these to temporarily hide/unhide a db entry.
1157c478bd9Sstevel@tonic-gate */
1167c478bd9Sstevel@tonic-gate void
rfs4_dbe_hide(rfs4_dbe_t * entry)117d216dff5SRobert Mastors rfs4_dbe_hide(rfs4_dbe_t *entry)
1187c478bd9Sstevel@tonic-gate {
119d216dff5SRobert Mastors rfs4_dbe_lock(entry);
120d216dff5SRobert Mastors entry->dbe_skipsearch = TRUE;
121d216dff5SRobert Mastors rfs4_dbe_unlock(entry);
1227c478bd9Sstevel@tonic-gate }
1237c478bd9Sstevel@tonic-gate
1247c478bd9Sstevel@tonic-gate void
rfs4_dbe_unhide(rfs4_dbe_t * entry)125d216dff5SRobert Mastors rfs4_dbe_unhide(rfs4_dbe_t *entry)
1267c478bd9Sstevel@tonic-gate {
127d216dff5SRobert Mastors rfs4_dbe_lock(entry);
128d216dff5SRobert Mastors entry->dbe_skipsearch = FALSE;
129d216dff5SRobert Mastors rfs4_dbe_unlock(entry);
1307c478bd9Sstevel@tonic-gate }
1317c478bd9Sstevel@tonic-gate
1327c478bd9Sstevel@tonic-gate void
rfs4_dbe_rele(rfs4_dbe_t * entry)133d216dff5SRobert Mastors rfs4_dbe_rele(rfs4_dbe_t *entry)
1347c478bd9Sstevel@tonic-gate {
135d216dff5SRobert Mastors mutex_enter(entry->dbe_lock);
136d216dff5SRobert Mastors ASSERT(entry->dbe_refcnt > 1);
1371a5e258fSJosef 'Jeff' Sipek atomic_dec_32(&entry->dbe_refcnt);
138d216dff5SRobert Mastors entry->dbe_time_rele = gethrestime_sec();
139d216dff5SRobert Mastors mutex_exit(entry->dbe_lock);
1407c478bd9Sstevel@tonic-gate }
1417c478bd9Sstevel@tonic-gate
1427c478bd9Sstevel@tonic-gate void
rfs4_dbe_lock(rfs4_dbe_t * entry)143d216dff5SRobert Mastors rfs4_dbe_lock(rfs4_dbe_t *entry)
1447c478bd9Sstevel@tonic-gate {
145d216dff5SRobert Mastors mutex_enter(entry->dbe_lock);
1467c478bd9Sstevel@tonic-gate }
1477c478bd9Sstevel@tonic-gate
1487c478bd9Sstevel@tonic-gate void
rfs4_dbe_unlock(rfs4_dbe_t * entry)149d216dff5SRobert Mastors rfs4_dbe_unlock(rfs4_dbe_t *entry)
1507c478bd9Sstevel@tonic-gate {
151d216dff5SRobert Mastors mutex_exit(entry->dbe_lock);
1527c478bd9Sstevel@tonic-gate }
1537c478bd9Sstevel@tonic-gate
1547c478bd9Sstevel@tonic-gate bool_t
rfs4_dbe_islocked(rfs4_dbe_t * entry)155d216dff5SRobert Mastors rfs4_dbe_islocked(rfs4_dbe_t *entry)
1567c478bd9Sstevel@tonic-gate {
157d216dff5SRobert Mastors return (mutex_owned(entry->dbe_lock));
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate
1607c478bd9Sstevel@tonic-gate clock_t
rfs4_dbe_twait(rfs4_dbe_t * entry,clock_t timeout)161d216dff5SRobert Mastors rfs4_dbe_twait(rfs4_dbe_t *entry, clock_t timeout)
1627c478bd9Sstevel@tonic-gate {
163d216dff5SRobert Mastors return (cv_timedwait(entry->dbe_cv, entry->dbe_lock, timeout));
1647c478bd9Sstevel@tonic-gate }
1657c478bd9Sstevel@tonic-gate
1667c478bd9Sstevel@tonic-gate void
rfs4_dbe_cv_broadcast(rfs4_dbe_t * entry)167d216dff5SRobert Mastors rfs4_dbe_cv_broadcast(rfs4_dbe_t *entry)
1687c478bd9Sstevel@tonic-gate {
169d216dff5SRobert Mastors cv_broadcast(entry->dbe_cv);
1707c478bd9Sstevel@tonic-gate }
1717c478bd9Sstevel@tonic-gate
1727c478bd9Sstevel@tonic-gate static int
rfs4_dbe_kmem_constructor(void * obj,void * private __unused,int kmflag __unused)173*f44e1126SVitaliy Gusev rfs4_dbe_kmem_constructor(void *obj, void *private __unused,
174*f44e1126SVitaliy Gusev int kmflag __unused)
1757c478bd9Sstevel@tonic-gate {
1767c478bd9Sstevel@tonic-gate rfs4_dbe_t *entry = obj;
1777c478bd9Sstevel@tonic-gate
178d216dff5SRobert Mastors mutex_init(entry->dbe_lock, NULL, MUTEX_DEFAULT, NULL);
179d216dff5SRobert Mastors cv_init(entry->dbe_cv, NULL, CV_DEFAULT, NULL);
1807c478bd9Sstevel@tonic-gate
1817c478bd9Sstevel@tonic-gate return (0);
1827c478bd9Sstevel@tonic-gate }
1837c478bd9Sstevel@tonic-gate
1847c478bd9Sstevel@tonic-gate static void
rfs4_dbe_kmem_destructor(void * obj,void * private __unused)185*f44e1126SVitaliy Gusev rfs4_dbe_kmem_destructor(void *obj, void *private __unused)
1867c478bd9Sstevel@tonic-gate {
1877c478bd9Sstevel@tonic-gate rfs4_dbe_t *entry = obj;
1887c478bd9Sstevel@tonic-gate
189d216dff5SRobert Mastors mutex_destroy(entry->dbe_lock);
190d216dff5SRobert Mastors cv_destroy(entry->dbe_cv);
1917c478bd9Sstevel@tonic-gate }
1927c478bd9Sstevel@tonic-gate
1937c478bd9Sstevel@tonic-gate rfs4_database_t *
rfs4_database_create(uint32_t flags)1947c478bd9Sstevel@tonic-gate rfs4_database_create(uint32_t flags)
1957c478bd9Sstevel@tonic-gate {
1967c478bd9Sstevel@tonic-gate rfs4_database_t *db;
1977c478bd9Sstevel@tonic-gate
1987c478bd9Sstevel@tonic-gate db = kmem_alloc(sizeof (rfs4_database_t), KM_SLEEP);
199d216dff5SRobert Mastors mutex_init(db->db_lock, NULL, MUTEX_DEFAULT, NULL);
200d216dff5SRobert Mastors db->db_tables = NULL;
201d216dff5SRobert Mastors db->db_debug_flags = flags;
202d216dff5SRobert Mastors db->db_shutdown_count = 0;
203d216dff5SRobert Mastors cv_init(&db->db_shutdown_wait, NULL, CV_DEFAULT, NULL);
2047c478bd9Sstevel@tonic-gate return (db);
2057c478bd9Sstevel@tonic-gate }
2067c478bd9Sstevel@tonic-gate
2077c478bd9Sstevel@tonic-gate
2087c478bd9Sstevel@tonic-gate /*
2097c478bd9Sstevel@tonic-gate * The reaper threads that have been created for the tables in this
2107c478bd9Sstevel@tonic-gate * database must be stopped and the entries in the tables released.
2117c478bd9Sstevel@tonic-gate * Each table will be marked as "shutdown" and the reaper threads
2127c478bd9Sstevel@tonic-gate * poked and they will see that a shutdown is in progress and cleanup
2137c478bd9Sstevel@tonic-gate * and exit. This function waits for all reaper threads to stop
2147c478bd9Sstevel@tonic-gate * before returning to the caller.
2157c478bd9Sstevel@tonic-gate */
2167c478bd9Sstevel@tonic-gate void
rfs4_database_shutdown(rfs4_database_t * db)2177c478bd9Sstevel@tonic-gate rfs4_database_shutdown(rfs4_database_t *db)
2187c478bd9Sstevel@tonic-gate {
2197c478bd9Sstevel@tonic-gate rfs4_table_t *table;
2207c478bd9Sstevel@tonic-gate
221d216dff5SRobert Mastors mutex_enter(db->db_lock);
222d216dff5SRobert Mastors for (table = db->db_tables; table; table = table->dbt_tnext) {
223d216dff5SRobert Mastors mutex_enter(&table->dbt_reaper_cv_lock);
224422d9515SGerald Thornbrugh table->dbt_reaper_shutdown = TRUE;
225d216dff5SRobert Mastors cv_broadcast(&table->dbt_reaper_wait);
226d216dff5SRobert Mastors db->db_shutdown_count++;
227d216dff5SRobert Mastors mutex_exit(&table->dbt_reaper_cv_lock);
2287c478bd9Sstevel@tonic-gate }
229d216dff5SRobert Mastors while (db->db_shutdown_count > 0) {
230d216dff5SRobert Mastors cv_wait(&db->db_shutdown_wait, db->db_lock);
2317c478bd9Sstevel@tonic-gate }
232d216dff5SRobert Mastors mutex_exit(db->db_lock);
2337c478bd9Sstevel@tonic-gate }
2347c478bd9Sstevel@tonic-gate
2357c478bd9Sstevel@tonic-gate /*
2367c478bd9Sstevel@tonic-gate * Given a database that has been "shutdown" by the function above all
2377c478bd9Sstevel@tonic-gate * of the table tables are destroyed and then the database itself
2387c478bd9Sstevel@tonic-gate * freed.
2397c478bd9Sstevel@tonic-gate */
2407c478bd9Sstevel@tonic-gate void
rfs4_database_destroy(rfs4_database_t * db)2417c478bd9Sstevel@tonic-gate rfs4_database_destroy(rfs4_database_t *db)
2427c478bd9Sstevel@tonic-gate {
2437c478bd9Sstevel@tonic-gate rfs4_table_t *next, *tmp;
2447c478bd9Sstevel@tonic-gate
245d216dff5SRobert Mastors for (next = db->db_tables; next; ) {
2467c478bd9Sstevel@tonic-gate tmp = next;
247d216dff5SRobert Mastors next = tmp->dbt_tnext;
2487c478bd9Sstevel@tonic-gate rfs4_table_destroy(db, tmp);
2497c478bd9Sstevel@tonic-gate }
2507c478bd9Sstevel@tonic-gate
251d216dff5SRobert Mastors mutex_destroy(db->db_lock);
2527c478bd9Sstevel@tonic-gate kmem_free(db, sizeof (rfs4_database_t));
2537c478bd9Sstevel@tonic-gate }
2547c478bd9Sstevel@tonic-gate
2550dfe541eSEvan Layton /*
2560dfe541eSEvan Layton * Used to get the correct kmem_cache database for the state table being
2570dfe541eSEvan Layton * created.
2580dfe541eSEvan Layton * Helper function for rfs4_table_create
2590dfe541eSEvan Layton */
2600dfe541eSEvan Layton static kmem_cache_t *
get_db_mem_cache(char * name)2610dfe541eSEvan Layton get_db_mem_cache(char *name)
2620dfe541eSEvan Layton {
2630dfe541eSEvan Layton int i;
2640dfe541eSEvan Layton
2650dfe541eSEvan Layton for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
2660dfe541eSEvan Layton if (strcmp(name, rfs4_db_mem_cache_table[i].r_db_name) == 0)
2670dfe541eSEvan Layton return (rfs4_db_mem_cache_table[i].r_db_mem_cache);
2680dfe541eSEvan Layton }
2690dfe541eSEvan Layton /*
2700dfe541eSEvan Layton * There is no associated kmem cache for this NFS4 server state
2710dfe541eSEvan Layton * table name
2720dfe541eSEvan Layton */
2730dfe541eSEvan Layton return (NULL);
2740dfe541eSEvan Layton }
2750dfe541eSEvan Layton
2760dfe541eSEvan Layton /*
2770dfe541eSEvan Layton * Used to initialize the global NFSv4 server state database.
2780dfe541eSEvan Layton * Helper funtion for rfs4_state_g_init and called when module is loaded.
2790dfe541eSEvan Layton */
2800dfe541eSEvan Layton kmem_cache_t *
2810dfe541eSEvan Layton /* CSTYLED */
nfs4_init_mem_cache(char * cache_name,uint32_t idxcnt,uint32_t size,uint32_t idx)2820dfe541eSEvan Layton nfs4_init_mem_cache(char *cache_name, uint32_t idxcnt, uint32_t size, uint32_t idx)
2830dfe541eSEvan Layton {
2840dfe541eSEvan Layton kmem_cache_t *mem_cache = kmem_cache_create(cache_name,
2850dfe541eSEvan Layton sizeof (rfs4_dbe_t) + idxcnt * sizeof (rfs4_link_t) + size,
2860dfe541eSEvan Layton 0,
2870dfe541eSEvan Layton rfs4_dbe_kmem_constructor,
2880dfe541eSEvan Layton rfs4_dbe_kmem_destructor,
2890dfe541eSEvan Layton NULL,
2900dfe541eSEvan Layton NULL,
2910dfe541eSEvan Layton NULL,
2920dfe541eSEvan Layton 0);
2930dfe541eSEvan Layton (void) strlcpy(rfs4_db_mem_cache_table[idx].r_db_name, cache_name,
2940dfe541eSEvan Layton strlen(cache_name) + 1);
2950dfe541eSEvan Layton rfs4_db_mem_cache_table[idx].r_db_mem_cache = mem_cache;
2960dfe541eSEvan Layton return (mem_cache);
2970dfe541eSEvan Layton }
2980dfe541eSEvan Layton
2997c478bd9Sstevel@tonic-gate rfs4_table_t *
rfs4_table_create(rfs4_database_t * db,char * tabname,time_t max_cache_time,uint32_t idxcnt,bool_t (* create)(rfs4_entry_t,void *),void (* destroy)(rfs4_entry_t),bool_t (* expiry)(rfs4_entry_t),uint32_t size,uint32_t hashsize,uint32_t maxentries,id_t start)300d216dff5SRobert Mastors rfs4_table_create(rfs4_database_t *db, char *tabname, time_t max_cache_time,
301d216dff5SRobert Mastors uint32_t idxcnt, bool_t (*create)(rfs4_entry_t, void *),
302d216dff5SRobert Mastors void (*destroy)(rfs4_entry_t),
303d216dff5SRobert Mastors bool_t (*expiry)(rfs4_entry_t),
304d216dff5SRobert Mastors uint32_t size, uint32_t hashsize,
305d216dff5SRobert Mastors uint32_t maxentries, id_t start)
3067c478bd9Sstevel@tonic-gate {
307f6cf9e50SRick Mesta rfs4_table_t *table;
308f6cf9e50SRick Mesta int len;
309f6cf9e50SRick Mesta char *cache_name;
310f6cf9e50SRick Mesta char *id_name;
3117c478bd9Sstevel@tonic-gate
3127c478bd9Sstevel@tonic-gate table = kmem_alloc(sizeof (rfs4_table_t), KM_SLEEP);
313d216dff5SRobert Mastors table->dbt_db = db;
314d216dff5SRobert Mastors rw_init(table->dbt_t_lock, NULL, RW_DEFAULT, NULL);
315d216dff5SRobert Mastors mutex_init(table->dbt_lock, NULL, MUTEX_DEFAULT, NULL);
316d216dff5SRobert Mastors mutex_init(&table->dbt_reaper_cv_lock, NULL, MUTEX_DEFAULT, NULL);
317d216dff5SRobert Mastors cv_init(&table->dbt_reaper_wait, NULL, CV_DEFAULT, NULL);
3187c478bd9Sstevel@tonic-gate
3197c478bd9Sstevel@tonic-gate len = strlen(tabname);
320d216dff5SRobert Mastors table->dbt_name = kmem_alloc(len+1, KM_SLEEP);
3217c478bd9Sstevel@tonic-gate cache_name = kmem_alloc(len + 12 /* "_entry_cache" */ + 1, KM_SLEEP);
322d216dff5SRobert Mastors (void) strcpy(table->dbt_name, tabname);
323d216dff5SRobert Mastors (void) sprintf(cache_name, "%s_entry_cache", table->dbt_name);
324d216dff5SRobert Mastors table->dbt_max_cache_time = max_cache_time;
325d216dff5SRobert Mastors table->dbt_usize = size;
326d216dff5SRobert Mastors table->dbt_len = hashsize;
327d216dff5SRobert Mastors table->dbt_count = 0;
328d216dff5SRobert Mastors table->dbt_idxcnt = 0;
329d216dff5SRobert Mastors table->dbt_ccnt = 0;
330d216dff5SRobert Mastors table->dbt_maxcnt = idxcnt;
331d216dff5SRobert Mastors table->dbt_indices = NULL;
332d216dff5SRobert Mastors table->dbt_id_space = NULL;
333d216dff5SRobert Mastors table->dbt_reaper_shutdown = FALSE;
3347c478bd9Sstevel@tonic-gate
3357c478bd9Sstevel@tonic-gate if (start >= 0) {
3367c478bd9Sstevel@tonic-gate if (maxentries + (uint32_t)start > (uint32_t)INT32_MAX)
3377c478bd9Sstevel@tonic-gate maxentries = INT32_MAX - start;
3387c478bd9Sstevel@tonic-gate id_name = kmem_alloc(len + 9 /* "_id_space" */ + 1, KM_SLEEP);
339d216dff5SRobert Mastors (void) sprintf(id_name, "%s_id_space", table->dbt_name);
340d216dff5SRobert Mastors table->dbt_id_space = id_space_create(id_name, start,
341d216dff5SRobert Mastors maxentries + start);
3427c478bd9Sstevel@tonic-gate kmem_free(id_name, len + 10);
3437c478bd9Sstevel@tonic-gate }
344f6cf9e50SRick Mesta ASSERT(t_lowat != 0);
345f6cf9e50SRick Mesta table->dbt_id_lwat = (maxentries * t_lowat) / 100;
346f6cf9e50SRick Mesta ASSERT(t_hiwat != 0);
347f6cf9e50SRick Mesta table->dbt_id_hwat = (maxentries * t_hiwat) / 100;
348f6cf9e50SRick Mesta table->dbt_id_reap = MIN(rfs4_reap_interval, max_cache_time);
349d216dff5SRobert Mastors table->dbt_maxentries = maxentries;
350d216dff5SRobert Mastors table->dbt_create = create;
351d216dff5SRobert Mastors table->dbt_destroy = destroy;
352d216dff5SRobert Mastors table->dbt_expiry = expiry;
353d216dff5SRobert Mastors
3540dfe541eSEvan Layton /*
3550dfe541eSEvan Layton * get the correct kmem_cache for this table type based on the name.
3560dfe541eSEvan Layton */
3570dfe541eSEvan Layton table->dbt_mem_cache = get_db_mem_cache(cache_name);
3580dfe541eSEvan Layton
3597c478bd9Sstevel@tonic-gate kmem_free(cache_name, len+13);
3607c478bd9Sstevel@tonic-gate
361d216dff5SRobert Mastors table->dbt_debug = db->db_debug_flags;
3627c478bd9Sstevel@tonic-gate
363d216dff5SRobert Mastors mutex_enter(db->db_lock);
364d216dff5SRobert Mastors table->dbt_tnext = db->db_tables;
365d216dff5SRobert Mastors db->db_tables = table;
366d216dff5SRobert Mastors mutex_exit(db->db_lock);
3677c478bd9Sstevel@tonic-gate
3687c478bd9Sstevel@tonic-gate rfs4_start_reaper(table);
3697c478bd9Sstevel@tonic-gate
3707c478bd9Sstevel@tonic-gate return (table);
3717c478bd9Sstevel@tonic-gate }
3727c478bd9Sstevel@tonic-gate
3737c478bd9Sstevel@tonic-gate void
rfs4_table_destroy(rfs4_database_t * db,rfs4_table_t * table)374d216dff5SRobert Mastors rfs4_table_destroy(rfs4_database_t *db, rfs4_table_t *table)
3757c478bd9Sstevel@tonic-gate {
3767c478bd9Sstevel@tonic-gate rfs4_table_t *p;
377d216dff5SRobert Mastors rfs4_index_t *idx;
3787c478bd9Sstevel@tonic-gate
379d216dff5SRobert Mastors ASSERT(table->dbt_count == 0);
3807c478bd9Sstevel@tonic-gate
381d216dff5SRobert Mastors mutex_enter(db->db_lock);
382d216dff5SRobert Mastors if (table == db->db_tables)
383d216dff5SRobert Mastors db->db_tables = table->dbt_tnext;
3847c478bd9Sstevel@tonic-gate else {
385d216dff5SRobert Mastors for (p = db->db_tables; p; p = p->dbt_tnext)
386d216dff5SRobert Mastors if (p->dbt_tnext == table) {
387d216dff5SRobert Mastors p->dbt_tnext = table->dbt_tnext;
388d216dff5SRobert Mastors table->dbt_tnext = NULL;
3897c478bd9Sstevel@tonic-gate break;
3907c478bd9Sstevel@tonic-gate }
3917c478bd9Sstevel@tonic-gate ASSERT(p != NULL);
3927c478bd9Sstevel@tonic-gate }
393d216dff5SRobert Mastors mutex_exit(db->db_lock);
3947c478bd9Sstevel@tonic-gate
3957c478bd9Sstevel@tonic-gate /* Destroy indices */
396d216dff5SRobert Mastors while (table->dbt_indices) {
397d216dff5SRobert Mastors idx = table->dbt_indices;
398d216dff5SRobert Mastors table->dbt_indices = idx->dbi_inext;
399d216dff5SRobert Mastors rfs4_index_destroy(idx);
4007c478bd9Sstevel@tonic-gate }
4017c478bd9Sstevel@tonic-gate
402d216dff5SRobert Mastors rw_destroy(table->dbt_t_lock);
403d216dff5SRobert Mastors mutex_destroy(table->dbt_lock);
404d216dff5SRobert Mastors mutex_destroy(&table->dbt_reaper_cv_lock);
405d216dff5SRobert Mastors cv_destroy(&table->dbt_reaper_wait);
4067c478bd9Sstevel@tonic-gate
407d216dff5SRobert Mastors kmem_free(table->dbt_name, strlen(table->dbt_name) + 1);
408d216dff5SRobert Mastors if (table->dbt_id_space)
409d216dff5SRobert Mastors id_space_destroy(table->dbt_id_space);
4100dfe541eSEvan Layton table->dbt_mem_cache = NULL;
4117c478bd9Sstevel@tonic-gate kmem_free(table, sizeof (rfs4_table_t));
4127c478bd9Sstevel@tonic-gate }
4137c478bd9Sstevel@tonic-gate
4147c478bd9Sstevel@tonic-gate rfs4_index_t *
rfs4_index_create(rfs4_table_t * table,char * keyname,uint32_t (* hash)(void *),bool_t (compare)(rfs4_entry_t,void *),void * (* mkkey)(rfs4_entry_t),bool_t createable)4157c478bd9Sstevel@tonic-gate rfs4_index_create(rfs4_table_t *table, char *keyname,
416d216dff5SRobert Mastors uint32_t (*hash)(void *),
417d216dff5SRobert Mastors bool_t (compare)(rfs4_entry_t, void *),
418d216dff5SRobert Mastors void *(*mkkey)(rfs4_entry_t),
419d216dff5SRobert Mastors bool_t createable)
4207c478bd9Sstevel@tonic-gate {
4217c478bd9Sstevel@tonic-gate rfs4_index_t *idx;
4227c478bd9Sstevel@tonic-gate
423d216dff5SRobert Mastors ASSERT(table->dbt_idxcnt < table->dbt_maxcnt);
4247c478bd9Sstevel@tonic-gate
4257c478bd9Sstevel@tonic-gate idx = kmem_alloc(sizeof (rfs4_index_t), KM_SLEEP);
4267c478bd9Sstevel@tonic-gate
427d216dff5SRobert Mastors idx->dbi_table = table;
428d216dff5SRobert Mastors idx->dbi_keyname = kmem_alloc(strlen(keyname) + 1, KM_SLEEP);
429d216dff5SRobert Mastors (void) strcpy(idx->dbi_keyname, keyname);
430d216dff5SRobert Mastors idx->dbi_hash = hash;
431d216dff5SRobert Mastors idx->dbi_compare = compare;
432d216dff5SRobert Mastors idx->dbi_mkkey = mkkey;
433d216dff5SRobert Mastors idx->dbi_tblidx = table->dbt_idxcnt;
434d216dff5SRobert Mastors table->dbt_idxcnt++;
4357c478bd9Sstevel@tonic-gate if (createable) {
436d216dff5SRobert Mastors table->dbt_ccnt++;
437d216dff5SRobert Mastors if (table->dbt_ccnt > 1)
4387c478bd9Sstevel@tonic-gate panic("Table %s currently can have only have one "
4397c478bd9Sstevel@tonic-gate "index that will allow creation of entries",
440d216dff5SRobert Mastors table->dbt_name);
441d216dff5SRobert Mastors idx->dbi_createable = TRUE;
4427c478bd9Sstevel@tonic-gate } else {
443d216dff5SRobert Mastors idx->dbi_createable = FALSE;
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate
446d216dff5SRobert Mastors idx->dbi_inext = table->dbt_indices;
447d216dff5SRobert Mastors table->dbt_indices = idx;
448d216dff5SRobert Mastors idx->dbi_buckets = kmem_zalloc(sizeof (rfs4_bucket_t) * table->dbt_len,
449d216dff5SRobert Mastors KM_SLEEP);
4507c478bd9Sstevel@tonic-gate
4517c478bd9Sstevel@tonic-gate return (idx);
4527c478bd9Sstevel@tonic-gate }
4537c478bd9Sstevel@tonic-gate
4547c478bd9Sstevel@tonic-gate void
rfs4_index_destroy(rfs4_index_t * idx)4557c478bd9Sstevel@tonic-gate rfs4_index_destroy(rfs4_index_t *idx)
4567c478bd9Sstevel@tonic-gate {
457d216dff5SRobert Mastors kmem_free(idx->dbi_keyname, strlen(idx->dbi_keyname) + 1);
458d216dff5SRobert Mastors kmem_free(idx->dbi_buckets,
459d216dff5SRobert Mastors sizeof (rfs4_bucket_t) * idx->dbi_table->dbt_len);
4607c478bd9Sstevel@tonic-gate kmem_free(idx, sizeof (rfs4_index_t));
4617c478bd9Sstevel@tonic-gate }
4627c478bd9Sstevel@tonic-gate
4637c478bd9Sstevel@tonic-gate static void
rfs4_dbe_destroy(rfs4_dbe_t * entry)4647c478bd9Sstevel@tonic-gate rfs4_dbe_destroy(rfs4_dbe_t *entry)
4657c478bd9Sstevel@tonic-gate {
466d216dff5SRobert Mastors rfs4_index_t *idx;
4677c478bd9Sstevel@tonic-gate void *key;
4687c478bd9Sstevel@tonic-gate int i;
469d216dff5SRobert Mastors rfs4_bucket_t *bp;
470d216dff5SRobert Mastors rfs4_table_t *table = entry->dbe_table;
471d216dff5SRobert Mastors rfs4_link_t *l;
4727c478bd9Sstevel@tonic-gate
473d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & DESTROY_DEBUG,
474d216dff5SRobert Mastors (CE_NOTE, "Destroying entry %p from %s",
475d216dff5SRobert Mastors (void*)entry, table->dbt_name));
4767c478bd9Sstevel@tonic-gate
477d216dff5SRobert Mastors mutex_enter(entry->dbe_lock);
478d216dff5SRobert Mastors ASSERT(entry->dbe_refcnt == 0);
479d216dff5SRobert Mastors mutex_exit(entry->dbe_lock);
4807c478bd9Sstevel@tonic-gate
4817c478bd9Sstevel@tonic-gate /* Unlink from all indices */
482d216dff5SRobert Mastors for (idx = table->dbt_indices; idx; idx = idx->dbi_inext) {
483d216dff5SRobert Mastors l = &entry->dbe_indices[idx->dbi_tblidx];
4847c478bd9Sstevel@tonic-gate /* check and see if we were ever linked in to the index */
4857c478bd9Sstevel@tonic-gate if (INVALID_LINK(l)) {
4867c478bd9Sstevel@tonic-gate ASSERT(l->next == NULL && l->prev == NULL);
4877c478bd9Sstevel@tonic-gate continue;
4887c478bd9Sstevel@tonic-gate }
489d216dff5SRobert Mastors key = idx->dbi_mkkey(entry->dbe_data);
490d216dff5SRobert Mastors i = HASH(idx, key);
491d216dff5SRobert Mastors bp = &idx->dbi_buckets[i];
492d216dff5SRobert Mastors ASSERT(bp->dbk_head != NULL);
493d216dff5SRobert Mastors DEQUEUE_IDX(bp, &entry->dbe_indices[idx->dbi_tblidx]);
4947c478bd9Sstevel@tonic-gate }
4957c478bd9Sstevel@tonic-gate
4967c478bd9Sstevel@tonic-gate /* Destroy user data */
497d216dff5SRobert Mastors if (table->dbt_destroy)
498d216dff5SRobert Mastors (*table->dbt_destroy)(entry->dbe_data);
4997c478bd9Sstevel@tonic-gate
500d216dff5SRobert Mastors if (table->dbt_id_space)
501d216dff5SRobert Mastors id_free(table->dbt_id_space, entry->dbe_id);
5027c478bd9Sstevel@tonic-gate
503d216dff5SRobert Mastors mutex_enter(table->dbt_lock);
504d216dff5SRobert Mastors table->dbt_count--;
505d216dff5SRobert Mastors mutex_exit(table->dbt_lock);
5067c478bd9Sstevel@tonic-gate
5077c478bd9Sstevel@tonic-gate /* Destroy the entry itself */
508d216dff5SRobert Mastors kmem_cache_free(table->dbt_mem_cache, entry);
5097c478bd9Sstevel@tonic-gate }
5107c478bd9Sstevel@tonic-gate
5117c478bd9Sstevel@tonic-gate
5127c478bd9Sstevel@tonic-gate static rfs4_dbe_t *
rfs4_dbe_create(rfs4_table_t * table,id_t id,rfs4_entry_t data)513d216dff5SRobert Mastors rfs4_dbe_create(rfs4_table_t *table, id_t id, rfs4_entry_t data)
5147c478bd9Sstevel@tonic-gate {
5157c478bd9Sstevel@tonic-gate rfs4_dbe_t *entry;
5167c478bd9Sstevel@tonic-gate int i;
5177c478bd9Sstevel@tonic-gate
518d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
519d216dff5SRobert Mastors (CE_NOTE, "Creating entry in table %s", table->dbt_name));
5207c478bd9Sstevel@tonic-gate
521d216dff5SRobert Mastors entry = kmem_cache_alloc(table->dbt_mem_cache, KM_SLEEP);
5227c478bd9Sstevel@tonic-gate
523d216dff5SRobert Mastors entry->dbe_refcnt = 1;
524d216dff5SRobert Mastors entry->dbe_invalid = FALSE;
525d216dff5SRobert Mastors entry->dbe_skipsearch = FALSE;
526d216dff5SRobert Mastors entry->dbe_time_rele = 0;
527d216dff5SRobert Mastors entry->dbe_id = 0;
5287c478bd9Sstevel@tonic-gate
529d216dff5SRobert Mastors if (table->dbt_id_space)
530d216dff5SRobert Mastors entry->dbe_id = id;
531d216dff5SRobert Mastors entry->dbe_table = table;
5327c478bd9Sstevel@tonic-gate
533d216dff5SRobert Mastors for (i = 0; i < table->dbt_maxcnt; i++) {
534d216dff5SRobert Mastors entry->dbe_indices[i].next = entry->dbe_indices[i].prev = NULL;
535d216dff5SRobert Mastors entry->dbe_indices[i].entry = entry;
5367c478bd9Sstevel@tonic-gate /*
5377c478bd9Sstevel@tonic-gate * We mark the entry as not indexed by setting the low
5387c478bd9Sstevel@tonic-gate * order bit, since address are word aligned. This has
5397c478bd9Sstevel@tonic-gate * the advantage of causeing a trap if the address is
5407c478bd9Sstevel@tonic-gate * used. After the entry is linked in to the
5417c478bd9Sstevel@tonic-gate * corresponding index the bit will be cleared.
5427c478bd9Sstevel@tonic-gate */
543d216dff5SRobert Mastors INVALIDATE_ADDR(entry->dbe_indices[i].entry);
5447c478bd9Sstevel@tonic-gate }
5457c478bd9Sstevel@tonic-gate
546d216dff5SRobert Mastors entry->dbe_data = (rfs4_entry_t)&entry->dbe_indices[table->dbt_maxcnt];
547d216dff5SRobert Mastors bzero(entry->dbe_data, table->dbt_usize);
548d216dff5SRobert Mastors entry->dbe_data->dbe = entry;
5497c478bd9Sstevel@tonic-gate
550d216dff5SRobert Mastors if (!(*table->dbt_create)(entry->dbe_data, data)) {
551d216dff5SRobert Mastors kmem_cache_free(table->dbt_mem_cache, entry);
5527c478bd9Sstevel@tonic-gate return (NULL);
5537c478bd9Sstevel@tonic-gate }
5547c478bd9Sstevel@tonic-gate
555d216dff5SRobert Mastors mutex_enter(table->dbt_lock);
556d216dff5SRobert Mastors table->dbt_count++;
557d216dff5SRobert Mastors mutex_exit(table->dbt_lock);
5587c478bd9Sstevel@tonic-gate
5597c478bd9Sstevel@tonic-gate return (entry);
5607c478bd9Sstevel@tonic-gate }
5617c478bd9Sstevel@tonic-gate
562f6cf9e50SRick Mesta static void
rfs4_dbe_tabreap_adjust(rfs4_table_t * table)563f6cf9e50SRick Mesta rfs4_dbe_tabreap_adjust(rfs4_table_t *table)
564f6cf9e50SRick Mesta {
565f6cf9e50SRick Mesta clock_t tabreap;
566f6cf9e50SRick Mesta clock_t reap_int;
567f6cf9e50SRick Mesta uint32_t in_use;
568f6cf9e50SRick Mesta
569f6cf9e50SRick Mesta /*
570f6cf9e50SRick Mesta * Adjust the table's reap interval based on the
571f6cf9e50SRick Mesta * number of id's currently in use. Each table's
572f6cf9e50SRick Mesta * default remains the same if id usage subsides.
573f6cf9e50SRick Mesta */
574f6cf9e50SRick Mesta ASSERT(MUTEX_HELD(&table->dbt_reaper_cv_lock));
575f6cf9e50SRick Mesta tabreap = MIN(rfs4_reap_interval, table->dbt_max_cache_time);
576f6cf9e50SRick Mesta
577f6cf9e50SRick Mesta in_use = table->dbt_count + 1; /* see rfs4_dbe_create */
578f6cf9e50SRick Mesta if (in_use >= table->dbt_id_hwat) {
579f6cf9e50SRick Mesta ASSERT(t_hreap != 0);
580f6cf9e50SRick Mesta reap_int = (tabreap * t_hreap) / 100;
581f6cf9e50SRick Mesta } else if (in_use >= table->dbt_id_lwat) {
582f6cf9e50SRick Mesta ASSERT(t_lreap != 0);
583f6cf9e50SRick Mesta reap_int = (tabreap * t_lreap) / 100;
584f6cf9e50SRick Mesta } else {
585f6cf9e50SRick Mesta reap_int = tabreap;
586f6cf9e50SRick Mesta }
587f6cf9e50SRick Mesta table->dbt_id_reap = reap_int;
588f6cf9e50SRick Mesta DTRACE_PROBE2(table__reap__interval, char *,
589f6cf9e50SRick Mesta table->dbt_name, time_t, table->dbt_id_reap);
590f6cf9e50SRick Mesta }
591f6cf9e50SRick Mesta
5927c478bd9Sstevel@tonic-gate rfs4_entry_t
rfs4_dbsearch(rfs4_index_t * idx,void * key,bool_t * create,void * arg,rfs4_dbsearch_type_t dbsearch_type)5937c478bd9Sstevel@tonic-gate rfs4_dbsearch(rfs4_index_t *idx, void *key, bool_t *create, void *arg,
594d216dff5SRobert Mastors rfs4_dbsearch_type_t dbsearch_type)
5957c478bd9Sstevel@tonic-gate {
596f6cf9e50SRick Mesta int already_done;
597f6cf9e50SRick Mesta uint32_t i;
598f6cf9e50SRick Mesta rfs4_table_t *table = idx->dbi_table;
599f6cf9e50SRick Mesta rfs4_index_t *ip;
600f6cf9e50SRick Mesta rfs4_bucket_t *bp;
601f6cf9e50SRick Mesta rfs4_link_t *l;
602f6cf9e50SRick Mesta rfs4_dbe_t *entry;
603f6cf9e50SRick Mesta id_t id = -1;
6047c478bd9Sstevel@tonic-gate
6057c478bd9Sstevel@tonic-gate i = HASH(idx, key);
606d216dff5SRobert Mastors bp = &idx->dbi_buckets[i];
6077c478bd9Sstevel@tonic-gate
608d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
609d216dff5SRobert Mastors (CE_NOTE, "Searching for key %p in table %s by %s",
610d216dff5SRobert Mastors key, table->dbt_name, idx->dbi_keyname));
6117c478bd9Sstevel@tonic-gate
612d216dff5SRobert Mastors rw_enter(bp->dbk_lock, RW_READER);
6137c478bd9Sstevel@tonic-gate retry:
614d216dff5SRobert Mastors for (l = bp->dbk_head; l; l = l->next) {
615d216dff5SRobert Mastors if (l->entry->dbe_refcnt > 0 &&
616d216dff5SRobert Mastors (l->entry->dbe_skipsearch == FALSE ||
617d216dff5SRobert Mastors (l->entry->dbe_skipsearch == TRUE &&
618d216dff5SRobert Mastors dbsearch_type == RFS4_DBS_INVALID)) &&
619d216dff5SRobert Mastors (*idx->dbi_compare)(l->entry->dbe_data, key)) {
620d216dff5SRobert Mastors mutex_enter(l->entry->dbe_lock);
621d216dff5SRobert Mastors if (l->entry->dbe_refcnt == 0) {
622d216dff5SRobert Mastors mutex_exit(l->entry->dbe_lock);
6237c478bd9Sstevel@tonic-gate continue;
6247c478bd9Sstevel@tonic-gate }
6257c478bd9Sstevel@tonic-gate
6267c478bd9Sstevel@tonic-gate /* place an additional hold since we are returning */
6277c478bd9Sstevel@tonic-gate rfs4_dbe_hold(l->entry);
6287c478bd9Sstevel@tonic-gate
629d216dff5SRobert Mastors mutex_exit(l->entry->dbe_lock);
630d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
6317c478bd9Sstevel@tonic-gate
6327c478bd9Sstevel@tonic-gate *create = FALSE;
6337c478bd9Sstevel@tonic-gate
634d216dff5SRobert Mastors NFS4_DEBUG((table->dbt_debug & SEARCH_DEBUG),
635d216dff5SRobert Mastors (CE_NOTE, "Found entry %p for %p in table %s",
636d216dff5SRobert Mastors (void *)l->entry, key, table->dbt_name));
6377c478bd9Sstevel@tonic-gate
638d216dff5SRobert Mastors if (id != -1)
639d216dff5SRobert Mastors id_free(table->dbt_id_space, id);
640d216dff5SRobert Mastors return (l->entry->dbe_data);
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate }
6437c478bd9Sstevel@tonic-gate
644d216dff5SRobert Mastors if (!*create || table->dbt_create == NULL || !idx->dbi_createable ||
645d216dff5SRobert Mastors table->dbt_maxentries == table->dbt_count) {
646d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & SEARCH_DEBUG,
647d216dff5SRobert Mastors (CE_NOTE, "Entry for %p in %s not found",
648d216dff5SRobert Mastors key, table->dbt_name));
6497c478bd9Sstevel@tonic-gate
650d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
651d216dff5SRobert Mastors if (id != -1)
652d216dff5SRobert Mastors id_free(table->dbt_id_space, id);
6537c478bd9Sstevel@tonic-gate return (NULL);
6547c478bd9Sstevel@tonic-gate }
6557c478bd9Sstevel@tonic-gate
656d216dff5SRobert Mastors if (table->dbt_id_space && id == -1) {
657f6cf9e50SRick Mesta rw_exit(bp->dbk_lock);
658d216dff5SRobert Mastors
659f6cf9e50SRick Mesta /* get an id, ok to sleep for it here */
660f6cf9e50SRick Mesta id = id_alloc(table->dbt_id_space);
661f6cf9e50SRick Mesta ASSERT(id != -1);
662d216dff5SRobert Mastors
663f6cf9e50SRick Mesta mutex_enter(&table->dbt_reaper_cv_lock);
664f6cf9e50SRick Mesta rfs4_dbe_tabreap_adjust(table);
665f6cf9e50SRick Mesta mutex_exit(&table->dbt_reaper_cv_lock);
666f6cf9e50SRick Mesta
667f6cf9e50SRick Mesta rw_enter(bp->dbk_lock, RW_WRITER);
668f6cf9e50SRick Mesta goto retry;
6697c478bd9Sstevel@tonic-gate }
6707c478bd9Sstevel@tonic-gate
671d216dff5SRobert Mastors /* get an exclusive lock on the bucket */
672d216dff5SRobert Mastors if (rw_read_locked(bp->dbk_lock) && !rw_tryupgrade(bp->dbk_lock)) {
673d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & OTHER_DEBUG,
674d216dff5SRobert Mastors (CE_NOTE, "Trying to upgrade lock on "
675d216dff5SRobert Mastors "hash chain %d (%p) for %s by %s",
676d216dff5SRobert Mastors i, (void*)bp, table->dbt_name, idx->dbi_keyname));
6777c478bd9Sstevel@tonic-gate
678d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
679d216dff5SRobert Mastors rw_enter(bp->dbk_lock, RW_WRITER);
680d216dff5SRobert Mastors goto retry;
681d216dff5SRobert Mastors }
6827c478bd9Sstevel@tonic-gate
683d216dff5SRobert Mastors /* create entry */
684d216dff5SRobert Mastors entry = rfs4_dbe_create(table, id, arg);
685d216dff5SRobert Mastors if (entry == NULL) {
686d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
687d216dff5SRobert Mastors if (id != -1)
688d216dff5SRobert Mastors id_free(table->dbt_id_space, id);
6897c478bd9Sstevel@tonic-gate
690d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & CREATE_DEBUG,
691d216dff5SRobert Mastors (CE_NOTE, "Constructor for table %s failed",
692d216dff5SRobert Mastors table->dbt_name));
693d216dff5SRobert Mastors return (NULL);
6947c478bd9Sstevel@tonic-gate }
6957c478bd9Sstevel@tonic-gate
6967c478bd9Sstevel@tonic-gate /*
6977c478bd9Sstevel@tonic-gate * Add one ref for entry into table's hash - only one
698d216dff5SRobert Mastors * reference added even though there may be multiple indices
6997c478bd9Sstevel@tonic-gate */
7007c478bd9Sstevel@tonic-gate rfs4_dbe_hold(entry);
701d216dff5SRobert Mastors ENQUEUE(bp->dbk_head, &entry->dbe_indices[idx->dbi_tblidx]);
702d216dff5SRobert Mastors VALIDATE_ADDR(entry->dbe_indices[idx->dbi_tblidx].entry);
7037c478bd9Sstevel@tonic-gate
704d216dff5SRobert Mastors already_done = idx->dbi_tblidx;
705d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
7067c478bd9Sstevel@tonic-gate
707d216dff5SRobert Mastors for (ip = table->dbt_indices; ip; ip = ip->dbi_inext) {
708d216dff5SRobert Mastors if (ip->dbi_tblidx == already_done)
7097c478bd9Sstevel@tonic-gate continue;
710d216dff5SRobert Mastors l = &entry->dbe_indices[ip->dbi_tblidx];
711d216dff5SRobert Mastors i = HASH(ip, ip->dbi_mkkey(entry->dbe_data));
712d216dff5SRobert Mastors ASSERT(i < ip->dbi_table->dbt_len);
713d216dff5SRobert Mastors bp = &ip->dbi_buckets[i];
7147c478bd9Sstevel@tonic-gate ENQUEUE_IDX(bp, l);
7157c478bd9Sstevel@tonic-gate }
7167c478bd9Sstevel@tonic-gate
717d216dff5SRobert Mastors NFS4_DEBUG(
718d216dff5SRobert Mastors table->dbt_debug & SEARCH_DEBUG || table->dbt_debug & CREATE_DEBUG,
719d216dff5SRobert Mastors (CE_NOTE, "Entry %p created for %s = %p in table %s",
720d216dff5SRobert Mastors (void*)entry, idx->dbi_keyname, (void*)key, table->dbt_name));
7217c478bd9Sstevel@tonic-gate
722d216dff5SRobert Mastors return (entry->dbe_data);
7237c478bd9Sstevel@tonic-gate }
7247c478bd9Sstevel@tonic-gate
7257c478bd9Sstevel@tonic-gate /*ARGSUSED*/
7267c478bd9Sstevel@tonic-gate boolean_t
rfs4_cpr_callb(void * arg,int code)7277c478bd9Sstevel@tonic-gate rfs4_cpr_callb(void *arg, int code)
7287c478bd9Sstevel@tonic-gate {
729d216dff5SRobert Mastors rfs4_bucket_t *buckets, *bp;
730d216dff5SRobert Mastors rfs4_link_t *l;
731d216dff5SRobert Mastors rfs4_client_t *cp;
7327c478bd9Sstevel@tonic-gate int i;
7337c478bd9Sstevel@tonic-gate
7340dfe541eSEvan Layton nfs4_srv_t *nsrv4 = nfs4_get_srv();
7350dfe541eSEvan Layton rfs4_table_t *table = nsrv4->rfs4_client_tab;
7360dfe541eSEvan Layton
7377c478bd9Sstevel@tonic-gate /*
7387c478bd9Sstevel@tonic-gate * We get called for Suspend and Resume events.
7397c478bd9Sstevel@tonic-gate * For the suspend case we simply don't care! Nor do we care if
7407c478bd9Sstevel@tonic-gate * there are no clients.
7417c478bd9Sstevel@tonic-gate */
742d216dff5SRobert Mastors if (code == CB_CODE_CPR_CHKPT || table == NULL) {
7437c478bd9Sstevel@tonic-gate return (B_TRUE);
7447c478bd9Sstevel@tonic-gate }
7457c478bd9Sstevel@tonic-gate
746d216dff5SRobert Mastors buckets = table->dbt_indices->dbi_buckets;
7477c478bd9Sstevel@tonic-gate
7487c478bd9Sstevel@tonic-gate /*
7497c478bd9Sstevel@tonic-gate * When we get this far we are in the process of
7507c478bd9Sstevel@tonic-gate * resuming the system from a previous suspend.
7517c478bd9Sstevel@tonic-gate *
7527c478bd9Sstevel@tonic-gate * We are going to blast through and update the
7537c478bd9Sstevel@tonic-gate * last_access time for all the clients and in
7547c478bd9Sstevel@tonic-gate * doing so extend them by one lease period.
7557c478bd9Sstevel@tonic-gate */
756d216dff5SRobert Mastors for (i = 0; i < table->dbt_len; i++) {
7577c478bd9Sstevel@tonic-gate bp = &buckets[i];
758d216dff5SRobert Mastors for (l = bp->dbk_head; l; l = l->next) {
759d216dff5SRobert Mastors cp = (rfs4_client_t *)l->entry->dbe_data;
760d216dff5SRobert Mastors cp->rc_last_access = gethrestime_sec();
7617c478bd9Sstevel@tonic-gate }
7627c478bd9Sstevel@tonic-gate }
7637c478bd9Sstevel@tonic-gate
7647c478bd9Sstevel@tonic-gate return (B_TRUE);
7657c478bd9Sstevel@tonic-gate }
7667c478bd9Sstevel@tonic-gate
7677c478bd9Sstevel@tonic-gate /*
7687c478bd9Sstevel@tonic-gate * Given a table, lock each of the buckets and walk all entries (in
7697c478bd9Sstevel@tonic-gate * turn locking those) and calling the provided "callout" function
7707c478bd9Sstevel@tonic-gate * with the provided parameter. Obviously used to iterate across all
7717c478bd9Sstevel@tonic-gate * entries in a particular table via the database locking hierarchy.
7727c478bd9Sstevel@tonic-gate * Obviously the caller must not hold locks on any of the entries in
7737c478bd9Sstevel@tonic-gate * the specified table.
7747c478bd9Sstevel@tonic-gate */
7757c478bd9Sstevel@tonic-gate void
rfs4_dbe_walk(rfs4_table_t * table,void (* callout)(rfs4_entry_t,void *),void * data)7767c478bd9Sstevel@tonic-gate rfs4_dbe_walk(rfs4_table_t *table,
777d216dff5SRobert Mastors void (*callout)(rfs4_entry_t, void *),
778d216dff5SRobert Mastors void *data)
7797c478bd9Sstevel@tonic-gate {
780d216dff5SRobert Mastors rfs4_bucket_t *buckets = table->dbt_indices->dbi_buckets, *bp;
781d216dff5SRobert Mastors rfs4_link_t *l;
782d216dff5SRobert Mastors rfs4_dbe_t *entry;
7837c478bd9Sstevel@tonic-gate int i;
7847c478bd9Sstevel@tonic-gate
785d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
786d216dff5SRobert Mastors (CE_NOTE, "Walking entries in %s", table->dbt_name));
7877c478bd9Sstevel@tonic-gate
7887c478bd9Sstevel@tonic-gate /* Walk the buckets looking for entries to release/destroy */
789d216dff5SRobert Mastors for (i = 0; i < table->dbt_len; i++) {
7907c478bd9Sstevel@tonic-gate bp = &buckets[i];
791d216dff5SRobert Mastors rw_enter(bp->dbk_lock, RW_READER);
792d216dff5SRobert Mastors for (l = bp->dbk_head; l; l = l->next) {
793d216dff5SRobert Mastors entry = l->entry;
794d216dff5SRobert Mastors mutex_enter(entry->dbe_lock);
795d216dff5SRobert Mastors (*callout)(entry->dbe_data, data);
796d216dff5SRobert Mastors mutex_exit(entry->dbe_lock);
7977c478bd9Sstevel@tonic-gate }
798d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
7997c478bd9Sstevel@tonic-gate }
8007c478bd9Sstevel@tonic-gate
801d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & WALK_DEBUG,
802d216dff5SRobert Mastors (CE_NOTE, "Walking entries complete %s", table->dbt_name));
8037c478bd9Sstevel@tonic-gate }
8047c478bd9Sstevel@tonic-gate
8057c478bd9Sstevel@tonic-gate
8067c478bd9Sstevel@tonic-gate static void
rfs4_dbe_reap(rfs4_table_t * table,time_t cache_time,uint32_t desired)8077c478bd9Sstevel@tonic-gate rfs4_dbe_reap(rfs4_table_t *table, time_t cache_time, uint32_t desired)
8087c478bd9Sstevel@tonic-gate {
809d216dff5SRobert Mastors rfs4_index_t *idx = table->dbt_indices;
810d216dff5SRobert Mastors rfs4_bucket_t *buckets = idx->dbi_buckets, *bp;
811d216dff5SRobert Mastors rfs4_link_t *l, *t;
812d216dff5SRobert Mastors rfs4_dbe_t *entry;
8137c478bd9Sstevel@tonic-gate bool_t found;
8147c478bd9Sstevel@tonic-gate int i;
8157c478bd9Sstevel@tonic-gate int count = 0;
8167c478bd9Sstevel@tonic-gate
817d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
818d216dff5SRobert Mastors (CE_NOTE, "Reaping %d entries older than %ld seconds in table %s",
819d216dff5SRobert Mastors desired, cache_time, table->dbt_name));
8207c478bd9Sstevel@tonic-gate
8217c478bd9Sstevel@tonic-gate /* Walk the buckets looking for entries to release/destroy */
822d216dff5SRobert Mastors for (i = 0; i < table->dbt_len; i++) {
8237c478bd9Sstevel@tonic-gate bp = &buckets[i];
8247c478bd9Sstevel@tonic-gate do {
8257c478bd9Sstevel@tonic-gate found = FALSE;
826d216dff5SRobert Mastors rw_enter(bp->dbk_lock, RW_READER);
827d216dff5SRobert Mastors for (l = bp->dbk_head; l; l = l->next) {
828d216dff5SRobert Mastors entry = l->entry;
8297c478bd9Sstevel@tonic-gate /*
8307c478bd9Sstevel@tonic-gate * Examine an entry. Ref count of 1 means
8317c478bd9Sstevel@tonic-gate * that the only reference is for the hash
8327c478bd9Sstevel@tonic-gate * table reference.
8337c478bd9Sstevel@tonic-gate */
834d216dff5SRobert Mastors if (entry->dbe_refcnt != 1)
835d216dff5SRobert Mastors continue;
836d216dff5SRobert Mastors mutex_enter(entry->dbe_lock);
837d216dff5SRobert Mastors if ((entry->dbe_refcnt == 1) &&
838d216dff5SRobert Mastors (table->dbt_reaper_shutdown ||
839d216dff5SRobert Mastors table->dbt_expiry == NULL ||
840d216dff5SRobert Mastors (*table->dbt_expiry)(entry->dbe_data))) {
841d216dff5SRobert Mastors entry->dbe_refcnt--;
842d216dff5SRobert Mastors count++;
843d216dff5SRobert Mastors found = TRUE;
8447c478bd9Sstevel@tonic-gate }
845d216dff5SRobert Mastors mutex_exit(entry->dbe_lock);
8467c478bd9Sstevel@tonic-gate }
8477c478bd9Sstevel@tonic-gate if (found) {
848d216dff5SRobert Mastors if (!rw_tryupgrade(bp->dbk_lock)) {
849d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
850d216dff5SRobert Mastors rw_enter(bp->dbk_lock, RW_WRITER);
8517c478bd9Sstevel@tonic-gate }
8527c478bd9Sstevel@tonic-gate
853d216dff5SRobert Mastors l = bp->dbk_head;
8547c478bd9Sstevel@tonic-gate while (l) {
8557c478bd9Sstevel@tonic-gate t = l;
856d216dff5SRobert Mastors entry = t->entry;
8577c478bd9Sstevel@tonic-gate l = l->next;
858d216dff5SRobert Mastors if (entry->dbe_refcnt == 0) {
859d216dff5SRobert Mastors DEQUEUE(bp->dbk_head, t);
8607c478bd9Sstevel@tonic-gate t->next = NULL;
8617c478bd9Sstevel@tonic-gate t->prev = NULL;
8627c478bd9Sstevel@tonic-gate INVALIDATE_ADDR(t->entry);
863d216dff5SRobert Mastors rfs4_dbe_destroy(entry);
8647c478bd9Sstevel@tonic-gate }
8657c478bd9Sstevel@tonic-gate }
8667c478bd9Sstevel@tonic-gate }
867d216dff5SRobert Mastors rw_exit(bp->dbk_lock);
8687c478bd9Sstevel@tonic-gate /*
8697c478bd9Sstevel@tonic-gate * delay slightly if there is more work to do
8707c478bd9Sstevel@tonic-gate * with the expectation that other reaper
8717c478bd9Sstevel@tonic-gate * threads are freeing data structures as well
8727c478bd9Sstevel@tonic-gate * and in turn will reduce ref counts on
8737c478bd9Sstevel@tonic-gate * entries in this table allowing them to be
8747c478bd9Sstevel@tonic-gate * released. This is only done in the
8757c478bd9Sstevel@tonic-gate * instance that the tables are being shut down.
8767c478bd9Sstevel@tonic-gate */
877d216dff5SRobert Mastors if (table->dbt_reaper_shutdown && bp->dbk_head != NULL)
8787c478bd9Sstevel@tonic-gate delay(hz/100);
8797c478bd9Sstevel@tonic-gate /*
8807c478bd9Sstevel@tonic-gate * If this is a table shutdown, keep going until
8817c478bd9Sstevel@tonic-gate * everything is gone
8827c478bd9Sstevel@tonic-gate */
883d216dff5SRobert Mastors } while (table->dbt_reaper_shutdown && bp->dbk_head != NULL);
8847c478bd9Sstevel@tonic-gate
885d216dff5SRobert Mastors if (!table->dbt_reaper_shutdown && desired && count >= desired)
8867c478bd9Sstevel@tonic-gate break;
8877c478bd9Sstevel@tonic-gate }
8887c478bd9Sstevel@tonic-gate
889d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug & REAP_DEBUG,
890d216dff5SRobert Mastors (CE_NOTE, "Reaped %d entries older than %ld seconds in table %s",
891d216dff5SRobert Mastors count, cache_time, table->dbt_name));
8927c478bd9Sstevel@tonic-gate }
8937c478bd9Sstevel@tonic-gate
8947c478bd9Sstevel@tonic-gate static void
reaper_thread(caddr_t * arg)8957c478bd9Sstevel@tonic-gate reaper_thread(caddr_t *arg)
8967c478bd9Sstevel@tonic-gate {
897f6cf9e50SRick Mesta rfs4_table_t *table = (rfs4_table_t *)arg;
898f6cf9e50SRick Mesta clock_t rc;
8997c478bd9Sstevel@tonic-gate
900d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug,
901d216dff5SRobert Mastors (CE_NOTE, "rfs4_reaper_thread starting for %s", table->dbt_name));
9027c478bd9Sstevel@tonic-gate
903d216dff5SRobert Mastors CALLB_CPR_INIT(&table->dbt_reaper_cpr_info, &table->dbt_reaper_cv_lock,
904d216dff5SRobert Mastors callb_generic_cpr, "nfsv4Reaper");
9057c478bd9Sstevel@tonic-gate
906d216dff5SRobert Mastors mutex_enter(&table->dbt_reaper_cv_lock);
9077c478bd9Sstevel@tonic-gate do {
908d216dff5SRobert Mastors CALLB_CPR_SAFE_BEGIN(&table->dbt_reaper_cpr_info);
909d3d50737SRafael Vanoni rc = cv_reltimedwait_sig(&table->dbt_reaper_wait,
910f6cf9e50SRick Mesta &table->dbt_reaper_cv_lock,
911f6cf9e50SRick Mesta SEC_TO_TICK(table->dbt_id_reap), TR_CLOCK_TICK);
912d216dff5SRobert Mastors CALLB_CPR_SAFE_END(&table->dbt_reaper_cpr_info,
913d216dff5SRobert Mastors &table->dbt_reaper_cv_lock);
914d216dff5SRobert Mastors rfs4_dbe_reap(table, table->dbt_max_cache_time, 0);
915d216dff5SRobert Mastors } while (rc != 0 && table->dbt_reaper_shutdown == FALSE);
9167c478bd9Sstevel@tonic-gate
917d216dff5SRobert Mastors CALLB_CPR_EXIT(&table->dbt_reaper_cpr_info);
9187c478bd9Sstevel@tonic-gate
919d216dff5SRobert Mastors NFS4_DEBUG(table->dbt_debug,
920d216dff5SRobert Mastors (CE_NOTE, "rfs4_reaper_thread exiting for %s", table->dbt_name));
9217c478bd9Sstevel@tonic-gate
9227c478bd9Sstevel@tonic-gate /* Notify the database shutdown processing that the table is shutdown */
923d216dff5SRobert Mastors mutex_enter(table->dbt_db->db_lock);
924d216dff5SRobert Mastors table->dbt_db->db_shutdown_count--;
925d216dff5SRobert Mastors cv_signal(&table->dbt_db->db_shutdown_wait);
926d216dff5SRobert Mastors mutex_exit(table->dbt_db->db_lock);
9270dfe541eSEvan Layton zthread_exit();
9287c478bd9Sstevel@tonic-gate }
9297c478bd9Sstevel@tonic-gate
9307c478bd9Sstevel@tonic-gate static void
rfs4_start_reaper(rfs4_table_t * table)9317c478bd9Sstevel@tonic-gate rfs4_start_reaper(rfs4_table_t *table)
9327c478bd9Sstevel@tonic-gate {
933d216dff5SRobert Mastors if (table->dbt_max_cache_time == 0)
9347c478bd9Sstevel@tonic-gate return;
9357c478bd9Sstevel@tonic-gate
9360dfe541eSEvan Layton (void) zthread_create(NULL, 0, reaper_thread, table, 0,
937d216dff5SRobert Mastors minclsyspri);
9387c478bd9Sstevel@tonic-gate }
9397c478bd9Sstevel@tonic-gate
#ifdef DEBUG
/*
 * Debug aid (DEBUG builds only): log the address of "entry", the name
 * of the table it belongs to, its reference count and its id.
 */
void
rfs4_dbe_debug(rfs4_dbe_t *entry)
{
	/* first line: which entry, from which table */
	cmn_err(CE_NOTE,
	    "Entry %p from table %s",
	    (void *)entry,
	    entry->dbe_table->dbt_name);

	/* continuation line: reference count and id */
	cmn_err(CE_CONT,
	    "\trefcnt = %d id = %d",
	    entry->dbe_refcnt,
	    entry->dbe_id);
}
#endif
950