/*
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * NFS Lock Manager, start/stop, support functions, etc.
 * Most of the interesting code is here.
 *
 * Source code derived from FreeBSD nlm_prot_impl.c
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/mount.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/share.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/syslog.h>
#include <sys/class.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/queue.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <netinet/in.h>

#include <rpc/rpc.h>
#include <rpc/xdr.h>
#include <rpc/pmap_prot.h>
#include <rpc/pmap_clnt.h>
#include <rpc/rpcb_prot.h>

#include <rpcsvc/nlm_prot.h>
#include <rpcsvc/sm_inter.h>
#include <rpcsvc/nsm_addr.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/export.h>
#include <nfs/rnode.h>
#include <nfs/lm.h>

#include "nlm_impl.h"

struct nlm_knc {
	struct knetconfig	n_knc;
	const char		*n_netid;
};
/*
 * Number of attempts NLM makes to obtain the RPC binding
 * of the local statd.
 */
#define	NLM_NSM_RPCBIND_RETRIES 10

/*
 * Timeout (in seconds) NLM waits before making another
 * attempt to obtain the RPC binding of the local statd.
 */
#define	NLM_NSM_RPCBIND_TIMEOUT 5

/*
 * Total number of sysids in the NLM sysid bitmap
 */
#define	NLM_BMAP_NITEMS	(LM_SYSID_MAX + 1)

/*
 * Number of ulong_t words in the bitmap that is used
 * for allocation of sysid numbers.
 */
#define	NLM_BMAP_WORDS  (NLM_BMAP_NITEMS / BT_NBIPUL)

/*
 * Given an integer x, the macro returns
 * -1 if x is negative,
 *  0 if x is zero,
 *  1 if x is positive.
 */
#define	SIGN(x) (((x) > 0) - ((x) < 0))
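
/*
 * For example, SIGN(memcmp(a, b, len)) normalizes memcmp()'s arbitrary
 * non-zero result to exactly -1/0/1, which is the contract the AVL
 * comparators later in this file must satisfy.
 */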

#define	ARRSIZE(arr)	(sizeof (arr) / sizeof ((arr)[0]))
#define	NLM_KNCS	ARRSIZE(nlm_netconfigs)

krwlock_t lm_lck;

/*
 * Zero timeout for asynchronous NLM RPC operations
 */
static const struct timeval nlm_rpctv_zero = { 0,  0 };

/*
 * List of all per-zone nlm_globals instances,
 * linked together.
 */
static struct nlm_globals_list nlm_zones_list; /* (g) */

/*
 * NLM kmem caches
 */
static struct kmem_cache *nlm_hosts_cache = NULL;
static struct kmem_cache *nlm_vhold_cache = NULL;

/*
 * A bitmap for allocation of new sysids.
 * A sysid is a unique number between LM_SYSID
 * and LM_SYSID_MAX. A sysid represents a unique remote
 * host that holds file locks on the given host.
 */
static ulong_t	nlm_sysid_bmap[NLM_BMAP_WORDS];	/* (g) */
static int	nlm_sysid_nidx;			/* (g) */
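
/*
 * A minimal sketch of how a new sysid could be taken from this bitmap
 * (hypothetical illustration only; the real nlm_sysid_alloc() is
 * defined elsewhere in this file and also maintains nlm_sysid_nidx
 * as a rotating search hint):
 *
 *	int idx;
 *
 *	rw_enter(&lm_lck, RW_WRITER);
 *	for (idx = 1; idx < NLM_BMAP_NITEMS; idx++) {
 *		if (!BT_TEST(nlm_sysid_bmap, idx)) {
 *			BT_SET(nlm_sysid_bmap, idx);
 *			break;
 *		}
 *	}
 *	rw_exit(&lm_lck);
 */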

/*
 * RPC service registration for all transports
 */
static SVC_CALLOUT nlm_svcs[] = {
	{ NLM_PROG, 4, 4, nlm_prog_4 },	/* NLM4_VERS */
	{ NLM_PROG, 1, 3, nlm_prog_3 }	/* NLM_VERS - NLM_VERSX */
};

static SVC_CALLOUT_TABLE nlm_sct = {
	ARRSIZE(nlm_svcs),
	FALSE,
	nlm_svcs
};
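
/*
 * Note: this callout table is what ties the NLM dispatch functions to
 * the kRPC layer; it is expected to be handed to svc_tli_kcreate()
 * when a transport endpoint is registered (see nlm_svc_add_ep(),
 * referenced in nlm_host_findcreate() below).
 */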

/*
 * Static table of all netid/knetconfig pairs the network
 * lock manager can work with. The nlm_netconfigs table
 * is used when we need to get a valid knetconfig by
 * netid and vice versa.
 *
 * Knetconfigs are activated either by a call from the
 * user-space lockd daemon (server side) or by taking the
 * knetconfig from the NFS mount info (client side).
 */
static struct nlm_knc nlm_netconfigs[] = { /* (g) */
	/* UDP */
	{
		{ NC_TPI_CLTS, NC_INET, NC_UDP, NODEV },
		"udp",
	},
	/* TCP */
	{
		{ NC_TPI_COTS_ORD, NC_INET, NC_TCP, NODEV },
		"tcp",
	},
	/* UDP over IPv6 */
	{
		{ NC_TPI_CLTS, NC_INET6, NC_UDP, NODEV },
		"udp6",
	},
	/* TCP over IPv6 */
	{
		{ NC_TPI_COTS_ORD, NC_INET6, NC_TCP, NODEV },
		"tcp6",
	},
	/* ticlts (loopback over UDP) */
	{
		{ NC_TPI_CLTS, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticlts",
	},
	/* ticotsord (loopback over TCP) */
	{
		{ NC_TPI_COTS_ORD, NC_LOOPBACK, NC_NOPROTO, NODEV },
		"ticotsord",
	},
};
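
/*
 * A netid-to-knetconfig lookup over this table might look like the
 * sketch below (hypothetical illustration; the real helper, e.g.
 * nlm_knc_from_netid() used by nlm_host_findcreate(), is defined
 * later in this file):
 *
 *	int i;
 *
 *	for (i = 0; i < NLM_KNCS; i++) {
 *		if (strcmp(nlm_netconfigs[i].n_netid, netid) == 0) {
 *			*knc = nlm_netconfigs[i].n_knc;
 *			return (0);
 *		}
 *	}
 *	return (ENOENT);
 */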

/*
 * NLM misc. functions
 */
static void nlm_copy_netbuf(struct netbuf *, struct netbuf *);
static int nlm_netbuf_addrs_cmp(struct netbuf *, struct netbuf *);
static void nlm_kmem_reclaim(void *);
static void nlm_pool_shutdown(void);
static void nlm_suspend_zone(struct nlm_globals *);
static void nlm_resume_zone(struct nlm_globals *);
static void nlm_nsm_clnt_init(CLIENT *, struct nlm_nsm *);
static void nlm_netbuf_to_netobj(struct netbuf *, int *, netobj *);

/*
 * NLM thread functions
 */
static void nlm_gc(struct nlm_globals *);
static void nlm_reclaimer(struct nlm_host *);

/*
 * NLM NSM functions
 */
static int nlm_init_local_knc(struct knetconfig *);
static int nlm_nsm_init_local(struct nlm_nsm *);
static int nlm_nsm_init(struct nlm_nsm *, struct knetconfig *, struct netbuf *);
static void nlm_nsm_fini(struct nlm_nsm *);
static enum clnt_stat nlm_nsm_simu_crash(struct nlm_nsm *);
static enum clnt_stat nlm_nsm_stat(struct nlm_nsm *, int32_t *);
static enum clnt_stat nlm_nsm_mon(struct nlm_nsm *, char *, uint16_t);
static enum clnt_stat nlm_nsm_unmon(struct nlm_nsm *, char *);

/*
 * NLM host functions
 */
static int nlm_host_ctor(void *, void *, int);
static void nlm_host_dtor(void *, void *);
static void nlm_host_destroy(struct nlm_host *);
static struct nlm_host *nlm_host_create(char *, const char *,
    struct knetconfig *, struct netbuf *);
static struct nlm_host *nlm_host_find_locked(struct nlm_globals *,
    const char *, struct netbuf *, avl_index_t *);
static void nlm_host_unregister(struct nlm_globals *, struct nlm_host *);
static void nlm_host_gc_vholds(struct nlm_host *);
static bool_t nlm_host_has_srv_locks(struct nlm_host *);
static bool_t nlm_host_has_cli_locks(struct nlm_host *);
static bool_t nlm_host_has_locks(struct nlm_host *);

/*
 * NLM vhold functions
 */
static int nlm_vhold_ctor(void *, void *, int);
static void nlm_vhold_dtor(void *, void *);
static void nlm_vhold_destroy(struct nlm_host *,
    struct nlm_vhold *);
static bool_t nlm_vhold_busy(struct nlm_host *, struct nlm_vhold *);
static void nlm_vhold_clean(struct nlm_vhold *, int);

/*
 * NLM client/server sleeping locks/share reservation functions
 */
struct nlm_slreq *nlm_slreq_find_locked(struct nlm_host *,
    struct nlm_vhold *, struct flock64 *);
static struct nlm_shres *nlm_shres_create_item(struct shrlock *, vnode_t *);
static void nlm_shres_destroy_item(struct nlm_shres *);
static bool_t nlm_shres_equal(struct shrlock *, struct shrlock *);

/*
 * NLM initialization functions.
 */
void
nlm_init(void)
{
	nlm_hosts_cache = kmem_cache_create("nlm_host_cache",
	    sizeof (struct nlm_host), 0, nlm_host_ctor, nlm_host_dtor,
	    nlm_kmem_reclaim, NULL, NULL, 0);

	nlm_vhold_cache = kmem_cache_create("nlm_vhold_cache",
	    sizeof (struct nlm_vhold), 0, nlm_vhold_ctor, nlm_vhold_dtor,
	    NULL, NULL, NULL, 0);

	nlm_rpc_init();
	TAILQ_INIT(&nlm_zones_list);

	/* initialize the sysid bitmap */
	bzero(nlm_sysid_bmap, sizeof (nlm_sysid_bmap));
	nlm_sysid_nidx = 1;

	/*
	 * Reserve sysid #0, because it's associated
	 * with local locks only. Don't allow it to be
	 * allocated for remote locks.
	 */
	BT_SET(nlm_sysid_bmap, 0);
}

void
nlm_globals_register(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_INSERT_TAIL(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}

void
nlm_globals_unregister(struct nlm_globals *g)
{
	rw_enter(&lm_lck, RW_WRITER);
	TAILQ_REMOVE(&nlm_zones_list, g, nlm_link);
	rw_exit(&lm_lck);
}

/* ARGSUSED */
static void
nlm_kmem_reclaim(void *cdrarg)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
		cv_broadcast(&g->nlm_gc_sched_cv);

	rw_exit(&lm_lck);
}

/*
 * NLM garbage collector thread (GC).
 *
 * The NLM GC periodically checks whether there are any host objects
 * that can be cleaned up. It also releases stale vnodes that
 * live on the server side (under protection of vhold objects).
 *
 * NLM host objects are cleaned up from the GC thread because
 * the operations that help us determine whether a given host has
 * any locks can be quite expensive, and it's not good to call
 * them every time the very last reference to the host is dropped.
 * Thus we use a "lazy" approach for host cleanup.
 *
 * The work of the GC is to release stale vnodes on the server side
 * and destroy hosts that have had no locks and no activity for
 * some time (i.e. idle hosts).
 */
static void
nlm_gc(struct nlm_globals *g)
{
	struct nlm_host *hostp;
	clock_t now, idle_period;

	idle_period = SEC_TO_TICK(g->cn_idle_tmo);
	mutex_enter(&g->lock);
	for (;;) {
		/*
		 * The GC thread can be explicitly scheduled from the
		 * memory reclamation function.
		 */
		(void) cv_timedwait(&g->nlm_gc_sched_cv, &g->lock,
		    ddi_get_lbolt() + idle_period);

		/*
		 * NLM is shutting down, time to die.
		 */
		if (g->run_status == NLM_ST_STOPPING)
			break;

		now = ddi_get_lbolt();
		DTRACE_PROBE2(gc__start, struct nlm_globals *, g,
		    clock_t, now);

		/*
		 * Find all obviously unused vholds and destroy them.
		 */
		for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
		    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
			struct nlm_vhold *nvp;

			mutex_enter(&hostp->nh_lock);

			nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
			while (nvp != NULL) {
				struct nlm_vhold *new_nvp;

				new_nvp = TAILQ_NEXT(nvp, nv_link);

				/*
				 * If these conditions are met, the vhold is
				 * obviously unused and we will destroy it.  In
				 * case either v_filocks or v_shrlocks is
				 * non-NULL the vhold might still be unused by
				 * the host, but it is expensive to check that.
				 * We defer such a check until the host is
				 * idle.  The expensive check is done below
				 * without the global lock held.
				 */
				if (nvp->nv_refcnt == 0 &&
				    nvp->nv_vp->v_filocks == NULL &&
				    nvp->nv_vp->v_shrlocks == NULL) {
					nlm_vhold_destroy(hostp, nvp);
				}

				nvp = new_nvp;
			}

			mutex_exit(&hostp->nh_lock);
		}

		/*
		 * Handle all hosts that are unused at the moment
		 * until we meet one whose idle timeout is in the future.
		 */
		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
			bool_t has_locks;

			if (hostp->nh_idle_timeout > now)
				break;

			/*
			 * Drop the global lock while doing expensive work
			 * on this host. We'll re-check any conditions
			 * that might change after retaking the global
			 * lock.
			 */
			mutex_exit(&g->lock);
			mutex_enter(&hostp->nh_lock);

			/*
			 * The nlm_globals lock was dropped earlier because
			 * garbage collecting vholds and checking whether
			 * the host has any locks/shares are expensive
			 * operations.
			 */
			nlm_host_gc_vholds(hostp);
			has_locks = nlm_host_has_locks(hostp);

			mutex_exit(&hostp->nh_lock);
			mutex_enter(&g->lock);

			/*
			 * While we were doing expensive operations
			 * outside of the nlm_globals critical section,
			 * somebody could have taken the host and removed
			 * it from the idle list.  Whether it's been
			 * reinserted or not, our information about
			 * the host is outdated, and we should take no
			 * further action.
			 */
			if ((hostp->nh_flags & NLM_NH_INIDLE) == 0 ||
			    hostp->nh_idle_timeout > now)
				continue;

			/*
			 * If the host has locks we have to renew the
			 * host's timeout and put it at the end of the LRU
			 * list.
			 */
			if (has_locks) {
				TAILQ_REMOVE(&g->nlm_idle_hosts,
				    hostp, nh_link);
				hostp->nh_idle_timeout = now + idle_period;
				TAILQ_INSERT_TAIL(&g->nlm_idle_hosts,
				    hostp, nh_link);
				continue;
			}

			/*
			 * We're here if all the following conditions hold:
			 * 1) The host has no locks or share reservations
			 * 2) The host is unused
			 * 3) The host wasn't touched by anyone for at least
			 *    g->cn_idle_tmo seconds.
			 *
			 * So, now we can destroy it.
			 */
			nlm_host_unregister(g, hostp);
			mutex_exit(&g->lock);

			nlm_host_unmonitor(g, hostp);
			nlm_host_destroy(hostp);
			mutex_enter(&g->lock);
			if (g->run_status == NLM_ST_STOPPING)
				break;
		}

		DTRACE_PROBE(gc__end);
	}

	DTRACE_PROBE1(gc__exit, struct nlm_globals *, g);

	/* Let others know that the GC has died */
	g->nlm_gc_thread = NULL;
	mutex_exit(&g->lock);

	cv_broadcast(&g->nlm_gc_finish_cv);
	zthread_exit();
}

/*
 * This thread reclaims locks/shares acquired by the client side
 * on the given server, represented by hostp.
 */
static void
nlm_reclaimer(struct nlm_host *hostp)
{
	struct nlm_globals *g;

	mutex_enter(&hostp->nh_lock);
	hostp->nh_reclaimer = curthread;
	mutex_exit(&hostp->nh_lock);

	g = zone_getspecific(nlm_zone_key, curzone);
	nlm_reclaim_client(g, hostp);

	mutex_enter(&hostp->nh_lock);
	hostp->nh_flags &= ~NLM_NH_RECLAIM;
	hostp->nh_reclaimer = NULL;
	cv_broadcast(&hostp->nh_recl_cv);
	mutex_exit(&hostp->nh_lock);

	/*
	 * The host was explicitly referenced before
	 * nlm_reclaim() was called; release that
	 * reference here.
	 */
	nlm_host_release(g, hostp);
	zthread_exit();
}

/*
 * Copy a struct netobj.  (see xdr.h)
 */
void
nlm_copy_netobj(struct netobj *dst, struct netobj *src)
{
	dst->n_len = src->n_len;
	dst->n_bytes = kmem_alloc(src->n_len, KM_SLEEP);
	bcopy(src->n_bytes, dst->n_bytes, src->n_len);
}
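
/*
 * Note: the copy is heap-allocated, so the caller is expected to free
 * it with kmem_free(dst->n_bytes, dst->n_len) when done, as
 * nlm_slock_unregister() below does for the file handle it copied.
 */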

/*
 * An NLM-specific replacement for clnt_call().
 * nlm_clnt_call() is used by all RPC functions generated
 * from the nlm_prot.x specification. The function is aware
 * of some pitfalls of NLM RPC procedures and has logic
 * that handles them properly.
 */
enum clnt_stat
nlm_clnt_call(CLIENT *clnt, rpcproc_t procnum, xdrproc_t xdr_args,
    caddr_t argsp, xdrproc_t xdr_result, caddr_t resultp, struct timeval wait)
{
	k_sigset_t oldmask;
	enum clnt_stat stat;
	bool_t sig_blocked = FALSE;

	/*
	 * If the NLM RPC procnum is one of the NLM _RES procedures
	 * that are used to reply to asynchronous NLM RPCs
	 * (MSG calls), explicitly set the RPC timeout to zero.
	 * The peer doesn't send a reply to _RES procedures, so
	 * we don't need to wait for anything.
	 *
	 * NOTE: we ignore NLM4_*_RES procnums because they are
	 * equal to the NLM_*_RES numbers.
	 */
	if (procnum >= NLM_TEST_RES && procnum <= NLM_GRANTED_RES)
		wait = nlm_rpctv_zero;

	/*
	 * We need to block signals in case of the NLM_CANCEL RPC
	 * in order to prevent interruption of network RPC
	 * calls.
	 */
	if (procnum == NLM_CANCEL) {
		k_sigset_t newmask;

		sigfillset(&newmask);
		sigreplace(&newmask, &oldmask);
		sig_blocked = TRUE;
	}

	stat = clnt_call(clnt, procnum, xdr_args,
	    argsp, xdr_result, resultp, wait);

	/*
	 * Restore the signal mask if signals were blocked
	 */
	if (sig_blocked)
		sigreplace(&oldmask, (k_sigset_t *)NULL);

	return (stat);
}
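
/*
 * A sketch of how the stubs generated from nlm_prot.x are expected to
 * use this wrapper (hypothetical stub shown for illustration only;
 * the real generated code may differ in details):
 *
 *	enum clnt_stat
 *	nlm_test_1(nlm_testargs *argp, nlm_testres *resp, CLIENT *clnt)
 *	{
 *		struct timeval wait = { 25, 0 };    (arbitrary timeout)
 *
 *		return (nlm_clnt_call(clnt, NLM_TEST,
 *		    (xdrproc_t)xdr_nlm_testargs, (caddr_t)argp,
 *		    (xdrproc_t)xdr_nlm_testres, (caddr_t)resp, wait));
 *	}
 */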

/*
 * Suspend the NLM client/server in the given zone.
 *
 * During the suspend operation we mark those hosts
 * that have any locks with the NLM_NH_SUSPEND flag,
 * so that they can be checked later, when the resume
 * operation occurs.
 */
static void
nlm_suspend_zone(struct nlm_globals *g)
{
	struct nlm_host *hostp;
	struct nlm_host_list all_hosts;

	/*
	 * Note that while we're doing the suspend, the GC thread is
	 * active and it can destroy some hosts while we're walking
	 * through the hosts tree. To prevent that, and to make the
	 * suspend logic a bit simpler, we put all hosts on a local
	 * "all_hosts" list and increment the reference counter of
	 * each host. This guarantees that no hosts will be released
	 * while we're doing the suspend.
	 * NOTE: the reference on each host must be dropped during
	 * the resume operation.
	 */
	TAILQ_INIT(&all_hosts);
	mutex_enter(&g->lock);
	for (hostp = avl_first(&g->nlm_hosts_tree); hostp != NULL;
	    hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp)) {
		/*
		 * If the host is idle, remove it from the idle list and
		 * clear the idle flag. That is done to prevent the GC
		 * from touching this host.
		 */
		if (hostp->nh_flags & NLM_NH_INIDLE) {
			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
			hostp->nh_flags &= ~NLM_NH_INIDLE;
		}

		hostp->nh_refs++;
		TAILQ_INSERT_TAIL(&all_hosts, hostp, nh_link);
	}

	/*
	 * Now we can walk through all hosts on the system
	 * with the zone globals lock released. The fact that
	 * we have taken a reference to each host guarantees
	 * that no hosts can be destroyed during that process.
	 */
	mutex_exit(&g->lock);
	while ((hostp = TAILQ_FIRST(&all_hosts)) != NULL) {
		mutex_enter(&hostp->nh_lock);
		if (nlm_host_has_locks(hostp))
			hostp->nh_flags |= NLM_NH_SUSPEND;

		mutex_exit(&hostp->nh_lock);
		TAILQ_REMOVE(&all_hosts, hostp, nh_link);
	}
}

/*
 * Resume NLM hosts for the given zone.
 *
 * nlm_resume_zone() is called after hosts were suspended
 * (see nlm_suspend_zone) and its main purpose is to check
 * whether remote locks owned by hosts are still in a consistent
 * state. If they aren't, the resume function tries to reclaim
 * locks (for client side hosts) and clean locks (for
 * server side hosts).
 */
static void
nlm_resume_zone(struct nlm_globals *g)
{
	struct nlm_host *hostp, *h_next;

	mutex_enter(&g->lock);
	hostp = avl_first(&g->nlm_hosts_tree);

	/*
	 * In nlm_suspend_zone() the reference counter of each
	 * host was incremented, so we can safely iterate through
	 * all hosts without worrying that any host we touch will
	 * be removed at the moment.
	 */
	while (hostp != NULL) {
		struct nlm_nsm nsm;
		enum clnt_stat stat;
		int32_t sm_state;
		int error;
		bool_t resume_failed = FALSE;

		h_next = AVL_NEXT(&g->nlm_hosts_tree, hostp);
		mutex_exit(&g->lock);

		DTRACE_PROBE1(resume__host, struct nlm_host *, hostp);

		/*
		 * The suspend operation determined that the host
		 * doesn't have any locks. Skip it.
		 */
		if (!(hostp->nh_flags & NLM_NH_SUSPEND))
			goto cycle_end;

		error = nlm_nsm_init(&nsm, &hostp->nh_knc, &hostp->nh_addr);
		if (error != 0) {
			NLM_ERR("Resume: Failed to contact NSM of host %s "
			    "[error=%d]\n", hostp->nh_name, error);
			resume_failed = TRUE;
			goto cycle_end;
		}

		stat = nlm_nsm_stat(&nsm, &sm_state);
		if (stat != RPC_SUCCESS) {
			NLM_ERR("Resume: Failed to call SM_STAT operation for "
			    "host %s [stat=%d]\n", hostp->nh_name, stat);
			resume_failed = TRUE;
			nlm_nsm_fini(&nsm);
			goto cycle_end;
		}

		if (sm_state != hostp->nh_state) {
			/*
			 * The current SM state of the host isn't equal
			 * to the one the host had when it was suspended.
			 * Probably it was rebooted. Try to reclaim
			 * locks if the host has any on its client side.
			 * Also try to clean up its server side locks
			 * (if the host has any).
			 */
			nlm_host_notify_client(hostp, sm_state);
			nlm_host_notify_server(hostp, sm_state);
		}

		nlm_nsm_fini(&nsm);

cycle_end:
		if (resume_failed) {
			/*
			 * Resume failed for the given host.
			 * Just clean up all resources it owns.
			 */
			nlm_host_notify_server(hostp, 0);
			nlm_client_cancel_all(g, hostp);
		}

		hostp->nh_flags &= ~NLM_NH_SUSPEND;
		nlm_host_release(g, hostp);
		hostp = h_next;
		mutex_enter(&g->lock);
	}

	mutex_exit(&g->lock);
}

/*
 * NLM functions responsible for operations on the NSM handle.
 */

/*
 * Initialize the knetconfig that is used for communication
 * with the local statd via the loopback interface.
 */
static int
nlm_init_local_knc(struct knetconfig *knc)
{
	int error;
	vnode_t *vp;

	bzero(knc, sizeof (*knc));
	error = lookupname("/dev/tcp", UIO_SYSSPACE,
	    FOLLOW, NULLVPP, &vp);
	if (error != 0)
		return (error);

	knc->knc_semantics = NC_TPI_COTS;
	knc->knc_protofmly = NC_INET;
	knc->knc_proto = NC_TCP;
	knc->knc_rdev = vp->v_rdev;
	VN_RELE(vp);

	return (0);
}

/*
 * Initialize the NSM handle that will be used to talk
 * to the local statd via the loopback interface.
 */
static int
nlm_nsm_init_local(struct nlm_nsm *nsm)
{
	int error;
	struct knetconfig knc;
	struct sockaddr_in sin;
	struct netbuf nb;

	error = nlm_init_local_knc(&knc);
	if (error != 0)
		return (error);

	bzero(&sin, sizeof (sin));
	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	sin.sin_family = AF_INET;

	nb.buf = (char *)&sin;
	nb.len = nb.maxlen = sizeof (sin);

	return (nlm_nsm_init(nsm, &knc, &nb));
}

/*
 * Initialize the NSM handle used for talking to statd
 */
static int
nlm_nsm_init(struct nlm_nsm *nsm, struct knetconfig *knc, struct netbuf *nb)
{
	enum clnt_stat stat;
	int error, retries;

	bzero(nsm, sizeof (*nsm));
	nsm->ns_knc = *knc;
	nlm_copy_netbuf(&nsm->ns_addr, nb);

	/*
	 * Try several times to get the port of the statd service.
	 * If rpcbind_getaddr returns RPC_PROGNOTREGISTERED, retry
	 * the attempt, but wait for NLM_NSM_RPCBIND_TIMEOUT seconds
	 * beforehand.
	 */
	for (retries = 0; retries < NLM_NSM_RPCBIND_RETRIES; retries++) {
		stat = rpcbind_getaddr(&nsm->ns_knc, SM_PROG,
		    SM_VERS, &nsm->ns_addr);
		if (stat != RPC_SUCCESS) {
			if (stat == RPC_PROGNOTREGISTERED) {
				delay(SEC_TO_TICK(NLM_NSM_RPCBIND_TIMEOUT));
				continue;
			}
		}

		break;
	}

	if (stat != RPC_SUCCESS) {
		DTRACE_PROBE2(rpcbind__error, enum clnt_stat, stat,
		    int, retries);
		error = ENOENT;
		goto error;
	}

	/*
	 * Create an RPC handle that'll be used for communication with the
	 * local statd using the status monitor protocol.
	 */
	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, SM_PROG, SM_VERS,
	    0, NLM_RPC_RETRIES, zone_kcred(), &nsm->ns_handle);
	if (error != 0)
		goto error;

	/*
	 * Create an RPC handle that'll be used for communication with the
	 * local statd using the address registration protocol.
	 */
	error = clnt_tli_kcreate(&nsm->ns_knc, &nsm->ns_addr, NSM_ADDR_PROGRAM,
	    NSM_ADDR_V1, 0, NLM_RPC_RETRIES, zone_kcred(),
	    &nsm->ns_addr_handle);
	if (error != 0)
		goto error;

	sema_init(&nsm->ns_sem, 1, NULL, SEMA_DEFAULT, NULL);
	return (0);

error:
	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
	if (nsm->ns_handle != NULL) {
		ASSERT(nsm->ns_handle->cl_auth != NULL);
		auth_destroy(nsm->ns_handle->cl_auth);
		CLNT_DESTROY(nsm->ns_handle);
	}

	return (error);
}

static void
nlm_nsm_fini(struct nlm_nsm *nsm)
{
	kmem_free(nsm->ns_addr.buf, nsm->ns_addr.maxlen);
	if (nsm->ns_addr_handle->cl_auth != NULL)
		auth_destroy(nsm->ns_addr_handle->cl_auth);
	CLNT_DESTROY(nsm->ns_addr_handle);
	nsm->ns_addr_handle = NULL;
	if (nsm->ns_handle->cl_auth != NULL)
		auth_destroy(nsm->ns_handle->cl_auth);
	CLNT_DESTROY(nsm->ns_handle);
	nsm->ns_handle = NULL;
	sema_destroy(&nsm->ns_sem);
}

static enum clnt_stat
nlm_nsm_simu_crash(struct nlm_nsm *nsm)
{
	enum clnt_stat stat;

	sema_p(&nsm->ns_sem);
	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
	stat = sm_simu_crash_1(NULL, NULL, nsm->ns_handle);
	sema_v(&nsm->ns_sem);

	return (stat);
}

static enum clnt_stat
nlm_nsm_stat(struct nlm_nsm *nsm, int32_t *out_stat)
{
	struct sm_name args;
	struct sm_stat_res res;
	enum clnt_stat stat;

	args.mon_name = uts_nodename();
	bzero(&res, sizeof (res));

	sema_p(&nsm->ns_sem);
	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
	stat = sm_stat_1(&args, &res, nsm->ns_handle);
	sema_v(&nsm->ns_sem);

	if (stat == RPC_SUCCESS)
		*out_stat = res.state;

	return (stat);
}

static enum clnt_stat
nlm_nsm_mon(struct nlm_nsm *nsm, char *hostname, uint16_t priv)
{
	struct mon args;
	struct sm_stat_res res;
	enum clnt_stat stat;

	bzero(&args, sizeof (args));
	bzero(&res, sizeof (res));

	args.mon_id.mon_name = hostname;
	args.mon_id.my_id.my_name = uts_nodename();
	args.mon_id.my_id.my_prog = NLM_PROG;
	args.mon_id.my_id.my_vers = NLM_SM;
	args.mon_id.my_id.my_proc = NLM_SM_NOTIFY1;
	bcopy(&priv, args.priv, sizeof (priv));

	sema_p(&nsm->ns_sem);
	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
	stat = sm_mon_1(&args, &res, nsm->ns_handle);
	sema_v(&nsm->ns_sem);

	return (stat);
}

static enum clnt_stat
nlm_nsm_unmon(struct nlm_nsm *nsm, char *hostname)
{
	struct mon_id args;
	struct sm_stat res;
	enum clnt_stat stat;

	bzero(&args, sizeof (args));
	bzero(&res, sizeof (res));

	args.mon_name = hostname;
	args.my_id.my_name = uts_nodename();
	args.my_id.my_prog = NLM_PROG;
	args.my_id.my_vers = NLM_SM;
	args.my_id.my_proc = NLM_SM_NOTIFY1;

	sema_p(&nsm->ns_sem);
	nlm_nsm_clnt_init(nsm->ns_handle, nsm);
	stat = sm_unmon_1(&args, &res, nsm->ns_handle);
	sema_v(&nsm->ns_sem);

	return (stat);
}

static enum clnt_stat
nlm_nsmaddr_reg(struct nlm_nsm *nsm, char *name, int family, netobj *address)
{
	struct reg1args args = { 0 };
	struct reg1res res = { 0 };
	enum clnt_stat stat;

	args.family = family;
	args.name = name;
	args.address = *address;

	sema_p(&nsm->ns_sem);
	nlm_nsm_clnt_init(nsm->ns_addr_handle, nsm);
	stat = nsmaddrproc1_reg_1(&args, &res, nsm->ns_addr_handle);
	sema_v(&nsm->ns_sem);

	return (stat);
}

/*
 * Get the NLM vhold object corresponding to vnode "vp".
 * If no such object was found, create a new one.
 *
 * The purpose of this function is to associate a vhold
 * object with the given vnode, so that:
 * 1) the vnode is held (VN_HOLD) while the vhold object is alive.
 * 2) the host keeps track of all vnodes it has touched with lock
 *    or share operations. These vnodes are accessible
 *    via the collection of vhold objects.
 */
struct nlm_vhold *
nlm_vhold_get(struct nlm_host *hostp, vnode_t *vp)
{
	struct nlm_vhold *nvp, *new_nvp = NULL;

	mutex_enter(&hostp->nh_lock);
	nvp = nlm_vhold_find_locked(hostp, vp);
	if (nvp != NULL)
		goto out;

	/* nlm_vhold wasn't found, so create a new one */
	mutex_exit(&hostp->nh_lock);
	new_nvp = kmem_cache_alloc(nlm_vhold_cache, KM_SLEEP);

	/*
	 * Check if another thread has already
	 * created the same nlm_vhold.
	 */
	mutex_enter(&hostp->nh_lock);
	nvp = nlm_vhold_find_locked(hostp, vp);
	if (nvp == NULL) {
		nvp = new_nvp;
		new_nvp = NULL;

		TAILQ_INIT(&nvp->nv_slreqs);
		nvp->nv_vp = vp;
		nvp->nv_refcnt = 1;
		VN_HOLD(nvp->nv_vp);

		VERIFY(mod_hash_insert(hostp->nh_vholds_by_vp,
		    (mod_hash_key_t)vp, (mod_hash_val_t)nvp) == 0);
		TAILQ_INSERT_TAIL(&hostp->nh_vholds_list, nvp, nv_link);
	}

out:
	mutex_exit(&hostp->nh_lock);
	if (new_nvp != NULL)
		kmem_cache_free(nlm_vhold_cache, new_nvp);

	return (nvp);
}
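
/*
 * Typical usage (sketch): a server-side lock or share handler takes a
 * vhold around the operation and drops it afterwards, e.g.:
 *
 *	struct nlm_vhold *nvp;
 *
 *	nvp = nlm_vhold_get(hostp, vp);
 *	... perform the lock/share operation on nvp->nv_vp ...
 *	nlm_vhold_release(hostp, nvp);
 */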

/*
 * Drop a reference to the vhold object nvp.
 */
void
nlm_vhold_release(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
	if (nvp == NULL)
		return;

	mutex_enter(&hostp->nh_lock);
	ASSERT(nvp->nv_refcnt > 0);
	nvp->nv_refcnt--;

	/*
	 * If these conditions are met, the vhold is obviously unused and we
	 * will destroy it.  In case either v_filocks or v_shrlocks is
	 * non-NULL the vhold might still be unused by the host, but it is
	 * expensive to check that.  We defer such a check until the host is
	 * idle.  The expensive check is done in the NLM garbage collector.
	 */
	if (nvp->nv_refcnt == 0 &&
	    nvp->nv_vp->v_filocks == NULL &&
	    nvp->nv_vp->v_shrlocks == NULL) {
		nlm_vhold_destroy(hostp, nvp);
	}

	mutex_exit(&hostp->nh_lock);
}

/*
 * Clean all locks and share reservations on the
 * given vhold object that were acquired by the
 * given sysid.
 */
static void
nlm_vhold_clean(struct nlm_vhold *nvp, int sysid)
{
	cleanlocks(nvp->nv_vp, IGN_PID, sysid);
	cleanshares_by_sysid(nvp->nv_vp, sysid);
}

static void
nlm_vhold_destroy(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
	ASSERT(MUTEX_HELD(&hostp->nh_lock));

	ASSERT(nvp->nv_refcnt == 0);
	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));

	VERIFY(mod_hash_remove(hostp->nh_vholds_by_vp,
	    (mod_hash_key_t)nvp->nv_vp,
	    (mod_hash_val_t)&nvp) == 0);

	TAILQ_REMOVE(&hostp->nh_vholds_list, nvp, nv_link);
	VN_RELE(nvp->nv_vp);
	nvp->nv_vp = NULL;

	kmem_cache_free(nlm_vhold_cache, nvp);
}

/*
 * Return TRUE if the given vhold is busy.
 * A vhold object is considered "busy" when
 * any of the following conditions hold:
 * 1) Someone is using it at the moment;
 * 2) It has locks;
 * 3) It has share reservations.
 */
static bool_t
nlm_vhold_busy(struct nlm_host *hostp, struct nlm_vhold *nvp)
{
	vnode_t *vp;
	int sysid;

	ASSERT(MUTEX_HELD(&hostp->nh_lock));

	if (nvp->nv_refcnt > 0)
		return (TRUE);

	vp = nvp->nv_vp;
	sysid = hostp->nh_sysid;
	if (flk_has_remote_locks_for_sysid(vp, sysid) ||
	    shr_has_remote_shares(vp, sysid))
		return (TRUE);

	return (FALSE);
}

/* ARGSUSED */
static int
nlm_vhold_ctor(void *datap, void *cdrarg, int kmflags)
{
	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;

	bzero(nvp, sizeof (*nvp));
	return (0);
}

/* ARGSUSED */
static void
nlm_vhold_dtor(void *datap, void *cdrarg)
{
	struct nlm_vhold *nvp = (struct nlm_vhold *)datap;

	ASSERT(nvp->nv_refcnt == 0);
	ASSERT(TAILQ_EMPTY(&nvp->nv_slreqs));
	ASSERT(nvp->nv_vp == NULL);
}

struct nlm_vhold *
nlm_vhold_find_locked(struct nlm_host *hostp, const vnode_t *vp)
{
	struct nlm_vhold *nvp = NULL;

	ASSERT(MUTEX_HELD(&hostp->nh_lock));
	(void) mod_hash_find(hostp->nh_vholds_by_vp,
	    (mod_hash_key_t)vp,
	    (mod_hash_val_t)&nvp);

	if (nvp != NULL)
		nvp->nv_refcnt++;

	return (nvp);
}

/*
 * NLM host functions
 */
static void
nlm_copy_netbuf(struct netbuf *dst, struct netbuf *src)
{
	ASSERT(src->len <= src->maxlen);

	dst->maxlen = src->maxlen;
	dst->len = src->len;
	dst->buf = kmem_zalloc(src->maxlen, KM_SLEEP);
	bcopy(src->buf, dst->buf, src->len);
}

/* ARGSUSED */
static int
nlm_host_ctor(void *datap, void *cdrarg, int kmflags)
{
	struct nlm_host *hostp = (struct nlm_host *)datap;

	bzero(hostp, sizeof (*hostp));
	return (0);
}

/* ARGSUSED */
static void
nlm_host_dtor(void *datap, void *cdrarg)
{
	struct nlm_host *hostp = (struct nlm_host *)datap;
	ASSERT(hostp->nh_refs == 0);
}

static void
nlm_host_unregister(struct nlm_globals *g, struct nlm_host *hostp)
{
	ASSERT(hostp->nh_refs == 0);
	ASSERT(hostp->nh_flags & NLM_NH_INIDLE);

	avl_remove(&g->nlm_hosts_tree, hostp);
	VERIFY(mod_hash_remove(g->nlm_hosts_hash,
	    (mod_hash_key_t)(uintptr_t)hostp->nh_sysid,
	    (mod_hash_val_t)&hostp) == 0);
	TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
	hostp->nh_flags &= ~NLM_NH_INIDLE;
}

/*
 * Free resources used by a host. This is called after the reference
 * count has reached zero so it doesn't need to worry about locks.
 */
static void
nlm_host_destroy(struct nlm_host *hostp)
{
	ASSERT(hostp->nh_name != NULL);
	ASSERT(hostp->nh_netid != NULL);
	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));

	strfree(hostp->nh_name);
	strfree(hostp->nh_netid);
	kmem_free(hostp->nh_addr.buf, hostp->nh_addr.maxlen);

	if (hostp->nh_sysid != LM_NOSYSID)
		nlm_sysid_free(hostp->nh_sysid);

	nlm_rpc_cache_destroy(hostp);

	ASSERT(TAILQ_EMPTY(&hostp->nh_vholds_list));
	mod_hash_destroy_ptrhash(hostp->nh_vholds_by_vp);

	mutex_destroy(&hostp->nh_lock);
	cv_destroy(&hostp->nh_rpcb_cv);
	cv_destroy(&hostp->nh_recl_cv);

	kmem_cache_free(nlm_hosts_cache, hostp);
}

/*
 * Cleanup SERVER-side state after a client restarts,
 * or becomes unresponsive, or whatever.
 *
 * We unlock any active locks owned by the host.
 * When rpc.lockd is shutting down,
 * this function is called with state set to zero
 * which allows us to cancel any pending async locks
 * and clear the locking state.
 *
 * When "state" is 0, we don't update the host's state,
 * but we clean up all remote locks on the host.
 * It's useful to call this function for resource
 * cleanup.
 */
void
nlm_host_notify_server(struct nlm_host *hostp, int32_t state)
{
	struct nlm_vhold *nvp;
	struct nlm_slreq *slr;
	struct nlm_slreq_list slreqs2free;

	TAILQ_INIT(&slreqs2free);
	mutex_enter(&hostp->nh_lock);
	if (state != 0)
		hostp->nh_state = state;

	TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {

		/* cleanup sleeping requests first */
		while ((slr = TAILQ_FIRST(&nvp->nv_slreqs)) != NULL) {
			TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);

			/*
			 * Instead of freeing a cancelled sleeping request
			 * here, we add it to the linked list created
			 * on the stack in order to do all frees outside
			 * the critical section.
			 */
			TAILQ_INSERT_TAIL(&slreqs2free, slr, nsr_link);
		}

		nvp->nv_refcnt++;
		mutex_exit(&hostp->nh_lock);

		nlm_vhold_clean(nvp, hostp->nh_sysid);

		mutex_enter(&hostp->nh_lock);
		nvp->nv_refcnt--;
	}

	mutex_exit(&hostp->nh_lock);
	while ((slr = TAILQ_FIRST(&slreqs2free)) != NULL) {
		TAILQ_REMOVE(&slreqs2free, slr, nsr_link);
		kmem_free(slr, sizeof (*slr));
	}
}

/*
 * Cleanup CLIENT-side state after a server restarts,
 * or becomes unresponsive, or whatever.
 *
 * This is called by the local NFS statd when we receive a
 * host state change notification.  (also nlm_svc_stopping)
 *
 * Deal with a server restart.  If we are stopping the
 * NLM service, we'll have state == 0, and will just
 * cancel all our client-side lock requests.  Otherwise,
 * start the "recovery" process to reclaim any locks
 * we hold on this server.
 */
void
nlm_host_notify_client(struct nlm_host *hostp, int32_t state)
{
	mutex_enter(&hostp->nh_lock);
	hostp->nh_state = state;
	if (hostp->nh_flags & NLM_NH_RECLAIM) {
		/*
		 * Either the host's state is up to date or
		 * the host is already in recovery.
		 */
		mutex_exit(&hostp->nh_lock);
		return;
	}

	hostp->nh_flags |= NLM_NH_RECLAIM;

	/*
	 * The host will be released by the recovery thread,
	 * thus we need to increment the refcount.
	 */
	hostp->nh_refs++;
	mutex_exit(&hostp->nh_lock);

	(void) zthread_create(NULL, 0, nlm_reclaimer,
	    hostp, 0, minclsyspri);
}

/*
 * This function is called when the NLM client detects that the
 * server has entered its grace period and the client needs
 * to wait until the reclamation process (if any) does
 * its job.
 */
int
nlm_host_wait_grace(struct nlm_host *hostp)
{
	struct nlm_globals *g;
	int error = 0;

	g = zone_getspecific(nlm_zone_key, curzone);
	mutex_enter(&hostp->nh_lock);

	do {
		int rc;

		rc = cv_timedwait_sig(&hostp->nh_recl_cv,
		    &hostp->nh_lock, ddi_get_lbolt() +
		    SEC_TO_TICK(g->retrans_tmo));

		if (rc == 0) {
			error = EINTR;
			break;
		}
	} while (hostp->nh_flags & NLM_NH_RECLAIM);

	mutex_exit(&hostp->nh_lock);
	return (error);
}
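
/*
 * A caller is expected to use this when the server answers with a
 * "denied, grace period" status and then retry, roughly like the
 * sketch below (hypothetical caller, error handling elided):
 *
 *	while (res.stat.stat == nlm4_denied_grace_period) {
 *		if (nlm_host_wait_grace(hostp) != 0)
 *			break;		(interrupted by a signal)
 *		... resend the request and decode res again ...
 *	}
 */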

/*
 * Create a new NLM host.
 *
 * NOTE: The in-kernel RPC (kRPC) subsystem uses TLI/XTI,
 * which needs both a knetconfig and an address when creating
 * endpoints. Thus the host object stores both the knetconfig
 * and the netid.
 */
static struct nlm_host *
nlm_host_create(char *name, const char *netid,
    struct knetconfig *knc, struct netbuf *naddr)
{
	struct nlm_host *host;

	host = kmem_cache_alloc(nlm_hosts_cache, KM_SLEEP);

	mutex_init(&host->nh_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&host->nh_rpcb_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&host->nh_recl_cv, NULL, CV_DEFAULT, NULL);

	host->nh_sysid = LM_NOSYSID;
	host->nh_refs = 1;
	host->nh_name = strdup(name);
	host->nh_netid = strdup(netid);
	host->nh_knc = *knc;
	nlm_copy_netbuf(&host->nh_addr, naddr);

	host->nh_state = 0;
	host->nh_rpcb_state = NRPCB_NEED_UPDATE;
	host->nh_flags = 0;

	host->nh_vholds_by_vp = mod_hash_create_ptrhash("nlm vholds hash",
	    32, mod_hash_null_valdtor, sizeof (vnode_t));

	TAILQ_INIT(&host->nh_vholds_list);
	TAILQ_INIT(&host->nh_rpchc);

	return (host);
}

/*
 * Cancel all client side sleeping locks owned by the given host.
 */
void
nlm_host_cancel_slocks(struct nlm_globals *g, struct nlm_host *hostp)
{
	struct nlm_slock *nslp;

	mutex_enter(&g->lock);
	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
		if (nslp->nsl_host == hostp) {
			nslp->nsl_state = NLM_SL_CANCELLED;
			cv_broadcast(&nslp->nsl_cond);
		}
	}

	mutex_exit(&g->lock);
}

/*
 * Garbage collect stale vhold objects.
 *
 * In other words, check whether the vnodes that are
 * held by vhold objects still have any locks or
 * shares or are still in use. If they aren't,
 * just destroy them.
 */
static void
nlm_host_gc_vholds(struct nlm_host *hostp)
{
	struct nlm_vhold *nvp;

	ASSERT(MUTEX_HELD(&hostp->nh_lock));

	nvp = TAILQ_FIRST(&hostp->nh_vholds_list);
	while (nvp != NULL) {
		struct nlm_vhold *nvp_tmp;

		if (nlm_vhold_busy(hostp, nvp)) {
			nvp = TAILQ_NEXT(nvp, nv_link);
			continue;
		}

		nvp_tmp = TAILQ_NEXT(nvp, nv_link);
		nlm_vhold_destroy(hostp, nvp);
		nvp = nvp_tmp;
	}
}

/*
 * Check whether the given host has any
 * server side locks or share reservations.
 */
static bool_t
nlm_host_has_srv_locks(struct nlm_host *hostp)
{
	/*
	 * It's cheap and simple: if the server has
	 * any locks/shares there must be a vhold
	 * object storing the affected vnode.
	 *
	 * NOTE: We don't need to check sleeping
	 * locks on the server side, because if a
	 * server side sleeping lock is alive,
	 * there must be a vhold object corresponding
	 * to the target vnode.
	 */
	ASSERT(MUTEX_HELD(&hostp->nh_lock));
	if (!TAILQ_EMPTY(&hostp->nh_vholds_list))
		return (TRUE);

	return (FALSE);
}

/*
 * Check whether the given host has any client side
 * locks or share reservations.
 */
static bool_t
nlm_host_has_cli_locks(struct nlm_host *hostp)
{
	ASSERT(MUTEX_HELD(&hostp->nh_lock));

	/*
	 * XXX: It's not the way I'd like to do the check,
	 * because flk_sysid_has_locks() can be very
	 * expensive by design. Unfortunately it iterates
	 * through all locks on the system, no matter whether
	 * they were made on a remote system via NLM or
	 * on the local system via reclock. To understand the
	 * problem, consider that there are tens of thousands
	 * of locks that are made on some ZFS dataset. And there's
	 * another dataset shared by NFS where the NLM client had locks
	 * some time ago, but doesn't have them now.
	 * In this case flk_sysid_has_locks() will iterate
	 * through tens of thousands of locks until it returns
	 * FALSE.
	 * Oh, I hope that in the shiny future somebody will make
	 * the local lock manager (os/flock.c) better, so that
	 * it'd be more friendly to remote locks and
	 * flk_sysid_has_locks() wouldn't be so expensive.
	 */
	if (flk_sysid_has_locks(hostp->nh_sysid |
	    LM_SYSID_CLIENT, FLK_QUERY_ACTIVE))
		return (TRUE);

	/*
	 * Check whether the host has any share reservations
	 * registered on the client side.
	 */
	if (hostp->nh_shrlist != NULL)
		return (TRUE);

	return (FALSE);
}

/*
 * Determine whether the given host owns any
 * locks or share reservations.
 */
static bool_t
nlm_host_has_locks(struct nlm_host *hostp)
{
	if (nlm_host_has_srv_locks(hostp))
		return (TRUE);

	return (nlm_host_has_cli_locks(hostp));
}

/*
 * This function compares only the addresses of two netbufs
 * that belong to the NC_TCP[6] or NC_UDP[6] protocol families.
 * The port part of the netbuf is ignored.
 *
 * Return values:
 *  -1: nb1's address is "smaller" than nb2's
 *   0: addresses are equal
 *   1: nb1's address is "greater" than nb2's
 */
static int
nlm_netbuf_addrs_cmp(struct netbuf *nb1, struct netbuf *nb2)
{
	union nlm_addr {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} *na1, *na2;
	int res;

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	na1 = (union nlm_addr *)nb1->buf;
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	na2 = (union nlm_addr *)nb2->buf;

	if (na1->sa.sa_family < na2->sa.sa_family)
		return (-1);
	if (na1->sa.sa_family > na2->sa.sa_family)
		return (1);

	switch (na1->sa.sa_family) {
	case AF_INET:
		res = memcmp(&na1->sin.sin_addr, &na2->sin.sin_addr,
		    sizeof (na1->sin.sin_addr));
		break;
	case AF_INET6:
		res = memcmp(&na1->sin6.sin6_addr, &na2->sin6.sin6_addr,
		    sizeof (na1->sin6.sin6_addr));
		break;
	default:
		VERIFY(0);
		return (0);
	}

	return (SIGN(res));
}

/*
 * Compare two NLM hosts.
 * Return values:
 * -1: host1 is "smaller" than host2
 *  0: host1 is equal to host2
 *  1: host1 is "greater" than host2
 */
int
nlm_host_cmp(const void *p1, const void *p2)
{
	struct nlm_host *h1 = (struct nlm_host *)p1;
	struct nlm_host *h2 = (struct nlm_host *)p2;
	int res;

	res = strcmp(h1->nh_netid, h2->nh_netid);
	if (res != 0)
		return (SIGN(res));

	res = nlm_netbuf_addrs_cmp(&h1->nh_addr, &h2->nh_addr);
	return (res);
}
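
/*
 * This comparator is meant for the per-zone hosts AVL tree that the
 * lookups below use; the tree would be created roughly like the
 * sketch here (illustration only — the actual setup code and the
 * name of the AVL linkage field in struct nlm_host are assumptions):
 *
 *	avl_create(&g->nlm_hosts_tree, nlm_host_cmp,
 *	    sizeof (struct nlm_host),
 *	    offsetof(struct nlm_host, nh_by_addr));
 */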

/*
 * Find the host specified by...  (see below)
 * If found, increment the ref count.
 */
static struct nlm_host *
nlm_host_find_locked(struct nlm_globals *g, const char *netid,
    struct netbuf *naddr, avl_index_t *wherep)
{
	struct nlm_host *hostp, key;
	avl_index_t pos;

	ASSERT(MUTEX_HELD(&g->lock));

	key.nh_netid = (char *)netid;
	key.nh_addr.buf = naddr->buf;
	key.nh_addr.len = naddr->len;
	key.nh_addr.maxlen = naddr->maxlen;

	hostp = avl_find(&g->nlm_hosts_tree, &key, &pos);

	if (hostp != NULL) {
		/*
		 * The host is in use now. Remove it from the
		 * idle hosts list if needed.
		 */
		if (hostp->nh_flags & NLM_NH_INIDLE) {
			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
			hostp->nh_flags &= ~NLM_NH_INIDLE;
		}

		hostp->nh_refs++;
	}
	if (wherep != NULL)
		*wherep = pos;

	return (hostp);
}

/*
 * Find the NLM host for the given name and address.
 */
struct nlm_host *
nlm_host_find(struct nlm_globals *g, const char *netid,
    struct netbuf *addr)
{
	struct nlm_host *hostp = NULL;

	mutex_enter(&g->lock);
	if (g->run_status != NLM_ST_UP)
		goto out;

	hostp = nlm_host_find_locked(g, netid, addr, NULL);

out:
	mutex_exit(&g->lock);
	return (hostp);
}

/*
 * Find or create an NLM host for the given name and address.
 *
 * The remote host is determined by all of: name, netid, address.
 * Note that the netid is whatever nlm_svc_add_ep() gave to
 * svc_tli_kcreate() for the service binding.  If any of these
 * are different, allocate a new host (new sysid).
 */
struct nlm_host *
nlm_host_findcreate(struct nlm_globals *g, char *name,
    const char *netid, struct netbuf *addr)
{
	int err;
	struct nlm_host *host, *newhost = NULL;
	struct knetconfig knc;
	avl_index_t where;

	mutex_enter(&g->lock);
	if (g->run_status != NLM_ST_UP) {
		mutex_exit(&g->lock);
		return (NULL);
	}

	host = nlm_host_find_locked(g, netid, addr, NULL);
	mutex_exit(&g->lock);
	if (host != NULL)
		return (host);

	err = nlm_knc_from_netid(netid, &knc);
	if (err != 0)
		return (NULL);

	/*
	 * Do allocations (etc.) outside of the mutex,
	 * and then check again before inserting.
	 */
	newhost = nlm_host_create(name, netid, &knc, addr);
	newhost->nh_sysid = nlm_sysid_alloc();
	if (newhost->nh_sysid == LM_NOSYSID)
		goto out;

	mutex_enter(&g->lock);
	host = nlm_host_find_locked(g, netid, addr, &where);
	if (host == NULL) {
		host = newhost;
		newhost = NULL;

		/*
		 * Insert the host into the hosts AVL tree that is
		 * used for lookup by <netid, address> pair.
		 */
		avl_insert(&g->nlm_hosts_tree, host, where);

		/*
		 * Insert the host into the hosts hash table that is
		 * used to look up a host by sysid.
		 */
		VERIFY(mod_hash_insert(g->nlm_hosts_hash,
		    (mod_hash_key_t)(uintptr_t)host->nh_sysid,
		    (mod_hash_val_t)host) == 0);
	}

	mutex_exit(&g->lock);

out:
	if (newhost != NULL) {
		/*
		 * We do not need the preallocated nlm_host,
		 * so decrement the reference counter
		 * and destroy it.
		 */
		newhost->nh_refs--;
		nlm_host_destroy(newhost);
	}

	return (host);
}
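
/*
 * Typical call pattern (sketch) from a server-side NLM request
 * handler (hypothetical caller; the real handlers live in the
 * nlm_do_*() service code):
 *
 *	hostp = nlm_host_findcreate(g, name, netid, addr);
 *	if (hostp == NULL)
 *		return;		(NLM service is not running)
 *	... process the request using hostp->nh_sysid ...
 *	nlm_host_release(g, hostp);
 */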

/*
 * Find the NLM host that matches the value of 'sysid'.
 * If found, return it with a new ref,
 * else return NULL.
 */
struct nlm_host *
nlm_host_find_by_sysid(struct nlm_globals *g, sysid_t sysid)
{
	struct nlm_host *hostp = NULL;

	mutex_enter(&g->lock);
	if (g->run_status != NLM_ST_UP)
		goto out;

	(void) mod_hash_find(g->nlm_hosts_hash,
	    (mod_hash_key_t)(uintptr_t)sysid,
	    (mod_hash_val_t)&hostp);

	if (hostp == NULL)
		goto out;

	/*
	 * The host is in use now. Remove it
	 * from the idle hosts list if needed.
	 */
	if (hostp->nh_flags & NLM_NH_INIDLE) {
		TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
		hostp->nh_flags &= ~NLM_NH_INIDLE;
	}

	hostp->nh_refs++;

out:
	mutex_exit(&g->lock);
	return (hostp);
}

/*
 * Release the given host.
 * I.e. drop a reference that was taken earlier by one of
 * the following functions: nlm_host_findcreate(), nlm_host_find(),
 * nlm_host_find_by_sysid().
 *
 * When the very last reference is dropped, the host is moved to
 * the so-called "idle state". All hosts that are in the idle state
 * have an idle timeout. When the timeout expires, the GC thread
 * checks whether the hosts have any locks, and if they have none,
 * it removes them.
 * NOTE: only unused hosts can be in the idle state.
 */
static void
nlm_host_release_locked(struct nlm_globals *g, struct nlm_host *hostp)
{
	if (hostp == NULL)
		return;

	ASSERT(MUTEX_HELD(&g->lock));
	ASSERT(hostp->nh_refs > 0);

	hostp->nh_refs--;
	if (hostp->nh_refs != 0)
		return;

	/*
	 * The very last reference to the host was dropped,
	 * thus the host is unused now. Set its idle timeout
	 * and move it to the idle hosts LRU list.
	 */
	hostp->nh_idle_timeout = ddi_get_lbolt() +
	    SEC_TO_TICK(g->cn_idle_tmo);

	ASSERT((hostp->nh_flags & NLM_NH_INIDLE) == 0);
	TAILQ_INSERT_TAIL(&g->nlm_idle_hosts, hostp, nh_link);
	hostp->nh_flags |= NLM_NH_INIDLE;
}

void
nlm_host_release(struct nlm_globals *g, struct nlm_host *hostp)
{
	if (hostp == NULL)
		return;

	mutex_enter(&g->lock);
	nlm_host_release_locked(g, hostp);
	mutex_exit(&g->lock);
}

/*
 * Unregister this NLM host (NFS client) with the local statd
 * due to idleness (no locks held for a while).
 */
void
nlm_host_unmonitor(struct nlm_globals *g, struct nlm_host *host)
{
	enum clnt_stat stat;

	VERIFY(host->nh_refs == 0);
	if (!(host->nh_flags & NLM_NH_MONITORED))
		return;

	host->nh_flags &= ~NLM_NH_MONITORED;
	stat = nlm_nsm_unmon(&g->nlm_nsm, host->nh_name);
	if (stat != RPC_SUCCESS) {
		NLM_WARN("NLM: Failed to contact statd, stat=%d\n", stat);
		return;
	}
}

/*
 * Ask the local NFS statd to begin monitoring this host.
 * It will call us back when that host restarts, using the
 * prog,vers,proc specified below, i.e. NLM_SM_NOTIFY1,
 * which is handled in nlm_do_notify1().
 */
void
nlm_host_monitor(struct nlm_globals *g, struct nlm_host *host, int state)
{
	int family;
	netobj obj;
	enum clnt_stat stat;

	if (state != 0 && host->nh_state == 0) {
		/*
		 * This is the first time we have seen an NSM state
		 * value for this host. We record it here to help
		 * detect host reboots.
		 */
		host->nh_state = state;
	}

	mutex_enter(&host->nh_lock);
	if (host->nh_flags & NLM_NH_MONITORED) {
		mutex_exit(&host->nh_lock);
		return;
	}

	host->nh_flags |= NLM_NH_MONITORED;
	mutex_exit(&host->nh_lock);

	/*
	 * Before we begin monitoring the host, register the network
	 * address associated with this hostname.
	 */
	nlm_netbuf_to_netobj(&host->nh_addr, &family, &obj);
	stat = nlm_nsmaddr_reg(&g->nlm_nsm, host->nh_name, family, &obj);
	if (stat != RPC_SUCCESS) {
		NLM_WARN("Failed to register address, stat=%d\n", stat);
		mutex_enter(&g->lock);
		host->nh_flags &= ~NLM_NH_MONITORED;
		mutex_exit(&g->lock);

		return;
	}

	/*
	 * Tell statd how to call us with status updates for
	 * this host. Updates arrive via nlm_do_notify1().
	 *
	 * We put our assigned system ID value in the priv field to
	 * make it simpler to find the host if we are notified of a
	 * host restart.
	 */
	stat = nlm_nsm_mon(&g->nlm_nsm, host->nh_name, host->nh_sysid);
	if (stat != RPC_SUCCESS) {
		NLM_WARN("Failed to contact local NSM, stat=%d\n", stat);
		mutex_enter(&g->lock);
		host->nh_flags &= ~NLM_NH_MONITORED;
		mutex_exit(&g->lock);

		return;
	}
}

int
nlm_host_get_state(struct nlm_host *hostp)
{
	return (hostp->nh_state);
}

/*
 * NLM client/server sleeping locks
 */

/*
 * Register a client side sleeping lock.
 *
 * Our client code calls this to keep information
 * about a sleeping lock somewhere. When it receives a
 * grant callback from the server, or when it just
 * needs to remove all sleeping locks from a vnode,
 * it uses this information to remove/apply the lock
 * properly.
 */
struct nlm_slock *
nlm_slock_register(
	struct nlm_globals *g,
	struct nlm_host *host,
	struct nlm4_lock *lock,
	struct vnode *vp)
{
	struct nlm_slock *nslp;

	nslp = kmem_zalloc(sizeof (*nslp), KM_SLEEP);
	cv_init(&nslp->nsl_cond, NULL, CV_DEFAULT, NULL);
	nslp->nsl_lock = *lock;
	nlm_copy_netobj(&nslp->nsl_fh, &nslp->nsl_lock.fh);
	nslp->nsl_state = NLM_SL_BLOCKED;
	nslp->nsl_host = host;
	nslp->nsl_vp = vp;

	mutex_enter(&g->lock);
	TAILQ_INSERT_TAIL(&g->nlm_slocks, nslp, nsl_link);
	mutex_exit(&g->lock);

	return (nslp);
}

/*
 * Remove this lock from the wait list and destroy it.
 */
void
nlm_slock_unregister(struct nlm_globals *g, struct nlm_slock *nslp)
{
	mutex_enter(&g->lock);
	TAILQ_REMOVE(&g->nlm_slocks, nslp, nsl_link);
	mutex_exit(&g->lock);

	kmem_free(nslp->nsl_fh.n_bytes, nslp->nsl_fh.n_len);
	cv_destroy(&nslp->nsl_cond);
	kmem_free(nslp, sizeof (*nslp));
}
1981
1982/*
1983 * Wait for a granted callback or cancellation event
1984 * for a sleeping lock.
1985 *
1986 * If a signal interrupted the wait or if the lock
1987 * was cancelled, return EINTR - the caller must arrange to send
1988 * a cancellation to the server.
1989 *
1990 * If timeout occurred, return ETIMEDOUT - the caller must
1991 * resend the lock request to the server.
1992 *
1993 * On success return 0.
1994 */
1995int
1996nlm_slock_wait(struct nlm_globals *g,
1997    struct nlm_slock *nslp, uint_t timeo_secs)
1998{
1999	clock_t timeo_ticks;
2000	int cv_res, error;
2001
2002	/*
2003	 * If the granted message arrived before we got here,
2004	 * nslp->nsl_state will be NLM_SL_GRANTED - in that case don't sleep.
2005	 */
2006	cv_res = 1;
2007	timeo_ticks = ddi_get_lbolt() + SEC_TO_TICK(timeo_secs);
2008
2009	mutex_enter(&g->lock);
2010	while (nslp->nsl_state == NLM_SL_BLOCKED && cv_res > 0) {
2011		cv_res = cv_timedwait_sig(&nslp->nsl_cond,
2012		    &g->lock, timeo_ticks);
2013	}
2014
	/*
	 * No matter why we wake up, if the lock was
	 * cancelled, let the caller know about it by
	 * returning EINTR.
	 */
	if (nslp->nsl_state == NLM_SL_CANCELLED) {
		error = EINTR;
		goto out;
	}

	if (cv_res <= 0) {
		/* We were woken up either by timeout or by interrupt */
		error = (cv_res < 0) ? ETIMEDOUT : EINTR;

		/*
		 * The granted message may arrive after the
		 * interrupt/timeout but before we manage to lock the
		 * mutex. Detect this by examining nslp.
		 */
		if (nslp->nsl_state == NLM_SL_GRANTED)
			error = 0;
	} else { /* Awaken via cv_signal()/cv_broadcast() or didn't block */
		error = 0;
		VERIFY(nslp->nsl_state == NLM_SL_GRANTED);
	}

out:
	mutex_exit(&g->lock);
	return (error);
}

/*
 * Mark a client-side sleeping lock as granted
 * and wake up the process blocked on the lock.
 * Called from the server-side NLM_GRANT handler.
 *
 * If the sleeping lock is found return 0, otherwise
 * return ENOENT.
 */
int
nlm_slock_grant(struct nlm_globals *g,
    struct nlm_host *hostp, struct nlm4_lock *alock)
{
	struct nlm_slock *nslp;
	int error = ENOENT;

	mutex_enter(&g->lock);
	TAILQ_FOREACH(nslp, &g->nlm_slocks, nsl_link) {
		if ((nslp->nsl_state != NLM_SL_BLOCKED) ||
		    (nslp->nsl_host != hostp))
			continue;

		if (alock->svid		== nslp->nsl_lock.svid &&
		    alock->l_offset	== nslp->nsl_lock.l_offset &&
		    alock->l_len	== nslp->nsl_lock.l_len &&
		    alock->fh.n_len	== nslp->nsl_lock.fh.n_len &&
		    bcmp(alock->fh.n_bytes, nslp->nsl_lock.fh.n_bytes,
		    nslp->nsl_lock.fh.n_len) == 0) {
			nslp->nsl_state = NLM_SL_GRANTED;
			cv_broadcast(&nslp->nsl_cond);
			error = 0;
			break;
		}
	}

	mutex_exit(&g->lock);
	return (error);
}

/*
 * Register a sleeping lock request corresponding to
 * flp on the given vhold object.
 * Returns 0 on success, or EEXIST if a lock request
 * with the same flp is already registered.
 */
int
nlm_slreq_register(struct nlm_host *hostp, struct nlm_vhold *nvp,
    struct flock64 *flp)
{
	struct nlm_slreq *slr, *new_slr = NULL;
	int ret = EEXIST;

	mutex_enter(&hostp->nh_lock);
	slr = nlm_slreq_find_locked(hostp, nvp, flp);
	if (slr != NULL)
		goto out;

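	/*
	 * Drop nh_lock across the KM_SLEEP allocation, then re-check
	 * whether someone registered the same request while we slept;
	 * if we lost the race, the pre-allocated entry is freed below.
	 */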
	mutex_exit(&hostp->nh_lock);
	new_slr = kmem_zalloc(sizeof (*slr), KM_SLEEP);
	bcopy(flp, &new_slr->nsr_fl, sizeof (*flp));

	mutex_enter(&hostp->nh_lock);
	slr = nlm_slreq_find_locked(hostp, nvp, flp);
	if (slr == NULL) {
		slr = new_slr;
		new_slr = NULL;
		ret = 0;

		TAILQ_INSERT_TAIL(&nvp->nv_slreqs, slr, nsr_link);
	}

out:
	mutex_exit(&hostp->nh_lock);
	if (new_slr != NULL)
		kmem_free(new_slr, sizeof (*new_slr));

	return (ret);
}

/*
 * Unregister the sleeping lock request corresponding
 * to flp from the given vhold object.
 * Returns 0 on success, or ENOENT if no lock request
 * corresponding to flp is found on the given vhold.
 */
int
nlm_slreq_unregister(struct nlm_host *hostp, struct nlm_vhold *nvp,
    struct flock64 *flp)
{
	struct nlm_slreq *slr;

	mutex_enter(&hostp->nh_lock);
	slr = nlm_slreq_find_locked(hostp, nvp, flp);
	if (slr == NULL) {
		mutex_exit(&hostp->nh_lock);
		return (ENOENT);
	}

	TAILQ_REMOVE(&nvp->nv_slreqs, slr, nsr_link);
	mutex_exit(&hostp->nh_lock);

	kmem_free(slr, sizeof (*slr));
	return (0);
}

/*
 * Find sleeping lock request on the given vhold object by flp.
 */
struct nlm_slreq *
nlm_slreq_find_locked(struct nlm_host *hostp, struct nlm_vhold *nvp,
    struct flock64 *flp)
{
	struct nlm_slreq *slr = NULL;

	ASSERT(MUTEX_HELD(&hostp->nh_lock));
	TAILQ_FOREACH(slr, &nvp->nv_slreqs, nsr_link) {
		if (slr->nsr_fl.l_start		== flp->l_start	&&
		    slr->nsr_fl.l_len		== flp->l_len	&&
		    slr->nsr_fl.l_pid		== flp->l_pid	&&
		    slr->nsr_fl.l_type		== flp->l_type)
			break;
	}

	return (slr);
}

/*
 * NLM tracks active share reservations made on the client side.
 * It needs to track share reservations for two purposes:
 * 1) to determine whether an nlm_host is busy (if it has active locks
 *    and/or share reservations, it is);
 * 2) to recover active share reservations when an NLM server reports
 *    that it has rebooted.
 *
 * Unfortunately the illumos local share reservation manager (see
 * os/share.c) has no way to look up all reservations in the system
 * by sysid (the way the local lock manager does). It tracks
 * reservations per vnode and can only look them up on a particular
 * vnode, which is not what NLM needs. Hence this ugly share
 * reservation tracking scheme.
 */

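/*
 * Tracked reservations live on hostp->nh_shrlist, a simple
 * singly-linked list of nlm_shres items protected by hostp->nh_lock.
 */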
void
nlm_shres_track(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
{
	struct nlm_shres *nsp, *nsp_new;

	/*
	 * NFS code must fill in s_owner, so that
	 * s_own_len is never 0.
	 */
	ASSERT(shrp->s_own_len > 0);
	nsp_new = nlm_shres_create_item(shrp, vp);

	mutex_enter(&hostp->nh_lock);
	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next)
		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr))
			break;

	if (nsp != NULL) {
		/* Found a duplicate. Do nothing. */
		goto out;
	}

	nsp = nsp_new;
	nsp_new = NULL;
	nsp->ns_next = hostp->nh_shrlist;
	hostp->nh_shrlist = nsp;

out:
	mutex_exit(&hostp->nh_lock);
	if (nsp_new != NULL)
		nlm_shres_destroy_item(nsp_new);
}

void
nlm_shres_untrack(struct nlm_host *hostp, vnode_t *vp, struct shrlock *shrp)
{
	struct nlm_shres *nsp, *nsp_prev = NULL;

	mutex_enter(&hostp->nh_lock);
	nsp = hostp->nh_shrlist;
	while (nsp != NULL) {
		if (nsp->ns_vp == vp && nlm_shres_equal(shrp, nsp->ns_shr)) {
			struct nlm_shres *nsp_del;

			nsp_del = nsp;
			nsp = nsp->ns_next;
			if (nsp_prev != NULL)
				nsp_prev->ns_next = nsp;
			else
				hostp->nh_shrlist = nsp;

			nlm_shres_destroy_item(nsp_del);
			continue;
		}

		nsp_prev = nsp;
		nsp = nsp->ns_next;
	}

	mutex_exit(&hostp->nh_lock);
}

/*
 * Get a _copy_ of the list of all active share reservations
 * made by the given host.
 * NOTE: the list this function returns _must_ be released
 *       using nlm_free_shrlist().
 */
struct nlm_shres *
nlm_get_active_shres(struct nlm_host *hostp)
{
	struct nlm_shres *nsp, *nslist = NULL;

	mutex_enter(&hostp->nh_lock);
	for (nsp = hostp->nh_shrlist; nsp != NULL; nsp = nsp->ns_next) {
		struct nlm_shres *nsp_new;

		nsp_new = nlm_shres_create_item(nsp->ns_shr, nsp->ns_vp);
		nsp_new->ns_next = nslist;
		nslist = nsp_new;
	}

	mutex_exit(&hostp->nh_lock);
	return (nslist);
}

/*
 * Free the memory allocated for the active share reservation
 * list created by nlm_get_active_shres().
 */
void
nlm_free_shrlist(struct nlm_shres *nslist)
{
	struct nlm_shres *nsp;

	while (nslist != NULL) {
		nsp = nslist;
		nslist = nslist->ns_next;

		nlm_shres_destroy_item(nsp);
	}
}

static bool_t
nlm_shres_equal(struct shrlock *shrp1, struct shrlock *shrp2)
{
	if (shrp1->s_sysid	== shrp2->s_sysid	&&
	    shrp1->s_pid	== shrp2->s_pid		&&
	    shrp1->s_own_len	== shrp2->s_own_len	&&
	    bcmp(shrp1->s_owner, shrp2->s_owner,
	    shrp1->s_own_len) == 0)
		return (TRUE);

	return (FALSE);
}

static struct nlm_shres *
nlm_shres_create_item(struct shrlock *shrp, vnode_t *vp)
{
	struct nlm_shres *nsp;

	nsp = kmem_alloc(sizeof (*nsp), KM_SLEEP);
	nsp->ns_shr = kmem_alloc(sizeof (*shrp), KM_SLEEP);
	bcopy(shrp, nsp->ns_shr, sizeof (*shrp));
	nsp->ns_shr->s_owner = kmem_alloc(shrp->s_own_len, KM_SLEEP);
	bcopy(shrp->s_owner, nsp->ns_shr->s_owner, shrp->s_own_len);
	nsp->ns_vp = vp;

	return (nsp);
}

static void
nlm_shres_destroy_item(struct nlm_shres *nsp)
{
	kmem_free(nsp->ns_shr->s_owner,
	    nsp->ns_shr->s_own_len);
	kmem_free(nsp->ns_shr, sizeof (struct shrlock));
	kmem_free(nsp, sizeof (*nsp));
}

/*
 * Called by klmmod.c when lockd adds a network endpoint
 * on which we should begin RPC services.
 */
int
nlm_svc_add_ep(struct file *fp, const char *netid, struct knetconfig *knc)
{
	SVCMASTERXPRT *xprt = NULL;
	int error;

	error = svc_tli_kcreate(fp, 0, (char *)netid, NULL, &xprt,
	    &nlm_sct, NULL, NLM_SVCPOOL_ID, FALSE);
	if (error != 0)
		return (error);

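	/*
	 * Record this transport's device (knc_rdev) in our
	 * netconfig table; see nlm_knc_to_netid().
	 */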
	(void) nlm_knc_to_netid(knc);
	return (0);
}

/*
 * Start NLM service.
 */
int
nlm_svc_starting(struct nlm_globals *g, struct file *fp,
    const char *netid, struct knetconfig *knc)
{
	int error;
	enum clnt_stat stat;

	VERIFY(g->run_status == NLM_ST_STARTING);
	VERIFY(g->nlm_gc_thread == NULL);

	error = nlm_nsm_init_local(&g->nlm_nsm);
	if (error != 0) {
		NLM_ERR("Failed to initialize NSM handler "
		    "(error=%d)\n", error);
		g->run_status = NLM_ST_DOWN;
		return (error);
	}

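	/* Default error returned if the statd calls below fail. */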
	error = EIO;

	/*
	 * Create an NLM garbage collector thread that will
	 * clean up stale vhold and host objects.
	 */
	g->nlm_gc_thread = zthread_create(NULL, 0, nlm_gc,
	    g, 0, minclsyspri);

	/*
	 * Send SIMU_CRASH to the local statd to report that
	 * NLM has started, so that statd can notify other hosts
	 * of the NLM state change.
	 */
	stat = nlm_nsm_simu_crash(&g->nlm_nsm);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to connect to local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}

	stat = nlm_nsm_stat(&g->nlm_nsm, &g->nsm_state);
	if (stat != RPC_SUCCESS) {
		NLM_ERR("Failed to get the status of local statd "
		    "(rpcerr=%d)\n", stat);
		goto shutdown_lm;
	}

	g->grace_threshold = ddi_get_lbolt() +
	    SEC_TO_TICK(g->grace_period);

	/* Register endpoint used for communications with local NLM */
	error = nlm_svc_add_ep(fp, netid, knc);
	if (error != 0)
		goto shutdown_lm;

	(void) svc_pool_control(NLM_SVCPOOL_ID,
	    SVCPSET_SHUTDOWN_PROC, (void *)nlm_pool_shutdown);
	g->run_status = NLM_ST_UP;
	return (0);

shutdown_lm:
	mutex_enter(&g->lock);
	g->run_status = NLM_ST_STOPPING;
	mutex_exit(&g->lock);

	nlm_svc_stopping(g);
	return (error);
}

/*
 * Called when the server pool is destroyed, so that
 * all transports are closed and no server threads
 * remain.
 *
 * Just call lm_shutdown() to shut NLM down properly.
 */
static void
nlm_pool_shutdown(void)
{
	(void) lm_shutdown();
}

/*
 * Stop the NLM service, cleaning up all resources
 * NLM currently owns.
 *
 * NOTE: NFS code can call NLM while it's stopping,
 * or even after it has shut down. Any attempt to lock
 * a file, either on the client or on the server side,
 * will fail if NLM isn't in the NLM_ST_UP state.
 */
void
nlm_svc_stopping(struct nlm_globals *g)
{
	mutex_enter(&g->lock);
	ASSERT(g->run_status == NLM_ST_STOPPING);

	/*
	 * Ask NLM GC thread to exit and wait until it dies.
	 */
	cv_signal(&g->nlm_gc_sched_cv);
	while (g->nlm_gc_thread != NULL)
		cv_wait(&g->nlm_gc_finish_cv, &g->lock);

	mutex_exit(&g->lock);

	/*
	 * Cleanup locks owned by NLM hosts.
	 * NOTE: New hosts won't be created while
	 * NLM is stopping.
	 */
	while (!avl_is_empty(&g->nlm_hosts_tree)) {
		struct nlm_host *hostp;
		int busy_hosts = 0;

		/*
		 * Iterate through all NLM hosts in the system
		 * and drop the locks they own by force.
		 */
		hostp = avl_first(&g->nlm_hosts_tree);
		while (hostp != NULL) {
			/* Cleanup all client and server side locks */
			nlm_client_cancel_all(g, hostp);
			nlm_host_notify_server(hostp, 0);

			mutex_enter(&hostp->nh_lock);
			nlm_host_gc_vholds(hostp);
			if (hostp->nh_refs > 0 || nlm_host_has_locks(hostp)) {
				/*
				 * The host is still busy; give it
				 * some time to release its resources
				 * and go on to the next one.
				 */
				mutex_exit(&hostp->nh_lock);
				hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
				busy_hosts++;
				continue;
			}

			mutex_exit(&hostp->nh_lock);
			hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
		}

		/*
		 * All hosts move to the nlm_idle_hosts list once
		 * all the locks they own are cleaned up and their last
		 * references are dropped. Just destroy all hosts on the
		 * nlm_idle_hosts list; they cannot be removed from there
		 * while we're in the stopping state.
		 */
		while ((hostp = TAILQ_FIRST(&g->nlm_idle_hosts)) != NULL) {
			nlm_host_unregister(g, hostp);
			nlm_host_destroy(hostp);
		}

		if (busy_hosts > 0) {
			/*
			 * Some hosts weren't cleaned up; they are
			 * probably still in the middle of resource
			 * cleanup. Give them some time to drop
			 * their references.
			 */
			delay(MSEC_TO_TICK(500));
		}
	}

	ASSERT(TAILQ_EMPTY(&g->nlm_slocks));

	nlm_nsm_fini(&g->nlm_nsm);
	g->lockd_pid = 0;
	g->run_status = NLM_ST_DOWN;
}

/*
 * Returns TRUE if the given vnode has
 * any active or sleeping locks.
 */
int
nlm_vp_active(const vnode_t *vp)
{
	struct nlm_globals *g;
	struct nlm_host *hostp;
	struct nlm_vhold *nvp;
	int active = 0;

	g = zone_getspecific(nlm_zone_key, curzone);

	/*
	 * Server-side NLM has locks on the given vnode
	 * if there exists a vhold object that holds
	 * the given vnode "vp" in one of the NLM hosts.
	 */
	mutex_enter(&g->lock);
	hostp = avl_first(&g->nlm_hosts_tree);
	while (hostp != NULL) {
		mutex_enter(&hostp->nh_lock);
		nvp = nlm_vhold_find_locked(hostp, vp);
		mutex_exit(&hostp->nh_lock);
		if (nvp != NULL) {
			active = 1;
			break;
		}

		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
	}

	mutex_exit(&g->lock);
	return (active);
}

/*
 * Called right before an NFS export is going to
 * disappear. The function finds all vnodes
 * belonging to the given export and cleans up
 * all remote locks and share reservations
 * on them.
 */
void
nlm_zone_unexport(struct nlm_globals *g, struct exportinfo *exi)
{
	struct nlm_host *hostp;

	mutex_enter(&g->lock);
	if (g->run_status != NLM_ST_UP) {
		/* nothing to do */
		mutex_exit(&g->lock);
		return;
	}

	hostp = avl_first(&g->nlm_hosts_tree);
	while (hostp != NULL) {
		struct nlm_vhold *nvp;

		if (hostp->nh_flags & NLM_NH_INIDLE) {
			TAILQ_REMOVE(&g->nlm_idle_hosts, hostp, nh_link);
			hostp->nh_flags &= ~NLM_NH_INIDLE;
		}
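		/*
		 * Keep the host referenced while we walk its vholds
		 * without holding g->lock.
		 */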
		hostp->nh_refs++;

		mutex_exit(&g->lock);

		mutex_enter(&hostp->nh_lock);
		TAILQ_FOREACH(nvp, &hostp->nh_vholds_list, nv_link) {
			vnode_t *vp;

			nvp->nv_refcnt++;
			mutex_exit(&hostp->nh_lock);

			vp = nvp->nv_vp;

			if (!EQFSID(&exi->exi_fsid, &vp->v_vfsp->vfs_fsid))
				goto next_iter;

			/*
			 * The vnode vp turned out to be under control
			 * of the exportinfo exi, so we need to drop
			 * all locks from this vnode.
			 */
			nlm_vhold_clean(nvp, hostp->nh_sysid);

		next_iter:
			mutex_enter(&hostp->nh_lock);
			nvp->nv_refcnt--;
		}
		mutex_exit(&hostp->nh_lock);

		mutex_enter(&g->lock);
		nlm_host_release_locked(g, hostp);

		hostp = AVL_NEXT(&g->nlm_hosts_tree, hostp);
	}

	mutex_exit(&g->lock);
}

void
nlm_unexport(struct exportinfo *exi)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link) {
		if (g->nlm_zoneid == exi->exi_zoneid) {
			/*
			 * NOTE: If we want to drop lm_lck before
			 * calling nlm_zone_unexport(), we should break,
			 * and have a post-rw_exit() snippet like:
			 *	if (g != NULL)
			 *		nlm_zone_unexport(g, exi);
			 */
			nlm_zone_unexport(g, exi);
			break; /* Only going to match once! */
		}
	}
	rw_exit(&lm_lck);
}

/*
 * Allocate a new unique sysid.
 * In case of failure (no available sysids)
 * return LM_NOSYSID.
 */
sysid_t
nlm_sysid_alloc(void)
{
	sysid_t ret_sysid = LM_NOSYSID;

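	/*
	 * nlm_sysid_nidx is a rotating "next index" hint: try it
	 * first, and fall back to a bitmap scan for a free sysid.
	 */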
	rw_enter(&lm_lck, RW_WRITER);
	if (nlm_sysid_nidx > LM_SYSID_MAX)
		nlm_sysid_nidx = LM_SYSID;

	if (!BT_TEST(nlm_sysid_bmap, nlm_sysid_nidx)) {
		BT_SET(nlm_sysid_bmap, nlm_sysid_nidx);
		ret_sysid = nlm_sysid_nidx++;
	} else {
		index_t id;

		id = bt_availbit(nlm_sysid_bmap, NLM_BMAP_NITEMS);
		if (id > 0) {
			nlm_sysid_nidx = id + 1;
			ret_sysid = id;
			BT_SET(nlm_sysid_bmap, id);
		}
	}

	rw_exit(&lm_lck);
	return (ret_sysid);
}

void
nlm_sysid_free(sysid_t sysid)
{
	ASSERT(sysid >= LM_SYSID && sysid <= LM_SYSID_MAX);

	rw_enter(&lm_lck, RW_WRITER);
	ASSERT(BT_TEST(nlm_sysid_bmap, sysid));
	BT_CLEAR(nlm_sysid_bmap, sysid);
	rw_exit(&lm_lck);
}

/*
 * Return true if the request came from a local caller.
 * By necessity, this "knows" the netid names invented
 * in lm_svc() and nlm_netid_from_knetconfig().
 */
bool_t
nlm_caller_is_local(SVCXPRT *transp)
{
	char *netid;
	struct netbuf *rtaddr;

	netid = svc_getnetid(transp);
	rtaddr = svc_getrpccaller(transp);

	if (netid == NULL)
		return (FALSE);

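	/* TLI loopback transports can only be used by local callers. */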
2713	if (strcmp(netid, "ticlts") == 0 ||
2714	    strcmp(netid, "ticotsord") == 0)
2715		return (TRUE);
2716
2717	if (strcmp(netid, "tcp") == 0 || strcmp(netid, "udp") == 0) {
2718		struct sockaddr_in *sin = (void *)rtaddr->buf;
2719		if (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK))
2720			return (TRUE);
2721	}
2722	if (strcmp(netid, "tcp6") == 0 || strcmp(netid, "udp6") == 0) {
2723		struct sockaddr_in6 *sin6 = (void *)rtaddr->buf;
2724		if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
2725			return (TRUE);
2726	}
2727
2728	return (FALSE); /* unknown transport */
2729}
2730
/*
 * Get the netid string corresponding to the given knetconfig.
 * If not done already, save knc->knc_rdev in our table.
 */
const char *
nlm_knc_to_netid(struct knetconfig *knc)
{
	int i;
	dev_t rdev;
	struct nlm_knc *nc;
	const char *netid = NULL;

	rw_enter(&lm_lck, RW_READER);
	for (i = 0; i < NLM_KNCS; i++) {
		nc = &nlm_netconfigs[i];

		if (nc->n_knc.knc_semantics == knc->knc_semantics &&
		    strcmp(nc->n_knc.knc_protofmly,
		    knc->knc_protofmly) == 0) {
			netid = nc->n_netid;
			rdev = nc->n_knc.knc_rdev;
			break;
		}
	}
	rw_exit(&lm_lck);

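	/*
	 * First time we've seen this transport: store its device,
	 * re-checking under the write lock in case we raced.
	 */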
	if (netid != NULL && rdev == NODEV) {
		rw_enter(&lm_lck, RW_WRITER);
		if (nc->n_knc.knc_rdev == NODEV)
			nc->n_knc.knc_rdev = knc->knc_rdev;
		rw_exit(&lm_lck);
	}

	return (netid);
}

/*
 * Get a knetconfig corresponding to the given netid.
 * If there's no knetconfig for this netid, ENOENT
 * is returned.
 */
int
nlm_knc_from_netid(const char *netid, struct knetconfig *knc)
{
	int i, ret;

	ret = ENOENT;
	for (i = 0; i < NLM_KNCS; i++) {
		struct nlm_knc *nknc;

		nknc = &nlm_netconfigs[i];
		if (strcmp(netid, nknc->n_netid) == 0 &&
		    nknc->n_knc.knc_rdev != NODEV) {
			*knc = nknc->n_knc;
			ret = 0;
			break;
		}
	}

	return (ret);
}

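/*
 * CPR (checkpoint/resume) hooks: suspend or resume
 * the NLM in every active zone.
 */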
void
nlm_cprsuspend(void)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
		nlm_suspend_zone(g);

	rw_exit(&lm_lck);
}

void
nlm_cprresume(void)
{
	struct nlm_globals *g;

	rw_enter(&lm_lck, RW_READER);
	TAILQ_FOREACH(g, &nlm_zones_list, nlm_link)
		nlm_resume_zone(g);

	rw_exit(&lm_lck);
}

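/*
 * (Re)initialize an RPC client handle used for
 * calls to the local statd.
 */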
static void
nlm_nsm_clnt_init(CLIENT *clnt, struct nlm_nsm *nsm)
{
	(void) clnt_tli_kinit(clnt, &nsm->ns_knc, &nsm->ns_addr, 0,
	    NLM_RPC_RETRIES, zone_kcred());
}

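/*
 * Extract the address family and raw address bytes from a netbuf.
 * Note the resulting netobj points into the netbuf; no copy is made.
 */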
static void
nlm_netbuf_to_netobj(struct netbuf *addr, int *family, netobj *obj)
{
	/* LINTED pointer alignment */
	struct sockaddr *sa = (struct sockaddr *)addr->buf;

	*family = sa->sa_family;

	switch (sa->sa_family) {
	case AF_INET: {
		/* LINTED pointer alignment */
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;

		obj->n_len = sizeof (sin->sin_addr);
		obj->n_bytes = (char *)&sin->sin_addr;
		break;
	}

	case AF_INET6: {
		/* LINTED pointer alignment */
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		obj->n_len = sizeof (sin6->sin6_addr);
		obj->n_bytes = (char *)&sin6->sin6_addr;
		break;
	}

	default:
		VERIFY(0);
		break;
	}
}
