1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
26 * Copyright 2014 Gary Mills
27 */
28
29
30/*
31 * nfs_tbind.c, common part for nfsd and lockd.
32 */
33
34#include <tiuser.h>
35#include <fcntl.h>
36#include <netconfig.h>
37#include <stropts.h>
38#include <errno.h>
39#include <syslog.h>
40#include <rpc/rpc.h>
41#include <sys/time.h>
42#include <sys/resource.h>
43#include <signal.h>
44#include <netdir.h>
45#include <unistd.h>
46#include <string.h>
47#include <netinet/tcp.h>
48#include <malloc.h>
49#include <stdlib.h>
50#include "nfs_tbind.h"
51#include <nfs/nfs.h>
52#include <nfs/nfs_acl.h>
53#include <nfs/nfssys.h>
54#include <nfs/nfs4.h>
55#include <zone.h>
56#include <sys/socket.h>
57#include <tsol/label.h>
58
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to nonzero when the netconfig entry pointed to by _nconf has
 * nc_semantics equal to NC_TPI_CLTS, NC_TPI_COTS or NC_TPI_COTS_ORD.
 * The argument is parenthesized so that any pointer expression may be
 * passed; it is evaluated up to three times, so it must not have side
 * effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
66
/*
 * Assemble an unsigned value from the four bytes starting at address a,
 * taking the first byte as the most significant one (big-endian order).
 * The argument is parenthesized so that pointer expressions such as
 * "p + 1" are converted to a byte pointer as a whole; it is evaluated
 * four times, so it must not have side effects.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
72
/*
 * Number of elements to add to the poll array on each allocation.
 * add_to_poll_list() grows both poll_array and conn_polled by this many
 * entries whenever the arrays are full.
 */
#define	POLL_ARRAY_INC_SIZE	64
77
/*
 * Number of file descriptors by which the process soft limit may be
 * increased on each call to nofile_increase(0).  A positive argument to
 * nofile_increase() sets the soft limit to that value instead.
 */
#define	NOFILE_INC_SIZE	64
83
/*
 * Default TCP send and receive buffer size of NFS server, in bytes.
 * nfslib_set_sockbuf() raises SO_SNDBUF and SO_RCVBUF to this value when
 * the current setting is smaller.
 */
#define	NFSD_TCP_BUFSZ	(1024*1024)
88
/*
 * Node of a doubly linked list (conn_next/conn_prev), each node carrying
 * one struct t_call.  Lists of these are passed to conn_get() and
 * discon_get(); presumably each node is one pending connection
 * indication -- confirm against those routines.
 */
struct conn_ind {
	struct conn_ind *conn_next;
	struct conn_ind *conn_prev;
	struct t_call   *conn_call;
};
94
/*
 * Per-descriptor bookkeeping kept in conn_polled[], in the slot with the
 * same index as the descriptor's entry in poll_array[].
 */
struct conn_entry {
	/* cleared by add_to_poll_list(); tested by conn_close_oldest() */
	bool_t			closing;
	/* copy (by structure assignment) of the transport's netconfig */
	struct netconfig	nc;
};
99
/*
 * this file contains transport routines common to nfsd and lockd
 *
 * Forward declarations of the file-local helpers, followed by the
 * file-scope state shared by the polling routines.
 */
static	int	nofile_increase(int);
static	int	reuseaddr(int);
static	int	recvucred(int);
static  int	anonmlp(int);
static	void	add_to_poll_list(int, struct netconfig *);
static	char	*serv_name_to_port_name(char *);
static	int	bind_to_proto(char *, char *, struct netbuf **,
				struct netconfig **);
static	int	bind_to_provider(char *, char *, struct netbuf **,
					struct netconfig **);
static	void	conn_close_oldest(void);
static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static	void	cots_listen_event(int, int);
static	int	discon_get(int, struct netconfig *, struct conn_ind **);
static	int	do_poll_clts_action(int, int);
static	int	do_poll_cots_action(int, int);
static	void	remove_from_poll_list(int);
static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
static	int	is_listen_fd_index(int);

/*
 * poll_array and conn_polled are parallel arrays: entry i of each
 * describes the same descriptor.  They are allocated and grown by
 * add_to_poll_list() and compacted by remove_from_poll_list().
 */
static	struct pollfd *poll_array;
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional service hook; it is only called when non-NULL.  In this file
 * it is invoked with NFS4_SETPORT (nfslib_bindit) and with
 * NFS4_SETPORT|NFS4_KRPC_START (do_one).
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);
static int	setopt(int fd, int level, int name, int value);
static int	get_opt(int fd, int level, int name);
static void	nfslib_set_sockbuf(int fd);
131
132/*
133 * Called to create and prepare a transport descriptor for in-kernel
134 * RPC service.
135 * Returns -1 on failure and a valid descriptor on success.
136 */
137int
138nfslib_transport_open(struct netconfig *nconf)
139{
140	int fd;
141	struct strioctl	strioc;
142
143	if ((nconf == (struct netconfig *)NULL) ||
144	    (nconf->nc_device == (char *)NULL)) {
145		syslog(LOG_ERR, "no netconfig device");
146		return (-1);
147	}
148
149	/*
150	 * Open the transport device.
151	 */
152	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
153	if (fd == -1) {
154		if (t_errno == TSYSERR && errno == EMFILE &&
155		    (nofile_increase(0) == 0)) {
156			/* Try again with a higher NOFILE limit. */
157			fd = t_open(nconf->nc_device, O_RDWR,
158			    (struct t_info *)NULL);
159		}
160		if (fd == -1) {
161			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
162			    nconf->nc_device, t_errno);
163			return (-1);
164		}
165	}
166
167	/*
168	 * Pop timod because the RPC module must be as close as possible
169	 * to the transport.
170	 */
171	if (ioctl(fd, I_POP, 0) < 0) {
172		syslog(LOG_ERR, "I_POP of timod failed: %m");
173		(void) t_close(fd);
174		return (-1);
175	}
176
177	/*
178	 * Common code for CLTS and COTS transports
179	 */
180	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
181		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
182		(void) t_close(fd);
183		return (-1);
184	}
185
186	strioc.ic_cmd = RPC_SERVER;
187	strioc.ic_dp = (char *)0;
188	strioc.ic_len = 0;
189	strioc.ic_timout = -1;
190
191	/* Tell rpcmod to act like a server stream. */
192	if (ioctl(fd, I_STR, &strioc) < 0) {
193		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
194		(void) t_close(fd);
195		return (-1);
196	}
197
198	/*
199	 * Re-push timod so that we will still be doing TLI
200	 * operations on the descriptor.
201	 */
202	if (ioctl(fd, I_PUSH, "timod") < 0) {
203		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
204		(void) t_close(fd);
205		return (-1);
206	}
207
208	/*
209	 * Enable options of returning the ip's for udp.
210	 */
211	if (strcmp(nconf->nc_netid, "udp6") == 0)
212		__rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
213	else if (strcmp(nconf->nc_netid, "udp") == 0)
214		__rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
215
216	return (fd);
217}
218
219static int
220nofile_increase(int limit)
221{
222	struct rlimit rl;
223
224	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
225		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
226		return (-1);
227	}
228
229	if (limit > 0)
230		rl.rlim_cur = limit;
231	else
232		rl.rlim_cur += NOFILE_INC_SIZE;
233
234	if (rl.rlim_cur > rl.rlim_max &&
235	    rl.rlim_max != RLIM_INFINITY)
236		rl.rlim_max = rl.rlim_cur;
237
238	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
239		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
240		    rl.rlim_cur);
241		return (-1);
242	}
243
244	return (0);
245}
246
247static void
248nfslib_set_sockbuf(int fd)
249{
250	int curval, val;
251
252	val = NFSD_TCP_BUFSZ;
253
254	curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
255	syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
256	if ((curval != -1) && (curval < val)) {
257		syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
258		if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
259			syslog(LOG_ERR,
260			    "couldn't set SO_SNDBUF to %d - t_errno = %d",
261			    val, t_errno);
262			syslog(LOG_ERR,
263			    "Check and increase system-wide tcp_max_buf");
264		}
265	}
266
267	curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
268	syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
269	if ((curval != -1) && (curval < val)) {
270		syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
271		if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
272			syslog(LOG_ERR,
273			    "couldn't set SO_RCVBUF to %d - t_errno = %d",
274			    val, t_errno);
275			syslog(LOG_ERR,
276			    "Check and increase system-wide tcp_max_buf");
277		}
278	}
279}
280
281int
282nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
283	struct nd_hostserv *hs, int backlog)
284{
285	int fd;
286	struct t_bind  *ntb;
287	struct t_bind tb;
288	struct nd_addrlist *addrlist;
289	struct t_optmgmt req, resp;
290	struct opthdr *opt;
291	char reqbuf[128];
292	bool_t use_any = FALSE;
293	bool_t gzone = TRUE;
294
295	if ((fd = nfslib_transport_open(nconf)) == -1) {
296		syslog(LOG_ERR, "cannot establish transport service over %s",
297		    nconf->nc_device);
298		return (-1);
299	}
300
301	addrlist = (struct nd_addrlist *)NULL;
302
303	/* nfs4_callback service does not used a fieed port number */
304
305	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
306		tb.addr.maxlen = 0;
307		tb.addr.len = 0;
308		tb.addr.buf = 0;
309		use_any = TRUE;
310		gzone = (getzoneid() == GLOBAL_ZONEID);
311	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
312
313		syslog(LOG_ERR,
314		"Cannot get address for transport %s host %s service %s",
315		    nconf->nc_netid, hs->h_host, hs->h_serv);
316		(void) t_close(fd);
317		return (-1);
318	}
319
320	if (strcmp(nconf->nc_proto, "tcp") == 0) {
321		/*
322		 * If we're running over TCP, then set the
323		 * SO_REUSEADDR option so that we can bind
324		 * to our preferred address even if previously
325		 * left connections exist in FIN_WAIT states.
326		 * This is somewhat bogus, but otherwise you have
327		 * to wait 2 minutes to restart after killing it.
328		 */
329		if (reuseaddr(fd) == -1) {
330			syslog(LOG_WARNING,
331			"couldn't set SO_REUSEADDR option on transport");
332		}
333	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
334		/*
335		 * In order to run MLP on UDP, we need to handle creds.
336		 */
337		if (recvucred(fd) == -1) {
338			syslog(LOG_WARNING,
339			    "couldn't set SO_RECVUCRED option on transport");
340		}
341	}
342
343	/*
344	 * Make non global zone nfs4_callback port MLP
345	 */
346	if (use_any && is_system_labeled() && !gzone) {
347		if (anonmlp(fd) == -1) {
348			/*
349			 * failing to set this option means nfs4_callback
350			 * could fail silently later. So fail it with
351			 * with an error message now.
352			 */
353			syslog(LOG_ERR,
354			    "couldn't set SO_ANON_MLP option on transport");
355			(void) t_close(fd);
356			return (-1);
357		}
358	}
359
360	if (nconf->nc_semantics == NC_TPI_CLTS)
361		tb.qlen = 0;
362	else
363		tb.qlen = backlog;
364
365	/* LINTED pointer alignment */
366	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
367	if (ntb == (struct t_bind *)NULL) {
368		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
369		(void) t_close(fd);
370		netdir_free((void *)addrlist, ND_ADDRLIST);
371		return (-1);
372	}
373
374	/*
375	 * XXX - what about the space tb->addr.buf points to? This should
376	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
377	 * should't be called with T_ALL.
378	 */
379	if (addrlist)
380		tb.addr = *(addrlist->n_addrs);		/* structure copy */
381
382	if (t_bind(fd, &tb, ntb) == -1) {
383		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
384		(void) t_free((char *)ntb, T_BIND);
385		netdir_free((void *)addrlist, ND_ADDRLIST);
386		(void) t_close(fd);
387		return (-1);
388	}
389
390	/* make sure we bound to the right address */
391	if (use_any == FALSE &&
392	    (tb.addr.len != ntb->addr.len ||
393	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
394		syslog(LOG_ERR, "t_bind to wrong address");
395		(void) t_free((char *)ntb, T_BIND);
396		netdir_free((void *)addrlist, ND_ADDRLIST);
397		(void) t_close(fd);
398		return (-1);
399	}
400
401	/*
402	 * Call nfs4svc_setport so that the kernel can be
403	 * informed what port number the daemon is listing
404	 * for incoming connection requests.
405	 */
406
407	if ((nconf->nc_semantics == NC_TPI_COTS ||
408	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
409		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
410
411	*addr = &ntb->addr;
412	netdir_free((void *)addrlist, ND_ADDRLIST);
413
414	if (strcmp(nconf->nc_proto, "tcp") == 0) {
415		/*
416		 * Disable the Nagle algorithm on TCP connections.
417		 * Connections accepted from this listener will
418		 * inherit the listener options.
419		 */
420
421		/* LINTED pointer alignment */
422		opt = (struct opthdr *)reqbuf;
423		opt->level = IPPROTO_TCP;
424		opt->name = TCP_NODELAY;
425		opt->len = sizeof (int);
426
427		/* LINTED pointer alignment */
428		*(int *)((char *)opt + sizeof (*opt)) = 1;
429
430		req.flags = T_NEGOTIATE;
431		req.opt.len = sizeof (*opt) + opt->len;
432		req.opt.buf = (char *)opt;
433		resp.flags = 0;
434		resp.opt.buf = reqbuf;
435		resp.opt.maxlen = sizeof (reqbuf);
436
437		if (t_optmgmt(fd, &req, &resp) < 0 ||
438		    resp.flags != T_SUCCESS) {
439			syslog(LOG_ERR,
440	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
441			    nconf->nc_proto, t_errno);
442		}
443
444		nfslib_set_sockbuf(fd);
445	}
446
447	return (fd);
448}
449
450static int
451get_opt(int fd, int level, int name)
452{
453	struct t_optmgmt req, res;
454	struct {
455		struct opthdr opt;
456		int value;
457	} reqbuf;
458
459	reqbuf.opt.level = level;
460	reqbuf.opt.name = name;
461	reqbuf.opt.len = sizeof (int);
462	reqbuf.value = 0;
463
464	req.flags = T_CURRENT;
465	req.opt.len = sizeof (reqbuf);
466	req.opt.buf = (char *)&reqbuf;
467
468	res.flags = 0;
469	res.opt.buf = (char *)&reqbuf;
470	res.opt.maxlen = sizeof (reqbuf);
471
472	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
473		t_error("t_optmgmt");
474		return (-1);
475	}
476	return (reqbuf.value);
477}
478
479static int
480setopt(int fd, int level, int name, int value)
481{
482	struct t_optmgmt req, resp;
483	struct {
484		struct opthdr opt;
485		int value;
486	} reqbuf;
487
488	reqbuf.opt.level = level;
489	reqbuf.opt.name = name;
490	reqbuf.opt.len = sizeof (int);
491
492	reqbuf.value = value;
493
494	req.flags = T_NEGOTIATE;
495	req.opt.len = sizeof (reqbuf);
496	req.opt.buf = (char *)&reqbuf;
497
498	resp.flags = 0;
499	resp.opt.buf = (char *)&reqbuf;
500	resp.opt.maxlen = sizeof (reqbuf);
501
502	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
503		t_error("t_optmgmt");
504		return (-1);
505	}
506	return (0);
507}
508
/*
 * Turn on SO_REUSEADDR for the endpoint fd.
 * Returns what setopt() returns: 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	const int enable = 1;

	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, enable));
}
514
515static int
516recvucred(int fd)
517{
518	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
519}
520
521static int
522anonmlp(int fd)
523{
524	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
525}
526
527void
528nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
529{
530	int error;
531
532	/*
533	 * Save the error code across syslog(), just in case syslog()
534	 * gets its own error and, therefore, overwrites errno.
535	 */
536	error = errno;
537	if (t_errno == TSYSERR) {
538		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
539		    tli_name, fd, nconf->nc_proto);
540	} else {
541		syslog(LOG_ERR,
542		    "%s(file descriptor %d/transport %s) TLI error %d",
543		    tli_name, fd, nconf->nc_proto, t_errno);
544	}
545	errno = error;
546}
547
548/*
549 * Called to set up service over a particular transport.
550 */
551void
552do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
553	int (*svc)(int, struct netbuf, struct netconfig *))
554{
555	register int sock;
556	struct protob *protobp;
557	struct netbuf *retaddr;
558	struct netconfig *retnconf;
559	struct netbuf addrmask;
560	int vers;
561	int err;
562	int l;
563
564	if (provider)
565		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
566		    &retnconf);
567	else
568		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
569		    &retnconf);
570
571	if (sock == -1) {
572		(void) syslog(LOG_ERR,
573	"Cannot establish %s service over %s: transport setup problem.",
574		    protobp0->serv, provider ? provider : proto);
575		return;
576	}
577
578	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
579		(void) syslog(LOG_ERR,
580		    "Cannot set address mask for %s", retnconf->nc_netid);
581		return;
582	}
583
584	/*
585	 * Register all versions of the programs in the protocol block list.
586	 */
587	l = strlen(NC_UDP);
588	for (protobp = protobp0; protobp; protobp = protobp->next) {
589		for (vers = protobp->versmin; vers <= protobp->versmax;
590		    vers++) {
591			if ((protobp->program == NFS_PROGRAM ||
592			    protobp->program == NFS_ACL_PROGRAM) &&
593			    vers == NFS_V4 &&
594			    strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
595				continue;
596
597			(void) rpcb_unset(protobp->program, vers, retnconf);
598			(void) rpcb_set(protobp->program, vers, retnconf,
599			    retaddr);
600		}
601	}
602
603	/*
604	 * Register services with CLTS semantics right now.
605	 * Note: services with COTS/COTS_ORD semantics will be
606	 * registered later from cots_listen_event function.
607	 */
608	if (retnconf->nc_semantics == NC_TPI_CLTS) {
609		/* Don't drop core if supporting module(s) aren't loaded. */
610		(void) signal(SIGSYS, SIG_IGN);
611
612		/*
613		 * svc() doesn't block, it returns success or failure.
614		 */
615
616		if (svc == NULL && Mysvc4 != NULL)
617			err = (*Mysvc4)(sock, &addrmask, retnconf,
618			    NFS4_SETPORT|NFS4_KRPC_START, retaddr);
619		else
620			err = (*svc)(sock, addrmask, retnconf);
621
622		if (err < 0) {
623			(void) syslog(LOG_ERR,
624			    "Cannot establish %s service over <file desc."
625			    " %d, protocol %s> : %m. Exiting",
626			    protobp0->serv, sock, retnconf->nc_proto);
627			exit(1);
628		}
629	}
630	free(addrmask.buf);
631
632	/*
633	 * We successfully set up the server over this transport.
634	 * Add this descriptor to the one being polled on.
635	 */
636	add_to_poll_list(sock, retnconf);
637}
638
639/*
640 * Set up the NFS service over all the available transports.
641 * Returns -1 for failure, 0 for success.
642 */
643int
644do_all(struct protob *protobp,
645	int (*svc)(int, struct netbuf, struct netconfig *))
646{
647	struct netconfig *nconf;
648	NCONF_HANDLE *nc;
649	int l;
650
651	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
652		syslog(LOG_ERR, "setnetconfig failed: %m");
653		return (-1);
654	}
655	l = strlen(NC_UDP);
656	while (nconf = getnetconfig(nc)) {
657		if ((nconf->nc_flag & NC_VISIBLE) &&
658		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
659		    OK_TPI_TYPE(nconf) &&
660		    (protobp->program != NFS4_CALLBACK ||
661		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
662			do_one(nconf->nc_device, nconf->nc_proto,
663			    protobp, svc);
664	}
665	(void) endnetconfig(nc);
666	return (0);
667}
668
669/*
670 * poll on the open transport descriptors for events and errors.
671 */
672void
673poll_for_action(void)
674{
675	int nfds;
676	int i;
677
678	/*
679	 * Keep polling until all transports have been closed. When this
680	 * happens, we return.
681	 */
682	while ((int)num_fds > 0) {
683		nfds = poll(poll_array, num_fds, INFTIM);
684		switch (nfds) {
685		case 0:
686			continue;
687
688		case -1:
689			/*
690			 * Some errors from poll could be
691			 * due to temporary conditions, and we try to
692			 * be robust in the face of them. Other
693			 * errors (should never happen in theory)
694			 * are fatal (eg. EINVAL, EFAULT).
695			 */
696			switch (errno) {
697			case EINTR:
698				continue;
699
700			case EAGAIN:
701			case ENOMEM:
702				(void) sleep(10);
703				continue;
704
705			default:
706				(void) syslog(LOG_ERR,
707				    "poll failed: %m. Exiting");
708				exit(1);
709			}
710		default:
711			break;
712		}
713
714		/*
715		 * Go through the poll list looking for events.
716		 */
717		for (i = 0; i < num_fds && nfds > 0; i++) {
718			if (poll_array[i].revents) {
719				nfds--;
720				/*
721				 * We have a message, so try to read it.
722				 * Record the error return in errno,
723				 * so that syslog(LOG_ERR, "...%m")
724				 * dumps the corresponding error string.
725				 */
726				if (conn_polled[i].nc.nc_semantics ==
727				    NC_TPI_CLTS) {
728					errno = do_poll_clts_action(
729					    poll_array[i].fd, i);
730				} else {
731					errno = do_poll_cots_action(
732					    poll_array[i].fd, i);
733				}
734
735				if (errno == 0)
736					continue;
737				/*
738				 * Most returned error codes mean that there is
739				 * fatal condition which we can only deal with
740				 * by closing the transport.
741				 */
742				if (errno != EAGAIN && errno != ENOMEM) {
743					(void) syslog(LOG_ERR,
744		"Error (%m) reading descriptor %d/transport %s. Closing it.",
745					    poll_array[i].fd,
746					    conn_polled[i].nc.nc_proto);
747					(void) t_close(poll_array[i].fd);
748					remove_from_poll_list(poll_array[i].fd);
749
750				} else if (errno == ENOMEM)
751					(void) sleep(5);
752			}
753		}
754	}
755
756	(void) syslog(LOG_ERR,
757	    "All transports have been closed with errors. Exiting.");
758}
759
760/*
761 * Allocate poll/transport array entries for this descriptor.
762 */
763static void
764add_to_poll_list(int fd, struct netconfig *nconf)
765{
766	static int poll_array_size = 0;
767
768	/*
769	 * If the arrays are full, allocate new ones.
770	 */
771	if (num_fds == poll_array_size) {
772		struct pollfd *tpa;
773		struct conn_entry *tnp;
774
775		if (poll_array_size != 0) {
776			tpa = poll_array;
777			tnp = conn_polled;
778		} else
779			tpa = (struct pollfd *)0;
780
781		poll_array_size += POLL_ARRAY_INC_SIZE;
782		/*
783		 * Allocate new arrays.
784		 */
785		poll_array = (struct pollfd *)
786		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
787		conn_polled = (struct conn_entry *)
788		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
789		if (poll_array == (struct pollfd *)NULL ||
790		    conn_polled == (struct conn_entry *)NULL) {
791			syslog(LOG_ERR, "malloc failed for poll array");
792			exit(1);
793		}
794
795		/*
796		 * Copy the data of the old ones into new arrays, and
797		 * free the old ones.
798		 */
799		if (tpa) {
800			(void) memcpy((void *)poll_array, (void *)tpa,
801			    num_fds * sizeof (struct pollfd));
802			(void) memcpy((void *)conn_polled, (void *)tnp,
803			    num_fds * sizeof (struct conn_entry));
804			free((void *)tpa);
805			free((void *)tnp);
806		}
807	}
808
809	/*
810	 * Set the descriptor and event list. All possible events are
811	 * polled for.
812	 */
813	poll_array[num_fds].fd = fd;
814	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
815
816	/*
817	 * Copy the transport data over too.
818	 */
819	conn_polled[num_fds].nc = *nconf;
820	conn_polled[num_fds].closing = 0;
821
822	/*
823	 * Set the descriptor to non-blocking. Avoids a race
824	 * between data arriving on the stream and then having it
825	 * flushed before we can read it.
826	 */
827	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
828		(void) syslog(LOG_ERR,
829	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
830		    num_fds, nconf->nc_proto);
831		exit(1);
832	}
833
834	/*
835	 * Count this descriptor.
836	 */
837	++num_fds;
838}
839
840static void
841remove_from_poll_list(int fd)
842{
843	int i;
844	int num_to_copy;
845
846	for (i = 0; i < num_fds; i++) {
847		if (poll_array[i].fd == fd) {
848			--num_fds;
849			num_to_copy = num_fds - i;
850			(void) memcpy((void *)&poll_array[i],
851			    (void *)&poll_array[i+1],
852			    num_to_copy * sizeof (struct pollfd));
853			(void) memset((void *)&poll_array[num_fds], 0,
854			    sizeof (struct pollfd));
855			(void) memcpy((void *)&conn_polled[i],
856			    (void *)&conn_polled[i+1],
857			    num_to_copy * sizeof (struct conn_entry));
858			(void) memset((void *)&conn_polled[num_fds], 0,
859			    sizeof (struct conn_entry));
860			return;
861		}
862	}
863	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
864
865}
866
867/*
868 * Called to read and interpret the event on a connectionless descriptor.
869 * Returns 0 if successful, or a UNIX error code if failure.
870 */
871static int
872do_poll_clts_action(int fd, int conn_index)
873{
874	int error;
875	int ret;
876	int flags;
877	struct netconfig *nconf = &conn_polled[conn_index].nc;
878	static struct t_unitdata *unitdata = NULL;
879	static struct t_uderr *uderr = NULL;
880	static int oldfd = -1;
881	struct nd_hostservlist *host = NULL;
882	struct strbuf ctl[1], data[1];
883	/*
884	 * We just need to have some space to consume the
885	 * message in the event we can't use the TLI interface to do the
886	 * job.
887	 *
888	 * We flush the message using getmsg(). For the control part
889	 * we allocate enough for any TPI header plus 32 bytes for address
890	 * and options. For the data part, there is nothing magic about
891	 * the size of the array, but 256 bytes is probably better than
892	 * 1 byte, and we don't expect any data portion anyway.
893	 *
894	 * If the array sizes are too small, we handle this because getmsg()
895	 * (called to consume the message) will return MOREDATA|MORECTL.
896	 * Thus we just call getmsg() until it's read the message.
897	 */
898	char ctlbuf[sizeof (union T_primitives) + 32];
899	char databuf[256];
900
901	/*
902	 * If this is the same descriptor as the last time
903	 * do_poll_clts_action was called, we can save some
904	 * de-allocation and allocation.
905	 */
906	if (oldfd != fd) {
907		oldfd = fd;
908
909		if (unitdata) {
910			(void) t_free((char *)unitdata, T_UNITDATA);
911			unitdata = NULL;
912		}
913		if (uderr) {
914			(void) t_free((char *)uderr, T_UDERROR);
915			uderr = NULL;
916		}
917	}
918
919	/*
920	 * Allocate a unitdata structure for receiving the event.
921	 */
922	if (unitdata == NULL) {
923		/* LINTED pointer alignment */
924		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
925		if (unitdata == NULL) {
926			if (t_errno == TSYSERR) {
927				/*
928				 * Save the error code across
929				 * syslog(), just in case
930				 * syslog() gets its own error
931				 * and therefore overwrites errno.
932				 */
933				error = errno;
934				(void) syslog(LOG_ERR,
935	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
936				    fd, nconf->nc_proto);
937				return (error);
938			}
939			(void) syslog(LOG_ERR,
940"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
941			    fd, nconf->nc_proto, t_errno);
942			goto flush_it;
943		}
944	}
945
946try_again:
947	flags = 0;
948
949	/*
950	 * The idea is we wait for T_UNITDATA_IND's. Of course,
951	 * we don't get any, because rpcmod filters them out.
952	 * However, we need to call t_rcvudata() to let TLI
953	 * tell us we have a T_UDERROR_IND.
954	 *
955	 * algorithm is:
956	 * 	t_rcvudata(), expecting TLOOK.
957	 * 	t_look(), expecting T_UDERR.
958	 * 	t_rcvuderr(), expecting success (0).
959	 * 	expand destination address into ASCII,
960	 *	and dump it.
961	 */
962
963	ret = t_rcvudata(fd, unitdata, &flags);
964	if (ret == 0 || t_errno == TBUFOVFLW) {
965		(void) syslog(LOG_WARNING,
966"t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
967		    fd, nconf->nc_proto, unitdata->udata.len);
968
969		/*
970		 * Even though we don't expect any data, in case we do,
971		 * keep reading until there is no more.
972		 */
973		if (flags & T_MORE)
974			goto try_again;
975
976		return (0);
977	}
978
979	switch (t_errno) {
980	case TNODATA:
981		return (0);
982	case TSYSERR:
983		/*
984		 * System errors are returned to caller.
985		 * Save the error code across
986		 * syslog(), just in case
987		 * syslog() gets its own error
988		 * and therefore overwrites errno.
989		 */
990		error = errno;
991		(void) syslog(LOG_ERR,
992		    "t_rcvudata(file descriptor %d/transport %s) %m",
993		    fd, nconf->nc_proto);
994		return (error);
995	case TLOOK:
996		break;
997	default:
998		(void) syslog(LOG_ERR,
999		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1000		    fd, nconf->nc_proto, t_errno);
1001		goto flush_it;
1002	}
1003
1004	ret = t_look(fd);
1005	switch (ret) {
1006	case 0:
1007		return (0);
1008	case -1:
1009		/*
1010		 * System errors are returned to caller.
1011		 */
1012		if (t_errno == TSYSERR) {
1013			/*
1014			 * Save the error code across
1015			 * syslog(), just in case
1016			 * syslog() gets its own error
1017			 * and therefore overwrites errno.
1018			 */
1019			error = errno;
1020			(void) syslog(LOG_ERR,
1021			    "t_look(file descriptor %d/transport %s) %m",
1022			    fd, nconf->nc_proto);
1023			return (error);
1024		}
1025		(void) syslog(LOG_ERR,
1026		    "t_look(file descriptor %d/transport %s) TLI error %d",
1027		    fd, nconf->nc_proto, t_errno);
1028		goto flush_it;
1029	case T_UDERR:
1030		break;
1031	default:
1032		(void) syslog(LOG_WARNING,
1033	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1034		    fd, nconf->nc_proto, ret, T_UDERR);
1035	}
1036
1037	if (uderr == NULL) {
1038		/* LINTED pointer alignment */
1039		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1040		if (uderr == NULL) {
1041			if (t_errno == TSYSERR) {
1042				/*
1043				 * Save the error code across
1044				 * syslog(), just in case
1045				 * syslog() gets its own error
1046				 * and therefore overwrites errno.
1047				 */
1048				error = errno;
1049				(void) syslog(LOG_ERR,
1050	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1051				    fd, nconf->nc_proto);
1052				return (error);
1053			}
1054			(void) syslog(LOG_ERR,
1055"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1056			    fd, nconf->nc_proto, t_errno);
1057			goto flush_it;
1058		}
1059	}
1060
1061	ret = t_rcvuderr(fd, uderr);
1062	if (ret == 0) {
1063
1064		/*
1065		 * Save the datagram error in errno, so that the
1066		 * %m argument to syslog picks up the error string.
1067		 */
1068		errno = uderr->error;
1069
1070		/*
1071		 * Log the datagram error, then log the host that
1072		 * probably triggerred. Cannot log both in the
1073		 * same transaction because of packet size limitations
1074		 * in /dev/log.
1075		 */
1076		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1077"NFS response over <file descriptor %d/transport %s> generated error: %m",
1078		    fd, nconf->nc_proto);
1079
1080		/*
1081		 * Try to map the client's address back to a
1082		 * name.
1083		 */
1084		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1085		if (ret != -1 && host && host->h_cnt > 0 &&
1086		    host->h_hostservs) {
1087		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1088"Bad NFS response was sent to client with host name: %s; service port: %s",
1089		    host->h_hostservs->h_host,
1090		    host->h_hostservs->h_serv);
1091		} else {
1092			int i, j;
1093			char *buf;
1094			char *hex = "0123456789abcdef";
1095
1096			/*
1097			 * Mapping failed, print the whole thing
1098			 * in ASCII hex.
1099			 */
1100			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1101			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1102				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1103				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1104			}
1105			buf[j] = '\0';
1106		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1107	"Bad NFS response was sent to client with transport address: 0x%s",
1108		    buf);
1109			free((void *)buf);
1110		}
1111
1112		if (ret == 0 && host != NULL)
1113			netdir_free((void *)host, ND_HOSTSERVLIST);
1114		return (0);
1115	}
1116
1117	switch (t_errno) {
1118	case TNOUDERR:
1119		goto flush_it;
1120	case TSYSERR:
1121		/*
1122		 * System errors are returned to caller.
1123		 * Save the error code across
1124		 * syslog(), just in case
1125		 * syslog() gets its own error
1126		 * and therefore overwrites errno.
1127		 */
1128		error = errno;
1129		(void) syslog(LOG_ERR,
1130		    "t_rcvuderr(file descriptor %d/transport %s) %m",
1131		    fd, nconf->nc_proto);
1132		return (error);
1133	default:
1134		(void) syslog(LOG_ERR,
1135		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1136		    fd, nconf->nc_proto, t_errno);
1137		goto flush_it;
1138	}
1139
1140flush_it:
1141	/*
1142	 * If we get here, then we could not cope with whatever message
1143	 * we attempted to read, so flush it. If we did read a message,
1144	 * and one isn't present, that is all right, because fd is in
1145	 * nonblocking mode.
1146	 */
1147	(void) syslog(LOG_ERR,
1148	"Flushing one input message from <file descriptor %d/transport %s>",
1149	    fd, nconf->nc_proto);
1150
1151	/*
	 * Read and discard the message. Do this until there is
1153	 * no more control/data in the message or until we get an error.
1154	 */
1155	do {
1156		ctl->maxlen = sizeof (ctlbuf);
1157		ctl->buf = ctlbuf;
1158		data->maxlen = sizeof (databuf);
1159		data->buf = databuf;
1160		flags = 0;
1161		ret = getmsg(fd, ctl, data, &flags);
1162		if (ret == -1)
1163			return (errno);
1164	} while (ret != 0);
1165
1166	return (0);
1167}
1168
1169static void
1170conn_close_oldest(void)
1171{
1172	int fd;
1173	int i1;
1174
1175	/*
1176	 * Find the oldest connection that is not already in the
1177	 * process of shutting down.
1178	 */
1179	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1180		if (i1 >= num_fds)
1181			return;
1182		if (conn_polled[i1].closing == 0)
1183			break;
1184	}
1185#ifdef DEBUG
1186	printf("too many connections (%d), releasing oldest (%d)\n",
1187	    num_conns, poll_array[i1].fd);
1188#else
1189	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1190	    num_conns, poll_array[i1].fd);
1191#endif
1192	fd = poll_array[i1].fd;
1193	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1194		/*
1195		 * For politeness, send a T_DISCON_REQ to the transport
1196		 * provider.  We close the stream anyway.
1197		 */
1198		(void) t_snddis(fd, (struct t_call *)0);
1199		num_conns--;
1200		remove_from_poll_list(fd);
1201		(void) t_close(fd);
1202	} else {
1203		/*
1204		 * For orderly release, we do not close the stream
1205		 * until the T_ORDREL_IND arrives to complete
1206		 * the handshake.
1207		 */
1208		if (t_sndrel(fd) == 0)
1209			conn_polled[i1].closing = 1;
1210	}
1211}
1212
1213static boolean_t
1214conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1215{
1216	struct conn_ind	*conn;
1217	struct conn_ind	*next_conn;
1218
1219	conn = (struct conn_ind *)malloc(sizeof (*conn));
1220	if (conn == NULL) {
1221		syslog(LOG_ERR, "malloc for listen indication failed");
1222		return (FALSE);
1223	}
1224
1225	/* LINTED pointer alignment */
1226	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1227	if (conn->conn_call == NULL) {
1228		free((char *)conn);
1229		nfslib_log_tli_error("t_alloc", fd, nconf);
1230		return (FALSE);
1231	}
1232
1233	if (t_listen(fd, conn->conn_call) == -1) {
1234		nfslib_log_tli_error("t_listen", fd, nconf);
1235		(void) t_free((char *)conn->conn_call, T_CALL);
1236		free((char *)conn);
1237		return (FALSE);
1238	}
1239
1240	if (conn->conn_call->udata.len > 0) {
1241		syslog(LOG_WARNING,
1242	"rejecting inbound connection(%s) with %d bytes of connect data",
1243		    nconf->nc_proto, conn->conn_call->udata.len);
1244
1245		conn->conn_call->udata.len = 0;
1246		(void) t_snddis(fd, conn->conn_call);
1247		(void) t_free((char *)conn->conn_call, T_CALL);
1248		free((char *)conn);
1249		return (FALSE);
1250	}
1251
1252	if ((next_conn = *connp) != NULL) {
1253		next_conn->conn_prev->conn_next = conn;
1254		conn->conn_next = next_conn;
1255		conn->conn_prev = next_conn->conn_prev;
1256		next_conn->conn_prev = conn;
1257	} else {
1258		conn->conn_next = conn;
1259		conn->conn_prev = conn;
1260		*connp = conn;
1261	}
1262	return (TRUE);
1263}
1264
1265static int
1266discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1267{
1268	struct conn_ind	*conn;
1269	struct t_discon	discon;
1270
1271	discon.udata.buf = (char *)0;
1272	discon.udata.maxlen = 0;
1273	if (t_rcvdis(fd, &discon) == -1) {
1274		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1275		return (-1);
1276	}
1277
1278	conn = *connp;
1279	if (conn == NULL)
1280		return (0);
1281
1282	do {
1283		if (conn->conn_call->sequence == discon.sequence) {
1284			if (conn->conn_next == conn)
1285				*connp = (struct conn_ind *)0;
1286			else {
1287				if (conn == *connp) {
1288					*connp = conn->conn_next;
1289				}
1290				conn->conn_next->conn_prev = conn->conn_prev;
1291				conn->conn_prev->conn_next = conn->conn_next;
1292			}
1293			free((char *)conn);
1294			break;
1295		}
1296		conn = conn->conn_next;
1297	} while (conn != *connp);
1298
1299	return (0);
1300}
1301
/*
 * Handle a T_LISTEN event on listening endpoint fd.
 *
 * fd is the listening endpoint and conn_index is its slot in conn_polled.
 * The pending connect indication is queued with conn_get(); each queued
 * indication is then accepted on a freshly opened and bound endpoint and
 * the new stream is handed to kRPC through Mysvc4 (when set) or Mysvc.
 * Indications that arrive while an accept is in progress are added to the
 * same queue and handled by later iterations of the outer loop.
 *
 * Every path that abandons an indication releases its t_call with
 * t_free() and, once a new endpoint exists, closes that endpoint.
 */
static void
cots_listen_event(int fd, int conn_index)
{
	struct t_call *call;
	struct conn_ind	*conn;
	struct conn_ind	*conn_head;
	int event;
	struct netconfig *nconf = &conn_polled[conn_index].nc;
	int new_fd;
	struct netbuf addrmask;
	int ret = 0;
	char *clnt;
	char *clnt_uaddr = NULL;
	struct nd_hostservlist *clnt_serv = NULL;

	/* Start with an empty queue and pull in the indication that woke us. */
	conn_head = NULL;
	(void) conn_get(fd, nconf, &conn_head);

	while ((conn = conn_head) != NULL) {
		/* Unlink the head entry from the circular list. */
		conn_head = conn->conn_next;
		if (conn_head == conn)
			conn_head = NULL;
		else {
			conn_head->conn_prev = conn->conn_prev;
			conn->conn_prev->conn_next = conn_head;
		}
		/*
		 * Keep the t_call and discard the list wrapper; from here
		 * on "call" has to be released with t_free() on every path.
		 */
		call = conn->conn_call;
		free(conn);

		/*
		 * If we have already accepted the maximum number of
		 * connections allowed on the command line, then drop
		 * the oldest connection (for any protocol) before
		 * accepting the new connection.  Unless explicitly
		 * set on the command line, max_conns_allowed is -1.
		 */
		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
			conn_close_oldest();

		/*
		 * Create a new transport endpoint for the same proto as
		 * the listener.
		 */
		new_fd = nfslib_transport_open(nconf);
		if (new_fd == -1) {
			/* Refuse the connection; no user data goes with it. */
			call->udata.len = 0;
			(void) t_snddis(fd, call);
			(void) t_free((char *)call, T_CALL);
			syslog(LOG_ERR, "Cannot establish transport over %s",
			    nconf->nc_device);
			continue;
		}

		/* Bind to a generic address/port for the accepting stream. */
		if (t_bind(new_fd, NULL, NULL) == -1) {
			nfslib_log_tli_error("t_bind", new_fd, nconf);
			call->udata.len = 0;
			(void) t_snddis(fd, call);
			(void) t_free((char *)call, T_CALL);
			(void) t_close(new_fd);
			continue;
		}

		/*
		 * Retry the accept until it succeeds.  A TLOOK failure means
		 * another event is pending on the listener and has to be
		 * consumed first; any other failure abandons this connection.
		 */
		while (t_accept(fd, new_fd, call) == -1) {
			if (t_errno != TLOOK) {
#ifdef DEBUG
				nfslib_log_tli_error("t_accept", fd, nconf);
#endif
				call->udata.len = 0;
				(void) t_snddis(fd, call);
				(void) t_free((char *)call, T_CALL);
				(void) t_close(new_fd);
				goto do_next_conn;
			}
			/*
			 * Drain the pending events.  The "continue"
			 * statements inside the switch apply to this inner
			 * loop, i.e. they go back to t_look().
			 */
			while (event = t_look(fd)) {
				switch (event) {
				case T_LISTEN:
#ifdef DEBUG
					printf(
"cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
#endif
					/* Queue the additional indication. */
					(void) conn_get(fd, nconf, &conn_head);
					continue;
				case T_DISCONNECT:
#ifdef DEBUG
					printf(
	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
					    nconf->nc_proto);
#endif
					/* Drop the queued indication it names. */
					(void) discon_get(fd, nconf,
					    &conn_head);
					continue;
				default:
					/* Includes -1, i.e. t_look() itself failing. */
					syslog(LOG_ERR,
			"unexpected event 0x%x during accept processing (%s)",
					    event, nconf->nc_proto);
					call->udata.len = 0;
					(void) t_snddis(fd, call);
					(void) t_free((char *)call, T_CALL);
					(void) t_close(new_fd);
					goto do_next_conn;
				}
			}
		}

		/*
		 * The connection is established on new_fd from here on, so
		 * failures below disconnect new_fd rather than the listener.
		 */
		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
			(void) syslog(LOG_ERR,
			    "Cannot set address mask for %s",
			    nconf->nc_netid);
			(void) t_snddis(new_fd, NULL);
			(void) t_free((char *)call, T_CALL);
			(void) t_close(new_fd);
			continue;
		}

		/* Tell kRPC about the new stream. */
		if (Mysvc4 != NULL)
			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
			    NFS4_KRPC_START, &call->addr);
		else
			ret = (*Mysvc)(new_fd, addrmask, nconf);

		if (ret < 0) {
			if (errno != ENOTCONN) {
				syslog(LOG_ERR,
				    "unable to register new connection: %m");
			} else {
				/*
				 * This is the only error that could be
				 * caused by the client, so who was it?
				 */
				if (netdir_getbyaddr(nconf, &clnt_serv,
				    &(call->addr)) == ND_OK &&
				    clnt_serv->h_cnt > 0)
					clnt = clnt_serv->h_hostservs->h_host;
				else
					clnt = clnt_uaddr = taddr2uaddr(nconf,
					    &(call->addr));
				/*
				 * If we don't know who the client was,
				 * remain silent.
				 */
				if (clnt)
					syslog(LOG_ERR,
"unable to register new connection: client %s has dropped connection", clnt);
				/*
				 * Release whichever lookup result was obtained
				 * and reset the pointers for later iterations.
				 */
				if (clnt_serv) {
					netdir_free(clnt_serv, ND_HOSTSERVLIST);
					clnt_serv = NULL;
				}
				if (clnt_uaddr) {
					free(clnt_uaddr);
					clnt_uaddr = NULL;
				}
			}
			free(addrmask.buf);
			(void) t_snddis(new_fd, NULL);
			(void) t_free((char *)call, T_CALL);
			(void) t_close(new_fd);
			goto do_next_conn;
		}

		/* Registered: the mask and the call are no longer needed. */
		free(addrmask.buf);
		(void) t_free((char *)call, T_CALL);

		/*
		 * Poll on the new descriptor so that we get disconnect
		 * and orderly release indications.
		 */
		num_conns++;
		add_to_poll_list(new_fd, nconf);

		/* Reset nconf in case it has been moved. */
		nconf = &conn_polled[conn_index].nc;
do_next_conn:;
	}
}
1478
1479static int
1480do_poll_cots_action(int fd, int conn_index)
1481{
1482	char buf[256];
1483	int event;
1484	int i1;
1485	int flags;
1486	struct conn_entry *connent = &conn_polled[conn_index];
1487	struct netconfig *nconf = &(connent->nc);
1488	const char *errorstr;
1489
1490	while (event = t_look(fd)) {
1491		switch (event) {
1492		case T_LISTEN:
1493#ifdef DEBUG
1494printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1495#endif
1496			cots_listen_event(fd, conn_index);
1497			break;
1498
1499		case T_DATA:
1500#ifdef DEBUG
1501printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1502#endif
1503			/*
1504			 * Receive a private notification from CONS rpcmod.
1505			 */
1506			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1507			if (i1 == -1) {
1508				syslog(LOG_ERR, "t_rcv failed");
1509				break;
1510			}
1511			if (i1 < sizeof (int))
1512				break;
1513			i1 = BE32_TO_U32(buf);
1514			if (i1 == 1 || i1 == 2) {
1515				/*
1516				 * This connection has been idle for too long,
1517				 * so release it as politely as we can.  If we
1518				 * have already initiated an orderly release
1519				 * and we get notified that the stream is
1520				 * still idle, pull the plug.  This prevents
1521				 * hung connections from continuing to consume
1522				 * resources.
1523				 */
1524#ifdef DEBUG
1525printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1526printf("initiating orderly release of idle connection\n");
1527#endif
1528				if (nconf->nc_semantics == NC_TPI_COTS ||
1529				    connent->closing != 0) {
1530					(void) t_snddis(fd, (struct t_call *)0);
1531					goto fdclose;
1532				}
1533				/*
1534				 * For NC_TPI_COTS_ORD, the stream is closed
1535				 * and removed from the poll list when the
1536				 * T_ORDREL is received from the provider.  We
1537				 * don't wait for it here because it may take
1538				 * a while for the transport to shut down.
1539				 */
1540				if (t_sndrel(fd) == -1) {
1541					syslog(LOG_ERR,
1542					"unable to send orderly release %m");
1543				}
1544				connent->closing = 1;
1545			} else
1546				syslog(LOG_ERR,
1547				"unexpected event from CONS rpcmod %d", i1);
1548			break;
1549
1550		case T_ORDREL:
1551#ifdef DEBUG
1552printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1553#endif
1554			/* Perform an orderly release. */
1555			if (t_rcvrel(fd) == 0) {
1556				/* T_ORDREL on listen fd's should be ignored */
1557				if (!is_listen_fd_index(conn_index)) {
1558					(void) t_sndrel(fd);
1559					goto fdclose;
1560				}
1561				break;
1562
1563			} else if (t_errno == TLOOK) {
1564				break;
1565			} else {
1566				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1567
1568				/*
1569				 * check to make sure we do not close
1570				 * listen fd
1571				 */
1572				if (is_listen_fd_index(conn_index))
1573					break;
1574				else
1575					goto fdclose;
1576			}
1577
1578		case T_DISCONNECT:
1579#ifdef DEBUG
1580printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1581#endif
1582			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1583				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1584
1585			/*
1586			 * T_DISCONNECT on listen fd's should be ignored.
1587			 */
1588			if (is_listen_fd_index(conn_index))
1589				break;
1590			else
1591				goto fdclose;
1592
1593		default:
1594			if (t_errno == TSYSERR) {
1595				if ((errorstr = strerror(errno)) == NULL) {
1596					(void) sprintf(buf,
1597					    "Unknown error num %d", errno);
1598					errorstr = (const char *) buf;
1599				}
1600			} else if (event == -1)
1601				errorstr = t_strerror(t_errno);
1602			else
1603				errorstr = "";
1604			syslog(LOG_ERR,
1605			    "unexpected TLI event (0x%x) on "
1606			    "connection-oriented transport(%s,%d):%s",
1607			    event, nconf->nc_proto, fd, errorstr);
1608fdclose:
1609			num_conns--;
1610			remove_from_poll_list(fd);
1611			(void) t_close(fd);
1612			return (0);
1613		}
1614	}
1615
1616	return (0);
1617}
1618
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name expected by the netdir_*() routines.  The comparison is
 * case sensitive; any name that is not in the table maps to
 * "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc_name;
		char		*port_name;
	} name_map[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t idx;

	for (idx = 0; idx < sizeof (name_map) / sizeof (name_map[0]); idx++) {
		if (strcmp(name, name_map[idx].svc_name) == 0)
			return (name_map[idx].port_name);
	}

	return ("unrecognized");
}
1636
1637static int
1638bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1639		struct netconfig **retnconf)
1640{
1641	struct netconfig *nconf;
1642	NCONF_HANDLE *nc;
1643	struct nd_hostserv hs;
1644
1645	hs.h_host = HOST_SELF;
1646	hs.h_serv = serv_name_to_port_name(serv);
1647
1648	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1649		syslog(LOG_ERR, "setnetconfig failed: %m");
1650		return (-1);
1651	}
1652	while (nconf = getnetconfig(nc)) {
1653		if (OK_TPI_TYPE(nconf) &&
1654		    strcmp(nconf->nc_device, provider) == 0) {
1655			*retnconf = nconf;
1656			return (nfslib_bindit(nconf, addr, &hs,
1657			    listen_backlog));
1658		}
1659	}
1660	(void) endnetconfig(nc);
1661
1662	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1663	    provider);
1664	return (-1);
1665}
1666
1667static int
1668bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1669		struct netconfig **retnconf)
1670{
1671	struct netconfig *nconf;
1672	NCONF_HANDLE *nc = NULL;
1673	struct nd_hostserv hs;
1674
1675	hs.h_host = HOST_SELF;
1676	hs.h_serv = serv_name_to_port_name(serv);
1677
1678	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1679		syslog(LOG_ERR, "setnetconfig failed: %m");
1680		return (-1);
1681	}
1682	while (nconf = getnetconfig(nc)) {
1683		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1684			*retnconf = nconf;
1685			return (nfslib_bindit(nconf, addr, &hs,
1686			    listen_backlog));
1687		}
1688	}
1689	(void) endnetconfig(nc);
1690
1691	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1692	    proto);
1693	return (-1);
1694}
1695
1696#include <netinet/in.h>
1697
1698/*
1699 * Create an address mask appropriate for the transport.
1700 * The mask is used to obtain the host-specific part of
1701 * a network address when comparing addresses.
1702 * For an internet address the host-specific part is just
1703 * the 32 bit IP address and this part of the mask is set
1704 * to all-ones. The port number part of the mask is zeroes.
1705 */
1706static int
1707set_addrmask(int fd,
1708	struct netconfig *nconf,
1709	struct netbuf *mask)
1710{
1711	struct t_info info;
1712
1713	/*
1714	 * Find the size of the address we need to mask.
1715	 */
1716	if (t_getinfo(fd, &info) < 0) {
1717		t_error("t_getinfo");
1718		return (-1);
1719	}
1720	mask->len = mask->maxlen = info.addr;
1721	if (info.addr <= 0) {
1722		/*
1723		 * loopback devices have infinite addr size
1724		 * (it is identified by -1 in addr field of t_info structure),
1725		 * so don't build the netmask for them. It's a special case
1726		 * that should be handled properly.
1727		 */
1728		if ((info.addr == -1) &&
1729		    (0 == strcmp(nconf->nc_protofmly, NC_LOOPBACK))) {
1730			memset(mask, 0, sizeof (*mask));
1731			return (0);
1732		}
1733
1734		syslog(LOG_ERR, "set_addrmask: address size: %ld", info.addr);
1735		return (-1);
1736	}
1737
1738	mask->buf = (char *)malloc(mask->len);
1739	if (mask->buf == NULL) {
1740		syslog(LOG_ERR, "set_addrmask: no memory");
1741		return (-1);
1742	}
1743	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1744
1745	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1746		/*
1747		 * Set the mask so that the port is ignored.
1748		 */
1749		/* LINTED pointer alignment */
1750		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1751		    (ulong_t)~0;
1752		/* LINTED pointer alignment */
1753		((struct sockaddr_in *)mask->buf)->sin_family =
1754		    (ushort_t)~0;
1755	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1756		/* LINTED pointer alignment */
1757		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1758		    (uchar_t)~0, sizeof (struct in6_addr));
1759		/* LINTED pointer alignment */
1760		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1761		    (ushort_t)~0;
1762	} else {
1763
1764		/*
1765		 * Set all mask bits.
1766		 */
1767		(void) memset(mask->buf, 0xFF, mask->len);
1768	}
1769	return (0);
1770}
1771
1772/*
1773 * For listen fd's index is always less than end_listen_fds.
1774 * end_listen_fds is defined externally in the daemon that uses this library.
1775 * It's value is equal to the number of open file descriptors after the
1776 * last listen end point was opened but before any connection was accepted.
1777 */
1778static int
1779is_listen_fd_index(int index)
1780{
1781	return (index < end_listen_fds);
1782}
1783