1 /*
2  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 #pragma ident	"%Z%%M%	%I%	%E% SMI"
6 /*
7  * lib/krb5/os/sendto_kdc.c
8  *
9  * Copyright 1990,1991,2001,2002 by the Massachusetts Institute of Technology.
10  * All Rights Reserved.
11  *
12  * Export of this software from the United States of America may
13  *   require a specific license from the United States Government.
14  *   It is the responsibility of any person or organization contemplating
15  *   export to obtain such a license before exporting.
16  *
17  * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
18  * distribute this software and its documentation for any purpose and
19  * without fee is hereby granted, provided that the above copyright
20  * notice appear in all copies and that both that copyright notice and
21  * this permission notice appear in supporting documentation, and that
22  * the name of M.I.T. not be used in advertising or publicity pertaining
23  * to distribution of the software without specific, written prior
24  * permission.  Furthermore if you modify this software you must label
25  * your software as modified software and not distribute it in such a
26  * fashion that it might be confused with the original M.I.T. software.
27  * M.I.T. makes no representations about the suitability of
28  * this software for any purpose.  It is provided "as is" without express
29  * or implied warranty.
30  *
31  *
32  * Send packet to KDC for realm; wait for response, retransmitting
33  * as necessary.
34  */
35 
36 #define NEED_SOCKETS
37 #define NEED_LOWLEVEL_IO
38 #include <fake-addrinfo.h>
39 #include <k5-int.h>
40 
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #else
44 #include <time.h>
45 #endif
46 #include "os-proto.h"
47 
48 #ifdef _AIX
49 #include <sys/select.h>
50 #endif
51 
52 /* For FIONBIO.  */
53 #include <sys/ioctl.h>
54 #ifdef HAVE_SYS_FILIO_H
55 #include <sys/filio.h>
56 #endif
57 
58 #define MAX_PASS		    3
59 /* Solaris Kerberos: moved to k5-int.h */
60 /* #define DEFAULT_UDP_PREF_LIMIT	 1465 */
61 #define HARD_UDP_LIMIT		32700 /* could probably do 64K-epsilon ? */
62 
/*
 * Lower-level transport entry point, defined later in this file: sends
 * 'message' to the servers listed in 'addrs' and stores the answer in
 * 'reply'.  'localaddr' and 'localaddrlen' may both be null; when they are
 * supplied, the local address of the answering socket is fetched with
 * getsockname().
 */
extern krb5_error_code
krb5int_sendto (krb5_context context, const krb5_data *message,
		const struct addrlist *addrs, krb5_data *reply,
		struct sockaddr_storage *localaddr, socklen_t *localaddrlen);
67 
/*
 * Solaris kerberos: retained because other code depends on it.
 *
 * Writes 'len' bytes starting at 'data' to stderr.
 */
static void
default_debug_handler (const void *data, size_t len)
{
    /* stderr is unbuffered, so no flush is needed after the write. */
    (void) fwrite(data, sizeof (char), len, stderr);
}

/* Replaceable handler pointer; initially the stderr writer above. */
void (*krb5int_sendtokdc_debug_handler) (const void *, size_t) =
    default_debug_handler;
76 
77 /*
78  * Solaris Kerberos: only including the debug stuff if DEBUG defined outside
79  * this file.
80  */
81 #ifdef  DEBUG
82 
83 static char global_err_str[NI_MAXHOST + NI_MAXSERV + 1024];
84 
85 /* Solaris kerberos: removed put() since it isn't needed. */
86 
87 static void putstr(const char *str)
88 {
89     /* Solaris kerberos: build the string which will be passed to syslog later */
90     strlcat(global_err_str, str, sizeof (global_err_str));
91 }
92 
93 #define dprint krb5int_debug_fprint
94 #define dperror dprint
95 
96 #include <com_err.h>
97 
98 static void
99 krb5int_debug_fprint (const char *fmt, ...)
100 {
101     va_list args;
102 
103     /* Temporaries for variable arguments, etc.  */
104     krb5_error_code kerr;
105     int err;
106     fd_set *rfds, *wfds, *xfds;
107     int i;
108     int maxfd;
109     struct timeval *tv;
110     struct addrinfo *ai;
111     const krb5_data *d;
112     char addrbuf[NI_MAXHOST], portbuf[NI_MAXSERV];
113     const char *p;
114     char tmpbuf[NI_MAXHOST + NI_MAXSERV + 30];
115 
116     /*
117      * Solaris kerberos: modified this function to create a string to pass to
118      * syslog()
119      */
120     global_err_str[0] = NULL;
121 
122     va_start(args, fmt);
123 
124 #define putf(FMT,X)	(sprintf(tmpbuf,FMT,X),putstr(tmpbuf))
125 
126     for (; *fmt; fmt++) {
127 	if (*fmt != '%') {
128 	    /* Possible optimization: Look for % and print all chars
129 	       up to it in one call.  */
130 	    putf("%c", *fmt);
131 	    continue;
132 	}
133 	/* After this, always processing a '%' sequence.  */
134 	fmt++;
135 	switch (*fmt) {
136 	case 0:
137 	default:
138 	    abort();
139 	case 'E':
140 	    /* %E => krb5_error_code */
141 	    kerr = va_arg(args, krb5_error_code);
142 	    sprintf(tmpbuf, "%lu/", (unsigned long) kerr);
143 	    putstr(tmpbuf);
144 	    p = error_message(kerr);
145 	    putstr(p);
146 	    break;
147 	case 'm':
148 	    /* %m => errno value (int) */
149 	    /* Like syslog's %m except the errno value is passed in
150 	       rather than the current value.  */
151 	    err = va_arg(args, int);
152 	    putf("%d/", err);
153 	    p = strerror(err);
154 	    putstr(p);
155 	    break;
156 	case 'F':
157 	    /* %F => fd_set *, fd_set *, fd_set *, int */
158 	    rfds = va_arg(args, fd_set *);
159 	    wfds = va_arg(args, fd_set *);
160 	    xfds = va_arg(args, fd_set *);
161 	    maxfd = va_arg(args, int);
162 
163 	    for (i = 0; i < maxfd; i++) {
164 		int r = FD_ISSET(i, rfds);
165 		int w = wfds && FD_ISSET(i, wfds);
166 		int x = xfds && FD_ISSET(i, xfds);
167 		if (r || w || x) {
168 		    putf(" %d", i);
169 		    if (r)
170 			putstr("r");
171 		    if (w)
172 			putstr("w");
173 		    if (x)
174 			putstr("x");
175 		}
176 	    }
177 	    putstr(" ");
178 	    break;
179 	case 's':
180 	    /* %s => char * */
181 	    p = va_arg(args, const char *);
182 	    putstr(p);
183 	    break;
184 	case 't':
185 	    /* %t => struct timeval * */
186 	    tv = va_arg(args, struct timeval *);
187 	    if (tv) {
188 		sprintf(tmpbuf, "%ld.%06ld",
189 			(long) tv->tv_sec, (long) tv->tv_usec);
190 		putstr(tmpbuf);
191 	    } else
192 		putstr("never");
193 	    break;
194 	case 'd':
195 	    /* %d => int */
196 	    putf("%d", va_arg(args, int));
197 	    break;
198 	case 'p':
199 	    /* %p => pointer */
200 	    putf("%p", va_arg(args, void*));
201 	    break;
202 	case 'A':
203 	    /* %A => addrinfo */
204 	    ai = va_arg(args, struct addrinfo *);
205 	    if (0 != getnameinfo (ai->ai_addr, ai->ai_addrlen,
206 				  addrbuf, sizeof (addrbuf),
207 				  portbuf, sizeof (portbuf),
208 				  NI_NUMERICHOST | NI_NUMERICSERV))
209 		strcpy (addrbuf, "??"), strcpy (portbuf, "??");
210 	    sprintf(tmpbuf, "%s %s.%s",
211 		    (ai->ai_socktype == SOCK_DGRAM
212 		     ? "udp"
213 		     : ai->ai_socktype == SOCK_STREAM
214 		     ? "tcp"
215 		     : "???"),
216 		    addrbuf, portbuf);
217 	    putstr(tmpbuf);
218 	    break;
219 	case 'D':
220 	    /* %D => krb5_data * */
221 	    d = va_arg(args, krb5_data *);
222 	    p = d->data;
223 	    putstr("0x");
224 	    for (i = 0; i < d->length; i++) {
225 		putf("%.2x", *p++);
226 	    }
227 	    break;
228 	}
229     }
230     va_end(args);
231 
232     /* Solaris kerberos: use syslog() for debug output */
233     syslog(LOG_DEBUG, global_err_str);
234 }
235 
236 #else
237 #define dprint (void)
238 #define dperror(MSG) ((void)(MSG))
239 #endif
240 
241 static int
242 merge_addrlists (struct addrlist *dest, struct addrlist *src)
243 {
244     int err, i;
245 
246 #ifdef DEBUG
247     /*LINTED*/
248     dprint("merging addrlists:\n\tlist1: ");
249     for (i = 0; i < dest->naddrs; i++)
250 	/*LINTED*/
251 	dprint(" %A", dest->addrs[i]);
252     /*LINTED*/
253     dprint("\n\tlist2: ");
254     for (i = 0; i < src->naddrs; i++)
255 	/*LINTED*/
256 	dprint(" %A", src->addrs[i]);
257     /*LINTED*/
258     dprint("\n");
259 #endif
260 
261     err = krb5int_grow_addrlist (dest, src->naddrs);
262     if (err)
263 	return err;
264     for (i = 0; i < src->naddrs; i++) {
265 	dest->addrs[dest->naddrs + i] = src->addrs[i];
266 	src->addrs[i] = 0;
267     }
268     dest->naddrs += i;
269     src->naddrs = 0;
270 
271 #ifdef DEBUG
272     /*LINTED*/
273     dprint("\tout:   ");
274     for (i = 0; i < dest->naddrs; i++)
275 	/*LINTED*/
276 	dprint(" %A", dest->addrs[i]);
277     /*LINTED*/
278     dprint("\n");
279 #endif
280 
281     return 0;
282 }
283 
284 /*
285  * send the formatted request 'message' to a KDC for realm 'realm' and
286  * return the response (if any) in 'reply'.
287  *
288  * If the message is sent and a response is received, 0 is returned,
289  * otherwise an error code is returned.
290  *
291  * The storage for 'reply' is allocated and should be freed by the caller
292  * when finished.
293  */
294 
295 krb5_error_code
296 krb5_sendto_kdc (krb5_context context, const krb5_data *message,
297 		 const krb5_data *realm, krb5_data *reply,
298 		 int use_master, int tcp_only)
299 {
300     krb5_error_code retval;
301     struct addrlist addrs;
302     int socktype1 = 0, socktype2 = 0;
303 
304     /*
305      * find KDC location(s) for realm
306      */
307 
308     /*
309      * BUG: This code won't return "interesting" errors (e.g., out of mem,
310      * bad config file) from locate_kdc.  KRB5_REALM_CANT_RESOLVE can be
311      * ignored from one query of two, but if only one query is done, or
312      * both return that error, it should be returned to the caller.  Also,
313      * "interesting" errors (not KRB5_KDC_UNREACH) from sendto_{udp,tcp}
314      * should probably be returned as well.
315      */
316 
317     /*LINTED*/
318     dprint("krb5_sendto_kdc(%d@%p, \"%D\", use_master=%d, tcp_only=%d)\n",
319     /*LINTED*/
320 	   message->length, message->data, realm, use_master, tcp_only);
321 
322     /*
323      * Solaris Kerberos: keep it simple by not supporting a udp_preference_limit
324      */
325 #if 0 /************** Begin IFDEF'ed OUT *******************************/
326     if (!tcp_only && context->udp_pref_limit < 0) {
327 	int tmp;
328 	retval = profile_get_integer(context->profile,
329 				     "libdefaults", "udp_preference_limit", 0,
330 				     DEFAULT_UDP_PREF_LIMIT, &tmp);
331 	if (retval)
332 	    return retval;
333 	if (tmp < 0)
334 	    tmp = DEFAULT_UDP_PREF_LIMIT;
335 	else if (tmp > HARD_UDP_LIMIT) {
336 	    /* In the unlikely case that a *really* big value is
337 	       given, let 'em use as big as we think we can
338 	       support.  */
339 	    tmp = HARD_UDP_LIMIT;
340 	}
341 	context->udp_pref_limit = tmp;
342     }
343 #endif /**************** END IFDEF'ed OUT *******************************/
344 
345     retval = (use_master ? KRB5_KDC_UNREACH : KRB5_REALM_UNKNOWN);
346 
347     if (tcp_only)
348 	socktype1 = SOCK_STREAM, socktype2 = 0;
349     else if (message->length <= context->udp_pref_limit)
350 	socktype1 = SOCK_DGRAM, socktype2 = SOCK_STREAM;
351     else
352 	socktype1 = SOCK_STREAM, socktype2 = SOCK_DGRAM;
353 
354     retval = krb5_locate_kdc(context, realm, &addrs, use_master, socktype1, 0);
355     if (socktype2) {
356 	struct addrlist addrs2;
357 
358 	retval = krb5_locate_kdc(context, realm, &addrs2, use_master,
359 				 socktype2, 0);
360 	if (retval == 0) {
361 	    (void) merge_addrlists(&addrs, &addrs2);
362 	    krb5int_free_addrlist(&addrs2);
363 	}
364     }
365     if (addrs.naddrs > 0) {
366 	retval = krb5int_sendto (context, message, &addrs, reply, 0, 0);
367 	krb5int_free_addrlist (&addrs);
368 	if (retval == 0)
369 	    return 0;
370     }
371     return retval;
372 }
373 
374 
375 /*
376  * Notes:
377  *
378  * Getting "connection refused" on a connected UDP socket causes
379  * select to indicate write capability on UNIX, but only shows up
380  * as an exception on Windows.  (I don't think any UNIX system flags
381  * the error as an exception.)  So we check for both, or make it
382  * system-specific.
383  *
384  * Always watch for responses from *any* of the servers.  Eventually
385  * fix the UDP code to do the same.
386  *
387  * To do:
388  * - TCP NOPUSH/CORK socket options?
389  * - error codes that don't suck
390  * - getsockopt(SO_ERROR) to check connect status
391  * - handle error RESPONSE_TOO_BIG from UDP server and use TCP
392  *   connections already in progress
393  */
394 
395 #include <cm.h>
396 
/*
 * Printable names for enum conn_states, indexed by the enum value; the
 * two lists must be kept in the same order.
 */
static const char *const state_strings[] = {
    "INITIALIZING", "CONNECTING", "WRITING", "READING", "FAILED"
};
/* Progress of one connection attempt (see struct conn_state). */
enum conn_states { INITIALIZING, CONNECTING, WRITING, READING, FAILED };
/* Receive-side bookkeeping for one reply. */
struct incoming_krb5_message {
    /* How many of the 4 length-prefix bytes have been read so far (TCP). */
    size_t bufsizebytes_read;
    /* Size of 'buf'. */
    size_t bufsize;
    /* Reply buffer: malloc'ed per TCP reply, or the shared UDP buffer. */
    char *buf;
    /* One past the last reply byte stored in 'buf' so far. */
    char *pos;
    /* Raw length prefix as received; decoded most significant byte first. */
    unsigned char bufsizebytes[4];
    /* Bytes of the TCP reply body still to be read. */
    size_t n_left;
};
/* Everything tracked for one server address being tried. */
struct conn_state {
    /* Socket for this attempt, or INVALID_SOCKET when none is open. */
    SOCKET fd;
    /* Error recorded when the attempt fails. */
    krb5_error_code err;
    enum conn_states state;
    /* 1 for datagram sockets, 0 for stream sockets. */
    unsigned int is_udp : 1;
    /*
     * Handler called by service_fds() with the SSF_* flags for this fd;
     * a nonzero return tells the caller to stop looping.
     */
    int (*service)(struct conn_state *, struct select_state *, int);
    /* Address being contacted; set by setup_connection(). */
    struct addrinfo *addr;
    struct {
	struct {
	    /*
	     * Data to send.  Stream: [0] is the 4-byte length prefix and
	     * [1] the message.  Datagram: [0] is the message, [1] is empty.
	     */
	    sg_buf sgbuf[2];
	    /* First buffer that still has unsent data. */
	    sg_buf *sgp;
	    /* Number of buffers, starting at sgp, still to be sent. */
	    int sg_count;
	} out;
	struct incoming_krb5_message in;
    } x;
};
425 
/*
 * Store the current time in *tvp.
 * Returns 0 on success, or the errno value set by gettimeofday().
 */
static int getcurtime (struct timeval *tvp)
{
    if (gettimeofday(tvp, 0)) {
	/*
	 * Capture errno before logging: in DEBUG builds dperror()
	 * formats and logs a message, which may overwrite errno.
	 */
	int saved_errno = errno;

	dperror("gettimeofday");
	return saved_errno;
    }
    return 0;
}
434 
435 /*
436  * Call select and return results.
437  * Input: interesting file descriptors and absolute timeout
438  * Output: select return value (-1 or num fds ready) and fd_sets
439  * Return: 0 (for i/o available or timeout) or error code.
440  */
441 krb5_error_code
442 krb5int_cm_call_select (const struct select_state *in,
443 			struct select_state *out, int *sret)
444 {
445     struct timeval now, *timo;
446     krb5_error_code e;
447 
448     *out = *in;
449     e = getcurtime(&now);
450     if (e)
451 	return e;
452     if (out->end_time.tv_sec == 0)
453 	timo = 0;
454     else {
455 	timo = &out->end_time;
456 	out->end_time.tv_sec -= now.tv_sec;
457 	out->end_time.tv_usec -= now.tv_usec;
458 	if (out->end_time.tv_usec < 0) {
459 	    out->end_time.tv_usec += 1000000;
460 	    out->end_time.tv_sec--;
461 	}
462 	if (out->end_time.tv_sec < 0) {
463 	    *sret = 0;
464 	    return 0;
465 	}
466     }
467     /*LINTED*/
468     dprint("selecting on max=%d sockets [%F] timeout %t\n",
469 	    /*LINTED*/
470 	   out->max, &out->rfds, &out->wfds, &out->xfds, out->max, timo);
471     *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo);
472     e = SOCKET_ERRNO;
473 
474 #ifdef DEBUG
475     /*LINTED*/
476     dprint("select returns %d", *sret);
477     if (*sret < 0)
478 	/*LINTED*/
479 	dprint(", error = %E\n", e);
480     else if (*sret == 0)
481 	/*LINTED*/
482 	dprint(" (timeout)\n");
483     else
484 	/*LINTED*/
485 	dprint(":%F\n", &out->rfds, &out->wfds, &out->xfds, out->max);
486 #endif
487 
488     if (*sret < 0)
489 	return e;
490     return 0;
491 }
492 
493 static int service_tcp_fd (struct conn_state *conn,
494 			   struct select_state *selstate, int ssflags);
495 static int service_udp_fd (struct conn_state *conn,
496 			   struct select_state *selstate, int ssflags);
497 
498 
499 static int
500 setup_connection (struct conn_state *state, struct addrinfo *ai,
501 		  const krb5_data *message, unsigned char *message_len_buf,
502 		  char **udpbufp)
503 {
504     state->state = INITIALIZING;
505     state->err = 0;
506     state->x.out.sgp = state->x.out.sgbuf;
507     state->addr = ai;
508     state->fd = INVALID_SOCKET;
509     SG_SET(&state->x.out.sgbuf[1], 0, 0);
510     if (ai->ai_socktype == SOCK_STREAM) {
511 	SG_SET(&state->x.out.sgbuf[0], message_len_buf, 4);
512 	SG_SET(&state->x.out.sgbuf[1], message->data, message->length);
513 	state->x.out.sg_count = 2;
514 	state->is_udp = 0;
515 	state->service = service_tcp_fd;
516     } else {
517 	SG_SET(&state->x.out.sgbuf[0], message->data, message->length);
518 	SG_SET(&state->x.out.sgbuf[1], 0, 0);
519 	state->x.out.sg_count = 1;
520 	state->is_udp = 1;
521 	state->service = service_udp_fd;
522 
523 	if (*udpbufp == 0) {
524 	    *udpbufp = malloc(krb5_max_dgram_size);
525 	    if (*udpbufp == 0) {
526 		dperror("malloc(krb5_max_dgram_size)");
527 		(void) closesocket(state->fd);
528 		state->fd = INVALID_SOCKET;
529 		state->state = FAILED;
530 		return 1;
531 	    }
532 	}
533 	state->x.in.buf = *udpbufp;
534 	state->x.in.bufsize = krb5_max_dgram_size;
535     }
536     return 0;
537 }
538 
539 static int
540 start_connection (struct conn_state *state, struct select_state *selstate)
541 {
542     int fd, e;
543     struct addrinfo *ai = state->addr;
544 
545     /*LINTED*/
546     dprint("start_connection(@%p)\ngetting %s socket in family %d...", state,
547 	   /*LINTED*/
548 	   ai->ai_socktype == SOCK_STREAM ? "stream" : "dgram", ai->ai_family);
549     fd = socket(ai->ai_family, ai->ai_socktype, 0);
550     if (fd == INVALID_SOCKET) {
551 	state->err = SOCKET_ERRNO;
552 	/*LINTED*/
553 	dprint("socket: %m creating with af %d\n", state->err, ai->ai_family);
554 	return -1;		/* try other hosts */
555     }
556     /* Make it non-blocking.  */
557     if (ai->ai_socktype == SOCK_STREAM) {
558 	static const int one = 1;
559 	static const struct linger lopt = { 0, 0 };
560 
561 	if (ioctlsocket(fd, FIONBIO, (const void *) &one))
562 	    dperror("sendto_kdc: ioctl(FIONBIO)");
563 	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lopt, sizeof(lopt)))
564 	    dperror("sendto_kdc: setsockopt(SO_LINGER)");
565     }
566 
567     /* Start connecting to KDC.  */
568     /*LINTED*/
569     dprint(" fd %d; connecting to %A...\n", fd, ai);
570     e = connect(fd, ai->ai_addr, ai->ai_addrlen);
571     if (e != 0) {
572 	/*
573 	 * This is the path that should be followed for non-blocking
574 	 * connections.
575 	 */
576 	if (SOCKET_ERRNO == EINPROGRESS || SOCKET_ERRNO == EWOULDBLOCK) {
577 	    state->state = CONNECTING;
578 	} else {
579 	    /*LINTED*/
580 	    dprint("connect failed: %m\n", SOCKET_ERRNO);
581 	    state->err = SOCKET_ERRNO;
582 	    state->state = FAILED;
583 	    return -2;
584 	}
585     } else {
586 	/*
587 	 * Connect returned zero even though we tried to make it
588 	 * non-blocking, which should have caused it to return before
589 	 * finishing the connection.  Oh well.  Someone's network
590 	 * stack is broken, but if they gave us a connection, use it.
591 	 */
592 	state->state = WRITING;
593     }
594     /*LINTED*/
595     dprint("new state = %s\n", state_strings[state->state]);
596 
597     state->fd = fd;
598 
599     if (ai->ai_socktype == SOCK_DGRAM) {
600 	/* Send it now.  */
601 	int ret;
602 	sg_buf *sg = &state->x.out.sgbuf[0];
603 
604 	/*LINTED*/
605 	dprint("sending %d bytes on fd %d\n", SG_LEN(sg), state->fd);
606 	ret = send(state->fd, SG_BUF(sg), SG_LEN(sg), 0);
607 	if (ret != SG_LEN(sg)) {
608 	    dperror("sendto");
609 	    (void) closesocket(state->fd);
610 	    state->fd = INVALID_SOCKET;
611 	    state->state = FAILED;
612 	    return -3;
613 	} else {
614 	    state->state = READING;
615 	}
616     }
617 
618     FD_SET(state->fd, &selstate->rfds);
619     if (state->state == CONNECTING || state->state == WRITING)
620 	FD_SET(state->fd, &selstate->wfds);
621     FD_SET(state->fd, &selstate->xfds);
622     if (selstate->max <= state->fd)
623 	selstate->max = state->fd + 1;
624     selstate->nfds++;
625 
626     /*LINTED*/
627     dprint("new select vectors: %F\n",
628 	   /*LINTED*/
629 	   &selstate->rfds, &selstate->wfds, &selstate->xfds, selstate->max);
630 
631     return 0;
632 }
633 
634 /* Return 0 if we sent something, non-0 otherwise.
635    If 0 is returned, the caller should delay waiting for a response.
636    Otherwise, the caller should immediately move on to process the
637    next connection.  */
638 static int
639 maybe_send (struct conn_state *conn, struct select_state *selstate)
640 {
641     sg_buf *sg;
642 
643     /*LINTED*/
644     dprint("maybe_send(@%p) state=%s type=%s\n", conn,
645 	   /*LINTED*/
646 	   state_strings[conn->state], conn->is_udp ? "udp" : "tcp");
647     if (conn->state == INITIALIZING)
648 	return start_connection(conn, selstate);
649 
650     /* Did we already shut down this channel?  */
651     if (conn->state == FAILED) {
652 	dprint("connection already closed\n");
653 	return -1;
654     }
655 
656     if (conn->addr->ai_socktype == SOCK_STREAM) {
657 	dprint("skipping stream socket\n");
658 	/* The select callback will handle flushing any data we
659 	   haven't written yet, and we only write it once.  */
660 	return -1;
661     }
662 
663     /* UDP - Send message, possibly for the first time, possibly a
664        retransmit if a previous attempt timed out.  */
665     sg = &conn->x.out.sgbuf[0];
666     /*LINTED*/
667     dprint("sending %d bytes on fd %d\n", SG_LEN(sg), conn->fd);
668     if (send(conn->fd, SG_BUF(sg), SG_LEN(sg), 0) != SG_LEN(sg)) {
669 	dperror("send");
670 	/* Keep connection alive, we'll try again next pass.
671 
672 	   Is this likely to catch any errors we didn't get from the
673 	   select callbacks?  */
674 	return -1;
675     }
676     /* Yay, it worked.  */
677     return 0;
678 }
679 
680 static void
681 kill_conn(struct conn_state *conn, struct select_state *selstate, int err)
682 {
683     conn->state = FAILED;
684     shutdown(conn->fd, SHUTDOWN_BOTH);
685     FD_CLR(conn->fd, &selstate->rfds);
686     FD_CLR(conn->fd, &selstate->wfds);
687     FD_CLR(conn->fd, &selstate->xfds);
688     conn->err = err;
689     /*LINTED*/
690     dprint("abandoning connection %d: %m\n", conn->fd, err);
691     /* Fix up max fd for next select call.  */
692     if (selstate->max == 1 + conn->fd) {
693 	while (selstate->max > 0
694 	       && ! FD_ISSET(selstate->max-1, &selstate->rfds)
695 	       && ! FD_ISSET(selstate->max-1, &selstate->wfds)
696 	       && ! FD_ISSET(selstate->max-1, &selstate->xfds))
697 	    selstate->max--;
698 	/*LINTED*/
699 	dprint("new max_fd + 1 is %d\n", selstate->max);
700     }
701     selstate->nfds--;
702 }
703 
704 /* Return nonzero only if we're finished and the caller should exit
705    its loop.  This happens in two cases: We have a complete message,
706    or the socket has closed and no others are open.  */
707 
708 static int
709 service_tcp_fd (struct conn_state *conn, struct select_state *selstate,
710 		int ssflags)
711 {
712     krb5_error_code e = 0;
713     int nwritten, nread;
714 
715     if (!(ssflags & (SSF_READ|SSF_WRITE|SSF_EXCEPTION)))
716 	abort();
717     switch (conn->state) {
718 	SOCKET_WRITEV_TEMP tmp;
719 
720     case CONNECTING:
721 	if (ssflags & SSF_READ) {
722 	    /* Bad -- the KDC shouldn't be sending to us first.  */
723 	    e = EINVAL /* ?? */;
724 	kill_conn:
725 	    kill_conn(conn, selstate, e);
726 	    if (e == EINVAL) {
727 		closesocket(conn->fd);
728 		conn->fd = INVALID_SOCKET;
729 	    }
730 	    return e == 0;
731 	}
732 	if (ssflags & SSF_EXCEPTION) {
733 	handle_exception:
734 	    e = 1;		/* need only be non-zero */
735 	    goto kill_conn;
736 	}
737 
738 	/*
739 	 * Connect finished -- but did it succeed or fail?
740 	 * UNIX sets can_write if failed.
741 	 * Try writing, I guess, and find out.
742 	 */
743 	conn->state = WRITING;
744 	goto try_writing;
745 
746     case WRITING:
747 	if (ssflags & SSF_READ) {
748 	    e = E2BIG;
749 	    /* Bad -- the KDC shouldn't be sending anything yet.  */
750 	    goto kill_conn;
751 	}
752 	if (ssflags & SSF_EXCEPTION)
753 	    goto handle_exception;
754 
755     try_writing:
756 	/*LINTED*/
757 	dprint("trying to writev %d (%d bytes) to fd %d\n",
758 		/*LINTED*/
759 	       conn->x.out.sg_count,
760 	       ((conn->x.out.sg_count == 2 ? SG_LEN(&conn->x.out.sgp[1]) : 0)
761 		/*LINTED*/
762 		+ SG_LEN(&conn->x.out.sgp[0])),
763 	       conn->fd);
764 	nwritten = SOCKET_WRITEV(conn->fd, conn->x.out.sgp,
765 				 conn->x.out.sg_count, tmp);
766 	if (nwritten < 0) {
767 	    e = SOCKET_ERRNO;
768 	    /*LINTED*/
769 	    dprint("failed: %m\n", e);
770 	    goto kill_conn;
771 	}
772 	/*LINTED*/
773 	dprint("wrote %d bytes\n", nwritten);
774 	while (nwritten) {
775 	    sg_buf *sgp = conn->x.out.sgp;
776 	    if (nwritten < SG_LEN(sgp)) {
777 		/*LINTED*/
778 		SG_ADVANCE(sgp, nwritten);
779 		nwritten = 0;
780 	    } else {
781 		nwritten -= SG_LEN(conn->x.out.sgp);
782 		conn->x.out.sgp++;
783 		conn->x.out.sg_count--;
784 		if (conn->x.out.sg_count == 0 && nwritten != 0)
785 		    /* Wrote more than we wanted to?  */
786 		    abort();
787 	    }
788 	}
789 	if (conn->x.out.sg_count == 0) {
790 	    /* Done writing, switch to reading.  */
791 	    /* Don't call shutdown at this point because
792 	     * some implementations cannot deal with half-closed connections.*/
793 	    FD_CLR(conn->fd, &selstate->wfds);
794 	    /* Q: How do we detect failures to send the remaining data
795 	       to the remote side, since we're in non-blocking mode?
796 	       Will we always get errors on the reading side?  */
797 	    /*LINTED*/
798 	    dprint("switching fd %d to READING\n", conn->fd);
799 	    conn->state = READING;
800 	    conn->x.in.bufsizebytes_read = 0;
801 	    conn->x.in.bufsize = 0;
802 	    conn->x.in.buf = 0;
803 	    conn->x.in.pos = 0;
804 	    conn->x.in.n_left = 0;
805 	}
806 	return 0;
807 
808     case READING:
809 	if (ssflags & SSF_EXCEPTION) {
810 	    if (conn->x.in.buf) {
811 		free(conn->x.in.buf);
812 		conn->x.in.buf = 0;
813 	    }
814 	    goto handle_exception;
815 	}
816 
817 	if (conn->x.in.bufsizebytes_read == 4) {
818 	    /* Reading data.  */
819 	    /*LINTED*/
820 	    dprint("reading %d bytes of data from fd %d\n",
821 		   (int) conn->x.in.n_left, conn->fd);
822 	    nread = SOCKET_READ(conn->fd, conn->x.in.pos, conn->x.in.n_left);
823 	    if (nread <= 0) {
824 		e = nread ? SOCKET_ERRNO : ECONNRESET;
825 		free(conn->x.in.buf);
826 		conn->x.in.buf = 0;
827 		goto kill_conn;
828 	    }
829 	    conn->x.in.n_left -= nread;
830 	    conn->x.in.pos += nread;
831 	    if ((long)conn->x.in.n_left <= 0) {
832 		/* We win!  */
833 		return 1;
834 	    }
835 	} else {
836 	    /* Reading length.  */
837 	    nread = SOCKET_READ(conn->fd,
838 				conn->x.in.bufsizebytes + conn->x.in.bufsizebytes_read,
839 				4 - conn->x.in.bufsizebytes_read);
840 	    if (nread < 0) {
841 		e = SOCKET_ERRNO;
842 		goto kill_conn;
843 	    }
844 	    conn->x.in.bufsizebytes_read += nread;
845 	    if (conn->x.in.bufsizebytes_read == 4) {
846 		unsigned long len;
847 		len = conn->x.in.bufsizebytes[0];
848 		len = (len << 8) + conn->x.in.bufsizebytes[1];
849 		len = (len << 8) + conn->x.in.bufsizebytes[2];
850 		len = (len << 8) + conn->x.in.bufsizebytes[3];
851 		/*LINTED*/
852 		dprint("received length on fd %d is %d\n", conn->fd, (int)len);
853 		/* Arbitrary 1M cap.  */
854 		if (len > 1 * 1024 * 1024) {
855 		    e = E2BIG;
856 		    goto kill_conn;
857 		}
858 		conn->x.in.bufsize = conn->x.in.n_left = len;
859 		conn->x.in.buf = conn->x.in.pos = malloc(len);
860 		/*LINTED*/
861 		dprint("allocated %d byte buffer at %p\n", (int) len,
862 		       conn->x.in.buf);
863 		if (conn->x.in.buf == 0) {
864 		    /* allocation failure */
865 		    e = errno;
866 		    goto kill_conn;
867 		}
868 	    }
869 	}
870 	break;
871 
872     default:
873 	abort();
874     }
875     return 0;
876 }
877 
878 static int
879 service_udp_fd(struct conn_state *conn, struct select_state *selstate,
880 	       int ssflags)
881 {
882     int nread;
883 
884     if (!(ssflags & (SSF_READ|SSF_EXCEPTION)))
885 	abort();
886     if (conn->state != READING)
887 	abort();
888 
889     nread = recv(conn->fd, conn->x.in.buf, conn->x.in.bufsize, 0);
890     if (nread < 0) {
891 	kill_conn(conn, selstate, SOCKET_ERRNO);
892 	return 0;
893     }
894     conn->x.in.pos = conn->x.in.buf + nread;
895     return 1;
896 }
897 
898 static int
899 service_fds (struct select_state *selstate,
900 	     struct conn_state *conns, size_t n_conns, int *winning_conn)
901 {
902     int e, selret;
903     struct select_state sel_results;
904 
905     e = 0;
906     while (selstate->nfds > 0
907 	   && (e = krb5int_cm_call_select(selstate, &sel_results, &selret)) == 0) {
908 	int i;
909 
910 	/*LINTED*/
911 	dprint("service_fds examining results, selret=%d\n", selret);
912 
913 	if (selret == 0)
914 	    /* Timeout, return to caller.  */
915 	    return 0;
916 
917 	/* Got something on a socket, process it.  */
918 	for (i = 0; i <= selstate->max && selret > 0 && i < n_conns; i++) {
919 	    int ssflags;
920 
921 	    if (conns[i].fd == INVALID_SOCKET)
922 		continue;
923 	    ssflags = 0;
924 	    if (FD_ISSET(conns[i].fd, &sel_results.rfds))
925 		ssflags |= SSF_READ, selret--;
926 	    if (FD_ISSET(conns[i].fd, &sel_results.wfds))
927 		ssflags |= SSF_WRITE, selret--;
928 	    if (FD_ISSET(conns[i].fd, &sel_results.xfds))
929 		ssflags |= SSF_EXCEPTION, selret--;
930 	    if (!ssflags)
931 		continue;
932 
933 	    /*LINTED*/
934 	    dprint("handling flags '%s%s%s' on fd %d (%A) in state %s\n",
935 		    /*LINTED*/
936 		   (ssflags & SSF_READ) ? "r" : "",
937 		    /*LINTED*/
938 		   (ssflags & SSF_WRITE) ? "w" : "",
939 		    /*LINTED*/
940 		   (ssflags & SSF_EXCEPTION) ? "x" : "",
941 		    /*LINTED*/
942 		   conns[i].fd, conns[i].addr,
943 		   state_strings[(int) conns[i].state]);
944 
945 	    if (conns[i].service (&conns[i], selstate, ssflags)) {
946 		dprint("fd service routine says we're done\n");
947 		*winning_conn = i;
948 		return 1;
949 	    }
950 	}
951     }
952     if (e != 0) {
953 	/*LINTED*/
954 	dprint("select returned %m\n", e);
955 	*winning_conn = -1;
956 	return 1;
957     }
958     return 0;
959 }
960 
961 /*
962  * Current worst-case timeout behavior:
963  *
964  * First pass, 1s per udp or tcp server, plus 2s at end.
965  * Second pass, 1s per udp server, plus 4s.
966  * Third pass, 1s per udp server, plus 8s.
967  * Fourth => 16s, etc.
968  *
969  * Restated:
970  * Per UDP server, 1s per pass.
971  * Per TCP server, 1s.
972  * Backoff delay, 2**(P+1) - 2, where P is total number of passes.
973  *
974  * Total = 2**(P+1) + U*P + T - 2.
975  *
976  * If P=3, Total = 3*U + T + 14.
977  * If P=4, Total = 4*U + T + 30.
978  *
979  * Note that if you try to reach two ports (e.g., both 88 and 750) on
980  * one server, it counts as two.
981  */
982 
983 krb5_error_code
984 /*ARGSUSED*/
985 krb5int_sendto (krb5_context context, const krb5_data *message,
986 		const struct addrlist *addrs, krb5_data *reply,
987 		struct sockaddr_storage *localaddr, socklen_t *localaddrlen)
988 {
989     int i, pass;
990     int delay_this_pass = 2;
991     krb5_error_code retval;
992     struct conn_state *conns;
993     size_t n_conns, host;
994     struct select_state select_state;
995     struct timeval now;
996     int winning_conn = -1, e = 0;
997     unsigned char message_len_buf[4];
998     char *udpbuf = 0;
999 
1000     /*LINTED*/
1001     dprint("krb5int_sendto(message=%d@%p)\n", message->length, message->data);
1002 
1003     reply->data = 0;
1004     reply->length = 0;
1005 
1006     n_conns = addrs->naddrs;
1007     conns = malloc(n_conns * sizeof(struct conn_state));
1008     if (conns == NULL) {
1009 	return ENOMEM;
1010     }
1011     memset(conns, 0, n_conns * sizeof(conns[i]));
1012     for (i = 0; i < n_conns; i++) {
1013 	conns[i].fd = INVALID_SOCKET;
1014     }
1015 
1016     select_state.max = 0;
1017     select_state.nfds = 0;
1018     FD_ZERO(&select_state.rfds);
1019     FD_ZERO(&select_state.wfds);
1020     FD_ZERO(&select_state.xfds);
1021 
1022     message_len_buf[0] = (message->length >> 24) & 0xff;
1023     message_len_buf[1] = (message->length >> 16) & 0xff;
1024     message_len_buf[2] = (message->length >>  8) & 0xff;
1025     message_len_buf[3] =  message->length        & 0xff;
1026 
1027     /* Set up connections.  */
1028     for (host = 0; host < n_conns; host++) {
1029 	retval = setup_connection (&conns[host], addrs->addrs[host],
1030 				   message, message_len_buf, &udpbuf);
1031 	if (retval)
1032 	    continue;
1033     }
1034     for (pass = 0; pass < MAX_PASS; pass++) {
1035 	/* Possible optimization: Make only one pass if TCP only.
1036 	   Stop making passes if all UDP ports are closed down.  */
1037 	/*LINTED*/
1038 	dprint("pass %d delay=%d\n", pass, delay_this_pass);
1039 	for (host = 0; host < n_conns; host++) {
1040 	    /*LINTED*/
1041 	    dprint("host %d\n", host);
1042 
1043 	    /* Send to the host, wait for a response, then move on. */
1044 	    if (maybe_send(&conns[host], &select_state))
1045 		continue;
1046 
1047 	    retval = getcurtime(&now);
1048 	    if (retval)
1049 		goto egress;
1050 	    select_state.end_time = now;
1051 	    select_state.end_time.tv_sec += 1;
1052 	    e = service_fds(&select_state, conns, host+1, &winning_conn);
1053 	    if (e)
1054 		break;
1055 	    if (pass > 0 && select_state.nfds == 0)
1056 		/*
1057 		 * After the first pass, if we close all fds, break
1058 		 * out right away.  During the first pass, it's okay,
1059 		 * we're probably about to open another connection.
1060 		 */
1061 		break;
1062 	}
1063 	if (e)
1064 	    break;
1065 	retval = getcurtime(&now);
1066 	if (retval)
1067 	    goto egress;
1068 	/* Possible optimization: Find a way to integrate this select
1069 	   call with the last one from the above loop, if the loop
1070 	   actually calls select.  */
1071 	select_state.end_time.tv_sec += delay_this_pass;
1072 	e = service_fds(&select_state, conns, host+1, &winning_conn);
1073 	if (e)
1074 	    break;
1075 	if (select_state.nfds == 0)
1076 	    break;
1077 	delay_this_pass *= 2;
1078     }
1079 
1080     if (select_state.nfds == 0) {
1081 	/* No addresses?  */
1082 	retval = KRB5_KDC_UNREACH;
1083 	goto egress;
1084     }
1085     if (e == 0 || winning_conn < 0) {
1086 	retval = KRB5_KDC_UNREACH;
1087 	goto egress;
1088     }
1089     /* Success!  */
1090     reply->data = conns[winning_conn].x.in.buf;
1091     reply->length = (conns[winning_conn].x.in.pos
1092 		     - conns[winning_conn].x.in.buf);
1093     /*LINTED*/
1094     dprint("returning %d bytes in buffer %p\n",
1095 	   (int) reply->length, reply->data);
1096     retval = 0;
1097     conns[winning_conn].x.in.buf = 0;
1098     if (localaddr != 0 && localaddrlen != 0 && *localaddrlen > 0)
1099 	(void) getsockname(conns[winning_conn].fd, (struct sockaddr *)localaddr,
1100 			   localaddrlen);
1101 egress:
1102     for (i = 0; i < n_conns; i++) {
1103 	if (conns[i].fd != INVALID_SOCKET)
1104 	    close(conns[i].fd);
1105 	if (conns[i].state == READING
1106 	    && conns[i].x.in.buf != 0
1107 	    && conns[i].x.in.buf != udpbuf)
1108 	    free(conns[i].x.in.buf);
1109     }
1110     free(conns);
1111     if (reply->data != udpbuf)
1112 	free(udpbuf);
1113     return retval;
1114 }
1115