1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 #pragma ident	"%Z%%M%	%I%	%E% SMI"
6 /*
7  * lib/krb5/os/sendto_kdc.c
8  *
9  * Copyright 1990,1991,2001,2002 by the Massachusetts Institute of Technology.
10  * All Rights Reserved.
11  *
12  * Export of this software from the United States of America may
13  *   require a specific license from the United States Government.
14  *   It is the responsibility of any person or organization contemplating
15  *   export to obtain such a license before exporting.
16  *
17  * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
18  * distribute this software and its documentation for any purpose and
19  * without fee is hereby granted, provided that the above copyright
20  * notice appear in all copies and that both that copyright notice and
21  * this permission notice appear in supporting documentation, and that
22  * the name of M.I.T. not be used in advertising or publicity pertaining
23  * to distribution of the software without specific, written prior
24  * permission.  Furthermore if you modify this software you must label
25  * your software as modified software and not distribute it in such a
26  * fashion that it might be confused with the original M.I.T. software.
27  * M.I.T. makes no representations about the suitability of
28  * this software for any purpose.  It is provided "as is" without express
29  * or implied warranty.
30  *
31  *
32  * Send packet to KDC for realm; wait for response, retransmitting
33  * as necessary.
34  */
35 
36 #define NEED_SOCKETS
37 #define NEED_LOWLEVEL_IO
38 #include <fake-addrinfo.h>
39 #include <k5-int.h>
40 
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #else
44 #include <time.h>
45 #endif
46 #include "os-proto.h"
47 
48 #ifdef _AIX
49 #include <sys/select.h>
50 #endif
51 
52 /* For FIONBIO.  */
53 #include <sys/ioctl.h>
54 #ifdef HAVE_SYS_FILIO_H
55 #include <sys/filio.h>
56 #endif
57 
58 #define MAX_PASS		    3
59 /* Solaris Kerberos: moved to k5-int.h */
60 /* #define DEFAULT_UDP_PREF_LIMIT	 1465 */
61 #define HARD_UDP_LIMIT		32700 /* could probably do 64K-epsilon ? */
62 
63 /* Solaris kerberos: leaving this here because other code depends on this. */
static void
default_debug_handler(const void *data, size_t len)
{
    /*
     * Copy 'len' raw bytes from 'data' to stderr.  No explicit flush is
     * issued here: stderr is unbuffered.
     */
    fwrite(data, sizeof (char), len, stderr);
}

/*
 * Debug output hook with external linkage; its initial value is
 * default_debug_handler().
 */
void (*krb5int_sendtokdc_debug_handler)(const void *, size_t) =
    default_debug_handler;
71 
72 /*
73  * Solaris Kerberos: only including the debug stuff if DEBUG defined outside
74  * this file.
75  */
76 #ifdef  DEBUG
77 
78 static char global_err_str[NI_MAXHOST + NI_MAXSERV + 1024];
79 
80 /* Solaris kerberos: removed put() since it isn't needed. */
81 
82 static void putstr(const char *str)
83 {
84     /* Solaris kerberos: build the string which will be passed to syslog later */
85     strlcat(global_err_str, str, sizeof (global_err_str));
86 }
87 
88 #define dprint krb5int_debug_fprint
89 #define dperror dprint
90 
91 #include <com_err.h>
92 
93 static void
94 krb5int_debug_fprint (const char *fmt, ...)
95 {
96     va_list args;
97 
98     /* Temporaries for variable arguments, etc.  */
99     krb5_error_code kerr;
100     int err;
101     fd_set *rfds, *wfds, *xfds;
102     int i;
103     int maxfd;
104     struct timeval *tv;
105     struct addrinfo *ai;
106     const krb5_data *d;
107     char addrbuf[NI_MAXHOST], portbuf[NI_MAXSERV];
108     const char *p;
109     char tmpbuf[NI_MAXHOST + NI_MAXSERV + 30];
110 
111     /*
112      * Solaris kerberos: modified this function to create a string to pass to
113      * syslog()
114      */
115     global_err_str[0] = NULL;
116 
117     va_start(args, fmt);
118 
119 #define putf(FMT,X)	(sprintf(tmpbuf,FMT,X),putstr(tmpbuf))
120 
121     for (; *fmt; fmt++) {
122 	if (*fmt != '%') {
123 	    /* Possible optimization: Look for % and print all chars
124 	       up to it in one call.  */
125 	    putf("%c", *fmt);
126 	    continue;
127 	}
128 	/* After this, always processing a '%' sequence.  */
129 	fmt++;
130 	switch (*fmt) {
131 	case 0:
132 	default:
133 	    abort();
134 	    break;
135 	case 'E':
136 	    /* %E => krb5_error_code */
137 	    kerr = va_arg(args, krb5_error_code);
138 	    sprintf(tmpbuf, "%lu/", (unsigned long) kerr);
139 	    putstr(tmpbuf);
140 	    p = error_message(kerr);
141 	    putstr(p);
142 	    break;
143 	case 'm':
144 	    /* %m => errno value (int) */
145 	    /* Like syslog's %m except the errno value is passed in
146 	       rather than the current value.  */
147 	    err = va_arg(args, int);
148 	    putf("%d/", err);
149 	    p = strerror(err);
150 	    putstr(p);
151 	    break;
152 	case 'F':
153 	    /* %F => fd_set *, fd_set *, fd_set *, int */
154 	    rfds = va_arg(args, fd_set *);
155 	    wfds = va_arg(args, fd_set *);
156 	    xfds = va_arg(args, fd_set *);
157 	    maxfd = va_arg(args, int);
158 
159 	    for (i = 0; i < maxfd; i++) {
160 		int r = FD_ISSET(i, rfds);
161 		int w = wfds && FD_ISSET(i, wfds);
162 		int x = xfds && FD_ISSET(i, xfds);
163 		if (r || w || x) {
164 		    putf(" %d", i);
165 		    if (r)
166 			putstr("r");
167 		    if (w)
168 			putstr("w");
169 		    if (x)
170 			putstr("x");
171 		}
172 	    }
173 	    putstr(" ");
174 	    break;
175 	case 's':
176 	    /* %s => char * */
177 	    p = va_arg(args, const char *);
178 	    putstr(p);
179 	    break;
180 	case 't':
181 	    /* %t => struct timeval * */
182 	    tv = va_arg(args, struct timeval *);
183 	    if (tv) {
184 		sprintf(tmpbuf, "%ld.%06ld",
185 			(long) tv->tv_sec, (long) tv->tv_usec);
186 		putstr(tmpbuf);
187 	    } else
188 		putstr("never");
189 	    break;
190 	case 'd':
191 	    /* %d => int */
192 	    putf("%d", va_arg(args, int));
193 	    break;
194 	case 'p':
195 	    /* %p => pointer */
196 	    putf("%p", va_arg(args, void*));
197 	    break;
198 	case 'A':
199 	    /* %A => addrinfo */
200 	    ai = va_arg(args, struct addrinfo *);
201 	    if (0 != getnameinfo (ai->ai_addr, ai->ai_addrlen,
202 				  addrbuf, sizeof (addrbuf),
203 				  portbuf, sizeof (portbuf),
204 				  NI_NUMERICHOST | NI_NUMERICSERV))
205 		/*LINTED*/
206 		strcpy (addrbuf, "??"), strcpy (portbuf, "??");
207 	    sprintf(tmpbuf, "%s %s.%s",
208 		    (ai->ai_socktype == SOCK_DGRAM
209 		     ? "udp"
210 		     : ai->ai_socktype == SOCK_STREAM
211 		     ? "tcp"
212 		     : "???"),
213 		    addrbuf, portbuf);
214 	    putstr(tmpbuf);
215 	    break;
216 	case 'D':
217 	    /* %D => krb5_data * */
218 	    d = va_arg(args, krb5_data *);
219 	    p = d->data;
220 	    putstr("0x");
221 	    for (i = 0; i < d->length; i++) {
222 		putf("%.2x", *p++);
223 	    }
224 	    break;
225 	}
226     }
227     va_end(args);
228 
229     /* Solaris kerberos: use syslog() for debug output */
230     syslog(LOG_DEBUG, global_err_str);
231 }
232 
233 #else
234 #define dprint (void)
235 #define dperror(MSG) ((void)(MSG))
236 #endif
237 
238 static int
239 merge_addrlists (struct addrlist *dest, struct addrlist *src)
240 {
241     int err, i;
242 
243 #ifdef DEBUG
244     /*LINTED*/
245     dprint("merging addrlists:\n\tlist1: ");
246     for (i = 0; i < dest->naddrs; i++)
247 	/*LINTED*/
248 	dprint(" %A", dest->addrs[i]);
249     /*LINTED*/
250     dprint("\n\tlist2: ");
251     for (i = 0; i < src->naddrs; i++)
252 	/*LINTED*/
253 	dprint(" %A", src->addrs[i]);
254     /*LINTED*/
255     dprint("\n");
256 #endif
257 
258     err = krb5int_grow_addrlist (dest, src->naddrs);
259     if (err)
260 	return err;
261     for (i = 0; i < src->naddrs; i++) {
262 	dest->addrs[dest->naddrs + i] = src->addrs[i];
263 	src->addrs[i] = 0;
264     }
265     dest->naddrs += i;
266     src->naddrs = 0;
267 
268 #ifdef DEBUG
269     /*LINTED*/
270     dprint("\tout:   ");
271     for (i = 0; i < dest->naddrs; i++)
272 	/*LINTED*/
273 	dprint(" %A", dest->addrs[i]);
274     /*LINTED*/
275     dprint("\n");
276 #endif
277 
278     return 0;
279 }
280 
281 /*
282  * send the formatted request 'message' to a KDC for realm 'realm' and
283  * return the response (if any) in 'reply'.
284  *
285  * If the message is sent and a response is received, 0 is returned,
286  * otherwise an error code is returned.
287  *
288  * The storage for 'reply' is allocated and should be freed by the caller
289  * when finished.
290  */
291 
292 krb5_error_code
293 krb5_sendto_kdc (krb5_context context, const krb5_data *message,
294 		 const krb5_data *realm, krb5_data *reply,
295 		 int *use_master, int tcp_only)
296 {
297     krb5_error_code retval;
298     struct addrlist addrs;
299     int socktype1 = 0, socktype2 = 0, addr_used;
300 
301     /*
302      * find KDC location(s) for realm
303      */
304 
305     /*
306      * BUG: This code won't return "interesting" errors (e.g., out of mem,
307      * bad config file) from locate_kdc.  KRB5_REALM_CANT_RESOLVE can be
308      * ignored from one query of two, but if only one query is done, or
309      * both return that error, it should be returned to the caller.  Also,
310      * "interesting" errors (not KRB5_KDC_UNREACH) from sendto_{udp,tcp}
311      * should probably be returned as well.
312      */
313 
314     /*LINTED*/
315     dprint("krb5_sendto_kdc(%d@%p, \"%D\", use_master=%d, tcp_only=%d)\n",
316     /*LINTED*/
317 	   message->length, message->data, realm, *use_master, tcp_only);
318 
319     if (!tcp_only && context->udp_pref_limit < 0) {
320 	int tmp;
321 	retval = profile_get_integer(context->profile,
322 				     "libdefaults", "udp_preference_limit", 0,
323 				     DEFAULT_UDP_PREF_LIMIT, &tmp);
324 	if (retval)
325 	    return retval;
326 	if (tmp < 0)
327 	    tmp = DEFAULT_UDP_PREF_LIMIT;
328 	else if (tmp > HARD_UDP_LIMIT)
329 	    /* In the unlikely case that a *really* big value is
330 	       given, let 'em use as big as we think we can
331 	       support.  */
332 	    tmp = HARD_UDP_LIMIT;
333 	context->udp_pref_limit = tmp;
334     }
335 
336     retval = (*use_master ? KRB5_KDC_UNREACH : KRB5_REALM_UNKNOWN);
337 
338     if (tcp_only)
339 	socktype1 = SOCK_STREAM, socktype2 = 0;
340     else if (message->length <= context->udp_pref_limit)
341 	socktype1 = SOCK_DGRAM, socktype2 = SOCK_STREAM;
342     else
343 	socktype1 = SOCK_STREAM, socktype2 = SOCK_DGRAM;
344 
345     retval = krb5_locate_kdc(context, realm, &addrs, *use_master, socktype1, 0);
346     if (socktype2) {
347 	struct addrlist addrs2;
348 
349 	retval = krb5_locate_kdc(context, realm, &addrs2, *use_master,
350 				 socktype2, 0);
351 	if (retval == 0) {
352 	    (void) merge_addrlists(&addrs, &addrs2);
353 	    krb5int_free_addrlist(&addrs2);
354 	}
355     }
356     if (addrs.naddrs > 0) {
357         retval = krb5int_sendto (context, message, &addrs, reply, 0, 0,
358 		&addr_used);
359 	if (retval == 0) {
360             /*
361 	     * Set use_master to 1 if we ended up talking to a master when
362 	     * didn't explicitly request to
363 	     */
364 
365 	    if (*use_master == 0) {
366 	        struct addrlist addrs3;
367 		retval = krb5_locate_kdc(context, realm, &addrs3, 1,
368 					addrs.addrs[addr_used]->ai_socktype,
369 					addrs.addrs[addr_used]->ai_family);
370 		if (retval == 0) {
371 		    int i;
372 		    for (i = 0; i < addrs3.naddrs; i++) {
373 			if (addrs.addrs[addr_used]->ai_addrlen ==
374 			    addrs3.addrs[i]->ai_addrlen &&
375 			    memcmp(addrs.addrs[addr_used]->ai_addr,
376 				addrs3.addrs[i]->ai_addr,
377 				addrs.addrs[addr_used]->ai_addrlen) == 0) {
378 				*use_master = 1;
379 				break;
380 			}
381 		    }
382 		    krb5int_free_addrlist (&addrs3);
383 		}
384 	    }
385 	    krb5int_free_addrlist (&addrs);
386 	    return 0;
387 	}
388 	krb5int_free_addrlist (&addrs);
389     }
390     return retval;
391 }
392 
393 
394 /*
395  * Notes:
396  *
397  * Getting "connection refused" on a connected UDP socket causes
398  * select to indicate write capability on UNIX, but only shows up
399  * as an exception on Windows.  (I don't think any UNIX system flags
400  * the error as an exception.)  So we check for both, or make it
401  * system-specific.
402  *
403  * Always watch for responses from *any* of the servers.  Eventually
404  * fix the UDP code to do the same.
405  *
406  * To do:
407  * - TCP NOPUSH/CORK socket options?
408  * - error codes that don't suck
409  * - getsockopt(SO_ERROR) to check connect status
410  * - handle error RESPONSE_TOO_BIG from UDP server and use TCP
411  *   connections already in progress
412  */
413 
414 #include <cm.h>
415 
/*
 * Printable names for enum conn_states, looked up by enum value in the
 * debug output (state_strings[state->state]); the two lists must stay in
 * the same order.
 */
static const char *const state_strings[] = {
    "INITIALIZING", "CONNECTING", "WRITING", "READING", "FAILED"
};
/* Progress of a single connection attempt. */
enum conn_states { INITIALIZING, CONNECTING, WRITING, READING, FAILED };
/* Receive-side bookkeeping for one reply. */
struct incoming_krb5_message {
    /* TCP: how many of the 4 length-prefix bytes have arrived so far. */
    size_t bufsizebytes_read;
    /* Size of 'buf' in bytes. */
    size_t bufsize;
    /*
     * Reply buffer.  For TCP it is allocated once the length prefix has
     * been read; for UDP it is the datagram buffer set up in
     * setup_connection().
     */
    char *buf;
    /*
     * TCP: where the next received bytes are stored.  UDP: end of the
     * received datagram.  krb5int_sendto() reports pos - buf as the reply
     * length.
     */
    char *pos;
    /* TCP: raw length prefix, decoded most-significant byte first. */
    unsigned char bufsizebytes[4];
    /* TCP: number of reply bytes still to be read. */
    size_t n_left;
};
/* State of one connection (one address being tried). */
struct conn_state {
    /* Socket, or INVALID_SOCKET when none is open. */
    SOCKET fd;
    /* Error recorded by start_connection() or kill_conn(). */
    krb5_error_code err;
    enum conn_states state;
    /* 1 for a datagram connection, 0 for a stream connection. */
    unsigned int is_udp : 1;
    /*
     * Handler invoked by service_fds() when select reports activity on
     * 'fd'; a nonzero return tells the caller to stop looping.
     */
    int (*service)(struct conn_state *, struct select_state *, int);
    /* Address this connection targets, as passed to setup_connection(). */
    struct addrinfo *addr;
    struct {
	struct {
	    /*
	     * Data to send.  TCP: [0] is the 4-byte length, [1] the
	     * message.  UDP: [0] is the message, [1] is empty.
	     */
	    sg_buf sgbuf[2];
	    /* First buffer that still has unsent data. */
	    sg_buf *sgp;
	    /* Number of buffers, starting at 'sgp', left to send. */
	    int sg_count;
	} out;
	struct incoming_krb5_message in;
    } x;
};
444 
/*
 * Store the current time of day in *tvp.
 * Returns 0 on success; if gettimeofday() fails the failure is reported
 * through dperror() and errno is returned.
 */
static int
getcurtime (struct timeval *tvp)
{
    if (gettimeofday(tvp, 0) == 0)
        return 0;

    dperror("gettimeofday");
    return errno;
}
453 
454 /*
455  * Call select and return results.
456  * Input: interesting file descriptors and absolute timeout
457  * Output: select return value (-1 or num fds ready) and fd_sets
458  * Return: 0 (for i/o available or timeout) or error code.
459  */
460 krb5_error_code
461 krb5int_cm_call_select (const struct select_state *in,
462 			struct select_state *out, int *sret)
463 {
464     struct timeval now, *timo;
465     krb5_error_code e;
466 
467     *out = *in;
468     e = getcurtime(&now);
469     if (e)
470 	return e;
471     if (out->end_time.tv_sec == 0)
472 	timo = 0;
473     else {
474 	timo = &out->end_time;
475 	out->end_time.tv_sec -= now.tv_sec;
476 	out->end_time.tv_usec -= now.tv_usec;
477 	if (out->end_time.tv_usec < 0) {
478 	    out->end_time.tv_usec += 1000000;
479 	    out->end_time.tv_sec--;
480 	}
481 	if (out->end_time.tv_sec < 0) {
482 	    *sret = 0;
483 	    return 0;
484 	}
485     }
486     /*LINTED*/
487     dprint("selecting on max=%d sockets [%F] timeout %t\n",
488 	    /*LINTED*/
489 	   out->max, &out->rfds, &out->wfds, &out->xfds, out->max, timo);
490     *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo);
491     e = SOCKET_ERRNO;
492 
493 #ifdef DEBUG
494     /*LINTED*/
495     dprint("select returns %d", *sret);
496     if (*sret < 0)
497 	/*LINTED*/
498 	dprint(", error = %E\n", e);
499     else if (*sret == 0)
500 	/*LINTED*/
501 	dprint(" (timeout)\n");
502     else
503 	/*LINTED*/
504 	dprint(":%F\n", &out->rfds, &out->wfds, &out->xfds, out->max);
505 #endif
506 
507     if (*sret < 0)
508 	return e;
509     return 0;
510 }
511 
512 static int service_tcp_fd (struct conn_state *conn,
513 			   struct select_state *selstate, int ssflags);
514 static int service_udp_fd (struct conn_state *conn,
515 			   struct select_state *selstate, int ssflags);
516 
517 
518 static int
519 setup_connection (struct conn_state *state, struct addrinfo *ai,
520 		  const krb5_data *message, unsigned char *message_len_buf,
521 		  char **udpbufp)
522 {
523     state->state = INITIALIZING;
524     state->err = 0;
525     state->x.out.sgp = state->x.out.sgbuf;
526     state->addr = ai;
527     state->fd = INVALID_SOCKET;
528     SG_SET(&state->x.out.sgbuf[1], 0, 0);
529     if (ai->ai_socktype == SOCK_STREAM) {
530 	SG_SET(&state->x.out.sgbuf[0], message_len_buf, 4);
531 	SG_SET(&state->x.out.sgbuf[1], message->data, message->length);
532 	state->x.out.sg_count = 2;
533 	state->is_udp = 0;
534 	state->service = service_tcp_fd;
535     } else {
536 	SG_SET(&state->x.out.sgbuf[0], message->data, message->length);
537 	SG_SET(&state->x.out.sgbuf[1], 0, 0);
538 	state->x.out.sg_count = 1;
539 	state->is_udp = 1;
540 	state->service = service_udp_fd;
541 
542 	if (*udpbufp == 0) {
543 	    *udpbufp = malloc(krb5_max_dgram_size);
544 	    if (*udpbufp == 0) {
545 		dperror("malloc(krb5_max_dgram_size)");
546 		(void) closesocket(state->fd);
547 		state->fd = INVALID_SOCKET;
548 		state->state = FAILED;
549 		return 1;
550 	    }
551 	}
552 	state->x.in.buf = *udpbufp;
553 	state->x.in.bufsize = krb5_max_dgram_size;
554     }
555     return 0;
556 }
557 
558 static int
559 start_connection (struct conn_state *state, struct select_state *selstate)
560 {
561     int fd, e;
562     struct addrinfo *ai = state->addr;
563 
564     /*LINTED*/
565     dprint("start_connection(@%p)\ngetting %s socket in family %d...", state,
566 	   /*LINTED*/
567 	   ai->ai_socktype == SOCK_STREAM ? "stream" : "dgram", ai->ai_family);
568     fd = socket(ai->ai_family, ai->ai_socktype, 0);
569     if (fd == INVALID_SOCKET) {
570 	state->err = SOCKET_ERRNO;
571 	/*LINTED*/
572 	dprint("socket: %m creating with af %d\n", state->err, ai->ai_family);
573 	return -1;		/* try other hosts */
574     }
575     /* Make it non-blocking.  */
576     if (ai->ai_socktype == SOCK_STREAM) {
577 	static const int one = 1;
578 	static const struct linger lopt = { 0, 0 };
579 
580 	if (ioctlsocket(fd, FIONBIO, (const void *) &one))
581 	    dperror("sendto_kdc: ioctl(FIONBIO)");
582 	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lopt, sizeof(lopt)))
583 	    dperror("sendto_kdc: setsockopt(SO_LINGER)");
584     }
585 
586     /* Start connecting to KDC.  */
587     /*LINTED*/
588     dprint(" fd %d; connecting to %A...\n", fd, ai);
589     e = connect(fd, ai->ai_addr, ai->ai_addrlen);
590     if (e != 0) {
591 	/*
592 	 * This is the path that should be followed for non-blocking
593 	 * connections.
594 	 */
595 	if (SOCKET_ERRNO == EINPROGRESS || SOCKET_ERRNO == EWOULDBLOCK) {
596 	    state->state = CONNECTING;
597 	} else {
598 	    /*LINTED*/
599 	    dprint("connect failed: %m\n", SOCKET_ERRNO);
600 	    state->err = SOCKET_ERRNO;
601 	    state->state = FAILED;
602 	    return -2;
603 	}
604     } else {
605 	/*
606 	 * Connect returned zero even though we tried to make it
607 	 * non-blocking, which should have caused it to return before
608 	 * finishing the connection.  Oh well.  Someone's network
609 	 * stack is broken, but if they gave us a connection, use it.
610 	 */
611 	state->state = WRITING;
612     }
613     /*LINTED*/
614     dprint("new state = %s\n", state_strings[state->state]);
615 
616     state->fd = fd;
617 
618     if (ai->ai_socktype == SOCK_DGRAM) {
619 	/* Send it now.  */
620 	int ret;
621 	sg_buf *sg = &state->x.out.sgbuf[0];
622 
623 	/*LINTED*/
624 	dprint("sending %d bytes on fd %d\n", SG_LEN(sg), state->fd);
625 	ret = send(state->fd, SG_BUF(sg), SG_LEN(sg), 0);
626 	if (ret != SG_LEN(sg)) {
627 	    dperror("sendto");
628 	    (void) closesocket(state->fd);
629 	    state->fd = INVALID_SOCKET;
630 	    state->state = FAILED;
631 	    return -3;
632 	} else {
633 	    state->state = READING;
634 	}
635     }
636 
637     FD_SET(state->fd, &selstate->rfds);
638     if (state->state == CONNECTING || state->state == WRITING)
639 	FD_SET(state->fd, &selstate->wfds);
640     FD_SET(state->fd, &selstate->xfds);
641     if (selstate->max <= state->fd)
642 	selstate->max = state->fd + 1;
643     selstate->nfds++;
644 
645     /*LINTED*/
646     dprint("new select vectors: %F\n",
647 	   /*LINTED*/
648 	   &selstate->rfds, &selstate->wfds, &selstate->xfds, selstate->max);
649 
650     return 0;
651 }
652 
653 /* Return 0 if we sent something, non-0 otherwise.
654    If 0 is returned, the caller should delay waiting for a response.
655    Otherwise, the caller should immediately move on to process the
656    next connection.  */
657 static int
658 maybe_send (struct conn_state *conn, struct select_state *selstate)
659 {
660     sg_buf *sg;
661 
662     /*LINTED*/
663     dprint("maybe_send(@%p) state=%s type=%s\n", conn,
664 	   /*LINTED*/
665 	   state_strings[conn->state], conn->is_udp ? "udp" : "tcp");
666     if (conn->state == INITIALIZING)
667 	return start_connection(conn, selstate);
668 
669     /* Did we already shut down this channel?  */
670     if (conn->state == FAILED) {
671 	dprint("connection already closed\n");
672 	return -1;
673     }
674 
675     if (conn->addr->ai_socktype == SOCK_STREAM) {
676 	dprint("skipping stream socket\n");
677 	/* The select callback will handle flushing any data we
678 	   haven't written yet, and we only write it once.  */
679 	return -1;
680     }
681 
682     /* UDP - Send message, possibly for the first time, possibly a
683        retransmit if a previous attempt timed out.  */
684     sg = &conn->x.out.sgbuf[0];
685     /*LINTED*/
686     dprint("sending %d bytes on fd %d\n", SG_LEN(sg), conn->fd);
687     if (send(conn->fd, SG_BUF(sg), SG_LEN(sg), 0) != SG_LEN(sg)) {
688 	dperror("send");
689 	/* Keep connection alive, we'll try again next pass.
690 
691 	   Is this likely to catch any errors we didn't get from the
692 	   select callbacks?  */
693 	return -1;
694     }
695     /* Yay, it worked.  */
696     return 0;
697 }
698 
699 static void
700 kill_conn(struct conn_state *conn, struct select_state *selstate, int err)
701 {
702     conn->state = FAILED;
703     shutdown(conn->fd, SHUTDOWN_BOTH);
704     FD_CLR(conn->fd, &selstate->rfds);
705     FD_CLR(conn->fd, &selstate->wfds);
706     FD_CLR(conn->fd, &selstate->xfds);
707     conn->err = err;
708     /*LINTED*/
709     dprint("abandoning connection %d: %m\n", conn->fd, err);
710     /* Fix up max fd for next select call.  */
711     if (selstate->max == 1 + conn->fd) {
712 	while (selstate->max > 0
713 	       && ! FD_ISSET(selstate->max-1, &selstate->rfds)
714 	       && ! FD_ISSET(selstate->max-1, &selstate->wfds)
715 	       && ! FD_ISSET(selstate->max-1, &selstate->xfds))
716 	    selstate->max--;
717 	/*LINTED*/
718 	dprint("new max_fd + 1 is %d\n", selstate->max);
719     }
720     selstate->nfds--;
721 }
722 
723 /* Return nonzero only if we're finished and the caller should exit
724    its loop.  This happens in two cases: We have a complete message,
725    or the socket has closed and no others are open.  */
726 
727 static int
728 service_tcp_fd (struct conn_state *conn, struct select_state *selstate,
729 		int ssflags)
730 {
731     krb5_error_code e = 0;
732     int nwritten, nread;
733 
734     if (!(ssflags & (SSF_READ|SSF_WRITE|SSF_EXCEPTION)))
735 	abort();
736     switch (conn->state) {
737 	SOCKET_WRITEV_TEMP tmp;
738 
739     case CONNECTING:
740 	if (ssflags & SSF_READ) {
741 	    /* Bad -- the KDC shouldn't be sending to us first.  */
742 	    e = EINVAL /* ?? */;
743 	kill_conn:
744 	    kill_conn(conn, selstate, e);
745 	    if (e == EINVAL) {
746 		closesocket(conn->fd);
747 		conn->fd = INVALID_SOCKET;
748 	    }
749 	    return e == 0;
750 	}
751 	if (ssflags & SSF_EXCEPTION) {
752 	handle_exception:
753 	    e = 1;		/* need only be non-zero */
754 	    goto kill_conn;
755 	}
756 
757 	/*
758 	 * Connect finished -- but did it succeed or fail?
759 	 * UNIX sets can_write if failed.
760 	 * Try writing, I guess, and find out.
761 	 */
762 	conn->state = WRITING;
763 	goto try_writing;
764 
765     case WRITING:
766 	if (ssflags & SSF_READ) {
767 	    e = E2BIG;
768 	    /* Bad -- the KDC shouldn't be sending anything yet.  */
769 	    goto kill_conn;
770 	}
771 	if (ssflags & SSF_EXCEPTION)
772 	    goto handle_exception;
773 
774     try_writing:
775 	/*LINTED*/
776 	dprint("trying to writev %d (%d bytes) to fd %d\n",
777 		/*LINTED*/
778 	       conn->x.out.sg_count,
779 	       ((conn->x.out.sg_count == 2 ? SG_LEN(&conn->x.out.sgp[1]) : 0)
780 		/*LINTED*/
781 		+ SG_LEN(&conn->x.out.sgp[0])),
782 	       conn->fd);
783 	nwritten = SOCKET_WRITEV(conn->fd, conn->x.out.sgp,
784 				 conn->x.out.sg_count, tmp);
785 	if (nwritten < 0) {
786 	    e = SOCKET_ERRNO;
787 	    /*LINTED*/
788 	    dprint("failed: %m\n", e);
789 	    goto kill_conn;
790 	}
791 	/*LINTED*/
792 	dprint("wrote %d bytes\n", nwritten);
793 	while (nwritten) {
794 	    sg_buf *sgp = conn->x.out.sgp;
795 	    if (nwritten < SG_LEN(sgp)) {
796 		/*LINTED*/
797 		SG_ADVANCE(sgp, nwritten);
798 		nwritten = 0;
799 	    } else {
800 		nwritten -= SG_LEN(conn->x.out.sgp);
801 		conn->x.out.sgp++;
802 		conn->x.out.sg_count--;
803 		if (conn->x.out.sg_count == 0 && nwritten != 0)
804 		    /* Wrote more than we wanted to?  */
805 		    abort();
806 	    }
807 	}
808 	if (conn->x.out.sg_count == 0) {
809 	    /* Done writing, switch to reading.  */
810 	    /* Don't call shutdown at this point because
811 	     * some implementations cannot deal with half-closed connections.*/
812 	    FD_CLR(conn->fd, &selstate->wfds);
813 	    /* Q: How do we detect failures to send the remaining data
814 	       to the remote side, since we're in non-blocking mode?
815 	       Will we always get errors on the reading side?  */
816 	    /*LINTED*/
817 	    dprint("switching fd %d to READING\n", conn->fd);
818 	    conn->state = READING;
819 	    conn->x.in.bufsizebytes_read = 0;
820 	    conn->x.in.bufsize = 0;
821 	    conn->x.in.buf = 0;
822 	    conn->x.in.pos = 0;
823 	    conn->x.in.n_left = 0;
824 	}
825 	return 0;
826 
827     case READING:
828 	if (ssflags & SSF_EXCEPTION) {
829 	    if (conn->x.in.buf) {
830 		free(conn->x.in.buf);
831 		conn->x.in.buf = 0;
832 	    }
833 	    goto handle_exception;
834 	}
835 
836 	if (conn->x.in.bufsizebytes_read == 4) {
837 	    /* Reading data.  */
838 	    /*LINTED*/
839 	    dprint("reading %d bytes of data from fd %d\n",
840 		   (int) conn->x.in.n_left, conn->fd);
841 	    nread = SOCKET_READ(conn->fd, conn->x.in.pos, conn->x.in.n_left);
842 	    if (nread <= 0) {
843 		e = nread ? SOCKET_ERRNO : ECONNRESET;
844 		free(conn->x.in.buf);
845 		conn->x.in.buf = 0;
846 		goto kill_conn;
847 	    }
848 	    conn->x.in.n_left -= nread;
849 	    conn->x.in.pos += nread;
850 	    if ((long)conn->x.in.n_left <= 0) {
851 		/* We win!  */
852 		return 1;
853 	    }
854 	} else {
855 	    /* Reading length.  */
856 	    nread = SOCKET_READ(conn->fd,
857 				conn->x.in.bufsizebytes + conn->x.in.bufsizebytes_read,
858 				4 - conn->x.in.bufsizebytes_read);
859 	    if (nread < 0) {
860 		e = SOCKET_ERRNO;
861 		goto kill_conn;
862 	    }
863 	    conn->x.in.bufsizebytes_read += nread;
864 	    if (conn->x.in.bufsizebytes_read == 4) {
865 		unsigned long len;
866 		len = conn->x.in.bufsizebytes[0];
867 		len = (len << 8) + conn->x.in.bufsizebytes[1];
868 		len = (len << 8) + conn->x.in.bufsizebytes[2];
869 		len = (len << 8) + conn->x.in.bufsizebytes[3];
870 		/*LINTED*/
871 		dprint("received length on fd %d is %d\n", conn->fd, (int)len);
872 		/* Arbitrary 1M cap.  */
873 		if (len > 1 * 1024 * 1024) {
874 		    e = E2BIG;
875 		    goto kill_conn;
876 		}
877 		conn->x.in.bufsize = conn->x.in.n_left = len;
878 		conn->x.in.buf = conn->x.in.pos = malloc(len);
879 		/*LINTED*/
880 		dprint("allocated %d byte buffer at %p\n", (int) len,
881 		       conn->x.in.buf);
882 		if (conn->x.in.buf == 0) {
883 		    /* allocation failure */
884 		    e = errno;
885 		    goto kill_conn;
886 		}
887 	    }
888 	}
889 	break;
890 
891     default:
892 	abort();
893     }
894     return 0;
895 }
896 
897 static int
898 service_udp_fd(struct conn_state *conn, struct select_state *selstate,
899 	       int ssflags)
900 {
901     int nread;
902 
903     if (!(ssflags & (SSF_READ|SSF_EXCEPTION)))
904 	abort();
905     if (conn->state != READING)
906 	abort();
907 
908     nread = recv(conn->fd, conn->x.in.buf, conn->x.in.bufsize, 0);
909     if (nread < 0) {
910 	kill_conn(conn, selstate, SOCKET_ERRNO);
911 	return 0;
912     }
913     conn->x.in.pos = conn->x.in.buf + nread;
914     return 1;
915 }
916 
917 static int
918 service_fds (struct select_state *selstate,
919 	     struct conn_state *conns, size_t n_conns, int *winning_conn)
920 {
921     int e, selret;
922     struct select_state sel_results;
923 
924     e = 0;
925     while (selstate->nfds > 0
926 	   && (e = krb5int_cm_call_select(selstate, &sel_results, &selret)) == 0) {
927 	int i;
928 
929 	/*LINTED*/
930 	dprint("service_fds examining results, selret=%d\n", selret);
931 
932 	if (selret == 0)
933 	    /* Timeout, return to caller.  */
934 	    return 0;
935 
936 	/* Got something on a socket, process it.  */
937 	for (i = 0; i <= selstate->max && selret > 0 && i < n_conns; i++) {
938 	    int ssflags;
939 
940 	    if (conns[i].fd == INVALID_SOCKET)
941 		continue;
942 	    ssflags = 0;
943 	    if (FD_ISSET(conns[i].fd, &sel_results.rfds))
944 		ssflags |= SSF_READ, selret--;
945 	    if (FD_ISSET(conns[i].fd, &sel_results.wfds))
946 		ssflags |= SSF_WRITE, selret--;
947 	    if (FD_ISSET(conns[i].fd, &sel_results.xfds))
948 		ssflags |= SSF_EXCEPTION, selret--;
949 	    if (!ssflags)
950 		continue;
951 
952 	    /*LINTED*/
953 	    dprint("handling flags '%s%s%s' on fd %d (%A) in state %s\n",
954 		    /*LINTED*/
955 		   (ssflags & SSF_READ) ? "r" : "",
956 		    /*LINTED*/
957 		   (ssflags & SSF_WRITE) ? "w" : "",
958 		    /*LINTED*/
959 		   (ssflags & SSF_EXCEPTION) ? "x" : "",
960 		    /*LINTED*/
961 		   conns[i].fd, conns[i].addr,
962 		   state_strings[(int) conns[i].state]);
963 
964 	    if (conns[i].service (&conns[i], selstate, ssflags)) {
965 		dprint("fd service routine says we're done\n");
966 		*winning_conn = i;
967 		return 1;
968 	    }
969 	}
970     }
971     if (e != 0) {
972 	/*LINTED*/
973 	dprint("select returned %m\n", e);
974 	*winning_conn = -1;
975 	return 1;
976     }
977     return 0;
978 }
979 
980 /*
981  * Current worst-case timeout behavior:
982  *
983  * First pass, 1s per udp or tcp server, plus 2s at end.
984  * Second pass, 1s per udp server, plus 4s.
985  * Third pass, 1s per udp server, plus 8s.
986  * Fourth => 16s, etc.
987  *
988  * Restated:
989  * Per UDP server, 1s per pass.
990  * Per TCP server, 1s.
991  * Backoff delay, 2**(P+1) - 2, where P is total number of passes.
992  *
993  * Total = 2**(P+1) + U*P + T - 2.
994  *
995  * If P=3, Total = 3*U + T + 14.
996  * If P=4, Total = 4*U + T + 30.
997  *
998  * Note that if you try to reach two ports (e.g., both 88 and 750) on
999  * one server, it counts as two.
1000  */
1001 
1002 krb5_error_code
1003 /*ARGSUSED*/
1004 krb5int_sendto (krb5_context context, const krb5_data *message,
1005 		const struct addrlist *addrs, krb5_data *reply,
1006 		struct sockaddr_storage *localaddr, socklen_t *localaddrlen,
1007 		int *addr_used)
1008 {
1009     int i, pass;
1010     int delay_this_pass = 2;
1011     krb5_error_code retval;
1012     struct conn_state *conns;
1013     size_t n_conns, host;
1014     struct select_state select_state;
1015     struct timeval now;
1016     int winning_conn = -1, e = 0;
1017     unsigned char message_len_buf[4];
1018     char *udpbuf = 0;
1019 
1020     /*LINTED*/
1021     dprint("krb5int_sendto(message=%d@%p)\n", message->length, message->data);
1022 
1023     reply->data = 0;
1024     reply->length = 0;
1025 
1026     n_conns = addrs->naddrs;
1027     conns = malloc(n_conns * sizeof(struct conn_state));
1028     if (conns == NULL) {
1029 	return ENOMEM;
1030     }
1031     memset(conns, 0, n_conns * sizeof(conns[i]));
1032     for (i = 0; i < n_conns; i++) {
1033 	conns[i].fd = INVALID_SOCKET;
1034     }
1035 
1036     select_state.max = 0;
1037     select_state.nfds = 0;
1038     FD_ZERO(&select_state.rfds);
1039     FD_ZERO(&select_state.wfds);
1040     FD_ZERO(&select_state.xfds);
1041 
1042     message_len_buf[0] = (message->length >> 24) & 0xff;
1043     message_len_buf[1] = (message->length >> 16) & 0xff;
1044     message_len_buf[2] = (message->length >>  8) & 0xff;
1045     message_len_buf[3] =  message->length        & 0xff;
1046 
1047     /* Set up connections.  */
1048     for (host = 0; host < n_conns; host++) {
1049 	retval = setup_connection (&conns[host], addrs->addrs[host],
1050 				   message, message_len_buf, &udpbuf);
1051 	if (retval)
1052 	    continue;
1053     }
1054     for (pass = 0; pass < MAX_PASS; pass++) {
1055 	/* Possible optimization: Make only one pass if TCP only.
1056 	   Stop making passes if all UDP ports are closed down.  */
1057 	/*LINTED*/
1058 	dprint("pass %d delay=%d\n", pass, delay_this_pass);
1059 	for (host = 0; host < n_conns; host++) {
1060 	    /*LINTED*/
1061 	    dprint("host %d\n", host);
1062 
1063 	    /* Send to the host, wait for a response, then move on. */
1064 	    if (maybe_send(&conns[host], &select_state))
1065 		continue;
1066 
1067 	    retval = getcurtime(&now);
1068 	    if (retval)
1069 		goto egress;
1070 	    select_state.end_time = now;
1071 	    select_state.end_time.tv_sec += 1;
1072 	    e = service_fds(&select_state, conns, host+1, &winning_conn);
1073 	    if (e)
1074 		break;
1075 	    if (pass > 0 && select_state.nfds == 0)
1076 		/*
1077 		 * After the first pass, if we close all fds, break
1078 		 * out right away.  During the first pass, it's okay,
1079 		 * we're probably about to open another connection.
1080 		 */
1081 		break;
1082 	}
1083 	if (e)
1084 	    break;
1085 	retval = getcurtime(&now);
1086 	if (retval)
1087 	    goto egress;
1088 	/* Possible optimization: Find a way to integrate this select
1089 	   call with the last one from the above loop, if the loop
1090 	   actually calls select.  */
1091 	select_state.end_time.tv_sec += delay_this_pass;
1092 	e = service_fds(&select_state, conns, host+1, &winning_conn);
1093 	if (e)
1094 	    break;
1095 	if (select_state.nfds == 0)
1096 	    break;
1097 	delay_this_pass *= 2;
1098     }
1099 
1100     if (select_state.nfds == 0) {
1101 	/* No addresses?  */
1102 	retval = KRB5_KDC_UNREACH;
1103 	goto egress;
1104     }
1105     if (e == 0 || winning_conn < 0) {
1106 	retval = KRB5_KDC_UNREACH;
1107 	goto egress;
1108     }
1109     /* Success!  */
1110     reply->data = conns[winning_conn].x.in.buf;
1111     reply->length = (conns[winning_conn].x.in.pos
1112 		     - conns[winning_conn].x.in.buf);
1113     /*LINTED*/
1114     dprint("returning %d bytes in buffer %p (winning_conn=%d)\n",
1115 	(int) reply->length, reply->data, winning_conn);
1116     retval = 0;
1117     conns[winning_conn].x.in.buf = 0;
1118     if (addr_used)
1119 	    *addr_used = winning_conn;
1120     if (localaddr != 0 && localaddrlen != 0 && *localaddrlen > 0)
1121 	(void) getsockname(conns[winning_conn].fd, (struct sockaddr *)localaddr,
1122 			   localaddrlen);
1123 egress:
1124     for (i = 0; i < n_conns; i++) {
1125 	if (conns[i].fd != INVALID_SOCKET)
1126 	    close(conns[i].fd);
1127 	if (conns[i].state == READING
1128 	    && conns[i].x.in.buf != 0
1129 	    && conns[i].x.in.buf != udpbuf)
1130 	    free(conns[i].x.in.buf);
1131     }
1132     free(conns);
1133     if (reply->data != udpbuf)
1134 	free(udpbuf);
1135     return retval;
1136 }
1137