xref: /illumos-gate/usr/src/cmd/sendmail/src/queue.c (revision 4aac33d3)
1 /*
2  * Copyright (c) 1998-2007 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1983, 1995-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #pragma ident	"%Z%%M%	%I%	%E% SMI"
15 
16 #include <sendmail.h>
17 #include <sm/sem.h>
18 
19 SM_RCSID("@(#)$Id: queue.c,v 8.972 2007/03/29 22:55:17 ca Exp $")
20 
21 #include <dirent.h>
22 
/* placeholder that expands to an expression with no effect */
# define RELEASE_QUEUE	(void) 0
/* inode number member of a struct stat */
# define ST_INODE(st)	(st).st_ino

/* true if the given errno value is EEXIST */
#  define sm_file_exists(errno) ((errno) == EEXIST)

/*
**  Flags used by OPEN_TF in queueup() to create a queue (temp) file.
**	The file is always created with O_CREAT|O_EXCL.  If HASFLOCK is
**	set and O_EXLOCK is defined, O_EXLOCK is added and SM_OPEN_EXLOCK
**	is 1, i.e., the file is locked by open() itself; otherwise
**	SM_OPEN_EXLOCK is 0 and queueup() calls lockfile() after open().
*/

# if HASFLOCK && defined(O_EXLOCK)
#   define SM_OPEN_EXLOCK 1
#   define TF_OPEN_FLAGS (O_CREAT|O_WRONLY|O_EXCL|O_EXLOCK)
# else /* HASFLOCK && defined(O_EXLOCK) */
#  define TF_OPEN_FLAGS (O_CREAT|O_WRONLY|O_EXCL)
# endif /* HASFLOCK && defined(O_EXLOCK) */

/* make sure SM_OPEN_EXLOCK always has a value usable in #if */
#ifndef SM_OPEN_EXLOCK
# define SM_OPEN_EXLOCK 0
#endif /* ! SM_OPEN_EXLOCK */
38 
39 /*
40 **  Historical notes:
41 **	QF_VERSION == 4 was sendmail 8.10/8.11 without _FFR_QUEUEDELAY
42 **	QF_VERSION == 5 was sendmail 8.10/8.11 with    _FFR_QUEUEDELAY
43 **	QF_VERSION == 6 was sendmail 8.12      without _FFR_QUEUEDELAY
44 **	QF_VERSION == 7 was sendmail 8.12      with    _FFR_QUEUEDELAY
45 **	QF_VERSION == 8 is  sendmail 8.13
46 */
47 
48 #define QF_VERSION	8	/* version number of this queue format */
49 
50 static char	queue_letter __P((ENVELOPE *, int));
51 static bool	quarantine_queue_item __P((int, int, ENVELOPE *, char *));
52 
53 /* Naming convention: qgrp: index of queue group, qg: QUEUEGROUP */
54 
55 /*
56 **  Work queue.
57 */
58 
/*
**  One entry of the work queue: a queued message, identified by the
**  name of its control file.  Entries are chained through w_next.
*/

struct work
{
	char		*w_name;	/* name of control file */
	char		*w_host;	/* name of recipient host */
	bool		w_lock;		/* is message locked? */
	bool		w_tooyoung;	/* is it too young to run? */
	long		w_pri;		/* priority of message, see below */
	time_t		w_ctime;	/* creation time */
	time_t		w_mtime;	/* modification time */
	int		w_qgrp;		/* queue group located in */
	int		w_qdir;		/* queue directory located in */
	struct work	*w_next;	/* next in queue */
};

/* shorthand used for work queue entries throughout this file */
typedef struct work	WORK;
74 
static WORK	*WorkQ;		/* queue of things to be done */
static int	NumWorkGroups;	/* number of work groups */
static time_t	Current_LA_time = 0;	/* time stored by the last refresh in SM_GET_LA */

/* Get new load average every 30 seconds. */
#define GET_NEW_LA_TIME	30

/*
**  SM_GET_LA -- get current time and refresh the load average if needed.
**
**	Stores curtime() in now.  If Current_LA_time is more than
**	GET_NEW_LA_TIME seconds before that time, sm_getla() is called
**	and Current_LA_time is set to now.
**
**	now must be a modifiable lvalue of a time type; it is assigned
**	to and evaluated more than once, so it must not have side effects.
*/

#define SM_GET_LA(now)	\
	do							\
	{							\
		now = curtime();				\
		if (Current_LA_time < now - GET_NEW_LA_TIME)	\
		{						\
			sm_getla();				\
			Current_LA_time = now;			\
		}						\
	} while (0)
92 
93 /*
94 **  DoQueueRun indicates that a queue run is needed.
95 **	Notice: DoQueueRun is modified in a signal handler!
96 */
97 
98 static bool	volatile DoQueueRun; /* non-interrupt time queue run needed */
99 
100 /*
101 **  Work group definition structure.
102 **	Each work group contains one or more queue groups. This is done
103 **	to manage the number of queue group runners active at the same time
104 **	to be within the constraints of MaxQueueChildren (if it is set).
105 **	The number of queue groups that can be run on the next work run
106 **	is kept track of. The queue groups are run in a round robin.
107 */
108 
/*
**  NOTE(review): wg_qgs presumably holds wg_numqgrp entries and
**  wg_curqgrp presumably is the round robin position in it -- confirm
**  against the code that fills in WorkGrp[].
*/

struct workgrp
{
	int		wg_numqgrp;	/* number of queue groups in work grp */
	int		wg_runners;	/* total runners */
	int		wg_curqgrp;	/* current queue group */
	QUEUEGRP	**wg_qgs;	/* array of queue groups */
	int		wg_maxact;	/* max # of active runners */
	time_t		wg_lowqintvl;	/* lowest queue interval */
	int		wg_restart;	/* needs restarting? */
	int		wg_restartcnt;	/* count of times restarted */
};

typedef struct workgrp WORKGRP;

/*
**  Table of work groups (MAXWORKGROUPS + 1 slots).
**  NOTE(review): presumably volatile because it can be touched from
**  signal handler context -- confirm.
*/

static WORKGRP	volatile WorkGrp[MAXWORKGROUPS + 1];	/* work groups */
124 
125 #if SM_HEAP_CHECK
126 static SM_DEBUG_T DebugLeakQ = SM_DEBUG_INITIALIZER("leak_q",
127 	"@(#)$Debug: leak_q - trace memory leaks during queue processing $");
128 #endif /* SM_HEAP_CHECK */
129 
130 /*
131 **  We use EmptyString instead of "" to avoid
132 **  'zero-length format string' warnings from gcc
133 */
134 
135 static const char EmptyString[] = "";
136 
137 static void	grow_wlist __P((int, int));
138 static int	multiqueue_cache __P((char *, int, QUEUEGRP *, int, unsigned int *));
139 static int	gatherq __P((int, int, bool, bool *, bool *));
140 static int	sortq __P((int));
141 static void	printctladdr __P((ADDRESS *, SM_FILE_T *));
142 static bool	readqf __P((ENVELOPE *, bool));
143 static void	restart_work_group __P((int));
144 static void	runner_work __P((ENVELOPE *, int, bool, int, int));
145 static void	schedule_queue_runs __P((bool, int, bool));
146 static char	*strrev __P((char *));
147 static ADDRESS	*setctluser __P((char *, int, ENVELOPE *));
148 #if _FFR_RHS
149 static int	sm_strshufflecmp __P((char *, char *));
150 static void	init_shuffle_alphabet __P(());
151 #endif /* _FFR_RHS */
152 
153 /*
154 **  Note: workcmpf?() don't use a prototype because it will cause a conflict
155 **  with the qsort() call (which expects something like
156 **  int (*compar)(const void *, const void *), not (WORK *, WORK *))
157 */
158 
159 static int	workcmpf0();
160 static int	workcmpf1();
161 static int	workcmpf2();
162 static int	workcmpf3();
163 static int	workcmpf4();
164 static int	randi = 3;	/* index for workcmpf5() */
165 static int	workcmpf5();
166 static int	workcmpf6();
167 #if _FFR_RHS
168 static int	workcmpf7();
169 #endif /* _FFR_RHS */
170 
/*
**  get_rand_mod(m) -- value of get_random() reduced modulo m.
**	If RANDOMSHIFT is non-zero the value is shifted right by
**	RANDOMSHIFT bits before the modulo is taken.
**	m is used as divisor, so it must not be 0.
*/

#if RANDOMSHIFT
# define get_rand_mod(m)	((get_random() >> RANDOMSHIFT) % (m))
#else /* RANDOMSHIFT */
# define get_rand_mod(m)	(get_random() % (m))
#endif /* RANDOMSHIFT */
176 
177 /*
178 **  File system definition.
179 **	Used to keep track of how much free space is available
180 **	on a file system in which one or more queue directories reside.
181 */
182 
typedef struct filesys_shared	FILESYS;

/* per file system data; accessed via the FILE_SYS_*() macros below */
struct filesys_shared
{
	dev_t	fs_dev;		/* unique device id */
	long	fs_avail;	/* number of free blocks available */
	long	fs_blksize;	/* block size, in bytes */
};

/*
**  FileSys[i] and FSPath[i] describe the same file system.
**  If SM_CONF_SHM is set, FILE_SYS(i) refers to PtrFileSys[i] instead
**  of FileSys[i]; FileSys is then still used via sizeof(FileSys) in
**  the shared memory offset macros.
*/

/* probably kept in shared memory */
static FILESYS	FileSys[MAXFILESYS];	/* queue file systems */
static const char *FSPath[MAXFILESYS];	/* pathnames for file systems */
195 
196 #if SM_CONF_SHM
197 
198 /*
199 **  Shared memory data
200 **
201 **  Current layout:
202 **	size -- size of shared memory segment
203 **	pid -- pid of owner, should be a unique id to avoid misinterpretations
204 **		by other processes.
205 **	tag -- should be a unique id to avoid misinterpretations by others.
206 **		idea: hash over configuration data that will be stored here.
**	FileSys -- (array of) structure for used file systems.
**	NumFileSys -- number of file systems.
209 **	RSATmpCnt -- counter for number of uses of ephemeral RSA key.
210 **	QShm -- (array of) structure for information about queue directories.
211 */
212 
213 /*
214 **  Queue data in shared memory
215 */
216 
typedef struct queue_shared	QUEUE_SHM_T;

/* per queue directory data; QShm is indexed via QSHM_ENTRIES(i) */
struct queue_shared
{
	int	qs_entries;	/* number of entries */
	/* XXX more to follow? */
};

static void	*Pshm;		/* pointer to shared memory */
static FILESYS	*PtrFileSys;	/* pointer to queue file system array */
int		ShmId = SM_SHM_NO_ID;	/* shared memory id */
static QUEUE_SHM_T	*QShm;		/* pointer to shared queue data */
static size_t shms;	/* NOTE(review): presumably size of the segment -- confirm */

/*
**  Offsets into the segment as computed by the macros below
**  (p is the start of the segment):
**	0				an int
**	sizeof(int)			a pid_t (SHM_OFF_PID)
**	sizeof(pid_t) + sizeof(int)	an int, the tag (SHM_OFF_TAG)
**	SHM_OFF_HEAD			file system array, sizeof(FileSys)
**	... + sizeof(FileSys)		an int (OFF_NUM_FILE_SYS)
**	... + sizeof(int)		an int (OFF_RSA_TMP_CNT)
**	... + sizeof(int)		queue data (OFF_QUEUE_SHM)
**  SM_T_SIZE is the size of everything before the queue data.
*/

# define SHM_OFF_PID(p)	(((char *) (p)) + sizeof(int))
# define SHM_OFF_TAG(p)	(((char *) (p)) + sizeof(pid_t) + sizeof(int))
# define SHM_OFF_HEAD	(sizeof(pid_t) + sizeof(int) * 2)

/* how to access FileSys */
# define FILE_SYS(i)	(PtrFileSys[i])

/* first entry is a tag, for now just the size */
/* the file system array starts directly after the header */
# define OFF_FILE_SYS(p)	(((char *) (p)) + SHM_OFF_HEAD)

/* offset for PNumFileSys */
# define OFF_NUM_FILE_SYS(p)	(((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys))

/* offset for PRSATmpCnt */
# define OFF_RSA_TMP_CNT(p) (((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int))
int	*PRSATmpCnt;

/* offset for queue_shm */
# define OFF_QUEUE_SHM(p) (((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int) * 2)

/* number of entries recorded for shared queue data slot i */
# define QSHM_ENTRIES(i)	QShm[i].qs_entries

/* basic size of shared memory segment */
# define SM_T_SIZE	(SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int) * 2)
255 
256 static unsigned int	hash_q __P((char *, unsigned int));
257 
/*
**  HASH_Q -- fold a NUL-terminated string into a running hash value
**
**	Parameters:
**		p -- string to hash.
**		h -- value to start from (e.g., the result of a previous
**			call, so that several strings can be chained).
**
**	Returns:
**		the updated hash value (h unchanged if p is empty).
*/

static unsigned int
hash_q(p, h)
	char *p;
	unsigned int h;
{
	for (; *p != '\0'; p++)
	{
		int ch = *p;
		int mix = ch ^ (ch << 6);

		/* first stir in the scrambled character, then the plain one */
		h += (mix << 11) ^ (mix >> 1);
		h ^= (ch << 14) + (ch << 7) + (ch << 4) + ch;
	}
	return h;
}
286 
287 
288 #else /* SM_CONF_SHM */
289 # define FILE_SYS(i)	FileSys[i]
290 #endif /* SM_CONF_SHM */
291 
/* access to the various components of file system data */
/* the name comes from FSPath[], everything else from FILE_SYS(i) */
#define FILE_SYS_NAME(i)	FSPath[i]
#define FILE_SYS_AVAIL(i)	FILE_SYS(i).fs_avail
#define FILE_SYS_BLKSIZE(i)	FILE_SYS(i).fs_blksize
#define FILE_SYS_DEV(i)	FILE_SYS(i).fs_dev
297 
298 
299 /*
300 **  Current qf file field assignments:
301 **
302 **	A	AUTH= parameter
303 **	B	body type
304 **	C	controlling user
305 **	D	data file name
306 **	d	data file directory name (added in 8.12)
307 **	E	error recipient
308 **	F	flag bits
309 **	G	free (was: queue delay algorithm if _FFR_QUEUEDELAY)
310 **	H	header
311 **	I	data file's inode number
312 **	K	time of last delivery attempt
313 **	L	Solaris Content-Length: header (obsolete)
314 **	M	message
315 **	N	number of delivery attempts
316 **	P	message priority
317 **	q	quarantine reason
318 **	Q	original recipient (ORCPT=)
319 **	r	final recipient (Final-Recipient: DSN field)
320 **	R	recipient
321 **	S	sender
322 **	T	init time
323 **	V	queue file version
324 **	X	free (was: character set if _FFR_SAVE_CHARSET)
325 **	Y	free (was: current delay if _FFR_QUEUEDELAY)
326 **	Z	original envelope id from ESMTP
327 **	!	deliver by (added in 8.12)
328 **	$	define macro
329 **	.	terminate file
330 */
331 
/*
**  QUEUEUP -- queue a message up for future transmission.
**
**	Writes the control file for an envelope and, if the envelope
**	has no data file yet (EF_HAS_DF not set), the data file too.
**	If the envelope is new (no id, or EF_INQUEUE not set) the queue
**	file is created and written directly; otherwise a locked temp
**	file is written and then renamed to the queue file name.
**
**	Parameters:
**		e -- the envelope to queue up.
**		announce -- if true, tell when you are queueing up.
**		msync -- if true, then fsync() if SuperSafe interactive mode.
**
**	Returns:
**		none.
**
**	Side Effects:
**		The current request is saved in a control file.
**		The queue file is left locked.
**		e->e_lockfp and e->e_qfletter are updated, EF_INQUEUE
**		(and EF_HAS_DF if a data file was written) is set in
**		e->e_flags, and errno is cleared.
**
**	NOTE(review): syserr() formats starting with '!' appear to be
**		fatal (one call below is marked NOTREACHED) -- confirm
**		against syserr().
*/

void
queueup(e, announce, msync)
	register ENVELOPE *e;
	bool announce;
	bool msync;
{
	register SM_FILE_T *tfp;
	register HDR *h;
	register ADDRESS *q;
	int tfd = -1;
	int i;
	bool newid;
	register char *p;
	MAILER nullmailer;
	MCI mcibuf;
	char qf[MAXPATHLEN];
	char tf[MAXPATHLEN];
	char df[MAXPATHLEN];
	char buf[MAXLINE];

	/*
	**  Create control file.
	*/

	/*
	**  OPEN_TF: open(2) the file named in tf with TF_OPEN_FLAGS and
	**  store the descriptor in tfd.  If QueueFileMode contains the
	**  group write bit, the umask is set to 002 around the open()
	**  and restored afterwards.
	*/

#define OPEN_TF	do							\
		{							\
			MODE_T oldumask = 0;				\
									\
			if (bitset(S_IWGRP, QueueFileMode))		\
				oldumask = umask(002);			\
			tfd = open(tf, TF_OPEN_FLAGS, QueueFileMode);	\
			if (bitset(S_IWGRP, QueueFileMode))		\
				(void) umask(oldumask);			\
		} while (0)


	/* new: envelope has no id yet or is not marked as being in the queue */
	newid = (e->e_id == NULL) || !bitset(EF_INQUEUE, e->e_flags);
	/* default to the NEWQFL_LETTER name; replaced below for a new id */
	(void) sm_strlcpy(tf, queuename(e, NEWQFL_LETTER), sizeof(tf));
	tfp = e->e_lockfp;
	if (tfp == NULL && newid)
	{
		/*
		**  open qf file directly: this will give an error if the file
		**  already exists and hence prevent problems if a queue-id
		**  is reused (e.g., because the clock is set back).
		*/

		(void) sm_strlcpy(tf, queuename(e, ANYQFL_LETTER), sizeof(tf));
		OPEN_TF;
		if (tfd < 0 ||
#if !SM_OPEN_EXLOCK
		    !lockfile(tfd, tf, NULL, LOCK_EX|LOCK_NB) ||
#endif /* !SM_OPEN_EXLOCK */
		    (tfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
					 (void *) &tfd, SM_IO_WRONLY,
					 NULL)) == NULL)
		{
			/* printopenfds() may change errno; keep it for syserr() */
			int save_errno = errno;

			printopenfds(true);
			errno = save_errno;
			syserr("!queueup: cannot create queue file %s, euid=%d, fd=%d, fp=%p",
				tf, (int) geteuid(), tfd, tfp);
			/* NOTREACHED */
		}
		e->e_lockfp = tfp;
		upd_qs(e, 1, 0, "queueup");
	}

	/* if newid, write the queue file directly (instead of temp file) */
	if (!newid)
	{
		/* get a locked tf file */
		/*
		**  Up to 128 attempts.  After a failed attempt sleep for
		**  i % 32 seconds; on every 32nd attempt (i % 32 == 31)
		**  close the descriptor (if open) and rename the existing
		**  tf file away instead of sleeping.
		*/

		for (i = 0; i < 128; i++)
		{
			if (tfd < 0)
			{
				OPEN_TF;
				if (tfd < 0)
				{
					/* only "file exists" is worth a retry */
					if (errno != EEXIST)
						break;
					if (LogLevel > 0 && (i % 32) == 0)
						sm_syslog(LOG_ALERT, e->e_id,
							  "queueup: cannot create %s, uid=%d: %s",
							  tf, (int) geteuid(),
							  sm_errstring(errno));
				}
#if SM_OPEN_EXLOCK
				else
					break;
#endif /* SM_OPEN_EXLOCK */
			}
			if (tfd >= 0)
			{
#if SM_OPEN_EXLOCK
				/* file is locked by open() */
				break;
#else /* SM_OPEN_EXLOCK */
				if (lockfile(tfd, tf, NULL, LOCK_EX|LOCK_NB))
					break;
				else
#endif /* SM_OPEN_EXLOCK */
				if (LogLevel > 0 && (i % 32) == 0)
					sm_syslog(LOG_ALERT, e->e_id,
						  "queueup: cannot lock %s: %s",
						  tf, sm_errstring(errno));
				if ((i % 32) == 31)
				{
					(void) close(tfd);
					tfd = -1;
				}
			}

			if ((i % 32) == 31)
			{
				/* save the old temp file away */
				(void) rename(tf, queuename(e, TEMPQF_LETTER));
			}
			else
				(void) sleep(i % 32);
		}
		if (tfd < 0 || (tfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
						 (void *) &tfd, SM_IO_WRONLY_B,
						 NULL)) == NULL)
		{
			int save_errno = errno;

			printopenfds(true);
			errno = save_errno;
			syserr("!queueup: cannot create queue temp file %s, uid=%d",
				tf, (int) geteuid());
		}
	}

	/* debug output only (-d40.x) */
	if (tTd(40, 1))
		sm_dprintf("\n>>>>> queueing %s/%s%s >>>>>\n",
			   qid_printqueue(e->e_qgrp, e->e_qdir),
			   queuename(e, ANYQFL_LETTER),
			   newid ? " (new id)" : "");
	if (tTd(40, 3))
	{
		sm_dprintf("  e_flags=");
		printenvflags(e);
	}
	if (tTd(40, 32))
	{
		sm_dprintf("  sendq=");
		printaddr(sm_debug_file(), e->e_sendqueue, true);
	}
	if (tTd(40, 9))
	{
		sm_dprintf("  tfp=");
		dumpfd(sm_io_getinfo(tfp, SM_IO_WHAT_FD, NULL), true, false);
		sm_dprintf("  lockfp=");
		if (e->e_lockfp == NULL)
			sm_dprintf("NULL\n");
		else
			dumpfd(sm_io_getinfo(e->e_lockfp, SM_IO_WHAT_FD, NULL),
			       true, false);
	}

	/*
	**  If there is no data file yet, create one.
	*/

	(void) sm_strlcpy(df, queuename(e, DATAFL_LETTER), sizeof(df));
	if (bitset(EF_HAS_DF, e->e_flags))
	{
		/*
		**  Data file exists already: ask for a commit of e_dfp
		**  (SM_BF_COMMIT) unless SuperSafe is one of the SAFE_REALLY
		**  modes; a failure with errno EINVAL is not an error here.
		*/

		if (e->e_dfp != NULL &&
		    SuperSafe != SAFE_REALLY &&
		    SuperSafe != SAFE_REALLY_POSTMILTER &&
		    sm_io_setinfo(e->e_dfp, SM_BF_COMMIT, NULL) < 0 &&
		    errno != EINVAL)
		{
			syserr("!queueup: cannot commit data file %s, uid=%d",
			       queuename(e, DATAFL_LETTER), (int) geteuid());
		}
		if (e->e_dfp != NULL &&
		    SuperSafe == SAFE_INTERACTIVE && msync)
		{
			if (tTd(40,32))
				sm_syslog(LOG_INFO, e->e_id,
					  "queueup: fsync(e->e_dfp)");

			if (fsync(sm_io_getinfo(e->e_dfp, SM_IO_WHAT_FD,
						NULL)) < 0)
			{
				/* 552 for a new message, 452 otherwise */
				if (newid)
					syserr("!552 Error writing data file %s",
					       df);
				else
					syserr("!452 Error writing data file %s",
					       df);
			}
		}
	}
	else
	{
		int dfd;
		MODE_T oldumask = 0;
		register SM_FILE_T *dfp = NULL;
		struct stat stbuf;

		if (e->e_dfp != NULL &&
		    sm_io_getinfo(e->e_dfp, SM_IO_WHAT_ISTYPE, BF_FILE_TYPE))
			syserr("committing over bf file");

		/* same umask handling as in OPEN_TF */
		if (bitset(S_IWGRP, QueueFileMode))
			oldumask = umask(002);
		dfd = open(df, O_WRONLY|O_CREAT|O_TRUNC|QF_O_EXTRA,
			   QueueFileMode);
		if (bitset(S_IWGRP, QueueFileMode))
			(void) umask(oldumask);
		if (dfd < 0 || (dfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
						 (void *) &dfd, SM_IO_WRONLY_B,
						 NULL)) == NULL)
			syserr("!queueup: cannot create data temp file %s, uid=%d",
				df, (int) geteuid());
		/* remember device/inode for the I record; -1: inode unknown */
		if (fstat(dfd, &stbuf) < 0)
			e->e_dfino = -1;
		else
		{
			e->e_dfdev = stbuf.st_dev;
			e->e_dfino = ST_INODE(stbuf);
		}
		e->e_flags |= EF_HAS_DF;
		/* write the body into the data file via e_putbody */
		memset(&mcibuf, '\0', sizeof(mcibuf));
		mcibuf.mci_out = dfp;
		mcibuf.mci_mailer = FileMailer;
		(*e->e_putbody)(&mcibuf, e, NULL);

		if (SuperSafe == SAFE_REALLY ||
		    SuperSafe == SAFE_REALLY_POSTMILTER ||
		    (SuperSafe == SAFE_INTERACTIVE && msync))
		{
			if (tTd(40,32))
				sm_syslog(LOG_INFO, e->e_id,
					  "queueup: fsync(dfp)");

			if (fsync(sm_io_getinfo(dfp, SM_IO_WHAT_FD, NULL)) < 0)
			{
				if (newid)
					syserr("!552 Error writing data file %s",
					       df);
				else
					syserr("!452 Error writing data file %s",
					       df);
			}
		}

		if (sm_io_close(dfp, SM_TIME_DEFAULT) < 0)
			syserr("!queueup: cannot save data temp file %s, uid=%d",
				df, (int) geteuid());
		/* from now on the body is output by putbody */
		e->e_putbody = putbody;
	}

	/*
	**  Output future work requests.
	**	Priority and creation time should be first, since
	**	they are required by gatherq.
	*/

	/* output queue version number (must be first!) */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "V%d\n", QF_VERSION);

	/* output creation time */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "T%ld\n", (long) e->e_ctime);

	/* output last delivery time */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "K%ld\n", (long) e->e_dtime);

	/* output number of delivery attempts */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "N%d\n", e->e_ntries);

	/* output message priority */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "P%ld\n", e->e_msgpriority);

	/*
	**  If data file is in a different directory than the queue file,
	**  output a "d" record naming the directory of the data file.
	*/

	if (e->e_dfqgrp != e->e_qgrp)
	{
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "d%s\n",
			Queue[e->e_dfqgrp]->qg_qpaths[e->e_dfqdir].qp_name);
	}

	/* output inode number of data file */
	/* record is I<major>/<minor>/<inode>; skipped if the inode is unknown */
	if (e->e_dfino != -1)
	{
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "I%ld/%ld/%llu\n",
				     (long) major(e->e_dfdev),
				     (long) minor(e->e_dfdev),
				     (ULONGLONG_T) e->e_dfino);
	}

	/* output body type */
	if (e->e_bodytype != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "B%s\n",
				     denlstring(e->e_bodytype, true, false));

	/* quarantine reason */
	if (e->e_quarmsg != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "q%s\n",
				     denlstring(e->e_quarmsg, true, false));

	/* message from envelope, if it exists */
	if (e->e_message != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "M%s\n",
				     denlstring(e->e_message, true, false));

	/* send various flag bits through */
	/* one letter per set flag is collected in buf; F record only if any */
	p = buf;
	if (bitset(EF_WARNING, e->e_flags))
		*p++ = 'w';
	if (bitset(EF_RESPONSE, e->e_flags))
		*p++ = 'r';
	if (bitset(EF_HAS8BIT, e->e_flags))
		*p++ = '8';
	if (bitset(EF_DELETE_BCC, e->e_flags))
		*p++ = 'b';
	if (bitset(EF_RET_PARAM, e->e_flags))
		*p++ = 'd';
	if (bitset(EF_NO_BODY_RETN, e->e_flags))
		*p++ = 'n';
	if (bitset(EF_SPLIT, e->e_flags))
		*p++ = 's';
	*p++ = '\0';
	if (buf[0] != '\0')
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "F%s\n", buf);

	/* save $={persistentMacros} macro values */
	queueup_macros(macid("{persistentMacros}"), tfp, e);

	/* output name of sender */
	if (bitnset(M_UDBENVELOPE, e->e_from.q_mailer->m_flags))
		p = e->e_sender;
	else
		p = e->e_from.q_paddr;
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "S%s\n",
			     denlstring(p, true, false));

	/* output ESMTP-supplied "original" information */
	if (e->e_envid != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "Z%s\n",
				     denlstring(e->e_envid, true, false));

	/* output AUTH= parameter */
	if (e->e_auth_param != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "A%s\n",
				     denlstring(e->e_auth_param, true, false));
	/* output deliver-by record: flag character and time value */
	if (e->e_dlvr_flag != 0)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "!%c %ld\n",
				     (char) e->e_dlvr_flag, e->e_deliver_by);

	/* output list of recipient addresses */
	/* first forget the controlling address remembered by printctladdr() */
	printctladdr(NULL, NULL);
	for (q = e->e_sendqueue; q != NULL; q = q->q_next)
	{
		/* only recipients that are still undelivered are written */
		if (!QS_IS_UNDELIVERED(q->q_state))
			continue;

		/* message for this recipient, if it exists */
		if (q->q_message != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "M%s\n",
					     denlstring(q->q_message, true,
							false));

		printctladdr(q, tfp);
		if (q->q_orcpt != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "Q%s\n",
					     denlstring(q->q_orcpt, true,
							false));
		if (q->q_finalrcpt != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "r%s\n",
					     denlstring(q->q_finalrcpt, true,
							false));
		/* R record: 'R', one letter per set flag, ':', then the address */
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'R');
		if (bitset(QPRIMARY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'P');
		if (bitset(QHASNOTIFY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'N');
		if (bitset(QPINGONSUCCESS, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'S');
		if (bitset(QPINGONFAILURE, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'F');
		if (bitset(QPINGONDELAY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'D');
		if (q->q_alias != NULL &&
		    bitset(QALIAS, q->q_alias->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'A');
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, ':');
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s\n",
				     denlstring(q->q_paddr, true, false));
		if (announce)
		{
			char *tag = "queued";

			if (e->e_quarmsg != NULL)
				tag = "quarantined";

			/* e_to is set only for the duration of the announcement */
			e->e_to = q->q_paddr;
			message(tag);
			if (LogLevel > 8)
				logdelivery(q->q_mailer, NULL, q->q_status,
					    tag, NULL, (time_t) 0, e);
			e->e_to = NULL;
		}
		if (tTd(40, 1))
		{
			sm_dprintf("queueing ");
			printaddr(sm_debug_file(), q, false);
		}
	}

	/*
	**  Output headers for this message.
	**	Expand macros completely here.  Queue run will deal with
	**	everything as absolute headers.
	**		All headers that must be relative to the recipient
	**		can be cracked later.
	**	We set up a "null mailer" -- i.e., a mailer that will have
	**	no effect on the addresses as they are output.
	*/

	memset((char *) &nullmailer, '\0', sizeof(nullmailer));
	nullmailer.m_re_rwset = nullmailer.m_rh_rwset =
			nullmailer.m_se_rwset = nullmailer.m_sh_rwset = -1;
	nullmailer.m_eol = "\n";
	memset(&mcibuf, '\0', sizeof(mcibuf));
	mcibuf.mci_mailer = &nullmailer;
	mcibuf.mci_out = tfp;

	macdefine(&e->e_macro, A_PERM, 'g', "\201f");
	for (h = e->e_header; h != NULL; h = h->h_link)
	{
		if (h->h_value == NULL)
			continue;

		/* don't output resent headers on non-resent messages */
		if (bitset(H_RESENT, h->h_flags) &&
		    !bitset(EF_RESENT, e->e_flags))
			continue;

		/* expand macros; if null, don't output header at all */
		if (bitset(H_DEFAULT, h->h_flags))
		{
			(void) expand(h->h_value, buf, sizeof(buf), e);
			if (buf[0] == '\0')
				continue;
			/* a single blank counts as empty, too */
			if (buf[0] == ' ' && buf[1] == '\0')
				continue;
		}

		/* output this header */
		/* record is H?<condition>?<field>:<value> */
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "H?");

		/* output conditional macro if present */
		if (h->h_macro != '\0')
		{
			/* bit 0200 set: written by name as ${name}, else as $c */
			if (bitset(0200, h->h_macro))
				(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT,
						     "${%s}",
						      macname(bitidx(h->h_macro)));
			else
				(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT,
						     "$%c", h->h_macro);
		}
		else if (!bitzerop(h->h_mflags) &&
			 bitset(H_CHECK|H_ACHECK, h->h_flags))
		{
			int j;

			/* if conditional, output the set of conditions */
			for (j = '\0'; j <= '\177'; j++)
				if (bitnset(j, h->h_mflags))
					(void) sm_io_putc(tfp, SM_TIME_DEFAULT,
							  j);
		}
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, '?');

		/* output the header: expand macros, convert addresses */
		if (bitset(H_DEFAULT, h->h_flags) &&
		    !bitset(H_BINDLATE, h->h_flags))
		{
			/* buf still holds the expansion done above */
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s:%s\n",
					     h->h_field,
					     denlstring(buf, false, true));
		}
		else if (bitset(H_FROM|H_RCPT, h->h_flags) &&
			 !bitset(H_BINDLATE, h->h_flags))
		{
			bool oldstyle = bitset(EF_OLDSTYLE, e->e_flags);
			SM_FILE_T *savetrace = TrafficLogFile;

			/* TrafficLogFile is cleared around commaize() and restored */
			TrafficLogFile = NULL;

			if (bitset(H_FROM, h->h_flags))
				oldstyle = false;

			commaize(h, h->h_value, oldstyle, &mcibuf, e);

			TrafficLogFile = savetrace;
		}
		else
		{
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s:%s\n",
					     h->h_field,
					     denlstring(h->h_value, false,
							true));
		}
	}

	/*
	**  Clean up.
	**
	**	Write a terminator record -- this is to prevent
	**	scurrilous crackers from appending any data.
	*/

	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, ".\n");

	/* flush; fsync() only in the SuperSafe modes that ask for it */
	if (sm_io_flush(tfp, SM_TIME_DEFAULT) != 0 ||
	    ((SuperSafe == SAFE_REALLY ||
	      SuperSafe == SAFE_REALLY_POSTMILTER ||
	      (SuperSafe == SAFE_INTERACTIVE && msync)) &&
	     fsync(sm_io_getinfo(tfp, SM_IO_WHAT_FD, NULL)) < 0) ||
	    sm_io_error(tfp))
	{
		if (newid)
			syserr("!552 Error writing control file %s", tf);
		else
			syserr("!452 Error writing control file %s", tf);
	}

	if (!newid)
	{
		char new = queue_letter(e, ANYQFL_LETTER);

		/* rename (locked) tf to be (locked) [qh]f */
		(void) sm_strlcpy(qf, queuename(e, ANYQFL_LETTER),
				  sizeof(qf));
		if (rename(tf, qf) < 0)
			syserr("cannot rename(%s, %s), uid=%d",
				tf, qf, (int) geteuid());
		else
		{
			/*
			**  Check if type has changed and only
			**  remove the old item if the rename above
			**  succeeded.
			*/

			if (e->e_qfletter != '\0' &&
			    e->e_qfletter != new)
			{
				if (tTd(40, 5))
				{
					sm_dprintf("type changed from %c to %c\n",
						   e->e_qfletter, new);
				}

				if (unlink(queuename(e, e->e_qfletter)) < 0)
				{
					/* XXX: something more drastic? */
					if (LogLevel > 0)
						sm_syslog(LOG_ERR, e->e_id,
							  "queueup: unlink(%s) failed: %s",
							  queuename(e, e->e_qfletter),
							  sm_errstring(errno));
				}
			}
		}
		/* recorded even if the rename above failed */
		e->e_qfletter = new;

		/*
		**  fsync() after renaming to make sure metadata is
		**  written to disk on filesystems in which renames are
		**  not guaranteed.
		*/

		if (SuperSafe != SAFE_NO)
		{
			/* for softupdates */
			if (tfd >= 0 && fsync(tfd) < 0)
			{
				syserr("!queueup: cannot fsync queue temp file %s",
				       tf);
			}
			SYNC_DIR(qf, true);
		}

		/* close and unlock old (locked) queue file */
		if (e->e_lockfp != NULL)
			(void) sm_io_close(e->e_lockfp, SM_TIME_DEFAULT);
		/* the newly written (and still locked) file is the lock file now */
		e->e_lockfp = tfp;

		/* save log info */
		if (LogLevel > 79)
			sm_syslog(LOG_DEBUG, e->e_id, "queueup %s", qf);
	}
	else
	{
		/* save log info */
		if (LogLevel > 79)
			sm_syslog(LOG_DEBUG, e->e_id, "queueup %s", tf);

		e->e_qfletter = queue_letter(e, ANYQFL_LETTER);
	}

	errno = 0;
	e->e_flags |= EF_INQUEUE;

	if (tTd(40, 1))
		sm_dprintf("<<<<< done queueing %s <<<<<\n\n", e->e_id);
	return;
}
968 
969 /*
970 **  PRINTCTLADDR -- print control address to file.
971 **
972 **	Parameters:
973 **		a -- address.
974 **		tfp -- file pointer.
975 **
976 **	Returns:
977 **		none.
978 **
979 **	Side Effects:
980 **		The control address (if changed) is printed to the file.
981 **		The last control address and uid are saved.
982 */
983 
984 static void
985 printctladdr(a, tfp)
986 	register ADDRESS *a;
987 	SM_FILE_T *tfp;
988 {
989 	char *user;
990 	register ADDRESS *q;
991 	uid_t uid;
992 	gid_t gid;
993 	static ADDRESS *lastctladdr = NULL;
994 	static uid_t lastuid;
995 
996 	/* initialization */
997 	if (a == NULL || a->q_alias == NULL || tfp == NULL)
998 	{
999 		if (lastctladdr != NULL && tfp != NULL)
1000 			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C\n");
1001 		lastctladdr = NULL;
1002 		lastuid = 0;
1003 		return;
1004 	}
1005 
1006 	/* find the active uid */
1007 	q = getctladdr(a);
1008 	if (q == NULL)
1009 	{
1010 		user = NULL;
1011 		uid = 0;
1012 		gid = 0;
1013 	}
1014 	else
1015 	{
1016 		user = q->q_ruser != NULL ? q->q_ruser : q->q_user;
1017 		uid = q->q_uid;
1018 		gid = q->q_gid;
1019 	}
1020 	a = a->q_alias;
1021 
1022 	/* check to see if this is the same as last time */
1023 	if (lastctladdr != NULL && uid == lastuid &&
1024 	    strcmp(lastctladdr->q_paddr, a->q_paddr) == 0)
1025 		return;
1026 	lastuid = uid;
1027 	lastctladdr = a;
1028 
1029 	if (uid == 0 || user == NULL || user[0] == '\0')
1030 		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C");
1031 	else
1032 		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C%s:%ld:%ld",
1033 				     denlstring(user, true, false), (long) uid,
1034 				     (long) gid);
1035 	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, ":%s\n",
1036 			     denlstring(a->q_paddr, true, false));
1037 }
1038 
1039 /*
1040 **  RUNNERS_SIGTERM -- propagate a SIGTERM to queue runner process
1041 **
1042 **	This propagates the signal to the child processes that are queue
1043 **	runners. This is for a queue runner "cleanup". After all of the
1044 **	child queue runner processes are signaled (it should be SIGTERM
1045 **	being the sig) then the old signal handler (Oldsh) is called
1046 **	to handle any cleanup set for this process (provided it is not
1047 **	SIG_DFL or SIG_IGN). The signal may not be handled immediately
1048 **	if the BlockOldsh flag is set. If the current process doesn't
1049 **	have a parent then handle the signal immediately, regardless of
1050 **	BlockOldsh.
1051 **
1052 **	Parameters:
1053 **		sig -- the signal number being sent
1054 **
1055 **	Returns:
1056 **		none.
1057 **
1058 **	Side Effects:
1059 **		Sets the NoMoreRunners boolean to true to stop more runners
1060 **		from being started in runqueue().
1061 **
1062 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
1063 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
1064 **		DOING.
1065 */
1066 
1067 static bool		volatile NoMoreRunners = false;
1068 static sigfunc_t	Oldsh_term = SIG_DFL;
1069 static sigfunc_t	Oldsh_hup = SIG_DFL;
1070 static sigfunc_t	volatile Oldsh = SIG_DFL;
1071 static bool		BlockOldsh = false;
1072 static int		volatile Oldsig = 0;
1073 static SIGFUNC_DECL	runners_sigterm __P((int));
1074 static SIGFUNC_DECL	runners_sighup __P((int));
1075 
1076 static SIGFUNC_DECL
1077 runners_sigterm(sig)
1078 	int sig;
1079 {
1080 	int save_errno = errno;
1081 
1082 	FIX_SYSV_SIGNAL(sig, runners_sigterm);
1083 	errno = save_errno;
1084 	CHECK_CRITICAL(sig);
1085 	NoMoreRunners = true;
1086 	Oldsh = Oldsh_term;
1087 	Oldsig = sig;
1088 	proc_list_signal(PROC_QUEUE, sig);
1089 
1090 	if (!BlockOldsh || getppid() <= 1)
1091 	{
1092 		/* Check that a valid 'old signal handler' is callable */
1093 		if (Oldsh_term != SIG_DFL && Oldsh_term != SIG_IGN &&
1094 		    Oldsh_term != runners_sigterm)
1095 			(*Oldsh_term)(sig);
1096 	}
1097 	errno = save_errno;
1098 	return SIGFUNC_RETURN;
1099 }
1100 /*
1101 **  RUNNERS_SIGHUP -- propagate a SIGHUP to queue runner process
1102 **
1103 **	This propagates the signal to the child processes that are queue
1104 **	runners. This is for a queue runner "cleanup". After all of the
1105 **	child queue runner processes are signaled (it should be SIGHUP
1106 **	being the sig) then the old signal handler (Oldsh) is called to
1107 **	handle any cleanup set for this process (provided it is not SIG_DFL
1108 **	or SIG_IGN). The signal may not be handled immediately if the
1109 **	BlockOldsh flag is set. If the current process doesn't have
1110 **	a parent then handle the signal immediately, regardless of
1111 **	BlockOldsh.
1112 **
1113 **	Parameters:
1114 **		sig -- the signal number being sent
1115 **
1116 **	Returns:
1117 **		none.
1118 **
1119 **	Side Effects:
1120 **		Sets the NoMoreRunners boolean to true to stop more runners
1121 **		from being started in runqueue().
1122 **
1123 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
1124 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
1125 **		DOING.
1126 */
1127 
1128 static SIGFUNC_DECL
1129 runners_sighup(sig)
1130 	int sig;
1131 {
1132 	int save_errno = errno;
1133 
1134 	FIX_SYSV_SIGNAL(sig, runners_sighup);
1135 	errno = save_errno;
1136 	CHECK_CRITICAL(sig);
1137 	NoMoreRunners = true;
1138 	Oldsh = Oldsh_hup;
1139 	Oldsig = sig;
1140 	proc_list_signal(PROC_QUEUE, sig);
1141 
1142 	if (!BlockOldsh || getppid() <= 1)
1143 	{
1144 		/* Check that a valid 'old signal handler' is callable */
1145 		if (Oldsh_hup != SIG_DFL && Oldsh_hup != SIG_IGN &&
1146 		    Oldsh_hup != runners_sighup)
1147 			(*Oldsh_hup)(sig);
1148 	}
1149 	errno = save_errno;
1150 	return SIGFUNC_RETURN;
1151 }
1152 /*
1153 **  MARK_WORK_GROUP_RESTART -- mark a work group as needing a restart
1154 **
1155 **  Sets a workgroup for restarting.
1156 **
1157 **	Parameters:
1158 **		wgrp -- the work group id to restart.
1159 **		reason -- why (signal?), -1 to turn off restart
1160 **
1161 **	Returns:
1162 **		none.
1163 **
1164 **	Side effects:
1165 **		May set global RestartWorkGroup to true.
1166 **
1167 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
1168 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
1169 **		DOING.
1170 */
1171 
1172 void
1173 mark_work_group_restart(wgrp, reason)
1174 	int wgrp;
1175 	int reason;
1176 {
1177 	if (wgrp < 0 || wgrp > NumWorkGroups)
1178 		return;
1179 
1180 	WorkGrp[wgrp].wg_restart = reason;
1181 	if (reason >= 0)
1182 		RestartWorkGroup = true;
1183 }
1184 /*
1185 **  RESTART_MARKED_WORK_GROUPS -- restart work groups marked as needing restart
1186 **
1187 **  Restart any workgroup marked as needing a restart provided more
1188 **  runners are allowed.
1189 **
1190 **	Parameters:
1191 **		none.
1192 **
1193 **	Returns:
1194 **		none.
1195 **
1196 **	Side effects:
1197 **		Sets global RestartWorkGroup to false.
1198 */
1199 
1200 void
1201 restart_marked_work_groups()
1202 {
1203 	int i;
1204 	int wasblocked;
1205 
1206 	if (NoMoreRunners)
1207 		return;
1208 
1209 	/* Block SIGCHLD so reapchild() doesn't mess with us */
1210 	wasblocked = sm_blocksignal(SIGCHLD);
1211 
1212 	for (i = 0; i < NumWorkGroups; i++)
1213 	{
1214 		if (WorkGrp[i].wg_restart >= 0)
1215 		{
1216 			if (LogLevel > 8)
1217 				sm_syslog(LOG_ERR, NOQID,
1218 					  "restart queue runner=%d due to signal 0x%x",
1219 					  i, WorkGrp[i].wg_restart);
1220 			restart_work_group(i);
1221 		}
1222 	}
1223 	RestartWorkGroup = false;
1224 
1225 	if (wasblocked == 0)
1226 		(void) sm_releasesignal(SIGCHLD);
1227 }
1228 /*
1229 **  RESTART_WORK_GROUP -- restart a specific work group
1230 **
1231 **  Restart a specific workgroup provided more runners are allowed.
1232 **  If the requested work group has been restarted too many times log
1233 **  this and refuse to restart.
1234 **
1235 **	Parameters:
1236 **		wgrp -- the work group id to restart
1237 **
1238 **	Returns:
1239 **		none.
1240 **
1241 **	Side Effects:
1242 **		starts another process doing the work of wgrp
1243 */
1244 
1245 #define MAX_PERSIST_RESTART	10	/* max allowed number of restarts */
1246 
1247 static void
1248 restart_work_group(wgrp)
1249 	int wgrp;
1250 {
1251 	if (NoMoreRunners ||
1252 	    wgrp < 0 || wgrp > NumWorkGroups)
1253 		return;
1254 
1255 	WorkGrp[wgrp].wg_restart = -1;
1256 	if (WorkGrp[wgrp].wg_restartcnt < MAX_PERSIST_RESTART)
1257 	{
1258 		/* avoid overflow; increment here */
1259 		WorkGrp[wgrp].wg_restartcnt++;
1260 		(void) run_work_group(wgrp, RWG_FORK|RWG_PERSISTENT|RWG_RUNALL);
1261 	}
1262 	else
1263 	{
1264 		sm_syslog(LOG_ERR, NOQID,
1265 			  "ERROR: persistent queue runner=%d restarted too many times, queue runner lost",
1266 			  wgrp);
1267 	}
1268 }
1269 /*
1270 **  SCHEDULE_QUEUE_RUNS -- schedule the next queue run for a work group.
1271 **
1272 **	Parameters:
1273 **		runall -- schedule even if individual bit is not set.
1274 **		wgrp -- the work group id to schedule.
1275 **		didit -- the queue run was performed for this work group.
1276 **
1277 **	Returns:
1278 **		nothing
1279 */
1280 
/*
**  INCR_MOD(v, m) -- add one to v and wrap it to 0 once it reaches m.
**	Expands to exactly one statement; write it with a trailing ';'.
*/

#define INCR_MOD(v, m)	do				\
			{				\
				if (++(v) >= (m))	\
					(v) = 0;	\
			} while (0)
1284 
1285 static void
1286 schedule_queue_runs(runall, wgrp, didit)
1287 	bool runall;
1288 	int wgrp;
1289 	bool didit;
1290 {
1291 	int qgrp, cgrp, endgrp;
1292 #if _FFR_QUEUE_SCHED_DBG
1293 	time_t lastsched;
1294 	bool sched;
1295 #endif /* _FFR_QUEUE_SCHED_DBG */
1296 	time_t now;
1297 	time_t minqintvl;
1298 
1299 	/*
1300 	**  This is a bit ugly since we have to duplicate the
1301 	**  code that "walks" through a work queue group.
1302 	*/
1303 
1304 	now = curtime();
1305 	minqintvl = 0;
1306 	cgrp = endgrp = WorkGrp[wgrp].wg_curqgrp;
1307 	do
1308 	{
1309 		time_t qintvl;
1310 
1311 #if _FFR_QUEUE_SCHED_DBG
1312 		lastsched = 0;
1313 		sched = false;
1314 #endif /* _FFR_QUEUE_SCHED_DBG */
1315 		qgrp = WorkGrp[wgrp].wg_qgs[cgrp]->qg_index;
1316 		if (Queue[qgrp]->qg_queueintvl > 0)
1317 			qintvl = Queue[qgrp]->qg_queueintvl;
1318 		else if (QueueIntvl > 0)
1319 			qintvl = QueueIntvl;
1320 		else
1321 			qintvl = (time_t) 0;
1322 #if _FFR_QUEUE_SCHED_DBG
1323 		lastsched = Queue[qgrp]->qg_nextrun;
1324 #endif /* _FFR_QUEUE_SCHED_DBG */
1325 		if ((runall || Queue[qgrp]->qg_nextrun <= now) && qintvl > 0)
1326 		{
1327 #if _FFR_QUEUE_SCHED_DBG
1328 			sched = true;
1329 #endif /* _FFR_QUEUE_SCHED_DBG */
1330 			if (minqintvl == 0 || qintvl < minqintvl)
1331 				minqintvl = qintvl;
1332 
1333 			/*
1334 			**  Only set a new time if a queue run was performed
1335 			**  for this queue group.  If the queue was not run,
1336 			**  we could starve it by setting a new time on each
1337 			**  call.
1338 			*/
1339 
1340 			if (didit)
1341 				Queue[qgrp]->qg_nextrun += qintvl;
1342 		}
1343 #if _FFR_QUEUE_SCHED_DBG
1344 		if (tTd(69, 10))
1345 			sm_syslog(LOG_INFO, NOQID,
1346 				"sqr: wgrp=%d, cgrp=%d, qgrp=%d, intvl=%ld, QI=%ld, runall=%d, lastrun=%ld, nextrun=%ld, sched=%d",
1347 				wgrp, cgrp, qgrp, Queue[qgrp]->qg_queueintvl,
1348 				QueueIntvl, runall, lastsched,
1349 				Queue[qgrp]->qg_nextrun, sched);
1350 #endif /* _FFR_QUEUE_SCHED_DBG */
1351 		INCR_MOD(cgrp, WorkGrp[wgrp].wg_numqgrp);
1352 	} while (endgrp != cgrp);
1353 	if (minqintvl > 0)
1354 		(void) sm_setevent(minqintvl, runqueueevent, 0);
1355 }
1356 
1357 #if _FFR_QUEUE_RUN_PARANOIA
1358 /*
1359 **  CHECKQUEUERUNNER -- check whether a queue group hasn't been run.
1360 **
1361 **	Use this if events may get lost and hence queue runners may not
1362 **	be started and mail will pile up in a queue.
1363 **
1364 **	Parameters:
1365 **		none.
1366 **
1367 **	Returns:
1368 **		true if a queue run is necessary.
1369 **
1370 **	Side Effects:
1371 **		may schedule a queue run.
1372 */
1373 
1374 bool
1375 checkqueuerunner()
1376 {
1377 	int qgrp;
1378 	time_t now, minqintvl;
1379 
1380 	now = curtime();
1381 	minqintvl = 0;
1382 	for (qgrp = 0; qgrp < NumQueue && Queue[qgrp] != NULL; qgrp++)
1383 	{
1384 		time_t qintvl;
1385 
1386 		if (Queue[qgrp]->qg_queueintvl > 0)
1387 			qintvl = Queue[qgrp]->qg_queueintvl;
1388 		else if (QueueIntvl > 0)
1389 			qintvl = QueueIntvl;
1390 		else
1391 			qintvl = (time_t) 0;
1392 		if (Queue[qgrp]->qg_nextrun <= now - qintvl)
1393 		{
1394 			if (minqintvl == 0 || qintvl < minqintvl)
1395 				minqintvl = qintvl;
1396 			if (LogLevel > 1)
1397 				sm_syslog(LOG_WARNING, NOQID,
1398 					"checkqueuerunner: queue %d should have been run at %s, queue interval %ld",
1399 					qgrp,
1400 					arpadate(ctime(&Queue[qgrp]->qg_nextrun)),
1401 					qintvl);
1402 		}
1403 	}
1404 	if (minqintvl > 0)
1405 	{
1406 		(void) sm_setevent(minqintvl, runqueueevent, 0);
1407 		return true;
1408 	}
1409 	return false;
1410 }
1411 #endif /* _FFR_QUEUE_RUN_PARANOIA */
1412 
1413 /*
1414 **  RUNQUEUE -- run the jobs in the queue.
1415 **
1416 **	Gets the stuff out of the queue in some presumably logical
1417 **	order and processes them.
1418 **
1419 **	Parameters:
1420 **		forkflag -- true if the queue scanning should be done in
1421 **			a child process.  We double-fork so it is not our
1422 **			child and we don't have to clean up after it.
1423 **			false can be ignored if we have multiple queues.
1424 **		verbose -- if true, print out status information.
1425 **		persistent -- persistent queue runner?
1426 **		runall -- run all groups or only a subset (DoQueueRun)?
1427 **
1428 **	Returns:
1429 **		true if the queue run successfully began.
1430 **
1431 **	Side Effects:
1432 **		runs things in the mail queue using run_work_group().
1433 **		maybe schedules next queue run.
1434 */
1435 
1436 static ENVELOPE	QueueEnvelope;		/* the queue run envelope */
1437 static time_t	LastQueueTime = 0;	/* last time a queue ID assigned */
1438 static pid_t	LastQueuePid = -1;	/* last PID which had a queue ID */
1439 
1440 /* values for qp_supdirs */
1441 #define QP_NOSUB	0x0000	/* No subdirectories */
1442 #define QP_SUBDF	0x0001	/* "df" subdirectory */
1443 #define QP_SUBQF	0x0002	/* "qf" subdirectory */
1444 #define QP_SUBXF	0x0004	/* "xf" subdirectory */
1445 
1446 bool
1447 runqueue(forkflag, verbose, persistent, runall)
1448 	bool forkflag;
1449 	bool verbose;
1450 	bool persistent;
1451 	bool runall;
1452 {
1453 	int i;
1454 	bool ret = true;
1455 	static int curnum = 0;
1456 	sigfunc_t cursh;
1457 #if SM_HEAP_CHECK
1458 	SM_NONVOLATILE int oldgroup = 0;
1459 
1460 	if (sm_debug_active(&DebugLeakQ, 1))
1461 	{
1462 		oldgroup = sm_heap_group();
1463 		sm_heap_newgroup();
1464 		sm_dprintf("runqueue() heap group #%d\n", sm_heap_group());
1465 	}
1466 #endif /* SM_HEAP_CHECK */
1467 
1468 	/* queue run has been started, don't do any more this time */
1469 	DoQueueRun = false;
1470 
1471 	/* more than one queue or more than one directory per queue */
1472 	if (!forkflag && !verbose &&
1473 	    (WorkGrp[0].wg_qgs[0]->qg_numqueues > 1 || NumWorkGroups > 1 ||
1474 	     WorkGrp[0].wg_numqgrp > 1))
1475 		forkflag = true;
1476 
1477 	/*
1478 	**  For controlling queue runners via signals sent to this process.
1479 	**  Oldsh* will get called too by runners_sig* (if it is not SIG_IGN
1480 	**  or SIG_DFL) to preserve cleanup behavior. Now that this process
1481 	**  will have children (and perhaps grandchildren) this handler will
1482 	**  be left in place. This is because this process, once it has
1483 	**  finished spinning off queue runners, may go back to doing something
1484 	**  else (like being a daemon). And we still want on a SIG{TERM,HUP} to
1485 	**  clean up the child queue runners. Only install 'runners_sig*' once
1486 	**  else we'll get stuck looping forever.
1487 	*/
1488 
1489 	cursh = sm_signal(SIGTERM, runners_sigterm);
1490 	if (cursh != runners_sigterm)
1491 		Oldsh_term = cursh;
1492 	cursh = sm_signal(SIGHUP, runners_sighup);
1493 	if (cursh != runners_sighup)
1494 		Oldsh_hup = cursh;
1495 
1496 	for (i = 0; i < NumWorkGroups && !NoMoreRunners; i++)
1497 	{
1498 		int rwgflags = RWG_NONE;
1499 
1500 		/*
1501 		**  If MaxQueueChildren active then test whether the start
1502 		**  of the next queue group's additional queue runners (maximum)
1503 		**  will result in MaxQueueChildren being exceeded.
1504 		**
1505 		**  Note: do not use continue; even though another workgroup
1506 		**	may have fewer queue runners, this would be "unfair",
1507 		**	i.e., this work group might "starve" then.
1508 		*/
1509 
1510 #if _FFR_QUEUE_SCHED_DBG
1511 		if (tTd(69, 10))
1512 			sm_syslog(LOG_INFO, NOQID,
1513 				"rq: curnum=%d, MaxQueueChildren=%d, CurRunners=%d, WorkGrp[curnum].wg_maxact=%d",
1514 				curnum, MaxQueueChildren, CurRunners,
1515 				WorkGrp[curnum].wg_maxact);
1516 #endif /* _FFR_QUEUE_SCHED_DBG */
1517 		if (MaxQueueChildren > 0 &&
1518 		    CurRunners + WorkGrp[curnum].wg_maxact > MaxQueueChildren)
1519 			break;
1520 
1521 		/*
1522 		**  Pick up where we left off (curnum), in case we
1523 		**  used up all the children last time without finishing.
1524 		**  This give a round-robin fairness to queue runs.
1525 		**
1526 		**  Increment CurRunners before calling run_work_group()
1527 		**  to avoid a "race condition" with proc_list_drop() which
1528 		**  decrements CurRunners if the queue runners terminate.
1529 		**  Notice: CurRunners is an upper limit, in some cases
1530 		**  (too few jobs in the queue) this value is larger than
1531 		**  the actual number of queue runners. The discrepancy can
1532 		**  increase if some queue runners "hang" for a long time.
1533 		*/
1534 
1535 		CurRunners += WorkGrp[curnum].wg_maxact;
1536 		if (forkflag)
1537 			rwgflags |= RWG_FORK;
1538 		if (verbose)
1539 			rwgflags |= RWG_VERBOSE;
1540 		if (persistent)
1541 			rwgflags |= RWG_PERSISTENT;
1542 		if (runall)
1543 			rwgflags |= RWG_RUNALL;
1544 		ret = run_work_group(curnum, rwgflags);
1545 
1546 		/*
1547 		**  Failure means a message was printed for ETRN
1548 		**  and subsequent queues are likely to fail as well.
1549 		**  Decrement CurRunners in that case because
1550 		**  none have been started.
1551 		*/
1552 
1553 		if (!ret)
1554 		{
1555 			CurRunners -= WorkGrp[curnum].wg_maxact;
1556 			break;
1557 		}
1558 
1559 		if (!persistent)
1560 			schedule_queue_runs(runall, curnum, true);
1561 		INCR_MOD(curnum, NumWorkGroups);
1562 	}
1563 
1564 	/* schedule left over queue runs */
1565 	if (i < NumWorkGroups && !NoMoreRunners && !persistent)
1566 	{
1567 		int h;
1568 
1569 		for (h = curnum; i < NumWorkGroups; i++)
1570 		{
1571 			schedule_queue_runs(runall, h, false);
1572 			INCR_MOD(h, NumWorkGroups);
1573 		}
1574 	}
1575 
1576 
1577 #if SM_HEAP_CHECK
1578 	if (sm_debug_active(&DebugLeakQ, 1))
1579 		sm_heap_setgroup(oldgroup);
1580 #endif /* SM_HEAP_CHECK */
1581 	return ret;
1582 }
1583 
1584 #if _FFR_SKIP_DOMAINS
1585 /*
1586 **  SKIP_DOMAINS -- Skip 'skip' number of domains in the WorkQ.
1587 **
1588 **  Added by Stephen Frost <sfrost@snowman.net> to support
1589 **  having each runner process every N'th domain instead of
1590 **  every N'th message.
1591 **
1592 **	Parameters:
1593 **		skip -- number of domains in WorkQ to skip.
1594 **
1595 **	Returns:
1596 **		total number of messages skipped.
1597 **
1598 **	Side Effects:
1599 **		may change WorkQ
1600 */
1601 
1602 static int
1603 skip_domains(skip)
1604 	int skip;
1605 {
1606 	int n, seqjump;
1607 
1608 	for (n = 0, seqjump = 0; n < skip && WorkQ != NULL; seqjump++)
1609 	{
1610 		if (WorkQ->w_next != NULL)
1611 		{
1612 			if (WorkQ->w_host != NULL &&
1613 			    WorkQ->w_next->w_host != NULL)
1614 			{
1615 				if (sm_strcasecmp(WorkQ->w_host,
1616 						WorkQ->w_next->w_host) != 0)
1617 					n++;
1618 			}
1619 			else
1620 			{
1621 				if ((WorkQ->w_host != NULL &&
1622 				     WorkQ->w_next->w_host == NULL) ||
1623 				    (WorkQ->w_host == NULL &&
1624 				     WorkQ->w_next->w_host != NULL))
1625 					     n++;
1626 			}
1627 		}
1628 		WorkQ = WorkQ->w_next;
1629 	}
1630 	return seqjump;
1631 }
1632 #endif /* _FFR_SKIP_DOMAINS */
1633 
1634 /*
1635 **  RUNNER_WORK -- have a queue runner do its work
1636 **
**  Have a queue runner do its work on a list of entries.
1638 **  When work isn't directly being done then this process can take a signal
1639 **  and terminate immediately (in a clean fashion of course).
1640 **  When work is directly being done, it's not to be interrupted
1641 **  immediately: the work should be allowed to finish at a clean point
1642 **  before termination (in a clean fashion of course).
1643 **
1644 **	Parameters:
1645 **		e -- envelope.
1646 **		sequenceno -- 'th process to run WorkQ.
1647 **		didfork -- did the calling process fork()?
1648 **		skip -- process only each skip'th item.
1649 **		njobs -- number of jobs in WorkQ.
1650 **
1651 **	Returns:
1652 **		none.
1653 **
1654 **	Side Effects:
1655 **		runs things in the mail queue.
1656 */
1657 
static void
runner_work(e, sequenceno, didfork, skip, njobs)
	register ENVELOPE *e;
	int sequenceno;
	bool didfork;
	int skip;
	int njobs;
{
	int n, seqjump;
	WORK *w;
	time_t now;

	/*
	**  Overview: take entries off the head of the global WorkQ one at
	**  a time, hand each to dowork() unless shouldqueue() says to
	**  hold back, free the entry, and stop early when NoMoreRunners
	**  is set or the load average test fails.
	*/

	SM_GET_LA(now);

	/*
	**  Here we temporarily block the second calling of the handlers.
	**  This allows us to handle the signal without terminating in the
	**  middle of direct work. If a signal does come, the test for
	**  NoMoreRunners will find it.
	*/

	BlockOldsh = true;

	/* default step of the sequence number; recomputed for QSO_BYHOST */
	seqjump = skip;

	/* process them once at a time */
	while (WorkQ != NULL)
	{
#if SM_HEAP_CHECK
		SM_NONVOLATILE int oldgroup = 0;

		if (sm_debug_active(&DebugLeakQ, 1))
		{
			oldgroup = sm_heap_group();
			sm_heap_newgroup();
			sm_dprintf("run_queue_group() heap group #%d\n",
				sm_heap_group());
		}
#endif /* SM_HEAP_CHECK */

		/* do no more work */
		if (NoMoreRunners)
		{
			/*
			**  A signal arrived while BlockOldsh was set, so the
			**  handler did not chain to the old one; do it here.
			*/

			/* Check that a valid signal handler is callable */
			if (Oldsh != SIG_DFL && Oldsh != SIG_IGN &&
			    Oldsh != runners_sighup &&
			    Oldsh != runners_sigterm)
				(*Oldsh)(Oldsig);
			break;
		}

		/* w stays valid after WorkQ moves on; it is freed below */
		w = WorkQ; /* assign current work item */

		/*
		**  Set the head of the WorkQ to the next work item.
		**  It is set 'skip' ahead (the number of parallel queue
		**  runners working on WorkQ together) since each runner
		**  works on every 'skip'th (N-th) item.
#if _FFR_SKIP_DOMAINS
		**  In the case of the BYHOST Queue Sort Order, the 'item'
		**  is a domain, so we work on every 'skip'th (N-th) domain.
#endif * _FFR_SKIP_DOMAINS *
		*/

#if _FFR_SKIP_DOMAINS
		if (QueueSortOrder == QSO_BYHOST)
		{
			/*
			**  Same host as the next entry (or no next entry):
			**  step one entry.  Host changes after this entry:
			**  let skip_domains() step over 'skip' host changes
			**  and use its return value as the sequence jump.
			*/

			seqjump = 1;
			if (WorkQ->w_next != NULL)
			{
				if (WorkQ->w_host != NULL &&
				    WorkQ->w_next->w_host != NULL)
				{
					if (sm_strcasecmp(WorkQ->w_host,
							WorkQ->w_next->w_host)
								!= 0)
						seqjump = skip_domains(skip);
					else
						WorkQ = WorkQ->w_next;
				}
				else
				{
					/* exactly one host unknown counts as a change */
					if ((WorkQ->w_host != NULL &&
					     WorkQ->w_next->w_host == NULL) ||
					    (WorkQ->w_host == NULL &&
					     WorkQ->w_next->w_host != NULL))
						seqjump = skip_domains(skip);
					else
						WorkQ = WorkQ->w_next;
				}
			}
			else
				WorkQ = WorkQ->w_next;
		}
		else
#endif /* _FFR_SKIP_DOMAINS */
		{
			/* step over up to 'skip' entries, stopping at the end */
			for (n = 0; n < skip && WorkQ != NULL; n++)
				WorkQ = WorkQ->w_next;
		}

		e->e_to = NULL;

		/*
		**  Ignore jobs that are too expensive for the moment.
		**
		**	Get new load average every GET_NEW_LA_TIME seconds.
		*/

		SM_GET_LA(now);
		if (shouldqueue(WkRecipFact, Current_LA_time))
		{
			char *msg = "Aborting queue run: load average too high";

			if (Verbose)
				message("%s", msg);
			if (LogLevel > 8)
				sm_syslog(LOG_INFO, NOQID, "runqueue: %s", msg);

			/*
			**  NOTE(review): this break leaves the loop before
			**  the sm_free() calls below, so w is not freed on
			**  this path.
			*/

			break;
		}
		if (shouldqueue(w->w_pri, w->w_ctime))
		{
			/* this particular job is held back */
			if (Verbose)
				message(EmptyString);
			if (QueueSortOrder == QSO_BYPRIORITY)
			{
				/* when sorting by priority the rest of the queue is given up too */
				if (Verbose)
					message("Skipping %s/%s (sequence %d of %d) and flushing rest of queue",
						qid_printqueue(w->w_qgrp,
							       w->w_qdir),
						w->w_name + 2, sequenceno,
						njobs);
				if (LogLevel > 8)
					sm_syslog(LOG_INFO, NOQID,
						  "runqueue: Flushing queue from %s/%s (pri %ld, LA %d, %d of %d)",
						  qid_printqueue(w->w_qgrp,
								 w->w_qdir),
						  w->w_name + 2, w->w_pri,
						  CurrentLA, sequenceno,
						  njobs);

				/* NOTE(review): w is not freed on this path either */
				break;
			}
			else if (Verbose)
				message("Skipping %s/%s (sequence %d of %d)",
					qid_printqueue(w->w_qgrp, w->w_qdir),
					w->w_name + 2, sequenceno, njobs);
		}
		else
		{
			if (Verbose)
			{
				message(EmptyString);
				message("Running %s/%s (sequence %d of %d)",
					qid_printqueue(w->w_qgrp, w->w_qdir),
					w->w_name + 2, sequenceno, njobs);
			}

			/* SIGCHLD is blocked and handed to reapchild before the job runs */
			if (didfork && MaxQueueChildren > 0)
			{
				sm_blocksignal(SIGCHLD);
				(void) sm_signal(SIGCHLD, reapchild);
			}
			if (tTd(63, 100))
				sm_syslog(LOG_DEBUG, NOQID,
					  "runqueue %s dowork(%s)",
					  qid_printqueue(w->w_qgrp, w->w_qdir),
					  w->w_name + 2);

			/* w_name + 2 is passed on: the first two characters of the name are left out */
			(void) dowork(w->w_qgrp, w->w_qdir, w->w_name + 2,
				      ForkQueueRuns, false, e);
			errno = 0;
		}

		/* done with this entry: release it and its strings */
		sm_free(w->w_name); /* XXX */
		if (w->w_host != NULL)
			sm_free(w->w_host); /* XXX */
		sm_free((char *) w); /* XXX */
		sequenceno += seqjump; /* next sequence number */
#if SM_HEAP_CHECK
		if (sm_debug_active(&DebugLeakQ, 1))
			sm_heap_setgroup(oldgroup);
#endif /* SM_HEAP_CHECK */
	}

	/* from here on the signal handlers chain to the old handler themselves */
	BlockOldsh = false;

	/* check the signals didn't happen during the revert */
	if (NoMoreRunners)
	{
		/* Check that a valid signal handler is callable */
		if (Oldsh != SIG_DFL && Oldsh != SIG_IGN &&
		    Oldsh != runners_sighup && Oldsh != runners_sigterm)
			(*Oldsh)(Oldsig);
	}

	Oldsh = SIG_DFL; /* after the NoMoreRunners check */
}
1852 /*
1853 **  RUN_WORK_GROUP -- run the jobs in a queue group from a work group.
1854 **
1855 **	Gets the stuff out of the queue in some presumably logical
1856 **	order and processes them.
1857 **
1858 **	Parameters:
1859 **		wgrp -- work group to process.
1860 **		flags -- RWG_* flags
1861 **
1862 **	Returns:
1863 **		true if the queue run successfully began.
1864 **
1865 **	Side Effects:
1866 **		runs things in the mail queue.
1867 */
1868 
1869 /* Minimum sleep time for persistent queue runners */
1870 #define MIN_SLEEP_TIME	5
1871 
1872 bool
1873 run_work_group(wgrp, flags)
1874 	int wgrp;
1875 	int flags;
1876 {
1877 	register ENVELOPE *e;
1878 	int njobs, qdir;
1879 	int sequenceno = 1;
1880 	int qgrp, endgrp, h, i;
1881 	time_t now;
1882 	bool full, more;
1883 	SM_RPOOL_T *rpool;
1884 	extern ENVELOPE BlankEnvelope;
1885 	extern SIGFUNC_DECL reapchild __P((int));
1886 
1887 	if (wgrp < 0)
1888 		return false;
1889 
1890 	/*
1891 	**  If no work will ever be selected, don't even bother reading
1892 	**  the queue.
1893 	*/
1894 
1895 	SM_GET_LA(now);
1896 
1897 	if (!bitset(RWG_PERSISTENT, flags) &&
1898 	    shouldqueue(WkRecipFact, Current_LA_time))
1899 	{
1900 		char *msg = "Skipping queue run -- load average too high";
1901 
1902 		if (bitset(RWG_VERBOSE, flags))
1903 			message("458 %s\n", msg);
1904 		if (LogLevel > 8)
1905 			sm_syslog(LOG_INFO, NOQID, "runqueue: %s", msg);
1906 		return false;
1907 	}
1908 
1909 	/*
1910 	**  See if we already have too many children.
1911 	*/
1912 
1913 	if (bitset(RWG_FORK, flags) &&
1914 	    WorkGrp[wgrp].wg_lowqintvl > 0 &&
1915 	    !bitset(RWG_PERSISTENT, flags) &&
1916 	    MaxChildren > 0 && CurChildren >= MaxChildren)
1917 	{
1918 		char *msg = "Skipping queue run -- too many children";
1919 
1920 		if (bitset(RWG_VERBOSE, flags))
1921 			message("458 %s (%d)\n", msg, CurChildren);
1922 		if (LogLevel > 8)
1923 			sm_syslog(LOG_INFO, NOQID, "runqueue: %s (%d)",
1924 				  msg, CurChildren);
1925 		return false;
1926 	}
1927 
1928 	/*
1929 	**  See if we want to go off and do other useful work.
1930 	*/
1931 
1932 	if (bitset(RWG_FORK, flags))
1933 	{
1934 		pid_t pid;
1935 
1936 		(void) sm_blocksignal(SIGCHLD);
1937 		(void) sm_signal(SIGCHLD, reapchild);
1938 
1939 		pid = dofork();
1940 		if (pid == -1)
1941 		{
1942 			const char *msg = "Skipping queue run -- fork() failed";
1943 			const char *err = sm_errstring(errno);
1944 
1945 			if (bitset(RWG_VERBOSE, flags))
1946 				message("458 %s: %s\n", msg, err);
1947 			if (LogLevel > 8)
1948 				sm_syslog(LOG_INFO, NOQID, "runqueue: %s: %s",
1949 					  msg, err);
1950 			(void) sm_releasesignal(SIGCHLD);
1951 			return false;
1952 		}
1953 		if (pid != 0)
1954 		{
1955 			/* parent -- pick up intermediate zombie */
1956 			(void) sm_blocksignal(SIGALRM);
1957 
1958 			/* wgrp only used when queue runners are persistent */
1959 			proc_list_add(pid, "Queue runner", PROC_QUEUE,
1960 				      WorkGrp[wgrp].wg_maxact,
1961 				      bitset(RWG_PERSISTENT, flags) ? wgrp : -1,
1962 				      NULL);
1963 			(void) sm_releasesignal(SIGALRM);
1964 			(void) sm_releasesignal(SIGCHLD);
1965 			return true;
1966 		}
1967 
1968 		/* child -- clean up signals */
1969 
1970 		/* Reset global flags */
1971 		RestartRequest = NULL;
1972 		RestartWorkGroup = false;
1973 		ShutdownRequest = NULL;
1974 		PendingSignal = 0;
1975 		CurrentPid = getpid();
1976 		close_sendmail_pid();
1977 
1978 		/*
1979 		**  Initialize exception stack and default exception
1980 		**  handler for child process.
1981 		*/
1982 
1983 		sm_exc_newthread(fatal_error);
1984 		clrcontrol();
1985 		proc_list_clear();
1986 
1987 		/* Add parent process as first child item */
1988 		proc_list_add(CurrentPid, "Queue runner child process",
1989 			      PROC_QUEUE_CHILD, 0, -1, NULL);
1990 		(void) sm_releasesignal(SIGCHLD);
1991 		(void) sm_signal(SIGCHLD, SIG_DFL);
1992 		(void) sm_signal(SIGHUP, SIG_DFL);
1993 		(void) sm_signal(SIGTERM, intsig);
1994 	}
1995 
1996 	/*
1997 	**  Release any resources used by the daemon code.
1998 	*/
1999 
2000 	clrdaemon();
2001 
2002 	/* force it to run expensive jobs */
2003 	NoConnect = false;
2004 
2005 	/* drop privileges */
2006 	if (geteuid() == (uid_t) 0)
2007 		(void) drop_privileges(false);
2008 
2009 	/*
2010 	**  Create ourselves an envelope
2011 	*/
2012 
2013 	CurEnv = &QueueEnvelope;
2014 	rpool = sm_rpool_new_x(NULL);
2015 	e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2016 	e->e_flags = BlankEnvelope.e_flags;
2017 	e->e_parent = NULL;
2018 
2019 	/* make sure we have disconnected from parent */
2020 	if (bitset(RWG_FORK, flags))
2021 	{
2022 		disconnect(1, e);
2023 		QuickAbort = false;
2024 	}
2025 
2026 	/*
2027 	**  If we are running part of the queue, always ignore stored
2028 	**  host status.
2029 	*/
2030 
2031 	if (QueueLimitId != NULL || QueueLimitSender != NULL ||
2032 	    QueueLimitQuarantine != NULL ||
2033 	    QueueLimitRecipient != NULL)
2034 	{
2035 		IgnoreHostStatus = true;
2036 		MinQueueAge = 0;
2037 	}
2038 
2039 	/*
2040 	**  Here is where we choose the queue group from the work group.
2041 	**  The caller of the "domorework" label must setup a new envelope.
2042 	*/
2043 
2044 	endgrp = WorkGrp[wgrp].wg_curqgrp; /* to not spin endlessly */
2045 
2046   domorework:
2047 
2048 	/*
2049 	**  Run a queue group if:
2050 	**  RWG_RUNALL bit is set or the bit for this group is set.
2051 	*/
2052 
2053 	now = curtime();
2054 	for (;;)
2055 	{
2056 		/*
2057 		**  Find the next queue group within the work group that
2058 		**  has been marked as needing a run.
2059 		*/
2060 
2061 		qgrp = WorkGrp[wgrp].wg_qgs[WorkGrp[wgrp].wg_curqgrp]->qg_index;
2062 		WorkGrp[wgrp].wg_curqgrp++; /* advance */
2063 		WorkGrp[wgrp].wg_curqgrp %= WorkGrp[wgrp].wg_numqgrp; /* wrap */
2064 		if (bitset(RWG_RUNALL, flags) ||
2065 		    (Queue[qgrp]->qg_nextrun <= now &&
2066 		     Queue[qgrp]->qg_nextrun != (time_t) -1))
2067 			break;
2068 		if (endgrp == WorkGrp[wgrp].wg_curqgrp)
2069 		{
2070 			e->e_id = NULL;
2071 			if (bitset(RWG_FORK, flags))
2072 				finis(true, true, ExitStat);
2073 			return true; /* we're done */
2074 		}
2075 	}
2076 
2077 	qdir = Queue[qgrp]->qg_curnum; /* round-robin init of queue position */
2078 #if _FFR_QUEUE_SCHED_DBG
2079 	if (tTd(69, 12))
2080 		sm_syslog(LOG_INFO, NOQID,
2081 			"rwg: wgrp=%d, qgrp=%d, qdir=%d, name=%s, curqgrp=%d, numgrps=%d",
2082 			wgrp, qgrp, qdir, qid_printqueue(qgrp, qdir),
2083 			WorkGrp[wgrp].wg_curqgrp, WorkGrp[wgrp].wg_numqgrp);
2084 #endif /* _FFR_QUEUE_SCHED_DBG */
2085 
2086 #if HASNICE
2087 	/* tweak niceness of queue runs */
2088 	if (Queue[qgrp]->qg_nice > 0)
2089 		(void) nice(Queue[qgrp]->qg_nice);
2090 #endif /* HASNICE */
2091 
2092 	/* XXX running queue group... */
2093 	sm_setproctitle(true, CurEnv, "running queue: %s",
2094 			qid_printqueue(qgrp, qdir));
2095 
2096 	if (LogLevel > 69 || tTd(63, 99))
2097 		sm_syslog(LOG_DEBUG, NOQID,
2098 			  "runqueue %s, pid=%d, forkflag=%d",
2099 			  qid_printqueue(qgrp, qdir), (int) CurrentPid,
2100 			  bitset(RWG_FORK, flags));
2101 
2102 	/*
2103 	**  Start making passes through the queue.
2104 	**	First, read and sort the entire queue.
2105 	**	Then, process the work in that order.
2106 	**		But if you take too long, start over.
2107 	*/
2108 
2109 	for (i = 0; i < Queue[qgrp]->qg_numqueues; i++)
2110 	{
2111 		h = gatherq(qgrp, qdir, false, &full, &more);
2112 #if SM_CONF_SHM
2113 		if (ShmId != SM_SHM_NO_ID)
2114 			QSHM_ENTRIES(Queue[qgrp]->qg_qpaths[qdir].qp_idx) = h;
2115 #endif /* SM_CONF_SHM */
2116 		/* If there are no more items in this queue advance */
2117 		if (!more)
2118 		{
2119 			/* A round-robin advance */
2120 			qdir++;
2121 			qdir %= Queue[qgrp]->qg_numqueues;
2122 		}
2123 
2124 		/* Has the WorkList reached the limit? */
2125 		if (full)
2126 			break; /* don't try to gather more */
2127 	}
2128 
2129 	/* order the existing work requests */
2130 	njobs = sortq(Queue[qgrp]->qg_maxlist);
2131 	Queue[qgrp]->qg_curnum = qdir; /* update */
2132 
2133 
2134 	if (!Verbose && bitnset(QD_FORK, Queue[qgrp]->qg_flags))
2135 	{
2136 		int loop, maxrunners;
2137 		pid_t pid;
2138 
2139 		/*
2140 		**  For this WorkQ we want to fork off N children (maxrunners)
2141 		**  at this point. Each child has a copy of WorkQ. Each child
2142 		**  will process every N-th item. The parent will wait for all
2143 		**  of the children to finish before moving on to the next
2144 		**  queue group within the work group. This saves us forking
2145 		**  a new runner-child for each work item.
2146 		**  It's valid for qg_maxqrun == 0 since this may be an
2147 		**  explicit "don't run this queue" setting.
2148 		*/
2149 
2150 		maxrunners = Queue[qgrp]->qg_maxqrun;
2151 
2152 		/* No need to have more runners than there are jobs */
2153 		if (maxrunners > njobs)
2154 			maxrunners = njobs;
2155 		for (loop = 0; loop < maxrunners; loop++)
2156 		{
2157 			/*
2158 			**  Since the delivery may happen in a child and the
2159 			**  parent does not wait, the parent may close the
2160 			**  maps thereby removing any shared memory used by
2161 			**  the map.  Therefore, close the maps now so the
2162 			**  child will dynamically open them if necessary.
2163 			*/
2164 
2165 			closemaps(false);
2166 
2167 			pid = fork();
2168 			if (pid < 0)
2169 			{
2170 				syserr("run_work_group: cannot fork");
2171 				return false;
2172 			}
2173 			else if (pid > 0)
2174 			{
2175 				/* parent -- clean out connection cache */
2176 				mci_flush(false, NULL);
2177 #if _FFR_SKIP_DOMAINS
2178 				if (QueueSortOrder == QSO_BYHOST)
2179 				{
2180 					sequenceno += skip_domains(1);
2181 				}
2182 				else
2183 #endif /* _FFR_SKIP_DOMAINS */
2184 				{
2185 					/* for the skip */
2186 					WorkQ = WorkQ->w_next;
2187 					sequenceno++;
2188 				}
2189 				proc_list_add(pid, "Queue child runner process",
2190 					      PROC_QUEUE_CHILD, 0, -1, NULL);
2191 
2192 				/* No additional work, no additional runners */
2193 				if (WorkQ == NULL)
2194 					break;
2195 			}
2196 			else
2197 			{
2198 				/* child -- Reset global flags */
2199 				RestartRequest = NULL;
2200 				RestartWorkGroup = false;
2201 				ShutdownRequest = NULL;
2202 				PendingSignal = 0;
2203 				CurrentPid = getpid();
2204 				close_sendmail_pid();
2205 
2206 				/*
2207 				**  Initialize exception stack and default
2208 				**  exception handler for child process.
2209 				**  When fork()'d the child now has a private
2210 				**  copy of WorkQ at its current position.
2211 				*/
2212 
2213 				sm_exc_newthread(fatal_error);
2214 
2215 				/*
2216 				**  SMTP processes (whether -bd or -bs) set
2217 				**  SIGCHLD to reapchild to collect
2218 				**  children status.  However, at delivery
2219 				**  time, that status must be collected
2220 				**  by sm_wait() to be dealt with properly
2221 				**  (check success of delivery based
2222 				**  on status code, etc).  Therefore, if we
2223 				**  are an SMTP process, reset SIGCHLD
2224 				**  back to the default so reapchild
2225 				**  doesn't collect status before
2226 				**  sm_wait().
2227 				*/
2228 
2229 				if (OpMode == MD_SMTP ||
2230 				    OpMode == MD_DAEMON ||
2231 				    MaxQueueChildren > 0)
2232 				{
2233 					proc_list_clear();
2234 					sm_releasesignal(SIGCHLD);
2235 					(void) sm_signal(SIGCHLD, SIG_DFL);
2236 				}
2237 
2238 				/* child -- error messages to the transcript */
2239 				QuickAbort = OnlyOneError = false;
2240 				runner_work(e, sequenceno, true,
2241 					    maxrunners, njobs);
2242 
2243 				/* This child is done */
2244 				finis(true, true, ExitStat);
2245 				/* NOTREACHED */
2246 			}
2247 		}
2248 
2249 		sm_releasesignal(SIGCHLD);
2250 
2251 		/*
2252 		**  Wait until all of the runners have completed before
2253 		**  seeing if there is another queue group in the
2254 		**  work group to process.
2255 		**  XXX Future enhancement: don't wait() for all children
2256 		**  here, just go ahead and make sure that overall the number
2257 		**  of children is not exceeded.
2258 		*/
2259 
2260 		while (CurChildren > 0)
2261 		{
2262 			int status;
2263 			pid_t ret;
2264 
2265 			while ((ret = sm_wait(&status)) <= 0)
2266 				continue;
2267 			proc_list_drop(ret, status, NULL);
2268 		}
2269 	}
2270 	else if (Queue[qgrp]->qg_maxqrun > 0 || bitset(RWG_FORCE, flags))
2271 	{
2272 		/*
2273 		**  When current process will not fork children to do the work,
2274 		**  it will do the work itself. The 'skip' will be 1 since
2275 		**  there are no child runners to divide the work across.
2276 		*/
2277 
2278 		runner_work(e, sequenceno, false, 1, njobs);
2279 	}
2280 
2281 	/* free memory allocated by newenvelope() above */
2282 	sm_rpool_free(rpool);
2283 	QueueEnvelope.e_rpool = NULL;
2284 
2285 	/* Are there still more queues in the work group to process? */
2286 	if (endgrp != WorkGrp[wgrp].wg_curqgrp)
2287 	{
2288 		rpool = sm_rpool_new_x(NULL);
2289 		e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2290 		e->e_flags = BlankEnvelope.e_flags;
2291 		goto domorework;
2292 	}
2293 
2294 	/* No more queues in work group to process. Now check persistent. */
2295 	if (bitset(RWG_PERSISTENT, flags))
2296 	{
2297 		sequenceno = 1;
2298 		sm_setproctitle(true, CurEnv, "running queue: %s",
2299 				qid_printqueue(qgrp, qdir));
2300 
2301 		/*
2302 		**  close bogus maps, i.e., maps which caused a tempfail,
2303 		**	so we get fresh map connections on the next lookup.
2304 		**  closemaps() is also called when children are started.
2305 		*/
2306 
2307 		closemaps(true);
2308 
2309 		/* Close any cached connections. */
2310 		mci_flush(true, NULL);
2311 
2312 		/* Clean out expired related entries. */
2313 		rmexpstab();
2314 
2315 #if NAMED_BIND
2316 		/* Update MX records for FallbackMX. */
2317 		if (FallbackMX != NULL)
2318 			(void) getfallbackmxrr(FallbackMX);
2319 #endif /* NAMED_BIND */
2320 
2321 #if USERDB
2322 		/* close UserDatabase */
2323 		_udbx_close();
2324 #endif /* USERDB */
2325 
2326 #if SM_HEAP_CHECK
2327 		if (sm_debug_active(&SmHeapCheck, 2)
2328 		    && access("memdump", F_OK) == 0
2329 		   )
2330 		{
2331 			SM_FILE_T *out;
2332 
2333 			remove("memdump");
2334 			out = sm_io_open(SmFtStdio, SM_TIME_DEFAULT,
2335 					 "memdump.out", SM_IO_APPEND, NULL);
2336 			if (out != NULL)
2337 			{
2338 				(void) sm_io_fprintf(out, SM_TIME_DEFAULT, "----------------------\n");
2339 				sm_heap_report(out,
2340 					sm_debug_level(&SmHeapCheck) - 1);
2341 				(void) sm_io_close(out, SM_TIME_DEFAULT);
2342 			}
2343 		}
2344 #endif /* SM_HEAP_CHECK */
2345 
2346 		/* let me rest for a second to catch my breath */
2347 		if (njobs == 0 && WorkGrp[wgrp].wg_lowqintvl < MIN_SLEEP_TIME)
2348 			sleep(MIN_SLEEP_TIME);
2349 		else if (WorkGrp[wgrp].wg_lowqintvl <= 0)
2350 			sleep(QueueIntvl > 0 ? QueueIntvl : MIN_SLEEP_TIME);
2351 		else
2352 			sleep(WorkGrp[wgrp].wg_lowqintvl);
2353 
2354 		/*
2355 		**  Get the LA outside the WorkQ loop if necessary.
2356 		**  In a persistent queue runner the code is repeated over
2357 		**  and over but gatherq() may ignore entries due to
2358 		**  shouldqueue() (do we really have to do this twice?).
2359 		**  Hence the queue runners would just idle around when once
2360 		**  CurrentLA caused all entries in a queue to be ignored.
2361 		*/
2362 
2363 		if (njobs == 0)
2364 			SM_GET_LA(now);
2365 		rpool = sm_rpool_new_x(NULL);
2366 		e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2367 		e->e_flags = BlankEnvelope.e_flags;
2368 		goto domorework;
2369 	}
2370 
2371 	/* exit without the usual cleanup */
2372 	e->e_id = NULL;
2373 	if (bitset(RWG_FORK, flags))
2374 		finis(true, true, ExitStat);
2375 	/* NOTREACHED */
2376 	return true;
2377 }
2378 
2379 /*
2380 **  DOQUEUERUN -- do a queue run?
2381 */
2382 
2383 bool
2384 doqueuerun()
2385 {
2386 	return DoQueueRun;
2387 }
2388 
2389 /*
2390 **  RUNQUEUEEVENT -- Sets a flag to indicate that a queue run should be done.
2391 **
2392 **	Parameters:
2393 **		none.
2394 **
2395 **	Returns:
2396 **		none.
2397 **
2398 **	Side Effects:
2399 **		The invocation of this function via an alarm may interrupt
2400 **		a set of actions. Thus errno may be set in that context.
2401 **		We need to restore errno at the end of this function to ensure
2402 **		that any work done here that sets errno doesn't return a
2403 **		misleading/false errno value. Errno may	be EINTR upon entry to
2404 **		this function because of non-restartable/continuable system
2405 **		API was active. Iff this is true we will override errno as
2406 **		a timeout (as a more accurate error message).
2407 **
2408 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
2409 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
2410 **		DOING.
2411 */
2412 
2413 void
2414 runqueueevent(ignore)
2415 	int ignore;
2416 {
2417 	int save_errno = errno;
2418 
2419 	/*
2420 	**  Set the general bit that we want a queue run,
2421 	**  tested in doqueuerun()
2422 	*/
2423 
2424 	DoQueueRun = true;
2425 #if _FFR_QUEUE_SCHED_DBG
2426 	if (tTd(69, 10))
2427 		sm_syslog(LOG_INFO, NOQID, "rqe: done");
2428 #endif /* _FFR_QUEUE_SCHED_DBG */
2429 
2430 	errno = save_errno;
2431 	if (errno == EINTR)
2432 		errno = ETIMEDOUT;
2433 }
2434 /*
2435 **  GATHERQ -- gather messages from the message queue(s) into the work queue.
2436 **
2437 **	Parameters:
2438 **		qgrp -- the index of the queue group.
2439 **		qdir -- the index of the queue directory.
2440 **		doall -- if set, include everything in the queue (even
2441 **			the jobs that cannot be run because the load
2442 **			average is too high, or MaxQueueRun is reached).
2443 **			Otherwise, exclude those jobs.
2444 **		full -- (optional) to be set 'true' if WorkList is full
2445 **		more -- (optional) to be set 'true' if there are still more
2446 **			messages in this queue not added to WorkList
2447 **
2448 **	Returns:
2449 **		The number of requests in the queue (not necessarily
2450 **		the number of requests in WorkList however).
2451 **
2452 **	Side Effects:
2453 **		prepares available work into WorkList
2454 */
2455 
2456 #define NEED_P		0001	/* 'P': priority line not yet read */
2457 #define NEED_T		0002	/* 'T': time line not yet read */
2458 #define NEED_R		0004	/* 'R': no recipient matched QueueLimitRecipient yet */
2459 #define NEED_S		0010	/* 'S': sender not yet matched against QueueLimitSender */
2460 #define NEED_H		0020	/* host: w_host not yet taken from an 'R' line */
2461 #define HAS_QUARANTINE	0040	/* has an unexpected 'q' line */
2462 #define NEED_QUARANTINE	0100	/* 'q': reason not yet matched against QueueLimitQuarantine */
2463 
2464 static WORK	*WorkList = NULL;	/* list of unsorted work, filled by gatherq() */
2465 static int	WorkListSize = 0;	/* current max size of WorkList */
2466 static int	WorkListCount = 0;	/* # of work items in WorkList */
2467 
2468 static int
2469 gatherq(qgrp, qdir, doall, full, more)
2470 	int qgrp;
2471 	int qdir;
2472 	bool doall;
2473 	bool *full;
2474 	bool *more;
2475 {
2476 	register struct dirent *d;
2477 	register WORK *w;
2478 	register char *p;
2479 	DIR *f;
2480 	int i, num_ent;
2481 	int wn;
2482 	QUEUE_CHAR *check;
2483 	char qd[MAXPATHLEN];
2484 	char qf[MAXPATHLEN];
2485 
2486 	wn = WorkListCount - 1;
2487 	num_ent = 0;
2488 	if (qdir == NOQDIR)
2489 		(void) sm_strlcpy(qd, ".", sizeof(qd));
2490 	else
2491 		(void) sm_strlcpyn(qd, sizeof(qd), 2,
2492 			Queue[qgrp]->qg_qpaths[qdir].qp_name,
2493 			(bitset(QP_SUBQF,
2494 				Queue[qgrp]->qg_qpaths[qdir].qp_subdirs)
2495 					? "/qf" : ""));
2496 
2497 	if (tTd(41, 1))
2498 	{
2499 		sm_dprintf("gatherq:\n");
2500 
2501 		check = QueueLimitId;
2502 		while (check != NULL)
2503 		{
2504 			sm_dprintf("\tQueueLimitId = %s%s\n",
2505 				check->queue_negate ? "!" : "",
2506 				check->queue_match);
2507 			check = check->queue_next;
2508 		}
2509 
2510 		check = QueueLimitSender;
2511 		while (check != NULL)
2512 		{
2513 			sm_dprintf("\tQueueLimitSender = %s%s\n",
2514 				check->queue_negate ? "!" : "",
2515 				check->queue_match);
2516 			check = check->queue_next;
2517 		}
2518 
2519 		check = QueueLimitRecipient;
2520 		while (check != NULL)
2521 		{
2522 			sm_dprintf("\tQueueLimitRecipient = %s%s\n",
2523 				check->queue_negate ? "!" : "",
2524 				check->queue_match);
2525 			check = check->queue_next;
2526 		}
2527 
2528 		if (QueueMode == QM_QUARANTINE)
2529 		{
2530 			check = QueueLimitQuarantine;
2531 			while (check != NULL)
2532 			{
2533 				sm_dprintf("\tQueueLimitQuarantine = %s%s\n",
2534 					   check->queue_negate ? "!" : "",
2535 					   check->queue_match);
2536 				check = check->queue_next;
2537 			}
2538 		}
2539 	}
2540 
2541 	/* open the queue directory */
2542 	f = opendir(qd);
2543 	if (f == NULL)
2544 	{
2545 		syserr("gatherq: cannot open \"%s\"",
2546 			qid_printqueue(qgrp, qdir));
2547 		if (full != NULL)
2548 			*full = WorkListCount >= MaxQueueRun && MaxQueueRun > 0;
2549 		if (more != NULL)
2550 			*more = false;
2551 		return 0;
2552 	}
2553 
2554 	/*
2555 	**  Read the work directory.
2556 	*/
2557 
2558 	while ((d = readdir(f)) != NULL)
2559 	{
2560 		SM_FILE_T *cf;
2561 		int qfver = 0;
2562 		char lbuf[MAXNAME + 1];
2563 		struct stat sbuf;
2564 
2565 		if (tTd(41, 50))
2566 			sm_dprintf("gatherq: checking %s..", d->d_name);
2567 
2568 		/* is this an interesting entry? */
2569 		if (!(((QueueMode == QM_NORMAL &&
2570 			d->d_name[0] == NORMQF_LETTER) ||
2571 		       (QueueMode == QM_QUARANTINE &&
2572 			d->d_name[0] == QUARQF_LETTER) ||
2573 		       (QueueMode == QM_LOST &&
2574 			d->d_name[0] == LOSEQF_LETTER)) &&
2575 		      d->d_name[1] == 'f'))
2576 		{
2577 			if (tTd(41, 50))
2578 				sm_dprintf("  skipping\n");
2579 			continue;
2580 		}
2581 		if (tTd(41, 50))
2582 			sm_dprintf("\n");
2583 
2584 		if (strlen(d->d_name) >= MAXQFNAME)
2585 		{
2586 			if (Verbose)
2587 				(void) sm_io_fprintf(smioout, SM_TIME_DEFAULT,
2588 						     "gatherq: %s too long, %d max characters\n",
2589 						     d->d_name, MAXQFNAME);
2590 			if (LogLevel > 0)
2591 				sm_syslog(LOG_ALERT, NOQID,
2592 					  "gatherq: %s too long, %d max characters",
2593 					  d->d_name, MAXQFNAME);
2594 			continue;
2595 		}
2596 
2597 		check = QueueLimitId;
2598 		while (check != NULL)
2599 		{
2600 			if (strcontainedin(false, check->queue_match,
2601 					   d->d_name) != check->queue_negate)
2602 				break;
2603 			else
2604 				check = check->queue_next;
2605 		}
2606 		if (QueueLimitId != NULL && check == NULL)
2607 			continue;
2608 
2609 		/* grow work list if necessary */
2610 		if (++wn >= MaxQueueRun && MaxQueueRun > 0)
2611 		{
2612 			if (wn == MaxQueueRun && LogLevel > 0)
2613 				sm_syslog(LOG_WARNING, NOQID,
2614 					  "WorkList for %s maxed out at %d",
2615 					  qid_printqueue(qgrp, qdir),
2616 					  MaxQueueRun);
2617 			if (doall)
2618 				continue;	/* just count entries */
2619 			break;
2620 		}
2621 		if (wn >= WorkListSize)
2622 		{
2623 			grow_wlist(qgrp, qdir);
2624 			if (wn >= WorkListSize)
2625 				continue;
2626 		}
2627 		SM_ASSERT(wn >= 0);
2628 		w = &WorkList[wn];
2629 
2630 		(void) sm_strlcpyn(qf, sizeof(qf), 3, qd, "/", d->d_name);
2631 		if (stat(qf, &sbuf) < 0)
2632 		{
2633 			if (errno != ENOENT)
2634 				sm_syslog(LOG_INFO, NOQID,
2635 					  "gatherq: can't stat %s/%s",
2636 					  qid_printqueue(qgrp, qdir),
2637 					  d->d_name);
2638 			wn--;
2639 			continue;
2640 		}
2641 		if (!bitset(S_IFREG, sbuf.st_mode))
2642 		{
2643 			/* Yikes!  Skip it or we will hang on open! */
2644 			if (!((d->d_name[0] == DATAFL_LETTER ||
2645 			       d->d_name[0] == NORMQF_LETTER ||
2646 			       d->d_name[0] == QUARQF_LETTER ||
2647 			       d->d_name[0] == LOSEQF_LETTER ||
2648 			       d->d_name[0] == XSCRPT_LETTER) &&
2649 			      d->d_name[1] == 'f' && d->d_name[2] == '\0'))
2650 				syserr("gatherq: %s/%s is not a regular file",
2651 				       qid_printqueue(qgrp, qdir), d->d_name);
2652 			wn--;
2653 			continue;
2654 		}
2655 
2656 		/* avoid work if possible */
2657 		if ((QueueSortOrder == QSO_BYFILENAME ||
2658 		     QueueSortOrder == QSO_BYMODTIME ||
2659 		     QueueSortOrder == QSO_NONE ||
2660 		     QueueSortOrder == QSO_RANDOM) &&
2661 		    QueueLimitQuarantine == NULL &&
2662 		    QueueLimitSender == NULL &&
2663 		    QueueLimitRecipient == NULL)
2664 		{
2665 			w->w_qgrp = qgrp;
2666 			w->w_qdir = qdir;
2667 			w->w_name = newstr(d->d_name);
2668 			w->w_host = NULL;
2669 			w->w_lock = w->w_tooyoung = false;
2670 			w->w_pri = 0;
2671 			w->w_ctime = 0;
2672 			w->w_mtime = sbuf.st_mtime;
2673 			++num_ent;
2674 			continue;
2675 		}
2676 
2677 		/* open control file */
2678 		cf = sm_io_open(SmFtStdio, SM_TIME_DEFAULT, qf, SM_IO_RDONLY_B,
2679 				NULL);
2680 		if (cf == NULL && OpMode != MD_PRINT)
2681 		{
2682 			/* this may be some random person sending hir msgs */
2683 			if (tTd(41, 2))
2684 				sm_dprintf("gatherq: cannot open %s: %s\n",
2685 					d->d_name, sm_errstring(errno));
2686 			errno = 0;
2687 			wn--;
2688 			continue;
2689 		}
2690 		w->w_qgrp = qgrp;
2691 		w->w_qdir = qdir;
2692 		w->w_name = newstr(d->d_name);
2693 		w->w_host = NULL;
2694 		if (cf != NULL)
2695 		{
2696 			w->w_lock = !lockfile(sm_io_getinfo(cf, SM_IO_WHAT_FD,
2697 							    NULL),
2698 					      w->w_name, NULL,
2699 					      LOCK_SH|LOCK_NB);
2700 		}
2701 		w->w_tooyoung = false;
2702 
2703 		/* make sure jobs in creation don't clog queue */
2704 		w->w_pri = 0x7fffffff;
2705 		w->w_ctime = 0;
2706 		w->w_mtime = sbuf.st_mtime;
2707 
2708 		/* extract useful information */
2709 		i = NEED_P|NEED_T;
2710 		if (QueueSortOrder == QSO_BYHOST
2711 #if _FFR_RHS
2712 		    || QueueSortOrder == QSO_BYSHUFFLE
2713 #endif /* _FFR_RHS */
2714 		   )
2715 		{
2716 			/* need w_host set for host sort order */
2717 			i |= NEED_H;
2718 		}
2719 		if (QueueLimitSender != NULL)
2720 			i |= NEED_S;
2721 		if (QueueLimitRecipient != NULL)
2722 			i |= NEED_R;
2723 		if (QueueLimitQuarantine != NULL)
2724 			i |= NEED_QUARANTINE;
2725 		while (cf != NULL && i != 0 &&
2726 		       sm_io_fgets(cf, SM_TIME_DEFAULT, lbuf,
2727 				   sizeof(lbuf)) != NULL)
2728 		{
2729 			int c;
2730 			time_t age;
2731 
2732 			p = strchr(lbuf, '\n');
2733 			if (p != NULL)
2734 				*p = '\0';
2735 			else
2736 			{
2737 				/* flush rest of overly long line */
2738 				while ((c = sm_io_getc(cf, SM_TIME_DEFAULT))
2739 				       != SM_IO_EOF && c != '\n')
2740 					continue;
2741 			}
2742 
2743 			switch (lbuf[0])
2744 			{
2745 			  case 'V':
2746 				qfver = atoi(&lbuf[1]);
2747 				break;
2748 
2749 			  case 'P':
2750 				w->w_pri = atol(&lbuf[1]);
2751 				i &= ~NEED_P;
2752 				break;
2753 
2754 			  case 'T':
2755 				w->w_ctime = atol(&lbuf[1]);
2756 				i &= ~NEED_T;
2757 				break;
2758 
2759 			  case 'q':
2760 				if (QueueMode != QM_QUARANTINE &&
2761 				    QueueMode != QM_LOST)
2762 				{
2763 					if (tTd(41, 49))
2764 						sm_dprintf("%s not marked as quarantined but has a 'q' line\n",
2765 							   w->w_name);
2766 					i |= HAS_QUARANTINE;
2767 				}
2768 				else if (QueueMode == QM_QUARANTINE)
2769 				{
2770 					if (QueueLimitQuarantine == NULL)
2771 					{
2772 						i &= ~NEED_QUARANTINE;
2773 						break;
2774 					}
2775 					p = &lbuf[1];
2776 					check = QueueLimitQuarantine;
2777 					while (check != NULL)
2778 					{
2779 						if (strcontainedin(false,
2780 								   check->queue_match,
2781 								   p) !=
2782 						    check->queue_negate)
2783 							break;
2784 						else
2785 							check = check->queue_next;
2786 					}
2787 					if (check != NULL)
2788 						i &= ~NEED_QUARANTINE;
2789 				}
2790 				break;
2791 
2792 			  case 'R':
2793 				if (w->w_host == NULL &&
2794 				    (p = strrchr(&lbuf[1], '@')) != NULL)
2795 				{
2796 #if _FFR_RHS
2797 					if (QueueSortOrder == QSO_BYSHUFFLE)
2798 						w->w_host = newstr(&p[1]);
2799 					else
2800 #endif /* _FFR_RHS */
2801 						w->w_host = strrev(&p[1]);
2802 					makelower(w->w_host);
2803 					i &= ~NEED_H;
2804 				}
2805 				if (QueueLimitRecipient == NULL)
2806 				{
2807 					i &= ~NEED_R;
2808 					break;
2809 				}
2810 				if (qfver > 0)
2811 				{
2812 					p = strchr(&lbuf[1], ':');
2813 					if (p == NULL)
2814 						p = &lbuf[1];
2815 					else
2816 						++p; /* skip over ':' */
2817 				}
2818 				else
2819 					p = &lbuf[1];
2820 				check = QueueLimitRecipient;
2821 				while (check != NULL)
2822 				{
2823 					if (strcontainedin(true,
2824 							   check->queue_match,
2825 							   p) !=
2826 					    check->queue_negate)
2827 						break;
2828 					else
2829 						check = check->queue_next;
2830 				}
2831 				if (check != NULL)
2832 					i &= ~NEED_R;
2833 				break;
2834 
2835 			  case 'S':
2836 				check = QueueLimitSender;
2837 				while (check != NULL)
2838 				{
2839 					if (strcontainedin(true,
2840 							   check->queue_match,
2841 							   &lbuf[1]) !=
2842 					    check->queue_negate)
2843 						break;
2844 					else
2845 						check = check->queue_next;
2846 				}
2847 				if (check != NULL)
2848 					i &= ~NEED_S;
2849 				break;
2850 
2851 			  case 'K':
2852 				age = curtime() - (time_t) atol(&lbuf[1]);
2853 				if (age >= 0 && MinQueueAge > 0 &&
2854 				    age < MinQueueAge)
2855 					w->w_tooyoung = true;
2856 				break;
2857 
2858 			  case 'N':
2859 				if (atol(&lbuf[1]) == 0)
2860 					w->w_tooyoung = false;
2861 				break;
2862 			}
2863 		}
2864 		if (cf != NULL)
2865 			(void) sm_io_close(cf, SM_TIME_DEFAULT);
2866 
2867 		if ((!doall && (shouldqueue(w->w_pri, w->w_ctime) ||
2868 		    w->w_tooyoung)) ||
2869 		    bitset(HAS_QUARANTINE, i) ||
2870 		    bitset(NEED_QUARANTINE, i) ||
2871 		    bitset(NEED_R|NEED_S, i))
2872 		{
2873 			/* don't even bother sorting this job in */
2874 			if (tTd(41, 49))
2875 				sm_dprintf("skipping %s (%x)\n", w->w_name, i);
2876 			sm_free(w->w_name); /* XXX */
2877 			if (w->w_host != NULL)
2878 				sm_free(w->w_host); /* XXX */
2879 			wn--;
2880 		}
2881 		else
2882 			++num_ent;
2883 	}
2884 	(void) closedir(f);
2885 	wn++;
2886 
2887 	i = wn - WorkListCount;
2888 	WorkListCount += SM_MIN(num_ent, WorkListSize);
2889 
2890 	if (more != NULL)
2891 		*more = WorkListCount < wn;
2892 
2893 	if (full != NULL)
2894 		*full = (wn >= MaxQueueRun && MaxQueueRun > 0) ||
2895 			(WorkList == NULL && wn > 0);
2896 
2897 	return i;
2898 }
2899 /*
2900 **  SORTQ -- sort the work list
2901 **
2902 **	First the old WorkQ is cleared away. Then the WorkList is sorted
2903 **	for all items so that important (higher sorting value) items are not
2904 **	trunctated off. Then the most important items are moved from
2905 **	WorkList to WorkQ. The lower count of 'max' or MaxListCount items
2906 **	are moved.
2907 **
2908 **	Parameters:
2909 **		max -- maximum number of items to be placed in WorkQ
2910 **
2911 **	Returns:
2912 **		the number of items in WorkQ
2913 **
2914 **	Side Effects:
2915 **		WorkQ gets released and filled with new work. WorkList
2916 **		gets released. Work items get sorted in order.
2917 */
2918 
2919 static int
2920 sortq(max)
2921 	int max;
2922 {
2923 	register int i;			/* local counter */
2924 	register WORK *w;		/* tmp item pointer */
2925 	int wc = WorkListCount;		/* trim size for WorkQ */
2926 
2927 	if (WorkQ != NULL)
2928 	{
2929 		WORK *nw;
2930 
2931 		/* Clear out old WorkQ. */
2932 		for (w = WorkQ; w != NULL; w = nw)
2933 		{
2934 			nw = w->w_next;
2935 			sm_free(w->w_name); /* XXX */
2936 			if (w->w_host != NULL)
2937 				sm_free(w->w_host); /* XXX */
2938 			sm_free((char *) w); /* XXX */
2939 		}
2940 		WorkQ = NULL;
2941 	}
2942 
2943 	if (WorkList == NULL || wc <= 0)
2944 		return 0;
2945 
2946 	/*
2947 	**  The sort now takes place using all of the items in WorkList.
2948 	**  The list gets trimmed to the most important items after the sort.
2949 	**  If the trim were to happen before the sort then one or more
2950 	**  important items might get truncated off -- not what we want.
2951 	*/
2952 
2953 	if (QueueSortOrder == QSO_BYHOST)
2954 	{
2955 		/*
2956 		**  Sort the work directory for the first time,
2957 		**  based on host name, lock status, and priority.
2958 		*/
2959 
2960 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf1);
2961 
2962 		/*
2963 		**  If one message to host is locked, "lock" all messages
2964 		**  to that host.
2965 		*/
2966 
2967 		i = 0;
2968 		while (i < wc)
2969 		{
2970 			if (!WorkList[i].w_lock)
2971 			{
2972 				i++;
2973 				continue;
2974 			}
2975 			w = &WorkList[i];
2976 			while (++i < wc)
2977 			{
2978 				if (WorkList[i].w_host == NULL &&
2979 				    w->w_host == NULL)
2980 					WorkList[i].w_lock = true;
2981 				else if (WorkList[i].w_host != NULL &&
2982 					 w->w_host != NULL &&
2983 					 sm_strcasecmp(WorkList[i].w_host,
2984 						       w->w_host) == 0)
2985 					WorkList[i].w_lock = true;
2986 				else
2987 					break;
2988 			}
2989 		}
2990 
2991 		/*
2992 		**  Sort the work directory for the second time,
2993 		**  based on lock status, host name, and priority.
2994 		*/
2995 
2996 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf2);
2997 	}
2998 	else if (QueueSortOrder == QSO_BYTIME)
2999 	{
3000 		/*
3001 		**  Simple sort based on submission time only.
3002 		*/
3003 
3004 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf3);
3005 	}
3006 	else if (QueueSortOrder == QSO_BYFILENAME)
3007 	{
3008 		/*
3009 		**  Sort based on queue filename.
3010 		*/
3011 
3012 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf4);
3013 	}
3014 	else if (QueueSortOrder == QSO_RANDOM)
3015 	{
3016 		/*
3017 		**  Sort randomly.  To avoid problems with an instable sort,
3018 		**  use a random index into the queue file name to start
3019 		**  comparison.
3020 		*/
3021 
3022 		randi = get_rand_mod(MAXQFNAME);
3023 		if (randi < 2)
3024 			randi = 3;
3025 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf5);
3026 	}
3027 	else if (QueueSortOrder == QSO_BYMODTIME)
3028 	{
3029 		/*
3030 		**  Simple sort based on modification time of queue file.
3031 		**  This puts the oldest items first.
3032 		*/
3033 
3034 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf6);
3035 	}
3036 #if _FFR_RHS
3037 	else if (QueueSortOrder == QSO_BYSHUFFLE)
3038 	{
3039 		/*
3040 		**  Simple sort based on shuffled host name.
3041 		*/
3042 
3043 		init_shuffle_alphabet();
3044 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf7);
3045 	}
3046 #endif /* _FFR_RHS */
3047 	else if (QueueSortOrder == QSO_BYPRIORITY)
3048 	{
3049 		/*
3050 		**  Simple sort based on queue priority only.
3051 		*/
3052 
3053 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf0);
3054 	}
3055 	/* else don't sort at all */
3056 
3057 	/* Check if the per queue group item limit will be exceeded */
3058 	if (wc > max && max > 0)
3059 		wc = max;
3060 
3061 	/*
3062 	**  Convert the work list into canonical form.
3063 	**	Should be turning it into a list of envelopes here perhaps.
3064 	**  Only take the most important items up to the per queue group
3065 	**  maximum.
3066 	*/
3067 
3068 	for (i = wc; --i >= 0; )
3069 	{
3070 		w = (WORK *) xalloc(sizeof(*w));
3071 		w->w_qgrp = WorkList[i].w_qgrp;
3072 		w->w_qdir = WorkList[i].w_qdir;
3073 		w->w_name = WorkList[i].w_name;
3074 		w->w_host = WorkList[i].w_host;
3075 		w->w_lock = WorkList[i].w_lock;
3076 		w->w_tooyoung = WorkList[i].w_tooyoung;
3077 		w->w_pri = WorkList[i].w_pri;
3078 		w->w_ctime = WorkList[i].w_ctime;
3079 		w->w_mtime = WorkList[i].w_mtime;
3080 		w->w_next = WorkQ;
3081 		WorkQ = w;
3082 	}
3083 
3084 	/* free the rest of the list */
3085 	for (i = WorkListCount; --i >= wc; )
3086 	{
3087 		sm_free(WorkList[i].w_name);
3088 		if (WorkList[i].w_host != NULL)
3089 			sm_free(WorkList[i].w_host);
3090 	}
3091 
3092 	if (WorkList != NULL)
3093 		sm_free(WorkList); /* XXX */
3094 	WorkList = NULL;
3095 	WorkListSize = 0;
3096 	WorkListCount = 0;
3097 
3098 	if (tTd(40, 1))
3099 	{
3100 		for (w = WorkQ; w != NULL; w = w->w_next)
3101 		{
3102 			if (w->w_host != NULL)
3103 				sm_dprintf("%22s: pri=%ld %s\n",
3104 					w->w_name, w->w_pri, w->w_host);
3105 			else
3106 				sm_dprintf("%32s: pri=%ld\n",
3107 					w->w_name, w->w_pri);
3108 		}
3109 	}
3110 
3111 	return wc; /* return number of WorkQ items */
3112 }
3113 /*
3114 **  GROW_WLIST -- make the work list larger
3115 **
3116 **	Parameters:
3117 **		qgrp -- the index for the queue group.
3118 **		qdir -- the index for the queue directory.
3119 **
3120 **	Returns:
3121 **		none.
3122 **
3123 **	Side Effects:
3124 **		Adds another QUEUESEGSIZE entries to WorkList if possible.
3125 **		It can fail if there isn't enough memory, so WorkListSize
3126 **		should be checked again upon return.
3127 */
3128 
3129 static void
3130 grow_wlist(qgrp, qdir)
3131 	int qgrp;
3132 	int qdir;
3133 {
3134 	if (tTd(41, 1))
3135 		sm_dprintf("grow_wlist: WorkListSize=%d\n", WorkListSize);
3136 	if (WorkList == NULL)
3137 	{
3138 		WorkList = (WORK *) xalloc((sizeof(*WorkList)) *
3139 					   (QUEUESEGSIZE + 1));
3140 		WorkListSize = QUEUESEGSIZE;
3141 	}
3142 	else
3143 	{
3144 		int newsize = WorkListSize + QUEUESEGSIZE;
3145 		WORK *newlist = (WORK *) sm_realloc((char *) WorkList,
3146 					  (unsigned) sizeof(WORK) * (newsize + 1));
3147 
3148 		if (newlist != NULL)
3149 		{
3150 			WorkListSize = newsize;
3151 			WorkList = newlist;
3152 			if (LogLevel > 1)
3153 			{
3154 				sm_syslog(LOG_INFO, NOQID,
3155 					  "grew WorkList for %s to %d",
3156 					  qid_printqueue(qgrp, qdir),
3157 					  WorkListSize);
3158 			}
3159 		}
3160 		else if (LogLevel > 0)
3161 		{
3162 			sm_syslog(LOG_ALERT, NOQID,
3163 				  "FAILED to grow WorkList for %s to %d",
3164 				  qid_printqueue(qgrp, qdir), newsize);
3165 		}
3166 	}
3167 	if (tTd(41, 1))
3168 		sm_dprintf("grow_wlist: WorkListSize now %d\n", WorkListSize);
3169 }
3170 /*
3171 **  WORKCMPF0 -- simple priority-only compare function.
3172 **
3173 **	Parameters:
3174 **		a -- the first argument.
3175 **		b -- the second argument.
3176 **
3177 **	Returns:
3178 **		-1 if a < b
3179 **		 0 if a == b
3180 **		+1 if a > b
3181 **
3182 */
3183 
3184 static int
3185 workcmpf0(a, b)
3186 	register WORK *a;
3187 	register WORK *b;
3188 {
3189 	long pa = a->w_pri;
3190 	long pb = b->w_pri;
3191 
3192 	if (pa == pb)
3193 		return 0;
3194 	else if (pa > pb)
3195 		return 1;
3196 	else
3197 		return -1;
3198 }
3199 /*
3200 **  WORKCMPF1 -- first compare function for ordering work based on host name.
3201 **
3202 **	Sorts on host name, lock status, and priority in that order.
3203 **
3204 **	Parameters:
3205 **		a -- the first argument.
3206 **		b -- the second argument.
3207 **
3208 **	Returns:
3209 **		<0 if a < b
3210 **		 0 if a == b
3211 **		>0 if a > b
3212 **
3213 */
3214 
3215 static int
3216 workcmpf1(a, b)
3217 	register WORK *a;
3218 	register WORK *b;
3219 {
3220 	int i;
3221 
3222 	/* host name */
3223 	if (a->w_host != NULL && b->w_host == NULL)
3224 		return 1;
3225 	else if (a->w_host == NULL && b->w_host != NULL)
3226 		return -1;
3227 	if (a->w_host != NULL && b->w_host != NULL &&
3228 	    (i = sm_strcasecmp(a->w_host, b->w_host)) != 0)
3229 		return i;
3230 
3231 	/* lock status */
3232 	if (a->w_lock != b->w_lock)
3233 		return b->w_lock - a->w_lock;
3234 
3235 	/* job priority */
3236 	return workcmpf0(a, b);
3237 }
3238 /*
3239 **  WORKCMPF2 -- second compare function for ordering work based on host name.
3240 **
3241 **	Sorts on lock status, host name, and priority in that order.
3242 **
3243 **	Parameters:
3244 **		a -- the first argument.
3245 **		b -- the second argument.
3246 **
3247 **	Returns:
3248 **		<0 if a < b
3249 **		 0 if a == b
3250 **		>0 if a > b
3251 **
3252 */
3253 
3254 static int
3255 workcmpf2(a, b)
3256 	register WORK *a;
3257 	register WORK *b;
3258 {
3259 	int i;
3260 
3261 	/* lock status */
3262 	if (a->w_lock != b->w_lock)
3263 		return a->w_lock - b->w_lock;
3264 
3265 	/* host name */
3266 	if (a->w_host != NULL && b->w_host == NULL)
3267 		return 1;
3268 	else if (a->w_host == NULL && b->w_host != NULL)
3269 		return -1;
3270 	if (a->w_host != NULL && b->w_host != NULL &&
3271 	    (i = sm_strcasecmp(a->w_host, b->w_host)) != 0)
3272 		return i;
3273 
3274 	/* job priority */
3275 	return workcmpf0(a, b);
3276 }
3277 /*
3278 **  WORKCMPF3 -- simple submission-time-only compare function.
3279 **
3280 **	Parameters:
3281 **		a -- the first argument.
3282 **		b -- the second argument.
3283 **
3284 **	Returns:
3285 **		-1 if a < b
3286 **		 0 if a == b
3287 **		+1 if a > b
3288 **
3289 */
3290 
3291 static int
3292 workcmpf3(a, b)
3293 	register WORK *a;
3294 	register WORK *b;
3295 {
3296 	if (a->w_ctime > b->w_ctime)
3297 		return 1;
3298 	else if (a->w_ctime < b->w_ctime)
3299 		return -1;
3300 	else
3301 		return 0;
3302 }
3303 /*
3304 **  WORKCMPF4 -- compare based on file name
3305 **
3306 **	Parameters:
3307 **		a -- the first argument.
3308 **		b -- the second argument.
3309 **
3310 **	Returns:
3311 **		-1 if a < b
3312 **		 0 if a == b
3313 **		+1 if a > b
3314 **
3315 */
3316 
3317 static int
3318 workcmpf4(a, b)
3319 	register WORK *a;
3320 	register WORK *b;
3321 {
3322 	return strcmp(a->w_name, b->w_name);
3323 }
3324 /*
3325 **  WORKCMPF5 -- compare based on assigned random number
3326 **
3327 **	Parameters:
3328 **		a -- the first argument (ignored).
3329 **		b -- the second argument (ignored).
3330 **
3331 **	Returns:
3332 **		randomly 1/-1
3333 */
3334 
3335 /* ARGSUSED0 */
3336 static int
3337 workcmpf5(a, b)
3338 	register WORK *a;
3339 	register WORK *b;
3340 {
3341 	if (strlen(a->w_name) < randi || strlen(b->w_name) < randi)
3342 		return -1;
3343 	return a->w_name[randi] - b->w_name[randi];
3344 }
3345 /*
3346 **  WORKCMPF6 -- simple modification-time-only compare function.
3347 **
3348 **	Parameters:
3349 **		a -- the first argument.
3350 **		b -- the second argument.
3351 **
3352 **	Returns:
3353 **		-1 if a < b
3354 **		 0 if a == b
3355 **		+1 if a > b
3356 **
3357 */
3358 
3359 static int
3360 workcmpf6(a, b)
3361 	register WORK *a;
3362 	register WORK *b;
3363 {
3364 	if (a->w_mtime > b->w_mtime)
3365 		return 1;
3366 	else if (a->w_mtime < b->w_mtime)
3367 		return -1;
3368 	else
3369 		return 0;
3370 }
3371 #if _FFR_RHS
3372 /*
3373 **  WORKCMPF7 -- compare function for ordering work based on shuffled host name.
3374 **
3375 **	Sorts on lock status, host name, and priority in that order.
3376 **
3377 **	Parameters:
3378 **		a -- the first argument.
3379 **		b -- the second argument.
3380 **
3381 **	Returns:
3382 **		<0 if a < b
3383 **		 0 if a == b
3384 **		>0 if a > b
3385 **
3386 */
3387 
3388 static int
3389 workcmpf7(a, b)
3390 	register WORK *a;
3391 	register WORK *b;
3392 {
3393 	int i;
3394 
3395 	/* lock status */
3396 	if (a->w_lock != b->w_lock)
3397 		return a->w_lock - b->w_lock;
3398 
3399 	/* host name */
3400 	if (a->w_host != NULL && b->w_host == NULL)
3401 		return 1;
3402 	else if (a->w_host == NULL && b->w_host != NULL)
3403 		return -1;
3404 	if (a->w_host != NULL && b->w_host != NULL &&
3405 	    (i = sm_strshufflecmp(a->w_host, b->w_host)) != 0)
3406 		return i;
3407 
3408 	/* job priority */
3409 	return workcmpf0(a, b);
3410 }
3411 #endif /* _FFR_RHS */
/*
**  STRREV -- build a reversed copy of a string
**
**	The input is left untouched; the result lives in a fresh
**	buffer of strlen(fwd) + 1 bytes obtained from xalloc().
**
**	Parameters:
**		fwd -- the NUL-terminated string to reverse.
**
**	Returns:
**		pointer to the newly allocated, NUL-terminated
**		reversed string.
*/

static char *
strrev(fwd)
	char *fwd;
{
	int len = strlen(fwd);
	char *rev = xalloc(len + 1);
	char *dst = rev;
	char *src = fwd + len;

	/* walk the source backwards while filling the copy forwards */
	while (src > fwd)
		*dst++ = *--src;
	*dst = '\0';
	return rev;
}
3440 
3441 #if _FFR_RHS
3442 
/* size of the lower half of the table that init_shuffle_alphabet() shuffles */
# define NASCII	128
/* total number of table entries, one per unsigned char value */
# define NCHAR	256

/*
**  Rank of every byte value in the shuffled ordering; indexed by an
**  unsigned char and consulted by sm_strshufflecmp().  It is all zero
**  until init_shuffle_alphabet() has filled it in.
*/

static unsigned char ShuffledAlphabet[NCHAR];
3447 
3448 void
3449 init_shuffle_alphabet()
3450 {
3451 	static bool init = false;
3452 	int i;
3453 
3454 	if (init)
3455 		return;
3456 
3457 	/* fill the ShuffledAlphabet */
3458 	for (i = 0; i < NASCII; i++)
3459 		ShuffledAlphabet[i] = i;
3460 
3461 	/* mix it */
3462 	for (i = 1; i < NASCII; i++)
3463 	{
3464 		register int j = get_random() % NASCII;
3465 		register int tmp;
3466 
3467 		tmp = ShuffledAlphabet[j];
3468 		ShuffledAlphabet[j] = ShuffledAlphabet[i];
3469 		ShuffledAlphabet[i] = tmp;
3470 	}
3471 
3472 	/* make it case insensitive */
3473 	for (i = 'A'; i <= 'Z'; i++)
3474 		ShuffledAlphabet[i] = ShuffledAlphabet[i + 'a' - 'A'];
3475 
3476 	/* fill the upper part */
3477 	for (i = 0; i < NASCII; i++)
3478 		ShuffledAlphabet[i + NASCII] = ShuffledAlphabet[i];
3479 	init = true;
3480 }
3481 
3482 static int
3483 sm_strshufflecmp(a, b)
3484 	char *a;
3485 	char *b;
3486 {
3487 	const unsigned char *us1 = (const unsigned char *) a;
3488 	const unsigned char *us2 = (const unsigned char *) b;
3489 
3490 	while (ShuffledAlphabet[*us1] == ShuffledAlphabet[*us2++])
3491 	{
3492 		if (*us1++ == '\0')
3493 			return 0;
3494 	}
3495 	return (ShuffledAlphabet[*us1] - ShuffledAlphabet[*--us2]);
3496 }
3497 #endif /* _FFR_RHS */
3498 
3499 /*
3500 **  DOWORK -- do a work request.
3501 **
3502 **	Parameters:
3503 **		qgrp -- the index of the queue group for the job.
3504 **		qdir -- the index of the queue directory for the job.
3505 **		id -- the ID of the job to run.
3506 **		forkflag -- if set, run this in background.
3507 **		requeueflag -- if set, reinstantiate the queue quickly.
3508 **			This is used when expanding aliases in the queue.
3509 **			If forkflag is also set, it doesn't wait for the
3510 **			child.
3511 **		e - the envelope in which to run it.
3512 **
3513 **	Returns:
3514 **		process id of process that is running the queue job.
3515 **
3516 **	Side Effects:
3517 **		The work request is satisfied if possible.
3518 */
3519 
3520 pid_t
3521 dowork(qgrp, qdir, id, forkflag, requeueflag, e)
3522 	int qgrp;
3523 	int qdir;
3524 	char *id;
3525 	bool forkflag;
3526 	bool requeueflag;
3527 	register ENVELOPE *e;
3528 {
3529 	register pid_t pid;
3530 	SM_RPOOL_T *rpool;
3531 
3532 	if (tTd(40, 1))
3533 		sm_dprintf("dowork(%s/%s)\n", qid_printqueue(qgrp, qdir), id);
3534 
3535 	/*
3536 	**  Fork for work.
3537 	*/
3538 
3539 	if (forkflag)
3540 	{
3541 		/*
3542 		**  Since the delivery may happen in a child and the
3543 		**  parent does not wait, the parent may close the
3544 		**  maps thereby removing any shared memory used by
3545 		**  the map.  Therefore, close the maps now so the
3546 		**  child will dynamically open them if necessary.
3547 		*/
3548 
3549 		closemaps(false);
3550 
3551 		pid = fork();
3552 		if (pid < 0)
3553 		{
3554 			syserr("dowork: cannot fork");
3555 			return 0;
3556 		}
3557 		else if (pid > 0)
3558 		{
3559 			/* parent -- clean out connection cache */
3560 			mci_flush(false, NULL);
3561 		}
3562 		else
3563 		{
3564 			/*
3565 			**  Initialize exception stack and default exception
3566 			**  handler for child process.
3567 			*/
3568 
3569 			/* Reset global flags */
3570 			RestartRequest = NULL;
3571 			RestartWorkGroup = false;
3572 			ShutdownRequest = NULL;
3573 			PendingSignal = 0;
3574 			CurrentPid = getpid();
3575 			sm_exc_newthread(fatal_error);
3576 
3577 			/*
3578 			**  See note above about SMTP processes and SIGCHLD.
3579 			*/
3580 
3581 			if (OpMode == MD_SMTP ||
3582 			    OpMode == MD_DAEMON ||
3583 			    MaxQueueChildren > 0)
3584 			{
3585 				proc_list_clear();
3586 				sm_releasesignal(SIGCHLD);
3587 				(void) sm_signal(SIGCHLD, SIG_DFL);
3588 			}
3589 
3590 			/* child -- error messages to the transcript */
3591 			QuickAbort = OnlyOneError = false;
3592 		}
3593 	}
3594 	else
3595 	{
3596 		pid = 0;
3597 	}
3598 
3599 	if (pid == 0)
3600 	{
3601 		/*
3602 		**  CHILD
3603 		**	Lock the control file to avoid duplicate deliveries.
3604 		**		Then run the file as though we had just read it.
3605 		**	We save an idea of the temporary name so we
3606 		**		can recover on interrupt.
3607 		*/
3608 
3609 		if (forkflag)
3610 		{
3611 			/* Reset global flags */
3612 			RestartRequest = NULL;
3613 			RestartWorkGroup = false;
3614 			ShutdownRequest = NULL;
3615 			PendingSignal = 0;
3616 		}
3617 
3618 		/* set basic modes, etc. */
3619 		sm_clear_events();
3620 		clearstats();
3621 		rpool = sm_rpool_new_x(NULL);
3622 		clearenvelope(e, false, rpool);
3623 		e->e_flags |= EF_QUEUERUN|EF_GLOBALERRS;
3624 		set_delivery_mode(SM_DELIVER, e);
3625 		e->e_errormode = EM_MAIL;
3626 		e->e_id = id;
3627 		e->e_qgrp = qgrp;
3628 		e->e_qdir = qdir;
3629 		GrabTo = UseErrorsTo = false;
3630 		ExitStat = EX_OK;
3631 		if (forkflag)
3632 		{
3633 			disconnect(1, e);
3634 			set_op_mode(MD_QUEUERUN);
3635 		}
3636 		sm_setproctitle(true, e, "%s from queue", qid_printname(e));
3637 		if (LogLevel > 76)
3638 			sm_syslog(LOG_DEBUG, e->e_id, "dowork, pid=%d",
3639 				  (int) CurrentPid);
3640 
3641 		/* don't use the headers from sendmail.cf... */
3642 		e->e_header = NULL;
3643 
3644 		/* read the queue control file -- return if locked */
3645 		if (!readqf(e, false))
3646 		{
3647 			if (tTd(40, 4) && e->e_id != NULL)
3648 				sm_dprintf("readqf(%s) failed\n",
3649 					qid_printname(e));
3650 			e->e_id = NULL;
3651 			if (forkflag)
3652 				finis(false, true, EX_OK);
3653 			else
3654 			{
3655 				/* adding this frees 8 bytes */
3656 				clearenvelope(e, false, rpool);
3657 
3658 				/* adding this frees 12 bytes */
3659 				sm_rpool_free(rpool);
3660 				e->e_rpool = NULL;
3661 				return 0;
3662 			}
3663 		}
3664 
3665 		e->e_flags |= EF_INQUEUE;
3666 		eatheader(e, requeueflag, true);
3667 
3668 		if (requeueflag)
3669 			queueup(e, false, false);
3670 
3671 		/* do the delivery */
3672 		sendall(e, SM_DELIVER);
3673 
3674 		/* finish up and exit */
3675 		if (forkflag)
3676 			finis(true, true, ExitStat);
3677 		else
3678 		{
3679 			dropenvelope(e, true, false);
3680 			sm_rpool_free(rpool);
3681 			e->e_rpool = NULL;
3682 		}
3683 	}
3684 	e->e_id = NULL;
3685 	return pid;
3686 }
3687 
3688 /*
3689 **  DOWORKLIST -- process a list of envelopes as work requests
3690 **
3691 **	Similar to dowork(), except that after forking, it processes an
3692 **	envelope and its siblings, treating each envelope as a work request.
3693 **
3694 **	Parameters:
3695 **		el -- envelope to be processed including its siblings.
3696 **		forkflag -- if set, run this in background.
3697 **		requeueflag -- if set, reinstantiate the queue quickly.
3698 **			This is used when expanding aliases in the queue.
3699 **			If forkflag is also set, it doesn't wait for the
3700 **			child.
3701 **
3702 **	Returns:
3703 **		process id of process that is running the queue job.
3704 **
3705 **	Side Effects:
3706 **		The work request is satisfied if possible.
3707 */
3708 
3709 pid_t
3710 doworklist(el, forkflag, requeueflag)
3711 	ENVELOPE *el;
3712 	bool forkflag;
3713 	bool requeueflag;
3714 {
3715 	register pid_t pid;
3716 	ENVELOPE *ei;
3717 
3718 	if (tTd(40, 1))
3719 		sm_dprintf("doworklist()\n");
3720 
3721 	/*
3722 	**  Fork for work.
3723 	*/
3724 
3725 	if (forkflag)
3726 	{
3727 		/*
3728 		**  Since the delivery may happen in a child and the
3729 		**  parent does not wait, the parent may close the
3730 		**  maps thereby removing any shared memory used by
3731 		**  the map.  Therefore, close the maps now so the
3732 		**  child will dynamically open them if necessary.
3733 		*/
3734 
3735 		closemaps(false);
3736 
3737 		pid = fork();
3738 		if (pid < 0)
3739 		{
3740 			syserr("doworklist: cannot fork");
3741 			return 0;
3742 		}
3743 		else if (pid > 0)
3744 		{
3745 			/* parent -- clean out connection cache */
3746 			mci_flush(false, NULL);
3747 		}
3748 		else
3749 		{
3750 			/*
3751 			**  Initialize exception stack and default exception
3752 			**  handler for child process.
3753 			*/
3754 
3755 			/* Reset global flags */
3756 			RestartRequest = NULL;
3757 			RestartWorkGroup = false;
3758 			ShutdownRequest = NULL;
3759 			PendingSignal = 0;
3760 			CurrentPid = getpid();
3761 			sm_exc_newthread(fatal_error);
3762 
3763 			/*
3764 			**  See note above about SMTP processes and SIGCHLD.
3765 			*/
3766 
3767 			if (OpMode == MD_SMTP ||
3768 			    OpMode == MD_DAEMON ||
3769 			    MaxQueueChildren > 0)
3770 			{
3771 				proc_list_clear();
3772 				sm_releasesignal(SIGCHLD);
3773 				(void) sm_signal(SIGCHLD, SIG_DFL);
3774 			}
3775 
3776 			/* child -- error messages to the transcript */
3777 			QuickAbort = OnlyOneError = false;
3778 		}
3779 	}
3780 	else
3781 	{
3782 		pid = 0;
3783 	}
3784 
3785 	if (pid != 0)
3786 		return pid;
3787 
3788 	/*
3789 	**  IN CHILD
3790 	**	Lock the control file to avoid duplicate deliveries.
3791 	**		Then run the file as though we had just read it.
3792 	**	We save an idea of the temporary name so we
3793 	**		can recover on interrupt.
3794 	*/
3795 
3796 	if (forkflag)
3797 	{
3798 		/* Reset global flags */
3799 		RestartRequest = NULL;
3800 		RestartWorkGroup = false;
3801 		ShutdownRequest = NULL;
3802 		PendingSignal = 0;
3803 	}
3804 
3805 	/* set basic modes, etc. */
3806 	sm_clear_events();
3807 	clearstats();
3808 	GrabTo = UseErrorsTo = false;
3809 	ExitStat = EX_OK;
3810 	if (forkflag)
3811 	{
3812 		disconnect(1, el);
3813 		set_op_mode(MD_QUEUERUN);
3814 	}
3815 	if (LogLevel > 76)
3816 		sm_syslog(LOG_DEBUG, el->e_id, "doworklist, pid=%d",
3817 			  (int) CurrentPid);
3818 
3819 	for (ei = el; ei != NULL; ei = ei->e_sibling)
3820 	{
3821 		ENVELOPE e;
3822 		SM_RPOOL_T *rpool;
3823 
3824 		if (WILL_BE_QUEUED(ei->e_sendmode))
3825 			continue;
3826 		else if (QueueMode != QM_QUARANTINE &&
3827 			 ei->e_quarmsg != NULL)
3828 			continue;
3829 
3830 		rpool = sm_rpool_new_x(NULL);
3831 		clearenvelope(&e, true, rpool);
3832 		e.e_flags |= EF_QUEUERUN|EF_GLOBALERRS;
3833 		set_delivery_mode(SM_DELIVER, &e);
3834 		e.e_errormode = EM_MAIL;
3835 		e.e_id = ei->e_id;
3836 		e.e_qgrp = ei->e_qgrp;
3837 		e.e_qdir = ei->e_qdir;
3838 		openxscript(&e);
3839 		sm_setproctitle(true, &e, "%s from queue", qid_printname(&e));
3840 
3841 		/* don't use the headers from sendmail.cf... */
3842 		e.e_header = NULL;
3843 		CurEnv = &e;
3844 
3845 		/* read the queue control file -- return if locked */
3846 		if (readqf(&e, false))
3847 		{
3848 			e.e_flags |= EF_INQUEUE;
3849 			eatheader(&e, requeueflag, true);
3850 
3851 			if (requeueflag)
3852 				queueup(&e, false, false);
3853 
3854 			/* do the delivery */
3855 			sendall(&e, SM_DELIVER);
3856 			dropenvelope(&e, true, false);
3857 		}
3858 		else
3859 		{
3860 			if (tTd(40, 4) && e.e_id != NULL)
3861 				sm_dprintf("readqf(%s) failed\n",
3862 					qid_printname(&e));
3863 		}
3864 		sm_rpool_free(rpool);
3865 		ei->e_id = NULL;
3866 	}
3867 
3868 	/* restore CurEnv */
3869 	CurEnv = el;
3870 
3871 	/* finish up and exit */
3872 	if (forkflag)
3873 		finis(true, true, ExitStat);
3874 	return 0;
3875 }
3876 /*
3877 **  READQF -- read queue file and set up environment.
3878 **
3879 **	Parameters:
3880 **		e -- the envelope of the job to run.
3881 **		openonly -- only open the qf (returned as e_lockfp)
3882 **
3883 **	Returns:
3884 **		true if it successfully read the queue file.
3885 **		false otherwise.
3886 **
3887 **	Side Effects:
3888 **		The queue file is returned locked.
3889 */
3890 
3891 static bool
3892 readqf(e, openonly)
3893 	register ENVELOPE *e;
3894 	bool openonly;
3895 {
3896 	register SM_FILE_T *qfp;
3897 	ADDRESS *ctladdr;
3898 	struct stat st, stf;
3899 	char *bp;
3900 	int qfver = 0;
3901 	long hdrsize = 0;
3902 	register char *p;
3903 	char *frcpt = NULL;
3904 	char *orcpt = NULL;
3905 	bool nomore = false;
3906 	bool bogus = false;
3907 	MODE_T qsafe;
3908 	char *err;
3909 	char qf[MAXPATHLEN];
3910 	char buf[MAXLINE];
3911 	int bufsize;
3912 
3913 	/*
3914 	**  Read and process the file.
3915 	*/
3916 
3917 	SM_REQUIRE(e != NULL);
3918 	bp = NULL;
3919 	(void) sm_strlcpy(qf, queuename(e, ANYQFL_LETTER), sizeof(qf));
3920 	qfp = sm_io_open(SmFtStdio, SM_TIME_DEFAULT, qf, SM_IO_RDWR_B, NULL);
3921 	if (qfp == NULL)
3922 	{
3923 		int save_errno = errno;
3924 
3925 		if (tTd(40, 8))
3926 			sm_dprintf("readqf(%s): sm_io_open failure (%s)\n",
3927 				qf, sm_errstring(errno));
3928 		errno = save_errno;
3929 		if (errno != ENOENT
3930 		    )
3931 			syserr("readqf: no control file %s", qf);
3932 		RELEASE_QUEUE;
3933 		return false;
3934 	}
3935 
3936 	if (!lockfile(sm_io_getinfo(qfp, SM_IO_WHAT_FD, NULL), qf, NULL,
3937 		      LOCK_EX|LOCK_NB))
3938 	{
3939 		/* being processed by another queuer */
3940 		if (Verbose)
3941 			(void) sm_io_fprintf(smioout, SM_TIME_DEFAULT,
3942 					     "%s: locked\n", e->e_id);
3943 		if (tTd(40, 8))
3944 			sm_dprintf("%s: locked\n", e->e_id);
3945 		if (LogLevel > 19)
3946 			sm_syslog(LOG_DEBUG, e->e_id, "locked");
3947 		(void) sm_io_close(qfp, SM_TIME_DEFAULT);
3948 		RELEASE_QUEUE;
3949 		return false;
3950 	}
3951 
3952 	RELEASE_QUEUE;
3953 
3954 	/*
3955 	**  Prevent locking race condition.
3956 	**
3957 	**  Process A: readqf(): qfp = fopen(qffile)
3958 	**  Process B: queueup(): rename(tf, qf)
3959 	**  Process B: unlocks(tf)
3960 	**  Process A: lockfile(qf);
3961 	**
3962 	**  Process A (us) has the old qf file (before the rename deleted
3963 	**  the directory entry) and will be delivering based on old data.
3964 	**  This can lead to multiple deliveries of the same recipients.
3965 	**
3966 	**  Catch this by checking if the underlying qf file has changed
3967 	**  *after* acquiring our lock and if so, act as though the file
3968 	**  was still locked (i.e., just return like the lockfile() case
3969 	**  above.
3970 	*/
3971 
3972 	if (stat(qf, &stf) < 0 ||
3973 	    fstat(sm_io_getinfo(qfp, SM_IO_WHAT_FD, NULL), &st) < 0)
3974 	{
3975 		/* must have been being processed by someone else */
3976 		if (tTd(40, 8))
3977 			sm_dprintf("readqf(%s): [f]stat failure (%s)\n",
3978 				qf, sm_errstring(errno));
3979 		(void) sm_io_close(qfp, SM_TIME_DEFAULT);
3980 		return false;
3981 	}
3982 
3983 	if (st.st_nlink != stf.st_nlink ||
3984 	    st.st_dev != stf.st_dev ||
3985 	    ST_INODE(st) != ST_INODE(stf) ||
3986 #if HAS_ST_GEN && 0		/* AFS returns garbage in st_gen */
3987 	    st.st_gen != stf.st_gen ||
3988 #endif /* HAS_ST_GEN && 0 */
3989 	    st.st_uid != stf.st_uid ||
3990 	    st.st_gid != stf.st_gid ||
3991 	    st.st_size != stf.st_size)
3992 	{
3993 		/* changed after opened */
3994 		if (Verbose)
3995 			(void) sm_io_fprintf(smioout, SM_TIME_DEFAULT,
3996 					     "%s: changed\n", e->e_id);
3997 		if (tTd(40, 8))
3998 			sm_dprintf("%s: changed\n", e->e_id);
3999 		if (LogLevel > 19)
4000 			sm_syslog(LOG_DEBUG, e->e_id, "changed");
4001 		(void) sm_io_close(qfp, SM_TIME_DEFAULT);
4002 		return false;
4003 	}
4004 
4005 	/*
4006 	**  Check the queue file for plausibility to avoid attacks.
4007 	*/
4008 
4009 	qsafe = S_IWOTH|S_IWGRP;
4010 	if (bitset(S_IWGRP, QueueFileMode))
4011 		qsafe &= ~S_IWGRP;
4012 
4013 	bogus = st.st_uid !=