xref: /illumos-gate/usr/src/cmd/sendmail/src/queue.c (revision e9af4bc0)
1 /*
2  * Copyright (c) 1998-2009 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1983, 1995-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <sm/sem.h>
16 
17 SM_RCSID("@(#)$Id: queue.c,v 8.987 2009/12/18 17:08:01 ca Exp $")
18 
19 #include <dirent.h>
20 
/* RELEASE_QUEUE expands to a no-op expression */
# define RELEASE_QUEUE	(void) 0
/* inode number member of a struct stat object */
# define ST_INODE(st)	(st).st_ino

/* true if the given error code is EEXIST (file already exists) */
#  define sm_file_exists(errno) ((errno) == EEXIST)

/*
**  TF_OPEN_FLAGS are the open() flags queueup() uses to create a control
**  file: write-only, and the file must not exist yet (O_CREAT|O_EXCL).
**  If HASFLOCK is set and O_EXLOCK is defined, O_EXLOCK is added so the
**  lock is requested by open() itself and SM_OPEN_EXLOCK is 1.  Otherwise
**  SM_OPEN_EXLOCK is 0 and the caller locks the file after opening it.
*/

# if HASFLOCK && defined(O_EXLOCK)
#   define SM_OPEN_EXLOCK 1
#   define TF_OPEN_FLAGS (O_CREAT|O_WRONLY|O_EXCL|O_EXLOCK)
# else /* HASFLOCK && defined(O_EXLOCK) */
#  define TF_OPEN_FLAGS (O_CREAT|O_WRONLY|O_EXCL)
# endif /* HASFLOCK && defined(O_EXLOCK) */

/* make sure SM_OPEN_EXLOCK always has a value usable in #if tests */
#ifndef SM_OPEN_EXLOCK
# define SM_OPEN_EXLOCK 0
#endif /* ! SM_OPEN_EXLOCK */

/*
**  Historical notes:
**	QF_VERSION == 4 was sendmail 8.10/8.11 without _FFR_QUEUEDELAY
**	QF_VERSION == 5 was sendmail 8.10/8.11 with    _FFR_QUEUEDELAY
**	QF_VERSION == 6 was sendmail 8.12      without _FFR_QUEUEDELAY
**	QF_VERSION == 7 was sendmail 8.12      with    _FFR_QUEUEDELAY
**	QF_VERSION == 8 is  sendmail 8.13
*/

/* written by queueup() as the first ("V") record of every control file */
#define QF_VERSION	8	/* version number of this queue format */

/* forward declarations of file-local helpers */
static char	queue_letter __P((ENVELOPE *, int));
static bool	quarantine_queue_item __P((int, int, ENVELOPE *, char *));
50 
51 /* Naming convention: qgrp: index of queue group, qg: QUEUEGROUP */
52 
/*
**  Work queue.
**	Each WORK entry describes one queued message, identified by the
**	name of its control file, together with the attributes recorded
**	for it (lock state, priority, times, queue group and directory).
**	Entries are chained through w_next.
*/

struct work
{
	char		*w_name;	/* name of control file */
	char		*w_host;	/* name of recipient host */
	bool		w_lock;		/* is message locked? */
	bool		w_tooyoung;	/* is it too young to run? */
	long		w_pri;		/* priority of message, see below */
	time_t		w_ctime;	/* creation time */
	time_t		w_mtime;	/* modification time */
	int		w_qgrp;		/* queue group located in */
	int		w_qdir;		/* queue directory located in */
	struct work	*w_next;	/* next in queue */
};

/* shorthand used for work queue entries throughout this file */
typedef struct work	WORK;
72 
static WORK	*WorkQ;		/* queue of things to be done */
static int	NumWorkGroups;	/* number of work groups */
/* time at which SM_GET_LA() last called sm_getla() */
static time_t	Current_LA_time = 0;

/* Get new load average every 30 seconds. */
#define GET_NEW_LA_TIME	30

/*
**  SM_GET_LA -- store the current time in "now" and refresh the load
**	average via sm_getla() if the last refresh is more than
**	GET_NEW_LA_TIME seconds old.
**
**	"now" must be an lvalue; it is assigned the result of curtime().
*/

#define SM_GET_LA(now)	\
	do							\
	{							\
		now = curtime();				\
		if (Current_LA_time < now - GET_NEW_LA_TIME)	\
		{						\
			sm_getla();				\
			Current_LA_time = now;			\
		}						\
	} while (0)

/*
**  DoQueueRun indicates that a queue run is needed.
**	Notice: DoQueueRun is modified in a signal handler!
**	(hence the volatile qualifier on its declaration)
*/

static bool	volatile DoQueueRun; /* non-interrupt time queue run needed */
97 
/*
**  Work group definition structure.
**	Each work group contains one or more queue groups. This is done
**	to manage the number of queue group runners active at the same time
**	to be within the constraints of MaxQueueChildren (if it is set).
**	The number of queue groups that can be run on the next work run
**	is kept track of. The queue groups are run in a round robin.
**
**	wg_qgs points to an array of QUEUEGRP pointers; wg_numqgrp gives
**	the number of queue groups in the work group.
*/

struct workgrp
{
	int		wg_numqgrp;	/* number of queue groups in work grp */
	int		wg_runners;	/* total runners */
	int		wg_curqgrp;	/* current queue group */
	QUEUEGRP	**wg_qgs;	/* array of queue groups */
	int		wg_maxact;	/* max # of active runners */
	time_t		wg_lowqintvl;	/* lowest queue interval */
	int		wg_restart;	/* needs restarting? */
	int		wg_restartcnt;	/* count of times restarted */
};

typedef struct workgrp WORKGRP;

/*
**  Table of work groups, MAXWORKGROUPS + 1 slots.
**	NOTE(review): declared volatile, presumably because entries are
**	touched from signal handling code -- confirm at the points of use.
*/

static WORKGRP	volatile WorkGrp[MAXWORKGROUPS + 1];	/* work groups */
122 
/* debug category "leak_q"; only compiled in when SM_HEAP_CHECK is set */
#if SM_HEAP_CHECK
static SM_DEBUG_T DebugLeakQ = SM_DEBUG_INITIALIZER("leak_q",
	"@(#)$Debug: leak_q - trace memory leaks during queue processing $");
#endif /* SM_HEAP_CHECK */

/*
**  We use EmptyString instead of "" to avoid
**  'zero-length format string' warnings from gcc
*/

static const char EmptyString[] = "";
134 
/* forward declarations of file-local functions */
static void	grow_wlist __P((int, int));
static int	multiqueue_cache __P((char *, int, QUEUEGRP *, int, unsigned int *));
static int	gatherq __P((int, int, bool, bool *, bool *, int *));
static int	sortq __P((int));
static void	printctladdr __P((ADDRESS *, SM_FILE_T *));
static bool	readqf __P((ENVELOPE *, bool));
static void	restart_work_group __P((int));
static void	runner_work __P((ENVELOPE *, int, bool, int, int));
static void	schedule_queue_runs __P((bool, int, bool));
static char	*strrev __P((char *));
static ADDRESS	*setctluser __P((char *, int, ENVELOPE *));
#if _FFR_RHS
static int	sm_strshufflecmp __P((char *, char *));
static void	init_shuffle_alphabet __P(());
#endif /* _FFR_RHS */

/*
**  Note: workcmpf?() don't use a prototype because it will cause a conflict
**  with the qsort() call (which expects something like
**  int (*compar)(const void *, const void *), not (WORK *, WORK *))
*/

static int	workcmpf0();
static int	workcmpf1();
static int	workcmpf2();
static int	workcmpf3();
static int	workcmpf4();
static int	randi = 3;	/* index for workcmpf5() */
static int	workcmpf5();
static int	workcmpf6();
#if _FFR_RHS
static int	workcmpf7();
#endif /* _FFR_RHS */

/*
**  get_rand_mod(m) -- get_random() reduced modulo m.
**	If RANDOMSHIFT is non-zero the value is first shifted right by
**	RANDOMSHIFT bits.
*/

#if RANDOMSHIFT
# define get_rand_mod(m)	((get_random() >> RANDOMSHIFT) % (m))
#else /* RANDOMSHIFT */
# define get_rand_mod(m)	(get_random() % (m))
#endif /* RANDOMSHIFT */
174 
/*
**  File system definition.
**	Used to keep track of how much free space is available
**	on a file system in which one or more queue directories reside.
**
**	The per-file-system data is reached through the FILE_SYS() macro,
**	which selects PtrFileSys[] when SM_CONF_SHM is set and the local
**	FileSys[] array otherwise.  The path names are always kept in the
**	local FSPath[] array.
*/

typedef struct filesys_shared	FILESYS;

struct filesys_shared
{
	dev_t	fs_dev;		/* unique device id */
	long	fs_avail;	/* number of free blocks available */
	long	fs_blksize;	/* block size, in bytes */
};

/* probably kept in shared memory */
static FILESYS	FileSys[MAXFILESYS];	/* queue file systems */
static const char *FSPath[MAXFILESYS];	/* pathnames for file systems */
193 
#if SM_CONF_SHM

/*
**  Shared memory data
**
**  Current layout:
**	size -- size of shared memory segment
**	pid -- pid of owner, should be a unique id to avoid misinterpretations
**		by other processes.
**	tag -- should be a unique id to avoid misinterpretations by others.
**		idea: hash over configuration data that will be stored here.
**	NumFileSys -- number of file systems.
**	FileSys -- (array of) structure for used file systems.
**	RSATmpCnt -- counter for number of uses of ephemeral RSA key.
**	QShm -- (array of) structure for information about queue directories.
**
**  Note: according to the OFF_*() macros below, the header (size, pid,
**  tag) is followed first by the FileSys array, then by NumFileSys,
**  RSATmpCnt and finally the queue data; i.e., FileSys precedes
**  NumFileSys in memory even though the list above names them in the
**  opposite order.
*/

/*
**  Queue data in shared memory
*/

typedef struct queue_shared	QUEUE_SHM_T;

struct queue_shared
{
	int	qs_entries;	/* number of entries */
	/* XXX more to follow? */
};

static void	*Pshm;		/* pointer to shared memory */
static FILESYS	*PtrFileSys;	/* pointer to queue file system array */
int		ShmId = SM_SHM_NO_ID;	/* shared memory id */
static QUEUE_SHM_T	*QShm;		/* pointer to shared queue data */
/* NOTE(review): presumably the size of the segment -- confirm at use */
static size_t shms;

/* header: an int (size), then a pid_t (pid), then an int (tag) */
# define SHM_OFF_PID(p)	(((char *) (p)) + sizeof(int))
# define SHM_OFF_TAG(p)	(((char *) (p)) + sizeof(pid_t) + sizeof(int))
# define SHM_OFF_HEAD	(sizeof(pid_t) + sizeof(int) * 2)

/* how to access FileSys */
# define FILE_SYS(i)	(PtrFileSys[i])

/* first entry is a tag, for now just the size */
# define OFF_FILE_SYS(p)	(((char *) (p)) + SHM_OFF_HEAD)

/* offset for PNumFileSys */
# define OFF_NUM_FILE_SYS(p)	(((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys))

/* offset for PRSATmpCnt */
# define OFF_RSA_TMP_CNT(p) (((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int))
int	*PRSATmpCnt;

/* offset for queue_shm */
# define OFF_QUEUE_SHM(p) (((char *) (p)) + SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int) * 2)

/* number of entries recorded for shared queue slot i */
# define QSHM_ENTRIES(i)	QShm[i].qs_entries

/* basic size of shared memory segment */
/* (header + FileSys array + two ints; same value as the OFF_QUEUE_SHM offset) */
# define SM_T_SIZE	(SHM_OFF_HEAD + sizeof(FileSys) + sizeof(int) * 2)

static unsigned int	hash_q __P((char *, unsigned int));
255 
/*
**  HASH_Q -- fold a NUL-terminated string into a running hash value
**
**	The result for a string only depends on the start value and the
**	characters of the string, so a longer input can be hashed piecewise
**	by feeding the result of one call in as the start value of the next.
**
**	Parameters:
**		p -- string to hash.
**		h -- hash start value (from previous run).
**
**	Returns:
**		hash value (h unchanged if p is the empty string).
*/

static unsigned int
hash_q(p, h)
	char *p;
	unsigned int h;
{
	char *s;

	for (s = p; *s != '\0'; s++)
	{
		/* all mixing is done in (signed) int, as before */
		int ch = *s;
		int mix = ch ^ (ch << 6);

		h += (mix << 11) ^ (mix >> 1);
		h ^= (ch << 14) + (ch << 7) + (ch << 4) + ch;
	}
	return h;
}
284 
285 
#else /* SM_CONF_SHM */
/* without shared memory the file system data is the local FileSys array */
# define FILE_SYS(i)	FileSys[i]
#endif /* SM_CONF_SHM */

/* access to the various components of file system data */
/* (the name comes from FSPath[]; everything else goes through FILE_SYS()) */
#define FILE_SYS_NAME(i)	FSPath[i]
#define FILE_SYS_AVAIL(i)	FILE_SYS(i).fs_avail
#define FILE_SYS_BLKSIZE(i)	FILE_SYS(i).fs_blksize
#define FILE_SYS_DEV(i)	FILE_SYS(i).fs_dev
295 
296 
297 /*
298 **  Current qf file field assignments:
299 **
300 **	A	AUTH= parameter
301 **	B	body type
302 **	C	controlling user
303 **	D	data file name
304 **	d	data file directory name (added in 8.12)
305 **	E	error recipient
306 **	F	flag bits
307 **	G	free (was: queue delay algorithm if _FFR_QUEUEDELAY)
308 **	H	header
309 **	I	data file's inode number
310 **	K	time of last delivery attempt
311 **	L	Solaris Content-Length: header (obsolete)
312 **	M	message
313 **	N	number of delivery attempts
314 **	P	message priority
315 **	q	quarantine reason
316 **	Q	original recipient (ORCPT=)
317 **	r	final recipient (Final-Recipient: DSN field)
318 **	R	recipient
319 **	S	sender
320 **	T	init time
321 **	V	queue file version
322 **	X	free (was: character set if _FFR_SAVE_CHARSET)
323 **	Y	free (was: current delay if _FFR_QUEUEDELAY)
324 **	Z	original envelope id from ESMTP
325 **	!	deliver by (added in 8.12)
326 **	$	define macro
327 **	.	terminate file
328 */
329 
/*
**  QUEUEUP -- queue a message up for future transmission.
**
**	Writes the control file for an envelope.  For a new id the
**	queue file is written directly; otherwise a temporary file is
**	written and then renamed over the queue file.  If the envelope
**	has no data file yet (EF_HAS_DF not set), one is created from
**	the message body first.
**
**	Parameters:
**		e -- the envelope to queue up.
**		announce -- if true, tell when you are queueing up.
**		msync -- if true, then fsync() if SuperSafe interactive mode.
**
**	Returns:
**		none.
**
**	Side Effects:
**		The current request is saved in a control file.
**		The queue file is left locked.
**		e->e_lockfp, e->e_qfletter and (if a data file is created)
**		e->e_dfdev, e->e_dfino and e->e_putbody are updated.
**		EF_INQUEUE (and possibly EF_HAS_DF) is set in e->e_flags.
**		errno is set to 0 before returning.
**
**	NOTE(review): syserr() formats starting with '!' appear to be
**		treated as fatal (see the NOTREACHED marker below) --
**		confirm against syserr().
*/

void
queueup(e, announce, msync)
	register ENVELOPE *e;
	bool announce;
	bool msync;
{
	register SM_FILE_T *tfp;
	register HDR *h;
	register ADDRESS *q;
	int tfd = -1;
	int i;
	bool newid;
	register char *p;
	MAILER nullmailer;
	MCI mcibuf;
	char qf[MAXPATHLEN];
	char tf[MAXPATHLEN];
	char df[MAXPATHLEN];
	char buf[MAXLINE];

	/*
	**  Create control file.
	*/

	/*
	**  OPEN_TF opens the file named by tf with TF_OPEN_FLAGS and
	**  QueueFileMode and stores the descriptor in tfd.  If the mode
	**  contains S_IWGRP the umask is set to 002 around the open()
	**  and restored afterwards.
	*/

#define OPEN_TF	do							\
		{							\
			MODE_T oldumask = 0;				\
									\
			if (bitset(S_IWGRP, QueueFileMode))		\
				oldumask = umask(002);			\
			tfd = open(tf, TF_OPEN_FLAGS, QueueFileMode);	\
			if (bitset(S_IWGRP, QueueFileMode))		\
				(void) umask(oldumask);			\
		} while (0)


	/* "new" means: no id yet, or not yet marked as being in the queue */
	newid = (e->e_id == NULL) || !bitset(EF_INQUEUE, e->e_flags);
	(void) sm_strlcpy(tf, queuename(e, NEWQFL_LETTER), sizeof(tf));
	tfp = e->e_lockfp;
	if (tfp == NULL && newid)
	{
		/*
		**  open qf file directly: this will give an error if the file
		**  already exists and hence prevent problems if a queue-id
		**  is reused (e.g., because the clock is set back).
		*/

		(void) sm_strlcpy(tf, queuename(e, ANYQFL_LETTER), sizeof(tf));
		OPEN_TF;
		if (tfd < 0 ||
#if !SM_OPEN_EXLOCK
		    !lockfile(tfd, tf, NULL, LOCK_EX|LOCK_NB) ||
#endif /* !SM_OPEN_EXLOCK */
		    (tfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
					 (void *) &tfd, SM_IO_WRONLY,
					 NULL)) == NULL)
		{
			/* keep errno intact across the diagnostic dump */
			int save_errno = errno;

			printopenfds(true);
			errno = save_errno;
			syserr("!queueup: cannot create queue file %s, euid=%d, fd=%d, fp=%p",
				tf, (int) geteuid(), tfd, tfp);
			/* NOTREACHED */
		}
		/* the new queue file doubles as the envelope's lock file */
		e->e_lockfp = tfp;
		upd_qs(e, 1, 0, "queueup");
	}

	/* if newid, write the queue file directly (instead of temp file) */
	if (!newid)
	{
		/*
		**  Up to 128 attempts, in rounds of 32: within a round
		**  sleep i % 32 seconds between attempts; on the last
		**  attempt of a round close the descriptor (if any) and
		**  rename the existing temp file out of the way.
		*/

		/* get a locked tf file */
		for (i = 0; i < 128; i++)
		{
			if (tfd < 0)
			{
				OPEN_TF;
				if (tfd < 0)
				{
					/* only "file exists" is worth retrying */
					if (errno != EEXIST)
						break;
					if (LogLevel > 0 && (i % 32) == 0)
						sm_syslog(LOG_ALERT, e->e_id,
							  "queueup: cannot create %s, euid=%d: %s",
							  tf, (int) geteuid(),
							  sm_errstring(errno));
				}
#if SM_OPEN_EXLOCK
				else
					break;
#endif /* SM_OPEN_EXLOCK */
			}
			if (tfd >= 0)
			{
#if SM_OPEN_EXLOCK
				/* file is locked by open() */
				break;
#else /* SM_OPEN_EXLOCK */
				if (lockfile(tfd, tf, NULL, LOCK_EX|LOCK_NB))
					break;
				else
#endif /* SM_OPEN_EXLOCK */
				if (LogLevel > 0 && (i % 32) == 0)
					sm_syslog(LOG_ALERT, e->e_id,
						  "queueup: cannot lock %s: %s",
						  tf, sm_errstring(errno));
				if ((i % 32) == 31)
				{
					(void) close(tfd);
					tfd = -1;
				}
			}

			if ((i % 32) == 31)
			{
				/* save the old temp file away */
				(void) rename(tf, queuename(e, TEMPQF_LETTER));
			}
			else
				(void) sleep(i % 32);
		}
		/* no descriptor after all attempts, or no stream for it */
		if (tfd < 0 || (tfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
						 (void *) &tfd, SM_IO_WRONLY_B,
						 NULL)) == NULL)
		{
			int save_errno = errno;

			printopenfds(true);
			errno = save_errno;
			syserr("!queueup: cannot create queue temp file %s, uid=%d",
				tf, (int) geteuid());
		}
	}

	/* debugging output (category 40) */
	if (tTd(40, 1))
		sm_dprintf("\n>>>>> queueing %s/%s%s >>>>>\n",
			   qid_printqueue(e->e_qgrp, e->e_qdir),
			   queuename(e, ANYQFL_LETTER),
			   newid ? " (new id)" : "");
	if (tTd(40, 3))
	{
		sm_dprintf("  e_flags=");
		printenvflags(e);
	}
	if (tTd(40, 32))
	{
		sm_dprintf("  sendq=");
		printaddr(sm_debug_file(), e->e_sendqueue, true);
	}
	if (tTd(40, 9))
	{
		sm_dprintf("  tfp=");
		dumpfd(sm_io_getinfo(tfp, SM_IO_WHAT_FD, NULL), true, false);
		sm_dprintf("  lockfp=");
		if (e->e_lockfp == NULL)
			sm_dprintf("NULL\n");
		else
			dumpfd(sm_io_getinfo(e->e_lockfp, SM_IO_WHAT_FD, NULL),
			       true, false);
	}

	/*
	**  If there is no data file yet, create one.
	*/

	(void) sm_strlcpy(df, queuename(e, DATAFL_LETTER), sizeof(df));
	if (bitset(EF_HAS_DF, e->e_flags))
	{
		/*
		**  Data file exists: commit it unless SuperSafe is one
		**  of the SAFE_REALLY modes; a failure other than EINVAL
		**  is reported via syserr().
		*/

		if (e->e_dfp != NULL &&
		    SuperSafe != SAFE_REALLY &&
		    SuperSafe != SAFE_REALLY_POSTMILTER &&
		    sm_io_setinfo(e->e_dfp, SM_BF_COMMIT, NULL) < 0 &&
		    errno != EINVAL)
		{
			syserr("!queueup: cannot commit data file %s, uid=%d",
			       queuename(e, DATAFL_LETTER), (int) geteuid());
		}
		/* interactive mode: sync the data file only if asked to */
		if (e->e_dfp != NULL &&
		    SuperSafe == SAFE_INTERACTIVE && msync)
		{
			if (tTd(40,32))
				sm_syslog(LOG_INFO, e->e_id,
					  "queueup: fsync(e->e_dfp)");

			if (fsync(sm_io_getinfo(e->e_dfp, SM_IO_WHAT_FD,
						NULL)) < 0)
			{
				/* reply code 552 for a new id, 452 otherwise */
				if (newid)
					syserr("!552 Error writing data file %s",
					       df);
				else
					syserr("!452 Error writing data file %s",
					       df);
			}
		}
	}
	else
	{
		int dfd;
		MODE_T oldumask = 0;
		register SM_FILE_T *dfp = NULL;
		struct stat stbuf;

		if (e->e_dfp != NULL &&
		    sm_io_getinfo(e->e_dfp, SM_IO_WHAT_ISTYPE, BF_FILE_TYPE))
			syserr("committing over bf file");

		/* same umask handling as in OPEN_TF */
		if (bitset(S_IWGRP, QueueFileMode))
			oldumask = umask(002);
		dfd = open(df, O_WRONLY|O_CREAT|O_TRUNC|QF_O_EXTRA,
			   QueueFileMode);
		if (bitset(S_IWGRP, QueueFileMode))
			(void) umask(oldumask);
		if (dfd < 0 || (dfp = sm_io_open(SmFtStdiofd, SM_TIME_DEFAULT,
						 (void *) &dfd, SM_IO_WRONLY_B,
						 NULL)) == NULL)
			syserr("!queueup: cannot create data temp file %s, uid=%d",
				df, (int) geteuid());
		/* remember device/inode for the "I" record; -1 if unknown */
		if (fstat(dfd, &stbuf) < 0)
			e->e_dfino = -1;
		else
		{
			e->e_dfdev = stbuf.st_dev;
			e->e_dfino = ST_INODE(stbuf);
		}
		e->e_flags |= EF_HAS_DF;
		/* write the body into the data file via the envelope's putbody */
		memset(&mcibuf, '\0', sizeof(mcibuf));
		mcibuf.mci_out = dfp;
		mcibuf.mci_mailer = FileMailer;
		(*e->e_putbody)(&mcibuf, e, NULL);

		if (SuperSafe == SAFE_REALLY ||
		    SuperSafe == SAFE_REALLY_POSTMILTER ||
		    (SuperSafe == SAFE_INTERACTIVE && msync))
		{
			if (tTd(40,32))
				sm_syslog(LOG_INFO, e->e_id,
					  "queueup: fsync(dfp)");

			if (fsync(sm_io_getinfo(dfp, SM_IO_WHAT_FD, NULL)) < 0)
			{
				if (newid)
					syserr("!552 Error writing data file %s",
					       df);
				else
					syserr("!452 Error writing data file %s",
					       df);
			}
		}

		if (sm_io_close(dfp, SM_TIME_DEFAULT) < 0)
			syserr("!queueup: cannot save data temp file %s, uid=%d",
				df, (int) geteuid());
		/* from now on the body is output with putbody() */
		e->e_putbody = putbody;
	}

	/*
	**  Output future work requests.
	**	Priority and creation time should be first, since
	**	they are required by gatherq.
	*/

	/* output queue version number (must be first!) */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "V%d\n", QF_VERSION);

	/* output creation time */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "T%ld\n", (long) e->e_ctime);

	/* output last delivery time */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "K%ld\n", (long) e->e_dtime);

	/* output number of delivery attempts */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "N%d\n", e->e_ntries);

	/* output message priority */
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "P%ld\n", e->e_msgpriority);

	/*
	**  If data file is in a different directory than the queue file,
	**  output a "d" record naming the directory of the data file.
	*/

	if (e->e_dfqgrp != e->e_qgrp)
	{
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "d%s\n",
			Queue[e->e_dfqgrp]->qg_qpaths[e->e_dfqdir].qp_name);
	}

	/* output inode number of data file */
	/* XXX should probably include device major/minor too */
	/* (record format written here is "I<major>/<minor>/<inode>") */
	if (e->e_dfino != -1)
	{
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "I%ld/%ld/%llu\n",
				     (long) major(e->e_dfdev),
				     (long) minor(e->e_dfdev),
				     (ULONGLONG_T) e->e_dfino);
	}

	/* output body type */
	if (e->e_bodytype != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "B%s\n",
				     denlstring(e->e_bodytype, true, false));

	/* quarantine reason */
	if (e->e_quarmsg != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "q%s\n",
				     denlstring(e->e_quarmsg, true, false));

	/* message from envelope, if it exists */
	if (e->e_message != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "M%s\n",
				     denlstring(e->e_message, true, false));

	/* send various flag bits through */
	/* (one letter per set flag, collected in buf; no "F" record if none) */
	p = buf;
	if (bitset(EF_WARNING, e->e_flags))
		*p++ = 'w';
	if (bitset(EF_RESPONSE, e->e_flags))
		*p++ = 'r';
	if (bitset(EF_HAS8BIT, e->e_flags))
		*p++ = '8';
	if (bitset(EF_DELETE_BCC, e->e_flags))
		*p++ = 'b';
	if (bitset(EF_RET_PARAM, e->e_flags))
		*p++ = 'd';
	if (bitset(EF_NO_BODY_RETN, e->e_flags))
		*p++ = 'n';
	if (bitset(EF_SPLIT, e->e_flags))
		*p++ = 's';
	*p++ = '\0';
	if (buf[0] != '\0')
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "F%s\n", buf);

	/* save $={persistentMacros} macro values */
	queueup_macros(macid("{persistentMacros}"), tfp, e);

	/* output name of sender */
	if (bitnset(M_UDBENVELOPE, e->e_from.q_mailer->m_flags))
		p = e->e_sender;
	else
		p = e->e_from.q_paddr;
	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "S%s\n",
			     denlstring(p, true, false));

	/* output ESMTP-supplied "original" information */
	if (e->e_envid != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "Z%s\n",
				     denlstring(e->e_envid, true, false));

	/* output AUTH= parameter */
	if (e->e_auth_param != NULL)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "A%s\n",
				     denlstring(e->e_auth_param, true, false));
	/* output "deliver by" record: flag character and time value */
	if (e->e_dlvr_flag != 0)
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "!%c %ld\n",
				     (char) e->e_dlvr_flag, e->e_deliver_by);

	/* output list of recipient addresses */
	/* (the NULL call resets the controlling-user state of printctladdr) */
	printctladdr(NULL, NULL);
	for (q = e->e_sendqueue; q != NULL; q = q->q_next)
	{
		/* only recipients that are still undelivered are written */
		if (!QS_IS_UNDELIVERED(q->q_state))
			continue;

		/* message for this recipient, if it exists */
		if (q->q_message != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "M%s\n",
					     denlstring(q->q_message, true,
							false));

		printctladdr(q, tfp);
		if (q->q_orcpt != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "Q%s\n",
					     denlstring(q->q_orcpt, true,
							false));
		if (q->q_finalrcpt != NULL)
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "r%s\n",
					     denlstring(q->q_finalrcpt, true,
							false));
		/* recipient record: 'R', optional flag letters, ':', address */
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'R');
		if (bitset(QPRIMARY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'P');
		if (bitset(QHASNOTIFY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'N');
		if (bitset(QPINGONSUCCESS, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'S');
		if (bitset(QPINGONFAILURE, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'F');
		if (bitset(QPINGONDELAY, q->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'D');
		if (q->q_alias != NULL &&
		    bitset(QALIAS, q->q_alias->q_flags))
			(void) sm_io_putc(tfp, SM_TIME_DEFAULT, 'A');
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, ':');
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s\n",
				     denlstring(q->q_paddr, true, false));
		if (announce)
		{
			char *tag = "queued";

			if (e->e_quarmsg != NULL)
				tag = "quarantined";

			/* e_to is set only for the duration of the report */
			e->e_to = q->q_paddr;
			message(tag);
			if (LogLevel > 8)
				logdelivery(q->q_mailer, NULL, q->q_status,
					    tag, NULL, (time_t) 0, e);
			e->e_to = NULL;
		}
		if (tTd(40, 1))
		{
			sm_dprintf("queueing ");
			printaddr(sm_debug_file(), q, false);
		}
	}

	/*
	**  Output headers for this message.
	**	Expand macros completely here.  Queue run will deal with
	**	everything as absolute headers.
	**		All headers that must be relative to the recipient
	**		can be cracked later.
	**	We set up a "null mailer" -- i.e., a mailer that will have
	**	no effect on the addresses as they are output.
	*/

	memset((char *) &nullmailer, '\0', sizeof(nullmailer));
	nullmailer.m_re_rwset = nullmailer.m_rh_rwset =
			nullmailer.m_se_rwset = nullmailer.m_sh_rwset = -1;
	nullmailer.m_eol = "\n";
	memset(&mcibuf, '\0', sizeof(mcibuf));
	mcibuf.mci_mailer = &nullmailer;
	mcibuf.mci_out = tfp;

	macdefine(&e->e_macro, A_PERM, 'g', "\201f");
	for (h = e->e_header; h != NULL; h = h->h_link)
	{
		if (h->h_value == NULL)
			continue;

		/* don't output resent headers on non-resent messages */
		if (bitset(H_RESENT, h->h_flags) &&
		    !bitset(EF_RESENT, e->e_flags))
			continue;

		/* expand macros; if null, don't output header at all */
		/* (a result consisting of a single blank counts as null, too) */
		if (bitset(H_DEFAULT, h->h_flags))
		{
			(void) expand(h->h_value, buf, sizeof(buf), e);
			if (buf[0] == '\0')
				continue;
			if (buf[0] == ' ' && buf[1] == '\0')
				continue;
		}

		/* output this header */
		/* (record starts with "H?", conditions, and a closing '?') */
		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "H?");

		/* output conditional macro if present */
		if (h->h_macro != '\0')
		{
			/* 0200 bit set: written by name, else as one char */
			if (bitset(0200, h->h_macro))
				(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT,
						     "${%s}",
						      macname(bitidx(h->h_macro)));
			else
				(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT,
						     "$%c", h->h_macro);
		}
		else if (!bitzerop(h->h_mflags) &&
			 bitset(H_CHECK|H_ACHECK, h->h_flags))
		{
			int j;

			/* if conditional, output the set of conditions */
			for (j = '\0'; j <= '\177'; j++)
				if (bitnset(j, h->h_mflags))
					(void) sm_io_putc(tfp, SM_TIME_DEFAULT,
							  j);
		}
		(void) sm_io_putc(tfp, SM_TIME_DEFAULT, '?');

		/* output the header: expand macros, convert addresses */
		if (bitset(H_DEFAULT, h->h_flags) &&
		    !bitset(H_BINDLATE, h->h_flags))
		{
			/* buf still holds the expansion done above */
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s:%s\n",
					     h->h_field,
					     denlstring(buf, false, true));
		}
		else if (bitset(H_FROM|H_RCPT, h->h_flags) &&
			 !bitset(H_BINDLATE, h->h_flags))
		{
			bool oldstyle = bitset(EF_OLDSTYLE, e->e_flags);
			SM_FILE_T *savetrace = TrafficLogFile;

			/* TrafficLogFile is cleared while commaize() runs */
			TrafficLogFile = NULL;

			if (bitset(H_FROM, h->h_flags))
				oldstyle = false;
			commaize(h, h->h_value, oldstyle, &mcibuf, e,
				 PXLF_HEADER);

			TrafficLogFile = savetrace;
		}
		else
		{
			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "%s:%s\n",
					     h->h_field,
					     denlstring(h->h_value, false,
							true));
		}
	}

	/*
	**  Clean up.
	**
	**	Write a terminator record -- this is to prevent
	**	scurrilous crackers from appending any data.
	*/

	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, ".\n");

	/* flush, fsync() if the SuperSafe mode asks for it, check for errors */
	if (sm_io_flush(tfp, SM_TIME_DEFAULT) != 0 ||
	    ((SuperSafe == SAFE_REALLY ||
	      SuperSafe == SAFE_REALLY_POSTMILTER ||
	      (SuperSafe == SAFE_INTERACTIVE && msync)) &&
	     fsync(sm_io_getinfo(tfp, SM_IO_WHAT_FD, NULL)) < 0) ||
	    sm_io_error(tfp))
	{
		if (newid)
			syserr("!552 Error writing control file %s", tf);
		else
			syserr("!452 Error writing control file %s", tf);
	}

	if (!newid)
	{
		char new = queue_letter(e, ANYQFL_LETTER);

		/* rename (locked) tf to be (locked) [qh]f */
		(void) sm_strlcpy(qf, queuename(e, ANYQFL_LETTER),
				  sizeof(qf));
		if (rename(tf, qf) < 0)
			syserr("cannot rename(%s, %s), uid=%d",
				tf, qf, (int) geteuid());
		else
		{
			/*
			**  Check if type has changed and only
			**  remove the old item if the rename above
			**  succeeded.
			*/

			if (e->e_qfletter != '\0' &&
			    e->e_qfletter != new)
			{
				if (tTd(40, 5))
				{
					sm_dprintf("type changed from %c to %c\n",
						   e->e_qfletter, new);
				}

				if (unlink(queuename(e, e->e_qfletter)) < 0)
				{
					/* XXX: something more drastic? */
					if (LogLevel > 0)
						sm_syslog(LOG_ERR, e->e_id,
							  "queueup: unlink(%s) failed: %s",
							  queuename(e, e->e_qfletter),
							  sm_errstring(errno));
				}
			}
		}
		/* recorded even if the rename above failed */
		e->e_qfletter = new;

		/*
		**  fsync() after renaming to make sure metadata is
		**  written to disk on filesystems in which renames are
		**  not guaranteed.
		*/

		if (SuperSafe != SAFE_NO)
		{
			/* for softupdates */
			if (tfd >= 0 && fsync(tfd) < 0)
			{
				syserr("!queueup: cannot fsync queue temp file %s",
				       tf);
			}
			SYNC_DIR(qf, true);
		}

		/* close and unlock old (locked) queue file */
		/* (the stream of the renamed file becomes the new lock file) */
		if (e->e_lockfp != NULL)
			(void) sm_io_close(e->e_lockfp, SM_TIME_DEFAULT);
		e->e_lockfp = tfp;

		/* save log info */
		if (LogLevel > 79)
			sm_syslog(LOG_DEBUG, e->e_id, "queueup %s", qf);
	}
	else
	{
		/* save log info */
		if (LogLevel > 79)
			sm_syslog(LOG_DEBUG, e->e_id, "queueup %s", tf);

		e->e_qfletter = queue_letter(e, ANYQFL_LETTER);
	}

	/* leave with a clean errno and the envelope marked as queued */
	errno = 0;
	e->e_flags |= EF_INQUEUE;

	if (tTd(40, 1))
		sm_dprintf("<<<<< done queueing %s <<<<<\n\n", e->e_id);
	return;
}
966 
967 /*
968 **  PRINTCTLADDR -- print control address to file.
969 **
970 **	Parameters:
971 **		a -- address.
972 **		tfp -- file pointer.
973 **
974 **	Returns:
975 **		none.
976 **
977 **	Side Effects:
978 **		The control address (if changed) is printed to the file.
979 **		The last control address and uid are saved.
980 */
981 
982 static void
983 printctladdr(a, tfp)
984 	register ADDRESS *a;
985 	SM_FILE_T *tfp;
986 {
987 	char *user;
988 	register ADDRESS *q;
989 	uid_t uid;
990 	gid_t gid;
991 	static ADDRESS *lastctladdr = NULL;
992 	static uid_t lastuid;
993 
994 	/* initialization */
995 	if (a == NULL || a->q_alias == NULL || tfp == NULL)
996 	{
997 		if (lastctladdr != NULL && tfp != NULL)
998 			(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C\n");
999 		lastctladdr = NULL;
1000 		lastuid = 0;
1001 		return;
1002 	}
1003 
1004 	/* find the active uid */
1005 	q = getctladdr(a);
1006 	if (q == NULL)
1007 	{
1008 		user = NULL;
1009 		uid = 0;
1010 		gid = 0;
1011 	}
1012 	else
1013 	{
1014 		user = q->q_ruser != NULL ? q->q_ruser : q->q_user;
1015 		uid = q->q_uid;
1016 		gid = q->q_gid;
1017 	}
1018 	a = a->q_alias;
1019 
1020 	/* check to see if this is the same as last time */
1021 	if (lastctladdr != NULL && uid == lastuid &&
1022 	    strcmp(lastctladdr->q_paddr, a->q_paddr) == 0)
1023 		return;
1024 	lastuid = uid;
1025 	lastctladdr = a;
1026 
1027 	if (uid == 0 || user == NULL || user[0] == '\0')
1028 		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C");
1029 	else
1030 		(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, "C%s:%ld:%ld",
1031 				     denlstring(user, true, false), (long) uid,
1032 				     (long) gid);
1033 	(void) sm_io_fprintf(tfp, SM_TIME_DEFAULT, ":%s\n",
1034 			     denlstring(a->q_paddr, true, false));
1035 }
1036 
/*
**  RUNNERS_SIGTERM -- propagate a SIGTERM to queue runner process
**
**	This propagates the signal to the child processes that are queue
**	runners. This is for a queue runner "cleanup". After all of the
**	child queue runner processes are signaled (it should be SIGTERM
**	being the sig) then the old signal handler (Oldsh_term, which is
**	also recorded in Oldsh) is called to handle any cleanup set for
**	this process (provided it is not SIG_DFL or SIG_IGN, and not this
**	handler itself). The signal may not be handled immediately
**	if the BlockOldsh flag is set. If the current process doesn't
**	have a parent then handle the signal immediately, regardless of
**	BlockOldsh.
**
**	Parameters:
**		sig -- the signal number being sent
**
**	Returns:
**		none.
**
**	Side Effects:
**		Sets the NoMoreRunners boolean to true to stop more runners
**		from being started in runqueue().
**		Records the old handler in Oldsh and the signal in Oldsig.
**		errno is restored to its value on entry.
**
**	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
**		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
**		DOING.
*/

/* state shared between the handlers below and the rest of this file */
static bool		volatile NoMoreRunners = false;
static sigfunc_t	Oldsh_term = SIG_DFL;
static sigfunc_t	Oldsh_hup = SIG_DFL;
static sigfunc_t	volatile Oldsh = SIG_DFL;
static bool		BlockOldsh = false;
static int		volatile Oldsig = 0;
static SIGFUNC_DECL	runners_sigterm __P((int));
static SIGFUNC_DECL	runners_sighup __P((int));

static SIGFUNC_DECL
runners_sigterm(sig)
	int sig;
{
	/* errno must look untouched to the interrupted code */
	int save_errno = errno;

	FIX_SYSV_SIGNAL(sig, runners_sigterm);
	errno = save_errno;
	CHECK_CRITICAL(sig);
	NoMoreRunners = true;
	/* remember which handler and signal are pending */
	Oldsh = Oldsh_term;
	Oldsig = sig;
	/* pass the signal on to the queue runner children */
	proc_list_signal(PROC_QUEUE, sig);

	/* getppid() <= 1 is taken to mean "no parent": don't defer */
	if (!BlockOldsh || getppid() <= 1)
	{
		/* Check that a valid 'old signal handler' is callable */
		if (Oldsh_term != SIG_DFL && Oldsh_term != SIG_IGN &&
		    Oldsh_term != runners_sigterm)
			(*Oldsh_term)(sig);
	}
	errno = save_errno;
	return SIGFUNC_RETURN;
}
1097 }
1098 /*
1099 **  RUNNERS_SIGHUP -- propagate a SIGHUP to queue runner process
1100 **
1101 **	This propagates the signal to the child processes that are queue
1102 **	runners. This is for a queue runner "cleanup". After all of the
1103 **	child queue runner processes are signaled (it should be SIGHUP
1104 **	being the sig) then the old signal handler (Oldsh) is called to
1105 **	handle any cleanup set for this process (provided it is not SIG_DFL
1106 **	or SIG_IGN). The signal may not be handled immediately if the
1107 **	BlockOldsh flag is set. If the current process doesn't have
1108 **	a parent then handle the signal immediately, regardless of
1109 **	BlockOldsh.
1110 **
1111 **	Parameters:
1112 **		sig -- the signal number being sent
1113 **
1114 **	Returns:
1115 **		none.
1116 **
1117 **	Side Effects:
1118 **		Sets the NoMoreRunners boolean to true to stop more runners
1119 **		from being started in runqueue().
1120 **
1121 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
1122 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
1123 **		DOING.
1124 */
1125 
1126 static SIGFUNC_DECL
1127 runners_sighup(sig)
1128 	int sig;
1129 {
1130 	int save_errno = errno;
1131 
1132 	FIX_SYSV_SIGNAL(sig, runners_sighup);
1133 	errno = save_errno;
1134 	CHECK_CRITICAL(sig);
1135 	NoMoreRunners = true;
1136 	Oldsh = Oldsh_hup;
1137 	Oldsig = sig;
1138 	proc_list_signal(PROC_QUEUE, sig);
1139 
1140 	if (!BlockOldsh || getppid() <= 1)
1141 	{
1142 		/* Check that a valid 'old signal handler' is callable */
1143 		if (Oldsh_hup != SIG_DFL && Oldsh_hup != SIG_IGN &&
1144 		    Oldsh_hup != runners_sighup)
1145 			(*Oldsh_hup)(sig);
1146 	}
1147 	errno = save_errno;
1148 	return SIGFUNC_RETURN;
1149 }
1150 /*
1151 **  MARK_WORK_GROUP_RESTART -- mark a work group as needing a restart
1152 **
1153 **  Sets a workgroup for restarting.
1154 **
1155 **	Parameters:
1156 **		wgrp -- the work group id to restart.
1157 **		reason -- why (signal?), -1 to turn off restart
1158 **
1159 **	Returns:
1160 **		none.
1161 **
1162 **	Side effects:
1163 **		May set global RestartWorkGroup to true.
1164 **
1165 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
1166 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
1167 **		DOING.
1168 */
1169 
1170 void
1171 mark_work_group_restart(wgrp, reason)
1172 	int wgrp;
1173 	int reason;
1174 {
1175 	if (wgrp < 0 || wgrp > NumWorkGroups)
1176 		return;
1177 
1178 	WorkGrp[wgrp].wg_restart = reason;
1179 	if (reason >= 0)
1180 		RestartWorkGroup = true;
1181 }
1182 /*
1183 **  RESTART_MARKED_WORK_GROUPS -- restart work groups marked as needing restart
1184 **
1185 **  Restart any workgroup marked as needing a restart provided more
1186 **  runners are allowed.
1187 **
1188 **	Parameters:
1189 **		none.
1190 **
1191 **	Returns:
1192 **		none.
1193 **
1194 **	Side effects:
1195 **		Sets global RestartWorkGroup to false.
1196 */
1197 
1198 void
1199 restart_marked_work_groups()
1200 {
1201 	int i;
1202 	int wasblocked;
1203 
1204 	if (NoMoreRunners)
1205 		return;
1206 
1207 	/* Block SIGCHLD so reapchild() doesn't mess with us */
1208 	wasblocked = sm_blocksignal(SIGCHLD);
1209 
1210 	for (i = 0; i < NumWorkGroups; i++)
1211 	{
1212 		if (WorkGrp[i].wg_restart >= 0)
1213 		{
1214 			if (LogLevel > 8)
1215 				sm_syslog(LOG_ERR, NOQID,
1216 					  "restart queue runner=%d due to signal 0x%x",
1217 					  i, WorkGrp[i].wg_restart);
1218 			restart_work_group(i);
1219 		}
1220 	}
1221 	RestartWorkGroup = false;
1222 
1223 	if (wasblocked == 0)
1224 		(void) sm_releasesignal(SIGCHLD);
1225 }
1226 /*
1227 **  RESTART_WORK_GROUP -- restart a specific work group
1228 **
1229 **  Restart a specific workgroup provided more runners are allowed.
1230 **  If the requested work group has been restarted too many times log
1231 **  this and refuse to restart.
1232 **
1233 **	Parameters:
1234 **		wgrp -- the work group id to restart
1235 **
1236 **	Returns:
1237 **		none.
1238 **
1239 **	Side Effects:
1240 **		starts another process doing the work of wgrp
1241 */
1242 
1243 #define MAX_PERSIST_RESTART	10	/* max allowed number of restarts */
1244 
1245 static void
1246 restart_work_group(wgrp)
1247 	int wgrp;
1248 {
1249 	if (NoMoreRunners ||
1250 	    wgrp < 0 || wgrp > NumWorkGroups)
1251 		return;
1252 
1253 	WorkGrp[wgrp].wg_restart = -1;
1254 	if (WorkGrp[wgrp].wg_restartcnt < MAX_PERSIST_RESTART)
1255 	{
1256 		/* avoid overflow; increment here */
1257 		WorkGrp[wgrp].wg_restartcnt++;
1258 		(void) run_work_group(wgrp, RWG_FORK|RWG_PERSISTENT|RWG_RUNALL);
1259 	}
1260 	else
1261 	{
1262 		sm_syslog(LOG_ERR, NOQID,
1263 			  "ERROR: persistent queue runner=%d restarted too many times, queue runner lost",
1264 			  wgrp);
1265 	}
1266 }
1267 /*
1268 **  SCHEDULE_QUEUE_RUNS -- schedule the next queue run for a work group.
1269 **
1270 **	Parameters:
1271 **		runall -- schedule even if individual bit is not set.
1272 **		wgrp -- the work group id to schedule.
1273 **		didit -- the queue run was performed for this work group.
1274 **
1275 **	Returns:
1276 **		nothing
1277 */
1278 
/*
**  INCR_MOD -- increment v, wrapping around to 0 once it reaches m.
**
**	Expands to a single statement, to be used as "INCR_MOD(v, m);"
**	(also as the only statement of an if/else arm).  Both arguments
**	are parenthesized so that expressions can be passed safely.
**	Note: v is evaluated more than once.
*/

#define INCR_MOD(v, m)	do				\
			{				\
				if (++(v) >= (m))	\
					(v) = 0;	\
			} while (0)
1282 
1283 static void
1284 schedule_queue_runs(runall, wgrp, didit)
1285 	bool runall;
1286 	int wgrp;
1287 	bool didit;
1288 {
1289 	int qgrp, cgrp, endgrp;
1290 #if _FFR_QUEUE_SCHED_DBG
1291 	time_t lastsched;
1292 	bool sched;
1293 #endif /* _FFR_QUEUE_SCHED_DBG */
1294 	time_t now;
1295 	time_t minqintvl;
1296 
1297 	/*
1298 	**  This is a bit ugly since we have to duplicate the
1299 	**  code that "walks" through a work queue group.
1300 	*/
1301 
1302 	now = curtime();
1303 	minqintvl = 0;
1304 	cgrp = endgrp = WorkGrp[wgrp].wg_curqgrp;
1305 	do
1306 	{
1307 		time_t qintvl;
1308 
1309 #if _FFR_QUEUE_SCHED_DBG
1310 		lastsched = 0;
1311 		sched = false;
1312 #endif /* _FFR_QUEUE_SCHED_DBG */
1313 		qgrp = WorkGrp[wgrp].wg_qgs[cgrp]->qg_index;
1314 		if (Queue[qgrp]->qg_queueintvl > 0)
1315 			qintvl = Queue[qgrp]->qg_queueintvl;
1316 		else if (QueueIntvl > 0)
1317 			qintvl = QueueIntvl;
1318 		else
1319 			qintvl = (time_t) 0;
1320 #if _FFR_QUEUE_SCHED_DBG
1321 		lastsched = Queue[qgrp]->qg_nextrun;
1322 #endif /* _FFR_QUEUE_SCHED_DBG */
1323 		if ((runall || Queue[qgrp]->qg_nextrun <= now) && qintvl > 0)
1324 		{
1325 #if _FFR_QUEUE_SCHED_DBG
1326 			sched = true;
1327 #endif /* _FFR_QUEUE_SCHED_DBG */
1328 			if (minqintvl == 0 || qintvl < minqintvl)
1329 				minqintvl = qintvl;
1330 
1331 			/*
1332 			**  Only set a new time if a queue run was performed
1333 			**  for this queue group.  If the queue was not run,
1334 			**  we could starve it by setting a new time on each
1335 			**  call.
1336 			*/
1337 
1338 			if (didit)
1339 				Queue[qgrp]->qg_nextrun += qintvl;
1340 		}
1341 #if _FFR_QUEUE_SCHED_DBG
1342 		if (tTd(69, 10))
1343 			sm_syslog(LOG_INFO, NOQID,
1344 				"sqr: wgrp=%d, cgrp=%d, qgrp=%d, intvl=%ld, QI=%ld, runall=%d, lastrun=%ld, nextrun=%ld, sched=%d",
1345 				wgrp, cgrp, qgrp, Queue[qgrp]->qg_queueintvl,
1346 				QueueIntvl, runall, lastsched,
1347 				Queue[qgrp]->qg_nextrun, sched);
1348 #endif /* _FFR_QUEUE_SCHED_DBG */
1349 		INCR_MOD(cgrp, WorkGrp[wgrp].wg_numqgrp);
1350 	} while (endgrp != cgrp);
1351 	if (minqintvl > 0)
1352 		(void) sm_setevent(minqintvl, runqueueevent, 0);
1353 }
1354 
1355 #if _FFR_QUEUE_RUN_PARANOIA
1356 /*
1357 **  CHECKQUEUERUNNER -- check whether a queue group hasn't been run.
1358 **
1359 **	Use this if events may get lost and hence queue runners may not
1360 **	be started and mail will pile up in a queue.
1361 **
1362 **	Parameters:
1363 **		none.
1364 **
1365 **	Returns:
1366 **		true if a queue run is necessary.
1367 **
1368 **	Side Effects:
1369 **		may schedule a queue run.
1370 */
1371 
1372 bool
1373 checkqueuerunner()
1374 {
1375 	int qgrp;
1376 	time_t now, minqintvl;
1377 
1378 	now = curtime();
1379 	minqintvl = 0;
1380 	for (qgrp = 0; qgrp < NumQueue && Queue[qgrp] != NULL; qgrp++)
1381 	{
1382 		time_t qintvl;
1383 
1384 		if (Queue[qgrp]->qg_queueintvl > 0)
1385 			qintvl = Queue[qgrp]->qg_queueintvl;
1386 		else if (QueueIntvl > 0)
1387 			qintvl = QueueIntvl;
1388 		else
1389 			qintvl = (time_t) 0;
1390 		if (Queue[qgrp]->qg_nextrun <= now - qintvl)
1391 		{
1392 			if (minqintvl == 0 || qintvl < minqintvl)
1393 				minqintvl = qintvl;
1394 			if (LogLevel > 1)
1395 				sm_syslog(LOG_WARNING, NOQID,
1396 					"checkqueuerunner: queue %d should have been run at %s, queue interval %ld",
1397 					qgrp,
1398 					arpadate(ctime(&Queue[qgrp]->qg_nextrun)),
1399 					qintvl);
1400 		}
1401 	}
1402 	if (minqintvl > 0)
1403 	{
1404 		(void) sm_setevent(minqintvl, runqueueevent, 0);
1405 		return true;
1406 	}
1407 	return false;
1408 }
1409 #endif /* _FFR_QUEUE_RUN_PARANOIA */
1410 
1411 /*
1412 **  RUNQUEUE -- run the jobs in the queue.
1413 **
1414 **	Gets the stuff out of the queue in some presumably logical
1415 **	order and processes them.
1416 **
1417 **	Parameters:
1418 **		forkflag -- true if the queue scanning should be done in
1419 **			a child process.  We double-fork so it is not our
1420 **			child and we don't have to clean up after it.
1421 **			false can be ignored if we have multiple queues.
1422 **		verbose -- if true, print out status information.
1423 **		persistent -- persistent queue runner?
1424 **		runall -- run all groups or only a subset (DoQueueRun)?
1425 **
1426 **	Returns:
1427 **		true if the queue run successfully began.
1428 **
1429 **	Side Effects:
1430 **		runs things in the mail queue using run_work_group().
1431 **		maybe schedules next queue run.
1432 */
1433 
1434 static ENVELOPE	QueueEnvelope;		/* the queue run envelope */
1435 static time_t	LastQueueTime = 0;	/* last time a queue ID assigned */
1436 static pid_t	LastQueuePid = -1;	/* last PID which had a queue ID */
1437 
1438 /* values for qp_supdirs */
1439 #define QP_NOSUB	0x0000	/* No subdirectories */
1440 #define QP_SUBDF	0x0001	/* "df" subdirectory */
1441 #define QP_SUBQF	0x0002	/* "qf" subdirectory */
1442 #define QP_SUBXF	0x0004	/* "xf" subdirectory */
1443 
1444 bool
1445 runqueue(forkflag, verbose, persistent, runall)
1446 	bool forkflag;
1447 	bool verbose;
1448 	bool persistent;
1449 	bool runall;
1450 {
1451 	int i;
1452 	bool ret = true;
1453 	static int curnum = 0;
1454 	sigfunc_t cursh;
1455 #if SM_HEAP_CHECK
1456 	SM_NONVOLATILE int oldgroup = 0;
1457 
1458 	if (sm_debug_active(&DebugLeakQ, 1))
1459 	{
1460 		oldgroup = sm_heap_group();
1461 		sm_heap_newgroup();
1462 		sm_dprintf("runqueue() heap group #%d\n", sm_heap_group());
1463 	}
1464 #endif /* SM_HEAP_CHECK */
1465 
1466 	/* queue run has been started, don't do any more this time */
1467 	DoQueueRun = false;
1468 
1469 	/* more than one queue or more than one directory per queue */
1470 	if (!forkflag && !verbose &&
1471 	    (WorkGrp[0].wg_qgs[0]->qg_numqueues > 1 || NumWorkGroups > 1 ||
1472 	     WorkGrp[0].wg_numqgrp > 1))
1473 		forkflag = true;
1474 
1475 	/*
1476 	**  For controlling queue runners via signals sent to this process.
1477 	**  Oldsh* will get called too by runners_sig* (if it is not SIG_IGN
1478 	**  or SIG_DFL) to preserve cleanup behavior. Now that this process
1479 	**  will have children (and perhaps grandchildren) this handler will
1480 	**  be left in place. This is because this process, once it has
1481 	**  finished spinning off queue runners, may go back to doing something
1482 	**  else (like being a daemon). And we still want on a SIG{TERM,HUP} to
1483 	**  clean up the child queue runners. Only install 'runners_sig*' once
1484 	**  else we'll get stuck looping forever.
1485 	*/
1486 
1487 	cursh = sm_signal(SIGTERM, runners_sigterm);
1488 	if (cursh != runners_sigterm)
1489 		Oldsh_term = cursh;
1490 	cursh = sm_signal(SIGHUP, runners_sighup);
1491 	if (cursh != runners_sighup)
1492 		Oldsh_hup = cursh;
1493 
1494 	for (i = 0; i < NumWorkGroups && !NoMoreRunners; i++)
1495 	{
1496 		int rwgflags = RWG_NONE;
1497 
1498 		/*
1499 		**  If MaxQueueChildren active then test whether the start
1500 		**  of the next queue group's additional queue runners (maximum)
1501 		**  will result in MaxQueueChildren being exceeded.
1502 		**
1503 		**  Note: do not use continue; even though another workgroup
1504 		**	may have fewer queue runners, this would be "unfair",
1505 		**	i.e., this work group might "starve" then.
1506 		*/
1507 
1508 #if _FFR_QUEUE_SCHED_DBG
1509 		if (tTd(69, 10))
1510 			sm_syslog(LOG_INFO, NOQID,
1511 				"rq: curnum=%d, MaxQueueChildren=%d, CurRunners=%d, WorkGrp[curnum].wg_maxact=%d",
1512 				curnum, MaxQueueChildren, CurRunners,
1513 				WorkGrp[curnum].wg_maxact);
1514 #endif /* _FFR_QUEUE_SCHED_DBG */
1515 		if (MaxQueueChildren > 0 &&
1516 		    CurRunners + WorkGrp[curnum].wg_maxact > MaxQueueChildren)
1517 			break;
1518 
1519 		/*
1520 		**  Pick up where we left off (curnum), in case we
1521 		**  used up all the children last time without finishing.
1522 		**  This give a round-robin fairness to queue runs.
1523 		**
1524 		**  Increment CurRunners before calling run_work_group()
1525 		**  to avoid a "race condition" with proc_list_drop() which
1526 		**  decrements CurRunners if the queue runners terminate.
1527 		**  Notice: CurRunners is an upper limit, in some cases
1528 		**  (too few jobs in the queue) this value is larger than
1529 		**  the actual number of queue runners. The discrepancy can
1530 		**  increase if some queue runners "hang" for a long time.
1531 		*/
1532 
1533 		CurRunners += WorkGrp[curnum].wg_maxact;
1534 		if (forkflag)
1535 			rwgflags |= RWG_FORK;
1536 		if (verbose)
1537 			rwgflags |= RWG_VERBOSE;
1538 		if (persistent)
1539 			rwgflags |= RWG_PERSISTENT;
1540 		if (runall)
1541 			rwgflags |= RWG_RUNALL;
1542 		ret = run_work_group(curnum, rwgflags);
1543 
1544 		/*
1545 		**  Failure means a message was printed for ETRN
1546 		**  and subsequent queues are likely to fail as well.
1547 		**  Decrement CurRunners in that case because
1548 		**  none have been started.
1549 		*/
1550 
1551 		if (!ret)
1552 		{
1553 			CurRunners -= WorkGrp[curnum].wg_maxact;
1554 			break;
1555 		}
1556 
1557 		if (!persistent)
1558 			schedule_queue_runs(runall, curnum, true);
1559 		INCR_MOD(curnum, NumWorkGroups);
1560 	}
1561 
1562 	/* schedule left over queue runs */
1563 	if (i < NumWorkGroups && !NoMoreRunners && !persistent)
1564 	{
1565 		int h;
1566 
1567 		for (h = curnum; i < NumWorkGroups; i++)
1568 		{
1569 			schedule_queue_runs(runall, h, false);
1570 			INCR_MOD(h, NumWorkGroups);
1571 		}
1572 	}
1573 
1574 
1575 #if SM_HEAP_CHECK
1576 	if (sm_debug_active(&DebugLeakQ, 1))
1577 		sm_heap_setgroup(oldgroup);
1578 #endif /* SM_HEAP_CHECK */
1579 	return ret;
1580 }
1581 
1582 #if _FFR_SKIP_DOMAINS
1583 /*
1584 **  SKIP_DOMAINS -- Skip 'skip' number of domains in the WorkQ.
1585 **
1586 **  Added by Stephen Frost <sfrost@snowman.net> to support
1587 **  having each runner process every N'th domain instead of
1588 **  every N'th message.
1589 **
1590 **	Parameters:
1591 **		skip -- number of domains in WorkQ to skip.
1592 **
1593 **	Returns:
1594 **		total number of messages skipped.
1595 **
1596 **	Side Effects:
1597 **		may change WorkQ
1598 */
1599 
1600 static int
1601 skip_domains(skip)
1602 	int skip;
1603 {
1604 	int n, seqjump;
1605 
1606 	for (n = 0, seqjump = 0; n < skip && WorkQ != NULL; seqjump++)
1607 	{
1608 		if (WorkQ->w_next != NULL)
1609 		{
1610 			if (WorkQ->w_host != NULL &&
1611 			    WorkQ->w_next->w_host != NULL)
1612 			{
1613 				if (sm_strcasecmp(WorkQ->w_host,
1614 						WorkQ->w_next->w_host) != 0)
1615 					n++;
1616 			}
1617 			else
1618 			{
1619 				if ((WorkQ->w_host != NULL &&
1620 				     WorkQ->w_next->w_host == NULL) ||
1621 				    (WorkQ->w_host == NULL &&
1622 				     WorkQ->w_next->w_host != NULL))
1623 					     n++;
1624 			}
1625 		}
1626 		WorkQ = WorkQ->w_next;
1627 	}
1628 	return seqjump;
1629 }
1630 #endif /* _FFR_SKIP_DOMAINS */
1631 
1632 /*
1633 **  RUNNER_WORK -- have a queue runner do its work
1634 **
**  Have a queue runner do its work on a list of entries.
1636 **  When work isn't directly being done then this process can take a signal
1637 **  and terminate immediately (in a clean fashion of course).
1638 **  When work is directly being done, it's not to be interrupted
1639 **  immediately: the work should be allowed to finish at a clean point
1640 **  before termination (in a clean fashion of course).
1641 **
1642 **	Parameters:
1643 **		e -- envelope.
1644 **		sequenceno -- 'th process to run WorkQ.
1645 **		didfork -- did the calling process fork()?
1646 **		skip -- process only each skip'th item.
1647 **		njobs -- number of jobs in WorkQ.
1648 **
1649 **	Returns:
1650 **		none.
1651 **
1652 **	Side Effects:
1653 **		runs things in the mail queue.
1654 */
1655 
1656 static void
1657 runner_work(e, sequenceno, didfork, skip, njobs)
1658 	register ENVELOPE *e;
1659 	int sequenceno;
1660 	bool didfork;
1661 	int skip;
1662 	int njobs;
1663 {
1664 	int n, seqjump;
1665 	WORK *w;
1666 	time_t now;
1667 
1668 	SM_GET_LA(now);
1669 
1670 	/*
1671 	**  Here we temporarily block the second calling of the handlers.
1672 	**  This allows us to handle the signal without terminating in the
1673 	**  middle of direct work. If a signal does come, the test for
1674 	**  NoMoreRunners will find it.
1675 	*/
1676 
1677 	BlockOldsh = true;
1678 	seqjump = skip;
1679 
1680 	/* process them once at a time */
1681 	while (WorkQ != NULL)
1682 	{
1683 #if SM_HEAP_CHECK
1684 		SM_NONVOLATILE int oldgroup = 0;
1685 
1686 		if (sm_debug_active(&DebugLeakQ, 1))
1687 		{
1688 			oldgroup = sm_heap_group();
1689 			sm_heap_newgroup();
1690 			sm_dprintf("run_queue_group() heap group #%d\n",
1691 				sm_heap_group());
1692 		}
1693 #endif /* SM_HEAP_CHECK */
1694 
1695 		/* do no more work */
1696 		if (NoMoreRunners)
1697 		{
1698 			/* Check that a valid signal handler is callable */
1699 			if (Oldsh != SIG_DFL && Oldsh != SIG_IGN &&
1700 			    Oldsh != runners_sighup &&
1701 			    Oldsh != runners_sigterm)
1702 				(*Oldsh)(Oldsig);
1703 			break;
1704 		}
1705 
1706 		w = WorkQ; /* assign current work item */
1707 
1708 		/*
1709 		**  Set the head of the WorkQ to the next work item.
1710 		**  It is set 'skip' ahead (the number of parallel queue
1711 		**  runners working on WorkQ together) since each runner
1712 		**  works on every 'skip'th (N-th) item.
1713 #if _FFR_SKIP_DOMAINS
1714 		**  In the case of the BYHOST Queue Sort Order, the 'item'
1715 		**  is a domain, so we work on every 'skip'th (N-th) domain.
1716 #endif * _FFR_SKIP_DOMAINS *
1717 		*/
1718 
1719 #if _FFR_SKIP_DOMAINS
1720 		if (QueueSortOrder == QSO_BYHOST)
1721 		{
1722 			seqjump = 1;
1723 			if (WorkQ->w_next != NULL)
1724 			{
1725 				if (WorkQ->w_host != NULL &&
1726 				    WorkQ->w_next->w_host != NULL)
1727 				{
1728 					if (sm_strcasecmp(WorkQ->w_host,
1729 							WorkQ->w_next->w_host)
1730 								!= 0)
1731 						seqjump = skip_domains(skip);
1732 					else
1733 						WorkQ = WorkQ->w_next;
1734 				}
1735 				else
1736 				{
1737 					if ((WorkQ->w_host != NULL &&
1738 					     WorkQ->w_next->w_host == NULL) ||
1739 					    (WorkQ->w_host == NULL &&
1740 					     WorkQ->w_next->w_host != NULL))
1741 						seqjump = skip_domains(skip);
1742 					else
1743 						WorkQ = WorkQ->w_next;
1744 				}
1745 			}
1746 			else
1747 				WorkQ = WorkQ->w_next;
1748 		}
1749 		else
1750 #endif /* _FFR_SKIP_DOMAINS */
1751 		{
1752 			for (n = 0; n < skip && WorkQ != NULL; n++)
1753 				WorkQ = WorkQ->w_next;
1754 		}
1755 
1756 		e->e_to = NULL;
1757 
1758 		/*
1759 		**  Ignore jobs that are too expensive for the moment.
1760 		**
1761 		**	Get new load average every GET_NEW_LA_TIME seconds.
1762 		*/
1763 
1764 		SM_GET_LA(now);
1765 		if (shouldqueue(WkRecipFact, Current_LA_time))
1766 		{
1767 			char *msg = "Aborting queue run: load average too high";
1768 
1769 			if (Verbose)
1770 				message("%s", msg);
1771 			if (LogLevel > 8)
1772 				sm_syslog(LOG_INFO, NOQID, "runqueue: %s", msg);
1773 			break;
1774 		}
1775 		if (shouldqueue(w->w_pri, w->w_ctime))
1776 		{
1777 			if (Verbose)
1778 				message(EmptyString);
1779 			if (QueueSortOrder == QSO_BYPRIORITY)
1780 			{
1781 				if (Verbose)
1782 					message("Skipping %s/%s (sequence %d of %d) and flushing rest of queue",
1783 						qid_printqueue(w->w_qgrp,
1784 							       w->w_qdir),
1785 						w->w_name + 2, sequenceno,
1786 						njobs);
1787 				if (LogLevel > 8)
1788 					sm_syslog(LOG_INFO, NOQID,
1789 						  "runqueue: Flushing queue from %s/%s (pri %ld, LA %d, %d of %d)",
1790 						  qid_printqueue(w->w_qgrp,
1791 								 w->w_qdir),
1792 						  w->w_name + 2, w->w_pri,
1793 						  CurrentLA, sequenceno,
1794 						  njobs);
1795 				break;
1796 			}
1797 			else if (Verbose)
1798 				message("Skipping %s/%s (sequence %d of %d)",
1799 					qid_printqueue(w->w_qgrp, w->w_qdir),
1800 					w->w_name + 2, sequenceno, njobs);
1801 		}
1802 		else
1803 		{
1804 			if (Verbose)
1805 			{
1806 				message(EmptyString);
1807 				message("Running %s/%s (sequence %d of %d)",
1808 					qid_printqueue(w->w_qgrp, w->w_qdir),
1809 					w->w_name + 2, sequenceno, njobs);
1810 			}
1811 			if (didfork && MaxQueueChildren > 0)
1812 			{
1813 				sm_blocksignal(SIGCHLD);
1814 				(void) sm_signal(SIGCHLD, reapchild);
1815 			}
1816 			if (tTd(63, 100))
1817 				sm_syslog(LOG_DEBUG, NOQID,
1818 					  "runqueue %s dowork(%s)",
1819 					  qid_printqueue(w->w_qgrp, w->w_qdir),
1820 					  w->w_name + 2);
1821 
1822 			(void) dowork(w->w_qgrp, w->w_qdir, w->w_name + 2,
1823 				      ForkQueueRuns, false, e);
1824 			errno = 0;
1825 		}
1826 		sm_free(w->w_name); /* XXX */
1827 		if (w->w_host != NULL)
1828 			sm_free(w->w_host); /* XXX */
1829 		sm_free((char *) w); /* XXX */
1830 		sequenceno += seqjump; /* next sequence number */
1831 #if SM_HEAP_CHECK
1832 		if (sm_debug_active(&DebugLeakQ, 1))
1833 			sm_heap_setgroup(oldgroup);
1834 #endif /* SM_HEAP_CHECK */
1835 	}
1836 
1837 	BlockOldsh = false;
1838 
1839 	/* check the signals didn't happen during the revert */
1840 	if (NoMoreRunners)
1841 	{
1842 		/* Check that a valid signal handler is callable */
1843 		if (Oldsh != SIG_DFL && Oldsh != SIG_IGN &&
1844 		    Oldsh != runners_sighup && Oldsh != runners_sigterm)
1845 			(*Oldsh)(Oldsig);
1846 	}
1847 
1848 	Oldsh = SIG_DFL; /* after the NoMoreRunners check */
1849 }
1850 /*
1851 **  RUN_WORK_GROUP -- run the jobs in a queue group from a work group.
1852 **
1853 **	Gets the stuff out of the queue in some presumably logical
1854 **	order and processes them.
1855 **
1856 **	Parameters:
1857 **		wgrp -- work group to process.
1858 **		flags -- RWG_* flags
1859 **
1860 **	Returns:
1861 **		true if the queue run successfully began.
1862 **
1863 **	Side Effects:
1864 **		runs things in the mail queue.
1865 */
1866 
1867 /* Minimum sleep time for persistent queue runners */
1868 #define MIN_SLEEP_TIME	5
1869 
1870 bool
1871 run_work_group(wgrp, flags)
1872 	int wgrp;
1873 	int flags;
1874 {
1875 	register ENVELOPE *e;
1876 	int njobs, qdir;
1877 	int sequenceno = 1;
1878 	int qgrp, endgrp, h, i;
1879 	time_t now;
1880 	bool full, more;
1881 	SM_RPOOL_T *rpool;
1882 	extern ENVELOPE BlankEnvelope;
1883 	extern SIGFUNC_DECL reapchild __P((int));
1884 
1885 	if (wgrp < 0)
1886 		return false;
1887 
1888 	/*
1889 	**  If no work will ever be selected, don't even bother reading
1890 	**  the queue.
1891 	*/
1892 
1893 	SM_GET_LA(now);
1894 
1895 	if (!bitset(RWG_PERSISTENT, flags) &&
1896 	    shouldqueue(WkRecipFact, Current_LA_time))
1897 	{
1898 		char *msg = "Skipping queue run -- load average too high";
1899 
1900 		if (bitset(RWG_VERBOSE, flags))
1901 			message("458 %s\n", msg);
1902 		if (LogLevel > 8)
1903 			sm_syslog(LOG_INFO, NOQID, "runqueue: %s", msg);
1904 		return false;
1905 	}
1906 
1907 	/*
1908 	**  See if we already have too many children.
1909 	*/
1910 
1911 	if (bitset(RWG_FORK, flags) &&
1912 	    WorkGrp[wgrp].wg_lowqintvl > 0 &&
1913 	    !bitset(RWG_PERSISTENT, flags) &&
1914 	    MaxChildren > 0 && CurChildren >= MaxChildren)
1915 	{
1916 		char *msg = "Skipping queue run -- too many children";
1917 
1918 		if (bitset(RWG_VERBOSE, flags))
1919 			message("458 %s (%d)\n", msg, CurChildren);
1920 		if (LogLevel > 8)
1921 			sm_syslog(LOG_INFO, NOQID, "runqueue: %s (%d)",
1922 				  msg, CurChildren);
1923 		return false;
1924 	}
1925 
1926 	/*
1927 	**  See if we want to go off and do other useful work.
1928 	*/
1929 
1930 	if (bitset(RWG_FORK, flags))
1931 	{
1932 		pid_t pid;
1933 
1934 		(void) sm_blocksignal(SIGCHLD);
1935 		(void) sm_signal(SIGCHLD, reapchild);
1936 
1937 		pid = dofork();
1938 		if (pid == -1)
1939 		{
1940 			const char *msg = "Skipping queue run -- fork() failed";
1941 			const char *err = sm_errstring(errno);
1942 
1943 			if (bitset(RWG_VERBOSE, flags))
1944 				message("458 %s: %s\n", msg, err);
1945 			if (LogLevel > 8)
1946 				sm_syslog(LOG_INFO, NOQID, "runqueue: %s: %s",
1947 					  msg, err);
1948 			(void) sm_releasesignal(SIGCHLD);
1949 			return false;
1950 		}
1951 		if (pid != 0)
1952 		{
1953 			/* parent -- pick up intermediate zombie */
1954 			(void) sm_blocksignal(SIGALRM);
1955 
1956 			/* wgrp only used when queue runners are persistent */
1957 			proc_list_add(pid, "Queue runner", PROC_QUEUE,
1958 				      WorkGrp[wgrp].wg_maxact,
1959 				      bitset(RWG_PERSISTENT, flags) ? wgrp : -1,
1960 				      NULL);
1961 			(void) sm_releasesignal(SIGALRM);
1962 			(void) sm_releasesignal(SIGCHLD);
1963 			return true;
1964 		}
1965 
1966 		/* child -- clean up signals */
1967 
1968 		/* Reset global flags */
1969 		RestartRequest = NULL;
1970 		RestartWorkGroup = false;
1971 		ShutdownRequest = NULL;
1972 		PendingSignal = 0;
1973 		CurrentPid = getpid();
1974 		close_sendmail_pid();
1975 
1976 		/*
1977 		**  Initialize exception stack and default exception
1978 		**  handler for child process.
1979 		*/
1980 
1981 		sm_exc_newthread(fatal_error);
1982 		clrcontrol();
1983 		proc_list_clear();
1984 
1985 		/* Add parent process as first child item */
1986 		proc_list_add(CurrentPid, "Queue runner child process",
1987 			      PROC_QUEUE_CHILD, 0, -1, NULL);
1988 		(void) sm_releasesignal(SIGCHLD);
1989 		(void) sm_signal(SIGCHLD, SIG_DFL);
1990 		(void) sm_signal(SIGHUP, SIG_DFL);
1991 		(void) sm_signal(SIGTERM, intsig);
1992 	}
1993 
1994 	/*
1995 	**  Release any resources used by the daemon code.
1996 	*/
1997 
1998 	clrdaemon();
1999 
2000 	/* force it to run expensive jobs */
2001 	NoConnect = false;
2002 
2003 	/* drop privileges */
2004 	if (geteuid() == (uid_t) 0)
2005 		(void) drop_privileges(false);
2006 
2007 	/*
2008 	**  Create ourselves an envelope
2009 	*/
2010 
2011 	CurEnv = &QueueEnvelope;
2012 	rpool = sm_rpool_new_x(NULL);
2013 	e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2014 	e->e_flags = BlankEnvelope.e_flags;
2015 	e->e_parent = NULL;
2016 
2017 	/* make sure we have disconnected from parent */
2018 	if (bitset(RWG_FORK, flags))
2019 	{
2020 		disconnect(1, e);
2021 		QuickAbort = false;
2022 	}
2023 
2024 	/*
2025 	**  If we are running part of the queue, always ignore stored
2026 	**  host status.
2027 	*/
2028 
2029 	if (QueueLimitId != NULL || QueueLimitSender != NULL ||
2030 	    QueueLimitQuarantine != NULL ||
2031 	    QueueLimitRecipient != NULL)
2032 	{
2033 		IgnoreHostStatus = true;
2034 		MinQueueAge = 0;
2035 	}
2036 
2037 	/*
2038 	**  Here is where we choose the queue group from the work group.
2039 	**  The caller of the "domorework" label must setup a new envelope.
2040 	*/
2041 
2042 	endgrp = WorkGrp[wgrp].wg_curqgrp; /* to not spin endlessly */
2043 
2044   domorework:
2045 
2046 	/*
2047 	**  Run a queue group if:
2048 	**  RWG_RUNALL bit is set or the bit for this group is set.
2049 	*/
2050 
2051 	now = curtime();
2052 	for (;;)
2053 	{
2054 		/*
2055 		**  Find the next queue group within the work group that
2056 		**  has been marked as needing a run.
2057 		*/
2058 
2059 		qgrp = WorkGrp[wgrp].wg_qgs[WorkGrp[wgrp].wg_curqgrp]->qg_index;
2060 		WorkGrp[wgrp].wg_curqgrp++; /* advance */
2061 		WorkGrp[wgrp].wg_curqgrp %= WorkGrp[wgrp].wg_numqgrp; /* wrap */
2062 		if (bitset(RWG_RUNALL, flags) ||
2063 		    (Queue[qgrp]->qg_nextrun <= now &&
2064 		     Queue[qgrp]->qg_nextrun != (time_t) -1))
2065 			break;
2066 		if (endgrp == WorkGrp[wgrp].wg_curqgrp)
2067 		{
2068 			e->e_id = NULL;
2069 			if (bitset(RWG_FORK, flags))
2070 				finis(true, true, ExitStat);
2071 			return true; /* we're done */
2072 		}
2073 	}
2074 
2075 	qdir = Queue[qgrp]->qg_curnum; /* round-robin init of queue position */
2076 #if _FFR_QUEUE_SCHED_DBG
2077 	if (tTd(69, 12))
2078 		sm_syslog(LOG_INFO, NOQID,
2079 			"rwg: wgrp=%d, qgrp=%d, qdir=%d, name=%s, curqgrp=%d, numgrps=%d",
2080 			wgrp, qgrp, qdir, qid_printqueue(qgrp, qdir),
2081 			WorkGrp[wgrp].wg_curqgrp, WorkGrp[wgrp].wg_numqgrp);
2082 #endif /* _FFR_QUEUE_SCHED_DBG */
2083 
2084 #if HASNICE
2085 	/* tweak niceness of queue runs */
2086 	if (Queue[qgrp]->qg_nice > 0)
2087 		(void) nice(Queue[qgrp]->qg_nice);
2088 #endif /* HASNICE */
2089 
2090 	/* XXX running queue group... */
2091 	sm_setproctitle(true, CurEnv, "running queue: %s",
2092 			qid_printqueue(qgrp, qdir));
2093 
2094 	if (LogLevel > 69 || tTd(63, 99))
2095 		sm_syslog(LOG_DEBUG, NOQID,
2096 			  "runqueue %s, pid=%d, forkflag=%d",
2097 			  qid_printqueue(qgrp, qdir), (int) CurrentPid,
2098 			  bitset(RWG_FORK, flags));
2099 
2100 	/*
2101 	**  Start making passes through the queue.
2102 	**	First, read and sort the entire queue.
2103 	**	Then, process the work in that order.
2104 	**		But if you take too long, start over.
2105 	*/
2106 
2107 	for (i = 0; i < Queue[qgrp]->qg_numqueues; i++)
2108 	{
2109 		(void) gatherq(qgrp, qdir, false, &full, &more, &h);
2110 #if SM_CONF_SHM
2111 		if (ShmId != SM_SHM_NO_ID)
2112 			QSHM_ENTRIES(Queue[qgrp]->qg_qpaths[qdir].qp_idx) = h;
2113 #endif /* SM_CONF_SHM */
2114 		/* If there are no more items in this queue advance */
2115 		if (!more)
2116 		{
2117 			/* A round-robin advance */
2118 			qdir++;
2119 			qdir %= Queue[qgrp]->qg_numqueues;
2120 		}
2121 
2122 		/* Has the WorkList reached the limit? */
2123 		if (full)
2124 			break; /* don't try to gather more */
2125 	}
2126 
2127 	/* order the existing work requests */
2128 	njobs = sortq(Queue[qgrp]->qg_maxlist);
2129 	Queue[qgrp]->qg_curnum = qdir; /* update */
2130 
2131 
2132 	if (!Verbose && bitnset(QD_FORK, Queue[qgrp]->qg_flags))
2133 	{
2134 		int loop, maxrunners;
2135 		pid_t pid;
2136 
2137 		/*
2138 		**  For this WorkQ we want to fork off N children (maxrunners)
2139 		**  at this point. Each child has a copy of WorkQ. Each child
2140 		**  will process every N-th item. The parent will wait for all
2141 		**  of the children to finish before moving on to the next
2142 		**  queue group within the work group. This saves us forking
2143 		**  a new runner-child for each work item.
2144 		**  It's valid for qg_maxqrun == 0 since this may be an
2145 		**  explicit "don't run this queue" setting.
2146 		*/
2147 
2148 		maxrunners = Queue[qgrp]->qg_maxqrun;
2149 
2150 		/*
2151 		**  If no runners are configured for this group but
2152 		**  the queue is "forced" then lets use 1 runner.
2153 		*/
2154 
2155 		if (maxrunners == 0 && bitset(RWG_FORCE, flags))
2156 			maxrunners = 1;
2157 
		/* No need to have more runners than there are jobs */
2159 		if (maxrunners > njobs)
2160 			maxrunners = njobs;
2161 		for (loop = 0; loop < maxrunners; loop++)
2162 		{
2163 			/*
2164 			**  Since the delivery may happen in a child and the
2165 			**  parent does not wait, the parent may close the
2166 			**  maps thereby removing any shared memory used by
2167 			**  the map.  Therefore, close the maps now so the
2168 			**  child will dynamically open them if necessary.
2169 			*/
2170 
2171 			closemaps(false);
2172 
2173 			pid = fork();
2174 			if (pid < 0)
2175 			{
2176 				syserr("run_work_group: cannot fork");
2177 				return false;
2178 			}
2179 			else if (pid > 0)
2180 			{
2181 				/* parent -- clean out connection cache */
2182 				mci_flush(false, NULL);
2183 #if _FFR_SKIP_DOMAINS
2184 				if (QueueSortOrder == QSO_BYHOST)
2185 				{
2186 					sequenceno += skip_domains(1);
2187 				}
2188 				else
2189 #endif /* _FFR_SKIP_DOMAINS */
2190 				{
2191 					/* for the skip */
2192 					WorkQ = WorkQ->w_next;
2193 					sequenceno++;
2194 				}
2195 				proc_list_add(pid, "Queue child runner process",
2196 					      PROC_QUEUE_CHILD, 0, -1, NULL);
2197 
2198 				/* No additional work, no additional runners */
2199 				if (WorkQ == NULL)
2200 					break;
2201 			}
2202 			else
2203 			{
2204 				/* child -- Reset global flags */
2205 				RestartRequest = NULL;
2206 				RestartWorkGroup = false;
2207 				ShutdownRequest = NULL;
2208 				PendingSignal = 0;
2209 				CurrentPid = getpid();
2210 				close_sendmail_pid();
2211 
2212 				/*
2213 				**  Initialize exception stack and default
2214 				**  exception handler for child process.
2215 				**  When fork()'d the child now has a private
2216 				**  copy of WorkQ at its current position.
2217 				*/
2218 
2219 				sm_exc_newthread(fatal_error);
2220 
2221 				/*
2222 				**  SMTP processes (whether -bd or -bs) set
2223 				**  SIGCHLD to reapchild to collect
2224 				**  children status.  However, at delivery
2225 				**  time, that status must be collected
2226 				**  by sm_wait() to be dealt with properly
2227 				**  (check success of delivery based
2228 				**  on status code, etc).  Therefore, if we
2229 				**  are an SMTP process, reset SIGCHLD
2230 				**  back to the default so reapchild
2231 				**  doesn't collect status before
2232 				**  sm_wait().
2233 				*/
2234 
2235 				if (OpMode == MD_SMTP ||
2236 				    OpMode == MD_DAEMON ||
2237 				    MaxQueueChildren > 0)
2238 				{
2239 					proc_list_clear();
2240 					sm_releasesignal(SIGCHLD);
2241 					(void) sm_signal(SIGCHLD, SIG_DFL);
2242 				}
2243 
2244 				/* child -- error messages to the transcript */
2245 				QuickAbort = OnlyOneError = false;
2246 				runner_work(e, sequenceno, true,
2247 					    maxrunners, njobs);
2248 
2249 				/* This child is done */
2250 				finis(true, true, ExitStat);
2251 				/* NOTREACHED */
2252 			}
2253 		}
2254 
2255 		sm_releasesignal(SIGCHLD);
2256 
2257 		/*
2258 		**  Wait until all of the runners have completed before
2259 		**  seeing if there is another queue group in the
2260 		**  work group to process.
2261 		**  XXX Future enhancement: don't wait() for all children
2262 		**  here, just go ahead and make sure that overall the number
2263 		**  of children is not exceeded.
2264 		*/
2265 
2266 		while (CurChildren > 0)
2267 		{
2268 			int status;
2269 			pid_t ret;
2270 
2271 			while ((ret = sm_wait(&status)) <= 0)
2272 				continue;
2273 			proc_list_drop(ret, status, NULL);
2274 		}
2275 	}
2276 	else if (Queue[qgrp]->qg_maxqrun > 0 || bitset(RWG_FORCE, flags))
2277 	{
2278 		/*
2279 		**  When current process will not fork children to do the work,
2280 		**  it will do the work itself. The 'skip' will be 1 since
2281 		**  there are no child runners to divide the work across.
2282 		*/
2283 
2284 		runner_work(e, sequenceno, false, 1, njobs);
2285 	}
2286 
2287 	/* free memory allocated by newenvelope() above */
2288 	sm_rpool_free(rpool);
2289 	QueueEnvelope.e_rpool = NULL;
2290 
2291 	/* Are there still more queues in the work group to process? */
2292 	if (endgrp != WorkGrp[wgrp].wg_curqgrp)
2293 	{
2294 		rpool = sm_rpool_new_x(NULL);
2295 		e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2296 		e->e_flags = BlankEnvelope.e_flags;
2297 		goto domorework;
2298 	}
2299 
2300 	/* No more queues in work group to process. Now check persistent. */
2301 	if (bitset(RWG_PERSISTENT, flags))
2302 	{
2303 		sequenceno = 1;
2304 		sm_setproctitle(true, CurEnv, "running queue: %s",
2305 				qid_printqueue(qgrp, qdir));
2306 
2307 		/*
2308 		**  close bogus maps, i.e., maps which caused a tempfail,
2309 		**	so we get fresh map connections on the next lookup.
2310 		**  closemaps() is also called when children are started.
2311 		*/
2312 
2313 		closemaps(true);
2314 
2315 		/* Close any cached connections. */
2316 		mci_flush(true, NULL);
2317 
2318 		/* Clean out expired related entries. */
2319 		rmexpstab();
2320 
2321 #if NAMED_BIND
2322 		/* Update MX records for FallbackMX. */
2323 		if (FallbackMX != NULL)
2324 			(void) getfallbackmxrr(FallbackMX);
2325 #endif /* NAMED_BIND */
2326 
2327 #if USERDB
2328 		/* close UserDatabase */
2329 		_udbx_close();
2330 #endif /* USERDB */
2331 
2332 #if SM_HEAP_CHECK
2333 		if (sm_debug_active(&SmHeapCheck, 2)
2334 		    && access("memdump", F_OK) == 0
2335 		   )
2336 		{
2337 			SM_FILE_T *out;
2338 
2339 			remove("memdump");
2340 			out = sm_io_open(SmFtStdio, SM_TIME_DEFAULT,
2341 					 "memdump.out", SM_IO_APPEND, NULL);
2342 			if (out != NULL)
2343 			{
2344 				(void) sm_io_fprintf(out, SM_TIME_DEFAULT, "----------------------\n");
2345 				sm_heap_report(out,
2346 					sm_debug_level(&SmHeapCheck) - 1);
2347 				(void) sm_io_close(out, SM_TIME_DEFAULT);
2348 			}
2349 		}
2350 #endif /* SM_HEAP_CHECK */
2351 
2352 		/* let me rest for a second to catch my breath */
2353 		if (njobs == 0 && WorkGrp[wgrp].wg_lowqintvl < MIN_SLEEP_TIME)
2354 			sleep(MIN_SLEEP_TIME);
2355 		else if (WorkGrp[wgrp].wg_lowqintvl <= 0)
2356 			sleep(QueueIntvl > 0 ? QueueIntvl : MIN_SLEEP_TIME);
2357 		else
2358 			sleep(WorkGrp[wgrp].wg_lowqintvl);
2359 
2360 		/*
2361 		**  Get the LA outside the WorkQ loop if necessary.
2362 		**  In a persistent queue runner the code is repeated over
2363 		**  and over but gatherq() may ignore entries due to
2364 		**  shouldqueue() (do we really have to do this twice?).
2365 		**  Hence the queue runners would just idle around when once
2366 		**  CurrentLA caused all entries in a queue to be ignored.
2367 		*/
2368 
2369 		if (njobs == 0)
2370 			SM_GET_LA(now);
2371 		rpool = sm_rpool_new_x(NULL);
2372 		e = newenvelope(&QueueEnvelope, CurEnv, rpool);
2373 		e->e_flags = BlankEnvelope.e_flags;
2374 		goto domorework;
2375 	}
2376 
2377 	/* exit without the usual cleanup */
2378 	e->e_id = NULL;
2379 	if (bitset(RWG_FORK, flags))
2380 		finis(true, true, ExitStat);
2381 	/* NOTREACHED */
2382 	return true;
2383 }
2384 
2385 /*
2386 **  DOQUEUERUN -- do a queue run?
2387 */
2388 
2389 bool
2390 doqueuerun()
2391 {
2392 	return DoQueueRun;
2393 }
2394 
2395 /*
2396 **  RUNQUEUEEVENT -- Sets a flag to indicate that a queue run should be done.
2397 **
2398 **	Parameters:
2399 **		none.
2400 **
2401 **	Returns:
2402 **		none.
2403 **
2404 **	Side Effects:
2405 **		The invocation of this function via an alarm may interrupt
2406 **		a set of actions. Thus errno may be set in that context.
2407 **		We need to restore errno at the end of this function to ensure
2408 **		that any work done here that sets errno doesn't return a
2409 **		misleading/false errno value. Errno may	be EINTR upon entry to
2410 **		this function because of non-restartable/continuable system
2411 **		API was active. Iff this is true we will override errno as
2412 **		a timeout (as a more accurate error message).
2413 **
2414 **	NOTE:	THIS CAN BE CALLED FROM A SIGNAL HANDLER.  DO NOT ADD
2415 **		ANYTHING TO THIS ROUTINE UNLESS YOU KNOW WHAT YOU ARE
2416 **		DOING.
2417 */
2418 
2419 void
2420 runqueueevent(ignore)
2421 	int ignore;
2422 {
2423 	int save_errno = errno;
2424 
2425 	/*
2426 	**  Set the general bit that we want a queue run,
2427 	**  tested in doqueuerun()
2428 	*/
2429 
2430 	DoQueueRun = true;
2431 #if _FFR_QUEUE_SCHED_DBG
2432 	if (tTd(69, 10))
2433 		sm_syslog(LOG_INFO, NOQID, "rqe: done");
2434 #endif /* _FFR_QUEUE_SCHED_DBG */
2435 
2436 	errno = save_errno;
2437 	if (errno == EINTR)
2438 		errno = ETIMEDOUT;
2439 }
2440 /*
**  GATHERQ -- gather messages from the message queue(s) into the work list.
2442 **
2443 **	Parameters:
2444 **		qgrp -- the index of the queue group.
2445 **		qdir -- the index of the queue directory.
2446 **		doall -- if set, include everything in the queue (even
2447 **			the jobs that cannot be run because the load
2448 **			average is too high, or MaxQueueRun is reached).
2449 **			Otherwise, exclude those jobs.
2450 **		full -- (optional) to be set 'true' if WorkList is full
2451 **		more -- (optional) to be set 'true' if there are still more
2452 **			messages in this queue not added to WorkList
**		pnentries -- (optional) total number of entries in queue
2454 **
2455 **	Returns:
**		The number of requests in the queue (not necessarily
2457 **		the number of requests in WorkList however).
2458 **
2459 **	Side Effects:
2460 **		prepares available work into WorkList
2461 */
2462 
/*
**  Bits of the mask gatherq() keeps while scanning one control file.
**  The NEED_* bits are set for the information that is still wanted
**  and cleared as the matching line is found (or, for the sender,
**  recipient and quarantine limits, as it is found to match).
**  HAS_QUARANTINE works the other way around: it is set when a 'q'
**  line turns up although neither quarantined nor lost items are
**  being processed.
*/

#define NEED_P		0001	/* 'P': priority */
#define NEED_T		0002	/* 'T': time */
#define NEED_R		0004	/* 'R': recipient */
#define NEED_S		0010	/* 'S': sender */
#define NEED_H		0020	/* host */
#define HAS_QUARANTINE	0040	/* has an unexpected 'q' line */
#define NEED_QUARANTINE	0100	/* 'q': reason */

static WORK	*WorkList = NULL;	/* list of unsorted work */
static int	WorkListSize = 0;	/* current max size of WorkList */
static int	WorkListCount = 0;	/* # of work items in WorkList */
2474 
2475 static int
2476 gatherq(qgrp, qdir, doall, full, more, pnentries)
2477 	int qgrp;
2478 	int qdir;
2479 	bool doall;
2480 	bool *full;
2481 	bool *more;
2482 	int *pnentries;
2483 {
2484 	register struct dirent *d;
2485 	register WORK *w;
2486 	register char *p;
2487 	DIR *f;
2488 	int i, num_ent, wn, nentries;
2489 	QUEUE_CHAR *check;
2490 	char qd[MAXPATHLEN];
2491 	char qf[MAXPATHLEN];
2492 
2493 	wn = WorkListCount - 1;
2494 	num_ent = 0;
2495 	nentries = 0;
2496 	if (qdir == NOQDIR)
2497 		(void) sm_strlcpy(qd, ".", sizeof(qd));
2498 	else
2499 		(void) sm_strlcpyn(qd, sizeof(qd), 2,
2500 			Queue[qgrp]->qg_qpaths[qdir].qp_name,
2501 			(bitset(QP_SUBQF,
2502 				Queue[qgrp]->qg_qpaths[qdir].qp_subdirs)
2503 					? "/qf" : ""));
2504 
2505 	if (tTd(41, 1))
2506 	{
2507 		sm_dprintf("gatherq:\n");
2508 
2509 		check = QueueLimitId;
2510 		while (check != NULL)
2511 		{
2512 			sm_dprintf("\tQueueLimitId = %s%s\n",
2513 				check->queue_negate ? "!" : "",
2514 				check->queue_match);
2515 			check = check->queue_next;
2516 		}
2517 
2518 		check = QueueLimitSender;
2519 		while (check != NULL)
2520 		{
2521 			sm_dprintf("\tQueueLimitSender = %s%s\n",
2522 				check->queue_negate ? "!" : "",
2523 				check->queue_match);
2524 			check = check->queue_next;
2525 		}
2526 
2527 		check = QueueLimitRecipient;
2528 		while (check != NULL)
2529 		{
2530 			sm_dprintf("\tQueueLimitRecipient = %s%s\n",
2531 				check->queue_negate ? "!" : "",
2532 				check->queue_match);
2533 			check = check->queue_next;
2534 		}
2535 
2536 		if (QueueMode == QM_QUARANTINE)
2537 		{
2538 			check = QueueLimitQuarantine;
2539 			while (check != NULL)
2540 			{
2541 				sm_dprintf("\tQueueLimitQuarantine = %s%s\n",
2542 					   check->queue_negate ? "!" : "",
2543 					   check->queue_match);
2544 				check = check->queue_next;
2545 			}
2546 		}
2547 	}
2548 
2549 	/* open the queue directory */
2550 	f = opendir(qd);
2551 	if (f == NULL)
2552 	{
2553 		syserr("gatherq: cannot open \"%s\"",
2554 			qid_printqueue(qgrp, qdir));
2555 		if (full != NULL)
2556 			*full = WorkListCount >= MaxQueueRun && MaxQueueRun > 0;
2557 		if (more != NULL)
2558 			*more = false;
2559 		return 0;
2560 	}
2561 
2562 	/*
2563 	**  Read the work directory.
2564 	*/
2565 
2566 	while ((d = readdir(f)) != NULL)
2567 	{
2568 		SM_FILE_T *cf;
2569 		int qfver = 0;
2570 		char lbuf[MAXNAME + 1];
2571 		struct stat sbuf;
2572 
2573 		if (tTd(41, 50))
2574 			sm_dprintf("gatherq: checking %s..", d->d_name);
2575 
2576 		/* is this an interesting entry? */
2577 		if (!(((QueueMode == QM_NORMAL &&
2578 			d->d_name[0] == NORMQF_LETTER) ||
2579 		       (QueueMode == QM_QUARANTINE &&
2580 			d->d_name[0] == QUARQF_LETTER) ||
2581 		       (QueueMode == QM_LOST &&
2582 			d->d_name[0] == LOSEQF_LETTER)) &&
2583 		      d->d_name[1] == 'f'))
2584 		{
2585 			if (tTd(41, 50))
2586 				sm_dprintf("  skipping\n");
2587 			continue;
2588 		}
2589 		if (tTd(41, 50))
2590 			sm_dprintf("\n");
2591 
2592 		if (strlen(d->d_name) >= MAXQFNAME)
2593 		{
2594 			if (Verbose)
2595 				(void) sm_io_fprintf(smioout, SM_TIME_DEFAULT,
2596 						     "gatherq: %s too long, %d max characters\n",
2597 						     d->d_name, MAXQFNAME);
2598 			if (LogLevel > 0)
2599 				sm_syslog(LOG_ALERT, NOQID,
2600 					  "gatherq: %s too long, %d max characters",
2601 					  d->d_name, MAXQFNAME);
2602 			continue;
2603 		}
2604 
2605 		++nentries;
2606 		check = QueueLimitId;
2607 		while (check != NULL)
2608 		{
2609 			if (strcontainedin(false, check->queue_match,
2610 					   d->d_name) != check->queue_negate)
2611 				break;
2612 			else
2613 				check = check->queue_next;
2614 		}
2615 		if (QueueLimitId != NULL && check == NULL)
2616 			continue;
2617 
2618 		/* grow work list if necessary */
2619 		if (++wn >= MaxQueueRun && MaxQueueRun > 0)
2620 		{
2621 			if (wn == MaxQueueRun && LogLevel > 0)
2622 				sm_syslog(LOG_WARNING, NOQID,
2623 					  "WorkList for %s maxed out at %d",
2624 					  qid_printqueue(qgrp, qdir),
2625 					  MaxQueueRun);
2626 			if (doall)
2627 				continue;	/* just count entries */
2628 			break;
2629 		}
2630 		if (wn >= WorkListSize)
2631 		{
2632 			grow_wlist(qgrp, qdir);
2633 			if (wn >= WorkListSize)
2634 				continue;
2635 		}
2636 		SM_ASSERT(wn >= 0);
2637 		w = &WorkList[wn];
2638 
2639 		(void) sm_strlcpyn(qf, sizeof(qf), 3, qd, "/", d->d_name);
2640 		if (stat(qf, &sbuf) < 0)
2641 		{
2642 			if (errno != ENOENT)
2643 				sm_syslog(LOG_INFO, NOQID,
2644 					  "gatherq: can't stat %s/%s",
2645 					  qid_printqueue(qgrp, qdir),
2646 					  d->d_name);
2647 			wn--;
2648 			continue;
2649 		}
2650 		if (!bitset(S_IFREG, sbuf.st_mode))
2651 		{
2652 			/* Yikes!  Skip it or we will hang on open! */
2653 			if (!((d->d_name[0] == DATAFL_LETTER ||
2654 			       d->d_name[0] == NORMQF_LETTER ||
2655 			       d->d_name[0] == QUARQF_LETTER ||
2656 			       d->d_name[0] == LOSEQF_LETTER ||
2657 			       d->d_name[0] == XSCRPT_LETTER) &&
2658 			      d->d_name[1] == 'f' && d->d_name[2] == '\0'))
2659 				syserr("gatherq: %s/%s is not a regular file",
2660 				       qid_printqueue(qgrp, qdir), d->d_name);
2661 			wn--;
2662 			continue;
2663 		}
2664 
2665 		/* avoid work if possible */
2666 		if ((QueueSortOrder == QSO_BYFILENAME ||
2667 		     QueueSortOrder == QSO_BYMODTIME ||
2668 		     QueueSortOrder == QSO_NONE ||
2669 		     QueueSortOrder == QSO_RANDOM) &&
2670 		    QueueLimitQuarantine == NULL &&
2671 		    QueueLimitSender == NULL &&
2672 		    QueueLimitRecipient == NULL)
2673 		{
2674 			w->w_qgrp = qgrp;
2675 			w->w_qdir = qdir;
2676 			w->w_name = newstr(d->d_name);
2677 			w->w_host = NULL;
2678 			w->w_lock = w->w_tooyoung = false;
2679 			w->w_pri = 0;
2680 			w->w_ctime = 0;
2681 			w->w_mtime = sbuf.st_mtime;
2682 			++num_ent;
2683 			continue;
2684 		}
2685 
2686 		/* open control file */
2687 		cf = sm_io_open(SmFtStdio, SM_TIME_DEFAULT, qf, SM_IO_RDONLY_B,
2688 				NULL);
2689 		if (cf == NULL && OpMode != MD_PRINT)
2690 		{
2691 			/* this may be some random person sending hir msgs */
2692 			if (tTd(41, 2))
2693 				sm_dprintf("gatherq: cannot open %s: %s\n",
2694 					d->d_name, sm_errstring(errno));
2695 			errno = 0;
2696 			wn--;
2697 			continue;
2698 		}
2699 		w->w_qgrp = qgrp;
2700 		w->w_qdir = qdir;
2701 		w->w_name = newstr(d->d_name);
2702 		w->w_host = NULL;
2703 		if (cf != NULL)
2704 		{
2705 			w->w_lock = !lockfile(sm_io_getinfo(cf, SM_IO_WHAT_FD,
2706 							    NULL),
2707 					      w->w_name, NULL,
2708 					      LOCK_SH|LOCK_NB);
2709 		}
2710 		w->w_tooyoung = false;
2711 
2712 		/* make sure jobs in creation don't clog queue */
2713 		w->w_pri = 0x7fffffff;
2714 		w->w_ctime = 0;
2715 		w->w_mtime = sbuf.st_mtime;
2716 
2717 		/* extract useful information */
2718 		i = NEED_P|NEED_T;
2719 		if (QueueSortOrder == QSO_BYHOST
2720 #if _FFR_RHS
2721 		    || QueueSortOrder == QSO_BYSHUFFLE
2722 #endif /* _FFR_RHS */
2723 		   )
2724 		{
2725 			/* need w_host set for host sort order */
2726 			i |= NEED_H;
2727 		}
2728 		if (QueueLimitSender != NULL)
2729 			i |= NEED_S;
2730 		if (QueueLimitRecipient != NULL)
2731 			i |= NEED_R;
2732 		if (QueueLimitQuarantine != NULL)
2733 			i |= NEED_QUARANTINE;
2734 		while (cf != NULL && i != 0 &&
2735 		       sm_io_fgets(cf, SM_TIME_DEFAULT, lbuf,
2736 				   sizeof(lbuf)) != NULL)
2737 		{
2738 			int c;
2739 			time_t age;
2740 
2741 			p = strchr(lbuf, '\n');
2742 			if (p != NULL)
2743 				*p = '\0';
2744 			else
2745 			{
2746 				/* flush rest of overly long line */
2747 				while ((c = sm_io_getc(cf, SM_TIME_DEFAULT))
2748 				       != SM_IO_EOF && c != '\n')
2749 					continue;
2750 			}
2751 
2752 			switch (lbuf[0])
2753 			{
2754 			  case 'V':
2755 				qfver = atoi(&lbuf[1]);
2756 				break;
2757 
2758 			  case 'P':
2759 				w->w_pri = atol(&lbuf[1]);
2760 				i &= ~NEED_P;
2761 				break;
2762 
2763 			  case 'T':
2764 				w->w_ctime = atol(&lbuf[1]);
2765 				i &= ~NEED_T;
2766 				break;
2767 
2768 			  case 'q':
2769 				if (QueueMode != QM_QUARANTINE &&
2770 				    QueueMode != QM_LOST)
2771 				{
2772 					if (tTd(41, 49))
2773 						sm_dprintf("%s not marked as quarantined but has a 'q' line\n",
2774 							   w->w_name);
2775 					i |= HAS_QUARANTINE;
2776 				}
2777 				else if (QueueMode == QM_QUARANTINE)
2778 				{
2779 					if (QueueLimitQuarantine == NULL)
2780 					{
2781 						i &= ~NEED_QUARANTINE;
2782 						break;
2783 					}
2784 					p = &lbuf[1];
2785 					check = QueueLimitQuarantine;
2786 					while (check != NULL)
2787 					{
2788 						if (strcontainedin(false,
2789 								   check->queue_match,
2790 								   p) !=
2791 						    check->queue_negate)
2792 							break;
2793 						else
2794 							check = check->queue_next;
2795 					}
2796 					if (check != NULL)
2797 						i &= ~NEED_QUARANTINE;
2798 				}
2799 				break;
2800 
2801 			  case 'R':
2802 				if (w->w_host == NULL &&
2803 				    (p = strrchr(&lbuf[1], '@')) != NULL)
2804 				{
2805 #if _FFR_RHS
2806 					if (QueueSortOrder == QSO_BYSHUFFLE)
2807 						w->w_host = newstr(&p[1]);
2808 					else
2809 #endif /* _FFR_RHS */
2810 						w->w_host = strrev(&p[1]);
2811 					makelower(w->w_host);
2812 					i &= ~NEED_H;
2813 				}
2814 				if (QueueLimitRecipient == NULL)
2815 				{
2816 					i &= ~NEED_R;
2817 					break;
2818 				}
2819 				if (qfver > 0)
2820 				{
2821 					p = strchr(&lbuf[1], ':');
2822 					if (p == NULL)
2823 						p = &lbuf[1];
2824 					else
2825 						++p; /* skip over ':' */
2826 				}
2827 				else
2828 					p = &lbuf[1];
2829 				check = QueueLimitRecipient;
2830 				while (check != NULL)
2831 				{
2832 					if (strcontainedin(true,
2833 							   check->queue_match,
2834 							   p) !=
2835 					    check->queue_negate)
2836 						break;
2837 					else
2838 						check = check->queue_next;
2839 				}
2840 				if (check != NULL)
2841 					i &= ~NEED_R;
2842 				break;
2843 
2844 			  case 'S':
2845 				check = QueueLimitSender;
2846 				while (check != NULL)
2847 				{
2848 					if (strcontainedin(true,
2849 							   check->queue_match,
2850 							   &lbuf[1]) !=
2851 					    check->queue_negate)
2852 						break;
2853 					else
2854 						check = check->queue_next;
2855 				}
2856 				if (check != NULL)
2857 					i &= ~NEED_S;
2858 				break;
2859 
2860 			  case 'K':
2861 #if _FFR_EXPDELAY
2862 				if (MaxQueueAge > 0)
2863 				{
2864 					time_t lasttry, delay;
2865 
2866 					lasttry = (time_t) atol(&lbuf[1]);
2867 					delay = MIN(lasttry - w->w_ctime,
2868 						    MaxQueueAge);
2869 					age = curtime() - lasttry;
2870 					if (age < delay)
2871 						w->w_tooyoung = true;
2872 					break;
2873 				}
2874 #endif /* _FFR_EXPDELAY */
2875 
2876 				age = curtime() - (time_t) atol(&lbuf[1]);
2877 				if (age >= 0 && MinQueueAge > 0 &&
2878 				    age < MinQueueAge)
2879 					w->w_tooyoung = true;
2880 				break;
2881 
2882 			  case 'N':
2883 				if (atol(&lbuf[1]) == 0)
2884 					w->w_tooyoung = false;
2885 				break;
2886 			}
2887 		}
2888 		if (cf != NULL)
2889 			(void) sm_io_close(cf, SM_TIME_DEFAULT);
2890 
2891 		if ((!doall && (shouldqueue(w->w_pri, w->w_ctime) ||
2892 		    w->w_tooyoung)) ||
2893 		    bitset(HAS_QUARANTINE, i) ||
2894 		    bitset(NEED_QUARANTINE, i) ||
2895 		    bitset(NEED_R|NEED_S, i))
2896 		{
2897 			/* don't even bother sorting this job in */
2898 			if (tTd(41, 49))
2899 				sm_dprintf("skipping %s (%x)\n", w->w_name, i);
2900 			sm_free(w->w_name); /* XXX */
2901 			if (w->w_host != NULL)
2902 				sm_free(w->w_host); /* XXX */
2903 			wn--;
2904 		}
2905 		else
2906 			++num_ent;
2907 	}
2908 	(void) closedir(f);
2909 	wn++;
2910 
2911 	i = wn - WorkListCount;
2912 	WorkListCount += SM_MIN(num_ent, WorkListSize);
2913 
2914 	if (more != NULL)
2915 		*more = WorkListCount < wn;
2916 
2917 	if (full != NULL)
2918 		*full = (wn >= MaxQueueRun && MaxQueueRun > 0) ||
2919 			(WorkList == NULL && wn > 0);
2920 
2921 	if (pnentries != NULL)
2922 		*pnentries = nentries;
2923 	return i;
2924 }
2925 /*
2926 **  SORTQ -- sort the work list
2927 **
2928 **	First the old WorkQ is cleared away. Then the WorkList is sorted
2929 **	for all items so that important (higher sorting value) items are not
2930 **	trunctated off. Then the most important items are moved from
2931 **	WorkList to WorkQ. The lower count of 'max' or MaxListCount items
2932 **	are moved.
2933 **
2934 **	Parameters:
2935 **		max -- maximum number of items to be placed in WorkQ
2936 **
2937 **	Returns:
2938 **		the number of items in WorkQ
2939 **
2940 **	Side Effects:
2941 **		WorkQ gets released and filled with new work. WorkList
2942 **		gets released. Work items get sorted in order.
2943 */
2944 
2945 static int
2946 sortq(max)
2947 	int max;
2948 {
2949 	register int i;			/* local counter */
2950 	register WORK *w;		/* tmp item pointer */
2951 	int wc = WorkListCount;		/* trim size for WorkQ */
2952 
2953 	if (WorkQ != NULL)
2954 	{
2955 		WORK *nw;
2956 
2957 		/* Clear out old WorkQ. */
2958 		for (w = WorkQ; w != NULL; w = nw)
2959 		{
2960 			nw = w->w_next;
2961 			sm_free(w->w_name); /* XXX */
2962 			if (w->w_host != NULL)
2963 				sm_free(w->w_host); /* XXX */
2964 			sm_free((char *) w); /* XXX */
2965 		}
2966 		WorkQ = NULL;
2967 	}
2968 
2969 	if (WorkList == NULL || wc <= 0)
2970 		return 0;
2971 
2972 	/*
2973 	**  The sort now takes place using all of the items in WorkList.
2974 	**  The list gets trimmed to the most important items after the sort.
2975 	**  If the trim were to happen before the sort then one or more
2976 	**  important items might get truncated off -- not what we want.
2977 	*/
2978 
2979 	if (QueueSortOrder == QSO_BYHOST)
2980 	{
2981 		/*
2982 		**  Sort the work directory for the first time,
2983 		**  based on host name, lock status, and priority.
2984 		*/
2985 
2986 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf1);
2987 
2988 		/*
2989 		**  If one message to host is locked, "lock" all messages
2990 		**  to that host.
2991 		*/
2992 
2993 		i = 0;
2994 		while (i < wc)
2995 		{
2996 			if (!WorkList[i].w_lock)
2997 			{
2998 				i++;
2999 				continue;
3000 			}
3001 			w = &WorkList[i];
3002 			while (++i < wc)
3003 			{
3004 				if (WorkList[i].w_host == NULL &&
3005 				    w->w_host == NULL)
3006 					WorkList[i].w_lock = true;
3007 				else if (WorkList[i].w_host != NULL &&
3008 					 w->w_host != NULL &&
3009 					 sm_strcasecmp(WorkList[i].w_host,
3010 						       w->w_host) == 0)
3011 					WorkList[i].w_lock = true;
3012 				else
3013 					break;
3014 			}
3015 		}
3016 
3017 		/*
3018 		**  Sort the work directory for the second time,
3019 		**  based on lock status, host name, and priority.
3020 		*/
3021 
3022 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf2);
3023 	}
3024 	else if (QueueSortOrder == QSO_BYTIME)
3025 	{
3026 		/*
3027 		**  Simple sort based on submission time only.
3028 		*/
3029 
3030 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf3);
3031 	}
3032 	else if (QueueSortOrder == QSO_BYFILENAME)
3033 	{
3034 		/*
3035 		**  Sort based on queue filename.
3036 		*/
3037 
3038 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf4);
3039 	}
3040 	else if (QueueSortOrder == QSO_RANDOM)
3041 	{
3042 		/*
3043 		**  Sort randomly.  To avoid problems with an instable sort,
3044 		**  use a random index into the queue file name to start
3045 		**  comparison.
3046 		*/
3047 
3048 		randi = get_rand_mod(MAXQFNAME);
3049 		if (randi < 2)
3050 			randi = 3;
3051 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf5);
3052 	}
3053 	else if (QueueSortOrder == QSO_BYMODTIME)
3054 	{
3055 		/*
3056 		**  Simple sort based on modification time of queue file.
3057 		**  This puts the oldest items first.
3058 		*/
3059 
3060 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf6);
3061 	}
3062 #if _FFR_RHS
3063 	else if (QueueSortOrder == QSO_BYSHUFFLE)
3064 	{
3065 		/*
3066 		**  Simple sort based on shuffled host name.
3067 		*/
3068 
3069 		init_shuffle_alphabet();
3070 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf7);
3071 	}
3072 #endif /* _FFR_RHS */
3073 	else if (QueueSortOrder == QSO_BYPRIORITY)
3074 	{
3075 		/*
3076 		**  Simple sort based on queue priority only.
3077 		*/
3078 
3079 		qsort((char *) WorkList, wc, sizeof(*WorkList), workcmpf0);
3080 	}
3081 	/* else don't sort at all */
3082 
3083 	/* Check if the per queue group item limit will be exceeded */
3084 	if (wc > max && max > 0)
3085 		wc = max;
3086 
3087 	/*
3088 	**  Convert the work list into canonical form.
3089 	**	Should be turning it into a list of envelopes here perhaps.
3090 	**  Only take the most important items up to the per queue group
3091 	**  maximum.
3092 	*/
3093 
3094 	for (i = wc; --i >= 0; )
3095 	{
3096 		w = (WORK *) xalloc(sizeof(*w));
3097 		w->w_qgrp = WorkList[i].w_qgrp;
3098 		w->w_qdir = WorkList[i].w_qdir;
3099 		w->w_name = WorkList[i].w_name;
3100 		w->w_host = WorkList[i].w_host;
3101 		w->w_lock = WorkList[i].w_lock;
3102 		w->w_tooyoung = WorkList[i].w_tooyoung;
3103 		w->w_pri = WorkList[i].w_pri;
3104 		w->w_ctime = WorkList[i].w_ctime;
3105 		w->w_mtime = WorkList[i].w_mtime;
3106 		w->w_next = WorkQ;
3107 		WorkQ = w;
3108 	}
3109 
3110 	/* free the rest of the list */
3111 	for (i = WorkListCount; --i >= wc; )
3112 	{
3113 		sm_free(WorkList[i].w_name);
3114 		if (WorkList[i].w_host != NULL)
3115 			sm_free(WorkList[i].w_host);
3116 	}
3117 
3118 	if (WorkList != NULL)
3119 		sm_free(WorkList); /* XXX */
3120 	WorkList = NULL;
3121 	WorkListSize = 0;
3122 	WorkListCount = 0;
3123 
3124 	if (tTd(40, 1))
3125 	{
3126 		for (w = WorkQ; w != NULL; w = w->w_next)
3127 		{
3128 			if (w->w_host != NULL)
3129 				sm_dprintf("%22s: pri=%ld %s\n",
3130 					w->w_name, w->w_pri, w->w_host);
3131 			else
3132 				sm_dprintf("%32s: pri=%ld\n",
3133 					w->w_name, w->w_pri);
3134 		}
3135 	}
3136 
3137 	return wc; /* return number of WorkQ items */
3138 }
3139 /*
3140 **  GROW_WLIST -- make the work list larger
3141 **
3142 **	Parameters:
3143 **		qgrp -- the index for the queue group.
3144 **		qdir -- the index for the queue directory.
3145 **
3146 **	Returns:
3147 **		none.
3148 **
3149 **	Side Effects:
3150 **		Adds another QUEUESEGSIZE entries to WorkList if possible.
3151 **		It can fail if there isn't enough memory, so WorkListSize
3152 **		should be checked again upon return.
3153 */
3154 
3155 static void
3156 grow_wlist(qgrp, qdir)
3157 	int qgrp;
3158 	int qdir;
3159 {
3160 	if (tTd(41, 1))
3161 		sm_dprintf("grow_wlist: WorkListSize=%d\n", WorkListSize);
3162 	if (WorkList == NULL)
3163 	{
3164 		WorkList = (WORK *) xalloc((sizeof(*WorkList)) *
3165 					   (QUEUESEGSIZE + 1));
3166 		WorkListSize = QUEUESEGSIZE;
3167 	}
3168 	else
3169 	{
3170 		int newsize = WorkListSize + QUEUESEGSIZE;
3171 		WORK *newlist = (WORK *) sm_realloc((char *) WorkList,
3172 					  (unsigned) sizeof(WORK) * (newsize + 1));
3173 
3174 		if (newlist != NULL)
3175 		{
3176 			WorkListSize = newsize;
3177 			WorkList = newlist;
3178 			if (LogLevel > 1)
3179 			{
3180 				sm_syslog(LOG_INFO, NOQID,
3181 					  "grew WorkList for %s to %d",
3182 					  qid_printqueue(qgrp, qdir),
3183 					  WorkListSize);
3184 			}
3185 		}
3186 		else if (LogLevel > 0)
3187 		{
3188 			sm_syslog(LOG_ALERT, NOQID,
3189 				  "FAILED to grow WorkList for %s to %d",
3190 				  qid_printqueue(qgrp, qdir), newsize);
3191 		}
3192 	}
3193 	if (tTd(41, 1))
3194 		sm_dprintf("grow_wlist: WorkListSize now %d\n", WorkListSize);
3195 }
3196 /*
3197 **  WORKCMPF0 -- simple priority-only compare function.
3198 **
3199 **	Parameters:
3200 **		a -- the first argument.
3201 **		b -- the second argument.
3202 **
3203 **	Returns:
3204 **		-1 if a < b
3205 **		 0 if a == b
3206 **		+1 if a > b
3207 **
3208 */
3209 
3210 static int
3211 workcmpf0(a, b)
3212 	register WORK *a;
3213 	register WORK *b;
3214 {
3215 	long pa = a->w_pri;
3216 	long pb = b->w_pri;
3217 
3218 	if (pa == pb)
3219 		return 0;
3220 	else if (pa > pb)
3221 		return 1;
3222 	else
3223 		return -1;
3224 }
3225 /*
3226 **  WORKCMPF1 -- first compare function for ordering work based on host name.
3227 **
3228 **	Sorts on host name, lock status, and priority in that order.
3229 **
3230 **	Parameters:
3231 **		a -- the first argument.
3232 **		b -- the second argument.
3233 **
3234 **	Returns:
3235 **		<0 if a < b
3236 **		 0 if a == b
3237 **		>0 if a > b
3238 **
3239 */
3240 
3241 static int
3242 workcmpf1(a, b)
3243 	register WORK *a;
3244 	register WORK *b;
3245 {
3246 	int i;
3247 
3248 	/* host name */
3249 	if (a->w_host != NULL && b->w_host == NULL)
3250 		return 1;
3251 	else if (a->w_host == NULL && b->w_host != NULL)
3252 		return -1;
3253 	if (a->w_host != NULL && b->w_host != NULL &&
3254 	    (i = sm_strcasecmp(a->w_host, b->w_host)) != 0)
3255 		return i;
3256 
3257 	/* lock status */
3258 	if (a->w_lock != b->w_lock)
3259 		return b->w_lock - a->w_lock;
3260 
3261 	/* job priority */
3262 	return workcmpf0(a, b);
3263 }
3264 /*
3265 **  WORKCMPF2 -- second compare function for ordering work based on host name.
3266 **
3267 **	Sorts on lock status, host name, and priority in that order.
3268 **
3269 **	Parameters:
3270 **		a -- the first argument.
3271 **		b -- the second argument.
3272 **
3273 **	Returns:
3274 **		<0 if a < b
3275 **		 0 if a == b
3276 **		>0 if a > b
3277 **
3278 */
3279 
3280 static int
3281 workcmpf2(a, b)
3282 	register WORK *a;
3283 	register WORK *b;
3284 {
3285 	int i;
3286 
3287 	/* lock status */
3288 	if (a->w_lock != b->w_lock)
3289 		return a->w_lock - b->w_lock;
3290 
3291 	/* host name */
3292 	if (a->w_host != NULL && b->w_host == NULL)
3293 		return 1;
3294 	else if (a->w_host == NULL && b->w_host != NULL)
3295 		return -1;
3296 	if (a->w_host != NULL && b->w_host != NULL &&
3297 	    (i = sm_strcasecmp(a->w_host, b->w_host)) != 0)
3298 		return i;
3299 
3300 	/* job priority */
3301 	return workcmpf0(a, b);
3302 }
3303 /*
3304 **  WORKCMPF3 -- simple submission-time-only compare function.
3305 **
3306 **	Parameters:
3307 **		a -- the first argument.
3308 **		b -- the second argument.
3309 **
3310 **	Returns:
3311 **		-1 if a < b
3312 **		 0 if a == b
3313 **		+1 if a > b
3314 **
3315 */
3316 
3317 static int
3318 workcmpf3(a, b)
3319 	register WORK *a;
3320 	register WORK *b;
3321 {
3322 	if (a->w_ctime > b->w_ctime)
3323 		return 1;
3324 	else if (a->w_ctime < b->w_ctime)
3325 		return -1;
3326 	else
3327 		return 0;
3328 }
3329 /*
3330 **  WORKCMPF4 -- compare based on file name
3331 **
3332 **	Parameters:
3333 **		a -- the first argument.
3334 **		b -- the second argument.
3335 **
3336 **	Returns:
3337 **		-1 if a < b
3338 **		 0 if a == b
3339 **		+1 if a > b
3340 **
3341 */
3342 
3343 static int
3344 workcmpf4(a, b)
3345 	register WORK *a;
3346 	register WORK *b;
3347 {
3348 	return strcmp(a->w_name, b->w_name);
3349 }
3350 /*
3351 **  WORKCMPF5 -- compare based on assigned random number
3352 **
3353 **	Parameters:
3354 **		a -- the first argument.
3355 **		b -- the second argument.
3356 **
3357 **	Returns:
3358 **		randomly 1/-1
3359 */
3360 
3361 /* ARGSUSED0 */
3362 static int
3363 workcmpf5(a, b)
3364 	register WORK *a;
3365 	register WORK *b;
3366 {
3367 	if (strlen(a->w_name) < randi || strlen(b->w_name) < randi)
3368 		return -1;
3369 	return a->w_name[randi] - b->w_name[randi];
3370 }
3371 /*
3372 **  WORKCMPF6 -- simple modification-time-only compare function.
3373 **
3374 **	Parameters:
3375 **		a -- the first argument.
3376 **		b -- the second argument.
3377 **
3378 **	Returns:
3379 **		-1 if a < b
3380 **		 0 if a == b
3381 **		+1 if a > b
3382 **
3383 */
3384 
3385 static int
3386 workcmpf6(a, b)
3387 	register WORK *a;
3388 	register WORK *b;
3389 {
3390 	if (a->w_mtime > b->w_mtime)
3391 		return 1;
3392 	else if (a->w_mtime < b->w_mtime)
3393 		return -1;
3394 	else
3395 		return 0;
3396 }
#if _FFR_RHS
/*
**  WORKCMPF7 -- compare function for ordering work based on shuffled host name.
**
**	Keys, in order of significance: lock status, then host name as
**	ordered by sm_strshufflecmp() (an item with no host sorts before
**	one that has a host), then priority (workcmpf0()).
**
**	Parameters:
**		a -- the first work item.
**		b -- the second work item.
**
**	Returns:
**		<0 if a sorts before b
**		 0 if a and b compare equal
**		>0 if a sorts after b
*/

static int
workcmpf7(a, b)
	register WORK *a;
	register WORK *b;
{
	/* lock status */
	if (a->w_lock != b->w_lock)
		return a->w_lock - b->w_lock;

	/* host name */
	if (a->w_host == NULL)
	{
		if (b->w_host != NULL)
			return -1;
	}
	else if (b->w_host == NULL)
	{
		return 1;
	}
	else
	{
		int hostdiff = sm_strshufflecmp(a->w_host, b->w_host);

		if (hostdiff != 0)
			return hostdiff;
	}

	/* lock status and hosts tie: fall back to job priority */
	return workcmpf0(a, b);
}
#endif /* _FFR_RHS */
/*
**  STRREV -- reverse string
**
**	Builds a freshly allocated copy of fwd with its characters in
**	the opposite order.  The copy is obtained from xalloc(); fwd
**	itself is not modified.
**
**	Parameters:
**		fwd -- the string to reverse.
**
**	Returns:
**		the reversed string.
*/

static char *
strrev(fwd)
	char *fwd;
{
	int len;
	char *rev;
	char *dst;
	char *src;

	len = strlen(fwd);
	rev = xalloc(len + 1);

	/* read the source back to front while filling the copy front to back */
	dst = rev;
	src = fwd + len;
	while (src > fwd)
		*dst++ = *--src;
	*dst = '\0';
	return rev;
}
3466 
3467 #if _FFR_RHS
3468 
3469 # define NASCII	128
3470 # define NCHAR	256
3471 
3472 static unsigned char ShuffledAlphabet[NCHAR];
3473 
3474 void
3475 init_shuffle_alphabet()
3476 {
3477 	static bool init = false;
3478 	int i;
3479 
3480 	if (init)
3481 		return;
3482 
3483 	/* fill the ShuffledAlphabet */
3484 	for (i = 0; i < NASCII; i++)
3485 		ShuffledAlphabet[i] = i;
3486 
3487 	/* mix it */
3488 	for (i = 1; i < NASCII; i++)
3489 	{
3490 		register int j = get_random() % NASCII;
3491 		register int tmp;
3492 
3493 		tmp = ShuffledAlphabet[j];
3494 		ShuffledAlphabet[j] = ShuffledAlphabet[i];
3495 		ShuffledAlphabet[i] = tmp;
3496 	}
3497 
3498 	/* make it case insensitive */
3499 	for (i = 'A'; i <= 'Z'; i++)
3500 		ShuffledAlphabet[i] = ShuffledAlphabet[i + 'a' - 'A'];
3501 
3502 	/* fill the upper part */
3503 	for (i = 0; i < NASCII; i++)
3504 		ShuffledAlphabet[i + NASCII] = ShuffledAlphabet[i];
3505 	init = true;
3506 }
3507 
3508 static int
3509 sm_strshufflecmp(a, b)
3510 	char *a;
3511 	char *b;
3512 {
3513 	const unsigned char *us1 = (const unsigned char *) a;
3514 	const unsigned char *us2 = (const unsigned char *) b;
3515 
3516 	while (ShuffledAlphabet[*us1] == ShuffledAlphabet[*us2++])
3517 	{
3518 		if (*us1++ == '\0')
3519 			return 0;
3520 	}
3521 	return (ShuffledAlphabet[*us1] - ShuffledAlphabet[*--us2]);
3522 }
3523 #endif /* _FFR_RHS */
3524 
3525 /*
3526 **  DOWORK -- do a work request.
3527 **
3528 **	Parameters:
3529 **		qgrp -- the index of the queue group for the job.
3530 **		qdir -- the index of the queue directory for the job.
3531 **		id -- the ID of the job to run.
3532 **		forkflag -- if set, run this in background.
3533 **		requeueflag -- if set, reinstantiate the queue quickly.
3534 **			This is used when expanding aliases in the queue.
3535 **			If forkflag is also set, it doesn't wait for the
3536 **			child.
3537 **		e - the envelope in which to run it.
3538 **
3539 **	Returns:
3540 **		process id of process that is running the queue job.
3541 **
3542 **	Side Effects:
3543 **		The work request is satisfied if possible.
3544 */
3545 
3546 pid_t
3547 dowork(qgrp, qdir, id, forkflag, requeueflag, e)
3548 	int qgrp;
3549 	int qdir;
3550 	char *id;
3551 	bool forkflag;
3552 	bool requeueflag;
3553 	register ENVELOPE *e;
3554 {
3555 	register pid_t pid;
3556 	SM_RPOOL_T *rpool;
3557 
3558 	if (tTd(40, 1))
3559 		sm_dprintf("dowork(%s/%s)\n", qid_printqueue(qgrp, qdir), id);
3560 
3561 	/*
3562 	**  Fork for work.
3563 	*/
3564 
3565 	if (forkflag)
3566 	{
3567 		/*
3568 		**  Since the delivery may happen in a child and the
3569 		**  parent does not wait, the parent may close the
3570 		**  maps thereby removing any shared memory used by
3571 		**  the map.  Therefore, close the maps now so the
3572 		**  child will dynamically open them if necessary.
3573 		*/
3574 
3575 		closemaps(false);
3576 
3577 		pid = fork();
3578 		if (pid < 0)
3579 		{
3580 			syserr("dowork: cannot fork");
3581 			return 0;
3582 		}
3583 		else if (pid > 0)
3584 		{
3585 			/* parent -- clean out connection cache */
3586 			mci_flush(false, NULL);
3587 		}
3588 		else
3589 		{
3590 			/*
3591 			**  Initialize exception stack and default exception
3592 			**  handler for child process.
3593 			*/
3594 
3595 			/* Reset global flags */
3596 			RestartRequest = NULL;
3597 			RestartWorkGroup = false;
3598 			ShutdownRequest = NULL;
3599 			PendingSignal = 0;
3600 			CurrentPid = getpid();
3601 			sm_exc_newthread(fatal_error);
3602 
3603 			/*
3604 			**  See note above about SMTP processes and SIGCHLD.
3605 			*/
3606 
3607 			if (OpMode == MD_SMTP ||
3608 			    OpMode == MD_DAEMON ||
3609 			    MaxQueueChildren > 0)
3610 			{
3611 				proc_list_clear();
3612 				sm_releasesignal(SIGCHLD);
3613 				(void) sm_signal(SIGCHLD, SIG_DFL);
3614 			}
3615 
3616 			/* child -- error messages to the transcript */
3617 			QuickAbort = OnlyOneError = false;
3618 		}
3619 	}
3620 	else
3621 	{
3622 		pid = 0;
3623 	}
3624 
3625 	if (pid == 0)
3626 	{
3627 		/*
3628 		**  CHILD
3629 		**	Lock the control file to avoid duplicate deliveries.
3630 		**		Then run the file as though we had just read it.
3631 		**	We save an idea of the temporary name so we
3632 		**		can recover on interrupt.
3633 		*/
3634 
3635 		if (forkflag)
3636 		{
3637 			/* Reset global flags */
3638 			RestartRequest = NULL;
3639 			RestartWorkGroup = false;
3640 			ShutdownRequest = NULL;
3641 			PendingSignal = 0;
3642 		}
3643 
3644 		/* set basic modes, etc. */
3645 		sm_clear_events();
3646 		clearstats();
3647 		rpool = sm_rpool_new_x(NULL);
3648 		clearenvelope(e, false, rpool);
3649 		e->e_flags |= EF_QUEUERUN|EF_GLOBALERRS;
3650 		set_delivery_mode(SM_DELIVER, e);
3651 		e->e_errormode = EM_MAIL;
3652 		e->e_id = id;
3653 		e->e_qgrp = qgrp;
3654 		e->e_qdir = qdir;
3655 		GrabTo = UseErrorsTo = false;
3656 		ExitStat = EX_OK;
3657 		if (forkflag)
3658 		{
3659 			disconnect(1, e);
3660 			set_op_mode(MD_QUEUERUN);
3661 		}
3662 		sm_setproctitle(true, e, "%s from queue", qid_printname(e));
3663 		if (LogLevel > 76)
3664 			sm_syslog(LOG_DEBUG, e->e_id, "dowork, pid=%d",
3665 				  (int) CurrentPid);
3666 
3667 		/* don't use the headers from sendmail.cf... */
3668 		e->e_header = NULL;
3669 
3670 		/* read the queue control file -- return if locked */
3671 		if (!readqf(e, false))
3672 		{
3673 			if (tTd(40, 4) && e->e_id != NULL)
3674 				sm_dprintf("readqf(%s) failed\n",
3675 					qid_printname(e));
3676 			e->e_id = NULL;
3677 			if (forkflag)
3678 				finis(false, true, EX_OK);
3679 			else
3680 			{
3681 				/* adding this frees 8 bytes */
3682 				clearenvelope(e, false, rpool);
3683 
3684 				/* adding this frees 12 bytes */
3685 				sm_rpool_free(rpool);
3686 				e->e_rpool = NULL;
3687 				return 0;
3688 			}
3689 		}
3690 
3691 		e->e_flags |= EF_INQUEUE;
3692 		eatheader(e, requeueflag, true);
3693 
3694 		if (requeueflag)
3695 			queueup(e, false, false);
3696 
3697 		/* do the delivery */
3698 		sendall(e, SM_DELIVER);
3699 
3700 		/* finish up and exit */
3701 		if (forkflag)
3702 			finis(true, true, ExitStat);
3703 		else
3704 		{
3705 			(void) dropenvelope(e, true, false);
3706 			sm_rpool_free(rpool);
3707 			e->e_rpool = NULL;
3708 		}
3709 	}
3710 	e->e_id = NULL;
3711 	return pid;
3712 }
3713 
3714 /*
3715 **  DOWORKLIST -- process a list of envelopes as work requests
3716 **
3717 **	Similar to dowork(), except that after forking, it processes an
3718 **	envelope and its siblings, treating each envelope as a work request.
3719 **
3720 **	Parameters:
3721 **		el -- envelope to be processed including its siblings.
3722 **		forkflag -- if set, run this in background.
3723 **		requeueflag -- if set, reinstantiate the queue quickly.
3724 **			This is used when expanding aliases in the queue.
3725 **			If forkflag is also set, it doesn't wait for the
3726 **			child.
3727 **
3728 **	Returns:
3729 **		process id of process that is running the queue job.
3730 **
3731 **	Side Effects:
3732 **		The work request is satisfied if possible.
3733 */
3734 
3735 pid_t
3736 doworklist(el, forkflag, requeueflag)
3737 	ENVELOPE *el;
3738 	bool forkflag;
3739 	bool requeueflag;
3740 {
3741 	register pid_t pid;
3742 	ENVELOPE *ei;
3743 
3744 	if (tTd(40, 1))
3745 		sm_dprintf("doworklist()\n");
3746 
3747 	/*
3748 	**  Fork for work.
3749 	*/
3750 
3751 	if (forkflag)
3752 	{
3753 		/*
3754 		**  Since the delivery may happen in a child and the
3755 		**  parent does not wait, the parent may close the
3756 		**  maps thereby removing any shared memory used by
3757 		**  the map.  Therefore, close the maps now so the
3758 		**  child will dynamically open them if necessary.
3759 		*/
3760 
3761 		closemaps(false);
3762 
3763 		pid = fork();
3764 		if (pid < 0)
3765 		{
3766 			syserr("doworklist: cannot fork");
3767 			return 0;
3768 		}
3769 		else if (pid > 0)
3770 		{
3771 			/* parent -- clean out connection cache */
3772 			mci_flush(false, NULL);
3773 		}
3774 		else
3775 		{
3776 			/*
3777 			**  Initialize exception stack and default exception
3778 			**  handler for child process.
3779 			*/
3780 
3781 			/* Reset global flags */
3782 			RestartRequest = NULL;
3783 			RestartWorkGroup = false;
3784 			ShutdownRequest = NULL;
3785 			PendingSignal = 0;
3786 			CurrentPid = getpid();
3787 			sm_exc_newthread(fatal_error);
3788 
3789 			/*
3790 			**  See note above about SMTP processes and SIGCHLD.
3791 			*/
3792 
3793 			if (OpMode == MD_SMTP ||
3794 			    OpMode == MD_DAEMON ||
3795 			    MaxQueueChildren > 0)
3796 			{
3797 				proc_list_clear();
3798 				sm_releasesignal(SIGCHLD);
3799 				(void) sm_signal(SIGCHLD, SIG_DFL);
3800 			}
3801 
3802 			/* child -- error messages to the transcript */
3803 			QuickAbort = OnlyOneError = false;
3804 		}
3805 	}
3806 	else
3807 	{
3808 		pid = 0;
3809 	}
3810 
3811 	if (pid != 0)
3812 		return pid;
3813 
3814 	/*
3815 	**  IN CHILD
3816 	**	Lock the control file to avoid duplicate deliveries.
3817 	**		Then run the file as though we had just read it.
3818 	**	We save an idea of the temporary name so we
3819 	**		can recover on interrupt.
3820 	*/
3821 
3822 	if (forkflag)
3823 	{
3824 		/* Reset global flags */
3825 		RestartRequest = NULL;
3826 		RestartWorkGroup = false;
3827 		ShutdownRequest = NULL;
3828 		PendingSignal = 0;
3829 	}
3830 
3831 	/* set basic modes, etc. */
3832 	sm_clear_events();
3833 	clearstats();
3834 	GrabTo = UseErrorsTo = false;
3835 	ExitStat = EX_OK;
3836 	if (forkflag)
3837 	{
3838 		disconnect(1, el);
3839 		set_op_mode(MD_QUEUERUN);
3840 	}
3841 	if (LogLevel > 76)
3842 		sm_syslog(LOG_DEBUG, el->e_id, "doworklist, pid=%d",
3843 			  (int) CurrentPid);
3844 
3845 	for (ei = el; ei != NULL; ei = ei->e_sibling)
3846 	{
3847 		ENVELOPE e;
3848 		SM_RPOOL_T *rpool;
3849 
3850 		if (WILL_BE_QUEUED(ei->e_sendmode))
3851 			continue;
3852 		else if (QueueMode != QM_QUARANTINE &&
3853 			 ei->e_quarmsg != NULL)
3854 			continue;
3855 
3856 		rpool = sm_rpool_new_x(NULL);
3857 		clearenvelope(&e, true, rpool);
3858 		e.e_flags |= EF_QUEUERUN|EF_GLOBALERRS;
3859 		set_delivery_mode(SM_DELIVER, &e);
3860 		e.e_errormode = EM_MAIL;
3861 		e.e_id = ei->e_id;
3862 		e.e_qgrp = ei->e_qgrp;
3863 		e.e_qdir = ei->e_qdir;
3864 		openxscript(&e);
3865 		sm_setproctitle(true, &e, "%s from queue", qid_printname(&e));
3866 
3867 		/* don't use the headers from sendmail.cf... */
3868 		e.e_header = NULL;
3869 		CurEnv = &e;
3870 
3871 		/* read the queue control file -- return if locked */
3872 		if (readqf(&e, false))
3873 		{
3874 			e.e_flags |= EF_INQUEUE;
3875 			eatheader(&e, requeueflag, true);
3876 
3877 			if (requeueflag)
3878 				queueup(&e, false, false);
3879 
3880 			/* do the delivery */
3881 			sendall(&e, SM_DELIVER);
3882 			(void) dropenvelope(&e, true, false);
3883 		}
3884 		else
3885 		{
3886 			if (tTd(40, 4) && e.e_id != NULL)
3887 				sm_dprintf("readqf(%s) failed\n",
3888 					qid_printname(&e));
3889 		}
3890 		sm_rpool_free(rpool);
3891 		ei->e_id = NULL;
3892 	}
3893 
3894 	/* restore CurEnv */
3895 	CurEnv = el;
3896 
3897 	/* finish up and exit */
3898 	if (forkflag)
3899 		finis(true, true, ExitStat);
3900 	return 0;
3901 }
3902 /*
3903 **  READQF -- read queue file and set up environment.
3904 **
3905 **	Parameters:
3906 **		e -- the envelope of the job to run.
3907 **		openonly -- only open the qf (returned as e_lockfp)
3908 **
3909 **	Returns:
3910 **		true if it successfully read the queue file.
3911 **		false otherwise.
3912 **
3913 **	Side Effects:
3914 **		The queue file is returned locked.
3915 */
3916 
3917 static bool
3918 readqf(e, openonly)
3919 	register ENVELOPE *e;
3920 	bool openonly;
3921 {
3922 	register SM_FILE_T *qfp;
3923 	ADDRESS *ctladdr;
3924 	struct stat st, stf;
3925 	char *bp;
3926 	int qfver = 0;
3927 	long hdrsize = 0;
3928 	register char *p;
3929 	char *frcpt = NULL;
3930 	char *orcpt = NULL;
3931 	bool nomore = false;
3932 	bool bogus = false;
3933 	MODE_T qsafe;
3934 	char *err;
3935 	char qf[MAXPATHLEN];
3936 	char buf[MAXLINE];
3937 	int bufsize;
3938 
3939 	/*
3940 	**  Read and process the file.
3941 	*/
3942 
3943 	SM_REQUIRE(e != NULL);
3944 	bp = NULL;
3945 	(void) sm_strlcpy(qf, queuename(e, ANYQFL_LETTER), sizeof(qf));
3946 	qfp = sm_io_open(SmFtStdio, SM_TIME_DEFAULT, qf, SM_IO_RDWR_B, NULL);
3947 	if (qfp == NULL)
3948 	{
3949 		int save_errno = errno;
3950 
3951 		if (tTd(40, 8))
3952 			sm_dprintf("readqf(%s): sm_io_open failure (%s)\n",
3953 				qf, sm_errstring(errno));
3954 		errno = save_errno;
3955 		if (errno != ENOENT
3956 		    )
3957 			syserr("readqf: no control file %s", qf);
3958 		RELEASE_QUEUE;
3959 		return false;
3960 	}
3961 
3962 	if (!lockfile(sm_io_getinfo(qfp, SM_IO_WHAT_FD, NULL), qf, NULL,
3963 		      LOCK_EX|LOCK_NB))
3964 	{
3965 		/* being processed by another queuer */
3966 		if (Verbose)
3967 			(void) sm_io_fprintf(smioout, SM_TIME_DEFAULT,
3968 					     "%s: locked\n", e->e_id);
3969 		if (tTd(40, 8))
3970 			sm_dprintf("%s: locked\n", e->e_id);
3971 		if (LogLevel > 19)
3972 			sm_syslog(LOG_DEBUG, e->e_id, "locked");
3973 		(void) sm_io_close(qfp, SM_TIME_DEFAULT);
3974 		RELEASE_QUEUE;
3975 		return false;
3976 	}
3977 
3978 	RELEASE_QUEUE;
3979 
3980 	/*
3981 	**  Prevent locking race condition.
3982 	**
3983 	**  Process A: readqf(): qfp = fopen(qffile)
3984 	**  Process B: queueup(): rename(tf, qf)
3985 	**  Process B: unlocks(tf)
3986 	**  Process A: lockfile(qf);
3987 	**
3988 	**  Process A (us) has the old qf file (before the rename deleted
3989 	**  the directory entry) and will be delivering based on old data.
3990 	**  This can lead to multiple deliveries of the same recipients.
3991 	**
3992 	**  Catch this by checking if the underlying qf file has changed
3993 	**  *after* acquiring our lock and if so, act as though the file
3994 	**  was still locked (i.e., just return like the lockfile() case
3995 	**  above.
3996 	*/
3997 
3998 	if (stat(qf, &stf) < 0 ||
3999 	    fstat(sm_io_getinfo(qfp, SM_IO_WHAT_FD, NULL), &st) < 0)
4000 	{
4001 		/* must have been being processed by someone else */
4002 		if (tTd(40, 8))
4003 			sm_dprintf("readqf(%s): [f]stat failure (%s)\n",
4004 				qf, sm_errstring(errno));
4005 		(void) sm_io_close(qfp, SM_TIME_DEFAULT);
4006 		return false;
4007 	}
4008 
4009 	if (st.st_nlink != stf.st_nlink ||
4010 	    st.st_dev != stf.st_dev ||
4011 	    ST_INODE(st) != ST_INODE(stf) ||
4012 #if HAS_ST_GEN && 0		/* AFS returns garbage in st_gen */
4013 	    st.st_gen != stf.st_gen ||
4014 #endif /* HAS_ST_GEN && 0 */
4015 	    st.st_uid != stf.