xref: /illumos-gate/usr/src/cmd/sendmail/src/mime.c (revision 2a8bcb4e)
1 /*
2  * Copyright (c) 1998-2003, 2006 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.147 2007/09/26 23:29:11 ca Exp $")
18 
19 /*
20 **  MIME support.
21 **
22 **	I am indebted to John Beck of Hewlett-Packard, who contributed
23 **	his code to me for inclusion.  As it turns out, I did not use
24 **	his code since he used a "minimum change" approach that used
25 **	several temp files, and I wanted a "minimum impact" approach
26 **	that would avoid copying.  However, looking over his code
27 **	helped me cement my understanding of the problem.
28 **
29 **	I also looked at, but did not directly use, Nathaniel
30 **	Borenstein's "code.c" module.  Again, it functioned as
31 **	a file-to-file translator, which did not fit within my
32 **	design bounds, but it was a useful base for understanding
33 **	the problem.
34 */
35 
36 /* use "old" mime 7 to 8 algorithm by default */
37 #ifndef MIME7TO8_OLD
38 # define MIME7TO8_OLD	1
39 #endif /* ! MIME7TO8_OLD */
40 
41 #if MIME8TO7
42 static int	isboundary __P((char *, char **));
43 static int	mimeboundary __P((char *, char **));
44 static int	mime_getchar __P((SM_FILE_T *, char **, int *));
45 static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
46 
/*
**  Character sets for hex and base64 encoding.  Entry N is the output
**  character for the value N.  These are read-only lookup tables.
*/

static const char	Base16Code[] =	"0123456789ABCDEF";
static const char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names of the MBT_* codes, indexed by code (debug output) */
static const char	*const MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/* if set, mime_getchar_crlf() expands each NL into CR LF */
static bool	MapNLtoCRLF;
63 
64 /*
65 **  MIME8TO7 -- output 8 bit body in 7 bit format
66 **
67 **	The header has already been output -- this has to do the
68 **	8 to 7 bit conversion.  It would be easy if we didn't have
69 **	to deal with nested formats (multipart/xxx and message/rfc822).
70 **
71 **	We won't be called if we don't have to do a conversion, and
72 **	appropriate MIME-Version: and Content-Type: fields have been
73 **	output.  Any Content-Transfer-Encoding: field has not been
74 **	output, and we can add it here.
75 **
76 **	Parameters:
77 **		mci -- mailer connection information.
78 **		header -- the header for this body part.
79 **		e -- envelope.
80 **		boundaries -- the currently pending message boundaries.
81 **			NULL if we are processing the outer portion.
82 **		flags -- to tweak processing.
83 **		level -- recursion level.
84 **
85 **	Returns:
86 **		An indicator of what terminated the message part:
87 **		  MBT_FINAL -- the final boundary
88 **		  MBT_INTERMED -- an intermediate boundary
89 **		  MBT_NOTSEP -- an end of file
90 **		  SM_IO_EOF -- I/O error occurred
91 */
92 
93 struct args
94 {
95 	char	*a_field;	/* name of field */
96 	char	*a_value;	/* value of that field */
97 };
98 
99 int
mime8to7(mci,header,e,boundaries,flags,level)100 mime8to7(mci, header, e, boundaries, flags, level)
101 	register MCI *mci;
102 	HDR *header;
103 	register ENVELOPE *e;
104 	char **boundaries;
105 	int flags;
106 	int level;
107 {
108 	register char *p;
109 	int linelen;
110 	int bt;
111 	off_t offset;
112 	size_t sectionsize, sectionhighbits;
113 	int i;
114 	char *type;
115 	char *subtype;
116 	char *cte;
117 	char **pvp;
118 	int argc = 0;
119 	char *bp;
120 	bool use_qp = false;
121 	struct args argv[MAXMIMEARGS];
122 	char bbuf[128];
123 	char buf[MAXLINE];
124 	char pvpbuf[MAXLINE];
125 	extern unsigned char MimeTokenTab[256];
126 
127 	if (level > MAXMIMENESTING)
128 	{
129 		if (!bitset(EF_TOODEEP, e->e_flags))
130 		{
131 			if (tTd(43, 4))
132 				sm_dprintf("mime8to7: too deep, level=%d\n",
133 					   level);
134 			usrerr("mime8to7: recursion level %d exceeded",
135 				level);
136 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
137 		}
138 	}
139 	if (tTd(43, 1))
140 	{
141 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
142 		if (boundaries[0] == NULL)
143 			sm_dprintf(" <none>");
144 		else
145 		{
146 			for (i = 0; boundaries[i] != NULL; i++)
147 				sm_dprintf(" %s", boundaries[i]);
148 		}
149 		sm_dprintf("\n");
150 	}
151 	MapNLtoCRLF = true;
152 	p = hvalue("Content-Transfer-Encoding", header);
153 	if (p == NULL ||
154 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
155 			   MimeTokenTab, false)) == NULL ||
156 	    pvp[0] == NULL)
157 	{
158 		cte = NULL;
159 	}
160 	else
161 	{
162 		cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
163 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
164 	}
165 
166 	type = subtype = NULL;
167 	p = hvalue("Content-Type", header);
168 	if (p == NULL)
169 	{
170 		if (bitset(M87F_DIGEST, flags))
171 			p = "message/rfc822";
172 		else
173 			p = "text/plain";
174 	}
175 	if (p != NULL &&
176 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
177 			   MimeTokenTab, false)) != NULL &&
178 	    pvp[0] != NULL)
179 	{
180 		if (tTd(43, 40))
181 		{
182 			for (i = 0; pvp[i] != NULL; i++)
183 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
184 		}
185 		type = *pvp++;
186 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
187 		    *++pvp != NULL)
188 		{
189 			subtype = *pvp++;
190 		}
191 
192 		/* break out parameters */
193 		while (*pvp != NULL && argc < MAXMIMEARGS)
194 		{
195 			/* skip to semicolon separator */
196 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
197 				pvp++;
198 			if (*pvp++ == NULL || *pvp == NULL)
199 				break;
200 
201 			/* complain about empty values */
202 			if (strcmp(*pvp, ";") == 0)
203 			{
204 				usrerr("mime8to7: Empty parameter in Content-Type header");
205 
206 				/* avoid bounce loops */
207 				e->e_flags |= EF_DONT_MIME;
208 				continue;
209 			}
210 
211 			/* extract field name */
212 			argv[argc].a_field = *pvp++;
213 
214 			/* see if there is a value */
215 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
216 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
217 			{
218 				argv[argc].a_value = *pvp;
219 				argc++;
220 			}
221 		}
222 	}
223 
224 	/* check for disaster cases */
225 	if (type == NULL)
226 		type = "-none-";
227 	if (subtype == NULL)
228 		subtype = "-none-";
229 
230 	/* don't propagate some flags more than one level into the message */
231 	flags &= ~M87F_DIGEST;
232 
233 	/*
234 	**  Check for cases that can not be encoded.
235 	**
236 	**	For example, you can't encode certain kinds of types
237 	**	or already-encoded messages.  If we find this case,
238 	**	just copy it through.
239 	*/
240 
241 	(void) sm_snprintf(buf, sizeof(buf), "%.100s/%.100s", type, subtype);
242 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
243 		flags |= M87F_NO8BIT;
244 
245 # ifdef USE_B_CLASS
246 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
247 		MapNLtoCRLF = false;
248 # endif /* USE_B_CLASS */
249 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
250 		use_qp = true;
251 
252 	/*
253 	**  Multipart requires special processing.
254 	**
255 	**	Do a recursive descent into the message.
256 	*/
257 
258 	if (sm_strcasecmp(type, "multipart") == 0 &&
259 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)) &&
260 	    !bitset(EF_TOODEEP, e->e_flags)
261 	   )
262 	{
263 
264 		if (sm_strcasecmp(subtype, "digest") == 0)
265 			flags |= M87F_DIGEST;
266 
267 		for (i = 0; i < argc; i++)
268 		{
269 			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
270 				break;
271 		}
272 		if (i >= argc || argv[i].a_value == NULL)
273 		{
274 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
275 				i >= argc ? "missing" : "bogus", p);
276 			p = "---";
277 
278 			/* avoid bounce loops */
279 			e->e_flags |= EF_DONT_MIME;
280 		}
281 		else
282 		{
283 			p = argv[i].a_value;
284 			stripquotes(p);
285 		}
286 		if (sm_strlcpy(bbuf, p, sizeof(bbuf)) >= sizeof(bbuf))
287 		{
288 			usrerr("mime8to7: multipart boundary \"%s\" too long",
289 				p);
290 
291 			/* avoid bounce loops */
292 			e->e_flags |= EF_DONT_MIME;
293 		}
294 
295 		if (tTd(43, 1))
296 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
297 				bbuf);
298 		for (i = 0; i < MAXMIMENESTING; i++)
299 		{
300 			if (boundaries[i] == NULL)
301 				break;
302 		}
303 		if (i >= MAXMIMENESTING)
304 		{
305 			if (tTd(43, 4))
306 				sm_dprintf("mime8to7: too deep, i=%d\n", i);
307 			if (!bitset(EF_TOODEEP, e->e_flags))
308 				usrerr("mime8to7: multipart nesting boundary too deep");
309 
310 			/* avoid bounce loops */
311 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
312 		}
313 		else
314 		{
315 			boundaries[i] = bbuf;
316 			boundaries[i + 1] = NULL;
317 		}
318 		mci->mci_flags |= MCIF_INMIME;
319 
320 		/* skip the early "comment" prologue */
321 		if (!putline("", mci))
322 			goto writeerr;
323 		mci->mci_flags &= ~MCIF_INHEADER;
324 		bt = MBT_FINAL;
325 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof(buf))
326 			!= NULL)
327 		{
328 			bt = mimeboundary(buf, boundaries);
329 			if (bt != MBT_NOTSEP)
330 				break;
331 			if (!putxline(buf, strlen(buf), mci,
332 					PXLF_MAPFROM|PXLF_STRIP8BIT))
333 				goto writeerr;
334 			if (tTd(43, 99))
335 				sm_dprintf("  ...%s", buf);
336 		}
337 		if (sm_io_eof(e->e_dfp))
338 			bt = MBT_FINAL;
339 		while (bt != MBT_FINAL)
340 		{
341 			auto HDR *hdr = NULL;
342 
343 			(void) sm_strlcpyn(buf, sizeof(buf), 2, "--", bbuf);
344 			if (!putline(buf, mci))
345 				goto writeerr;
346 			if (tTd(43, 35))
347 				sm_dprintf("  ...%s\n", buf);
348 			collect(e->e_dfp, false, &hdr, e, false);
349 			if (tTd(43, 101))
350 				putline("+++after collect", mci);
351 			if (!putheader(mci, hdr, e, flags))
352 				goto writeerr;
353 			if (tTd(43, 101))
354 				putline("+++after putheader", mci);
355 			bt = mime8to7(mci, hdr, e, boundaries, flags,
356 				      level + 1);
357 			if (bt == SM_IO_EOF)
358 				goto writeerr;
359 		}
360 		(void) sm_strlcpyn(buf, sizeof(buf), 3, "--", bbuf, "--");
361 		if (!putline(buf, mci))
362 			goto writeerr;
363 		if (tTd(43, 35))
364 			sm_dprintf("  ...%s\n", buf);
365 		boundaries[i] = NULL;
366 		mci->mci_flags &= ~MCIF_INMIME;
367 
368 		/* skip the late "comment" epilogue */
369 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof(buf))
370 			!= NULL)
371 		{
372 			bt = mimeboundary(buf, boundaries);
373 			if (bt != MBT_NOTSEP)
374 				break;
375 			if (!putxline(buf, strlen(buf), mci,
376 					PXLF_MAPFROM|PXLF_STRIP8BIT))
377 				goto writeerr;
378 			if (tTd(43, 99))
379 				sm_dprintf("  ...%s", buf);
380 		}
381 		if (sm_io_eof(e->e_dfp))
382 			bt = MBT_FINAL;
383 		if (tTd(43, 3))
384 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
385 				MimeBoundaryNames[bt]);
386 		return bt;
387 	}
388 
389 	/*
390 	**  Message/xxx types -- recurse exactly once.
391 	**
392 	**	Class 's' is predefined to have "rfc822" only.
393 	*/
394 
395 	if (sm_strcasecmp(type, "message") == 0)
396 	{
397 		if (!wordinclass(subtype, 's') ||
398 		    bitset(EF_TOODEEP, e->e_flags))
399 		{
400 			flags |= M87F_NO8BIT;
401 		}
402 		else
403 		{
404 			auto HDR *hdr = NULL;
405 
406 			if (!putline("", mci))
407 				goto writeerr;
408 
409 			mci->mci_flags |= MCIF_INMIME;
410 			collect(e->e_dfp, false, &hdr, e, false);
411 			if (tTd(43, 101))
412 				putline("+++after collect", mci);
413 			if (!putheader(mci, hdr, e, flags))
414 				goto writeerr;
415 			if (tTd(43, 101))
416 				putline("+++after putheader", mci);
417 			if (hvalue("MIME-Version", hdr) == NULL &&
418 			    !bitset(M87F_NO8TO7, flags) &&
419 			    !putline("MIME-Version: 1.0", mci))
420 				goto writeerr;
421 			bt = mime8to7(mci, hdr, e, boundaries, flags,
422 				      level + 1);
423 			mci->mci_flags &= ~MCIF_INMIME;
424 			return bt;
425 		}
426 	}
427 
428 	/*
429 	**  Non-compound body type
430 	**
431 	**	Compute the ratio of seven to eight bit characters;
432 	**	use that as a heuristic to decide how to do the
433 	**	encoding.
434 	*/
435 
436 	sectionsize = sectionhighbits = 0;
437 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
438 	{
439 		/* remember where we were */
440 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
441 		if (offset == -1)
442 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
443 			       DATAFL_LETTER, e->e_id);
444 
445 		/* do a scan of this body type to count character types */
446 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof(buf))
447 			!= NULL)
448 		{
449 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
450 				break;
451 			for (p = buf; *p != '\0'; p++)
452 			{
453 				/* count bytes with the high bit set */
454 				sectionsize++;
455 				if (bitset(0200, *p))
456 					sectionhighbits++;
457 			}
458 
459 			/*
460 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
461 			**  assume base64.  This heuristic avoids double-reading
462 			**  large graphics or video files.
463 			*/
464 
465 			if (sectionsize >= 4096 &&
466 			    sectionhighbits > sectionsize / 4)
467 				break;
468 		}
469 
470 		/* return to the original offset for processing */
471 		/* XXX use relative seeks to handle >31 bit file sizes? */
472 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
473 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
474 			       DATAFL_LETTER, e->e_id);
475 		else
476 			sm_io_clearerr(e->e_dfp);
477 	}
478 
479 	/*
480 	**  Heuristically determine encoding method.
481 	**	If more than 1/8 of the total characters have the
482 	**	eighth bit set, use base64; else use quoted-printable.
483 	**	However, only encode binary encoded data as base64,
484 	**	since otherwise the NL=>CRLF mapping will be a problem.
485 	*/
486 
487 	if (tTd(43, 8))
488 	{
489 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
490 			(long) sectionhighbits, (long) sectionsize,
491 			cte == NULL ? "[none]" : cte,
492 			type == NULL ? "[none]" : type,
493 			subtype == NULL ? "[none]" : subtype);
494 	}
495 	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
496 		sectionsize = sectionhighbits;
497 	linelen = 0;
498 	bp = buf;
499 	if (sectionhighbits == 0)
500 	{
501 		/* no encoding necessary */
502 		if (cte != NULL &&
503 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
504 			   mci->mci_flags) &&
505 		    !bitset(M87F_NO8TO7, flags))
506 		{
507 			/*
508 			**  Skip _unless_ in MIME mode and potentially
509 			**  converting from 8 bit to 7 bit MIME.  See
510 			**  putheader() for the counterpart where the
511 			**  CTE header is skipped in the opposite
512 			**  situation.
513 			*/
514 
515 			(void) sm_snprintf(buf, sizeof(buf),
516 				"Content-Transfer-Encoding: %.200s", cte);
517 			if (!putline(buf, mci))
518 				goto writeerr;
519 			if (tTd(43, 36))
520 				sm_dprintf("  ...%s\n", buf);
521 		}
522 		if (!putline("", mci))
523 			goto writeerr;
524 		mci->mci_flags &= ~MCIF_INHEADER;
525 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof(buf))
526 			!= NULL)
527 		{
528 			if (!bitset(MCIF_INLONGLINE, mci->mci_flags))
529 			{
530 				bt = mimeboundary(buf, boundaries);
531 				if (bt != MBT_NOTSEP)
532 					break;
533 			}
534 			if (!putxline(buf, strlen(buf), mci,
535 				      PXLF_MAPFROM|PXLF_NOADDEOL))
536 				goto writeerr;
537 		}
538 		if (sm_io_eof(e->e_dfp))
539 			bt = MBT_FINAL;
540 	}
541 	else if (!MapNLtoCRLF ||
542 		 (sectionsize / 8 < sectionhighbits && !use_qp))
543 	{
544 		/* use base64 encoding */
545 		int c1, c2;
546 
547 		if (tTd(43, 36))
548 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
549 		if (!putline("Content-Transfer-Encoding: base64", mci))
550 			goto writeerr;
551 		(void) sm_snprintf(buf, sizeof(buf),
552 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
553 			MyHostName, e->e_id);
554 		if (!putline(buf, mci) || !putline("", mci))
555 			goto writeerr;
556 		mci->mci_flags &= ~MCIF_INHEADER;
557 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
558 			SM_IO_EOF)
559 		{
560 			if (linelen > 71)
561 			{
562 				*bp = '\0';
563 				if (!putline(buf, mci))
564 					goto writeerr;
565 				linelen = 0;
566 				bp = buf;
567 			}
568 			linelen += 4;
569 			*bp++ = Base64Code[(c1 >> 2)];
570 			c1 = (c1 & 0x03) << 4;
571 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
572 			if (c2 == SM_IO_EOF)
573 			{
574 				*bp++ = Base64Code[c1];
575 				*bp++ = '=';
576 				*bp++ = '=';
577 				break;
578 			}
579 			c1 |= (c2 >> 4) & 0x0f;
580 			*bp++ = Base64Code[c1];
581 			c1 = (c2 & 0x0f) << 2;
582 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
583 			if (c2 == SM_IO_EOF)
584 			{
585 				*bp++ = Base64Code[c1];
586 				*bp++ = '=';
587 				break;
588 			}
589 			c1 |= (c2 >> 6) & 0x03;
590 			*bp++ = Base64Code[c1];
591 			*bp++ = Base64Code[c2 & 0x3f];
592 		}
593 		*bp = '\0';
594 		if (!putline(buf, mci))
595 			goto writeerr;
596 	}
597 	else
598 	{
599 		/* use quoted-printable encoding */
600 		int c1, c2;
601 		int fromstate;
602 		BITMAP256 badchars;
603 
604 		/* set up map of characters that must be mapped */
605 		clrbitmap(badchars);
606 		for (c1 = 0x00; c1 < 0x20; c1++)
607 			setbitn(c1, badchars);
608 		clrbitn('\t', badchars);
609 		for (c1 = 0x7f; c1 < 0x100; c1++)
610 			setbitn(c1, badchars);
611 		setbitn('=', badchars);
612 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
613 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
614 				setbitn(*p, badchars);
615 
616 		if (tTd(43, 36))
617 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
618 		if (!putline("Content-Transfer-Encoding: quoted-printable",
619 				mci))
620 			goto writeerr;
621 		(void) sm_snprintf(buf, sizeof(buf),
622 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
623 			MyHostName, e->e_id);
624 		if (!putline(buf, mci) || !putline("", mci))
625 			goto writeerr;
626 		mci->mci_flags &= ~MCIF_INHEADER;
627 		fromstate = 0;
628 		c2 = '\n';
629 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
630 			SM_IO_EOF)
631 		{
632 			if (c1 == '\n')
633 			{
634 				if (c2 == ' ' || c2 == '\t')
635 				{
636 					*bp++ = '=';
637 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
638 					*bp++ = Base16Code[c2 & 0x0f];
639 				}
640 				if (buf[0] == '.' && bp == &buf[1])
641 				{
642 					buf[0] = '=';
643 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
644 					*bp++ = Base16Code['.' & 0x0f];
645 				}
646 				*bp = '\0';
647 				if (!putline(buf, mci))
648 					goto writeerr;
649 				linelen = fromstate = 0;
650 				bp = buf;
651 				c2 = c1;
652 				continue;
653 			}
654 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
655 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
656 			{
657 				*bp++ = '=';
658 				*bp++ = '2';
659 				*bp++ = '0';
660 				linelen += 3;
661 			}
662 			else if (c2 == ' ' || c2 == '\t')
663 			{
664 				*bp++ = c2;
665 				linelen++;
666 			}
667 			if (linelen > 72 &&
668 			    (linelen > 75 || c1 != '.' ||
669 			     (linelen > 73 && c2 == '.')))
670 			{
671 				if (linelen > 73 && c2 == '.')
672 					bp--;
673 				else
674 					c2 = '\n';
675 				*bp++ = '=';
676 				*bp = '\0';
677 				if (!putline(buf, mci))
678 					goto writeerr;
679 				linelen = fromstate = 0;
680 				bp = buf;
681 				if (c2 == '.')
682 				{
683 					*bp++ = '.';
684 					linelen++;
685 				}
686 			}
687 			if (bitnset(bitidx(c1), badchars))
688 			{
689 				*bp++ = '=';
690 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
691 				*bp++ = Base16Code[c1 & 0x0f];
692 				linelen += 3;
693 			}
694 			else if (c1 != ' ' && c1 != '\t')
695 			{
696 				if (linelen < 4 && c1 == "From"[linelen])
697 					fromstate++;
698 				*bp++ = c1;
699 				linelen++;
700 			}
701 			c2 = c1;
702 		}
703 
704 		/* output any saved character */
705 		if (c2 == ' ' || c2 == '\t')
706 		{
707 			*bp++ = '=';
708 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
709 			*bp++ = Base16Code[c2 & 0x0f];
710 			linelen += 3;
711 		}
712 
713 		if (linelen > 0 || boundaries[0] != NULL)
714 		{
715 			*bp = '\0';
716 			if (!putline(buf, mci))
717 				goto writeerr;
718 		}
719 
720 	}
721 	if (tTd(43, 3))
722 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
723 	return bt;
724 
725   writeerr:
726 	return SM_IO_EOF;
727 }
728 /*
729 **  MIME_GETCHAR -- get a character for MIME processing
730 **
731 **	Treats boundaries as SM_IO_EOF.
732 **
733 **	Parameters:
734 **		fp -- the input file.
735 **		boundaries -- the current MIME boundaries.
736 **		btp -- if the return value is SM_IO_EOF, *btp is set to
737 **			the type of the boundary.
738 **
739 **	Returns:
740 **		The next character in the input stream.
741 */
742 
743 static int
mime_getchar(fp,boundaries,btp)744 mime_getchar(fp, boundaries, btp)
745 	register SM_FILE_T *fp;
746 	char **boundaries;
747 	int *btp;
748 {
749 	int c;
750 	static unsigned char *bp = NULL;
751 	static int buflen = 0;
752 	static bool atbol = true;	/* at beginning of line */
753 	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
754 	static unsigned char buf[128];	/* need not be a full line */
755 	int start = 0;			/* indicates position of - in buffer */
756 
757 	if (buflen == 1 && *bp == '\n')
758 	{
759 		/* last \n in buffer may be part of next MIME boundary */
760 		c = *bp;
761 	}
762 	else if (buflen > 0)
763 	{
764 		buflen--;
765 		return *bp++;
766 	}
767 	else
768 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
769 	bp = buf;
770 	buflen = 0;
771 	if (c == '\n')
772 	{
773 		/* might be part of a MIME boundary */
774 		*bp++ = c;
775 		atbol = true;
776 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
777 		if (c == '\n')
778 		{
779 			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
780 			return c;
781 		}
782 		start = 1;
783 	}
784 	if (c != SM_IO_EOF)
785 		*bp++ = c;
786 	else
787 		bt = MBT_FINAL;
788 	if (atbol && c == '-')
789 	{
790 		/* check for a message boundary */
791 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
792 		if (c != '-')
793 		{
794 			if (c != SM_IO_EOF)
795 				*bp++ = c;
796 			else
797 				bt = MBT_FINAL;
798 			buflen = bp - buf - 1;
799 			bp = buf;
800 			return *bp++;
801 		}
802 
803 		/* got "--", now check for rest of separator */
804 		*bp++ = '-';
805 		while (bp < &buf[sizeof(buf) - 2] &&
806 		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
807 		       c != '\n')
808 		{
809 			*bp++ = c;
810 		}
811 		*bp = '\0';	/* XXX simply cut off? */
812 		bt = mimeboundary((char *) &buf[start], boundaries);
813 		switch (bt)
814 		{
815 		  case MBT_FINAL:
816 		  case MBT_INTERMED:
817 			/* we have a message boundary */
818 			buflen = 0;
819 			*btp = bt;
820 			return SM_IO_EOF;
821 		}
822 
823 		if (bp < &buf[sizeof(buf) - 2] && c != SM_IO_EOF)
824 			*bp++ = c;
825 	}
826 
827 	atbol = c == '\n';
828 	buflen = bp - buf - 1;
829 	if (buflen < 0)
830 	{
831 		*btp = bt;
832 		return SM_IO_EOF;
833 	}
834 	bp = buf;
835 	return *bp++;
836 }
837 /*
838 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
839 **
840 **	Parameters:
841 **		fp -- the input file.
842 **		boundaries -- the current MIME boundaries.
843 **		btp -- if the return value is SM_IO_EOF, *btp is set to
844 **			the type of the boundary.
845 **
846 **	Returns:
847 **		The next character in the input stream.
848 */
849 
850 static int
mime_getchar_crlf(fp,boundaries,btp)851 mime_getchar_crlf(fp, boundaries, btp)
852 	register SM_FILE_T *fp;
853 	char **boundaries;
854 	int *btp;
855 {
856 	static bool sendlf = false;
857 	int c;
858 
859 	if (sendlf)
860 	{
861 		sendlf = false;
862 		return '\n';
863 	}
864 	c = mime_getchar(fp, boundaries, btp);
865 	if (c == '\n' && MapNLtoCRLF)
866 	{
867 		sendlf = true;
868 		return '\r';
869 	}
870 	return c;
871 }
872 /*
873 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
874 **
875 **	Parameters:
876 **		line -- the input line.
877 **		boundaries -- the set of currently pending boundaries.
878 **
879 **	Returns:
880 **		MBT_NOTSEP -- if this is not a separator line
881 **		MBT_INTERMED -- if this is an intermediate separator
882 **		MBT_FINAL -- if this is a final boundary
883 **		MBT_SYNTAX -- if this is a boundary for the wrong
884 **			enclosure -- i.e., a syntax error.
885 */
886 
887 static int
mimeboundary(line,boundaries)888 mimeboundary(line, boundaries)
889 	register char *line;
890 	char **boundaries;
891 {
892 	int type = MBT_NOTSEP;
893 	int i;
894 	int savec;
895 
896 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
897 		return MBT_NOTSEP;
898 	i = strlen(line);
899 	if (i > 0 && line[i - 1] == '\n')
900 		i--;
901 
902 	/* strip off trailing whitespace */
903 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'
904 #if _FFR_MIME_CR_OK
905 		|| line[i - 1] == '\r'
906 #endif /* _FFR_MIME_CR_OK */
907 	       ))
908 		i--;
909 	savec = line[i];
910 	line[i] = '\0';
911 
912 	if (tTd(43, 5))
913 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
914 
915 	/* check for this as an intermediate boundary */
916 	if (isboundary(&line[2], boundaries) >= 0)
917 		type = MBT_INTERMED;
918 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
919 	{
920 		/* check for a final boundary */
921 		line[i - 2] = '\0';
922 		if (isboundary(&line[2], boundaries) >= 0)
923 			type = MBT_FINAL;
924 		line[i - 2] = '-';
925 	}
926 
927 	line[i] = savec;
928 	if (tTd(43, 5))
929 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
930 	return type;
931 }
932 /*
933 **  DEFCHARSET -- return default character set for message
934 **
935 **	The first choice for character set is for the mailer
936 **	corresponding to the envelope sender.  If neither that
937 **	nor the global configuration file has a default character
938 **	set defined, return "unknown-8bit" as recommended by
939 **	RFC 1428 section 3.
940 **
941 **	Parameters:
942 **		e -- the envelope for this message.
943 **
944 **	Returns:
945 **		The default character set for that mailer.
946 */
947 
948 char *
defcharset(e)949 defcharset(e)
950 	register ENVELOPE *e;
951 {
952 	if (e != NULL && e->e_from.q_mailer != NULL &&
953 	    e->e_from.q_mailer->m_defcharset != NULL)
954 		return e->e_from.q_mailer->m_defcharset;
955 	if (DefaultCharSet != NULL)
956 		return DefaultCharSet;
957 	return "unknown-8bit";
958 }
959 /*
960 **  ISBOUNDARY -- is a given string a currently valid boundary?
961 **
962 **	Parameters:
963 **		line -- the current input line.
964 **		boundaries -- the list of valid boundaries.
965 **
966 **	Returns:
967 **		The index number in boundaries if the line is found.
968 **		-1 -- otherwise.
969 **
970 */
971 
972 static int
isboundary(line,boundaries)973 isboundary(line, boundaries)
974 	char *line;
975 	char **boundaries;
976 {
977 	register int i;
978 
979 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
980 	{
981 		if (strcmp(line, boundaries[i]) == 0)
982 			return i;
983 	}
984 	return -1;
985 }
986 #endif /* MIME8TO7 */
987 
988 #if MIME7TO8
989 static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
990 
991 /*
992 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
993 **
994 **  This is a hack. Supports translating the two 7-bit body-encodings
995 **  (quoted-printable and base64) to 8-bit coded bodies.
996 **
997 **  There is not much point in supporting multipart here, as the UA
998 **  will be able to deal with encoded MIME bodies if it can parse MIME
999 **  multipart messages.
1000 **
1001 **  Note also that we won't be called unless it is a text/plain MIME
1002 **  message, encoded base64 or QP and mailer flag '9' has been defined
1003 **  on mailer.
1004 **
1005 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
1006 **
1007 **	Parameters:
1008 **		mci -- mailer connection information.
1009 **		header -- the header for this body part.
1010 **		e -- envelope.
1011 **
1012 **	Returns:
1013 **		true iff body was written successfully
1014 */
1015 
/*
**  Base64 decoding table, indexed by 7-bit character code: the 6-bit
**  value of the character, or -1 if it is not in the base64 alphabet.
**  The element type is explicitly signed because plain char may be
**  unsigned, in which case the -1 entries would read back as 255.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/*
**  CHAR64 -- value of one base64 character, -1 if c is outside 0..127
**	or not in the alphabet.  Evaluates its argument more than once.
*/

# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
1029 
1030 bool
mime7to8(mci,header,e)1031 mime7to8(mci, header, e)
1032 	register MCI *mci;
1033 	HDR *header;
1034 	register ENVELOPE *e;
1035 {
1036 	int pxflags;
1037 	register char *p;
1038 	char *cte;
1039 	char **pvp;
1040 	unsigned char *fbufp;
1041 	char buf[MAXLINE];
1042 	unsigned char fbuf[MAXLINE + 1];
1043 	char pvpbuf[MAXLINE];
1044 	extern unsigned char MimeTokenTab[256];
1045 
1046 	p = hvalue("Content-Transfer-Encoding", header);
1047 	if (p == NULL ||
1048 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
1049 			   MimeTokenTab, false)) == NULL ||
1050 	    pvp[0] == NULL)
1051 	{
1052 		/* "can't happen" -- upper level should have caught this */
1053 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
1054 
1055 		/* avoid bounce loops */
1056 		e->e_flags |= EF_DONT_MIME;
1057 
1058 		/* cheap failsafe algorithm -- should work on text/plain */
1059 		if (p != NULL)
1060 		{
1061 			(void) sm_snprintf(buf, sizeof(buf),
1062 				"Content-Transfer-Encoding: %s", p);
1063 			if (!putline(buf, mci))
1064 				goto writeerr;
1065 		}
1066 		if (!putline("", mci))
1067 			goto writeerr;
1068 		mci->mci_flags &= ~MCIF_INHEADER;
1069 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof(buf))
1070 			!= NULL)
1071 		{
1072 			if (!putline(buf, mci))
1073 				goto writeerr;
1074 		}
1075 		return true;
1076 	}
1077 	cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
1078 	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1079 
1080 	mci->mci_flags |= MCIF_INHEADER;
1081 	if (!putline("Content-Transfer-Encoding: 8bit", mci))
1082 		goto writeerr;
1083 	(void) sm_snprintf(buf, sizeof(buf),
1084 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1085 		cte, MyHostName, e->e_id);
1086 	if (!putline(buf, mci) || !putline("", mci))
1087 		goto writeerr;
1088 	mci->mci_flags &= ~MCIF_INHEADER;
1089 
1090 	/*
1091 	**  Translate body encoding to 8-bit.  Supports two types of
1092 	**  encodings; "base64" and "quoted-printable". Assume qp if
1093 	**  it is not base64.
1094 	*/
1095 
1096 	pxflags = PXLF_MAPFROM;
1097 	if (sm_strcasecmp(cte, "base64") == 0)
1098 	{
1099 		int c1, c2, c3, c4;
1100 
1101 		fbufp = fbuf;
1102 		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1103 			SM_IO_EOF)
1104 		{
1105 			if (isascii(c1) && isspace(c1))
1106 				continue;
1107 
1108 			do
1109 			{
1110 				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1111 			} while (isascii(c2) && isspace(c2));
1112 			if (c2 == SM_IO_EOF)
1113 				break;
1114 
1115 			do
1116 			{
1117 				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1118 			} while (isascii(c3) && isspace(c3));
1119 			if (c3 == SM_IO_EOF)
1120 				break;
1121 
1122 			do
1123 			{
1124 				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1125 			} while (isascii(c4) && isspace(c4));
1126 			if (c4 == SM_IO_EOF)
1127 				break;
1128 
1129 			if (c1 == '=' || c2 == '=')
1130 				continue;
1131 			c1 = CHAR64(c1);
1132 			c2 = CHAR64(c2);
1133 
1134 #if MIME7TO8_OLD
1135 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1136 			++fbufp;
1137 #else /* MIME7TO8_OLD */
1138 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1139 		{					\
1140 			++fbufp;			\
1141 			pxflags |= PXLF_NOADDEOL;	\
1142 		}
1143 #endif /* MIME7TO8_OLD */
1144 
1145 #define PUTLINE64	\
1146 	do		\
1147 	{		\
1148 		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
1149 		{							\
1150 			CHK_EOL;					\
1151 			if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \
1152 				goto writeerr;				\
1153 			pxflags &= ~PXLF_NOADDEOL;			\
1154 			fbufp = fbuf;					\
1155 		}	\
1156 	} while (0)
1157 
1158 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1159 			PUTLINE64;
1160 			if (c3 == '=')
1161 				continue;
1162 			c3 = CHAR64(c3);
1163 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1164 			PUTLINE64;
1165 			if (c4 == '=')
1166 				continue;
1167 			c4 = CHAR64(c4);
1168 			*fbufp = ((c3 & 0x03) << 6) | c4;
1169 			PUTLINE64;
1170 		}
1171 	}
1172 	else
1173 	{
1174 		int off;
1175 
1176 		/* quoted-printable */
1177 		pxflags |= PXLF_NOADDEOL;
1178 		fbufp = fbuf;
1179 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1180 				   sizeof(buf)) != NULL)
1181 		{
1182 			off = mime_fromqp((unsigned char *) buf, &fbufp,
1183 					  &fbuf[MAXLINE] - fbufp);
1184 again:
1185 			if (off < -1)
1186 				continue;
1187 
1188 			if (fbufp - fbuf > 0)
1189 			{
1190 				if (!putxline((char *) fbuf, fbufp - fbuf - 1,
1191 						mci, pxflags))
1192 					goto writeerr;
1193 			}
1194 			fbufp = fbuf;
1195 			if (off >= 0 && buf[off] != '\0')
1196 			{
1197 				off = mime_fromqp((unsigned char *) (buf + off),
1198 						  &fbufp,
1199 						  &fbuf[MAXLINE] - fbufp);
1200 				goto again;
1201 			}
1202 		}
1203 	}
1204 
1205 	/* force out partial last line */
1206 	if (fbufp > fbuf)
1207 	{
1208 		*fbufp = '\0';
1209 		if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags))
1210 			goto writeerr;
1211 	}
1212 
1213 	/*
1214 	**  The decoded text may end without an EOL.  Since this function
1215 	**  is only called for text/plain MIME messages, it is safe to
1216 	**  add an extra one at the end just in case.  This is a hack,
1217 	**  but so is auto-converting MIME in the first place.
1218 	*/
1219 
1220 	if (!putline("", mci))
1221 		goto writeerr;
1222 
1223 	if (tTd(43, 3))
1224 		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1225 	return true;
1226 
1227   writeerr:
1228 	return false;
1229 }
1230 /*
1231 **  The following is based on Borenstein's "codes.c" module, with simplifying
1232 **  changes as we do not deal with multipart, and to do the translation in-core,
1233 **  with an attempt to prevent overrun of output buffers.
1234 **
1235 **  What is needed here are changes to defend this code better against
1236 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1237 */
1238 
/*
**  Map from a 7-bit ASCII code to the value of the hexadecimal digit it
**  represents; -1 marks a character that is not a hex digit.
**
**  The element type is explicitly signed: plain char may be unsigned,
**  in which case the -1 entries would read back as 255 and would never
**  compare equal to -1 in the decoder below.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c, or -1 if c is outside 0..127 or not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes at most one line: decoding stops once a newline has been
**	copied, at the end of infile, or when the output limit is reached.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- output string; *outfile is advanced past
**			every byte stored
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if decoding failure ("=" followed by a newline or by
**			a character that is not a hex digit); no
**			terminating '\0' is stored in this case
**		-1 if decoding stopped (end of infile or newline)
**			before the output limit was reached
**		>= 0 is the position in infile of the first byte not
**			yet consumed when the output limit was reached
**			(0 with nothing stored if maxlen leaves no room
**			for one character plus the trailing '\0')
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;

	/*
	**  Check the output limit before consuming a byte so that infile
	**  always points at the first unconsumed byte when the loop ends,
	**  no matter whether it ends via the condition or via a break.
	*/

	while (nchar < maxlen && (c1 = *infile) != '\0')
	{
		infile++;
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* skip ahead to the next hex digit, if any */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = (c1 << 4) | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
				break;
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Output limit hit: report where to resume.  A newline that
	**  was copied as the last permitted byte is already consumed,
	**  so the resume position lies after it, not on it.
	*/

	if (nchar >= maxlen)
		return (int) (infile - b);
	return -1;
}
1325 #endif /* MIME7TO8 */
1326