/*
 * rfc2047.c -- decode RFC-2047 header format
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/* SCCS identification string; omitted when building under lint. */
#ifndef lint
static char sccsi2[] = "%W% (Sun) %G%";
#endif

/*
 * Copyright (c) 1997-1998 Richard Coleman
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following two
 * paragraphs appear in all copies of this software.
 *
 * In no event shall Richard Coleman be liable to any party for direct,
 * indirect, special, incidental, or consequential damages arising out of
 * the use of this software and its documentation, even if Richard Coleman
 * has been advised of the possibility of such damage.
 *
 * Richard Coleman specifically disclaims any warranties, including, but
 * not limited to, the implied warranties of merchantability and fitness
 * for a particular purpose.  The software provided hereunder is on an "as
 * is" basis, and Richard Coleman has no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 */

/*
 * Parts of this code were derived from metamail, which is ...
 *
 * Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
 *
 * Permission to use, copy, modify, and distribute this material
 * for any purpose and without fee is hereby granted, provided
 * that the above copyright notice and this permission notice
 * appear in all copies, and that the name of Bellcore not be
 * used in advertising or publicity pertaining to this
 * material without the specific, prior written permission
 * of an authorized representative of Bellcore.  BELLCORE
 * MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
 * OF THIS MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS",
 * WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
 */

/*
 * Copyright (c) 1998, by Sun Microsystems, Inc.
 * All rights reserved.
 */

#include <stddef.h>
#include <string.h>

/*
 * Local boolean type.  It is a plain int, so anything declared as bool
 * in this file holds (and returns) an int whose value is TRUE or FALSE.
 */
typedef int bool;

#define	FALSE	0
#define	TRUE	1

/*
 * Hexadecimal digit lookup: maps a character code to the value of the
 * hex digit it represents ('0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15),
 * or to -1 when the character is not a hex digit.
 *
 * The table covers all 256 unsigned char values so that it can be
 * indexed directly by any byte, including 8-bit ones, without reading
 * past its end.
 */
static const signed char hexindex[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	0,   1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x80 - 0xFF: never hexadecimal digits */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Base64 alphabet lookup: maps a 7-bit character code to its 6-bit
 * value ('A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61,
 * '+' -> 62, '/' -> 63), or to -1 for every other character,
 * including the '=' padding character.
 */
static const signed char index_64[128] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
	-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

/*
 * Return the base64 value of character c, or -1 if c is not part of
 * the base64 alphabet.  Bytes above 127 are rejected before the table
 * is consulted, so any char value is a safe argument.
 *
 * Note: the argument is evaluated more than once; do not pass an
 * expression with side effects.
 */
#define	char64(c) (((unsigned char) (c) > 127) ? -1 : \
	index_64[(unsigned char) (c)])

877c478bdstevel@tonic-gatestatic int
887c478bdstevel@tonic-gateunqp(unsigned char byte1, unsigned char byte2)
897c478bdstevel@tonic-gate{
907c478bdstevel@tonic-gate	if (hexindex[byte1] == -1 || hexindex[byte2] == -1)
917c478bdstevel@tonic-gate		return (-1);
927c478bdstevel@tonic-gate	return (hexindex[byte1] << 4 | hexindex[byte2]);
937c478bdstevel@tonic-gate}
947c478bdstevel@tonic-gate
/*
 * Check if character is linear whitespace: space, horizontal tab or
 * newline.  The argument is evaluated up to three times, so it must
 * not have side effects.
 */
#define	is_lws(c)  ((c) == ' ' || (c) == '\t' || (c) == '\n')

987c478bdstevel@tonic-gate/*
997c478bdstevel@tonic-gate * Decode the string as a RFC-2047 header field
1007c478bdstevel@tonic-gate */
1017c478bdstevel@tonic-gate
1027c478bdstevel@tonic-gatebool
1037c478bdstevel@tonic-gatedecode_rfc2047(char *str, char *dst, char *charset)
1047c478bdstevel@tonic-gate{
1057c478bdstevel@tonic-gate	char *p, *q, *pp;
1067c478bdstevel@tonic-gate	char *startofmime, *endofmime;
1077c478bdstevel@tonic-gate	int c, quoted_printable;
1087c478bdstevel@tonic-gate	bool encoding_found = FALSE;	/* did we decode anything?	  */
1097c478bdstevel@tonic-gate	bool between_encodings = FALSE;	/* are we between two encodings?  */
1107c478bdstevel@tonic-gate	bool equals_pending = FALSE;	/* is there a '=' pending?	  */
1117c478bdstevel@tonic-gate	int whitespace = 0;	/* how much whitespace between encodings? */
1127c478bdstevel@tonic-gate
1137c478bdstevel@tonic-gate	if (str == NULL)
1147c478bdstevel@tonic-gate		return (FALSE);
1157c478bdstevel@tonic-gate
1167c478bdstevel@tonic-gate	/*
1177c478bdstevel@tonic-gate	 * Do a quick and dirty check for the '=' character.
1187c478bdstevel@tonic-gate	 * This should quickly eliminate many cases.
1197c478bdstevel@tonic-gate	 */
1207c478bdstevel@tonic-gate	if (!strchr(str, '='))
1217c478bdstevel@tonic-gate		return (FALSE);
1227c478bdstevel@tonic-gate
1237c478bdstevel@tonic-gate	for (p = str, q = dst; *p; p++) {
1247c478bdstevel@tonic-gate		/*
1257c478bdstevel@tonic-gate		 * If we had an '=' character pending from
1267c478bdstevel@tonic-gate		 * last iteration, then add it first.
1277c478bdstevel@tonic-gate		 */
1287c478bdstevel@tonic-gate		if (equals_pending) {
1297c478bdstevel@tonic-gate			*q++ = '=';
1307c478bdstevel@tonic-gate			equals_pending = FALSE;
1317c478bdstevel@tonic-gate			between_encodings = FALSE; /* we added non-WS text */
1327c478bdstevel@tonic-gate		}
1337c478bdstevel@tonic-gate
1347c478bdstevel@tonic-gate		if (*p != '=') {
1357c478bdstevel@tonic-gate			/* count linear whitespace while between encodings */
1367c478bdstevel@tonic-gate			if (between_encodings && is_lws(*p))
1377c478bdstevel@tonic-gate				whitespace++;
1387c478bdstevel@tonic-gate			else
1397c478bdstevel@tonic-gate				between_encodings = FALSE; /* non-WS added */
1407c478bdstevel@tonic-gate			*q++ = *p;
1417c478bdstevel@tonic-gate			continue;
1427c478bdstevel@tonic-gate		}
1437c478bdstevel@tonic-gate
1447c478bdstevel@tonic-gate		equals_pending = TRUE;	/* we have a '=' pending */
1457c478bdstevel@tonic-gate
1467c478bdstevel@tonic-gate		/* Check for initial =? */
1477c478bdstevel@tonic-gate		if (*p == '=' && p[1] && p[1] == '?' && p[2]) {
1487c478bdstevel@tonic-gate			startofmime = p + 2;
1497c478bdstevel@tonic-gate
1507c478bdstevel@tonic-gate			/* Scan ahead for the next '?' character */
1517c478bdstevel@tonic-gate			for (pp = startofmime; *pp && *pp != '?'; pp++)
1527c478bdstevel@tonic-gate				;
1537c478bdstevel@tonic-gate
1547c478bdstevel@tonic-gate			if (!*pp)
1557c478bdstevel@tonic-gate				continue;
1567c478bdstevel@tonic-gate
1577c478bdstevel@tonic-gate			strncpy(charset, startofmime, pp - startofmime);
1587c478bdstevel@tonic-gate			charset[pp - startofmime] = '\0';
1597c478bdstevel@tonic-gate
1607c478bdstevel@tonic-gate			startofmime = pp + 1;
1617c478bdstevel@tonic-gate
1627c478bdstevel@tonic-gate			/* Check for valid encoding type */
1637c478bdstevel@tonic-gate			if (*startofmime != 'B' && *startofmime != 'b' &&
1647c478bdstevel@tonic-gate			    *startofmime != 'Q' && *startofmime != 'q')
1657c478bdstevel@tonic-gate				continue;
1667c478bdstevel@tonic-gate
1677c478bdstevel@tonic-gate			/* Is encoding quoted printable or base64? */
1687c478bdstevel@tonic-gate			quoted_printable = (*startofmime == 'Q' ||
1697c478bdstevel@tonic-gate					    *startofmime == 'q');
1707c478bdstevel@tonic-gate			startofmime++;
1717c478bdstevel@tonic-gate
1727c478bdstevel@tonic-gate			/* Check for next '?' character */
1737c478bdstevel@tonic-gate			if (*startofmime != '?')
1747c478bdstevel@tonic-gate				continue;
1757c478bdstevel@tonic-gate			startofmime++;
1767c478bdstevel@tonic-gate
1777c478bdstevel@tonic-gate			/*
1787c478bdstevel@tonic-gate			 * Scan ahead for the ending ?=
1797c478bdstevel@tonic-gate			 *
1807c478bdstevel@tonic-gate			 * While doing this, we will also check if encoded
1817c478bdstevel@tonic-gate			 * word has any embedded linear whitespace.
1827c478bdstevel@tonic-gate			 */
1837c478bdstevel@tonic-gate			endofmime = NULL;
1847c478bdstevel@tonic-gate			for (pp = startofmime; *pp && *(pp+1); pp++) {
1857c478bdstevel@tonic-gate				if (is_lws(*pp))
1867c478bdstevel@tonic-gate					break;
1877c478bdstevel@tonic-gate				else if (*pp == '?' && pp[1] == '=') {
1887c478bdstevel@tonic-gate					endofmime = pp;
1897c478bdstevel@tonic-gate					break;
1907c478bdstevel@tonic-gate				}
1917c478bdstevel@tonic-gate			}
1927c478bdstevel@tonic-gate			if (is_lws(*pp) || endofmime == NULL)
1937c478bdstevel@tonic-gate				continue;
1947c478bdstevel@tonic-gate
1957c478bdstevel@tonic-gate			/*
1967c478bdstevel@tonic-gate			 * We've found an encoded word, so we can drop
1977c478bdstevel@tonic-gate			 * the '=' that was pending
1987c478bdstevel@tonic-gate			 */
1997c478bdstevel@tonic-gate			equals_pending = FALSE;
2007c478bdstevel@tonic-gate
2017c478bdstevel@tonic-gate			/*
2027c478bdstevel@tonic-gate			 * If we are between two encoded words separated only
2037c478bdstevel@tonic-gate			 * by linear whitespace, then we ignore the whitespace.
2047c478bdstevel@tonic-gate			 * We will roll back the buffer the number of whitespace
2057c478bdstevel@tonic-gate			 * characters we've seen since last encoded word.
2067c478bdstevel@tonic-gate			 */
2077c478bdstevel@tonic-gate			if (between_encodings)
2087c478bdstevel@tonic-gate				q -= whitespace;
2097c478bdstevel@tonic-gate
2107c478bdstevel@tonic-gate			/* Now decode the text */
2117c478bdstevel@tonic-gate			if (quoted_printable) {
2127c478bdstevel@tonic-gate				for (pp = startofmime; pp < endofmime; pp++) {
2137c478bdstevel@tonic-gate					if (*pp == '=') {
2147c478bdstevel@tonic-gate						c = unqp(pp[1], pp[2]);
2157c478bdstevel@tonic-gate						if (c == -1)
2167c478bdstevel@tonic-gate							continue;
2177c478bdstevel@tonic-gate						if (c != 0)
2187c478bdstevel@tonic-gate							*q++ = c;
2197c478bdstevel@tonic-gate						pp += 2;
2207c478bdstevel@tonic-gate					} else if (*pp == '_')
2217c478bdstevel@tonic-gate						*q++ = ' ';
2227c478bdstevel@tonic-gate					else
2237c478bdstevel@tonic-gate						*q++ = *pp;
2247c478bdstevel@tonic-gate				}
2257c478bdstevel@tonic-gate			} else {
2267c478bdstevel@tonic-gate				/* base64 */
2277c478bdstevel@tonic-gate				int c1, c2, c3, c4;
2287c478bdstevel@tonic-gate
2297c478bdstevel@tonic-gate				pp = startofmime;
2307c478bdstevel@tonic-gate				while (pp < endofmime) {
2317c478bdstevel@tonic-gate					/* 6 + 2 bits */
2327c478bdstevel@tonic-gate					while ((pp < endofmime) &&
2337c478bdstevel@tonic-gate						((c1 = char64(*pp)) == -1)) {
2347c478bdstevel@tonic-gate						pp++;
2357c478bdstevel@tonic-gate					}
2367c478bdstevel@tonic-gate					if (pp < endofmime)
2377c478bdstevel@tonic-gate						pp++;
2387c478bdstevel@tonic-gate					while ((pp < endofmime) &&
2397c478bdstevel@tonic-gate						((c2 = char64(*pp)) == -1)) {
2407c478bdstevel@tonic-gate						pp++;
2417c478bdstevel@tonic-gate					}
2427c478bdstevel@tonic-gate					if (pp < endofmime && c1 != -1 &&
2437c478bdstevel@tonic-gate								c2 != -1) {
2447c478bdstevel@tonic-gate						*q++ = (c1 << 2) | (c2 >> 4);
2457c478bdstevel@tonic-gate						pp++;
2467c478bdstevel@tonic-gate					}
2477c478bdstevel@tonic-gate					/* 4 + 4 bits */
2487c478bdstevel@tonic-gate					while ((pp < endofmime) &&
2497c478bdstevel@tonic-gate						((c3 = char64(*pp)) == -1)) {
2507c478bdstevel@tonic-gate						pp++;
2517c478bdstevel@tonic-gate					}
2527c478bdstevel@tonic-gate					if (pp < endofmime && c2 != -1 &&
2537c478bdstevel@tonic-gate								c3 != -1) {
2547c478bdstevel@tonic-gate						*q++ = ((c2 & 0xF) << 4) |
2557c478bdstevel@tonic-gate								(c3 >> 2);
2567c478bdstevel@tonic-gate						pp++;
2577c478bdstevel@tonic-gate					}
2587c478bdstevel@tonic-gate					/* 2 + 6 bits */
2597c478bdstevel@tonic-gate					while ((pp < endofmime) &&
2607c478bdstevel@tonic-gate						((c4 = char64(*pp)) == -1)) {
2617c478bdstevel@tonic-gate						pp++;
2627c478bdstevel@tonic-gate					}
2637c478bdstevel@tonic-gate					if (pp < endofmime && c3 != -1 &&
2647c478bdstevel@tonic-gate								c4 != -1) {
2657c478bdstevel@tonic-gate						*q++ = ((c3 & 0x3) << 6) | (c4);
2667c478bdstevel@tonic-gate						pp++;
2677c478bdstevel@tonic-gate					}
2687c478bdstevel@tonic-gate				}
2697c478bdstevel@tonic-gate			}
2707c478bdstevel@tonic-gate
2717c478bdstevel@tonic-gate			/*
2727c478bdstevel@tonic-gate			 * Now that we are done decoding this particular
2737c478bdstevel@tonic-gate			 * encoded word, advance string to trailing '='.
2747c478bdstevel@tonic-gate			 */
2757c478bdstevel@tonic-gate			p = endofmime + 1;
2767c478bdstevel@tonic-gate
2777c478bdstevel@tonic-gate			encoding_found = TRUE;	 /* found (>= 1) encoded word */
2787c478bdstevel@tonic-gate			between_encodings = TRUE; /* just decoded something   */
2797c478bdstevel@tonic-gate			whitespace = 0; /* re-initialize amount of whitespace */
2807c478bdstevel@tonic-gate		}
2817c478bdstevel@tonic-gate	}
2827c478bdstevel@tonic-gate
2837c478bdstevel@tonic-gate	/* If an equals was pending at end of string, add it now. */
2847c478bdstevel@tonic-gate	if (equals_pending)
2857c478bdstevel@tonic-gate		*q++ = '=';
2867c478bdstevel@tonic-gate	*q = '\0';
2877c478bdstevel@tonic-gate
2887c478bdstevel@tonic-gate	return (encoding_found);
2897c478bdstevel@tonic-gate}
290