1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include "lint.h"
30#include "mtlib.h"
31#include <ctype.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <sys/types.h>
36#include <sys/mman.h>
37#include <sys/param.h>
38#include <sys/stat.h>
39#include <thread.h>
40#include <synch.h>
41#include <unistd.h>
42#include <limits.h>
43#include <errno.h>
44#include <inttypes.h>
45#include "libc.h"
46#include "msgfmt.h"
47#include "nlspath_checks.h"
48#include "gettext.h"
49
50#ifdef DEBUG
51#include <assert.h>
52#endif
53
/* The following symbols are just for GNU binary compatibility */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/*
 * Shared empty string.  parse_header() stores it in src_encoding
 * whenever the MO header supplies no usable charset.
 */
static const char	*nullstr = "";

/*
 * Keywords that parse_header() searches for in the MO header entry,
 * together with their lengths (the terminating null byte excluded).
 */
#define	CHARSET_MOD	"charset="
#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)
#define	NPLURALS_MOD	"nplurals="
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)
#define	PLURAL_MOD	"plural="
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)

/* Defined at the end of this file; called from build_rev1_info(). */
static uint32_t	get_hash_index(uint32_t *, uint32_t, uint32_t);
68
69/*
70 * free_conv_msgstr
71 *
72 * release the memory allocated for storing code-converted messages
73 *
74 * f
75 *	0:	do not free gmnp->conv_msgstr
76 *	1:	free gmnp->conv_msgstr
77 */
78static void
79free_conv_msgstr(Msg_g_node *gmnp, int f)
80{
81	uint32_t	i, num_of_conv;
82
83#ifdef GETTEXT_DEBUG
84	gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
85	    (void *)gmnp, f);
86	printgnumsg(gmnp, 1);
87#endif
88
89	num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
90	for (i = 0; i < num_of_conv; i++) {
91		if (gmnp->conv_msgstr[i]) {
92			free(gmnp->conv_msgstr[i]);
93		}
94		gmnp->conv_msgstr[i] = NULL;
95	}
96	if (f) {
97		free(gmnp->conv_msgstr);
98		gmnp->conv_msgstr = NULL;
99	}
100}
101
102/*
103 * dfltmsgstr
104 *
105 * choose an appropriate message by evaluating the plural expression,
106 * and return it.
107 */
108static char *
109dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
110    struct msg_pack *mp)
111{
112	unsigned int	pindex;
113	size_t	len;
114	const char	*p;
115
116#ifdef GETTEXT_DEBUG
117	gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
118	    (void *)gmnp,
119	    msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
120	printgnumsg(gmnp, 1);
121	printmp(mp, 1);
122#endif
123
124	if (mp->plural) {
125		if (gmnp->plural) {
126			pindex = plural_eval(gmnp->plural, mp->n);
127		} else {
128			/*
129			 * This mo does not have plural information.
130			 * Using the English form.
131			 */
132			if (mp->n == 1)
133				pindex = 0;
134			else
135				pindex = 1;
136		}
137#ifdef GETTEXT_DEBUG
138		gprintf(0, "plural_eval returned: %u\n", pindex);
139#endif
140		if (pindex >= gmnp->nplurals) {
141			/* should never happen */
142			pindex = 0;
143		}
144		p = msgstr;
145		for (; pindex != 0; pindex--) {
146			len = msgstr_len - (p - msgstr);
147			p = memchr(p, '\0', len);
148			if (p == NULL) {
149				/*
150				 * null byte not found
151				 * this should never happen
152				 */
153				char	*result;
154				DFLTMSG(result, mp->msgid1, mp->msgid2,
155				    mp->n, mp->plural);
156				return (result);
157			}
158			p++;		/* skip */
159		}
160		return ((char *)p);
161	}
162
163	return ((char *)msgstr);
164}
165
166/*
167 * parse_header
168 *
169 * parse the header entry of the GNU MO file and
170 * extract the src encoding and the plural information of the MO file
171 */
172static int
173parse_header(const char *header, Msg_g_node *gmnp)
174{
175	char	*charset = NULL;
176	char	*charset_str;
177	size_t	len;
178	char	*nplurals_str, *plural_str;
179	plural_expr_t	plural;
180	char	*p, *q;
181	unsigned int	nplurals;
182	int	ret;
183
184#ifdef GETTEXT_DEBUG
185	gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
186	    header ? header : "(null)", (void *)gmnp);
187	printgnumsg(gmnp, 1);
188#endif
189
190	if (header == NULL) {
191		gmnp->src_encoding = (char *)nullstr;
192		gmnp->nplurals = 2;
193		gmnp->plural = NULL;
194#ifdef GETTEXT_DEBUG
195		gprintf(0, "*************** exiting parse_header\n");
196		gprintf(0, "no header\n");
197#endif
198
199		return (0);
200	}
201
202	charset_str = strstr(header, CHARSET_MOD);
203	if (charset_str == NULL) {
204		gmnp->src_encoding = (char *)nullstr;
205	} else {
206		p = charset_str + CHARSET_LEN;
207		q = p;
208		while ((*q != ' ') && (*q != '\t') &&
209		    (*q != '\n')) {
210			q++;
211		}
212		len = q - p;
213		if (len > 0) {
214			charset = malloc(len + 1);
215			if (charset == NULL) {
216				gmnp->src_encoding = (char *)nullstr;
217				gmnp->nplurals = 2;
218				gmnp->plural = NULL;
219				return (-1);
220			}
221			(void) memcpy(charset, p, len);
222			charset[len] = '\0';
223			gmnp->src_encoding = charset;
224		} else {
225			gmnp->src_encoding = (char *)nullstr;
226		}
227	}
228
229	nplurals_str = strstr(header, NPLURALS_MOD);
230	plural_str = strstr(header, PLURAL_MOD);
231	if (nplurals_str == NULL || plural_str == NULL) {
232		/* no valid plural specification */
233		gmnp->nplurals = 2;
234		gmnp->plural = NULL;
235#ifdef GETTEXT_DEBUG
236		gprintf(0, "*************** exiting parse_header\n");
237		gprintf(0, "no plural entry\n");
238#endif
239		return (0);
240	} else {
241		p = nplurals_str + NPLURALS_LEN;
242		while (*p && isspace((unsigned char)*p)) {
243			p++;
244		}
245		nplurals = (unsigned int)strtol(p, &q, 10);
246		if (p != q) {
247			gmnp->nplurals = nplurals;
248		} else {
249			gmnp->nplurals = 2;
250		}
251
252		p = plural_str + PLURAL_LEN;
253#ifdef GETTEXT_DEBUG
254		gprintf(0, "plural_str: \"%s\"\n", p);
255#endif
256
257		ret = plural_expr(&plural, (const char *)p);
258		if (ret == 0) {
259			/* parse succeeded */
260			gmnp->plural = plural;
261#ifdef GETTEXT_DEBUG
262		gprintf(0, "*************** exiting parse_header\n");
263		gprintf(0, "charset: \"%s\"\n",
264		    charset ? charset : "(null)");
265		printexpr(plural, 1);
266#endif
267			return (0);
268		} else if (ret == 1) {
269			/* parse error */
270			gmnp->nplurals = 2;
271			gmnp->plural = NULL;
272			return (0);
273		} else {
274			/* fatal error */
275			if (charset)
276				free(charset);
277			gmnp->src_encoding = (char *)nullstr;
278			gmnp->nplurals = 2;
279			gmnp->plural = NULL;
280			return (-1);
281		}
282	}
283	/* NOTREACHED */
284}
285
286/*
287 * handle_lang
288 *
289 * take care of the LANGUAGE specification
290 */
291char *
292handle_lang(struct msg_pack *mp)
293{
294	const char	*p, *op, *q;
295	size_t	locale_len;
296	char	*result;
297	char	locale[MAXPATHLEN];
298
299
300#ifdef GETTEXT_DEBUG
301	gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
302	printmp(mp, 1);
303#endif
304
305	p = mp->language;
306
307	while (*p) {
308		op = p;
309		q = strchr(p, ':');
310		if (q == NULL) {
311			locale_len = strlen(p);
312			p += locale_len;
313		} else {
314			locale_len = q - p;
315			p += locale_len + 1;
316		}
317		if (locale_len >= MAXPATHLEN || locale_len == 0) {
318			/* illegal locale name */
319			continue;
320		}
321		(void) memcpy(locale, op, locale_len);
322		locale[locale_len] = '\0';
323		mp->locale = locale;
324
325#ifdef GETTEXT_DEBUG
326		*mp->msgfile = '\0';
327#endif
328		if (mk_msgfile(mp) == NULL) {
329			/* illegal locale name */
330			continue;
331		}
332
333		result = handle_mo(mp);
334		if (mp->status & ST_GNU_MSG_FOUND)
335			return (result);
336
337		if (mp->status & ST_SUN_MO_FOUND)
338			break;
339	}
340
341	/*
342	 * no valid locale found, Sun MO found, or
343	 * GNU MO found but no valid msg found there.
344	 */
345
346	if (mp->status & ST_GNU_MO_FOUND) {
347		/*
348		 * GNU MO found but no valid msg found there.
349		 * returning DFLTMSG.
350		 */
351		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
352		return (result);
353	}
354	return (NULL);
355}
356
357/*
358 * gnu_msgsearch
359 *
360 * Searchs the translation message for the specified msgid1.
361 * Hash algorithm used in this function is Open Addressing
362 * with Double Hashing:
363 * H(k, i) = (H1(k) + i * H2(k)) mod M
364 * H1(k) = hashvalue % M
365 * H2(k) = 1 + (hashvalue % (M - 2))
366 *
367 * Ref: The Art of Computer Programming Volume 3
368 * Sorting and Searching, second edition
369 * Donald E Knuth
370 */
371static char *
372gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
373    uint32_t *msgstrlen, uint32_t *midx)
374{
375	struct gnu_msg_info	*header = gmnp->msg_file_info;
376	struct gnu_msg_ent	*msgid_tbl, *msgstr_tbl;
377	uint32_t	num_of_str, idx, mlen, msglen;
378	uint32_t	hash_size, hash_val, hash_id, hash_inc, hash_idx;
379	uint32_t	*hash_table;
380	char	*base;
381	char	*msg;
382
383#ifdef GETTEXT_DEBUG
384	gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
385	    "0x%p, 0x%p)\n",
386	    (void *)gmnp, msgid1, msgstrlen, midx);
387	printgnumsg(gmnp, 1);
388#endif
389
390	base = (char *)header;
391
392	msgid_tbl = gmnp->msg_tbl[MSGID];
393	msgstr_tbl = gmnp->msg_tbl[MSGSTR];
394	hash_table = gmnp->hash_table;
395	hash_size = gmnp->hash_size;
396	num_of_str = gmnp->num_of_str;
397
398	if (!(gmnp->flag & ST_REV1) &&
399	    (hash_table == NULL || (hash_size <= 2))) {
400		/*
401		 * Revision 0 and
402		 * No hash table exists or
403		 * hash size is enough small.
404		 */
405		uint32_t	top, bottom;
406		char	*msg_id_str;
407		int	val;
408
409		top = 0;
410		bottom = num_of_str;
411		while (top < bottom) {
412			idx = (top + bottom) / 2;
413			msg_id_str = base +
414			    SWAP(gmnp, msgid_tbl[idx].offset);
415
416			val = strcmp(msg_id_str, msgid1);
417			if (val < 0) {
418				top = idx + 1;
419			} else if (val > 0) {
420				bottom = idx;
421			} else {
422				*msgstrlen = (unsigned int)
423				    SWAP(gmnp, msgstr_tbl[idx].len) + 1;
424				*midx = idx;
425				return (base +
426				    SWAP(gmnp, msgstr_tbl[idx].offset));
427			}
428		}
429		/* not found */
430		return ((char *)msgid1);
431	}
432
433	/* use hash table */
434	hash_id = get_hashid(msgid1, &msglen);
435	hash_idx = hash_id % hash_size;
436	hash_inc = 1 + (hash_id % (hash_size - 2));
437
438	for (;;) {
439		hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
440
441		if (hash_val == 0) {
442			/* not found */
443			return ((char *)msgid1);
444		}
445		if (hash_val <= num_of_str) {
446			/* static message */
447			idx = hash_val - 1;
448			mlen = SWAP(gmnp, msgid_tbl[idx].len);
449			msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
450		} else {
451			if (!(gmnp->flag & ST_REV1)) {
452				/* rev 0 does not have dynamic message */
453				return ((char *)msgid1);
454			}
455			/* dynamic message */
456			idx = hash_val - num_of_str - 1;
457			mlen = gmnp->d_msg[MSGID][idx].len;
458			msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
459		}
460		if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
461			/* found */
462			break;
463		}
464		hash_idx = (hash_idx + hash_inc) % hash_size;
465	}
466
467	/* msgstrlen should include a null termination */
468	if (hash_val <= num_of_str) {
469		*msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
470		msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
471		*midx = idx;
472	} else {
473		*msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
474		msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
475		*midx = idx + num_of_str;
476	}
477
478	return (msg);
479}
480
/*
 * do_conv
 *
 * Converts the srclen bytes at src with the conversion descriptor fd
 * by calling iconv().
 *
 * Returns a malloc'ed buffer, or NULL when memory allocation fails.
 * The first uint32_t of the buffer holds the number of converted
 * bytes; the converted bytes follow immediately.  The caller owns
 * the buffer and releases it with free().
 *
 * The output buffer starts at twice the input size and grows by the
 * same amount each time iconv() reports E2BIG.  Any other iconv()
 * failure ends the conversion; whatever has been converted up to
 * that point is returned.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t	oleft, ileft, bufsize, memincr;
	char	*to, *tptr;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	/* widen before multiplying so that a huge srclen cannot wrap */
	memincr = (size_t)srclen * 2;
	bufsize = memincr;
	ileft = srclen;
	oleft = bufsize;
	ptr = malloc(bufsize + sizeof (uint32_t));
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);

	for (;;) {
		tptr = to;
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * The input pointer goes through void * so that this
		 * compiles against both prototypes of iconv() found in
		 * the wild (const char ** and char **).
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1) {
			/* everything has been converted */
			break;
		}
		if (errno != E2BIG) {
			/* give up; keep what has been converted so far */
			break;
		}

#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", bufsize);
#endif

		optr = realloc(ptr, bufsize + memincr + sizeof (uint32_t));
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after the bytes already produced */
		to = (char *)(optr + 1);
		to += bufsize - oleft;
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", bufsize);
#endif
	}
	tolen = (uint32_t)(bufsize - oleft);

	if (tolen < bufsize) {
		/*
		 * Shrink the buffer.  When realloc() fails the original
		 * buffer is still valid and holds the converted message,
		 * so it is simply kept at its current size.
		 */
		optr = realloc(ptr, tolen + sizeof (uint32_t));
		if (optr != NULL) {
			ptr = optr;
		}
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
566
567/*
568 * conv_msg
569 */
570static char *
571conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
572    struct msg_pack *mp)
573{
574	uint32_t	*conv_dst;
575	size_t	num_of_conv, conv_msgstr_len;
576	char	*conv_msgstr, *result;
577
578	if (gmnp->conv_msgstr == NULL) {
579		num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
580		gmnp->conv_msgstr =
581		    calloc((size_t)num_of_conv, sizeof (uint32_t *));
582		if (gmnp->conv_msgstr == NULL) {
583			/* malloc failed */
584			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
585			return (result);
586		}
587	}
588
589	conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
590
591	if (conv_dst == NULL) {
592		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
593		return (result);
594	}
595	conv_msgstr_len = *conv_dst;
596	gmnp->conv_msgstr[midx] = conv_dst;
597	conv_msgstr = (char *)(conv_dst + 1);
598	result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
599	return (result);
600}
601
/*
 * gnu_key_2_text
 *
 * Extracts msgstr from the GNU MO file
 *
 * Looks up mp->msgid1 in the MO file described by gmnp and returns
 * the message chosen by dfltmsgstr().  If 'codeset' differs from the
 * source encoding recorded from the MO header, the msgstr is
 * converted with iconv first; converted messages are cached in
 * gmnp->conv_msgstr[], indexed by message index.
 *
 * The default message built by DFLTMSG() is returned when the header
 * cannot be parsed (fatal error) or when msgid1 is not in the file.
 * The unconverted msgstr is used when strdup() or iconv_open() fails.
 *
 * State kept in gmnp across calls:
 *	flag & ST_CHK	- the header entry has been parsed
 *	dst_encoding	- copy of the codeset used by the last call
 *	fd		- iconv descriptor, (iconv_t)-1 when none is open
 *	conv_msgstr	- cache of converted messages
 */
char *
gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
    struct msg_pack *mp)
{
	uint32_t	msgstr_len, midx;
	iconv_t	fd;
	char	*result, *msgstr;
	/*
	 * new_encoding is assigned only on the paths that need a
	 * conversion, and it is read only after the conversion == 0
	 * case has returned.
	 */
	int	ret, conversion, new_encoding;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** gnu_key_2_text("
	    "0x%p, \"%s\", 0x%p)\n",
	    (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
	printgnumsg(gmnp, 1);
	printmp(mp, 1);
#endif

	/* first checks if header entry has been processed */
	if (!(gmnp->flag & ST_CHK)) {
		char	*msg_header;

		/* the header entry is the translation of the empty msgid */
		msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
		ret = parse_header((const char *)msg_header, gmnp);
		if (ret == -1) {
			/* fatal error */
			DFLTMSG(result, mp->msgid1, mp->msgid2,
			    mp->n, mp->plural);
			return (result);
		}
		gmnp->flag |= ST_CHK;
	}
	msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
	/* gnu_msgsearch() returns its msgid argument itself on a miss */
	if (msgstr == mp->msgid1) {
		/* not found */
		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
		return (result);
	}

#ifdef GETTEXT_DEBUG
	printgnumsg(gmnp, 1);
#endif
	if (gmnp->dst_encoding == NULL) {
		/*
		 * destination encoding has not been set.
		 */
		char	*dupcodeset = strdup(codeset);
		if (dupcodeset == NULL) {
			/* strdup failed */
			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
			return (result);
		}
		gmnp->dst_encoding = dupcodeset;

		if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
			/*
			 * target encoding and src encoding
			 * are the same.
			 * No conversion required.
			 */
			conversion = 0;
		} else {
			/*
			 * target encoding is different from
			 * src encoding.
			 * New conversion required.
			 */
			/* sanity check */
			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
				(void) iconv_close(gmnp->fd);
				gmnp->fd = (iconv_t)-1;
			}
			if (gmnp->conv_msgstr)
				free_conv_msgstr(gmnp, 0);
			conversion = 1;
			new_encoding = 1;
		}
	} else {
		/*
		 * dst encoding has been already set.
		 */
		if (strcmp(gmnp->dst_encoding, codeset) == 0) {
			/*
			 * dst encoding and target encoding are the same.
			 */
			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
			    == 0) {
				/*
				 * dst encoding and src encoding are the same.
				 * No conversion required.
				 */
				conversion = 0;
			} else {
				/*
				 * dst encoding is different from src encoding.
				 * current conversion is valid.
				 */
				conversion = 1;
				new_encoding = 0;
				/* checks if iconv_open has succeeded before */
				if (gmnp->fd == (iconv_t)-1) {
					/*
					 * iconv_open should have failed before
					 * Assume this conversion is invalid
					 */
					conversion = 0;
				} else {
					if (gmnp->conv_msgstr == NULL) {
						/*
						 * memory allocation for
						 * conv_msgstr should
						 * have failed before.
						 */
						new_encoding = 1;
						if (gmnp->fd)
							(void) iconv_close(
							    gmnp->fd);
						gmnp->fd = (iconv_t)-1;
					}
				}
			}
		} else {
			/*
			 * dst encoding is different from target encoding.
			 * It has changed since before.
			 */
			char	*dupcodeset = strdup(codeset);
			if (dupcodeset == NULL) {
				result = dfltmsgstr(gmnp, msgstr,
				    msgstr_len, mp);
				return (result);
			}
			free(gmnp->dst_encoding);
			gmnp->dst_encoding = dupcodeset;
			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
			    == 0) {
				/*
				 * dst encoding and src encoding are the same.
				 * now, no conversion required.
				 */
				conversion = 0;
				if (gmnp->conv_msgstr)
					free_conv_msgstr(gmnp, 1);
			} else {
				/*
				 * dst encoding is different from src encoding.
				 * new conversion required.
				 */
				conversion = 1;
				new_encoding = 1;
				if (gmnp->conv_msgstr)
					free_conv_msgstr(gmnp, 0);
			}

			/* the old descriptor belongs to the old codeset */
			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
				(void) iconv_close(gmnp->fd);
			}
			if (gmnp->fd != (iconv_t)-1) {
				gmnp->fd = (iconv_t)-1;
			}
		}
	}

	if (conversion == 0) {
		/* no conversion */
		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
		return (result);
	}
	/* conversion required */

	if (new_encoding == 0) {
		/* dst codeset hasn't been changed since before */
		uint32_t	*cmsg;
		uint32_t	conv_msgstr_len;
		char	*conv_msgstr;

		if (gmnp->conv_msgstr[midx] == NULL) {
			/* this msgstr hasn't been converted yet */
			result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
			return (result);
		}
		/* this msgstr is in the conversion cache */
		/* a cache entry is a length word followed by the text */
		cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
		conv_msgstr_len = *cmsg;
		conv_msgstr = (char *)(cmsg + 1);
		result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
		return (result);
	}
	/* new conversion */
#ifdef GETTEXT_DEBUG
	gprintf(0, "******* calling iconv_open()\n");
	gprintf(0, "      dst: \"%s\", src: \"%s\"\n",
	    gmnp->dst_encoding, gmnp->src_encoding);
#endif
	fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
	/* stored even on failure so that later calls see (iconv_t)-1 */
	gmnp->fd = fd;
	if (fd == (iconv_t)-1) {
		/*
		 * iconv_open() failed.
		 * no conversion
		 */
		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
		return (result);
	}
	result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
	return (result);
}
813
814
/*
 * Tables of the <inttypes.h> PRI* format macros.
 *
 * PRI_STR(x, n) names the macro PRI<x><n>, PRI_LEN(x, n) is the length
 * of its expansion, and PRIS(P, x) builds an initializer that lists
 * P(x, ...) for the 14 width variants in this fixed order:
 *	 0 -  3		8, 16, 32, 64
 *	 4 -  7		LEAST8, LEAST16, LEAST32, LEAST64
 *	 8 - 11		FAST8, FAST16, FAST32, FAST64
 *	12		MAX
 *	13		PTR
 * The PRI_BIAS_* constants below are the first index of each group.
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

/* expansion of each PRI macro, one table per conversion character */
static const char	*pri_d[] = PRIS(PRI_STR, d);
static const char	*pri_i[] = PRIS(PRI_STR, i);
static const char	*pri_o[] = PRIS(PRI_STR, o);
static const char	*pri_u[] = PRIS(PRI_STR, u);
static const char	*pri_x[] = PRIS(PRI_STR, x);
static const char	*pri_X[] = PRIS(PRI_STR, X);

/* length of each expansion, parallel to the tables above */
static const char	pri_d_len[] = PRIS(PRI_LEN, d);
static const char	pri_i_len[] = PRIS(PRI_LEN, i);
static const char	pri_o_len[] = PRIS(PRI_LEN, o);
static const char	pri_u_len[] = PRIS(PRI_LEN, u);
static const char	pri_x_len[] = PRIS(PRI_LEN, x);
static const char	pri_X_len[] = PRIS(PRI_LEN, X);

/* maps the conversion character that follows "PRI" to its tables */
static struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/*
 * The non-numeric width keywords: name, its length, whether a width
 * (8/16/32/64) has to follow it, and the first table index of its group.
 */
static struct {
	const char	*name;
	const char	nlen;
	const char	want_digits;
	const char	bias;
} special_table[] = {
	{"LEAST",	5, 1, PRI_BIAS_LEAST},
	{"FAST",	4, 1, PRI_BIAS_FAST},
	{"MAX",		3, 0, PRI_BIAS_MAX},
	{"PTR",		3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returning conversion
 * specifier without the null termination.
 *
 * Recognized names are "I" (mapped to an empty string) and
 * PRI{d,i,o,u,x,X} followed by N, LEASTN, FASTN (N being one of
 * 8, 16, 32, 64), MAX or PTR.  NULL is returned for anything else,
 * in which case '*lenp' is left untouched.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	const char	**tbl;
	const char	*ltbl;
	char	*next;
	long	num;
	int	n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	/* the conversion character selects the tables */
	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = want_digits = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		if (!isdigit((unsigned char)*str))
			return (NULL);
		/*
		 * Keep the full range of strtol(); narrowing the value
		 * to int could turn an out-of-range width into a
		 * seemingly valid one.
		 */
		num = strtol(str, &next, 10);
		/* see if it is 8/16/32/64 */
		for (n = 8, idx = 0; idx < 4; idx++, n *= 2) {
			if (n == num)
				break;
		}
		if (idx == 4)
			return (NULL);
		str = next;
	}
	if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
947
948static gnu_d_macro_t *
949expand_macros(Msg_g_node *p)
950{
951	char	*base = (char *)p->msg_file_info;
952	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
953	struct gnu_msg_ent	*d_macro_tbl;
954	gnu_d_macro_t	*d_macro;
955	uint32_t	num_of_d_macro, e_maclen, maclen, i;
956	const char	*e_macname;
957	char	*macname;
958
959	/* number of the dynamic macros */
960	num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
961
962	d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
963	if (d_macro == NULL)
964		return (NULL);
965
966	/* pointer to the dynamic strings table */
967	d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
968	    (base + SWAP(p, rev1_header->off_dynamic_macro));
969
970	for (i = 0; i < num_of_d_macro; i++) {
971		macname = base + SWAP(p, d_macro_tbl[i].offset);
972		maclen = SWAP(p, d_macro_tbl[i].len);
973
974		/*
975		 * sanity check
976		 * maclen includes a null termination.
977		 */
978		if (maclen != strlen(macname) + 1) {
979			free(d_macro);
980			return (NULL);
981		}
982		e_macname = conv_macro(macname, maclen, &e_maclen);
983		if (e_macname == NULL) {
984			free(d_macro);
985			return (NULL);
986		}
987		d_macro[i].len = e_maclen;
988		d_macro[i].ptr = e_macname;
989	}
990
991	return (d_macro);
992}
993
994static char *
995expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
996{
997
998	char	*base = (char *)p->msg_file_info;
999	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
1000	struct gnu_dynamic_tbl	*d_info;
1001	struct gnu_dynamic_ent	*entry;
1002	gnu_d_macro_t	*d_macro;
1003	uint32_t	num_of_d_str, mlen, dlen, didx, i, j;
1004	uint32_t	off_d_tbl;
1005	uint32_t	*d_msg_off_tbl;
1006	size_t	mchunk_size, used, need;
1007	char	*mchunk, *msg;
1008
1009#define	MEM_INCR	(1024)
1010
1011	d_macro = expand_macros(p);
1012	if (d_macro == NULL)
1013		return (NULL);
1014
1015	/* number of dynamic messages */
1016	num_of_d_str = p->num_of_d_str;
1017
1018	mchunk = NULL;
1019	mchunk_size = 0;	/* size of the allocated memory in mchunk */
1020	used = 0;		/* size of the used memory in mchunk */
1021	for (i = MSGID; i <= MSGSTR; i++) {
1022		/* pointer to the offset table of dynamic msgids/msgstrs */
1023		off_d_tbl = SWAP(p,
1024		    i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1025		    rev1_header->off_dynamic_msgstr_tbl);
1026		/* pointer to the dynamic msgids/msgstrs */
1027		d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1028		for (j = 0; j < num_of_d_str; j++) {
1029			e_msgs[i][j].offset = used;
1030			d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1031			    (base + SWAP(p, d_msg_off_tbl[j]));
1032			entry = d_info->entry;
1033			msg = base + SWAP(p, d_info->offset);
1034
1035			for (;;) {
1036				mlen = SWAP(p, entry->len);
1037				didx = SWAP(p, entry->idx);
1038				dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1039				    d_macro[didx].len;
1040				need = used + mlen + dlen;
1041				if (need >= mchunk_size) {
1042					char	*t;
1043					size_t	n = mchunk_size;
1044					do {
1045						n += MEM_INCR;
1046					} while (n <= need);
1047					t = realloc(mchunk, n);
1048					if (t == NULL) {
1049						free(d_macro);
1050						free(mchunk);
1051						return (NULL);
1052					}
1053					mchunk = t;
1054					mchunk_size = n;
1055				}
1056				(void) memcpy(mchunk + used, msg, (size_t)mlen);
1057				msg += mlen;
1058				used += mlen;
1059
1060				if (didx == NOMORE_DYNAMIC_MACRO) {
1061					/*
1062					 * Last segment of a static
1063					 * msg string contains a null
1064					 * termination, so an explicit
1065					 * null termination is not required
1066					 * here.
1067					 */
1068					break;
1069				}
1070				(void) memcpy(mchunk + used,
1071				    d_macro[didx].ptr, (size_t)dlen);
1072				used += dlen;
1073				entry++; /* to next entry */
1074			}
1075			/*
1076			 * e_msgs[][].len does not include a null termination
1077			 */
1078			e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1079		}
1080	}
1081
1082	free(d_macro);
1083
1084	/* shrink mchunk to 'used' */
1085	{
1086		char	*t;
1087		t = realloc(mchunk, used);
1088		if (t == NULL) {
1089			free(mchunk);
1090			return (NULL);
1091		}
1092		mchunk = t;
1093	}
1094
1095	return (mchunk);
1096}
1097
1098static int
1099build_rev1_info(Msg_g_node *p)
1100{
1101	uint32_t	*d_hash;
1102	uint32_t	num_of_d_str, num_of_str;
1103	uint32_t	idx, hash_value, hash_size;
1104	size_t	hash_mem_size;
1105	size_t	d_msgid_size, d_msgstr_size;
1106	char	*chunk, *mchunk;
1107	int	i;
1108
1109#ifdef GETTEXT_DEBUG
1110	gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1111	printgnumsg(p, 1);
1112#endif
1113
1114	if (p->hash_table == NULL) {
1115		/* Revision 1 always requires the hash table */
1116		return (-1);
1117	}
1118
1119	num_of_str = p->num_of_str;
1120	hash_size = p->hash_size;
1121	num_of_d_str = p->num_of_d_str;
1122
1123	hash_mem_size = hash_size * sizeof (uint32_t);
1124	ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1125
1126	d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1127	d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1128
1129	chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1130	if (chunk == NULL) {
1131		return (-1);
1132	}
1133
1134	d_hash = (uint32_t *)(uintptr_t)chunk;
1135	p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1136	    (chunk + hash_mem_size);
1137	p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1138	    (chunk + hash_mem_size + d_msgid_size);
1139
1140	if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1141		free(chunk);
1142		return (-1);
1143	}
1144
1145	/* copy the original hash table into the dynamic hash table */
1146	for (i = 0; i < hash_size; i++) {
1147		d_hash[i] = SWAP(p, p->hash_table[i]);
1148	}
1149
1150	/* fill in the dynamic hash table with dynamic messages */
1151	for (i = 0; i < num_of_d_str; i++) {
1152		hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1153		    NULL);
1154		idx = get_hash_index(d_hash, hash_value, hash_size);
1155		d_hash[idx] = num_of_str + i + 1;
1156	}
1157
1158	p->mchunk = mchunk;
1159	p->hash_table = d_hash;
1160
1161#ifdef	GETTEXT_DEBUG
1162	print_rev1_info(p);
1163	gprintf(0, "******* exiting build_rev1_info()\n");
1164	printgnumsg(p, 1);
1165#endif
1166
1167	return (0);
1168}
1169
1170/*
1171 * gnu_setmsg
1172 *
1173 * INPUT
1174 *   mnp  - message node
1175 *   addr - address to the mmapped file
1176 *   size - size of the file
1177 *
1178 * RETURN
1179 *   0   - either T_GNU_MO or T_ILL_MO has been set
1180 *  -1   - failed
1181 */
1182int
1183gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1184{
1185	struct gnu_msg_info	*gnu_header;
1186	Msg_g_node	*p;
1187
1188#ifdef GETTEXT_DEBUG
1189	gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1190	    (void *)mnp, addr, size);
1191	printmnp(mnp, 1);
1192#endif
1193
1194	/* checks the GNU MAGIC number */
1195	if (size < sizeof (struct gnu_msg_info)) {
1196		/* invalid mo file */
1197		mnp->type = T_ILL_MO;
1198#ifdef	GETTEXT_DEBUG
1199		gprintf(0, "********* exiting gnu_setmsg\n");
1200		printmnp(mnp, 1);
1201#endif
1202		return (0);
1203	}
1204
1205	gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1206
1207	p = calloc(1, sizeof (Msg_g_node));
1208	if (p == NULL) {
1209		return (-1);
1210	}
1211	p->msg_file_info = gnu_header;
1212
1213	if (gnu_header->magic == GNU_MAGIC) {
1214		switch (gnu_header->revision) {
1215		case GNU_REVISION_0_1:
1216		case GNU_REVISION_1_1:
1217			p->flag |= ST_REV1;
1218			break;
1219		}
1220	} else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1221		p->flag |= ST_SWP;
1222		switch (gnu_header->revision) {
1223		case GNU_REVISION_0_1_SWAPPED:
1224		case GNU_REVISION_1_1_SWAPPED:
1225			p->flag |= ST_REV1;
1226			break;
1227		}
1228	} else {
1229		/* invalid mo file */
1230		free(p);
1231		mnp->type = T_ILL_MO;
1232#ifdef	GETTEXT_DEBUG
1233		gprintf(0, "********* exiting gnu_setmsg\n");
1234		printmnp(mnp, 1);
1235#endif
1236		return (0);
1237	}
1238
1239	p->fsize = size;
1240	p->num_of_str = SWAP(p, gnu_header->num_of_str);
1241	p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1242	p->hash_table = p->hash_size <= 2 ? NULL :
1243	    (uint32_t *)(uintptr_t)
1244	    (addr + SWAP(p, gnu_header->off_hashtbl));
1245
1246	p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1247	    (addr + SWAP(p, gnu_header->off_msgid_tbl));
1248	p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1249	    (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1250
1251	if (p->flag & ST_REV1) {
1252		/* Revision 1 */
1253		struct gnu_msg_rev1_info	*rev1_header;
1254
1255		rev1_header = (struct gnu_msg_rev1_info *)
1256		    (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1257		p->rev1_header = rev1_header;
1258		p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1259		if (build_rev1_info(p) == -1) {
1260			free(p);
1261#ifdef GETTEXT_DEBUG
1262			gprintf(0, "******** exiting gnu_setmsg: "
1263			    "build_rev1_info() failed\n");
1264#endif
1265			return (-1);
1266		}
1267	}
1268
1269	mnp->msg.gnumsg = p;
1270	mnp->type = T_GNU_MO;
1271
1272#ifdef GETTEXT_DEBUG
1273	gprintf(0, "********* exiting gnu_setmsg\n");
1274	printmnp(mnp, 1);
1275#endif
1276	return (0);
1277}
1278
/*
 * get_hash_index
 *
 * Probes hash_tbl (hash_size slots) with double hashing, starting at
 * hash_value % hash_size and stepping by
 * 1 + (hash_value % (hash_size - 2)), and returns the index of the
 * first slot that holds 0, i.e. an empty slot.
 *
 * The probing only ends when an empty slot is met.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	uint32_t	step = 1 + (hash_value % (hash_size - 2));

	while (hash_tbl[slot] != 0) {
		/* occupied; move on to the next probe position */
		slot = (slot + step) % hash_size;
	}

	return (slot);
}
1302