1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * This program will generate UTF-8 to whatever single byte codeset mapping
26  * table in the single byte codeset code values' ascending order. You need to
27  * use sort(1) to sort out and make it ready for binary search that will
28  * do the search on the UTF-8 values.
29  */
30 
31 
32 
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>
#include "../common_defs.h"
38 
39 int
main(int ac,char ** av)40 main(int ac, char **av)
41 {
42 	to_utf8_table_component_t tbl[256];
43 	register int i, j;
44 	char buf[BUFSIZ], num[100];
45 	unsigned int l, k;
46 	char ascii_only = 0;
47 
48 	if (ac > 1 && strcmp(av[1], "-ascii") == 0)
49 		ascii_only = 1;
50 
51 	for (i = 0; i < 256; i++) {
52 		if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) {
53 			tbl[i].size = (signed char)1;
54 			tbl[i].u8 = (unsigned int)i;
55 		} else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) {
56 			tbl[i].size = (signed char)2;
57 			tbl[i].u8 = (unsigned int)i;
58 		} else {
59 			tbl[i].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
60 			tbl[i].u8 = 0;
61 		}
62 	}
63 
64 
65 	while (fgets(buf, BUFSIZ, stdin)) {
66 		i = 0;
67 		while (buf[i] && isspace(buf[i]))
68 			i++;
69 		if (buf[i] == '#' || buf[i] == '\0')
70 			continue;
71 
72 		for (j = 0; !isspace(buf[i]); i++, j++)
73 			num[j] = buf[i];
74 		num[j] = '\0';
75 
76 		k = strtol(num, (char **)NULL, 0);
77 
78 		while (isspace(buf[i]))
79 			i++;
80 
81 		if (buf[i] == '#' || buf[i] == '\0') {
82 			tbl[k].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
83 			tbl[k].u8 = 0;
84 			continue;
85 		}
86 
87 		for (j = 0; !isspace(buf[i]); i++, j++)
88 			num[j] = buf[i];
89 		num[j] = '\0';
90 
91 		l = strtol(num, (char **)NULL, 0);
92 
93 		tbl[k].u8 = l;
94 		if (l < 0x80)
95 			tbl[k].size = (signed char)1;
96 		else if (l < 0x800)
97 			tbl[k].size = (signed char)2;
98 		else if (l < 0x10000)
99 			tbl[k].size = (signed char)3;
100 		else if (l < 0x200000)
101 			tbl[k].size = (signed char)4;
102 		else if (l < 0x4000000)
103 			tbl[k].size = (signed char)5;
104 		else
105 			tbl[k].size = (signed char)6;
106 	}
107 
108 	for (i = 0; i < 256; i++) {
109 		l = tbl[i].u8;
110 		if (i > 0x7f && l != 0)
111 			printf("\t{  0x%08X, 0x%02X  },\n", l, i);
112 	}
113 
114 	if (ascii_only)
115 		printf("\t{  0x%08X, 0x%02X  },\n", 0, 0);
116 
117 	fprintf(stderr, "%s: make sure you sort the result by using\n\n\
118 \tsort -k 1 -t ',' result_file\n\n\
119 since iconv module that will include the result table uses binary search.\n",
120 av[0]);
121 }
122