1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
 * This program reads a single byte codeset to Unicode mapping from stdin and
 * generates a UTF-8 to single byte codeset mapping table, in ascending order
 * of the single byte codeset code values. Run the result through sort(1) so
 * that it is ordered by the UTF-8 values, as required by the binary search
 * that is done on those values.
29  */
30 
31 
32 
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>
#include "../common_defs.h"
38 
39 int
main(int ac,char ** av)40 main(int ac, char **av)
41 {
42 	to_utf8_table_component_t tbl[256];
43 	register int i, j;
44 	char buf[BUFSIZ], num[100];
45 	unsigned int l, k;
46 	char ascii_only = 0;
47 
48 	if (ac > 1 && strcmp(av[1], "-ascii") == 0)
49 		ascii_only = 1;
50 
51 	for (i = 0; i < 256; i++) {
52 		if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) {
53 			tbl[i].size = (signed char)1;
54 			tbl[i].u8 = (unsigned int)i;
55 		} else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) {
56 			tbl[i].size = (signed char)2;
57 			tbl[i].u8 = (unsigned int)i;
58 		} else {
59 			tbl[i].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
60 			tbl[i].u8 = 0;
61 		}
62 	}
63 
64 
65 	while (fgets(buf, BUFSIZ, stdin)) {
66 		i = 0;
67 		while (buf[i] && isspace(buf[i]))
68 			i++;
69 		if (buf[i] == '#' || buf[i] == '\0')
70 			continue;
71 
72 		for (j = 0; !isspace(buf[i]); i++, j++)
73 			num[j] = buf[i];
74 		num[j] = '\0';
75 
76 		k = strtol(num, (char **)NULL, 0);
77 
78 		while (isspace(buf[i]))
79 			i++;
80 
81 		if (buf[i] == '#' || buf[i] == '\0') {
82 			tbl[k].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
83 			tbl[k].u8 = 0;
84 			continue;
85 		}
86 
87 		for (j = 0; !isspace(buf[i]); i++, j++)
88 			num[j] = buf[i];
89 		num[j] = '\0';
90 
91 		l = strtol(num, (char **)NULL, 0);
92 
93 		tbl[k].u8 = l;
94 		if (l < 0x80)
95 			tbl[k].size = (signed char)1;
96 		else if (l < 0x800)
97 			tbl[k].size = (signed char)2;
98 		else if (l < 0x10000)
99 			tbl[k].size = (signed char)3;
100 		else if (l < 0x200000)
101 			tbl[k].size = (signed char)4;
102 		else if (l < 0x4000000)
103 			tbl[k].size = (signed char)5;
104 		else
105 			tbl[k].size = (signed char)6;
106 	}
107 
108 	for (i = 0; i < 256; i++) {
109 		if (tbl[i].u8 < 0x80)
110 			l = tbl[i].u8;
111 		else if (tbl[i].u8 < 0x800) {
112 			l = 0xc080 |
113 				(((tbl[i].u8 >> 6) & 0x1f) << 8) |
114 				(tbl[i].u8 & 0x3f);
115 		} else if (tbl[i].u8 < 0x10000) {
116 			l = 0xe08080 |
117 				(((tbl[i].u8 >> 12) & 0x0f) << 16) |
118 				(((tbl[i].u8 >> 6) & 0x3f) << 8) |
119 				(tbl[i].u8 & 0x3f);
120 		} else if (tbl[i].u8 < 0x200000) {
121 			l = 0xf0808080 |
122 				(((tbl[i].u8 >> 18) & 0x07) << 24) |
123 				(((tbl[i].u8 >> 12) & 0x3f) << 16) |
124 				(((tbl[i].u8 >> 6) & 0x3f) << 8) |
125 				(tbl[i].u8 & 0x3f);
126 		} /* We only support characters in range of UTF-16
127 		else if (tbl[i].u8 < 0x4000000) {
128 			l = 0xf880808080 |
129 				(((tbl[i].u8 >> 24) & 0x03) << 32) |
130 				(((tbl[i].u8 >> 18) & 0x3f) << 24) |
131 				(((tbl[i].u8 >> 12) & 0x3f) << 16) |
132 				(((tbl[i].u8 >> 6) & 0x3f) << 8) |
133 				(tbl[i].u8 & 0x3f);
134 		} else {
135 			l = 0xfc8080808080 |
136 				(((tbl[i].u8 >> 30) & 0x01) << 40) |
137 				(((tbl[i].u8 >> 24) & 0x3f) << 32) |
138 				(((tbl[i].u8 >> 18) & 0x3f) << 24) |
139 				(((tbl[i].u8 >> 12) & 0x3f) << 16) |
140 				(((tbl[i].u8 >> 6) & 0x3f) << 8) |
141 				(tbl[i].u8 & 0x3f);
142 		}
143 		*/
144 
145 		if (i > 0x7f && l != 0)
146 			printf("\t{  0x%08X, 0x%02X  },\n", l, i);
147 	}
148 
149 	if (ascii_only)
150 		printf("\t{  0x%08X, 0x%02X  },\n", 0, 0);
151         fprintf(stderr, "%s: make sure you sort the result by using\n\n\
152 \tsort -k 1 -t ',' result_file\n\n\
153 since iconv module that will include the result table uses binary search.\n",
154 av[0]);
155 }
156