1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * This program will generate UTF-8 to whatever single byte codeset mapping
26 * table in the single byte codeset code values' ascending order. You need to
27 * use sort(1) to sort out and make it ready for binary search that will
28 * do the search on the UTF-8 values.
29 */
30
31
32
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "../common_defs.h"
38
39 int
main(int ac,char ** av)40 main(int ac, char **av)
41 {
42 to_utf8_table_component_t tbl[256];
43 register int i, j;
44 char buf[BUFSIZ], num[100];
45 unsigned int l, k;
46 char ascii_only = 0;
47
48 if (ac > 1 && strcmp(av[1], "-ascii") == 0)
49 ascii_only = 1;
50
51 for (i = 0; i < 256; i++) {
52 if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) {
53 tbl[i].size = (signed char)1;
54 tbl[i].u8 = (unsigned int)i;
55 } else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) {
56 tbl[i].size = (signed char)2;
57 tbl[i].u8 = (unsigned int)i;
58 } else {
59 tbl[i].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
60 tbl[i].u8 = 0;
61 }
62 }
63
64
65 while (fgets(buf, BUFSIZ, stdin)) {
66 i = 0;
67 while (buf[i] && isspace(buf[i]))
68 i++;
69 if (buf[i] == '#' || buf[i] == '\0')
70 continue;
71
72 for (j = 0; !isspace(buf[i]); i++, j++)
73 num[j] = buf[i];
74 num[j] = '\0';
75
76 k = strtol(num, (char **)NULL, 0);
77
78 while (isspace(buf[i]))
79 i++;
80
81 if (buf[i] == '#' || buf[i] == '\0') {
82 tbl[k].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
83 tbl[k].u8 = 0;
84 continue;
85 }
86
87 for (j = 0; !isspace(buf[i]); i++, j++)
88 num[j] = buf[i];
89 num[j] = '\0';
90
91 l = strtol(num, (char **)NULL, 0);
92
93 tbl[k].u8 = l;
94 if (l < 0x80)
95 tbl[k].size = (signed char)1;
96 else if (l < 0x800)
97 tbl[k].size = (signed char)2;
98 else if (l < 0x10000)
99 tbl[k].size = (signed char)3;
100 else if (l < 0x200000)
101 tbl[k].size = (signed char)4;
102 else if (l < 0x4000000)
103 tbl[k].size = (signed char)5;
104 else
105 tbl[k].size = (signed char)6;
106 }
107
108 for (i = 0; i < 256; i++) {
109 if (tbl[i].u8 < 0x80)
110 l = tbl[i].u8;
111 else if (tbl[i].u8 < 0x800) {
112 l = 0xc080 |
113 (((tbl[i].u8 >> 6) & 0x1f) << 8) |
114 (tbl[i].u8 & 0x3f);
115 } else if (tbl[i].u8 < 0x10000) {
116 l = 0xe08080 |
117 (((tbl[i].u8 >> 12) & 0x0f) << 16) |
118 (((tbl[i].u8 >> 6) & 0x3f) << 8) |
119 (tbl[i].u8 & 0x3f);
120 } else if (tbl[i].u8 < 0x200000) {
121 l = 0xf0808080 |
122 (((tbl[i].u8 >> 18) & 0x07) << 24) |
123 (((tbl[i].u8 >> 12) & 0x3f) << 16) |
124 (((tbl[i].u8 >> 6) & 0x3f) << 8) |
125 (tbl[i].u8 & 0x3f);
126 } /* We only support characters in range of UTF-16
127 else if (tbl[i].u8 < 0x4000000) {
128 l = 0xf880808080 |
129 (((tbl[i].u8 >> 24) & 0x03) << 32) |
130 (((tbl[i].u8 >> 18) & 0x3f) << 24) |
131 (((tbl[i].u8 >> 12) & 0x3f) << 16) |
132 (((tbl[i].u8 >> 6) & 0x3f) << 8) |
133 (tbl[i].u8 & 0x3f);
134 } else {
135 l = 0xfc8080808080 |
136 (((tbl[i].u8 >> 30) & 0x01) << 40) |
137 (((tbl[i].u8 >> 24) & 0x3f) << 32) |
138 (((tbl[i].u8 >> 18) & 0x3f) << 24) |
139 (((tbl[i].u8 >> 12) & 0x3f) << 16) |
140 (((tbl[i].u8 >> 6) & 0x3f) << 8) |
141 (tbl[i].u8 & 0x3f);
142 }
143 */
144
145 if (i > 0x7f && l != 0)
146 printf("\t{ 0x%08X, 0x%02X },\n", l, i);
147 }
148
149 if (ascii_only)
150 printf("\t{ 0x%08X, 0x%02X },\n", 0, 0);
151 fprintf(stderr, "%s: make sure you sort the result by using\n\n\
152 \tsort -k 1 -t ',' result_file\n\n\
153 since iconv module that will include the result table uses binary search.\n",
154 av[0]);
155 }
156