/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * This program generates a UTF-8 to single byte codeset mapping table.
 * The entries are written in ascending order of the single byte codeset
 * code values, so the output must be sorted with sort(1) on the UTF-8
 * values before it is ready for a binary search on those values.
 */
30*16d86563SAlexander Pyhalov
31*16d86563SAlexander Pyhalov
32*16d86563SAlexander Pyhalov
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>
#include "../common_defs.h"
38*16d86563SAlexander Pyhalov
39*16d86563SAlexander Pyhalov int
main(int ac,char ** av)40*16d86563SAlexander Pyhalov main(int ac, char **av)
41*16d86563SAlexander Pyhalov {
42*16d86563SAlexander Pyhalov to_utf8_table_component_t tbl[256];
43*16d86563SAlexander Pyhalov register int i, j;
44*16d86563SAlexander Pyhalov char buf[BUFSIZ], num[100];
45*16d86563SAlexander Pyhalov unsigned int l, k;
46*16d86563SAlexander Pyhalov char ascii_only = 0;
47*16d86563SAlexander Pyhalov
48*16d86563SAlexander Pyhalov if (ac > 1 && strcmp(av[1], "-ascii") == 0)
49*16d86563SAlexander Pyhalov ascii_only = 1;
50*16d86563SAlexander Pyhalov
51*16d86563SAlexander Pyhalov for (i = 0; i < 256; i++) {
52*16d86563SAlexander Pyhalov if (i <= 0x1f || i == 0x7f || (ascii_only && i <= 0x7f)) {
53*16d86563SAlexander Pyhalov tbl[i].size = (signed char)1;
54*16d86563SAlexander Pyhalov tbl[i].u8 = (unsigned int)i;
55*16d86563SAlexander Pyhalov } else if (!ascii_only && (i >= 0x80 && i <= 0x9f)) {
56*16d86563SAlexander Pyhalov tbl[i].size = (signed char)2;
57*16d86563SAlexander Pyhalov tbl[i].u8 = (unsigned int)i;
58*16d86563SAlexander Pyhalov } else {
59*16d86563SAlexander Pyhalov tbl[i].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
60*16d86563SAlexander Pyhalov tbl[i].u8 = 0;
61*16d86563SAlexander Pyhalov }
62*16d86563SAlexander Pyhalov }
63*16d86563SAlexander Pyhalov
64*16d86563SAlexander Pyhalov
65*16d86563SAlexander Pyhalov while (fgets(buf, BUFSIZ, stdin)) {
66*16d86563SAlexander Pyhalov i = 0;
67*16d86563SAlexander Pyhalov while (buf[i] && isspace(buf[i]))
68*16d86563SAlexander Pyhalov i++;
69*16d86563SAlexander Pyhalov if (buf[i] == '#' || buf[i] == '\0')
70*16d86563SAlexander Pyhalov continue;
71*16d86563SAlexander Pyhalov
72*16d86563SAlexander Pyhalov for (j = 0; !isspace(buf[i]); i++, j++)
73*16d86563SAlexander Pyhalov num[j] = buf[i];
74*16d86563SAlexander Pyhalov num[j] = '\0';
75*16d86563SAlexander Pyhalov
76*16d86563SAlexander Pyhalov k = strtol(num, (char **)NULL, 0);
77*16d86563SAlexander Pyhalov
78*16d86563SAlexander Pyhalov while (isspace(buf[i]))
79*16d86563SAlexander Pyhalov i++;
80*16d86563SAlexander Pyhalov
81*16d86563SAlexander Pyhalov if (buf[i] == '#' || buf[i] == '\0') {
82*16d86563SAlexander Pyhalov tbl[k].size = (signed char)ICV_TYPE_ILLEGAL_CHAR;
83*16d86563SAlexander Pyhalov tbl[k].u8 = 0;
84*16d86563SAlexander Pyhalov continue;
85*16d86563SAlexander Pyhalov }
86*16d86563SAlexander Pyhalov
87*16d86563SAlexander Pyhalov for (j = 0; !isspace(buf[i]); i++, j++)
88*16d86563SAlexander Pyhalov num[j] = buf[i];
89*16d86563SAlexander Pyhalov num[j] = '\0';
90*16d86563SAlexander Pyhalov
91*16d86563SAlexander Pyhalov l = strtol(num, (char **)NULL, 0);
92*16d86563SAlexander Pyhalov
93*16d86563SAlexander Pyhalov tbl[k].u8 = l;
94*16d86563SAlexander Pyhalov if (l < 0x80)
95*16d86563SAlexander Pyhalov tbl[k].size = (signed char)1;
96*16d86563SAlexander Pyhalov else if (l < 0x800)
97*16d86563SAlexander Pyhalov tbl[k].size = (signed char)2;
98*16d86563SAlexander Pyhalov else if (l < 0x10000)
99*16d86563SAlexander Pyhalov tbl[k].size = (signed char)3;
100*16d86563SAlexander Pyhalov else if (l < 0x200000)
101*16d86563SAlexander Pyhalov tbl[k].size = (signed char)4;
102*16d86563SAlexander Pyhalov else if (l < 0x4000000)
103*16d86563SAlexander Pyhalov tbl[k].size = (signed char)5;
104*16d86563SAlexander Pyhalov else
105*16d86563SAlexander Pyhalov tbl[k].size = (signed char)6;
106*16d86563SAlexander Pyhalov }
107*16d86563SAlexander Pyhalov
108*16d86563SAlexander Pyhalov for (i = 0; i < 256; i++) {
109*16d86563SAlexander Pyhalov l = tbl[i].u8;
110*16d86563SAlexander Pyhalov if (i > 0x7f && l != 0)
111*16d86563SAlexander Pyhalov printf("\t{ 0x%08X, 0x%02X },\n", l, i);
112*16d86563SAlexander Pyhalov }
113*16d86563SAlexander Pyhalov
114*16d86563SAlexander Pyhalov if (ascii_only)
115*16d86563SAlexander Pyhalov printf("\t{ 0x%08X, 0x%02X },\n", 0, 0);
116*16d86563SAlexander Pyhalov
117*16d86563SAlexander Pyhalov fprintf(stderr, "%s: make sure you sort the result by using\n\n\
118*16d86563SAlexander Pyhalov \tsort -k 1 -t ',' result_file\n\n\
119*16d86563SAlexander Pyhalov since iconv module that will include the result table uses binary search.\n",
120*16d86563SAlexander Pyhalov av[0]);
121*16d86563SAlexander Pyhalov }
122