1 /*
2  * Copyright (c) 2010 Marcel Moolenaar
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 
29 #include <sys/types.h>
30 #include <errno.h>
31 #include <stand.h>
32 #include <efichar.h>
33 
34 int
ucs2len(const CHAR16 * str)35 ucs2len(const CHAR16 *str)
36 {
37 	int i;
38 
39 	i = 0;
40 	while (*str++)
41 		i++;
42 	return (i);
43 }
44 
45 /*
46  * If nm were converted to utf8, what what would strlen
47  * return on the resulting string?
48  */
49 static size_t
utf8_len_of_ucs2(const CHAR16 * nm)50 utf8_len_of_ucs2(const CHAR16 *nm)
51 {
52 	size_t len;
53 	CHAR16 c;
54 
55 	len = 0;
56 	while (*nm) {
57 		c = *nm++;
58 		if (c > 0x7ff)
59 			len += 3;
60 		else if (c > 0x7f)
61 			len += 2;
62 		else
63 			len++;
64 	}
65 
66 	return (len);
67 }
68 
69 int
ucs2_to_utf8(const CHAR16 * nm,char ** name)70 ucs2_to_utf8(const CHAR16 *nm, char **name)
71 {
72 	size_t len, sz;
73 	CHAR16 c;
74 	char *cp;
75 	int freeit = *name == NULL;
76 
77 	sz = utf8_len_of_ucs2(nm) + 1;
78 	len = 0;
79 	if (*name != NULL)
80 		cp = *name;
81 	else
82 		cp = *name = malloc(sz);
83 	if (*name == NULL)
84 		return (ENOMEM);
85 
86 	while (*nm) {
87 		c = *nm++;
88 		if (c > 0x7ff) {
89 			if (len++ < sz)
90 				*cp++ = (char)(0xE0 | (c >> 12));
91 			if (len++ < sz)
92 				*cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
93 			if (len++ < sz)
94 				*cp++ = (char)(0x80 | (c & 0x3f));
95 		} else if (c > 0x7f) {
96 			if (len++ < sz)
97 				*cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
98 			if (len++ < sz)
99 				*cp++ = (char)(0x80 | (c & 0x3f));
100 		} else {
101 			if (len++ < sz)
102 				*cp++ = (char)(c & 0x7f);
103 		}
104 	}
105 
106 	if (len >= sz) {
107 		/* Absent bugs, we'll never return EOVERFLOW */
108 		if (freeit) {
109 			free(*name);
110 			*name = NULL;
111 		}
112 		return (EOVERFLOW);
113 	}
114 	*cp++ = '\0';
115 
116 	return (0);
117 }
118 
119 int
utf8_to_ucs2(const char * name,CHAR16 ** nmp,size_t * len)120 utf8_to_ucs2(const char *name, CHAR16 **nmp, size_t *len)
121 {
122 	CHAR16 *nm;
123 	size_t sz;
124 	uint32_t ucs4;
125 	int c, bytes;
126 	int freeit = *nmp == NULL;
127 
128 	sz = strlen(name) * 2 + 2;
129 	if (*nmp == NULL)
130 		*nmp = malloc(sz);
131 	if (*nmp == NULL)
132 		return (ENOMEM);
133 	nm = *nmp;
134 	*len = sz;
135 
136 	ucs4 = 0;
137 	bytes = 0;
138 	while (sz > 1 && *name != '\0') {
139 		c = *name++;
140 		/*
141 		 * Conditionalize on the two major character types:
142 		 * initial and followup characters.
143 		 */
144 		if ((c & 0xc0) != 0x80) {
145 			/* Initial characters. */
146 			if (bytes != 0)
147 				goto ilseq;
148 			if ((c & 0xf8) == 0xf0) {
149 				ucs4 = c & 0x07;
150 				bytes = 3;
151 			} else if ((c & 0xf0) == 0xe0) {
152 				ucs4 = c & 0x0f;
153 				bytes = 2;
154 			} else if ((c & 0xe0) == 0xc0) {
155 				ucs4 = c & 0x1f;
156 				bytes = 1;
157 			} else {
158 				ucs4 = c & 0x7f;
159 				bytes = 0;
160 			}
161 		} else {
162 			/* Followup characters. */
163 			if (bytes > 0) {
164 				ucs4 = (ucs4 << 6) + (c & 0x3f);
165 				bytes--;
166 			} else if (bytes == 0) {
167 				goto ilseq;
168 			}
169 		}
170 		if (bytes == 0) {
171 			if (ucs4 > 0xffff)
172 				goto ilseq;
173 			*nm++ = (CHAR16)ucs4;
174 			sz -= 2;
175 		}
176 	}
177 	if (sz < 2) {
178 		if (freeit) {
179 			free(nm);
180 			*nmp = NULL;
181 		}
182 		return (EDOOFUS);
183 	}
184 	sz -= 2;
185 	*nm = 0;
186 	*len -= sz;
187 	return (0);
188 ilseq:
189 	if (freeit) {
190 		free(nm);
191 		*nmp = NULL;
192 	}
193 	return (EILSEQ);
194 }
195