1*7d1ffc32SGordon Ross /*
2*7d1ffc32SGordon Ross * This file and its contents are supplied under the terms of the
3*7d1ffc32SGordon Ross * Common Development and Distribution License ("CDDL"), version 1.0.
4*7d1ffc32SGordon Ross * You may only use this file in accordance with the terms of version
5*7d1ffc32SGordon Ross * 1.0 of the CDDL.
6*7d1ffc32SGordon Ross *
7*7d1ffc32SGordon Ross * A full copy of the text of the CDDL should have accompanied this
8*7d1ffc32SGordon Ross * source. A copy of the CDDL is also available via the Internet at
9*7d1ffc32SGordon Ross * http://www.illumos.org/license/CDDL.
10*7d1ffc32SGordon Ross */
11*7d1ffc32SGordon Ross
12*7d1ffc32SGordon Ross /*
13*7d1ffc32SGordon Ross * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
14*7d1ffc32SGordon Ross */
15*7d1ffc32SGordon Ross
/*
 * Test conversion of strings UTF-8 to/from UTF-16, etc.
 *
 * This tests both 16-bit Unicode symbols (UCS-2) and the so-called
 * "enhanced" Unicode symbols, such as the "poop emoji", that are
 * above 65535 and encode to four bytes as UTF-8.
 */
23*7d1ffc32SGordon Ross
24*7d1ffc32SGordon Ross #include <sys/types.h>
25*7d1ffc32SGordon Ross #include <sys/debug.h>
26*7d1ffc32SGordon Ross #include <sys/u8_textprep.h>
27*7d1ffc32SGordon Ross #include <smbsrv/string.h>
28*7d1ffc32SGordon Ross #include <stdio.h>
29*7d1ffc32SGordon Ross #include <string.h>
30*7d1ffc32SGordon Ross
31*7d1ffc32SGordon Ross #include "test_defs.h"
32*7d1ffc32SGordon Ross
33*7d1ffc32SGordon Ross #define U_FW_A 0xff21 // full-width A (A)
34*7d1ffc32SGordon Ross static const char fwA[4] = "\xef\xbc\xa1";
35*7d1ffc32SGordon Ross
36*7d1ffc32SGordon Ross #define U_POOP 0x1f4a9 // poop emoji ()
37*7d1ffc32SGordon Ross static const char poop[5] = "\xf0\x9f\x92\xa9";
38*7d1ffc32SGordon Ross
39*7d1ffc32SGordon Ross static char mbsa[] = "A\xef\xbc\xa1."; // A fwA . (5)
40*7d1ffc32SGordon Ross static char mbsp[] = "P\xf0\x9f\x92\xa9."; // P poop . (6)
41*7d1ffc32SGordon Ross static smb_wchar_t wcsa[] = { 'A', U_FW_A, '.', 0 }; // (3)
42*7d1ffc32SGordon Ross static smb_wchar_t wcsp[] = { 'P', 0xd83d, 0xdca9, '.', 0 }; // (4)
43*7d1ffc32SGordon Ross
44*7d1ffc32SGordon Ross
45*7d1ffc32SGordon Ross static void
conv_wctomb()46*7d1ffc32SGordon Ross conv_wctomb()
47*7d1ffc32SGordon Ross {
48*7d1ffc32SGordon Ross char mbs[8];
49*7d1ffc32SGordon Ross int len;
50*7d1ffc32SGordon Ross
51*7d1ffc32SGordon Ross len = smb_wctomb(mbs, U_FW_A);
52*7d1ffc32SGordon Ross if (len != 3) {
53*7d1ffc32SGordon Ross printf("Fail: conv_wctomb fwA ret=%d\n", len);
54*7d1ffc32SGordon Ross return;
55*7d1ffc32SGordon Ross }
56*7d1ffc32SGordon Ross mbs[len] = '\0';
57*7d1ffc32SGordon Ross if (strcmp(mbs, fwA)) {
58*7d1ffc32SGordon Ross printf("Fail: conv_wctomb fwA cmp:\n");
59*7d1ffc32SGordon Ross hexdump((uchar_t *)mbs, len+1);
60*7d1ffc32SGordon Ross return;
61*7d1ffc32SGordon Ross }
62*7d1ffc32SGordon Ross
63*7d1ffc32SGordon Ross len = smb_wctomb(mbs, U_POOP);
64*7d1ffc32SGordon Ross if (len != 4) {
65*7d1ffc32SGordon Ross printf("Fail: conv_wctomb poop ret=%d\n", len);
66*7d1ffc32SGordon Ross return;
67*7d1ffc32SGordon Ross }
68*7d1ffc32SGordon Ross mbs[len] = '\0';
69*7d1ffc32SGordon Ross if (strcmp(mbs, poop)) {
70*7d1ffc32SGordon Ross printf("Fail: conv_wctomb poop cmp:\n");
71*7d1ffc32SGordon Ross hexdump((uchar_t *)mbs, len+1);
72*7d1ffc32SGordon Ross return;
73*7d1ffc32SGordon Ross }
74*7d1ffc32SGordon Ross
75*7d1ffc32SGordon Ross /* null wc to mbs should return 1 and put a null */
76*7d1ffc32SGordon Ross len = smb_wctomb(mbs, 0);
77*7d1ffc32SGordon Ross if (len != 1) {
78*7d1ffc32SGordon Ross printf("Fail: conv_wctomb null ret=%d\n", len);
79*7d1ffc32SGordon Ross return;
80*7d1ffc32SGordon Ross }
81*7d1ffc32SGordon Ross if (mbs[0] != '\0') {
82*7d1ffc32SGordon Ross printf("Fail: conv_wctomb null cmp:\n");
83*7d1ffc32SGordon Ross hexdump((uchar_t *)mbs, len+1);
84*7d1ffc32SGordon Ross return;
85*7d1ffc32SGordon Ross }
86*7d1ffc32SGordon Ross
87*7d1ffc32SGordon Ross printf("Pass: conv_wctomb\n");
88*7d1ffc32SGordon Ross }
89*7d1ffc32SGordon Ross
90*7d1ffc32SGordon Ross static void
conv_mbtowc()91*7d1ffc32SGordon Ross conv_mbtowc()
92*7d1ffc32SGordon Ross {
93*7d1ffc32SGordon Ross uint32_t wch = 0;
94*7d1ffc32SGordon Ross int len;
95*7d1ffc32SGordon Ross
96*7d1ffc32SGordon Ross /*
97*7d1ffc32SGordon Ross * The (void *) cast here is to let this build both
98*7d1ffc32SGordon Ross * before and after an interface change in smb_mbtowc
99*7d1ffc32SGordon Ross * (uint16_t vs uint32_t)
100*7d1ffc32SGordon Ross */
101*7d1ffc32SGordon Ross len = smb_mbtowc((void *)&wch, fwA, 4);
102*7d1ffc32SGordon Ross if (len != 3) {
103*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc fwA ret=%d\n", len);
104*7d1ffc32SGordon Ross return;
105*7d1ffc32SGordon Ross }
106*7d1ffc32SGordon Ross if (wch != U_FW_A) {
107*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc fwA cmp: 0x%x\n", wch);
108*7d1ffc32SGordon Ross return;
109*7d1ffc32SGordon Ross }
110*7d1ffc32SGordon Ross
111*7d1ffc32SGordon Ross len = smb_mbtowc((void *)&wch, poop, 4); // poop emoji
112*7d1ffc32SGordon Ross if (len != 4) {
113*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc poop ret=%d\n", len);
114*7d1ffc32SGordon Ross return;
115*7d1ffc32SGordon Ross }
116*7d1ffc32SGordon Ross if (wch != U_POOP) {
117*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc poop cmp: 0x%x\n", wch);
118*7d1ffc32SGordon Ross return;
119*7d1ffc32SGordon Ross }
120*7d1ffc32SGordon Ross
121*7d1ffc32SGordon Ross /* null mbs to wc should return 0 (and set wch=0) */
122*7d1ffc32SGordon Ross len = smb_mbtowc((void *)&wch, "", 4);
123*7d1ffc32SGordon Ross if (len != 0) {
124*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc null ret=%d\n", len);
125*7d1ffc32SGordon Ross return;
126*7d1ffc32SGordon Ross }
127*7d1ffc32SGordon Ross if (wch != 0) {
128*7d1ffc32SGordon Ross printf("Fail: conv_mbtowc null cmp: 0x%x\n", wch);
129*7d1ffc32SGordon Ross return;
130*7d1ffc32SGordon Ross }
131*7d1ffc32SGordon Ross
132*7d1ffc32SGordon Ross printf("Pass: conv_mbtowc\n");
133*7d1ffc32SGordon Ross }
134*7d1ffc32SGordon Ross
135*7d1ffc32SGordon Ross static void
conv_wcstombs()136*7d1ffc32SGordon Ross conv_wcstombs()
137*7d1ffc32SGordon Ross {
138*7d1ffc32SGordon Ross char tmbs[16];
139*7d1ffc32SGordon Ross int len;
140*7d1ffc32SGordon Ross
141*7d1ffc32SGordon Ross len = smb_wcstombs(tmbs, wcsa, sizeof (tmbs));
142*7d1ffc32SGordon Ross if (len != 5) {
143*7d1ffc32SGordon Ross printf("Fail: conv_wcstombs A ret=%d\n", len);
144*7d1ffc32SGordon Ross return;
145*7d1ffc32SGordon Ross }
146*7d1ffc32SGordon Ross if (strcmp(tmbs, mbsa)) {
147*7d1ffc32SGordon Ross printf("Fail: conv_wcstombs A cmp:\n");
148*7d1ffc32SGordon Ross hexdump((uchar_t *)tmbs, len+2);
149*7d1ffc32SGordon Ross return;
150*7d1ffc32SGordon Ross }
151*7d1ffc32SGordon Ross
152*7d1ffc32SGordon Ross len = smb_wcstombs(tmbs, wcsp, sizeof (tmbs));
153*7d1ffc32SGordon Ross if (len != 6) {
154*7d1ffc32SGordon Ross printf("Fail: conv_wcstombs f ret=%d\n", len);
155*7d1ffc32SGordon Ross return;
156*7d1ffc32SGordon Ross }
157*7d1ffc32SGordon Ross if (strcmp(tmbs, mbsp)) {
158*7d1ffc32SGordon Ross printf("Fail: conv_wcstombs f cmp:\n");
159*7d1ffc32SGordon Ross hexdump((uchar_t *)tmbs, len+2);
160*7d1ffc32SGordon Ross return;
161*7d1ffc32SGordon Ross }
162*7d1ffc32SGordon Ross
163*7d1ffc32SGordon Ross printf("Pass: conv_wcstombs\n");
164*7d1ffc32SGordon Ross }
165*7d1ffc32SGordon Ross
166*7d1ffc32SGordon Ross static void
conv_mbstowcs()167*7d1ffc32SGordon Ross conv_mbstowcs()
168*7d1ffc32SGordon Ross {
169*7d1ffc32SGordon Ross smb_wchar_t twcs[8];
170*7d1ffc32SGordon Ross uint32_t wch = 0;
171*7d1ffc32SGordon Ross int len;
172*7d1ffc32SGordon Ross
173*7d1ffc32SGordon Ross len = smb_mbstowcs(twcs, mbsa, sizeof (twcs));
174*7d1ffc32SGordon Ross if (len != 3) {
175*7d1ffc32SGordon Ross printf("Fail: conv_mbstowcs A ret=%d\n", len);
176*7d1ffc32SGordon Ross return;
177*7d1ffc32SGordon Ross }
178*7d1ffc32SGordon Ross if (memcmp(twcs, wcsa, len+2)) {
179*7d1ffc32SGordon Ross printf("Fail: conv_mbstowcs A cmp: 0x%x\n", wch);
180*7d1ffc32SGordon Ross hexdump((uchar_t *)twcs, len+2);
181*7d1ffc32SGordon Ross return;
182*7d1ffc32SGordon Ross }
183*7d1ffc32SGordon Ross
184*7d1ffc32SGordon Ross len = smb_mbstowcs(twcs, mbsp, sizeof (twcs));
185*7d1ffc32SGordon Ross if (len != 4) {
186*7d1ffc32SGordon Ross printf("Fail: conv_mbstowcs P ret=%d\n", len);
187*7d1ffc32SGordon Ross return;
188*7d1ffc32SGordon Ross }
189*7d1ffc32SGordon Ross if (memcmp(twcs, wcsp, len+2)) {
190*7d1ffc32SGordon Ross printf("Fail: conv_mbstowcs P cmp: 0x%x\n", wch);
191*7d1ffc32SGordon Ross hexdump((uchar_t *)twcs, len+2);
192*7d1ffc32SGordon Ross return;
193*7d1ffc32SGordon Ross }
194*7d1ffc32SGordon Ross
195*7d1ffc32SGordon Ross printf("Pass: conv_mbstowcs\n");
196*7d1ffc32SGordon Ross }
197*7d1ffc32SGordon Ross
198*7d1ffc32SGordon Ross /*
199*7d1ffc32SGordon Ross * An OEM string that will require iconv.
200*7d1ffc32SGordon Ross */
201*7d1ffc32SGordon Ross static uchar_t fubar_oem[] = "F\201bar"; // CP850 x81 (ü)
202*7d1ffc32SGordon Ross static char fubar_mbs[] = "F\303\274bar"; // UTF8 xC3 xBC
203*7d1ffc32SGordon Ross
204*7d1ffc32SGordon Ross
205*7d1ffc32SGordon Ross static void
conv_oemtombs()206*7d1ffc32SGordon Ross conv_oemtombs()
207*7d1ffc32SGordon Ross {
208*7d1ffc32SGordon Ross char tmbs[16];
209*7d1ffc32SGordon Ross int len;
210*7d1ffc32SGordon Ross
211*7d1ffc32SGordon Ross len = smb_oemtombs(tmbs, (uchar_t *)"foo", 4);
212*7d1ffc32SGordon Ross if (len != 3) {
213*7d1ffc32SGordon Ross printf("Fail: conv_wctomb foo ret=%d\n", len);
214*7d1ffc32SGordon Ross return;
215*7d1ffc32SGordon Ross }
216*7d1ffc32SGordon Ross if (strcmp(tmbs, "foo")) {
217*7d1ffc32SGordon Ross printf("Fail: conv_wctomb foo cmp:\n");
218*7d1ffc32SGordon Ross hexdump((uchar_t *)tmbs, len+1);
219*7d1ffc32SGordon Ross return;
220*7d1ffc32SGordon Ross }
221*7d1ffc32SGordon Ross
222*7d1ffc32SGordon Ross len = smb_oemtombs(tmbs, fubar_oem, 7);
223*7d1ffc32SGordon Ross if (len != 6) {
224*7d1ffc32SGordon Ross printf("Fail: conv_oemtombs fubar ret=%d\n", len);
225*7d1ffc32SGordon Ross return;
226*7d1ffc32SGordon Ross }
227*7d1ffc32SGordon Ross if (strcmp(tmbs, fubar_mbs)) {
228*7d1ffc32SGordon Ross printf("Fail: conv_oemtombs fubar cmp:\n");
229*7d1ffc32SGordon Ross hexdump((uchar_t *)tmbs, len+1);
230*7d1ffc32SGordon Ross return;
231*7d1ffc32SGordon Ross }
232*7d1ffc32SGordon Ross
233*7d1ffc32SGordon Ross printf("Pass: conv_oemtombs\n");
234*7d1ffc32SGordon Ross }
235*7d1ffc32SGordon Ross
236*7d1ffc32SGordon Ross static void
conv_mbstooem()237*7d1ffc32SGordon Ross conv_mbstooem()
238*7d1ffc32SGordon Ross {
239*7d1ffc32SGordon Ross uint8_t oemcs[8];
240*7d1ffc32SGordon Ross uint32_t wch = 0;
241*7d1ffc32SGordon Ross int len;
242*7d1ffc32SGordon Ross
243*7d1ffc32SGordon Ross len = smb_mbstooem(oemcs, "foo", 8);
244*7d1ffc32SGordon Ross if (len != 3) {
245*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem foo ret=%d\n", len);
246*7d1ffc32SGordon Ross return;
247*7d1ffc32SGordon Ross }
248*7d1ffc32SGordon Ross if (memcmp(oemcs, "foo", len+1)) {
249*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem P cmp: 0x%x\n", wch);
250*7d1ffc32SGordon Ross hexdump((uchar_t *)oemcs, len+1);
251*7d1ffc32SGordon Ross return;
252*7d1ffc32SGordon Ross }
253*7d1ffc32SGordon Ross
254*7d1ffc32SGordon Ross len = smb_mbstooem(oemcs, fubar_mbs, 8);
255*7d1ffc32SGordon Ross if (len != 5) {
256*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem fubar ret=%d\n", len);
257*7d1ffc32SGordon Ross return;
258*7d1ffc32SGordon Ross }
259*7d1ffc32SGordon Ross if (memcmp(oemcs, (char *)fubar_oem, len+1)) {
260*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem fubar cmp: 0x%x\n", wch);
261*7d1ffc32SGordon Ross hexdump((uchar_t *)oemcs, len+1);
262*7d1ffc32SGordon Ross return;
263*7d1ffc32SGordon Ross }
264*7d1ffc32SGordon Ross
265*7d1ffc32SGordon Ross len = smb_mbstooem(oemcs, mbsp, 8);
266*7d1ffc32SGordon Ross if (len != 3) {
267*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem poop ret=%d\n", len);
268*7d1ffc32SGordon Ross return;
269*7d1ffc32SGordon Ross }
270*7d1ffc32SGordon Ross if (memcmp(oemcs, "P?.", len+1)) {
271*7d1ffc32SGordon Ross printf("Fail: conv_mbstooem poop cmp: 0x%x\n", wch);
272*7d1ffc32SGordon Ross hexdump((uchar_t *)oemcs, len+1);
273*7d1ffc32SGordon Ross return;
274*7d1ffc32SGordon Ross }
275*7d1ffc32SGordon Ross
276*7d1ffc32SGordon Ross printf("Pass: conv_mbstooem\n");
277*7d1ffc32SGordon Ross }
278*7d1ffc32SGordon Ross
279*7d1ffc32SGordon Ross static void
conv_sbequiv_strlen()280*7d1ffc32SGordon Ross conv_sbequiv_strlen()
281*7d1ffc32SGordon Ross {
282*7d1ffc32SGordon Ross int len;
283*7d1ffc32SGordon Ross
284*7d1ffc32SGordon Ross len = (int)smb_sbequiv_strlen("a");
285*7d1ffc32SGordon Ross if (len != 1) {
286*7d1ffc32SGordon Ross printf("Fail: conv_sbequiv_strlen (a) len=%d\n", len);
287*7d1ffc32SGordon Ross return;
288*7d1ffc32SGordon Ross }
289*7d1ffc32SGordon Ross
290*7d1ffc32SGordon Ross len = (int)smb_sbequiv_strlen(fubar_mbs);
291*7d1ffc32SGordon Ross if (len != strlen((char *)fubar_oem)) {
292*7d1ffc32SGordon Ross printf("Fail: conv_sbequiv_strlen (fubar) len=%d\n", len);
293*7d1ffc32SGordon Ross return;
294*7d1ffc32SGordon Ross }
295*7d1ffc32SGordon Ross
296*7d1ffc32SGordon Ross len = (int)smb_sbequiv_strlen(mbsp);
297*7d1ffc32SGordon Ross if (len != 3) { // "P?."
298*7d1ffc32SGordon Ross printf("Fail: conv_sbequiv_strlen (poop) len=%d\n", len);
299*7d1ffc32SGordon Ross return;
300*7d1ffc32SGordon Ross }
301*7d1ffc32SGordon Ross
302*7d1ffc32SGordon Ross printf("Pass: conv_sbequiv_strlen\n");
303*7d1ffc32SGordon Ross }
304*7d1ffc32SGordon Ross
305*7d1ffc32SGordon Ross static void
conv_wcequiv_strlen()306*7d1ffc32SGordon Ross conv_wcequiv_strlen()
307*7d1ffc32SGordon Ross {
308*7d1ffc32SGordon Ross int len;
309*7d1ffc32SGordon Ross
310*7d1ffc32SGordon Ross len = (int)smb_wcequiv_strlen("a");
311*7d1ffc32SGordon Ross if (len != 2) {
312*7d1ffc32SGordon Ross printf("Fail: conv_wcequiv_strlen (a) len=%d\n", len);
313*7d1ffc32SGordon Ross return;
314*7d1ffc32SGordon Ross }
315*7d1ffc32SGordon Ross
316*7d1ffc32SGordon Ross len = (int)smb_wcequiv_strlen(fwA);
317*7d1ffc32SGordon Ross if (len != 2) {
318*7d1ffc32SGordon Ross printf("Fail: conv_wcequiv_strlen (fwA) len=%d\n", len);
319*7d1ffc32SGordon Ross return;
320*7d1ffc32SGordon Ross }
321*7d1ffc32SGordon Ross
322*7d1ffc32SGordon Ross len = (int)smb_wcequiv_strlen(poop);
323*7d1ffc32SGordon Ross if (len != 4) {
324*7d1ffc32SGordon Ross printf("Fail: conv_wcequiv_strlen (poop) len=%d\n", len);
325*7d1ffc32SGordon Ross return;
326*7d1ffc32SGordon Ross }
327*7d1ffc32SGordon Ross
328*7d1ffc32SGordon Ross printf("Pass: conv_wcequiv_strlen\n");
329*7d1ffc32SGordon Ross }
330*7d1ffc32SGordon Ross
331*7d1ffc32SGordon Ross void
test_conv()332*7d1ffc32SGordon Ross test_conv()
333*7d1ffc32SGordon Ross {
334*7d1ffc32SGordon Ross conv_wctomb();
335*7d1ffc32SGordon Ross conv_mbtowc();
336*7d1ffc32SGordon Ross conv_wcstombs();
337*7d1ffc32SGordon Ross conv_mbstowcs();
338*7d1ffc32SGordon Ross conv_oemtombs();
339*7d1ffc32SGordon Ross conv_mbstooem();
340*7d1ffc32SGordon Ross conv_sbequiv_strlen();
341*7d1ffc32SGordon Ross conv_wcequiv_strlen();
342*7d1ffc32SGordon Ross }
343