1*7d1ffc32SGordon Ross /*
2*7d1ffc32SGordon Ross  * This file and its contents are supplied under the terms of the
3*7d1ffc32SGordon Ross  * Common Development and Distribution License ("CDDL"), version 1.0.
4*7d1ffc32SGordon Ross  * You may only use this file in accordance with the terms of version
5*7d1ffc32SGordon Ross  * 1.0 of the CDDL.
6*7d1ffc32SGordon Ross  *
7*7d1ffc32SGordon Ross  * A full copy of the text of the CDDL should have accompanied this
8*7d1ffc32SGordon Ross  * source.  A copy of the CDDL is also available via the Internet at
9*7d1ffc32SGordon Ross  * http://www.illumos.org/license/CDDL.
10*7d1ffc32SGordon Ross  */
11*7d1ffc32SGordon Ross 
12*7d1ffc32SGordon Ross /*
13*7d1ffc32SGordon Ross  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
14*7d1ffc32SGordon Ross  */
15*7d1ffc32SGordon Ross 
16*7d1ffc32SGordon Ross /*
17*7d1ffc32SGordon Ross  * Test conversion of strings UTF-8 to/from UTF-16 etc.
18*7d1ffc32SGordon Ross  *
19*7d1ffc32SGordon Ross  * This tests both 16-bit unicode symbols (UCS-2) and so called
20*7d1ffc32SGordon Ross  * "enhanced" unicode symbols such as the "poop emoji" that are
21*7d1ffc32SGordon Ross  * above 65535 and encode to four bytes as UTF-8.
22*7d1ffc32SGordon Ross  */
23*7d1ffc32SGordon Ross 
24*7d1ffc32SGordon Ross #include <sys/types.h>
25*7d1ffc32SGordon Ross #include <sys/debug.h>
26*7d1ffc32SGordon Ross #include <sys/u8_textprep.h>
27*7d1ffc32SGordon Ross #include <smbsrv/string.h>
28*7d1ffc32SGordon Ross #include <stdio.h>
29*7d1ffc32SGordon Ross #include <string.h>
30*7d1ffc32SGordon Ross 
31*7d1ffc32SGordon Ross #include "test_defs.h"
32*7d1ffc32SGordon Ross 
/*
 * Test data shared by the conversion tests below.
 *
 * Two characters are used: one that fits in a single 16-bit unit and
 * one above 0xffff, which is written as a surrogate pair in wcsp[].
 * Each is kept as a code point (U_*) and as its UTF-8 byte sequence.
 * The counts in parentheses are the lengths the tests expect the
 * string conversions to return (bytes for the mbs* strings, 16-bit
 * units for the wcs* strings), not counting the terminator.
 */
#define	U_FW_A	0xff21		// full-width A (U+FF21)
static const char fwA[4] = "\xef\xbc\xa1";

#define	U_POOP	0x1f4a9		// poop emoji (U+1F4A9)
static const char poop[5] = "\xf0\x9f\x92\xa9";

static char mbsa[] = "A\xef\xbc\xa1.";		// A fwA . (5)
static char mbsp[] = "P\xf0\x9f\x92\xa9.";	// P poop . (6)
static smb_wchar_t wcsa[] = { 'A', U_FW_A, '.', 0 };	// (3)
static smb_wchar_t wcsp[] = { 'P', 0xd83d, 0xdca9, '.', 0 }; // (4)
43*7d1ffc32SGordon Ross 
44*7d1ffc32SGordon Ross 
45*7d1ffc32SGordon Ross static void
conv_wctomb()46*7d1ffc32SGordon Ross conv_wctomb()
47*7d1ffc32SGordon Ross {
48*7d1ffc32SGordon Ross 	char mbs[8];
49*7d1ffc32SGordon Ross 	int len;
50*7d1ffc32SGordon Ross 
51*7d1ffc32SGordon Ross 	len = smb_wctomb(mbs, U_FW_A);
52*7d1ffc32SGordon Ross 	if (len != 3) {
53*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb fwA ret=%d\n", len);
54*7d1ffc32SGordon Ross 		return;
55*7d1ffc32SGordon Ross 	}
56*7d1ffc32SGordon Ross 	mbs[len] = '\0';
57*7d1ffc32SGordon Ross 	if (strcmp(mbs, fwA)) {
58*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb fwA cmp:\n");
59*7d1ffc32SGordon Ross 		hexdump((uchar_t *)mbs, len+1);
60*7d1ffc32SGordon Ross 		return;
61*7d1ffc32SGordon Ross 	}
62*7d1ffc32SGordon Ross 
63*7d1ffc32SGordon Ross 	len = smb_wctomb(mbs, U_POOP);
64*7d1ffc32SGordon Ross 	if (len != 4) {
65*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb poop ret=%d\n", len);
66*7d1ffc32SGordon Ross 		return;
67*7d1ffc32SGordon Ross 	}
68*7d1ffc32SGordon Ross 	mbs[len] = '\0';
69*7d1ffc32SGordon Ross 	if (strcmp(mbs, poop)) {
70*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb poop cmp:\n");
71*7d1ffc32SGordon Ross 		hexdump((uchar_t *)mbs, len+1);
72*7d1ffc32SGordon Ross 		return;
73*7d1ffc32SGordon Ross 	}
74*7d1ffc32SGordon Ross 
75*7d1ffc32SGordon Ross 	/* null wc to mbs should return 1 and put a null */
76*7d1ffc32SGordon Ross 	len = smb_wctomb(mbs, 0);
77*7d1ffc32SGordon Ross 	if (len != 1) {
78*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb null ret=%d\n", len);
79*7d1ffc32SGordon Ross 		return;
80*7d1ffc32SGordon Ross 	}
81*7d1ffc32SGordon Ross 	if (mbs[0] != '\0') {
82*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb null cmp:\n");
83*7d1ffc32SGordon Ross 		hexdump((uchar_t *)mbs, len+1);
84*7d1ffc32SGordon Ross 		return;
85*7d1ffc32SGordon Ross 	}
86*7d1ffc32SGordon Ross 
87*7d1ffc32SGordon Ross 	printf("Pass: conv_wctomb\n");
88*7d1ffc32SGordon Ross }
89*7d1ffc32SGordon Ross 
90*7d1ffc32SGordon Ross static void
conv_mbtowc()91*7d1ffc32SGordon Ross conv_mbtowc()
92*7d1ffc32SGordon Ross {
93*7d1ffc32SGordon Ross 	uint32_t wch = 0;
94*7d1ffc32SGordon Ross 	int len;
95*7d1ffc32SGordon Ross 
96*7d1ffc32SGordon Ross 	/*
97*7d1ffc32SGordon Ross 	 * The (void *) cast here is to let this build both
98*7d1ffc32SGordon Ross 	 * before and after an interface change in smb_mbtowc
99*7d1ffc32SGordon Ross 	 * (uint16_t vs uint32_t)
100*7d1ffc32SGordon Ross 	 */
101*7d1ffc32SGordon Ross 	len = smb_mbtowc((void *)&wch, fwA, 4);
102*7d1ffc32SGordon Ross 	if (len != 3) {
103*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc fwA ret=%d\n", len);
104*7d1ffc32SGordon Ross 		return;
105*7d1ffc32SGordon Ross 	}
106*7d1ffc32SGordon Ross 	if (wch != U_FW_A) {
107*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc fwA cmp: 0x%x\n", wch);
108*7d1ffc32SGordon Ross 		return;
109*7d1ffc32SGordon Ross 	}
110*7d1ffc32SGordon Ross 
111*7d1ffc32SGordon Ross 	len = smb_mbtowc((void *)&wch, poop, 4); // poop emoji
112*7d1ffc32SGordon Ross 	if (len != 4) {
113*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc poop ret=%d\n", len);
114*7d1ffc32SGordon Ross 		return;
115*7d1ffc32SGordon Ross 	}
116*7d1ffc32SGordon Ross 	if (wch != U_POOP) {
117*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc poop cmp: 0x%x\n", wch);
118*7d1ffc32SGordon Ross 		return;
119*7d1ffc32SGordon Ross 	}
120*7d1ffc32SGordon Ross 
121*7d1ffc32SGordon Ross 	/* null mbs to wc should return 0 (and set wch=0) */
122*7d1ffc32SGordon Ross 	len = smb_mbtowc((void *)&wch, "", 4);
123*7d1ffc32SGordon Ross 	if (len != 0) {
124*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc null ret=%d\n", len);
125*7d1ffc32SGordon Ross 		return;
126*7d1ffc32SGordon Ross 	}
127*7d1ffc32SGordon Ross 	if (wch != 0) {
128*7d1ffc32SGordon Ross 		printf("Fail: conv_mbtowc null cmp: 0x%x\n", wch);
129*7d1ffc32SGordon Ross 		return;
130*7d1ffc32SGordon Ross 	}
131*7d1ffc32SGordon Ross 
132*7d1ffc32SGordon Ross 	printf("Pass: conv_mbtowc\n");
133*7d1ffc32SGordon Ross }
134*7d1ffc32SGordon Ross 
135*7d1ffc32SGordon Ross static void
conv_wcstombs()136*7d1ffc32SGordon Ross conv_wcstombs()
137*7d1ffc32SGordon Ross {
138*7d1ffc32SGordon Ross 	char tmbs[16];
139*7d1ffc32SGordon Ross 	int len;
140*7d1ffc32SGordon Ross 
141*7d1ffc32SGordon Ross 	len = smb_wcstombs(tmbs, wcsa, sizeof (tmbs));
142*7d1ffc32SGordon Ross 	if (len != 5) {
143*7d1ffc32SGordon Ross 		printf("Fail: conv_wcstombs A ret=%d\n", len);
144*7d1ffc32SGordon Ross 		return;
145*7d1ffc32SGordon Ross 	}
146*7d1ffc32SGordon Ross 	if (strcmp(tmbs, mbsa)) {
147*7d1ffc32SGordon Ross 		printf("Fail: conv_wcstombs A cmp:\n");
148*7d1ffc32SGordon Ross 		hexdump((uchar_t *)tmbs, len+2);
149*7d1ffc32SGordon Ross 		return;
150*7d1ffc32SGordon Ross 	}
151*7d1ffc32SGordon Ross 
152*7d1ffc32SGordon Ross 	len = smb_wcstombs(tmbs, wcsp, sizeof (tmbs));
153*7d1ffc32SGordon Ross 	if (len != 6) {
154*7d1ffc32SGordon Ross 		printf("Fail: conv_wcstombs f ret=%d\n", len);
155*7d1ffc32SGordon Ross 		return;
156*7d1ffc32SGordon Ross 	}
157*7d1ffc32SGordon Ross 	if (strcmp(tmbs, mbsp)) {
158*7d1ffc32SGordon Ross 		printf("Fail: conv_wcstombs f cmp:\n");
159*7d1ffc32SGordon Ross 		hexdump((uchar_t *)tmbs, len+2);
160*7d1ffc32SGordon Ross 		return;
161*7d1ffc32SGordon Ross 	}
162*7d1ffc32SGordon Ross 
163*7d1ffc32SGordon Ross 	printf("Pass: conv_wcstombs\n");
164*7d1ffc32SGordon Ross }
165*7d1ffc32SGordon Ross 
166*7d1ffc32SGordon Ross static void
conv_mbstowcs()167*7d1ffc32SGordon Ross conv_mbstowcs()
168*7d1ffc32SGordon Ross {
169*7d1ffc32SGordon Ross 	smb_wchar_t twcs[8];
170*7d1ffc32SGordon Ross 	uint32_t wch = 0;
171*7d1ffc32SGordon Ross 	int len;
172*7d1ffc32SGordon Ross 
173*7d1ffc32SGordon Ross 	len = smb_mbstowcs(twcs, mbsa, sizeof (twcs));
174*7d1ffc32SGordon Ross 	if (len != 3) {
175*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstowcs A ret=%d\n", len);
176*7d1ffc32SGordon Ross 		return;
177*7d1ffc32SGordon Ross 	}
178*7d1ffc32SGordon Ross 	if (memcmp(twcs, wcsa, len+2)) {
179*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstowcs A cmp: 0x%x\n", wch);
180*7d1ffc32SGordon Ross 		hexdump((uchar_t *)twcs, len+2);
181*7d1ffc32SGordon Ross 		return;
182*7d1ffc32SGordon Ross 	}
183*7d1ffc32SGordon Ross 
184*7d1ffc32SGordon Ross 	len = smb_mbstowcs(twcs, mbsp, sizeof (twcs));
185*7d1ffc32SGordon Ross 	if (len != 4) {
186*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstowcs P ret=%d\n", len);
187*7d1ffc32SGordon Ross 		return;
188*7d1ffc32SGordon Ross 	}
189*7d1ffc32SGordon Ross 	if (memcmp(twcs, wcsp, len+2)) {
190*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstowcs P cmp: 0x%x\n", wch);
191*7d1ffc32SGordon Ross 		hexdump((uchar_t *)twcs, len+2);
192*7d1ffc32SGordon Ross 		return;
193*7d1ffc32SGordon Ross 	}
194*7d1ffc32SGordon Ross 
195*7d1ffc32SGordon Ross 	printf("Pass: conv_mbstowcs\n");
196*7d1ffc32SGordon Ross }
197*7d1ffc32SGordon Ross 
/*
 * An OEM string that will require iconv.
 *
 * The same text ("F", u-umlaut, "bar") in both encodings: one byte
 * (0x81, octal 201) for the umlaut in the OEM form and two bytes
 * (0xC3 0xBC, octal 303 274) in the UTF-8 form.  Used by the OEM
 * conversion tests and by conv_sbequiv_strlen().
 */
static uchar_t fubar_oem[] = "F\201bar";	// CP850 x81 (ü)
static char fubar_mbs[] = "F\303\274bar";	// UTF8 xC3 xBC
204*7d1ffc32SGordon Ross 
205*7d1ffc32SGordon Ross static void
conv_oemtombs()206*7d1ffc32SGordon Ross conv_oemtombs()
207*7d1ffc32SGordon Ross {
208*7d1ffc32SGordon Ross 	char tmbs[16];
209*7d1ffc32SGordon Ross 	int len;
210*7d1ffc32SGordon Ross 
211*7d1ffc32SGordon Ross 	len = smb_oemtombs(tmbs, (uchar_t *)"foo", 4);
212*7d1ffc32SGordon Ross 	if (len != 3) {
213*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb foo ret=%d\n", len);
214*7d1ffc32SGordon Ross 		return;
215*7d1ffc32SGordon Ross 	}
216*7d1ffc32SGordon Ross 	if (strcmp(tmbs, "foo")) {
217*7d1ffc32SGordon Ross 		printf("Fail: conv_wctomb foo cmp:\n");
218*7d1ffc32SGordon Ross 		hexdump((uchar_t *)tmbs, len+1);
219*7d1ffc32SGordon Ross 		return;
220*7d1ffc32SGordon Ross 	}
221*7d1ffc32SGordon Ross 
222*7d1ffc32SGordon Ross 	len = smb_oemtombs(tmbs, fubar_oem, 7);
223*7d1ffc32SGordon Ross 	if (len != 6) {
224*7d1ffc32SGordon Ross 		printf("Fail: conv_oemtombs fubar ret=%d\n", len);
225*7d1ffc32SGordon Ross 		return;
226*7d1ffc32SGordon Ross 	}
227*7d1ffc32SGordon Ross 	if (strcmp(tmbs, fubar_mbs)) {
228*7d1ffc32SGordon Ross 		printf("Fail: conv_oemtombs fubar cmp:\n");
229*7d1ffc32SGordon Ross 		hexdump((uchar_t *)tmbs, len+1);
230*7d1ffc32SGordon Ross 		return;
231*7d1ffc32SGordon Ross 	}
232*7d1ffc32SGordon Ross 
233*7d1ffc32SGordon Ross 	printf("Pass: conv_oemtombs\n");
234*7d1ffc32SGordon Ross }
235*7d1ffc32SGordon Ross 
236*7d1ffc32SGordon Ross static void
conv_mbstooem()237*7d1ffc32SGordon Ross conv_mbstooem()
238*7d1ffc32SGordon Ross {
239*7d1ffc32SGordon Ross 	uint8_t oemcs[8];
240*7d1ffc32SGordon Ross 	uint32_t wch = 0;
241*7d1ffc32SGordon Ross 	int len;
242*7d1ffc32SGordon Ross 
243*7d1ffc32SGordon Ross 	len = smb_mbstooem(oemcs, "foo", 8);
244*7d1ffc32SGordon Ross 	if (len != 3) {
245*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem foo ret=%d\n", len);
246*7d1ffc32SGordon Ross 		return;
247*7d1ffc32SGordon Ross 	}
248*7d1ffc32SGordon Ross 	if (memcmp(oemcs, "foo", len+1)) {
249*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem P cmp: 0x%x\n", wch);
250*7d1ffc32SGordon Ross 		hexdump((uchar_t *)oemcs, len+1);
251*7d1ffc32SGordon Ross 		return;
252*7d1ffc32SGordon Ross 	}
253*7d1ffc32SGordon Ross 
254*7d1ffc32SGordon Ross 	len = smb_mbstooem(oemcs, fubar_mbs, 8);
255*7d1ffc32SGordon Ross 	if (len != 5) {
256*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem fubar ret=%d\n", len);
257*7d1ffc32SGordon Ross 		return;
258*7d1ffc32SGordon Ross 	}
259*7d1ffc32SGordon Ross 	if (memcmp(oemcs, (char *)fubar_oem, len+1)) {
260*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem fubar cmp: 0x%x\n", wch);
261*7d1ffc32SGordon Ross 		hexdump((uchar_t *)oemcs, len+1);
262*7d1ffc32SGordon Ross 		return;
263*7d1ffc32SGordon Ross 	}
264*7d1ffc32SGordon Ross 
265*7d1ffc32SGordon Ross 	len = smb_mbstooem(oemcs, mbsp, 8);
266*7d1ffc32SGordon Ross 	if (len != 3) {
267*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem poop ret=%d\n", len);
268*7d1ffc32SGordon Ross 		return;
269*7d1ffc32SGordon Ross 	}
270*7d1ffc32SGordon Ross 	if (memcmp(oemcs, "P?.", len+1)) {
271*7d1ffc32SGordon Ross 		printf("Fail: conv_mbstooem poop cmp: 0x%x\n", wch);
272*7d1ffc32SGordon Ross 		hexdump((uchar_t *)oemcs, len+1);
273*7d1ffc32SGordon Ross 		return;
274*7d1ffc32SGordon Ross 	}
275*7d1ffc32SGordon Ross 
276*7d1ffc32SGordon Ross 	printf("Pass: conv_mbstooem\n");
277*7d1ffc32SGordon Ross }
278*7d1ffc32SGordon Ross 
279*7d1ffc32SGordon Ross static void
conv_sbequiv_strlen()280*7d1ffc32SGordon Ross conv_sbequiv_strlen()
281*7d1ffc32SGordon Ross {
282*7d1ffc32SGordon Ross 	int len;
283*7d1ffc32SGordon Ross 
284*7d1ffc32SGordon Ross 	len = (int)smb_sbequiv_strlen("a");
285*7d1ffc32SGordon Ross 	if (len != 1) {
286*7d1ffc32SGordon Ross 		printf("Fail: conv_sbequiv_strlen (a) len=%d\n", len);
287*7d1ffc32SGordon Ross 		return;
288*7d1ffc32SGordon Ross 	}
289*7d1ffc32SGordon Ross 
290*7d1ffc32SGordon Ross 	len = (int)smb_sbequiv_strlen(fubar_mbs);
291*7d1ffc32SGordon Ross 	if (len != strlen((char *)fubar_oem)) {
292*7d1ffc32SGordon Ross 		printf("Fail: conv_sbequiv_strlen (fubar) len=%d\n", len);
293*7d1ffc32SGordon Ross 		return;
294*7d1ffc32SGordon Ross 	}
295*7d1ffc32SGordon Ross 
296*7d1ffc32SGordon Ross 	len = (int)smb_sbequiv_strlen(mbsp);
297*7d1ffc32SGordon Ross 	if (len != 3) {	// "P?."
298*7d1ffc32SGordon Ross 		printf("Fail: conv_sbequiv_strlen (poop) len=%d\n", len);
299*7d1ffc32SGordon Ross 		return;
300*7d1ffc32SGordon Ross 	}
301*7d1ffc32SGordon Ross 
302*7d1ffc32SGordon Ross 	printf("Pass: conv_sbequiv_strlen\n");
303*7d1ffc32SGordon Ross }
304*7d1ffc32SGordon Ross 
305*7d1ffc32SGordon Ross static void
conv_wcequiv_strlen()306*7d1ffc32SGordon Ross conv_wcequiv_strlen()
307*7d1ffc32SGordon Ross {
308*7d1ffc32SGordon Ross 	int len;
309*7d1ffc32SGordon Ross 
310*7d1ffc32SGordon Ross 	len = (int)smb_wcequiv_strlen("a");
311*7d1ffc32SGordon Ross 	if (len != 2) {
312*7d1ffc32SGordon Ross 		printf("Fail: conv_wcequiv_strlen (a) len=%d\n", len);
313*7d1ffc32SGordon Ross 		return;
314*7d1ffc32SGordon Ross 	}
315*7d1ffc32SGordon Ross 
316*7d1ffc32SGordon Ross 	len = (int)smb_wcequiv_strlen(fwA);
317*7d1ffc32SGordon Ross 	if (len != 2) {
318*7d1ffc32SGordon Ross 		printf("Fail: conv_wcequiv_strlen (fwA) len=%d\n", len);
319*7d1ffc32SGordon Ross 		return;
320*7d1ffc32SGordon Ross 	}
321*7d1ffc32SGordon Ross 
322*7d1ffc32SGordon Ross 	len = (int)smb_wcequiv_strlen(poop);
323*7d1ffc32SGordon Ross 	if (len != 4) {
324*7d1ffc32SGordon Ross 		printf("Fail: conv_wcequiv_strlen (poop) len=%d\n", len);
325*7d1ffc32SGordon Ross 		return;
326*7d1ffc32SGordon Ross 	}
327*7d1ffc32SGordon Ross 
328*7d1ffc32SGordon Ross 	printf("Pass: conv_wcequiv_strlen\n");
329*7d1ffc32SGordon Ross }
330*7d1ffc32SGordon Ross 
331*7d1ffc32SGordon Ross void
test_conv()332*7d1ffc32SGordon Ross test_conv()
333*7d1ffc32SGordon Ross {
334*7d1ffc32SGordon Ross 	conv_wctomb();
335*7d1ffc32SGordon Ross 	conv_mbtowc();
336*7d1ffc32SGordon Ross 	conv_wcstombs();
337*7d1ffc32SGordon Ross 	conv_mbstowcs();
338*7d1ffc32SGordon Ross 	conv_oemtombs();
339*7d1ffc32SGordon Ross 	conv_mbstooem();
340*7d1ffc32SGordon Ross 	conv_sbequiv_strlen();
341*7d1ffc32SGordon Ross 	conv_wcequiv_strlen();
342*7d1ffc32SGordon Ross }