1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2020 Robert Mustacchi
14  */
15 
16 /*
17  * C11 mbrtoc16(3C) support.
18  *
19  * The char16_t represents a UTF-16 encoding. This means that we have to deal
20  * with surrogate pairs.
21  */
22 
23 #include <locale.h>
24 #include <wchar.h>
25 #include <xlocale.h>
26 #include <uchar.h>
27 #include "mblocal.h"
28 #include "unicode.h"
29 
30 #include <sys/debug.h>
31 
32 /*
 * Ensure that our save state never grows larger than the mbstate_t that it is
 * stored in. See the block comment in mblocal.h.
35  */
CTASSERT(sizeof (_CHAR16State) <= sizeof (mbstate_t));

/*
 * Conversion state that mbrtoc16() falls back to when its caller passes a NULL
 * ps argument.
 */
static mbstate_t mbrtoc16_state;
39 
40 size_t
mbrtoc16(char16_t * restrict pc16,const char * restrict str,size_t len,mbstate_t * restrict ps)41 mbrtoc16(char16_t *restrict pc16, const char *restrict str, size_t len,
42     mbstate_t *restrict ps)
43 {
44 	wchar_t wc;
45 	size_t ret;
46 	char16_t out;
47 	_CHAR16State *c16s;
48 
49 	if (ps == NULL) {
50 		ps = &mbrtoc16_state;
51 	}
52 
53 	if (str == NULL) {
54 		pc16 = NULL;
55 		str = "";
56 		len = 1;
57 	}
58 
59 	c16s = (_CHAR16State *)ps;
60 	if (c16s->c16_surrogate != 0) {
61 		if (pc16 != NULL) {
62 			*pc16 = c16s->c16_surrogate;
63 		}
64 		c16s->c16_surrogate = 0;
65 		return ((size_t)-3);
66 	}
67 
68 	ret = mbrtowc_l(&wc, str, len, ps, uselocale(NULL));
69 	if ((ssize_t)ret < 0) {
70 		return (ret);
71 	}
72 
73 	/*
74 	 * If this character is not in the basic multilingual plane then we need
75 	 * a surrogate character to represent it in UTF-16 and we will need to
76 	 * write that out on the next iteration.
77 	 */
78 	if (wc >= UNICODE_SUP_START) {
79 		wc -= UNICODE_SUP_START;
80 		c16s->c16_surrogate = UNICODE_SUR_LOWER | UNICODE_SUR_LMASK(wc);
81 		out = UNICODE_SUR_UPPER | UNICODE_SUR_UMASK(wc);
82 	} else {
83 		out = (char16_t)wc;
84 	}
85 
86 	if (pc16 != NULL) {
87 		*pc16 = out;
88 	}
89 
90 	return (ret);
91 }
92