/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2002-2004 Tim J. Robbins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
284297a3b0SGarrett D'Amore
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <limits.h>
324297a3b0SGarrett D'Amore #include <stdlib.h>
334297a3b0SGarrett D'Amore #include <string.h>
344297a3b0SGarrett D'Amore #include <wchar.h>
354297a3b0SGarrett D'Amore #include "mblocal.h"
362d08521bSGarrett D'Amore #include "lctype.h"
374297a3b0SGarrett D'Amore
384297a3b0SGarrett D'Amore static size_t _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD,
394297a3b0SGarrett D'Amore const char *_RESTRICT_KYWD,
40*0ac311baSRobert Mustacchi size_t, mbstate_t *_RESTRICT_KYWD, boolean_t);
414297a3b0SGarrett D'Amore static int _UTF8_mbsinit(const mbstate_t *);
424297a3b0SGarrett D'Amore static size_t _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
434297a3b0SGarrett D'Amore const char **_RESTRICT_KYWD, size_t, size_t,
444297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
454297a3b0SGarrett D'Amore static size_t _UTF8_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
464297a3b0SGarrett D'Amore mbstate_t *_RESTRICT_KYWD);
474297a3b0SGarrett D'Amore static size_t _UTF8_wcsnrtombs(char *_RESTRICT_KYWD,
484297a3b0SGarrett D'Amore const wchar_t **_RESTRICT_KYWD,
494297a3b0SGarrett D'Amore size_t, size_t, mbstate_t *_RESTRICT_KYWD);
504297a3b0SGarrett D'Amore
512d08521bSGarrett D'Amore void
_UTF8_init(struct lc_ctype * lct)522d08521bSGarrett D'Amore _UTF8_init(struct lc_ctype *lct)
534297a3b0SGarrett D'Amore {
542d08521bSGarrett D'Amore lct->lc_mbrtowc = _UTF8_mbrtowc;
552d08521bSGarrett D'Amore lct->lc_wcrtomb = _UTF8_wcrtomb;
562d08521bSGarrett D'Amore lct->lc_mbsinit = _UTF8_mbsinit;
572d08521bSGarrett D'Amore lct->lc_mbsnrtowcs = _UTF8_mbsnrtowcs;
582d08521bSGarrett D'Amore lct->lc_wcsnrtombs = _UTF8_wcsnrtombs;
592d08521bSGarrett D'Amore lct->lc_is_ascii = 0;
602d08521bSGarrett D'Amore lct->lc_max_mblen = 4;
614297a3b0SGarrett D'Amore }
624297a3b0SGarrett D'Amore
634297a3b0SGarrett D'Amore static int
_UTF8_mbsinit(const mbstate_t * ps)644297a3b0SGarrett D'Amore _UTF8_mbsinit(const mbstate_t *ps)
654297a3b0SGarrett D'Amore {
664297a3b0SGarrett D'Amore
674297a3b0SGarrett D'Amore return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
684297a3b0SGarrett D'Amore }
694297a3b0SGarrett D'Amore
704297a3b0SGarrett D'Amore static size_t
_UTF8_mbrtowc(wchar_t * _RESTRICT_KYWD pwc,const char * _RESTRICT_KYWD s,size_t n,mbstate_t * _RESTRICT_KYWD ps,boolean_t zero)714297a3b0SGarrett D'Amore _UTF8_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
72*0ac311baSRobert Mustacchi size_t n, mbstate_t *_RESTRICT_KYWD ps, boolean_t zero)
734297a3b0SGarrett D'Amore {
744297a3b0SGarrett D'Amore _UTF8State *us;
754297a3b0SGarrett D'Amore int ch, i, mask, want;
764297a3b0SGarrett D'Amore wchar_t lbound, wch;
774297a3b0SGarrett D'Amore
784297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
794297a3b0SGarrett D'Amore
804297a3b0SGarrett D'Amore if (us->want < 0 || us->want > 6) {
814297a3b0SGarrett D'Amore errno = EINVAL;
824297a3b0SGarrett D'Amore return ((size_t)-1);
834297a3b0SGarrett D'Amore }
844297a3b0SGarrett D'Amore
854297a3b0SGarrett D'Amore if (s == NULL) {
864297a3b0SGarrett D'Amore s = "";
874297a3b0SGarrett D'Amore n = 1;
884297a3b0SGarrett D'Amore pwc = NULL;
894297a3b0SGarrett D'Amore }
904297a3b0SGarrett D'Amore
914297a3b0SGarrett D'Amore if (n == 0)
924297a3b0SGarrett D'Amore /* Incomplete multibyte sequence */
934297a3b0SGarrett D'Amore return ((size_t)-2);
944297a3b0SGarrett D'Amore
954297a3b0SGarrett D'Amore if (us->want == 0) {
964297a3b0SGarrett D'Amore /*
974297a3b0SGarrett D'Amore * Determine the number of octets that make up this character
984297a3b0SGarrett D'Amore * from the first octet, and a mask that extracts the
994297a3b0SGarrett D'Amore * interesting bits of the first octet. We already know
1004297a3b0SGarrett D'Amore * the character is at least two bytes long.
1014297a3b0SGarrett D'Amore *
1024297a3b0SGarrett D'Amore * We also specify a lower bound for the character code to
1034297a3b0SGarrett D'Amore * detect redundant, non-"shortest form" encodings. For
1044297a3b0SGarrett D'Amore * example, the sequence C0 80 is _not_ a legal representation
1054297a3b0SGarrett D'Amore * of the null character. This enforces a 1-to-1 mapping
1064297a3b0SGarrett D'Amore * between character codes and their multibyte representations.
1074297a3b0SGarrett D'Amore */
1084297a3b0SGarrett D'Amore ch = (unsigned char)*s;
1094297a3b0SGarrett D'Amore if ((ch & 0x80) == 0) {
110475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */
111475b496bSGarrett D'Amore if (pwc != NULL)
112475b496bSGarrett D'Amore *pwc = ch;
113*0ac311baSRobert Mustacchi if (zero || ch != '\0') {
114*0ac311baSRobert Mustacchi return (1);
115*0ac311baSRobert Mustacchi } else {
116*0ac311baSRobert Mustacchi return (0);
117*0ac311baSRobert Mustacchi }
118475b496bSGarrett D'Amore }
119475b496bSGarrett D'Amore if ((ch & 0xe0) == 0xc0) {
1204297a3b0SGarrett D'Amore mask = 0x1f;
1214297a3b0SGarrett D'Amore want = 2;
1224297a3b0SGarrett D'Amore lbound = 0x80;
1234297a3b0SGarrett D'Amore } else if ((ch & 0xf0) == 0xe0) {
1244297a3b0SGarrett D'Amore mask = 0x0f;
1254297a3b0SGarrett D'Amore want = 3;
1264297a3b0SGarrett D'Amore lbound = 0x800;
1274297a3b0SGarrett D'Amore } else if ((ch & 0xf8) == 0xf0) {
1284297a3b0SGarrett D'Amore mask = 0x07;
1294297a3b0SGarrett D'Amore want = 4;
1304297a3b0SGarrett D'Amore lbound = 0x10000;
1314297a3b0SGarrett D'Amore #if 0
1324297a3b0SGarrett D'Amore /* These would be illegal in the UTF-8 space */
1334297a3b0SGarrett D'Amore
1344297a3b0SGarrett D'Amore } else if ((ch & 0xfc) == 0xf8) {
1354297a3b0SGarrett D'Amore mask = 0x03;
1364297a3b0SGarrett D'Amore want = 5;
1374297a3b0SGarrett D'Amore lbound = 0x200000;
1384297a3b0SGarrett D'Amore } else if ((ch & 0xfe) == 0xfc) {
1394297a3b0SGarrett D'Amore mask = 0x01;
1404297a3b0SGarrett D'Amore want = 6;
1414297a3b0SGarrett D'Amore lbound = 0x4000000;
1424297a3b0SGarrett D'Amore #endif
1434297a3b0SGarrett D'Amore } else {
1444297a3b0SGarrett D'Amore /*
1454297a3b0SGarrett D'Amore * Malformed input; input is not UTF-8.
1464297a3b0SGarrett D'Amore */
1474297a3b0SGarrett D'Amore errno = EILSEQ;
1484297a3b0SGarrett D'Amore return ((size_t)-1);
1494297a3b0SGarrett D'Amore }
1504297a3b0SGarrett D'Amore } else {
1514297a3b0SGarrett D'Amore want = us->want;
1524297a3b0SGarrett D'Amore lbound = us->lbound;
1534297a3b0SGarrett D'Amore }
1544297a3b0SGarrett D'Amore
1554297a3b0SGarrett D'Amore /*
1564297a3b0SGarrett D'Amore * Decode the octet sequence representing the character in chunks
1574297a3b0SGarrett D'Amore * of 6 bits, most significant first.
1584297a3b0SGarrett D'Amore */
1594297a3b0SGarrett D'Amore if (us->want == 0)
1604297a3b0SGarrett D'Amore wch = (unsigned char)*s++ & mask;
1614297a3b0SGarrett D'Amore else
1624297a3b0SGarrett D'Amore wch = us->ch;
1634297a3b0SGarrett D'Amore
1644297a3b0SGarrett D'Amore for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
1654297a3b0SGarrett D'Amore if ((*s & 0xc0) != 0x80) {
1664297a3b0SGarrett D'Amore /*
1674297a3b0SGarrett D'Amore * Malformed input; bad characters in the middle
1684297a3b0SGarrett D'Amore * of a character.
1694297a3b0SGarrett D'Amore */
1704297a3b0SGarrett D'Amore errno = EILSEQ;
1714297a3b0SGarrett D'Amore return ((size_t)-1);
1724297a3b0SGarrett D'Amore }
1734297a3b0SGarrett D'Amore wch <<= 6;
1744297a3b0SGarrett D'Amore wch |= *s++ & 0x3f;
1754297a3b0SGarrett D'Amore }
1764297a3b0SGarrett D'Amore if (i < want) {
1774297a3b0SGarrett D'Amore /* Incomplete multibyte sequence. */
1784297a3b0SGarrett D'Amore us->want = want - i;
1794297a3b0SGarrett D'Amore us->lbound = lbound;
1804297a3b0SGarrett D'Amore us->ch = wch;
1814297a3b0SGarrett D'Amore return ((size_t)-2);
1824297a3b0SGarrett D'Amore }
1834297a3b0SGarrett D'Amore if (wch < lbound) {
1844297a3b0SGarrett D'Amore /*
1854297a3b0SGarrett D'Amore * Malformed input; redundant encoding.
1864297a3b0SGarrett D'Amore */
1874297a3b0SGarrett D'Amore errno = EILSEQ;
1884297a3b0SGarrett D'Amore return ((size_t)-1);
1894297a3b0SGarrett D'Amore }
1904297a3b0SGarrett D'Amore if (pwc != NULL)
1914297a3b0SGarrett D'Amore *pwc = wch;
1924297a3b0SGarrett D'Amore us->want = 0;
193*0ac311baSRobert Mustacchi if (zero || wch != L'\0') {
194*0ac311baSRobert Mustacchi return (want);
195*0ac311baSRobert Mustacchi } else {
196*0ac311baSRobert Mustacchi return (0);
197*0ac311baSRobert Mustacchi }
1984297a3b0SGarrett D'Amore }
1994297a3b0SGarrett D'Amore
2004297a3b0SGarrett D'Amore static size_t
_UTF8_mbsnrtowcs(wchar_t * _RESTRICT_KYWD dst,const char ** _RESTRICT_KYWD src,size_t nms,size_t len,mbstate_t * _RESTRICT_KYWD ps)2014297a3b0SGarrett D'Amore _UTF8_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst, const char **_RESTRICT_KYWD src,
2024297a3b0SGarrett D'Amore size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
2034297a3b0SGarrett D'Amore {
2044297a3b0SGarrett D'Amore _UTF8State *us;
2054297a3b0SGarrett D'Amore const char *s;
2064297a3b0SGarrett D'Amore size_t nchr;
2074297a3b0SGarrett D'Amore wchar_t wc;
2084297a3b0SGarrett D'Amore size_t nb;
2094297a3b0SGarrett D'Amore
2104297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
2114297a3b0SGarrett D'Amore
2124297a3b0SGarrett D'Amore s = *src;
2134297a3b0SGarrett D'Amore nchr = 0;
2144297a3b0SGarrett D'Amore
2154297a3b0SGarrett D'Amore if (dst == NULL) {
2164297a3b0SGarrett D'Amore /*
2174297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
2184297a3b0SGarrett D'Amore * character appears as anything but the first byte of a
2194297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
2204297a3b0SGarrett D'Amore */
2214297a3b0SGarrett D'Amore if (nms > 0 && us->want > 0 && (signed char)*s > 0) {
2224297a3b0SGarrett D'Amore errno = EILSEQ;
2234297a3b0SGarrett D'Amore return ((size_t)-1);
2244297a3b0SGarrett D'Amore }
2254297a3b0SGarrett D'Amore for (;;) {
226*0ac311baSRobert Mustacchi if (nms > 0 && (signed char)*s > 0) {
2274297a3b0SGarrett D'Amore /*
2284297a3b0SGarrett D'Amore * Fast path for plain ASCII characters
2294297a3b0SGarrett D'Amore * excluding NUL.
2304297a3b0SGarrett D'Amore */
2314297a3b0SGarrett D'Amore nb = 1;
232*0ac311baSRobert Mustacchi } else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps,
233*0ac311baSRobert Mustacchi B_FALSE)) == (size_t)-1) {
2344297a3b0SGarrett D'Amore /* Invalid sequence - mbrtowc() sets errno. */
2354297a3b0SGarrett D'Amore return ((size_t)-1);
236*0ac311baSRobert Mustacchi } else if (nb == 0 || nb == (size_t)-2) {
2374297a3b0SGarrett D'Amore return (nchr);
238*0ac311baSRobert Mustacchi }
2394297a3b0SGarrett D'Amore s += nb;
2404297a3b0SGarrett D'Amore nms -= nb;
2414297a3b0SGarrett D'Amore nchr++;
2424297a3b0SGarrett D'Amore }
2434297a3b0SGarrett D'Amore /*NOTREACHED*/
2444297a3b0SGarrett D'Amore }
2454297a3b0SGarrett D'Amore
2464297a3b0SGarrett D'Amore /*
2474297a3b0SGarrett D'Amore * The fast path in the loop below is not safe if an ASCII
2484297a3b0SGarrett D'Amore * character appears as anything but the first byte of a
2494297a3b0SGarrett D'Amore * multibyte sequence. Check now to avoid doing it in the loop.
2504297a3b0SGarrett D'Amore */
2514297a3b0SGarrett D'Amore if (nms > 0 && len > 0 && us->want > 0 && (signed char)*s > 0) {
2524297a3b0SGarrett D'Amore errno = EILSEQ;
2534297a3b0SGarrett D'Amore return ((size_t)-1);
2544297a3b0SGarrett D'Amore }
2554297a3b0SGarrett D'Amore while (len-- > 0) {
2564297a3b0SGarrett D'Amore if (nms > 0 && (signed char)*s > 0) {
2574297a3b0SGarrett D'Amore /*
2584297a3b0SGarrett D'Amore * Fast path for plain ASCII characters
2594297a3b0SGarrett D'Amore * excluding NUL.
2604297a3b0SGarrett D'Amore */
2614297a3b0SGarrett D'Amore *dst = (wchar_t)*s;
2624297a3b0SGarrett D'Amore nb = 1;
263*0ac311baSRobert Mustacchi } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps, B_FALSE)) ==
2644297a3b0SGarrett D'Amore (size_t)-1) {
2654297a3b0SGarrett D'Amore *src = s;
2664297a3b0SGarrett D'Amore return ((size_t)-1);
2674297a3b0SGarrett D'Amore } else if (nb == (size_t)-2) {
2684297a3b0SGarrett D'Amore *src = s + nms;
2694297a3b0SGarrett D'Amore return (nchr);
2704297a3b0SGarrett D'Amore } else if (nb == 0) {
2714297a3b0SGarrett D'Amore *src = NULL;
2724297a3b0SGarrett D'Amore return (nchr);
2734297a3b0SGarrett D'Amore }
2744297a3b0SGarrett D'Amore s += nb;
2754297a3b0SGarrett D'Amore nms -= nb;
2764297a3b0SGarrett D'Amore nchr++;
2774297a3b0SGarrett D'Amore dst++;
2784297a3b0SGarrett D'Amore }
2794297a3b0SGarrett D'Amore *src = s;
2804297a3b0SGarrett D'Amore return (nchr);
2814297a3b0SGarrett D'Amore }
2824297a3b0SGarrett D'Amore
2834297a3b0SGarrett D'Amore static size_t
_UTF8_wcrtomb(char * _RESTRICT_KYWD s,wchar_t wc,mbstate_t * _RESTRICT_KYWD ps)2844297a3b0SGarrett D'Amore _UTF8_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps)
2854297a3b0SGarrett D'Amore {
2864297a3b0SGarrett D'Amore _UTF8State *us;
2874297a3b0SGarrett D'Amore unsigned char lead;
2884297a3b0SGarrett D'Amore int i, len;
2894297a3b0SGarrett D'Amore
2904297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
2914297a3b0SGarrett D'Amore
2924297a3b0SGarrett D'Amore if (us->want != 0) {
2934297a3b0SGarrett D'Amore errno = EINVAL;
2944297a3b0SGarrett D'Amore return ((size_t)-1);
2954297a3b0SGarrett D'Amore }
2964297a3b0SGarrett D'Amore
2974297a3b0SGarrett D'Amore if (s == NULL)
2984297a3b0SGarrett D'Amore /* Reset to initial shift state (no-op) */
2994297a3b0SGarrett D'Amore return (1);
3004297a3b0SGarrett D'Amore
3014297a3b0SGarrett D'Amore /*
3024297a3b0SGarrett D'Amore * Determine the number of octets needed to represent this character.
3034297a3b0SGarrett D'Amore * We always output the shortest sequence possible. Also specify the
3044297a3b0SGarrett D'Amore * first few bits of the first octet, which contains the information
3054297a3b0SGarrett D'Amore * about the sequence length.
3064297a3b0SGarrett D'Amore */
3074297a3b0SGarrett D'Amore if ((wc & ~0x7f) == 0) {
308475b496bSGarrett D'Amore /* Fast path for plain ASCII characters. */
309475b496bSGarrett D'Amore *s = (char)wc;
310475b496bSGarrett D'Amore return (1);
3114297a3b0SGarrett D'Amore } else if ((wc & ~0x7ff) == 0) {
3124297a3b0SGarrett D'Amore lead = 0xc0;
3134297a3b0SGarrett D'Amore len = 2;
3144297a3b0SGarrett D'Amore } else if ((wc & ~0xffff) == 0) {
3154297a3b0SGarrett D'Amore lead = 0xe0;
3164297a3b0SGarrett D'Amore len = 3;
3174297a3b0SGarrett D'Amore } else if ((wc & ~0x1fffff) == 0) {
3184297a3b0SGarrett D'Amore lead = 0xf0;
3194297a3b0SGarrett D'Amore len = 4;
3204297a3b0SGarrett D'Amore #if 0
3214297a3b0SGarrett D'Amore /* Again, 5 and 6 byte encodings are simply not permitted */
3224297a3b0SGarrett D'Amore } else if ((wc & ~0x3ffffff) == 0) {
3234297a3b0SGarrett D'Amore lead = 0xf8;
3244297a3b0SGarrett D'Amore len = 5;
3254297a3b0SGarrett D'Amore } else if ((wc & ~0x7fffffff) == 0) {
3264297a3b0SGarrett D'Amore lead = 0xfc;
3274297a3b0SGarrett D'Amore len = 6;
3284297a3b0SGarrett D'Amore #endif
3294297a3b0SGarrett D'Amore } else {
3304297a3b0SGarrett D'Amore errno = EILSEQ;
3314297a3b0SGarrett D'Amore return ((size_t)-1);
3324297a3b0SGarrett D'Amore }
3334297a3b0SGarrett D'Amore
3344297a3b0SGarrett D'Amore /*
3354297a3b0SGarrett D'Amore * Output the octets representing the character in chunks
3364297a3b0SGarrett D'Amore * of 6 bits, least significant last. The first octet is
3374297a3b0SGarrett D'Amore * a special case because it contains the sequence length
3384297a3b0SGarrett D'Amore * information.
3394297a3b0SGarrett D'Amore */
3404297a3b0SGarrett D'Amore for (i = len - 1; i > 0; i--) {
3414297a3b0SGarrett D'Amore s[i] = (wc & 0x3f) | 0x80;
3424297a3b0SGarrett D'Amore wc >>= 6;
3434297a3b0SGarrett D'Amore }
3444297a3b0SGarrett D'Amore *s = (wc & 0xff) | lead;
3454297a3b0SGarrett D'Amore
3464297a3b0SGarrett D'Amore return (len);
3474297a3b0SGarrett D'Amore }
3484297a3b0SGarrett D'Amore
3494297a3b0SGarrett D'Amore static size_t
_UTF8_wcsnrtombs(char * _RESTRICT_KYWD dst,const wchar_t ** _RESTRICT_KYWD src,size_t nwc,size_t len,mbstate_t * _RESTRICT_KYWD ps)3504297a3b0SGarrett D'Amore _UTF8_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
3514297a3b0SGarrett D'Amore size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
3524297a3b0SGarrett D'Amore {
3534297a3b0SGarrett D'Amore _UTF8State *us;
3544297a3b0SGarrett D'Amore char buf[MB_LEN_MAX];
3554297a3b0SGarrett D'Amore const wchar_t *s;
3564297a3b0SGarrett D'Amore size_t nbytes;
3574297a3b0SGarrett D'Amore size_t nb;
3584297a3b0SGarrett D'Amore
3594297a3b0SGarrett D'Amore us = (_UTF8State *)ps;
3604297a3b0SGarrett D'Amore
3614297a3b0SGarrett D'Amore if (us->want != 0) {
3624297a3b0SGarrett D'Amore errno = EINVAL;
3634297a3b0SGarrett D'Amore return ((size_t)-1);
3644297a3b0SGarrett D'Amore }
3654297a3b0SGarrett D'Amore
3664297a3b0SGarrett D'Amore s = *src;
3674297a3b0SGarrett D'Amore nbytes = 0;
3684297a3b0SGarrett D'Amore
3694297a3b0SGarrett D'Amore if (dst == NULL) {
3704297a3b0SGarrett D'Amore while (nwc-- > 0) {
3714297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80)
3724297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */
3734297a3b0SGarrett D'Amore nb = 1;
3744297a3b0SGarrett D'Amore else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
3754297a3b0SGarrett D'Amore (size_t)-1)
3764297a3b0SGarrett D'Amore /* Invalid character - wcrtomb() sets errno. */
3774297a3b0SGarrett D'Amore return ((size_t)-1);
3784297a3b0SGarrett D'Amore if (*s == L'\0')
3794297a3b0SGarrett D'Amore return (nbytes + nb - 1);
3804297a3b0SGarrett D'Amore s++;
3814297a3b0SGarrett D'Amore nbytes += nb;
3824297a3b0SGarrett D'Amore }
3834297a3b0SGarrett D'Amore return (nbytes);
3844297a3b0SGarrett D'Amore }
3854297a3b0SGarrett D'Amore
3864297a3b0SGarrett D'Amore while (len > 0 && nwc-- > 0) {
3874297a3b0SGarrett D'Amore if (0 <= *s && *s < 0x80) {
3884297a3b0SGarrett D'Amore /* Fast path for plain ASCII characters. */
3894297a3b0SGarrett D'Amore nb = 1;
3904297a3b0SGarrett D'Amore *dst = *s;
3914297a3b0SGarrett D'Amore } else if (len > (size_t)MB_CUR_MAX) {
3924297a3b0SGarrett D'Amore /* Enough space to translate in-place. */
3934297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
3944297a3b0SGarrett D'Amore *src = s;
3954297a3b0SGarrett D'Amore return ((size_t)-1);
3964297a3b0SGarrett D'Amore }
3974297a3b0SGarrett D'Amore } else {
3984297a3b0SGarrett D'Amore /*
3994297a3b0SGarrett D'Amore * May not be enough space; use temp. buffer.
4004297a3b0SGarrett D'Amore */
4014297a3b0SGarrett D'Amore if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
4024297a3b0SGarrett D'Amore *src = s;
4034297a3b0SGarrett D'Amore return ((size_t)-1);
4044297a3b0SGarrett D'Amore }
4054297a3b0SGarrett D'Amore if (nb > (int)len)
4064297a3b0SGarrett D'Amore /* MB sequence for character won't fit. */
4074297a3b0SGarrett D'Amore break;
4084297a3b0SGarrett D'Amore (void) memcpy(dst, buf, nb);
4094297a3b0SGarrett D'Amore }
4104297a3b0SGarrett D'Amore if (*s == L'\0') {
4114297a3b0SGarrett D'Amore *src = NULL;
4124297a3b0SGarrett D'Amore return (nbytes + nb - 1);
4134297a3b0SGarrett D'Amore }
4144297a3b0SGarrett D'Amore s++;
4154297a3b0SGarrett D'Amore dst += nb;
4164297a3b0SGarrett D'Amore len -= nb;
4174297a3b0SGarrett D'Amore nbytes += nb;
4184297a3b0SGarrett D'Amore }
4194297a3b0SGarrett D'Amore *src = s;
4204297a3b0SGarrett D'Amore return (nbytes);
4214297a3b0SGarrett D'Amore }
422