/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2002 Tim J. Robbins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
284297a3b0SGarrett D'Amore
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <stdlib.h>
324297a3b0SGarrett D'Amore #include <string.h>
334297a3b0SGarrett D'Amore #include <wchar.h>
346b5e5868SGarrett D'Amore #include <assert.h>
354297a3b0SGarrett D'Amore #include "collate.h"
362d08521bSGarrett D'Amore #include "localeimpl.h"
374297a3b0SGarrett D'Amore
384297a3b0SGarrett D'Amore int
wcscoll_l(const wchar_t * ws1,const wchar_t * ws2,locale_t loc)392d08521bSGarrett D'Amore wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc)
404297a3b0SGarrett D'Amore {
41*efcfb316SYuri Pankov int len1, len2, pri1, pri2;
426b5e5868SGarrett D'Amore wchar_t *tr1 = NULL, *tr2 = NULL;
436b5e5868SGarrett D'Amore int direc, pass;
442d08521bSGarrett D'Amore const struct lc_collate *lcc = loc->collate;
45*efcfb316SYuri Pankov int ret = wcscmp(ws1, ws2);
464297a3b0SGarrett D'Amore
47*efcfb316SYuri Pankov if (lcc->lc_is_posix || ret == 0)
48*efcfb316SYuri Pankov return (ret);
494297a3b0SGarrett D'Amore
50*efcfb316SYuri Pankov if (*ws1 == 0 && *ws2 != 0)
51*efcfb316SYuri Pankov return (-1);
52*efcfb316SYuri Pankov if (*ws1 != 0 && *ws2 == 0)
53*efcfb316SYuri Pankov return (1);
544297a3b0SGarrett D'Amore
556b5e5868SGarrett D'Amore /*
566b5e5868SGarrett D'Amore * Once upon a time we had code to try to optimize this, but
576b5e5868SGarrett D'Amore * it turns out that you really can't make many assumptions
586b5e5868SGarrett D'Amore * safely. You absolutely have to run this pass by pass,
596b5e5868SGarrett D'Amore * because some passes will be ignored for a given character,
606b5e5868SGarrett D'Amore * while others will not. Simpler locales will benefit from
616b5e5868SGarrett D'Amore * having fewer passes, and most comparisions should resolve
626b5e5868SGarrett D'Amore * during the primary pass anyway.
636b5e5868SGarrett D'Amore *
646b5e5868SGarrett D'Amore * Note that we do one final extra pass at the end to pick
656b5e5868SGarrett D'Amore * up UNDEFINED elements. There is special handling for them.
666b5e5868SGarrett D'Amore */
672d08521bSGarrett D'Amore for (pass = 0; pass <= lcc->lc_directive_count; pass++) {
682d08521bSGarrett D'Amore const int32_t *st1 = NULL;
692d08521bSGarrett D'Amore const int32_t *st2 = NULL;
706b5e5868SGarrett D'Amore const wchar_t *w1 = ws1;
716b5e5868SGarrett D'Amore const wchar_t *w2 = ws2;
726b5e5868SGarrett D'Amore
736b5e5868SGarrett D'Amore /* special pass for UNDEFINED */
742d08521bSGarrett D'Amore if (pass == lcc->lc_directive_count) {
75*efcfb316SYuri Pankov direc = DIRECTIVE_FORWARD;
766b5e5868SGarrett D'Amore } else {
772d08521bSGarrett D'Amore direc = lcc->lc_directive[pass];
786b5e5868SGarrett D'Amore }
796b5e5868SGarrett D'Amore
806b5e5868SGarrett D'Amore if (direc & DIRECTIVE_BACKWARD) {
816b5e5868SGarrett D'Amore wchar_t *bp, *fp, c;
82*efcfb316SYuri Pankov free(tr1);
836b5e5868SGarrett D'Amore if ((tr1 = wcsdup(w1)) == NULL)
84*efcfb316SYuri Pankov goto end;
856b5e5868SGarrett D'Amore bp = tr1;
866b5e5868SGarrett D'Amore fp = tr1 + wcslen(tr1) - 1;
876b5e5868SGarrett D'Amore while (bp < fp) {
886b5e5868SGarrett D'Amore c = *bp;
896b5e5868SGarrett D'Amore *bp++ = *fp;
906b5e5868SGarrett D'Amore *fp-- = c;
916b5e5868SGarrett D'Amore }
92*efcfb316SYuri Pankov free(tr2);
936b5e5868SGarrett D'Amore if ((tr2 = wcsdup(w2)) == NULL)
94*efcfb316SYuri Pankov goto end;
956b5e5868SGarrett D'Amore bp = tr2;
966b5e5868SGarrett D'Amore fp = tr2 + wcslen(tr2) - 1;
976b5e5868SGarrett D'Amore while (bp < fp) {
986b5e5868SGarrett D'Amore c = *bp;
996b5e5868SGarrett D'Amore *bp++ = *fp;
1006b5e5868SGarrett D'Amore *fp-- = c;
1016b5e5868SGarrett D'Amore }
1026b5e5868SGarrett D'Amore w1 = tr1;
1036b5e5868SGarrett D'Amore w2 = tr2;
1046b5e5868SGarrett D'Amore }
1056b5e5868SGarrett D'Amore
1066b5e5868SGarrett D'Amore if (direc & DIRECTIVE_POSITION) {
107*efcfb316SYuri Pankov int check1, check2;
108c8e81517SJohn Marino while (*w1 && *w2) {
1096b5e5868SGarrett D'Amore pri1 = pri2 = 0;
110c8e81517SJohn Marino check1 = check2 = 1;
111c8e81517SJohn Marino while ((pri1 == pri2) && (check1 || check2)) {
112c8e81517SJohn Marino if (check1) {
113c8e81517SJohn Marino _collate_lookup(lcc, w1, &len1,
114c8e81517SJohn Marino &pri1, pass, &st1);
115c8e81517SJohn Marino if (pri1 < 0) {
116c8e81517SJohn Marino errno = EINVAL;
117*efcfb316SYuri Pankov goto end;
118c8e81517SJohn Marino }
119c8e81517SJohn Marino if (!pri1) {
120c8e81517SJohn Marino /*CSTYLED*/
121c8e81517SJohn Marino pri1 = COLLATE_MAX_PRIORITY;
122c8e81517SJohn Marino st1 = NULL;
123c8e81517SJohn Marino }
124c8e81517SJohn Marino check1 = (st1 != NULL);
1256b5e5868SGarrett D'Amore }
126c8e81517SJohn Marino if (check2) {
127c8e81517SJohn Marino _collate_lookup(lcc, w2, &len2,
128c8e81517SJohn Marino &pri2, pass, &st2);
129c8e81517SJohn Marino if (pri2 < 0) {
130c8e81517SJohn Marino errno = EINVAL;
131*efcfb316SYuri Pankov goto end;
132c8e81517SJohn Marino }
133c8e81517SJohn Marino if (!pri2) {
134c8e81517SJohn Marino /*CSTYLED*/
135c8e81517SJohn Marino pri2 = COLLATE_MAX_PRIORITY;
136c8e81517SJohn Marino st2 = NULL;
137c8e81517SJohn Marino }
138c8e81517SJohn Marino check2 = (st2 != NULL);
1396b5e5868SGarrett D'Amore }
1406b5e5868SGarrett D'Amore }
1416b5e5868SGarrett D'Amore if (pri1 != pri2) {
1426b5e5868SGarrett D'Amore ret = pri1 - pri2;
1436b5e5868SGarrett D'Amore goto end;
1446b5e5868SGarrett D'Amore }
1456b5e5868SGarrett D'Amore w1 += len1;
1466b5e5868SGarrett D'Amore w2 += len2;
1476b5e5868SGarrett D'Amore }
148*efcfb316SYuri Pankov if (!*w1) {
149*efcfb316SYuri Pankov if (*w2) {
150*efcfb316SYuri Pankov ret = -(int)*w2;
151*efcfb316SYuri Pankov goto end;
152*efcfb316SYuri Pankov }
153*efcfb316SYuri Pankov } else {
154*efcfb316SYuri Pankov ret = *w1;
155*efcfb316SYuri Pankov goto end;
156*efcfb316SYuri Pankov }
1576b5e5868SGarrett D'Amore } else {
158*efcfb316SYuri Pankov int vpri1 = 0, vpri2 = 0;
159*efcfb316SYuri Pankov while (*w1 || *w2 || st1 || st2) {
160*efcfb316SYuri Pankov pri1 = 1;
161*efcfb316SYuri Pankov while (*w1 || st1) {
162*efcfb316SYuri Pankov _collate_lookup(lcc, w1, &len1, &pri1,
163*efcfb316SYuri Pankov pass, &st1);
164*efcfb316SYuri Pankov w1 += len1;
165*efcfb316SYuri Pankov if (pri1 > 0) {
166*efcfb316SYuri Pankov vpri1++;
167*efcfb316SYuri Pankov break;
1686b5e5868SGarrett D'Amore }
169*efcfb316SYuri Pankov if (pri1 < 0) {
170*efcfb316SYuri Pankov errno = EINVAL;
171*efcfb316SYuri Pankov goto end;
1726b5e5868SGarrett D'Amore }
173*efcfb316SYuri Pankov st1 = NULL;
174*efcfb316SYuri Pankov }
175*efcfb316SYuri Pankov pri2 = 1;
176*efcfb316SYuri Pankov while (*w2 || st2) {
177*efcfb316SYuri Pankov _collate_lookup(lcc, w2, &len2, &pri2,
178*efcfb316SYuri Pankov pass, &st2);
179*efcfb316SYuri Pankov w2 += len2;
180*efcfb316SYuri Pankov if (pri2 > 0) {
181*efcfb316SYuri Pankov vpri2++;
182c8e81517SJohn Marino break;
183*efcfb316SYuri Pankov }
184*efcfb316SYuri Pankov if (pri2 < 0) {
185*efcfb316SYuri Pankov errno = EINVAL;
186*efcfb316SYuri Pankov goto end;
187*efcfb316SYuri Pankov }
188*efcfb316SYuri Pankov st2 = NULL;
1896b5e5868SGarrett D'Amore }
190*efcfb316SYuri Pankov if ((!pri1 || !pri2) && (vpri1 == vpri2))
1916b5e5868SGarrett D'Amore break;
1926b5e5868SGarrett D'Amore if (pri1 != pri2) {
1936b5e5868SGarrett D'Amore ret = pri1 - pri2;
1946b5e5868SGarrett D'Amore goto end;
1956b5e5868SGarrett D'Amore }
1966b5e5868SGarrett D'Amore }
197*efcfb316SYuri Pankov if (vpri1 && !vpri2) {
198*efcfb316SYuri Pankov ret = 1;
199*efcfb316SYuri Pankov goto end;
200*efcfb316SYuri Pankov }
201*efcfb316SYuri Pankov if (!vpri1 && vpri2) {
202*efcfb316SYuri Pankov ret = -1;
2036b5e5868SGarrett D'Amore goto end;
2046b5e5868SGarrett D'Amore }
2056b5e5868SGarrett D'Amore }
2066b5e5868SGarrett D'Amore }
2076b5e5868SGarrett D'Amore ret = 0;
2086b5e5868SGarrett D'Amore
2096b5e5868SGarrett D'Amore end:
210c8e81517SJohn Marino free(tr1);
211c8e81517SJohn Marino free(tr2);
2124297a3b0SGarrett D'Amore
2136b5e5868SGarrett D'Amore return (ret);
2144297a3b0SGarrett D'Amore }
2152d08521bSGarrett D'Amore
/*
 * Compare two wide-character strings by calling wcscoll_l() with the
 * locale returned by uselocale(NULL).  The return value is whatever
 * wcscoll_l() returns.
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
	return (wcscoll_l(ws1, ws2, uselocale(NULL)));
}
221