xref: /illumos-gate/usr/src/lib/libc/port/locale/wcscoll.c (revision efcfb316)
/*
 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright (c) 2002 Tim J. Robbins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
284297a3b0SGarrett D'Amore 
294297a3b0SGarrett D'Amore #include "lint.h"
304297a3b0SGarrett D'Amore #include <errno.h>
314297a3b0SGarrett D'Amore #include <stdlib.h>
324297a3b0SGarrett D'Amore #include <string.h>
334297a3b0SGarrett D'Amore #include <wchar.h>
346b5e5868SGarrett D'Amore #include <assert.h>
354297a3b0SGarrett D'Amore #include "collate.h"
362d08521bSGarrett D'Amore #include "localeimpl.h"
374297a3b0SGarrett D'Amore 
384297a3b0SGarrett D'Amore int
wcscoll_l(const wchar_t * ws1,const wchar_t * ws2,locale_t loc)392d08521bSGarrett D'Amore wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc)
404297a3b0SGarrett D'Amore {
41*efcfb316SYuri Pankov 	int len1, len2, pri1, pri2;
426b5e5868SGarrett D'Amore 	wchar_t *tr1 = NULL, *tr2 = NULL;
436b5e5868SGarrett D'Amore 	int direc, pass;
442d08521bSGarrett D'Amore 	const struct lc_collate *lcc = loc->collate;
45*efcfb316SYuri Pankov 	int ret = wcscmp(ws1, ws2);
464297a3b0SGarrett D'Amore 
47*efcfb316SYuri Pankov 	if (lcc->lc_is_posix || ret == 0)
48*efcfb316SYuri Pankov 		return (ret);
494297a3b0SGarrett D'Amore 
50*efcfb316SYuri Pankov 	if (*ws1 == 0 && *ws2 != 0)
51*efcfb316SYuri Pankov 		return (-1);
52*efcfb316SYuri Pankov 	if (*ws1 != 0 && *ws2 == 0)
53*efcfb316SYuri Pankov 		return (1);
544297a3b0SGarrett D'Amore 
556b5e5868SGarrett D'Amore 	/*
566b5e5868SGarrett D'Amore 	 * Once upon a time we had code to try to optimize this, but
576b5e5868SGarrett D'Amore 	 * it turns out that you really can't make many assumptions
586b5e5868SGarrett D'Amore 	 * safely.  You absolutely have to run this pass by pass,
596b5e5868SGarrett D'Amore 	 * because some passes will be ignored for a given character,
606b5e5868SGarrett D'Amore 	 * while others will not.  Simpler locales will benefit from
616b5e5868SGarrett D'Amore 	 * having fewer passes, and most comparisions should resolve
626b5e5868SGarrett D'Amore 	 * during the primary pass anyway.
636b5e5868SGarrett D'Amore 	 *
646b5e5868SGarrett D'Amore 	 * Note that we do one final extra pass at the end to pick
656b5e5868SGarrett D'Amore 	 * up UNDEFINED elements.  There is special handling for them.
666b5e5868SGarrett D'Amore 	 */
672d08521bSGarrett D'Amore 	for (pass = 0; pass <= lcc->lc_directive_count; pass++) {
682d08521bSGarrett D'Amore 		const int32_t *st1 = NULL;
692d08521bSGarrett D'Amore 		const int32_t *st2 = NULL;
706b5e5868SGarrett D'Amore 		const wchar_t	*w1 = ws1;
716b5e5868SGarrett D'Amore 		const wchar_t	*w2 = ws2;
726b5e5868SGarrett D'Amore 
736b5e5868SGarrett D'Amore 		/* special pass for UNDEFINED */
742d08521bSGarrett D'Amore 		if (pass == lcc->lc_directive_count) {
75*efcfb316SYuri Pankov 			direc = DIRECTIVE_FORWARD;
766b5e5868SGarrett D'Amore 		} else {
772d08521bSGarrett D'Amore 			direc = lcc->lc_directive[pass];
786b5e5868SGarrett D'Amore 		}
796b5e5868SGarrett D'Amore 
806b5e5868SGarrett D'Amore 		if (direc & DIRECTIVE_BACKWARD) {
816b5e5868SGarrett D'Amore 			wchar_t *bp, *fp, c;
82*efcfb316SYuri Pankov 			free(tr1);
836b5e5868SGarrett D'Amore 			if ((tr1 = wcsdup(w1)) == NULL)
84*efcfb316SYuri Pankov 				goto end;
856b5e5868SGarrett D'Amore 			bp = tr1;
866b5e5868SGarrett D'Amore 			fp = tr1 + wcslen(tr1) - 1;
876b5e5868SGarrett D'Amore 			while (bp < fp) {
886b5e5868SGarrett D'Amore 				c = *bp;
896b5e5868SGarrett D'Amore 				*bp++ = *fp;
906b5e5868SGarrett D'Amore 				*fp-- = c;
916b5e5868SGarrett D'Amore 			}
92*efcfb316SYuri Pankov 			free(tr2);
936b5e5868SGarrett D'Amore 			if ((tr2 = wcsdup(w2)) == NULL)
94*efcfb316SYuri Pankov 				goto end;
956b5e5868SGarrett D'Amore 			bp = tr2;
966b5e5868SGarrett D'Amore 			fp = tr2 + wcslen(tr2) - 1;
976b5e5868SGarrett D'Amore 			while (bp < fp) {
986b5e5868SGarrett D'Amore 				c = *bp;
996b5e5868SGarrett D'Amore 				*bp++ = *fp;
1006b5e5868SGarrett D'Amore 				*fp-- = c;
1016b5e5868SGarrett D'Amore 			}
1026b5e5868SGarrett D'Amore 			w1 = tr1;
1036b5e5868SGarrett D'Amore 			w2 = tr2;
1046b5e5868SGarrett D'Amore 		}
1056b5e5868SGarrett D'Amore 
1066b5e5868SGarrett D'Amore 		if (direc & DIRECTIVE_POSITION) {
107*efcfb316SYuri Pankov 			int check1, check2;
108c8e81517SJohn Marino 			while (*w1 && *w2) {
1096b5e5868SGarrett D'Amore 				pri1 = pri2 = 0;
110c8e81517SJohn Marino 				check1 = check2 = 1;
111c8e81517SJohn Marino 				while ((pri1 == pri2) && (check1 || check2)) {
112c8e81517SJohn Marino 					if (check1) {
113c8e81517SJohn Marino 						_collate_lookup(lcc, w1, &len1,
114c8e81517SJohn Marino 						    &pri1, pass, &st1);
115c8e81517SJohn Marino 						if (pri1 < 0) {
116c8e81517SJohn Marino 							errno = EINVAL;
117*efcfb316SYuri Pankov 							goto end;
118c8e81517SJohn Marino 						}
119c8e81517SJohn Marino 						if (!pri1) {
120c8e81517SJohn Marino 							/*CSTYLED*/
121c8e81517SJohn Marino 							pri1 = COLLATE_MAX_PRIORITY;
122c8e81517SJohn Marino 							st1 = NULL;
123c8e81517SJohn Marino 						}
124c8e81517SJohn Marino 						check1 = (st1 != NULL);
1256b5e5868SGarrett D'Amore 					}
126c8e81517SJohn Marino 					if (check2) {
127c8e81517SJohn Marino 						_collate_lookup(lcc, w2, &len2,
128c8e81517SJohn Marino 						    &pri2, pass, &st2);
129c8e81517SJohn Marino 						if (pri2 < 0) {
130c8e81517SJohn Marino 							errno = EINVAL;
131*efcfb316SYuri Pankov 							goto end;
132c8e81517SJohn Marino 						}
133c8e81517SJohn Marino 						if (!pri2) {
134c8e81517SJohn Marino 							/*CSTYLED*/
135c8e81517SJohn Marino 							pri2 = COLLATE_MAX_PRIORITY;
136c8e81517SJohn Marino 							st2 = NULL;
137c8e81517SJohn Marino 						}
138c8e81517SJohn Marino 						check2 = (st2 != NULL);
1396b5e5868SGarrett D'Amore 					}
1406b5e5868SGarrett D'Amore 				}
1416b5e5868SGarrett D'Amore 				if (pri1 != pri2) {
1426b5e5868SGarrett D'Amore 					ret = pri1 - pri2;
1436b5e5868SGarrett D'Amore 					goto end;
1446b5e5868SGarrett D'Amore 				}
1456b5e5868SGarrett D'Amore 				w1 += len1;
1466b5e5868SGarrett D'Amore 				w2 += len2;
1476b5e5868SGarrett D'Amore 			}
148*efcfb316SYuri Pankov 			if (!*w1) {
149*efcfb316SYuri Pankov 				if (*w2) {
150*efcfb316SYuri Pankov 					ret = -(int)*w2;
151*efcfb316SYuri Pankov 					goto end;
152*efcfb316SYuri Pankov 				}
153*efcfb316SYuri Pankov 			} else {
154*efcfb316SYuri Pankov 				ret = *w1;
155*efcfb316SYuri Pankov 				goto end;
156*efcfb316SYuri Pankov 			}
1576b5e5868SGarrett D'Amore 		} else {
158*efcfb316SYuri Pankov 			int vpri1 = 0, vpri2 = 0;
159*efcfb316SYuri Pankov 			while (*w1 || *w2 || st1 || st2) {
160*efcfb316SYuri Pankov 				pri1 = 1;
161*efcfb316SYuri Pankov 				while (*w1 || st1) {
162*efcfb316SYuri Pankov 					_collate_lookup(lcc, w1, &len1, &pri1,
163*efcfb316SYuri Pankov 					    pass, &st1);
164*efcfb316SYuri Pankov 					w1 += len1;
165*efcfb316SYuri Pankov 					if (pri1 > 0) {
166*efcfb316SYuri Pankov 						vpri1++;
167*efcfb316SYuri Pankov 						break;
1686b5e5868SGarrett D'Amore 					}
169*efcfb316SYuri Pankov 					if (pri1 < 0) {
170*efcfb316SYuri Pankov 						errno = EINVAL;
171*efcfb316SYuri Pankov 						goto end;
1726b5e5868SGarrett D'Amore 					}
173*efcfb316SYuri Pankov 					st1 = NULL;
174*efcfb316SYuri Pankov 				}
175*efcfb316SYuri Pankov 				pri2 = 1;
176*efcfb316SYuri Pankov 				while (*w2 || st2) {
177*efcfb316SYuri Pankov 					_collate_lookup(lcc, w2, &len2, &pri2,
178*efcfb316SYuri Pankov 					    pass, &st2);
179*efcfb316SYuri Pankov 					w2 += len2;
180*efcfb316SYuri Pankov 					if (pri2 > 0) {
181*efcfb316SYuri Pankov 						vpri2++;
182c8e81517SJohn Marino 						break;
183*efcfb316SYuri Pankov 					}
184*efcfb316SYuri Pankov 					if (pri2 < 0) {
185*efcfb316SYuri Pankov 						errno = EINVAL;
186*efcfb316SYuri Pankov 						goto end;
187*efcfb316SYuri Pankov 					}
188*efcfb316SYuri Pankov 					st2 = NULL;
1896b5e5868SGarrett D'Amore 				}
190*efcfb316SYuri Pankov 				if ((!pri1 || !pri2) && (vpri1 == vpri2))
1916b5e5868SGarrett D'Amore 					break;
1926b5e5868SGarrett D'Amore 				if (pri1 != pri2) {
1936b5e5868SGarrett D'Amore 					ret = pri1 - pri2;
1946b5e5868SGarrett D'Amore 					goto end;
1956b5e5868SGarrett D'Amore 				}
1966b5e5868SGarrett D'Amore 			}
197*efcfb316SYuri Pankov 			if (vpri1 && !vpri2) {
198*efcfb316SYuri Pankov 				ret = 1;
199*efcfb316SYuri Pankov 				goto end;
200*efcfb316SYuri Pankov 			}
201*efcfb316SYuri Pankov 			if (!vpri1 && vpri2) {
202*efcfb316SYuri Pankov 				ret = -1;
2036b5e5868SGarrett D'Amore 				goto end;
2046b5e5868SGarrett D'Amore 			}
2056b5e5868SGarrett D'Amore 		}
2066b5e5868SGarrett D'Amore 	}
2076b5e5868SGarrett D'Amore 	ret = 0;
2086b5e5868SGarrett D'Amore 
2096b5e5868SGarrett D'Amore end:
210c8e81517SJohn Marino 	free(tr1);
211c8e81517SJohn Marino 	free(tr2);
2124297a3b0SGarrett D'Amore 
2136b5e5868SGarrett D'Amore 	return (ret);
2144297a3b0SGarrett D'Amore }
2152d08521bSGarrett D'Amore 
/*
 * Compare the wide strings ws1 and ws2 by calling wcscoll_l() with the
 * locale returned by uselocale(NULL).  The return value is whatever
 * wcscoll_l() returns.
 */
int
wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
	return (wcscoll_l(ws1, ws2, uselocale(NULL)));
}
221