/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1999 by Sun Microsystems, Inc. * All rights reserved. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * UTF-8 encoded Unicode parsing routines. For efficiency, we convert * to wide chars only when absolutely needed. The following interfaces * are exported to libslp: * * slp_utf_strchr: same semantics as strchr, but handles UTF-8 strings * slp_fold_space: folds white space around and in between works; * handles UTF-8 strings * slp_strcasecmp: same semantics as strcasecmp, but also folds white * space and attempts locale-specific * case-insensitive comparisons. */ #include #include #include #include #include #include /* * Same semantics as strchr. * Assumes that we start on a char boundry, and that c is a 7-bit * ASCII char. */ char *slp_utf_strchr(const char *s, char c) { int len; char *p; for (p = (char *)s; *p; p += len) { len = mblen(p, MB_CUR_MAX); if (len == 1 && *p == c) return (p); } return (NULL); } /* * folds white space around and in between words. * " aa bb " becomes "aa bb". * returns NULL if it couldn't allocate memory. The caller must free * the result when done. */ static char *slp_fold_space(const char *s) { int len; char *folded, *f; if (!(folded = malloc(strlen(s) + 1))) { slp_err(LOG_CRIT, 0, "slp_fold_space", "out of memory"); return (NULL); } f = folded; for (;;) { /* step 1: skip white space */ for (; *s; s++) { len = mblen(s, MB_CUR_MAX); if (len != 1) break; if (!isspace(*s)) break; } if (!*s) { /* end of string */ *f = 0; return (folded); } /* if we are in between words, keep one space */ if (f != folded) *f++ = ' '; /* step 2: copy into folded until we hit more white space */ while (*s) { int i; len = mblen(s, MB_CUR_MAX); if (len == 1 && isspace(*s)) break; for (i = 0; i < len; i++) *f++ = *s++; } *f = *s; if (!*s++) return (folded); } } /* * performs like strcasecmp, but also folds white space before comparing, * and will handle UTF-8 comparisons (including case). Note that the * application's locale must have been set to a UTF-8 locale for this * to work properly. */ int slp_strcasecmp(const char *s1, const char *s2) { int diff = -1; char *p1, *p2; size_t wcslen1, wcslen2; wchar_t *wcs1, *wcs2; p1 = p2 = NULL; wcs1 = wcs2 = NULL; /* optimization: try simple case first */ if (strcasecmp(s1, s2) == 0) return (0); /* fold white space, and try again */ p1 = slp_fold_space(s1); p2 = slp_fold_space(s2); if (!p1 || !p2) goto cleanup; if ((diff = strcasecmp(p1, p2)) == 0) goto cleanup; /* * try converting to wide char -- we must be in a locale which * supports the UTF8 codeset for this to work. */ if ((wcslen1 = mbstowcs(NULL, p1, 0)) == (size_t)-1) goto cleanup; if (!(wcs1 = malloc(sizeof (*wcs1) * (wcslen1 + 1)))) { slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); goto cleanup; } if ((wcslen2 = mbstowcs(NULL, p2, 0)) == (size_t)-1) goto cleanup; if (!(wcs2 = malloc(sizeof (*wcs2) * (wcslen2 + 1)))) { slp_err(LOG_CRIT, 0, "slp_strcasecmp", "out of memory"); goto cleanup; } if (mbstowcs(wcs1, p1, wcslen1 + 1) == (size_t)-1) goto cleanup; if (mbstowcs(wcs2, p2, wcslen2 + 1) == (size_t)-1) goto cleanup; diff = wscasecmp(wcs1, wcs2); cleanup: if (p1) free(p1); if (p2) free(p2); if (wcs1) free(wcs1); if (wcs2) free(wcs2); return (diff); }