xref: /illumos-gate/usr/src/common/util/string.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Implementations of the functions described in vsnprintf(3C) and string(3C),
31  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
32  * these functions match the section 3C manpages.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/varargs.h>
37 #if defined(_BOOT) || defined(_KMDB)
38 #include <string.h>
39 #else
40 #include <sys/systm.h>
41 #endif
42 #ifdef _KERNEL
43 #include <sys/debug.h>
44 #endif	/* _KERNEL */
45 
46 /*
47  * kmdb has its own *printf routines, and thus doesn't need these versions too.
48  */
49 #if !defined(_KMDB)
50 
51 #define	ADDCHAR(c)	if (bufp++ - buf < buflen) bufp[-1] = (c)
52 
53 /*
54  * Given a buffer 'buf' of size 'buflen', render as much of the string
55  * described by <fmt, args> as possible.  The string will always be
56  * null-terminated, so the maximum string length is 'buflen - 1'.
57  * Returns the number of bytes that would be necessary to render the
58  * entire string, not including null terminator (just like vsnprintf(3S)).
59  * To determine buffer size in advance, use vsnprintf(NULL, 0, fmt, args) + 1.
60  *
61  * There is no support for floating point, and the C locale is assumed.
62  */
63 size_t
64 vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
65 {
66 	uint64_t ul, tmp;
67 	char *bufp = buf;	/* current buffer pointer */
68 	int pad, width, ells, base, sign, c;
69 	char *digits, *sp, *bs;
70 	char numbuf[65];	/* sufficient for a 64-bit binary value */
71 	va_list args;
72 
73 	/*
74 	 * Make a copy so that all our callers don't have to make a copy
75 	 */
76 	va_copy(args, aargs);
77 
78 	if ((ssize_t)buflen < 0)
79 		buflen = 0;
80 
81 	while ((c = *fmt++) != '\0') {
82 		if (c != '%') {
83 			ADDCHAR(c);
84 			continue;
85 		}
86 
87 		if ((c = *fmt++) == '\0')
88 			break;
89 
90 		for (pad = ' '; c == '0'; c = *fmt++)
91 			pad = '0';
92 
93 		for (width = 0; c >= '0' && c <= '9'; c = *fmt++)
94 			width = width * 10 + c - '0';
95 
96 		for (ells = 0; c == 'l'; c = *fmt++)
97 			ells++;
98 
99 		digits = "0123456789abcdef";
100 
101 		if (c >= 'A' && c <= 'Z') {
102 			c += 'a' - 'A';
103 			digits = "0123456789ABCDEF";
104 		}
105 
106 		base = sign = 0;
107 
108 		switch (c) {
109 		case 'd':
110 			sign = 1;
111 			/*FALLTHROUGH*/
112 		case 'u':
113 			base = 10;
114 			break;
115 		case 'p':
116 			ells = 1;
117 			/*FALLTHROUGH*/
118 		case 'x':
119 			base = 16;
120 			break;
121 		case 'o':
122 			base = 8;
123 			break;
124 		case 'b':
125 			ells = 0;
126 			base = 1;
127 			break;
128 		case 'c':
129 			ul = (int64_t)va_arg(args, int);
130 			ADDCHAR((int)ul & 0xff);
131 			break;
132 		case 's':
133 			sp = va_arg(args, char *);
134 			if (sp == NULL)
135 				sp = "<null string>";
136 			while ((c = *sp++) != 0)
137 				ADDCHAR(c);
138 			break;
139 		case '%':
140 			ADDCHAR('%');
141 			break;
142 		}
143 
144 		if (base == 0)
145 			continue;
146 
147 		if (ells == 0)
148 			ul = (int64_t)va_arg(args, int);
149 		else if (ells == 1)
150 			ul = (int64_t)va_arg(args, long);
151 		else
152 			ul = (int64_t)va_arg(args, int64_t);
153 
154 		if (sign && (int64_t)ul < 0)
155 			ul = -ul;
156 		else
157 			sign = 0;
158 
159 		if (ells < 8 / sizeof (long))
160 			ul &= 0xffffffffU;
161 
162 		if (c == 'b') {
163 			bs = va_arg(args, char *);
164 			base = *bs++;
165 		}
166 
167 		tmp = ul;
168 		do {
169 			width--;
170 		} while ((tmp /= base) != 0);
171 
172 		if (sign && pad == '0')
173 			ADDCHAR('-');
174 		while (width-- > sign)
175 			ADDCHAR(pad);
176 		if (sign && pad == ' ')
177 			ADDCHAR('-');
178 
179 		sp = numbuf;
180 		tmp = ul;
181 		do {
182 			*sp++ = digits[tmp % base];
183 		} while ((tmp /= base) != 0);
184 
185 		while (sp > numbuf) {
186 			sp--;
187 			ADDCHAR(*sp);
188 		}
189 
190 		if (c == 'b' && ul != 0) {
191 			int any = 0;
192 			c = *bs++;
193 			while (c != 0) {
194 				if (ul & (1 << (c - 1))) {
195 					if (any++ == 0)
196 						ADDCHAR('<');
197 					while ((c = *bs++) >= 32)
198 						ADDCHAR(c);
199 					ADDCHAR(',');
200 				} else {
201 					while ((c = *bs++) >= 32)
202 						continue;
203 				}
204 			}
205 			if (any) {
206 				bufp--;
207 				ADDCHAR('>');
208 			}
209 		}
210 	}
211 	if (bufp - buf < buflen)
212 		bufp[0] = c;
213 	else if (buflen != 0)
214 		buf[buflen - 1] = c;
215 
216 	va_end(args);
217 
218 	return (bufp - buf);
219 }
220 
/*
 * Variadic front end for vsnprintf(): formats into 'buf' (at most 'buflen'
 * bytes) and returns whatever vsnprintf() returns.
 *
 * The format string is the third argument, so the lint directive must be
 * PRINTFLIKE3 for the arguments to be checked against the right string.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
233 
234 #if defined(_BOOT)
235 /*
236  * The sprintf() and vsprintf() routines aren't shared with the kernel because
237  * the DDI mandates that they return the buffer rather than its length.
238  */
/*
 * Format into 'buf' with INT_MAX passed as the buffer size, then return the
 * length of the string that ended up in 'buf'.
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vsnprintf(buf, INT_MAX, fmt, ap);
	va_end(ap);

	return (strlen(buf));
}
251 
/*
 * va_list counterpart of sprintf(): format into 'buf' with INT_MAX passed
 * as the buffer size and return the length of the resulting string.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	size_t len;

	(void) vsnprintf(buf, INT_MAX, fmt, args);
	len = strlen(buf);

	return (len);
}
258 #endif
259 
260 #endif /* !_KMDB */
261 
/*
 * Append a copy of 's2', including its terminator, to the end of 's1'.
 * Returns 's1'.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;

	/* Find the terminator of the existing string. */
	while (*tail != '\0')
		tail++;

	/* Copy s2 on top of it, terminator included. */
	do {
		*tail++ = *s2;
	} while (*s2++ != '\0');

	return (s1);
}
274 
/*
 * Return a pointer to the first occurrence of 'c' (converted to char) in
 * the string 'sp', or NULL if there is none.  The terminator counts as part
 * of the string, so searching for '\0' returns a pointer to it.
 */
char *
strchr(const char *sp, int c)
{
	const char want = (char)c;

	for (;; sp++) {
		if (*sp == want)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
284 
/*
 * Compare two strings.  Returns 0 if they are equal; otherwise returns the
 * difference between the first pair of bytes that differ, with both bytes
 * taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	for (; *s1 == *s2; s1++, s2++) {
		if (*s1 == '\0')
			return (0);
	}

	return (*(const unsigned char *)s1 - *(const unsigned char *)s2);
}
293 
/*
 * Compare at most 'n' bytes of two strings.  Returns 0 if the compared
 * portions are equal (or if both arguments are the same pointer); otherwise
 * returns the difference between the first pair of differing bytes, with
 * both bytes taken as unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	if (s1 == s2)
		return (0);

	for (; n != 0; n--, s1++, s2++) {
		if (*s1 != *s2) {
			return (*(const unsigned char *)s1 -
			    *(const unsigned char *)s2);
		}
		if (*s1 == '\0')
			break;
	}

	return (0);
}
305 
/*
 * Case-folding table indexed by byte value.  The entries at 'A' through 'Z'
 * hold the corresponding lower-case letter; every other entry holds its own
 * index.
 */
static const char charmap[] = {
	/* 0x00 - 0x1f */
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	/* 0x20 - 0x3f */
	' ', '!', '"', '#', '$', '%', '&', '\'',
	'(', ')', '*', '+', ',', '-', '.', '/',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ':', ';', '<', '=', '>', '?',
	/* 0x40 - 0x5f: the upper-case letters fold to lower case */
	'@', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', '[', '\\', ']', '^', '_',
	/* 0x60 - 0x7f */
	'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', '{', '|', '}', '~', '\x7f',
	/* 0x80 - 0xff */
	'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
	'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
	'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
	'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
	'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
	'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
	'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
	'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
	'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
	'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
	'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
	'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
	'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
	'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
	'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
	'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
340 
341 int
342 strcasecmp(const char *s1, const char *s2)
343 {
344 	const unsigned char *cm = (const unsigned char *)charmap;
345 	const unsigned char *us1 = (const unsigned char *)s1;
346 	const unsigned char *us2 = (const unsigned char *)s2;
347 
348 	while (cm[*us1] == cm[*us2++])
349 		if (*us1++ == '\0')
350 			return (0);
351 	return (cm[*us1] - cm[*(us2 - 1)]);
352 }
353 
354 int
355 strncasecmp(const char *s1, const char *s2, size_t n)
356 {
357 	const unsigned char *cm = (const unsigned char *)charmap;
358 	const unsigned char *us1 = (const unsigned char *)s1;
359 	const unsigned char *us2 = (const unsigned char *)s2;
360 
361 	while (n != 0 && cm[*us1] == cm[*us2++]) {
362 		if (*us1++ == '\0')
363 			return (0);
364 		n--;
365 	}
366 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
367 }
368 
/*
 * Copy the string 's2', including its terminator, to 's1'.  Returns 's1'.
 */
char *
strcpy(char *s1, const char *s2)
{
	size_t i;

	for (i = 0; (s1[i] = s2[i]) != '\0'; i++)
		continue;

	return (s1);
}
378 
/*
 * Copy at most 'n' bytes of the string 's2' to 's1'.  If 's2' is shorter
 * than 'n' bytes, the remainder of the 'n' bytes is filled with '\0'.  If
 * it is not, no terminator is written.  Returns 's1'.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* Copy the characters of s2, up to n of them. */
	while (i < n && s2[i] != '\0') {
		s1[i] = s2[i];
		i++;
	}

	/* Zero-fill whatever is left of the n bytes. */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
392 
/*
 * Return a pointer to the last occurrence of 'c' (converted to char) in the
 * string 'sp', or NULL if there is none.  The terminator counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 */
char *
strrchr(const char *sp, int c)
{
	const char want = (char)c;
	const char *last = NULL;

	for (;; sp++) {
		if (*sp == want)
			last = sp;
		if (*sp == '\0')
			break;
	}

	return ((char *)last);
}
405 
/*
 * Return a pointer to the first occurrence of the string 'as2' within the
 * string 'as1', or NULL if it does not occur.  If 'as2' is NULL or empty,
 * 'as1' itself is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *hay, *pat;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	/* Try to match the whole pattern at each position in turn. */
	for (start = as1; *start != '\0'; start++) {
		hay = start;
		pat = as2;
		while (*pat != '\0' && *hay == *pat) {
			hay++;
			pat++;
		}
		if (*pat == '\0')
			return ((char *)start);
	}

	return (NULL);
}
434 
/*
 * Return a pointer to the first character in 'string' that also appears in
 * 'brkset', or NULL if no character of 'string' does.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *cand;

	for (;; string++) {
		for (cand = brkset; *cand != '\0'; cand++) {
			if (*cand == *string)
				return ((char *)string);
		}
		if (*string == '\0')
			return (NULL);
	}
}
449 
/*
 * Append at most 'n' characters of the string 's2' to the end of 's1' and
 * then terminate the result.  Returns 's1'.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* Find the terminator of the existing string. */
	while (*tail != '\0')
		tail++;

	/* Append until s2 is exhausted or the budget is spent. */
	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
467 
#if defined(_BOOT) || defined(_KMDB)
#define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
#endif

/*
 * Append the string 'src' to the string in 'dst', where 'dstsize' is the
 * full size of the 'dst' buffer.  As much of 'src' as fits is copied and
 * the result is terminated.  If no terminator is found within the first
 * 'dstsize' bytes of 'dst', nothing is written.
 *
 * Returns the length of 'src' plus the length of 'dst' (the latter capped
 * at 'dstsize'), i.e. the length the combined string would have had with
 * unlimited room.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t ncopy;

	/* Measure dst without looking beyond dstsize bytes. */
	while (dstlen != dstsize && dst[dstlen] != '\0')
		dstlen++;

	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* Leave one byte for the terminator when src has to be cut short. */
	if (dstlen + srclen >= dstsize)
		ncopy = dstsize - dstlen - 1;
	else
		ncopy = srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
492 
/*
 * Copy the string 'src' into 'dst', a buffer of 'len' bytes.  At most
 * 'len - 1' characters are copied and the result is always terminated;
 * when 'len' is zero, 'dst' is not touched.  Returns the length of 'src'.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	const size_t srclen = strlen(src);
	size_t ncopy;

	if (len != 0) {
		ncopy = (srclen < len) ? srclen : len - 1;
		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}

	return (srclen);
}
510 
/*
 * Return the length of the initial portion of 'string' that consists
 * entirely of characters found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;
	const char *member;

	while (string[span] != '\0') {
		/* Look the current character up in the set. */
		member = charset;
		while (*member != '\0' && *member != string[span])
			member++;
		if (*member == '\0')
			break;
		span++;
	}

	return (span);
}
525 
526 /*
527  * Unless mentioned otherwise, all of the routines below should be added to
528  * the Solaris DDI as necessary.  For now, only provide them to standalone.
529  */
530 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split a string into tokens separated by characters from 'sepset'.  Pass
 * the string on the first call and NULL on later calls to continue with the
 * same string.  Each call overwrites the separator that ends the token with
 * '\0' and returns a pointer to the token, or NULL when no tokens remain.
 * The scan position is kept in a static variable between calls.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*cursor, *token, *stop;

	/*
	 * Resume from the saved position unless a new string was supplied.
	 */
	cursor = string;
	if (cursor == NULL)
		cursor = savept;
	if (cursor == NULL)
		return (NULL);

	/*
	 * Step over any separators in front of the token; if that takes us
	 * to the end of the string there is nothing left to return.
	 */
	token = cursor + strspn(cursor, sepset);
	if (*token == '\0')
		return (NULL);

	/*
	 * Cut the token off at the next separator, if there is one, and
	 * remember where the following call should pick up.
	 */
	stop = strpbrk(token, sepset);
	if (stop != NULL)
		*stop++ = '\0';
	savept = stop;

	return (token);
}
563 
/*
 * Return the number of characters in 's' that precede its terminator.
 *
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;

	return (end - s);
}
577 
578 #endif /* _BOOT || _KMDB */
579 
580 #ifdef _KERNEL
/*
 * Character classes used when checking C identifiers.
 */
#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 *
 * Returns 1 if 'id' is such an identifier and 0 otherwise; the empty
 * string is not valid.
 */
int
strident_valid(const char *id)
{
	int c = *id;

	/* The first character may not be a digit. */
	if (!(IS_ALPHA(c) || c == '_'))
		return (0);

	for (id++; (c = *id) != 0; id++) {
		if (IS_ALPHA(c) || IS_DIGIT(c) || c == '_')
			continue;
		return (0);
	}

	return (1);
}
605 
/*
 * Turn the string 's' into a valid C identifier, in place, by overwriting
 * every character that is not allowed at its position with '_'.  At most
 * 'n' bytes of 's' are used: the result is cut off and terminated so that
 * it fits, terminator included.  An empty string is left untouched.
 * 'n' must be greater than zero.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;		/* last byte we may write to */
	char c;

	ASSERT(n > 0);

	c = *s;
	if (c == 0)
		return;

	/* The first character may not be a digit. */
	if (!(IS_ALPHA(c) || c == '_'))
		*s = '_';

	while (s < last) {
		c = *++s;
		if (c == 0)
			break;
		if (!(IS_ALPHA(c) || IS_DIGIT(c) || c == '_'))
			*s = '_';
	}

	/* Either the existing terminator or the truncation point. */
	*s = 0;
}
631 
632 #endif	/* _KERNEL */
633