xref: /illumos-gate/usr/src/common/util/string.c (revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Implementations of the functions described in vsnprintf(3C) and string(3C),
30  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
31  * these functions match the section 3C manpages.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/varargs.h>
36 
37 #if defined(_KERNEL)
38 #include <sys/systm.h>
39 #include <sys/debug.h>
40 #elif !defined(_BOOT)
41 #include <string.h>
42 #endif
43 
44 #ifndef	NULL
45 #define	NULL	0l
46 #endif
47 
48 #include "memcpy.h"
49 #include "string.h"
50 
51 /*
52  * We don't need these for x86 boot or kmdb.
53  */
54 #if !defined(_KMDB) && (!defined(_BOOT) || defined(__sparc))
55 
/*
 * Emit one character of output.  'pos' counts every character the complete
 * rendering needs, but a byte is only stored while it still fits inside the
 * caller's buffer.  The argument is evaluated at most once.  This macro is
 * only usable inside vsnprintf(), whose 'buf', 'buflen' and 'pos' it touches.
 */
#define	ADDCHAR(c)	\
	do {						\
		if (pos < buflen)			\
			buf[pos] = (c);			\
		pos++;					\
	} while (0)

/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, aargs> as possible.  Whenever 'buflen' is non-zero the
 * result is null-terminated, so at most 'buflen - 1' characters of output
 * are stored.  The return value is the number of bytes the complete string
 * needs, not counting the terminator, so the size required can be found in
 * advance with vsnprintf(NULL, 0, fmt, args) + 1.  A 'buflen' that is
 * negative when viewed as an ssize_t is treated as 0.
 *
 * What the parser below understands:
 *	flags		'-' (honoured by %s only) and '0'
 *	width, .prec	decimal digits or '*'; the precision is used by %s only
 *	length		'h', 'hh', 'l', 'll'
 *	conversions	d u x o p c s b and %%
 * An upper-case conversion letter is folded to lower case and selects
 * upper-case hex digits.  %b prints an unsigned int in the base given by the
 * first byte of a second, string argument and then, inside <...>, the names
 * of the bits that are set; the remainder of that string is a series of
 * <bit number><name> groups.  Conversions that are not recognised are
 * dropped without consuming an argument.
 *
 * There is no support for floating point, and the C locale is assumed.
 * 'aargs' is copied internally, so the caller's va_list is left untouched.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul, tmp;
	size_t pos = 0;		/* length of the full rendering so far */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	const char *digits, *sp, *bs;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	char *np;
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
		bs = NULL;
next_fmt:
		/* A format that ends in the middle of a directive just stops. */
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* A leading zero before any '.' asks for zero padding. */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				fmt++;
			}
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/*
			 * Any non-zero base routes us through the integer
			 * code below; the real base is read from the
			 * description string once the value has been fetched.
			 */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			transfer_count = strlen(sp);
			if (prec > 0) {
				/* trim string if too long */
				if (transfer_count > prec)
					transfer_count = prec;
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			}
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			while (transfer_count-- > 0) {
				ADDCHAR(*sp);
				sp++;
			}
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		default:
			/* Unrecognised conversion: emit nothing. */
			break;
		}

		/* Everything but the integer conversions is finished. */
		if (base == 0)
			continue;

		/*
		 * Fetch the integer at the promoted type the length modifiers
		 * call for and widen it to 64 bits.
		 */
		if (h_count == 0 && l_count == 0) {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		} else if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			if (sign)
				ul = (int64_t)((signed char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		} else {
			/* exactly one 'h' */
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));
		}

		/* From here on 'sign' means "a minus sign must be printed". */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			bs = va_arg(args, char *);
			base = *bs++;
			/*
			 * The base comes from the caller's string.  Zero
			 * would divide by zero, one would never terminate
			 * (and overrun numbuf), and anything above 16 would
			 * index past 'digits', so fall back to hex.
			 */
			if (base < 2 || base > 16)
				base = 16;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			/* Reduce width by the digit count to get the padding. */
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/* The sign goes before zero padding but after space padding. */
		if (sign && pad == '0')
			ADDCHAR('-');
		while (width-- > sign)
			ADDCHAR(pad);
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* Digits are produced least significant first, then reversed. */
		np = numbuf;
		tmp = ul;
		do {
			*np++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		while (np > numbuf) {
			np--;
			ADDCHAR(*np);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;

			/*
			 * Bytes below 32 are bit numbers (1 is the least
			 * significant bit); the bytes >= 32 that follow one
			 * are that bit's name.
			 */
			c = *bs++;
			while (c != 0) {
				if (ul & ((uint64_t)1 << (c - 1))) {
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* Replace the trailing ',' with '>'. */
				pos--;
				ADDCHAR('>');
			}
		}
	}

	/* Terminate, truncating the output if it did not fit. */
	if (pos < buflen)
		buf[pos] = '\0';
	else if (buflen != 0)
		buf[buflen - 1] = '\0';

	va_end(args);

	return (pos);
}
317 
/*
 * Variadic front end to vsnprintf(): formats into 'buf' (of size 'buflen')
 * and returns whatever vsnprintf() returns for the same arguments.
 *
 * The format string is the third argument, which is what the lint
 * directive below has to name.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
330 
331 #if defined(_BOOT) && defined(__sparc)
332 /*
333  * The sprintf() and vsprintf() routines aren't shared with the kernel because
334  * the DDI mandates that they return the buffer rather than its length.
335  */
/*
 * Unbounded formatted print into 'buf'; returns the length of the string
 * left in 'buf'.  All of the work is handed to vsprintf().
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsprintf(buf, fmt, ap);
	va_end(ap);

	return (len);
}
348 
/*
 * Unbounded formatted print into 'buf' from a va_list.  The buffer size
 * handed to vsnprintf() is INT_MAX; the value returned is the length of the
 * string that ends up in 'buf'.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	size_t len;

	(void) vsnprintf(buf, INT_MAX, fmt, args);
	len = strlen(buf);

	return (len);
}
355 #endif /* _BOOT && __sparc */
356 
357 #endif /* !_KMDB && (!_BOOT || __sparc) */
358 
/*
 * Append the string s2, terminator included, to the end of the string s1.
 * Returns s1.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;

	/* Find the terminator of the destination. */
	while (*tail != '\0')
		tail++;

	/* Copy the source over it, stopping once its NUL has been stored. */
	do {
		*tail++ = *s2;
	} while (*s2++ != '\0');

	return (s1);
}
371 
/*
 * Return a pointer to the first occurrence of (char)c in the string sp, or
 * NULL if there is none.  The terminating NUL counts as part of the string,
 * so looking for '\0' returns a pointer to the terminator.
 */
char *
strchr(const char *sp, int c)
{
	const char target = (char)c;

	for (;; sp++) {
		if (*sp == target)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
381 
/*
 * Compare two strings byte by byte.  Returns 0 when they are equal, and
 * otherwise the difference between the first pair of differing bytes, both
 * taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}

	return (*a - *b);
}
390 
/*
 * Compare at most n bytes of two strings.  Returns 0 when the compared
 * prefixes are equal (or when both arguments are the same pointer), and
 * otherwise the difference between the first pair of differing bytes, both
 * taken as unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;
	size_t i;

	if (s1 == s2)
		return (0);

	for (i = 0; i < n; i++) {
		if (a[i] != b[i])
			return (a[i] - b[i]);
		/* Equal and NUL: both strings end here. */
		if (a[i] == '\0')
			break;
	}

	return (0);
}
402 
/*
 * Case-folding table for strcasecmp() and strncasecmp(), indexed by the
 * value of a byte taken as unsigned char.  Every entry is its own index,
 * except for 'A' through 'Z' (octal 101-132), which hold the matching
 * lower-case letters (octal 141-172).
 */
static const char charmap[] = {
	/* 000-077: control characters, punctuation and digits */
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
	/* 100-137: '@', then 'A'-'Z' folded to 'a'-'z', then "[\]^_" */
	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
	/* 140-177: '`', 'a'-'z', "{|}~" and DEL */
	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
	/* 200-377: bytes with the top bit set are left alone */
	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};

/*
 * Compare two strings, ignoring the case of the letters 'A'-'Z'.  Returns 0
 * when the folded strings are equal, and otherwise the difference between
 * the first pair of folded bytes that differ.
 */
int
strcasecmp(const char *s1, const char *s2)
{
	const unsigned char *fold = (const unsigned char *)charmap;
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; fold[*a] == fold[*b]; a++, b++) {
		if (*a == '\0')
			return (0);
	}

	return (fold[*a] - fold[*b]);
}

/*
 * As strcasecmp(), but look at no more than the first n bytes of each
 * string.  Returns 0 when n is 0.
 */
int
strncasecmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *fold = (const unsigned char *)charmap;
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;
	size_t i;

	for (i = 0; i < n; i++) {
		if (fold[a[i]] != fold[b[i]])
			return (fold[a[i]] - fold[b[i]]);
		/* Equal and NUL: both strings end here. */
		if (a[i] == '\0')
			break;
	}

	return (0);
}
465 
/*
 * Copy the string s2, terminator included, into s1.  Returns s1.
 */
char *
strcpy(char *s1, const char *s2)
{
	size_t i;

	/* The loop body is empty: the copy happens in the condition. */
	for (i = 0; (s1[i] = s2[i]) != '\0'; i++)
		;

	return (s1);
}
475 
/*
 * Copy at most n bytes of the string s2 into s1.  If s2 is shorter than n,
 * the rest of the n bytes are filled with NULs; if it is not, s1 is left
 * without a terminator.  Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* Copy until the budget runs out or the terminator has been stored. */
	while (i < n && (s1[i] = s2[i]) != '\0')
		i++;

	/* Zero-fill whatever is left of the n bytes. */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
489 
/*
 * Return a pointer to the last occurrence of (char)c in the string sp, or
 * NULL if there is none.  The terminating NUL counts as part of the string.
 */
char *
strrchr(const char *sp, int c)
{
	const char target = (char)c;
	const char *last = NULL;

	for (;; sp++) {
		if (*sp == target)
			last = sp;
		if (*sp == '\0')
			break;
	}

	return ((char *)last);
}
502 
/*
 * Return a pointer to the first occurrence of the string as2 within the
 * string as1, or NULL if it does not occur.  A NULL or empty as2 matches at
 * the very start, so as1 itself is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *hay, *needle;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	for (start = as1; *start != '\0'; start++) {
		if (*start != *as2)
			continue;

		/* First byte matches; check the rest of the pattern. */
		hay = start + 1;
		needle = as2 + 1;
		while (*needle != '\0' && *needle == *hay) {
			hay++;
			needle++;
		}
		if (*needle == '\0')
			return ((char *)start);
	}

	return (NULL);
}
531 
/*
 * Return a pointer to the first byte of 'string' that also appears in
 * 'brkset', or NULL if no byte of 'string' does.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *b;

	for (; *string != '\0'; string++) {
		for (b = brkset; *b != '\0'; b++) {
			if (*b == *string)
				return ((char *)string);
		}
	}

	return (NULL);
}
546 
/*
 * Append at most n bytes of s2 to the end of the string s1 and then add a
 * terminating NUL (so up to n + 1 bytes are written).  Returns s1.
 *
 * s2 does not have to be NUL-terminated when it holds at least n bytes, so
 * the remaining budget is checked before each source byte is read; s2[n]
 * is never touched.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* Find the terminator of the destination. */
	while (*tail != '\0')
		tail++;

	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
564 
565 #if defined(_BOOT) || defined(_KMDB)
566 #define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
567 #endif
568 
/*
 * Append src to the string in dst, where dst is a buffer of dstsize bytes.
 * As much of src as fits is copied and the result is NUL-terminated.  If no
 * NUL is found within the first dstsize bytes of dst, dst is left alone.
 *
 * Returns the length of the string it tried to create: the length of dst
 * (capped at dstsize) plus the length of src.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* Measure dst without ever looking past dstsize bytes. */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: nothing can be appended. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* Keep one byte back for the terminator. */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
590 
/*
 * Copy src into dst, a buffer of len bytes.  At most len - 1 bytes are
 * copied and the result is always NUL-terminated, unless len is 0, in which
 * case dst is not touched.  Returns the length of src.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);
	size_t ncopy;

	if (len == 0)
		return (srclen);

	/* Keep one byte back for the terminator. */
	ncopy = (srclen < len) ? srclen : len - 1;

	bcopy(src, dst, ncopy);
	dst[ncopy] = '\0';

	return (srclen);
}
608 
/*
 * Return the length of the leading part of 'string' that consists only of
 * bytes found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span;
	const char *m;

	for (span = 0; string[span] != '\0'; span++) {
		/* Look the current byte up in the set. */
		m = charset;
		while (*m != '\0' && *m != string[span])
			m++;
		/* Not a member: the span ends here. */
		if (*m == '\0')
			return (span);
	}

	return (span);
}
624 
/*
 * Return the length of the leading part of 'string' that contains no byte
 * found in 'charset'.
 */
size_t
strcspn(const char *string, const char *charset)
{
	size_t span;
	const char *m;

	for (span = 0; string[span] != '\0'; span++) {
		/* Look the current byte up in the set. */
		m = charset;
		while (*m != '\0' && *m != string[span])
			m++;
		/* A member: the span ends here. */
		if (*m != '\0')
			return (span);
	}

	return (span);
}
640 
/*
 * strsep
 *
 * Split the next token off the string that *stringp points to.  The first
 * byte of that string which also appears in 'delim' is overwritten with a
 * NUL and *stringp is advanced to the byte that follows it.  If the string
 * contains no delimiter at all, the whole of it is the token and *stringp
 * is set to NULL.  In both cases the original value of *stringp is
 * returned.
 *
 * If *stringp is NULL on entry, strsep() returns NULL.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *tok = *stringp;
	char *cur;
	const char *d;

	if (tok == NULL)
		return (NULL);

	for (cur = tok; *cur != '\0'; cur++) {
		for (d = delim; *d != '\0'; d++) {
			if (*d == *cur) {
				/* Cut the token here; resume just past it. */
				*cur = '\0';
				*stringp = cur + 1;
				return (tok);
			}
		}
	}

	/* Hit the end of the string: this was the last token. */
	*stringp = NULL;
	return (tok);
}
680 
681 /*
682  * Unless mentioned otherwise, all of the routines below should be added to
683  * the Solaris DDI as necessary.  For now, only provide them to standalone.
684  */
685 #if defined(_BOOT) || defined(_KMDB)
/*
 * Return the next token of a string, where tokens are separated by runs of
 * bytes from 'sepset'.  Pass the string on the first call and NULL on the
 * following ones; the position reached is remembered in a static variable
 * between calls.  The byte that ends a token is overwritten with a NUL.
 * Returns NULL once no tokens are left.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*tok, *end;

	/*
	 * Carry on from where the previous call stopped unless the caller
	 * handed us a new string.
	 */
	if (string == NULL)
		string = savept;
	if (string == NULL)
		return (NULL);

	/*
	 * Step over any leading separators; give up if nothing follows.
	 */
	tok = string + strspn(string, sepset);
	if (*tok == '\0')
		return (NULL);

	/*
	 * Terminate the token and remember where the next search starts
	 * (NULL when this token ran to the end of the string).
	 */
	end = strpbrk(tok, sepset);
	if (end != NULL)
		*end++ = '\0';
	savept = end;

	return (tok);
}
718 
/*
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 *
 * Returns the number of bytes in s that come before its terminating NUL.
 */
size_t
strlen(const char *s)
{
	size_t len;

	for (len = 0; s[len] != '\0'; len++)
		;

	return (len);
}
732 
733 #endif /* _BOOT || _KMDB */
734 
735 #ifdef _KERNEL
/*
 * Character classes used to recognise C identifiers.  Only the ASCII
 * letters and digits are accepted.
 */

#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 * Returns 1 if 'id' is one and 0 if it is not; the empty string is not.
 */
int
strident_valid(const char *id)
{
	int c = *id;

	/* The first character may not be a digit. */
	if (c != '_' && !IS_ALPHA(c))
		return (0);

	for (id++; *id != '\0'; id++) {
		c = *id;
		if (c != '_' && !IS_ALPHA(c) && !IS_DIGIT(c))
			return (0);
	}

	return (1);
}
760 
/*
 * Turn the string s into a valid C identifier, in place, by overwriting
 * every character that is not allowed with '_'.  The string is also cut off
 * so that, terminator included, it takes up at most n bytes.  n must be
 * non-zero, and an empty string is left as it is.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;	/* the final byte we are allowed to write */
	char c;

	ASSERT(n > 0);

	c = *s;
	if (c == '\0')
		return;

	/* The first character may not be a digit. */
	if (c != '_' && !IS_ALPHA(c))
		*s = '_';

	while (s < last) {
		c = *++s;
		if (c == '\0')
			break;
		if (c != '_' && !IS_ALPHA(c) && !IS_DIGIT(c))
			*s = '_';
	}

	/* Either re-writes the existing NUL or truncates at byte n - 1. */
	*s = '\0';
}
786 
787 #endif	/* _KERNEL */
788