xref: /illumos-gate/usr/src/common/util/string.c (revision fc77c2683a49f56e308f395ccaa03e52265d8c2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Implementations of the functions described in vsnprintf(3C) and string(3C),
31  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
32  * these functions match the section 3C manpages.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/varargs.h>
37 #if defined(_BOOT) || defined(_KMDB)
38 #include <string.h>
39 #else
40 #include <sys/systm.h>
41 #endif
42 #ifdef _KERNEL
43 #include <sys/debug.h>
44 #endif	/* _KERNEL */
45 
46 /*
47  * kmdb has its own *printf routines, and thus doesn't need these versions too.
48  */
49 #if !defined(_KMDB)
50 
/*
 * Append one character to the output being built by vsnprintf().  Relies on
 * the caller's locals 'buf', 'bufp' and 'buflen': 'bufp' is always advanced
 * so that it keeps counting the bytes the full rendering would need, but the
 * character is stored only while it still fits within 'buflen' bytes.
 * The argument is evaluated at most once.  The do/while wrapper makes the
 * macro a single statement so it is safe in an unbraced if/else.
 */
#define	ADDCHAR(c)	\
	do { if (bufp++ - buf < buflen) bufp[-1] = (c); } while (0)
52 
/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, args> as possible.  The string will always be
 * null-terminated, so the maximum string length is 'buflen - 1'.
 * Returns the number of bytes that would be necessary to render the
 * entire string, not including null terminator (just like vsnprintf(3C)).
 * To determine buffer size in advance, use vsnprintf(NULL, 0, fmt, args) + 1.
 *
 * There is no support for floating point, and the C locale is assumed.
 *
 * What the parser below understands:
 *
 *   flags       '-' (left-justify; only consulted by %s) and '0' (zero pad)
 *   width       decimal digits, or '*' to take it from an int argument
 *   precision   '.' followed by decimal digits or '*'; only %s uses it,
 *               and a precision of 0 is treated the same as none
 *   length      h, hh, l, ll
 *   conversion  d u x o p c s b %
 *
 * An upper-case conversion letter is folded to lower case and selects
 * upper-case digits.  %p is rendered as an unsigned long in hex without any
 * prefix.  A conversion character that is not recognized is dropped.
 *
 * %b consumes an unsigned int followed by a 'char *' control string.  The
 * first byte of the control string is the numeric base used to print the
 * value.  The rest is a sequence of <bit number byte><name> groups, where
 * bit numbers start at 1 and a name is a run of bytes >= 32.  The names of
 * the bits that are set are appended as "<name,name>".
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul, tmp;
	char *bufp = buf;	/* current buffer pointer */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	char *digits, *sp, *bs;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	/* A size with the top bit set is treated as "no room at all". */
	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		/* Start of a conversion specification: reset parser state. */
		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
next_fmt:
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* a leading zero before any '.' requests zero padding */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			/*
			 * A '*' that follows the '.' supplies the precision;
			 * otherwise it supplies the field width.
			 */
			num = (int)va_arg(args, int);
			if (dot_count > 0)
				prec = num;
			else
				width = num;
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/* the real base comes from the control string below */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				/*
				 * Look at no more than 'prec' bytes, so a
				 * buffer that is bounded by the precision
				 * rather than by a terminator is never read
				 * past its limit.
				 */
				transfer_count = 0;
				while (transfer_count < prec &&
				    sp[transfer_count] != '\0')
					transfer_count++;
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else {
				transfer_count = strlen(sp);
			}
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			/* ADDCHAR() evaluates arg at most once */
			while (transfer_count-- > 0)
				ADDCHAR(*sp++);
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		}

		/* Only the numeric conversions set a base. */
		if (base == 0)
			continue;

		/* Fetch the value according to the length modifiers seen. */
		if (h_count == 0 && l_count == 0) {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		} else if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			if (sign)
				ul = (int64_t)((char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		} else {
			/* exactly one 'h' */
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));
		}

		/* From here on 'sign' means "a minus sign must be printed". */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			bs = va_arg(args, char *);
			base = *bs++;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/* With zero padding the sign precedes the pad characters. */
		if (sign && pad == '0')
			ADDCHAR('-');
		while (width-- > sign)
			ADDCHAR(pad);
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* Generate the digits least-significant first ... */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		/* ... and emit them in reverse. */
		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			while (c != 0) {
				if (ul & (1 << (c - 1))) {
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* replace the trailing ',' with '>' */
				bufp--;
				ADDCHAR('>');
			}
		}
	}

	/*
	 * Terminate the output; when it did not fit, the terminator takes
	 * the last byte of the buffer instead.
	 */
	if (bufp - buf < buflen)
		bufp[0] = '\0';
	else if (buflen != 0)
		buf[buflen - 1] = '\0';

	va_end(args);

	return (bufp - buf);
}
309 
/*
 * Variadic front end to vsnprintf(): formats into 'buf' (at most 'buflen'
 * bytes including the terminator) and returns whatever vsnprintf() returns,
 * i.e. the length the complete rendering would have.
 *
 * The format string is the third argument, which is what the lint
 * directive below has to name.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
322 
323 #if defined(_BOOT)
324 /*
325  * The sprintf() and vsprintf() routines aren't shared with the kernel because
326  * the DDI mandates that they return the buffer rather than its length.
327  */
/*
 * Format into 'buf' with no effective size limit (INT_MAX is passed as the
 * buffer size) and return the number of characters produced, not counting
 * the terminator.  The count comes straight from vsnprintf(), so the output
 * is not rescanned and an embedded NUL (e.g. from "%c" with 0) is counted.
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = (int)vsnprintf(buf, INT_MAX, fmt, args);
	va_end(args);

	return (len);
}
340 
/*
 * va_list flavour of sprintf(): format into 'buf' with no effective size
 * limit and return the number of characters produced, not counting the
 * terminator.  The count is the one vsnprintf() already computed, so the
 * output is not rescanned.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	return ((int)vsnprintf(buf, INT_MAX, fmt, args));
}
347 #endif
348 
349 #endif /* !_KMDB */
350 
/*
 * Append the string s2, terminator included, to the end of the string s1.
 * Returns s1.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;
	char ch;

	/* locate the terminator of the destination */
	while (*tail != '\0')
		tail++;

	/* copy the source over it, stopping once the NUL has been stored */
	do {
		ch = *s2++;
		*tail++ = ch;
	} while (ch != '\0');

	return (s1);
}
363 
/*
 * Return a pointer to the first occurrence of (char)c in the string sp, or
 * NULL if there is none.  The terminating NUL counts as part of the string,
 * so searching for '\0' yields a pointer to the terminator.
 */
char *
strchr(const char *sp, int c)
{
	const char target = (char)c;

	for (;; sp++) {
		if (*sp == target)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
373 
/*
 * Compare two strings byte by byte.  Returns 0 when they are equal,
 * otherwise the difference between the first pair of bytes that differ,
 * both taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}
	return (*a - *b);
}
382 
/*
 * Compare at most n bytes of two strings.  Returns 0 when the compared
 * portions are equal (always the case for n == 0 or when both arguments
 * are the same pointer), otherwise the difference between the first pair
 * of bytes that differ, both taken as unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	if (s1 == s2)
		return (0);

	for (; n != 0; n--, a++, b++) {
		if (*a != *b)
			return (*a - *b);
		if (*a == '\0')
			return (0);
	}
	return (0);
}
394 
/*
 * Case-folding table used by strcasecmp() and strncasecmp(): entry i holds
 * i itself, except that the entries for 'A' through 'Z' (0x41 - 0x5a) hold
 * the corresponding lower-case letters (0x61 - 0x7a).
 */
static const char charmap[] = {
	/* 0x00 - 0x3f: identity */
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
	/* 0x40 - 0x5f: upper-case letters fold to lower case */
	'\x40', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
	/* 0x60 - 0xff: identity */
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
	'\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
	'\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
	'\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
	'\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
	'\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
	'\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
	'\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
	'\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
	'\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
	'\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
	'\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
	'\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
	'\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
	'\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
	'\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
	'\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
429 
430 int
431 strcasecmp(const char *s1, const char *s2)
432 {
433 	const unsigned char *cm = (const unsigned char *)charmap;
434 	const unsigned char *us1 = (const unsigned char *)s1;
435 	const unsigned char *us2 = (const unsigned char *)s2;
436 
437 	while (cm[*us1] == cm[*us2++])
438 		if (*us1++ == '\0')
439 			return (0);
440 	return (cm[*us1] - cm[*(us2 - 1)]);
441 }
442 
443 int
444 strncasecmp(const char *s1, const char *s2, size_t n)
445 {
446 	const unsigned char *cm = (const unsigned char *)charmap;
447 	const unsigned char *us1 = (const unsigned char *)s1;
448 	const unsigned char *us2 = (const unsigned char *)s2;
449 
450 	while (n != 0 && cm[*us1] == cm[*us2++]) {
451 		if (*us1++ == '\0')
452 			return (0);
453 		n--;
454 	}
455 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
456 }
457 
/*
 * Copy the string s2, terminator included, to s1.  Returns s1.
 */
char *
strcpy(char *s1, const char *s2)
{
	char *out = s1;
	char ch;

	do {
		ch = *s2++;
		*out++ = ch;
	} while (ch != '\0');

	return (s1);
}
467 
/*
 * Copy at most n bytes of the string s2 to s1.  If s2 is shorter than n
 * bytes the remainder of the n bytes is filled with NULs; if it is n bytes
 * or longer, s1 is left without a terminator.  Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* copy the characters of s2 while there is room */
	while (i < n && s2[i] != '\0') {
		s1[i] = s2[i];
		i++;
	}
	/* zero-fill whatever is left of the n bytes */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
481 
/*
 * Return a pointer to the last occurrence of (char)c in the string sp, or
 * NULL if there is none.  The terminating NUL counts as part of the string.
 */
char *
strrchr(const char *sp, int c)
{
	const char target = (char)c;
	const char *last = NULL;

	for (;; sp++) {
		if (*sp == target)
			last = sp;
		if (*sp == '\0')
			break;
	}
	return ((char *)last);
}
494 
/*
 * Return a pointer to the first occurrence of the string as2 within the
 * string as1, or NULL if it does not occur.  When as2 is NULL or empty,
 * as1 itself is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *h, *n;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	for (start = as1; *start != '\0'; start++) {
		if (*start != *as2)
			continue;

		/* first byte matches; compare the remainder of the needle */
		h = start + 1;
		n = as2 + 1;
		while (*n != '\0' && *n == *h) {
			n++;
			h++;
		}
		if (*n == '\0')
			return ((char *)start);
	}

	return (NULL);
}
523 
/*
 * Return a pointer to the first byte of 'string' that also appears in
 * 'brkset', or NULL if no byte of 'string' does.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *b;

	for (; *string != '\0'; string++) {
		for (b = brkset; *b != '\0'; b++) {
			if (*b == *string)
				return ((char *)string);
		}
	}

	return (NULL);
}
538 
/*
 * Append at most n bytes of s2 to the end of the string s1 and then add a
 * terminating NUL, so up to n + 1 bytes are written.  Returns s1.
 *
 * s2 is examined only while fewer than n bytes have been appended, so it
 * may be an array of exactly n bytes without a terminator; byte s2[n] is
 * never read.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* locate the terminator of the destination */
	while (*tail != '\0')
		tail++;

	/* the count is checked before the source byte is touched */
	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
556 
557 #if defined(_BOOT) || defined(_KMDB)
/* bcopy() expressed through memcpy(); note the swapped argument order. */
#define	bcopy(from, to, len)	((void) memcpy((to), (from), (len)))
559 #endif
560 
/*
 * Append src to the string held in the dstsize-byte buffer dst, truncating
 * as needed so that the result, terminator included, fits in the buffer.
 * Returns the length of the string it tried to create: the length found in
 * dst plus strlen(src).  If no terminator is found within the first dstsize
 * bytes of dst, nothing is written and dstsize + strlen(src) is returned.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* length of the existing string, never looking beyond the buffer */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* bytes available for new characters once the NUL is accounted for */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';
	return (dstlen + srclen);
}
581 
/*
 * Copy src into the len-byte buffer dst, truncating as needed so that the
 * result, terminator included, fits.  Nothing is written when len is 0.
 * Always returns strlen(src).
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);
	size_t ncopy;

	if (len != 0) {
		/* leave one byte for the terminator */
		ncopy = (srclen < len) ? srclen : len - 1;
		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}
	return (srclen);
}
599 
/*
 * Return the length of the leading part of 'string' that consists solely
 * of bytes found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;
	const char *m;

	while (string[span] != '\0') {
		/* look the current byte up in the set */
		m = charset;
		while (*m != '\0' && *m != string[span])
			m++;
		if (*m == '\0')
			return (span);
		span++;
	}

	return (span);
}
614 
615 /*
616  * Unless mentioned otherwise, all of the routines below should be added to
617  * the Solaris DDI as necessary.  For now, only provide them to standalone.
618  */
619 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split a string into tokens separated by any of the bytes in 'sepset'.
 * Pass the string on the first call and NULL on later calls to continue
 * with the same string.  The separator that ends a token is overwritten
 * with a NUL, and the position to resume from is kept in a function-static
 * pointer.  Returns the next token, or NULL when there is none.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*start, *tok, *sep;

	/* resume from the saved position unless given a new string */
	start = string;
	if (start == NULL)
		start = savept;
	if (start == NULL)
		return (NULL);

	/* step over leading separators; nothing left means no token */
	tok = start + strspn(start, sepset);
	if (*tok == '\0')
		return (NULL);

	/* terminate the token and remember where the next search begins */
	sep = strpbrk(tok, sepset);
	if (sep != NULL) {
		*sep = '\0';
		savept = sep + 1;
	} else {
		savept = NULL;
	}

	return (tok);
}
652 
653 /*
654  * The strlen() routine isn't shared with the kernel because it has its own
655  * hand-tuned assembly version.
656  */
/*
 * Return the number of bytes in the string s, not counting the terminator.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;
	return ((size_t)(end - s));
}
666 
667 #endif /* _BOOT || _KMDB */
668 
669 #ifdef _KERNEL
670 /*
671  * Check for a valid C identifier:
672  *	a letter or underscore, followed by
673  *	zero or more letters, digits and underscores.
674  */
675 
/* True for the ASCII decimal digits '0' through '9'. */
#define	IS_DIGIT(c)	('0' <= (c) && (c) <= '9')

/* True for the ASCII letters 'a' through 'z' and 'A' through 'Z'. */
#define	IS_ALPHA(c)	\
	(('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
680 
/*
 * Return 1 if 'id' is a valid C identifier (a letter or underscore followed
 * by any number of letters, digits and underscores), 0 otherwise.  An empty
 * string is not valid.
 */
int
strident_valid(const char *id)
{
	int ch = *id;

	/* the first character may not be a digit */
	if (!IS_ALPHA(ch) && ch != '_')
		return (0);

	for (id++; *id != '\0'; id++) {
		ch = *id;
		if (IS_ALPHA(ch) || IS_DIGIT(ch) || ch == '_')
			continue;
		return (0);
	}
	return (1);
}
694 
/*
 * Convert a string into a valid C identifier by replacing invalid
 * characters with '_'.  Also makes sure the string is nul-terminated
 * and takes up at most n bytes.
 *
 * 's' is modified in place; an empty string is left untouched.  'n' is the
 * size of the buffer and must be greater than zero.  That is asserted, and
 * in builds where the assertion is compiled out a zero 'n' makes the
 * function return without touching 's', since there is no byte it may use.
 */
void
strident_canon(char *s, size_t n)
{
	char c;
	char *end;

	ASSERT(n > 0);
	if (n == 0)
		return;

	/* last byte of the buffer; it always ends up holding a NUL */
	end = s + n - 1;

	if ((c = *s) == 0)
		return;

	/* the first character may not be a digit */
	if (!IS_ALPHA(c) && c != '_')
		*s = '_';

	while (s < end && ((c = *(++s)) != 0)) {
		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
			*s = '_';
	}
	/* either the existing terminator, or the truncation point at 'end' */
	*s = 0;
}
720 
721 #endif	/* _KERNEL */
722