xref: /illumos-gate/usr/src/lib/libc/port/locale/strptime.c (revision 25f48f6755be2e1b122ca5e2e4030ed583fd600e)
1 /*
2  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation
30  * are those of the authors and should not be interpreted as representing
31  * official policies, either expressed or implied, of Powerdog Industries.
32  */
33 
34 #include "lint.h"
35 #include <time.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <pthread.h>
41 #include <alloca.h>
42 #include "timelocal.h"
43 
/* Element count of a true array object (meaningless for a pointer). */
#define	asizeof(a)	(sizeof (a) / sizeof (*(a)))

/*
 * State bits carried through __strptime() via its flagsp argument.
 */
#define	F_GMT		0x01	/* convert result via timegm/localtime_r */
#define	F_ZERO		0x02	/* zero the struct tm before parsing */
#define	F_RECURSE	0x04	/* a __strptime() call is already active */
49 
50 static char *
51 __strptime(const char *buf, const char *fmt, struct tm *tm, int *flagsp)
52 {
53 	char	c;
54 	const char *ptr;
55 	int	i, len, recurse = 0;
56 	int Ealternative, Oalternative;
57 	struct lc_time_T *tptr = __get_current_time_locale();
58 
59 	if (*flagsp & F_RECURSE)
60 		recurse = 1;
61 	*flagsp |= F_RECURSE;
62 
63 	if (*flagsp & F_ZERO)
64 		(void) memset(tm, 0, sizeof (*tm));
65 	*flagsp &= ~F_ZERO;
66 
67 	ptr = fmt;
68 	while (*ptr != 0) {
69 		if (*buf == 0)
70 			break;
71 
72 		c = *ptr++;
73 
74 		if (c != '%') {
75 			if (isspace(c))
76 				while (isspace(*buf))
77 					buf++;
78 			else if (c != *buf++)
79 				return (NULL);
80 			continue;
81 		}
82 
83 		Ealternative = 0;
84 		Oalternative = 0;
85 label:
86 		c = *ptr++;
87 		switch (c) {
88 		case 0:
89 		case '%':
90 			if (*buf++ != '%')
91 				return (NULL);
92 			break;
93 
94 		case '+':
95 			buf = __strptime(buf, tptr->date_fmt, tm, flagsp);
96 			if (buf == NULL)
97 				return (NULL);
98 			break;
99 
100 		case 'C':
101 			if (!isdigit(*buf))
102 				return (NULL);
103 
104 			/* XXX This will break for 3-digit centuries. */
105 			len = 2;
106 			for (i = 0; len && isdigit(*buf); buf++) {
107 				i *= 10;
108 				i += *buf - '0';
109 				len--;
110 			}
111 			if (i < 19)
112 				return (NULL);
113 
114 			tm->tm_year = i * 100 - 1900;
115 			break;
116 
117 		case 'c':
118 			buf = __strptime(buf, tptr->c_fmt, tm, flagsp);
119 			if (buf == NULL)
120 				return (NULL);
121 			break;
122 
123 		case 'D':
124 			buf = __strptime(buf, "%m/%d/%y", tm, flagsp);
125 			if (buf == NULL)
126 				return (NULL);
127 			break;
128 
129 		case 'E':
130 			if (Ealternative || Oalternative)
131 				break;
132 			Ealternative++;
133 			goto label;
134 
135 		case 'O':
136 			if (Ealternative || Oalternative)
137 				break;
138 			Oalternative++;
139 			goto label;
140 
141 		case 'F':
142 			buf = __strptime(buf, "%Y-%m-%d", tm, flagsp);
143 			if (buf == NULL)
144 				return (NULL);
145 			break;
146 
147 		case 'R':
148 			buf = __strptime(buf, "%H:%M", tm, flagsp);
149 			if (buf == NULL)
150 				return (NULL);
151 			break;
152 
153 		case 'r':
154 			buf = __strptime(buf, tptr->ampm_fmt, tm, flagsp);
155 			if (buf == NULL)
156 				return (NULL);
157 			break;
158 
159 		case 'T':
160 			buf = __strptime(buf, "%H:%M:%S", tm, flagsp);
161 			if (buf == NULL)
162 				return (NULL);
163 			break;
164 
165 		case 'X':
166 			buf = __strptime(buf, tptr->X_fmt, tm, flagsp);
167 			if (buf == NULL)
168 				return (NULL);
169 			break;
170 
171 		case 'x':
172 			buf = __strptime(buf, tptr->x_fmt, tm, flagsp);
173 			if (buf == NULL)
174 				return (NULL);
175 			break;
176 
177 		case 'j':
178 			if (!isdigit(*buf))
179 				return (NULL);
180 
181 			len = 3;
182 			for (i = 0; len && isdigit(*buf); buf++) {
183 				i *= 10;
184 				i += *buf - '0';
185 				len--;
186 			}
187 			if (i < 1 || i > 366)
188 				return (NULL);
189 
190 			tm->tm_yday = i - 1;
191 			break;
192 
193 		case 'M':
194 		case 'S':
195 			if (*buf == 0 || isspace(*buf))
196 				break;
197 
198 			if (!isdigit(*buf))
199 				return (NULL);
200 
201 			len = 2;
202 			for (i = 0; len && isdigit(*buf); buf++) {
203 				i *= 10;
204 				i += *buf - '0';
205 				len--;
206 			}
207 
208 			if (c == 'M') {
209 				if (i > 59)
210 					return (NULL);
211 				tm->tm_min = i;
212 			} else {
213 				if (i > 60)
214 					return (NULL);
215 				tm->tm_sec = i;
216 			}
217 
218 			if (isspace(*buf))
219 				while (*ptr != 0 && !isspace(*ptr))
220 					ptr++;
221 			break;
222 
223 		case 'H':
224 		case 'I':
225 		case 'k':
226 		case 'l':
227 			/*
228 			 * Of these, %l is the only specifier explicitly
229 			 * documented as not being zero-padded.  However,
230 			 * there is no harm in allowing zero-padding.
231 			 *
232 			 * XXX The %l specifier may gobble one too many
233 			 * digits if used incorrectly.
234 			 */
235 			if (!isdigit(*buf))
236 				return (NULL);
237 
238 			len = 2;
239 			for (i = 0; len && isdigit(*buf); buf++) {
240 				i *= 10;
241 				i += *buf - '0';
242 				len--;
243 			}
244 			if (c == 'H' || c == 'k') {
245 				if (i > 23)
246 					return (NULL);
247 			} else if (i > 12)
248 				return (NULL);
249 
250 			tm->tm_hour = i;
251 
252 			if (isspace(*buf))
253 				while (*ptr != 0 && !isspace(*ptr))
254 					ptr++;
255 			break;
256 
257 		case 'p':
258 			/*
259 			 * XXX This is bogus if parsed before hour-related
260 			 * specifiers.
261 			 */
262 			len = strlen(tptr->am);
263 			if (strncasecmp(buf, tptr->am, len) == 0) {
264 				if (tm->tm_hour > 12)
265 					return (NULL);
266 				if (tm->tm_hour == 12)
267 					tm->tm_hour = 0;
268 				buf += len;
269 				break;
270 			}
271 
272 			len = strlen(tptr->pm);
273 			if (strncasecmp(buf, tptr->pm, len) == 0) {
274 				if (tm->tm_hour > 12)
275 					return (NULL);
276 				if (tm->tm_hour != 12)
277 					tm->tm_hour += 12;
278 				buf += len;
279 				break;
280 			}
281 
282 			return (NULL);
283 
284 		case 'A':
285 		case 'a':
286 			for (i = 0; i < asizeof(tptr->weekday); i++) {
287 				len = strlen(tptr->weekday[i]);
288 				if (strncasecmp(buf, tptr->weekday[i], len) ==
289 				    0)
290 					break;
291 				len = strlen(tptr->wday[i]);
292 				if (strncasecmp(buf, tptr->wday[i], len) == 0)
293 					break;
294 			}
295 			if (i == asizeof(tptr->weekday))
296 				return (NULL);
297 
298 			tm->tm_wday = i;
299 			buf += len;
300 			break;
301 
302 		case 'U':
303 		case 'W':
304 			/*
305 			 * XXX This is bogus, as we can not assume any valid
306 			 * information present in the tm structure at this
307 			 * point to calculate a real value, so just check the
308 			 * range for now.
309 			 */
310 			if (!isdigit(*buf))
311 				return (NULL);
312 
313 			len = 2;
314 			for (i = 0; len && isdigit(*buf); buf++) {
315 				i *= 10;
316 				i += *buf - '0';
317 				len--;
318 			}
319 			if (i > 53)
320 				return (NULL);
321 
322 			if (isspace(*buf))
323 				while (*ptr != 0 && !isspace(*ptr))
324 					ptr++;
325 			break;
326 
327 		case 'w':
328 			if (!isdigit(*buf))
329 				return (NULL);
330 
331 			i = *buf - '0';
332 			if (i > 6)
333 				return (NULL);
334 
335 			tm->tm_wday = i;
336 
337 			if (isspace(*buf))
338 				while (*ptr != 0 && !isspace(*ptr))
339 					ptr++;
340 			break;
341 
342 		case 'e':
343 			/*
344 			 * The %e format has a space before single digits
345 			 * which we need to skip.
346 			 */
347 			if (isspace(*buf))
348 				buf++;
349 			/* FALLTHROUGH */
350 		case 'd':
351 			/*
352 			 * The %e specifier is explicitly documented as not
353 			 * being zero-padded but there is no harm in allowing
354 			 * such padding.
355 			 *
356 			 * XXX The %e specifier may gobble one too many
357 			 * digits if used incorrectly.
358 			 */
359 			if (!isdigit(*buf))
360 				return (NULL);
361 
362 			len = 2;
363 			for (i = 0; len && isdigit(*buf); buf++) {
364 				i *= 10;
365 				i += *buf - '0';
366 				len--;
367 			}
368 			if (i > 31)
369 				return (NULL);
370 
371 			tm->tm_mday = i;
372 
373 			if (isspace(*buf))
374 				while (*ptr != 0 && !isspace(*ptr))
375 					ptr++;
376 			break;
377 
378 		case 'B':
379 		case 'b':
380 		case 'h':
381 			for (i = 0; i < asizeof(tptr->month); i++) {
382 				len = strlen(tptr->month[i]);
383 				if (strncasecmp(buf, tptr->month[i], len) == 0)
384 					break;
385 			}
386 			/*
387 			 * Try the abbreviated month name if the full name
388 			 * wasn't found.
389 			 */
390 			if (i == asizeof(tptr->month)) {
391 				for (i = 0; i < asizeof(tptr->month); i++) {
392 					len = strlen(tptr->mon[i]);
393 					if (strncasecmp(buf, tptr->mon[i],
394 					    len) == 0)
395 						break;
396 				}
397 			}
398 			if (i == asizeof(tptr->month))
399 				return (NULL);
400 
401 			tm->tm_mon = i;
402 			buf += len;
403 			break;
404 
405 		case 'm':
406 			if (!isdigit(*buf))
407 				return (NULL);
408 
409 			len = 2;
410 			for (i = 0; len && isdigit(*buf); buf++) {
411 				i *= 10;
412 				i += *buf - '0';
413 				len--;
414 			}
415 			if (i < 1 || i > 12)
416 				return (NULL);
417 
418 			tm->tm_mon = i - 1;
419 
420 			if (isspace(*buf))
421 				while (*ptr != NULL && !isspace(*ptr))
422 					ptr++;
423 			break;
424 
425 		case 's':
426 			{
427 			char *cp;
428 			int sverrno;
429 			time_t t;
430 
431 			sverrno = errno;
432 			errno = 0;
433 			t = strtol(buf, &cp, 10);
434 			if (errno == ERANGE) {
435 				errno = sverrno;
436 				return (NULL);
437 			}
438 			errno = sverrno;
439 			buf = cp;
440 			(void) gmtime_r(&t, tm);
441 			*flagsp |= F_GMT;
442 			}
443 			break;
444 
445 		case 'Y':
446 		case 'y':
447 			if (*buf == NULL || isspace(*buf))
448 				break;
449 
450 			if (!isdigit(*buf))
451 				return (NULL);
452 
453 			len = (c == 'Y') ? 4 : 2;
454 			for (i = 0; len && isdigit(*buf); buf++) {
455 				i *= 10;
456 				i += *buf - '0';
457 				len--;
458 			}
459 			if (c == 'Y')
460 				i -= 1900;
461 			if (c == 'y' && i < 69)
462 				i += 100;
463 			if (i < 0)
464 				return (NULL);
465 
466 			tm->tm_year = i;
467 
468 			if (isspace(*buf))
469 				while (*ptr != 0 && !isspace(*ptr))
470 					ptr++;
471 			break;
472 
473 		case 'Z':
474 			{
475 			const char *cp = buf;
476 			char *zonestr;
477 
478 			while (isupper(*cp))
479 				++cp;
480 			if (cp - buf) {
481 				zonestr = alloca(cp - buf + 1);
482 				(void) strncpy(zonestr, buf, cp - buf);
483 				zonestr[cp - buf] = '\0';
484 				tzset();
485 				if (strcmp(zonestr, "GMT") == 0) {
486 					*flagsp |= F_GMT;
487 				} else if (0 == strcmp(zonestr, tzname[0])) {
488 					tm->tm_isdst = 0;
489 				} else if (0 == strcmp(zonestr, tzname[1])) {
490 					tm->tm_isdst = 1;
491 				} else {
492 					return (NULL);
493 				}
494 				buf += cp - buf;
495 			}
496 			}
497 			break;
498 
499 		case 'z':
500 			{
501 			int sign = 1;
502 
503 			if (*buf != '+') {
504 				if (*buf == '-')
505 					sign = -1;
506 				else
507 					return (NULL);
508 			}
509 			buf++;
510 			i = 0;
511 			for (len = 4; len > 0; len--) {
512 				if (!isdigit(*buf))
513 					return (NULL);
514 				i *= 10;
515 				i += *buf - '0';
516 				buf++;
517 			}
518 
519 			tm->tm_hour -= sign * (i / 100);
520 			tm->tm_min -= sign * (i % 100);
521 			*flagsp |= F_GMT;
522 			}
523 			break;
524 		}
525 	}
526 
527 	if (!recurse) {
528 		if (buf && (*flagsp & F_GMT)) {
529 			time_t t = timegm(tm);
530 			(void) localtime_r(&t, tm);
531 		}
532 	}
533 
534 	return ((char *)buf);
535 }
536 
537 char *
538 strptime(const char *buf, const char *fmt, struct tm *tm)
539 {
540 	int	flags = F_ZERO;
541 
542 	return (__strptime(buf, fmt, tm, &flags));
543 }
544 
/*
 * Solaris-compatible variant, selected with -D_STRPTIME_DONTZERO.  It
 * starts the parse with no flags set, so the incoming *tm is not cleared
 * and fields the format does not touch keep their prior values.
 */
char *
__strptime_dontzero(const char *buf, const char *fmt, struct tm *tm)
{
	int	state;

	state = 0;
	return (__strptime(buf, fmt, tm, &state));
}
556