1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains the common part of the functions string_to_decimal, 31 * func_to_decimal, and file_to_decimal. Much of this code has been dupli- 32 * cated in wstring_to_decimal (see wstod.c) with some simplifications and 33 * appropriate modifications for wide characters. DO NOT fix a bug here 34 * without fixing the same bug in wstring_to_decimal, if it applies. 35 * 36 * The code below makes the following assumptions. 37 * 38 * 1. The first six parameters to the function are declared with the 39 * following names and types: 40 * 41 * char **ppc; 42 * int nmax; 43 * int fortran_conventions; 44 * decimal_record *pd; 45 * enum decimal_string_form *pform; 46 * char **pechar; 47 * 48 * 2. Before this file is #included, the following variables have been 49 * defined and initialized as shown: 50 * 51 * char *cp; 52 * char *good = *ppc - 1; 53 * int current; 54 * int nread; 55 * 56 * If the first character can be read successfully, then current is set 57 * to the value of the first character, cp is set to *ppc, (char)current 58 * is stored at *cp, and nread = 1. If the first character cannot be 59 * read successfully, then current = EOF and nread = 0. 60 * 61 * 3. The macro NEXT is defined to expand to code that implements 62 * the following logic: 63 * 64 * if (nread < nmax) { 65 * current = <next character>; 66 * if (current != EOF) { 67 * *++cp = (char)current; 68 * nread++; 69 * } 70 * } else 71 * current = EOF; 72 * 73 * Note that nread always reflects the number of characters successfully 74 * read, the buffer pointed to by *ppc gets filled only with characters 75 * that have been successfully read, and cp always points to the location 76 * in the buffer that was filled by the last character successfully read. 77 * current == EOF if and only if we can't read any more, either because 78 * we've reached the end of the input file or the buffer is full (i.e., 79 * we've read nmax characters). 80 * 81 * 4. After this file is #included, the following variables may be used 82 * and will have the specified values: 83 * 84 * *ppc, *pd, *pform, and *pechar will be set as documented in the 85 * manual page; 86 * nmax and fortran_conventions will be unchanged; 87 * nread will be the number of characters actually read; 88 * cp will point to the last character actually read, provided at least 89 * one character was read successfully (in which case cp >= *ppc). 90 */ 91 92 #define UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c) 93 94 #define NZDIGIT(c) (('1' <= c && c <= '9') || ((int)form < 0 && \ 95 (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')))) 96 97 { 98 static const char *infstring = "INFINITY"; 99 static const char *nanstring = "NAN"; 100 101 int sigfound, spacefound = 0; 102 int ids = 0; 103 int i, agree; 104 int nzbp = 0; /* number of zeros before point */ 105 int nzap = 0; /* number of zeros after point */ 106 char decpt; 107 int nfast, nfastlimit; 108 char *pfast; 109 int e, esign; 110 int expshift = 0; 111 enum decimal_string_form form; 112 113 /* 114 * This routine assumes that the radix point is a single 115 * ASCII character, so that following this assignment, the 116 * condition (current == decpt) will correctly detect it. 117 */ 118 if (fortran_conventions > 0) 119 decpt = '.'; 120 else 121 decpt = *(localeconv()->decimal_point); 122 123 /* input is invalid until we find something */ 124 pd->fpclass = fp_signaling; 125 pd->sign = 0; 126 pd->exponent = 0; 127 pd->ds[0] = '\0'; 128 pd->more = 0; 129 pd->ndigits = 0; 130 *pform = form = invalid_form; 131 *pechar = NULL; 132 133 /* skip white space */ 134 while (isspace(current)) { 135 spacefound = 1; 136 NEXT; 137 } 138 139 if (fortran_conventions >= 2 && spacefound) { 140 /* 141 * We found at least one white space character. For 142 * Fortran formatted input, accept this; if we don't 143 * find anything else, we'll interpret it as a valid zero. 144 */ 145 pd->fpclass = fp_zero; 146 form = whitespace_form; 147 sigfound = 0; /* 0 = only zeros found so far */ 148 if (current == EOF) { 149 good = cp; 150 goto done; 151 } else { 152 good = cp - 1; 153 } 154 } else { 155 sigfound = -1; /* -1 = no digits found yet */ 156 } 157 158 /* look for optional leading sign */ 159 if (current == '+') { 160 NEXT; 161 } else if (current == '-') { 162 pd->sign = 1; 163 NEXT; 164 } 165 166 /* 167 * Admissible first non-white-space, non-sign characters are 168 * 0-9, i, I, n, N, or the radix point. 169 */ 170 if ('1' <= current && current <= '9') { 171 good = cp; 172 pd->fpclass = fp_normal; 173 form = fixed_int_form; 174 sigfound = 1; /* 1 = significant digits found */ 175 pd->ds[ids++] = (char)current; 176 NEXT; 177 } else { 178 switch (current) { 179 case ' ': 180 if (fortran_conventions < 2) 181 goto done; 182 /* 183 * When fortran_conventions >= 2, treat leading 184 * blanks the same as leading zeroes. 185 */ 186 /*FALLTHRU*/ 187 188 case '0': 189 /* 190 * Accept the leading zero and set pd->fpclass 191 * accordingly, but don't set sigfound until we 192 * determine that this isn't a "fake" hex string 193 * (i.e., 0x.p...). 194 */ 195 good = cp; 196 pd->fpclass = fp_zero; 197 if (fortran_conventions < 0) { 198 /* look for a hex fp string */ 199 NEXT; 200 if (current == 'X' || current == 'x') { 201 /* assume hex fp form */ 202 form = (enum decimal_string_form)-1; 203 expshift = 2; 204 NEXT; 205 /* 206 * Only a digit or radix point can 207 * follow "0x". 208 */ 209 if (NZDIGIT(current)) { 210 pd->fpclass = fp_normal; 211 good = cp; 212 sigfound = 1; 213 pd->ds[ids++] = (char)current; 214 NEXT; 215 break; 216 } else if (current == decpt) { 217 NEXT; 218 goto afterpoint; 219 } else if (current != '0') { 220 /* not hex fp after all */ 221 form = fixed_int_form; 222 expshift = 0; 223 goto done; 224 } 225 } else { 226 form = fixed_int_form; 227 } 228 } else { 229 form = fixed_int_form; 230 } 231 232 /* skip all leading zeros */ 233 while (current == '0' || (current == ' ' && 234 fortran_conventions >= 2)) { 235 NEXT; 236 } 237 sigfound = 0; /* 0 = only zeros found so far */ 238 if (current == EOF) { 239 good = cp; 240 goto done; 241 } else { 242 good = cp - 1; 243 } 244 break; 245 246 case 'i': 247 case 'I': 248 /* look for inf or infinity */ 249 NEXT; 250 agree = 1; 251 while (agree <= 7 && 252 UCASE(current) == infstring[agree]) { 253 NEXT; 254 agree++; 255 } 256 if (agree < 3) 257 goto done; 258 /* found valid infinity */ 259 pd->fpclass = fp_infinity; 260 sigfound = 1; 261 __inf_read = 1; 262 if (agree < 8) { 263 good = (current == EOF)? cp + 3 - agree : 264 cp + 2 - agree; 265 form = inf_form; 266 } else { 267 good = (current == EOF)? cp : cp - 1; 268 form = infinity_form; 269 } 270 /* 271 * Accept trailing blanks if no extra characters 272 * intervene. 273 */ 274 if (fortran_conventions >= 2 && (agree == 3 || 275 agree == 8)) { 276 while (current == ' ') { 277 NEXT; 278 } 279 good = (current == EOF)? cp : cp - 1; 280 } 281 goto done; 282 283 case 'n': 284 case 'N': 285 /* look for nan or nan(string) */ 286 NEXT; 287 agree = 1; 288 while (agree <= 2 && 289 UCASE(current) == nanstring[agree]) { 290 NEXT; 291 agree++; 292 } 293 if (agree < 3) 294 goto done; 295 /* found valid NaN */ 296 good = (current == EOF)? cp : cp - 1; 297 pd->fpclass = fp_quiet; 298 form = nan_form; 299 sigfound = 1; 300 __nan_read = 1; 301 if (current == '(') { 302 /* accept parenthesized string */ 303 NEXT; 304 if (fortran_conventions < 0) { 305 while ((isalnum(current) || 306 current == '_') && 307 ids < DECIMAL_STRING_LENGTH - 1) { 308 pd->ds[ids++] = (char)current; 309 NEXT; 310 } 311 while (isalnum(current) || 312 current == '_') { 313 pd->more = 1; 314 NEXT; 315 } 316 } else { 317 while (current > 0 && current != ')' && 318 ids < DECIMAL_STRING_LENGTH - 1) { 319 pd->ds[ids++] = (char)current; 320 NEXT; 321 } 322 while (current > 0 && current != ')') { 323 pd->more = 1; 324 NEXT; 325 } 326 } 327 if (current != ')') 328 goto done; 329 good = cp; 330 form = nanstring_form; 331 /* prepare for loop below */ 332 if (fortran_conventions >= 2) { 333 NEXT; 334 } 335 } 336 /* accept trailing blanks */ 337 if (fortran_conventions >= 2) { 338 while (current == ' ') { 339 NEXT; 340 } 341 good = (current == EOF)? cp : cp - 1; 342 } 343 goto done; 344 345 default: 346 if (current == decpt) { 347 /* 348 * Don't accept the radix point just yet; 349 * we need to see at least one digit. 350 */ 351 NEXT; 352 goto afterpoint; 353 } 354 goto done; 355 } 356 } 357 358 nextnumber: 359 /* 360 * Admissible characters after the first digit are a valid digit, 361 * an exponent delimiter (E or e for any decimal form; +, -, D, d, 362 * Q, or q when fortran_conventions >= 2; P or p for hex form), 363 * or the radix point. (Note that we can't get here unless we've 364 * already found a digit.) 365 */ 366 if (NZDIGIT(current)) { 367 /* 368 * Found another nonzero digit. If there's enough room 369 * in pd->ds, store any intervening zeros we've found so far 370 * and then store this digit. Otherwise, stop storing 371 * digits in pd->ds and set pd->more. 372 */ 373 if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) { 374 for (i = 0; i < nzbp; i++) 375 pd->ds[ids++] = '0'; 376 pd->ds[ids++] = (char)current; 377 } else { 378 pd->exponent += (nzbp + 1) << expshift; 379 pd->more = 1; 380 if (ids < DECIMAL_STRING_LENGTH) { 381 pd->ds[ids] = '\0'; 382 pd->ndigits = ids; 383 /* don't store any more digits */ 384 ids = DECIMAL_STRING_LENGTH; 385 } 386 } 387 pd->fpclass = fp_normal; 388 sigfound = 1; 389 nzbp = 0; 390 NEXT; 391 392 /* 393 * Use an optimized loop to grab a consecutive sequence 394 * of nonzero digits quickly. 395 */ 396 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 397 for (nfast = 0, pfast = &(pd->ds[ids]); 398 nfast < nfastlimit && NZDIGIT(current); 399 nfast++) { 400 *pfast++ = (char)current; 401 NEXT; 402 } 403 ids += nfast; 404 if (current == '0') 405 goto nextnumberzero; /* common case */ 406 /* advance good to the last accepted digit */ 407 good = (current == EOF)? cp : cp - 1; 408 goto nextnumber; 409 } else { 410 switch (current) { 411 case ' ': 412 if (fortran_conventions < 2) 413 goto done; 414 if (fortran_conventions == 2) { 415 while (current == ' ') { 416 NEXT; 417 } 418 good = (current == EOF)? cp : cp - 1; 419 goto nextnumber; 420 } 421 /* 422 * When fortran_conventions > 2, treat internal 423 * blanks the same as zeroes. 424 */ 425 /*FALLTHRU*/ 426 427 case '0': 428 nextnumberzero: 429 /* 430 * Count zeros before the radix point. Later we 431 * will either put these zeros into pd->ds or add 432 * nzbp to pd->exponent to account for them. 433 */ 434 while (current == '0' || (current == ' ' && 435 fortran_conventions > 2)) { 436 nzbp++; 437 NEXT; 438 } 439 good = (current == EOF)? cp : cp - 1; 440 goto nextnumber; 441 442 case '+': 443 case '-': 444 case 'D': 445 case 'd': 446 case 'Q': 447 case 'q': 448 /* 449 * Only accept these as the start of the exponent 450 * field if fortran_conventions is positive. 451 */ 452 if (fortran_conventions <= 0) 453 goto done; 454 /*FALLTHRU*/ 455 456 case 'E': 457 case 'e': 458 if ((int)form < 0) 459 goto done; 460 goto exponent; 461 462 case 'P': 463 case 'p': 464 if ((int)form > 0) 465 goto done; 466 goto exponent; 467 468 default: 469 if (current == decpt) { 470 /* accept the radix point */ 471 good = cp; 472 if (form == fixed_int_form) 473 form = fixed_intdot_form; 474 NEXT; 475 goto afterpoint; 476 } 477 goto done; 478 } 479 } 480 481 afterpoint: 482 /* 483 * Admissible characters after the radix point are a valid digit 484 * or an exponent delimiter. (Note that it is possible to get 485 * here even though we haven't found any digits yet.) 486 */ 487 if (NZDIGIT(current)) { 488 /* found a digit after the point; revise form */ 489 if (form == invalid_form || form == whitespace_form) 490 form = fixed_dotfrac_form; 491 else if (form == fixed_intdot_form) 492 form = fixed_intdotfrac_form; 493 good = cp; 494 if (sigfound < 1) { 495 /* no significant digits found until now */ 496 pd->fpclass = fp_normal; 497 sigfound = 1; 498 pd->ds[ids++] = (char)current; 499 pd->exponent = (-(nzap + 1)) << expshift; 500 } else { 501 /* significant digits have been found */ 502 if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) { 503 for (i = 0; i < nzbp + nzap; i++) 504 pd->ds[ids++] = '0'; 505 pd->ds[ids++] = (char)current; 506 pd->exponent -= (nzap + 1) << expshift; 507 } else { 508 pd->exponent += nzbp << expshift; 509 pd->more = 1; 510 if (ids < DECIMAL_STRING_LENGTH) { 511 pd->ds[ids] = '\0'; 512 pd->ndigits = ids; 513 /* don't store any more digits */ 514 ids = DECIMAL_STRING_LENGTH; 515 } 516 } 517 } 518 nzbp = 0; 519 nzap = 0; 520 NEXT; 521 522 /* 523 * Use an optimized loop to grab a consecutive sequence 524 * of nonzero digits quickly. 525 */ 526 nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; 527 for (nfast = 0, pfast = &(pd->ds[ids]); 528 nfast < nfastlimit && NZDIGIT(current); 529 nfast++) { 530 *pfast++ = (char)current; 531 NEXT; 532 } 533 ids += nfast; 534 pd->exponent -= nfast << expshift; 535 if (current == '0') 536 goto zeroafterpoint; 537 /* advance good to the last accepted digit */ 538 good = (current == EOF)? cp : cp - 1; 539 goto afterpoint; 540 } else { 541 switch (current) { 542 case ' ': 543 if (fortran_conventions < 2) 544 goto done; 545 if (fortran_conventions == 2) { 546 /* 547 * Treat a radix point followed by blanks 548 * but no digits as zero so we'll pass FCVS. 549 */ 550 if (sigfound == -1) { 551 pd->fpclass = fp_zero; 552 sigfound = 0; 553 } 554 while (current == ' ') { 555 NEXT; 556 } 557 good = (current == EOF)? cp : cp - 1; 558 goto afterpoint; 559 } 560 /* 561 * when fortran_conventions > 2, treat internal 562 * blanks the same as zeroes 563 */ 564 /*FALLTHRU*/ 565 566 case '0': 567 /* found a digit after the point; revise form */ 568 if (form == invalid_form || form == whitespace_form) 569 form = fixed_dotfrac_form; 570 else if (form == fixed_intdot_form) 571 form = fixed_intdotfrac_form; 572 if (sigfound == -1) { 573 pd->fpclass = fp_zero; 574 sigfound = 0; 575 } 576 zeroafterpoint: 577 /* 578 * Count zeros after the radix point. If we find 579 * any more nonzero digits later, we will put these 580 * zeros into pd->ds and decrease pd->exponent by 581 * nzap. 582 */ 583 while (current == '0' || (current == ' ' && 584 fortran_conventions > 2)) { 585 nzap++; 586 NEXT; 587 } 588 if (current == EOF) { 589 good = cp; 590 goto done; 591 } else { 592 good = cp - 1; 593 } 594 goto afterpoint; 595 596 case '+': 597 case '-': 598 case 'D': 599 case 'd': 600 case 'Q': 601 case 'q': 602 /* 603 * Only accept these as the start of the exponent 604 * field if fortran_conventions is positive. 605 */ 606 if (fortran_conventions <= 0) 607 goto done; 608 /*FALLTHRU*/ 609 610 case 'E': 611 case 'e': 612 /* don't accept exponent without preceding digits */ 613 if (sigfound == -1 || (int)form < 0) 614 goto done; 615 break; 616 617 case 'P': 618 case 'p': 619 /* don't accept exponent without preceding digits */ 620 if (sigfound == -1 || (int)form > 0) 621 goto done; 622 break; 623 624 default: 625 goto done; 626 } 627 } 628 629 exponent: 630 /* 631 * Set *pechar to point to the character that looks like the 632 * beginning of the exponent field, then attempt to parse it. 633 */ 634 *pechar = cp; 635 if (current != '+' && current != '-') { 636 /* skip the exponent character and following blanks */ 637 NEXT; 638 if (fortran_conventions >= 2 && current == ' ') { 639 while (current == ' ') { 640 NEXT; 641 } 642 if (fortran_conventions > 2) 643 good = (current == EOF)? cp : cp - 1; 644 } 645 } 646 647 e = 0; 648 esign = 0; 649 650 /* look for optional exponent sign */ 651 if (current == '+') { 652 NEXT; 653 } else if (current == '-') { 654 esign = 1; 655 NEXT; 656 } 657 658 /* 659 * Accumulate explicit exponent. Note that if we don't find at 660 * least one digit, good won't be updated and e will remain 0. 661 * Also, we keep e from getting too large so we don't overflow 662 * the range of int (but notice that the threshold is large 663 * enough that any larger e would cause the result to underflow 664 * or overflow anyway). 665 */ 666 while (('0' <= current && current <= '9') || current == ' ') { 667 if (current == ' ') { 668 if (fortran_conventions < 2) 669 break; 670 if (fortran_conventions == 2) { 671 NEXT; 672 continue; 673 } 674 current = '0'; 675 } 676 good = cp; 677 if (e <= 1000000) 678 e = 10 * e + current - '0'; 679 NEXT; 680 if (fortran_conventions == 2 && current == ' ') { 681 /* accept trailing blanks */ 682 while (current == ' ') { 683 NEXT; 684 } 685 good = (current == EOF)? cp : cp - 1; 686 } 687 } 688 if (esign == 1) 689 pd->exponent -= e; 690 else 691 pd->exponent += e; 692 693 /* 694 * If we successfully parsed an exponent field, update form 695 * accordingly. If we didn't, don't set *pechar. 696 */ 697 if (good >= *pechar) { 698 switch (form) { 699 case whitespace_form: 700 case fixed_int_form: 701 form = floating_int_form; 702 break; 703 704 case fixed_intdot_form: 705 form = floating_intdot_form; 706 break; 707 708 case fixed_dotfrac_form: 709 form = floating_dotfrac_form; 710 break; 711 712 case fixed_intdotfrac_form: 713 form = floating_intdotfrac_form; 714 break; 715 } 716 } else { 717 *pechar = NULL; 718 } 719 720 done: 721 /* 722 * If we found any zeros before the radix point that were not 723 * accounted for earlier, adjust the exponent. (This is only 724 * relevant when pd->fpclass == fp_normal, but it's harmless 725 * in all other cases.) 726 */ 727 pd->exponent += nzbp << expshift; 728 729 /* terminate pd->ds if we haven't already */ 730 if (ids < DECIMAL_STRING_LENGTH) { 731 pd->ds[ids] = '\0'; 732 pd->ndigits = ids; 733 } 734 735 /* 736 * If we accepted any characters, advance *ppc to point to the 737 * first character we didn't accept; otherwise, pass back a 738 * signaling nan. 739 */ 740 if (good >= *ppc) { 741 *ppc = good + 1; 742 } else { 743 pd->fpclass = fp_signaling; 744 pd->sign = 0; 745 form = invalid_form; 746 } 747 748 *pform = form; 749 } 750