/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #pragma ident "%Z%%M% %I% %E% SMI" /* * This file contains the common part of the functions string_to_decimal, * func_to_decimal, and file_to_decimal. Much of this code has been dupli- * cated in wstring_to_decimal (see wstod.c) with some simplifications and * appropriate modifications for wide characters. DO NOT fix a bug here * without fixing the same bug in wstring_to_decimal, if it applies. * * The code below makes the following assumptions. * * 1. The first six parameters to the function are declared with the * following names and types: * * char **ppc; * int nmax; * int fortran_conventions; * decimal_record *pd; * enum decimal_string_form *pform; * char **pechar; * * 2. Before this file is #included, the following variables have been * defined and initialized as shown: * * char *cp; * char *good = *ppc - 1; * int current; * int nread; * * If the first character can be read successfully, then current is set * to the value of the first character, cp is set to *ppc, (char)current * is stored at *cp, and nread = 1. If the first character cannot be * read successfully, then current = EOF and nread = 0. * * 3. The macro NEXT is defined to expand to code that implements * the following logic: * * if (nread < nmax) { * current = ; * if (current != EOF) { * *++cp = (char)current; * nread++; * } * } else * current = EOF; * * Note that nread always reflects the number of characters successfully * read, the buffer pointed to by *ppc gets filled only with characters * that have been successfully read, and cp always points to the location * in the buffer that was filled by the last character successfully read. * current == EOF if and only if we can't read any more, either because * we've reached the end of the input file or the buffer is full (i.e., * we've read nmax characters). * * 4. After this file is #included, the following variables may be used * and will have the specified values: * * *ppc, *pd, *pform, and *pechar will be set as documented in the * manual page; * nmax and fortran_conventions will be unchanged; * nread will be the number of characters actually read; * cp will point to the last character actually read, provided at least * one character was read successfully (in which case cp >= *ppc). */ #define UCASE(c) ((('a' <= c) && (c <= 'z'))? c - 32 : c) #define NZDIGIT(c) (('1' <= c && c <= '9') || ((int)form < 0 && \ (('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')))) { static const char *infstring = "INFINITY"; static const char *nanstring = "NAN"; int sigfound, spacefound = 0; int ids = 0; int i, agree; int nzbp = 0; /* number of zeros before point */ int nzap = 0; /* number of zeros after point */ char decpt; int nfast, nfastlimit; char *pfast; int e, esign; int expshift = 0; enum decimal_string_form form; /* * This routine assumes that the radix point is a single * ASCII character, so that following this assignment, the * condition (current == decpt) will correctly detect it. */ if (fortran_conventions > 0) decpt = '.'; else decpt = *(localeconv()->decimal_point); /* input is invalid until we find something */ pd->fpclass = fp_signaling; pd->sign = 0; pd->exponent = 0; pd->ds[0] = '\0'; pd->more = 0; pd->ndigits = 0; *pform = form = invalid_form; *pechar = NULL; /* skip white space */ while (isspace(current)) { spacefound = 1; NEXT; } if (fortran_conventions >= 2 && spacefound) { /* * We found at least one white space character. For * Fortran formatted input, accept this; if we don't * find anything else, we'll interpret it as a valid zero. */ pd->fpclass = fp_zero; form = whitespace_form; sigfound = 0; /* 0 = only zeros found so far */ if (current == EOF) { good = cp; goto done; } else { good = cp - 1; } } else { sigfound = -1; /* -1 = no digits found yet */ } /* look for optional leading sign */ if (current == '+') { NEXT; } else if (current == '-') { pd->sign = 1; NEXT; } /* * Admissible first non-white-space, non-sign characters are * 0-9, i, I, n, N, or the radix point. */ if ('1' <= current && current <= '9') { good = cp; pd->fpclass = fp_normal; form = fixed_int_form; sigfound = 1; /* 1 = significant digits found */ pd->ds[ids++] = (char)current; NEXT; } else { switch (current) { case ' ': if (fortran_conventions < 2) goto done; /* * When fortran_conventions >= 2, treat leading * blanks the same as leading zeroes. */ /*FALLTHRU*/ case '0': /* * Accept the leading zero and set pd->fpclass * accordingly, but don't set sigfound until we * determine that this isn't a "fake" hex string * (i.e., 0x.p...). */ good = cp; pd->fpclass = fp_zero; if (fortran_conventions < 0) { /* look for a hex fp string */ NEXT; if (current == 'X' || current == 'x') { /* assume hex fp form */ form = (enum decimal_string_form)-1; expshift = 2; NEXT; /* * Only a digit or radix point can * follow "0x". */ if (NZDIGIT(current)) { pd->fpclass = fp_normal; good = cp; sigfound = 1; pd->ds[ids++] = (char)current; NEXT; break; } else if (current == decpt) { NEXT; goto afterpoint; } else if (current != '0') { /* not hex fp after all */ form = fixed_int_form; expshift = 0; goto done; } } else { form = fixed_int_form; } } else { form = fixed_int_form; } /* skip all leading zeros */ while (current == '0' || (current == ' ' && fortran_conventions >= 2)) { NEXT; } sigfound = 0; /* 0 = only zeros found so far */ if (current == EOF) { good = cp; goto done; } else { good = cp - 1; } break; case 'i': case 'I': /* look for inf or infinity */ NEXT; agree = 1; while (agree <= 7 && UCASE(current) == infstring[agree]) { NEXT; agree++; } if (agree < 3) goto done; /* found valid infinity */ pd->fpclass = fp_infinity; sigfound = 1; __inf_read = 1; if (agree < 8) { good = (current == EOF)? cp + 3 - agree : cp + 2 - agree; form = inf_form; } else { good = (current == EOF)? cp : cp - 1; form = infinity_form; } /* * Accept trailing blanks if no extra characters * intervene. */ if (fortran_conventions >= 2 && (agree == 3 || agree == 8)) { while (current == ' ') { NEXT; } good = (current == EOF)? cp : cp - 1; } goto done; case 'n': case 'N': /* look for nan or nan(string) */ NEXT; agree = 1; while (agree <= 2 && UCASE(current) == nanstring[agree]) { NEXT; agree++; } if (agree < 3) goto done; /* found valid NaN */ good = (current == EOF)? cp : cp - 1; pd->fpclass = fp_quiet; form = nan_form; sigfound = 1; __nan_read = 1; if (current == '(') { /* accept parenthesized string */ NEXT; if (fortran_conventions < 0) { while ((isalnum(current) || current == '_') && ids < DECIMAL_STRING_LENGTH - 1) { pd->ds[ids++] = (char)current; NEXT; } while (isalnum(current) || current == '_') { pd->more = 1; NEXT; } } else { while (current > 0 && current != ')' && ids < DECIMAL_STRING_LENGTH - 1) { pd->ds[ids++] = (char)current; NEXT; } while (current > 0 && current != ')') { pd->more = 1; NEXT; } } if (current != ')') goto done; good = cp; form = nanstring_form; /* prepare for loop below */ if (fortran_conventions >= 2) { NEXT; } } /* accept trailing blanks */ if (fortran_conventions >= 2) { while (current == ' ') { NEXT; } good = (current == EOF)? cp : cp - 1; } goto done; default: if (current == decpt) { /* * Don't accept the radix point just yet; * we need to see at least one digit. */ NEXT; goto afterpoint; } goto done; } } nextnumber: /* * Admissible characters after the first digit are a valid digit, * an exponent delimiter (E or e for any decimal form; +, -, D, d, * Q, or q when fortran_conventions >= 2; P or p for hex form), * or the radix point. (Note that we can't get here unless we've * already found a digit.) */ if (NZDIGIT(current)) { /* * Found another nonzero digit. If there's enough room * in pd->ds, store any intervening zeros we've found so far * and then store this digit. Otherwise, stop storing * digits in pd->ds and set pd->more. */ if (ids + nzbp + 2 < DECIMAL_STRING_LENGTH) { for (i = 0; i < nzbp; i++) pd->ds[ids++] = '0'; pd->ds[ids++] = (char)current; } else { pd->exponent += (nzbp + 1) << expshift; pd->more = 1; if (ids < DECIMAL_STRING_LENGTH) { pd->ds[ids] = '\0'; pd->ndigits = ids; /* don't store any more digits */ ids = DECIMAL_STRING_LENGTH; } } pd->fpclass = fp_normal; sigfound = 1; nzbp = 0; NEXT; /* * Use an optimized loop to grab a consecutive sequence * of nonzero digits quickly. */ nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; for (nfast = 0, pfast = &(pd->ds[ids]); nfast < nfastlimit && NZDIGIT(current); nfast++) { *pfast++ = (char)current; NEXT; } ids += nfast; if (current == '0') goto nextnumberzero; /* common case */ /* advance good to the last accepted digit */ good = (current == EOF)? cp : cp - 1; goto nextnumber; } else { switch (current) { case ' ': if (fortran_conventions < 2) goto done; if (fortran_conventions == 2) { while (current == ' ') { NEXT; } good = (current == EOF)? cp : cp - 1; goto nextnumber; } /* * When fortran_conventions > 2, treat internal * blanks the same as zeroes. */ /*FALLTHRU*/ case '0': nextnumberzero: /* * Count zeros before the radix point. Later we * will either put these zeros into pd->ds or add * nzbp to pd->exponent to account for them. */ while (current == '0' || (current == ' ' && fortran_conventions > 2)) { nzbp++; NEXT; } good = (current == EOF)? cp : cp - 1; goto nextnumber; case '+': case '-': case 'D': case 'd': case 'Q': case 'q': /* * Only accept these as the start of the exponent * field if fortran_conventions is positive. */ if (fortran_conventions <= 0) goto done; /*FALLTHRU*/ case 'E': case 'e': if ((int)form < 0) goto done; goto exponent; case 'P': case 'p': if ((int)form > 0) goto done; goto exponent; default: if (current == decpt) { /* accept the radix point */ good = cp; if (form == fixed_int_form) form = fixed_intdot_form; NEXT; goto afterpoint; } goto done; } } afterpoint: /* * Admissible characters after the radix point are a valid digit * or an exponent delimiter. (Note that it is possible to get * here even though we haven't found any digits yet.) */ if (NZDIGIT(current)) { /* found a digit after the point; revise form */ if (form == invalid_form || form == whitespace_form) form = fixed_dotfrac_form; else if (form == fixed_intdot_form) form = fixed_intdotfrac_form; good = cp; if (sigfound < 1) { /* no significant digits found until now */ pd->fpclass = fp_normal; sigfound = 1; pd->ds[ids++] = (char)current; pd->exponent = (-(nzap + 1)) << expshift; } else { /* significant digits have been found */ if (ids + nzbp + nzap + 2 < DECIMAL_STRING_LENGTH) { for (i = 0; i < nzbp + nzap; i++) pd->ds[ids++] = '0'; pd->ds[ids++] = (char)current; pd->exponent -= (nzap + 1) << expshift; } else { pd->exponent += nzbp << expshift; pd->more = 1; if (ids < DECIMAL_STRING_LENGTH) { pd->ds[ids] = '\0'; pd->ndigits = ids; /* don't store any more digits */ ids = DECIMAL_STRING_LENGTH; } } } nzbp = 0; nzap = 0; NEXT; /* * Use an optimized loop to grab a consecutive sequence * of nonzero digits quickly. */ nfastlimit = DECIMAL_STRING_LENGTH - 3 - ids; for (nfast = 0, pfast = &(pd->ds[ids]); nfast < nfastlimit && NZDIGIT(current); nfast++) { *pfast++ = (char)current; NEXT; } ids += nfast; pd->exponent -= nfast << expshift; if (current == '0') goto zeroafterpoint; /* advance good to the last accepted digit */ good = (current == EOF)? cp : cp - 1; goto afterpoint; } else { switch (current) { case ' ': if (fortran_conventions < 2) goto done; if (fortran_conventions == 2) { /* * Treat a radix point followed by blanks * but no digits as zero so we'll pass FCVS. */ if (sigfound == -1) { pd->fpclass = fp_zero; sigfound = 0; } while (current == ' ') { NEXT; } good = (current == EOF)? cp : cp - 1; goto afterpoint; } /* * when fortran_conventions > 2, treat internal * blanks the same as zeroes */ /*FALLTHRU*/ case '0': /* found a digit after the point; revise form */ if (form == invalid_form || form == whitespace_form) form = fixed_dotfrac_form; else if (form == fixed_intdot_form) form = fixed_intdotfrac_form; if (sigfound == -1) { pd->fpclass = fp_zero; sigfound = 0; } zeroafterpoint: /* * Count zeros after the radix point. If we find * any more nonzero digits later, we will put these * zeros into pd->ds and decrease pd->exponent by * nzap. */ while (current == '0' || (current == ' ' && fortran_conventions > 2)) { nzap++; NEXT; } if (current == EOF) { good = cp; goto done; } else { good = cp - 1; } goto afterpoint; case '+': case '-': case 'D': case 'd': case 'Q': case 'q': /* * Only accept these as the start of the exponent * field if fortran_conventions is positive. */ if (fortran_conventions <= 0) goto done; /*FALLTHRU*/ case 'E': case 'e': /* don't accept exponent without preceding digits */ if (sigfound == -1 || (int)form < 0) goto done; break; case 'P': case 'p': /* don't accept exponent without preceding digits */ if (sigfound == -1 || (int)form > 0) goto done; break; default: goto done; } } exponent: /* * Set *pechar to point to the character that looks like the * beginning of the exponent field, then attempt to parse it. */ *pechar = cp; if (current != '+' && current != '-') { /* skip the exponent character and following blanks */ NEXT; if (fortran_conventions >= 2 && current == ' ') { while (current == ' ') { NEXT; } if (fortran_conventions > 2) good = (current == EOF)? cp : cp - 1; } } e = 0; esign = 0; /* look for optional exponent sign */ if (current == '+') { NEXT; } else if (current == '-') { esign = 1; NEXT; } /* * Accumulate explicit exponent. Note that if we don't find at * least one digit, good won't be updated and e will remain 0. * Also, we keep e from getting too large so we don't overflow * the range of int (but notice that the threshold is large * enough that any larger e would cause the result to underflow * or overflow anyway). */ while (('0' <= current && current <= '9') || current == ' ') { if (current == ' ') { if (fortran_conventions < 2) break; if (fortran_conventions == 2) { NEXT; continue; } current = '0'; } good = cp; if (e <= 1000000) e = 10 * e + current - '0'; NEXT; if (fortran_conventions == 2 && current == ' ') { /* accept trailing blanks */ while (current == ' ') { NEXT; } good = (current == EOF)? cp : cp - 1; } } if (esign == 1) pd->exponent -= e; else pd->exponent += e; /* * If we successfully parsed an exponent field, update form * accordingly. If we didn't, don't set *pechar. */ if (good >= *pechar) { switch (form) { case whitespace_form: case fixed_int_form: form = floating_int_form; break; case fixed_intdot_form: form = floating_intdot_form; break; case fixed_dotfrac_form: form = floating_dotfrac_form; break; case fixed_intdotfrac_form: form = floating_intdotfrac_form; break; } } else { *pechar = NULL; } done: /* * If we found any zeros before the radix point that were not * accounted for earlier, adjust the exponent. (This is only * relevant when pd->fpclass == fp_normal, but it's harmless * in all other cases.) */ pd->exponent += nzbp << expshift; /* terminate pd->ds if we haven't already */ if (ids < DECIMAL_STRING_LENGTH) { pd->ds[ids] = '\0'; pd->ndigits = ids; } /* * If we accepted any characters, advance *ppc to point to the * first character we didn't accept; otherwise, pass back a * signaling nan. */ if (good >= *ppc) { *ppc = good + 1; } else { pd->fpclass = fp_signaling; pd->sign = 0; form = invalid_form; } *pform = form; }