diff options
Diffstat (limited to 'src/runtime/fstranl.r')
-rw-r--r-- | src/runtime/fstranl.r | 260 |
1 files changed, 260 insertions, 0 deletions
diff --git a/src/runtime/fstranl.r b/src/runtime/fstranl.r new file mode 100644 index 0000000..be13839 --- /dev/null +++ b/src/runtime/fstranl.r @@ -0,0 +1,260 @@ +/* + * File: fstranl.r + * String analysis functions: any,bal,find,many,match,upto + * + * str_anal is a macro for performing the standard conversions and + * defaulting for string analysis functions. It takes as arguments the + * parameters for subject, beginning position, and ending position. It + * produces declarations for these 3 names prepended with cnv_. These + * variables will contain the converted versions of the arguments. + */ +#begdef str_anal(s, i, j) + declare { + C_integer cnv_ ## i; + C_integer cnv_ ## j; + } + + abstract { + return integer + } + + if is:null(s) then { + inline { + s = k_subject; + } + if is:null(i) then inline { + cnv_ ## i = k_pos; + } + } + else { + if !cnv:string(s) then + runerr(103,s) + if is:null(i) then inline { + cnv_ ## i = 1; + } + } + + if !is:null(i) then + if cnv:C_integer(i,cnv_ ## i) then inline { + if ((cnv_ ## i = cvpos(cnv_ ## i, StrLen(s))) == CvtFail) + fail; + } + else + runerr(101,i) + + + if is:null(j) then inline { + cnv_ ## j = StrLen(s) + 1; + } + else if cnv:C_integer(j,cnv_ ## j) then inline { + if ((cnv_ ## j = cvpos(cnv_ ## j, StrLen(s))) == CvtFail) + fail; + if (cnv_ ## i > cnv_ ## j) { + register C_integer tmp; + tmp = cnv_ ## i; + cnv_ ## i = cnv_ ## j; + cnv_ ## j = tmp; + } + } + else + runerr(101,j) + +#enddef + + +"any(c,s,i1,i2) - produces i1+1 if i2 is greater than 1 and s[i] is contained " +"in c and poseq(i2,x) is greater than poseq(i1,x), but fails otherwise." + +function{0,1} any(c,s,i,j) + str_anal( s, i, j ) + if !cnv:tmp_cset(c) then + runerr(104,c) + body { + if (cnv_i == cnv_j) + fail; + if (!Testb(StrLoc(s)[cnv_i-1], c)) + fail; + return C_integer cnv_i+1; + } +end + + +"bal(c1,c2,c3,s,i1,i2) - generates the sequence of integer positions in s up to" +" a character of c1 in s[i1:i2] that is balanced with respect to characters in" +" c2 and c3, but fails if there is no such position." + +function{*} bal(c1,c2,c3,s,i,j) + str_anal( s, i, j ) + if !def:tmp_cset(c1,fullcs) then + runerr(104,c1) + if !def:tmp_cset(c2,lparcs) then + runerr(104,c2) + if !def:tmp_cset(c3,rparcs) then + runerr(104,c3) + + body { + C_integer cnt; + char c; + + /* + * Loop through characters in s[i:j]. When a character in c2 + * is found, increment cnt; when a character in c3 is found, decrement + * cnt. When cnt is 0 there have been an equal number of occurrences + * of characters in c2 and c3, i.e., the string to the left of + * i is balanced. If the string is balanced and the current character + * (s[i]) is in c, suspend with i. Note that if cnt drops below + * zero, bal fails. + */ + cnt = 0; + while (cnv_i < cnv_j) { + c = StrLoc(s)[cnv_i-1]; + if (cnt == 0 && Testb(c, c1)) { + suspend C_integer cnv_i; + } + if (Testb(c, c2)) + cnt++; + else if (Testb(c, c3)) + cnt--; + if (cnt < 0) + fail; + cnv_i++; + } + /* + * Eventually fail. + */ + fail; + } +end + + +"find(s1,s2,i1,i2) - generates the sequence of positions in s2 at which " +"s1 occurs as a substring in s2[i1:i2], but fails if there is no such position." + +function{*} find(s1,s2,i,j) + str_anal( s2, i, j ) + if !cnv:string(s1) then + runerr(103,s1) + + body { + register char *str1, *str2; + C_integer s1_len, l, term; + + /* + * Loop through s2[i:j] trying to find s1 at each point, stopping + * when the remaining portion s2[i:j] is too short to contain s1. + * Optimize me! + */ + s1_len = StrLen(s1); + term = cnv_j - s1_len; + while (cnv_i <= term) { + str1 = StrLoc(s1); + str2 = StrLoc(s2) + cnv_i - 1; + l = s1_len; + + /* + * Compare strings on a byte-wise basis; if the end is reached + * before inequality is found, suspend with the position of the + * string. + */ + do { + if (l-- <= 0) { + suspend C_integer cnv_i; + break; + } + } while (*str1++ == *str2++); + cnv_i++; + } + fail; + } +end + + +"many(c,s,i1,i2) - produces the position in s after the longest initial " +"sequence of characters in c in s[i1:i2] but fails if there is none." + +function{0,1} many(c,s,i,j) + str_anal( s, i, j ) + if !cnv:tmp_cset(c) then + runerr(104,c) + body { + C_integer start_i = cnv_i; + /* + * Move i along s[i:j] until a character that is not in c is found + * or the end of the string is reached. + */ + while (cnv_i < cnv_j) { + if (!Testb(StrLoc(s)[cnv_i-1], c)) + break; + cnv_i++; + } + /* + * Fail if no characters in c were found; otherwise + * return the position of the first character not in c. + */ + if (cnv_i == start_i) + fail; + return C_integer cnv_i; + } +end + + +"match(s1,s2,i1,i2) - produces i1+*s1 if s1==s2[i1+:*s1], but fails otherwise." + +function{0,1} match(s1,s2,i,j) + str_anal( s2, i, j ) + if !cnv:tmp_string(s1) then + runerr(103,s1) + body { + char *str1, *str2; + + /* + * Cannot match unless s2[i:j] is as long as s1. + */ + if (cnv_j - cnv_i < StrLen(s1)) + fail; + + /* + * Compare s1 with s2[i:j] for *s1 characters; fail if an + * inequality is found. + */ + str1 = StrLoc(s1); + str2 = StrLoc(s2) + cnv_i - 1; + for (cnv_j = StrLen(s1); cnv_j > 0; cnv_j--) + if (*str1++ != *str2++) + fail; + + /* + * Return position of end of matched string in s2. + */ + return C_integer cnv_i + StrLen(s1); + } +end + + +"upto(c,s,i1,i2) - generates the sequence of integer positions in s up to a " +"character in c in s[i2:i2], but fails if there is no such position." + +function{*} upto(c,s,i,j) + str_anal( s, i, j ) + if !cnv:tmp_cset(c) then + runerr(104,c) + body { + C_integer tmp; + + /* + * Look through s[i:j] and suspend position of each occurrence of + * of a character in c. + */ + while (cnv_i < cnv_j) { + tmp = (C_integer)StrLoc(s)[cnv_i-1]; + if (Testb(tmp, c)) { + suspend C_integer cnv_i; + } + cnv_i++; + } + /* + * Eventually fail. + */ + fail; + } +end |