diff options
Diffstat (limited to 'ipl/procs/wildcard.icn')
-rw-r--r-- | ipl/procs/wildcard.icn | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/ipl/procs/wildcard.icn b/ipl/procs/wildcard.icn new file mode 100644 index 0000000..a0f6af6 --- /dev/null +++ b/ipl/procs/wildcard.icn @@ -0,0 +1,186 @@ +############################################################################ +# +# File: wildcard.icn +# +# Subject: Procedures for UNIX-like wild-card pattern matching +# +# Author: Robert J. Alexander +# +# Date: September 26, 1990 +# +############################################################################ +# +# This file is in the public domain. +# +############################################################################ +# +# This is a kit of procedures to deal with UNIX-like filename wild-card +# patterns containing *, ?, and [...]. The meanings are as of the +# pattern characters are the same as in the UNIX shells csh and sh. +# They are described briefly in the wild_pat() procedure. +# +# These procedures are interesting partly because of the "recursive +# suspension" technique used to simulate conjunction of an arbitrary +# number of computed expressions. +# +# +# The public procedures are: +# +# wild_match(pattern,s,i1,i2) : i3,i4,...,iN +# wild_find(pattern,s,i1,i2) : i3,i4,...,iN +# +# wild_match() produces the sequence of positions in "s" past a +# substring starting at "i1" that matches "pattern", but fails if there +# is no such position. Similar to match(), but is capable of +# generating multiple positions. +# +# wild_find() produces the sequence of positions in "s" where +# substrings begin that match "pattern", but fails if there is no such +# position. Similar to find(). +# +# "pattern" can be either a string or a pattern list -- see wild_pat(), +# below. +# +# Default values of s, i1, and i2 are the same as for Icon's built-in +# string scanning procedures such as match(). +# +# +# wild_pat(s) : L +# +# Creates a pattern element list from pattern string "s". A pattern +# element is needed by wild_match() and wild_find(). wild_match() and +# wild_find() will automatically convert a pattern string to a pattern +# list, but it is faster to do the conversion explicitly if multiple +# operations are done using the same pattern. +# + +procedure wild_match(plist,s,i1,i2) # i3,i4,...,iN +# +# Produce the sequence of positions in s past a string starting at i1 +# that matches the pattern plist, but fails if there is no such +# position. Similar to match(), but is capable of generating multiple +# positions. +# + /i1:= if /s := &subject then &pos else 1 ; /i2 := 0 + plist := (if type(plist) == "string" then wild_pat else copy)(plist) + suspend s[i1:i2] ? (wild_match1(plist) & i1 + &pos - 1) +end + + +procedure wild_find(plist,s,i1,i2) # i3,i4,...,iN +# +# Produce the sequence of positions in s where strings begin that match +# the pattern plist, but fails if there is no such position. Similar +# to find(). +# + local p + /i1 := if /s := &subject then &pos else 1 ; /i2 := 0 + if type(plist) == "string" then plist := wild_pat(plist) + s[i1:i2] ? suspend ( + wild_skip(plist) & + p := &pos & + tab(wild_match(plist))\1 & + i1 + p - 1) +end + + +procedure wild_pat(s) # L +# +# Produce pattern list representing pattern string s. +# + local c,ch,chars,complement,e,plist,special + # + # Create a list of pattern elements. Pattern strings are parsed + # and converted into list elements as follows: + # + # * --> 0 Match any substring (including empty) + # ? --> 1 Matches any single character + # [abc] --> 'abc' Matches single character in 'abc' (more below) + # abc --> "abc" Matches "abc" + # \ Escapes the following character, causing it + # to be considered part of a string to match + # rather than one of the special pattern + # characters. + # + plist := [] + s ? { + until pos(0) do { + c := &null + # + # Put pattern element on list. + # + e := (="*" & 0) | (="?" & 1) | (="\\" & move(1)) | + (="[" & c := (=("]" | "!]" | "!-]" | "") || tab(find("]"))) & + move(1)) | + move(1) || tab(upto('*?[\\') | 0) + # + # If it's [abc], create a cset. Special notations: + # + # A-Z means all characters from A to Z inclusive. + # ! (if first) means any character not among those specified. + # - or ] (if first, or after initial !) means itself. + # + \c ? { + complement := ="!" | &null + special := '-]' + e := '' + while ch := tab(any(special)) do { + e ++:= ch + special --:= ch + } + while chars := tab(find("-")) do { + move(1) + e ++:= chars[1:-1] ++ + &cset[ord(chars[-1]) + 1:ord(move(1)) + 2] + } + e ++:= tab(0) + if \complement then e := ~e + } + if type(e) == "string" == type(plist[-1]) then plist[-1] ||:= e + else put(plist,e) + } + } + return plist +end + + +procedure wild_skip(plist) # s1,s2,...,sN +# +# Used privately -- match a sequence of strings in s past which a match +# of the first pattern element in plist is likely to succeed. This +# procedure is used for heuristic performance improvement by +# wild_match() for the "*" pattern element by matching only strings +# where the next element is likely to succeed, and by wild_find() to +# attempt matches only at likely positions. +# + local x,t + x := plist[1] + suspend tab( + case type(x) of { + "string": find(x) + "cset": upto(x) + default: &pos to *&subject + 1 + } + ) +end + + +procedure wild_match1(plist,v) # s1,s2,...,sN +# +# Used privately by wild_match() to simulate a computed conjunction +# expression via recursive suspension. +# + local c + if c := pop(plist) then { + suspend wild_match1(plist,case c of { + 0: wild_skip(plist) + 1: move(1) + default: case type(c) of { + "cset": tab(any(c)) + default: =c + } + }) + push(plist,c) + } + else return v +end |