Source file wildcard.icn
############################################################################
#
#	File:     wildcard.icn
#
#	Subject:  Procedures for UNIX-like wild-card pattern matching
#
#	Author:   Robert J. Alexander
#
#	Date:     September 26, 1990
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
#  This is a kit of procedures to deal with UNIX-like filename wild-card
#  patterns containing *, ?, and [...].  The meanings are as of the
#  pattern characters are the same as in the UNIX shells csh and sh.
#  They are described briefly in the wild_pat() procedure.
#
#  These procedures are interesting partly because of the "recursive
#  suspension" technique used to simulate conjunction of an arbitrary
#  number of computed expressions.
#
#
#  The public procedures are:
#
#  wild_match(pattern,s,i1,i2) : i3,i4,...,iN
#  wild_find(pattern,s,i1,i2) : i3,i4,...,iN
#
#  wild_match() produces the sequence of positions in "s" past a
#  substring starting at "i1" that matches "pattern", but fails if there
#  is no such position.  Similar to match(), but is capable of
#  generating multiple positions.
#
#  wild_find() produces the sequence of positions in "s" where
#  substrings begin that match "pattern", but fails if there is no such
#  position.  Similar to find().
#
#  "pattern" can be either a string or a pattern list -- see wild_pat(),
#  below.
#
#  Default values of s, i1, and i2 are the same as for Icon's built-in
#  string scanning procedures such as match().
#
#
#  wild_pat(s) : L
#
#  Creates a pattern element list from pattern string "s".  A pattern
#  element is needed by wild_match() and wild_find().  wild_match() and
#  wild_find() will automatically convert a pattern string to a pattern
#  list, but it is faster to do the conversion explicitly if multiple
#  operations are done using the same pattern.
#

procedure wild_match(plist,s,i1,i2) # i3,i4,...,iN
#
#  Produce the sequence of positions in s past a string starting at i1
#  that matches the pattern plist, but fails if there is no such
#  position.  Similar to match(), but is capable of generating multiple
#  positions.
#
   /i1:= if /s := &subject then &pos else 1 ; /i2 := 0
   plist := (if type(plist) == "string" then wild_pat else copy)(plist)
   suspend s[i1:i2] ? (wild_match1(plist) & i1 + &pos - 1)
end


procedure wild_find(plist,s,i1,i2) # i3,i4,...,iN
#
#  Produce the sequence of positions in s where strings begin that match
#  the pattern plist, but fails if there is no such position.  Similar
#  to find().
#
   local p
   /i1 := if /s := &subject then &pos else 1 ; /i2 := 0
   if type(plist) == "string" then plist := wild_pat(plist)
   s[i1:i2] ? suspend (
	 wild_skip(plist) &
	 p := &pos &
	 tab(wild_match(plist))\1 &
	 i1 + p - 1)
end


procedure wild_pat(s) # L
#
#  Produce pattern list representing pattern string s.
#
   local c,ch,chars,complement,e,plist,special
   #
   #  Create a list of pattern elements.  Pattern strings are parsed
   #  and converted into list elements as follows:
   #
   #	* --> 0			Match any substring (including empty)
   #	? --> 1			Matches any single character
   #	[abc] --> 'abc'		Matches single character in 'abc' (more below)
   #	abc --> "abc"		Matches "abc"
   #	\			Escapes the following character, causing it
   #				to be considered part of a string to match
   #				rather than one of the special pattern
   #				characters.
   #
   plist := []
   s ? {
      until pos(0) do {
	 c := &null
	 #
	 #  Put pattern element on list.
	 #
	 e := (="*" & 0) | (="?" & 1) | (="\\" & move(1)) |
	       (="[" & c := (=("]" | "!]" | "!-]" | "") || tab(find("]"))) &
		     move(1)) |
	       move(1) || tab(upto('*?[\\') | 0)
	 #
	 #  If it's [abc], create a cset.  Special notations:
	 #
	 #	   A-Z means all characters from A to Z inclusive.
	 #	   ! (if first) means any character not among those specified.
	 #	   - or ] (if first, or after initial !) means itself.
	 #
	 \c ? {
	    complement := ="!" | &null
	    special := '-]'
	    e := ''
	    while ch := tab(any(special)) do {
	       e ++:= ch
	       special --:= ch
	       }
	    while chars := tab(find("-")) do {
	       move(1)
	       e ++:= chars[1:-1] ++
		     &cset[ord(chars[-1]) + 1:ord(move(1)) + 2]
	       }
	    e ++:= tab(0)
	    if \complement then e := ~e
	    }
	 if type(e) == "string" == type(plist[-1]) then plist[-1] ||:= e
	 else put(plist,e)
	 }
      }
   return plist
end


procedure wild_skip(plist) # s1,s2,...,sN
#
#  Used privately -- match a sequence of strings in s past which a match
#  of the first pattern element in plist is likely to succeed.	This
#  procedure is used for heuristic performance improvement by
#  wild_match() for the "*" pattern element by matching only strings
#  where the next element is likely to succeed, and by wild_find() to
#  attempt matches only at likely positions.
#
   local x,t
   x := plist[1]
   suspend tab(
      case type(x) of {
	 "string": find(x)
	 "cset": upto(x)
	 default: &pos to *&subject + 1
	 }
      )
end


procedure wild_match1(plist,v) # s1,s2,...,sN
#
#  Used privately by wild_match() to simulate a computed conjunction
#  expression via recursive suspension.
#
   local c
   if c := pop(plist) then {
      suspend wild_match1(plist,case c of {
	 0: wild_skip(plist)
	 1: move(1)    
	 default: case type(c) of {
	       "cset": tab(any(c))
	       default: =c
	       } 
	 })
      push(plist,c)
      }
   else return v
end

This page produced by UniDoc on 2021/04/15 @ 23:59:44.