scan_util.icn - source code

Source file scan_util.icn
#<p>
#   General purpose scanning routines contributed by various people.
#</p><p>
#   This is one of several files contributing to the util package.
#</p>
#<p>
# <b>Author:</b> Steve Wampler (<i>sbw@tapestry.tucson.az.us</i>)
#</p>
#<p>
#  This file is in the <i>public domain</i>.
#</p>

package util

import lang

#<p>
# Produce a 'snapshot' of string scanning showing the current
#   scanning position.
#   This is a (trivially) matching procedure.
#   <[returns an empty string, corresponding to a <tt>move(0)</tt>]>
#</p>
procedure snapshot(prefix:"")   # Precedes any output (default is "")
   ::write(prefix, "'",&subject,"'")
   ::write(prefix, " ",repl(" ",&pos-1),"^")
   return ""   # Makes this a matching procedure!
end

#<p>
# Similar to snapshot(), but displays only a portion of very long
#   scan subjects.
#   This is a (trivially) matching procedure.
#   <[returns an empty string, corresponding to a <tt>move(0)</tt>]>
#</p>
procedure scanlocus(prefix:"", # Precedes any output (default is "")
                    n:80)      # amount of &subject to show (default is 80)
   local p1, p2, hn

   p1 := if &pos < n then 1 else &pos-(n/2)
   p2 := if (p1+n) > *&subject then 0 else p1+n
   ::write(&errout, prefix, &subject[p1:p2])
   ::write(&errout, prefix, ::repl(" ",&pos-p1), "^ ", &pos)

   return ""
end

#<p>
#  Produce everything up to a substring and skip past that substring.
#    This is a matching procedure.
#    On success, the scanning position is left after the substring.
#    <[returns tabbed over portion, omitting skipped substring]>
#</p>
procedure tabSkip(s)     # substring to tab up to and skip over
   suspend 1(::tab(::find(s)),::move(*s))
end

#<p>
# Skip whitespace.
#   This is a matching procedure.
#   <[returns skipped over whitespace, if any]>
#   <[fails if no whitespace]>
#</p>
procedure WS()
   static WS := ' \t'
   suspend ::tab(::many(WS))
end

#<p>
# Matches 0 or more whitespace characters  (Cannot fail)
#   This is a matching procedure.
#   <[returns skipped over whitespace, if any]>
#</p>
procedure ws()
   suspend WS() | ""
end

#<p>
# Tabmatch past a Unicon variable name.
#   This is a matching procedure.
# <[returns matched Unicon variable name]>
#</p>
procedure matchVar()
   static fc := &letters ++ '_',
          wc := fc ++ &digits
   suspend ::tab(::any(fc)) || (::tab(::many(wc))|"")
end

#<p>
# Like find, but accepts a list of strings and finds them by
#   their order of appearance in the subject string.
# <i>The list of strings is searched in order.  To locate the longest
# substring first, sort the list in reverse order, as in:</i>
# <pre>
#     findFirst(::reverse(::sort(a)))
# </pre>
#</p>
#<p>
#  <i><b>Deprecated</b> in favor of the FindFirst class.</i>
#</p>
procedure findFirst(a)   # List of strings to look for.
   local fc := '', p
   every fc ++:= (!a)[1]
   suspend 1(p := ::upto(fc), ::match(!a,,p))
end

#<p>
# Finds substrings in the order in which they appear in the
#   subject string.  If the substrings are submitted as a
#   list, then the substrings are matched in order (i.e. given
#   two potential matches [because one is a prefix of the other]
#   FindFirst matches the first one found in the list).  To
#   find the shortest substring first, sort the list.  To
#   find the longest substring first, sort the list in reverse
#   order.
#</p>
#<p>
# This is a class to avoid the overhead of creating the internal
#   structures on each invocation (while allowing different
#   instances of FirstFirst to be used concurrently).
#</p>
class FindFirst : Object(fchars,cMaps,subs,lastMatch)

   #<p>
   # Like find, but locates substrings in the order of
   #   their appearance in the subject string.
   #   This is a matching procedure.
   # <[generates positions (left-to-right) of matched substrings]>
   #</p>
   method locate(s,    # subject, defaults to &subject
                 i,    # start position, defaults to &pos
                 j     # final position, defaults to 0
                 )
      /s := &subject
      /i := &pos
      /j := 0
      if *\s > 0 then {
	 suspend 1(p := ::upto(fchars,s,i,j),
		   ::match(lastMatch <- !cMaps[s[p]],s,p,j))
	 }
   end

   #<p>
   #  Produce the last matched substring.  (<i>Only valid if locate()
   #    succeeded!</i>)
   #  <[returns last matched substring from call to <tt>locate()</tt>]>
   #</p>
   method getMatch()
      return \lastMatch
   end

   #<p>
   #  Forget the last matched substring.
   #</p>
   method clearMatch()
      lastMatch := &null
   end

   #<p>
   #  Move past the match, returning it.  Fails if no match yet or if the
   #  scan position has been moved since the last call to <tt>locate()</tt>.
   #   This is a matching procedure.
   #  <[returns matched substring]>
   #</p>
   method moveMatch()
      return =\lastMatch
   end

   #<p>
   #  Change the set of substrings to look for.  <i>Not normally used
   #  externally</i>.
   #</p>
   method setKeys(a)    # a set of substrings
      local k

      subs := a
      cMaps := ::table()
      fchars := ''
      every k := !subs do {
	 fchars ++:= k[1]
	 /cMaps[k[1]] := []
	 ::put(cMaps[k[1]], k)
	 }
      lastMatch := &null
   end

initially (a)    # a collection of substrings
   setKeys(a)
end

#<p>
# Tabmatch past the next unescaped character in cs
#   Fails if no such unescaped character exists.
#   This is a matching procedure.
#   <[generates matched substrings]>
#</p>
procedure tabPast(cs)    # cset of characters to tab past.
   local sPos := &pos
   s := ""
   while s ||:= isEscapeSeq(::tab(::upto(cs))) || ::move(1)
   suspend s ||:= ::tab(::upto(cs))\1 || ::move(1)
   &pos := sPos
end

#<p>
# Tabmatch a Unicon string
#   This is a matching procedure.
#   <[generates matched substrings]>
#</p>
procedure matchString()
   suspend ="\"" || tabPast('"')
end

#<p>
# Tabmatch a Unicon cset
#   This is a matching procedure.
#   <[generates matched substrings]>
#</p>
procedure matchCSet()
   suspend ="\'" || tabPast('\'')
end

#<p>
# Succeed if string s ends in an odd number of escape characters.
#  This is a specialty procedure - it's intended to simplify
#  the task of determining if the 'next' character of the
#  string that has s as a substring is escaped or not.
#  If this procedure succeeds, that 'next' character is
#  escaped and s is returned.
# <[returns <tt>s<tt> if it ends in an odd number of escape characters]>
#</p>
#<p>
# The second parameter defaults to a backslash, the traditional
# escape character.
#</p>
#<p>
# This is not a scanning procedure, but is placed in this source file
#  to avoid circular dependencies when building.
#</p>
procedure isEscapeSeq(s,        # String to examine
                      esc:'\\'  # Escape character (defaults to <tt>\</tt>)
                     )
   ::reverse(s) ? if *::tab(::many(esc)) % 2 = 1 then return s
end

#<p>
# Skip to a position in the subject string.
#  This is similar to tab(), but doesn't construct a substring.
#  <[param p position to jump to in subject string]>
#</p>
procedure skipTo(p)
   suspend &pos <- p
end

#<p>
# Skip over n characters in subject string.
#  This is similar to move(), but doesn't construct a substring.
#  <[param n number of characters to move over in subject string]>
#</p>
procedure skipOver(n)
   suspend &pos <- &pos+n
end

#<p>
# Match strings the same way bal() matches characters.
#   The input table is a tagged set of strings to match.  The key is
#   the <I>start</I> string while the value is the <I>end</I> string.
#   This makes sure the start and stop strings are balanced w.r.t
#   each other.
#   <[param keyStrings table of start->stop pairs]>
#   <[generates matching substrings]>
#</p>
#<p>
# For example, given the table:
#<pre>
#
#     t := table()
#     t["begin"] := "end"
#
#</pre>
#  then the code:
#<pre>
#
#     if match("begin") then
#        clause := sbal(t)
#
#</pre>
# assigns to <tt>clause</tt> the substring from <tt>begin</tt> through
# the matching <tt>end</tt>.  (Assuming, of course, that there are no
# conflicts along the way...)
#</p>
#<p>
# <b>This is an unoptimized preliminary version that may contain bugs.</b>
#</p>
procedure sbal(keyStrings)
   local startStrings, stopStrings, allStrings, w, ff, bCount

   every ::insert(startStrings := ::set(), ::key(keyStrings))
   every ::insert(stopStrings  := ::set(), !keyStrings)
   every ::put(allStrings := [], !(startStrings|stopStrings))

   ff := FindFirst(allStrings)
   inside := 0

   while ::tab(ff.locate()) do {
      p1 := &pos
      w := ff.moveMatch()
      if ::member(startStrings, w) then {
	 if (inside +:= 1) = 1 then sPos := p1
	 }
      else if ::member(stopStrings, w) then {
	 if (inside -:= 1) = 0 then {
	    suspend .&subject[sPos:&pos]
	    }
	 }
      if inside < 0 then fail
      }
end
This page produced by UniDoc on 2021/04/15 @ 23:59:43.