Source file dif.icn
############################################################################
#
#	File:     dif.icn
#
#	Subject:  Procedure to check for differences
#
#	Author:   Robert J. Alexander
#
#	Date:     August 14, 1996
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
#	dif(stream, compare, eof, group)
#		generates a sequence of differences between an	arbitrary
#		number of input streams.  Each result is returned as a list
#		of diff_recs, one for each input stream, with each diff_rec
#		containing a list of items that differ and their position
#		in the input stream.
#
#  The diff_rec type is declared as:
#
#		record diff_rec(pos,diffs)
#
#  dif() fails if there are no differences, i.e. it produces an empty
#  result sequence.
#
############################################################################
#
#  For example, if two input streams are:
#
#	a b c d e f g h
#	a b d e f i j
#
#  the output sequence would be:
#
#	[diff_rec(3,[c]),diff_rec(3,[])]
#	[diff_rec(7,[g,h]),diff_rec(6,[i,j])
#
#  The arguments to dif(stream,compare,eof,group) are:
#
#	stream		A list of data objects that represent input streams
#			from which dif will extract its input "records".
#			The elements can be of several different types which
#			result in different actions, as follows:
#
#			   Type			   Action
#			===========	=============================
#			file		file is "read" to get records
#
#			co-expression	co-expression is activated to
#					get records
#
#			list		records are "gotten" (get()) from
#					the list
#
#			diff_proc	a record type defined in "dif" to
#					allow a procedure (or procedures)
#					suppled by dif's caller to be called
#					to get records.  Diff_proc has two
#					fields, the procedure to call and the
#					argument to call it with.  Its
#					definition looks like this:
#
#					   record diff_proc(proc,arg)
#			
#
#  Optional arguments:
#
#	compare		Item comparison procedure -- succeeds if
#			"equal", otherwise fails (default is the
#			identity "===" comparison).  The comparison
#			must allow for the fact that the eof object
#			(see next) might be an argument, and a pair of
#			eofs must compare equal.
#
#	eof		An object that is distinguishable from other
#			objects in the stream.  Default is &null.
#
#	group		A procedure that is called with the current number
#			of unmatched items as its argument.  It must
#			return the number of matching items required
#			for file synchronization to occur.  Default is
#			the formula Trunc((2.0 * Log(M)) + 2.0) where
#			M is the number of unmatched items.
#
############################################################################

invocable all

record diff_rec(pos,diffs)
record diff_proc(proc,arg)
record diff_file(stream,queue)


procedure dif(stream,compare,eof,group)
  local f,linenbr,line,difflist,gf,i,j,k,l,m,n,x,test,
	result,synclist,nsyncs,syncpoint
  #
  #  Provide default arguments and initialize data.
  #
  /compare := proc("===",2)
  /group := groupfactor
  f := []
  every put(f,diff_file(!stream,[]))
  linenbr := list(*stream,0)
  line := list(*stream)
  test := list(*stream)
  difflist := list(*stream)
  every !difflist := []
  #
  #  Loop to process all records of all input streams.
  #
  repeat {
    #
    #  This is the "idle loop" where we spin until we find a discrepancy
    #  among the data streams.  A line is read from each stream, with a
    #  check for eof on all streams.  Then the line from the first
    #  stream is compared to the lines from all the others.
    #
    repeat {
      every i := 1 to *stream do
        line[i] := diffread(f[i]) | eof
      if not (every x := !line do
        (x === eof) | break) then break break
      every !linenbr +:= 1
      if (every x := !line[2:0] do
        compare(x,line[1]) | break) then break
    }
    #
    #  Aha!  We have found a difference.  Create a difference list,
    #  one entry per stream, primed with the differing line we just found.
    #
    every i := 1 to *stream do
      difflist[i] := [line[i]]
    repeat {
      #
      #  Add a new input line from each stream to the difference list.
      #  Then build lists of the subset of different lines we need to
      #  actually compare.
      #
      every i := 1 to *stream do
        put(difflist[i],diffread(f[i]) | eof)
      gf := group(*difflist[1])
      every i := 1 to *stream do
        test[i] := difflist[i][-gf:0]
      #
      #  Create a "synchronization matrix", with a row and column for
      #  each input stream.  The entries will be initially &null, then
      #  will be set to the synchronization position if sync is
      #  achieved between the two streams.  Another list is created to
      #  keep track of how many syncs have been achieved for each stream.
      #
      j := *difflist[1] - gf + 1
      synclist := list(*stream)
      every !synclist := list(*stream)
      every k := 1 to *stream do
        synclist[k][k] := j
      nsyncs := list(*stream,1)
      #
      #  Loop through positions to start comparing lines.  This set of
      #  nested loops will be exited when a stream achieves sync with
      #  all other streams.
      #
      every i := 1 to j do {
        #
        #  Loop through all streams.
        #
        every k := 1 to *stream do {
          #
          #  Loop through all streams.
          #
	  every l := 1 to *stream do {
	    if /synclist[k][l] then {	# avoid unnecessary comparisons
	      #
              #  Compare items of the test list to the differences list
              #  at all possible positions.  If they compare, store the
              #  current position in the sync matrix and bump the count
              #  of streams sync'd to this stream.  If all streams are in
	      #  sync, exit all loops but the outer one.
	      #
	      m := i - 1
	      if not every n := 1 to gf do {
	        if not compare(test[k][n],difflist[l][m +:= 1]) then break
	      } then {
	        synclist[k][l] := i	# store current position
	        if (nsyncs[k] +:= 1) = *stream then break break break break
	      }
	    }
	  }
	}
      }
    }
    #
    #  Prepare an output set.  Since we have read the input streams past
    #  the point of synchronization, we must queue those lines before their
    #  input streams. 
    #
    synclist := synclist[k]
    result := list(*stream)
    every i := 1 to *stream do {
      j := synclist[i]
      while difflist[i][j -:= 1] === eof	# trim past eof
      result[i] := diff_rec(linenbr[i],difflist[i][1:j + 1])
      f[i].queue := difflist[i][synclist[i] + gf:0] ||| f[i].queue
      linenbr[i] +:= synclist[i] + gf - 2
      difflist[i] := []
    }
    suspend result
  }
end

#
#  diffread() -- Read a line from an input stream.
#
procedure diffread(f)
  local x
  return get(f.queue) | case type(x := f.stream) of {
    "file" | "window": read(x)
    "co-expression": @x
    "diff_proc": x.proc(x.arg)
    "list": get(x)
  }
end

#
#  groupfactor() -- Determine how many like lines we need to close
#  off a group of differences.  This is the default routine -- the
#  caller may provide his own.
#
procedure groupfactor(m)  # Compute: Trunc((2.0 * Log(m)) + 2.0)
  m := string(m)
  return 2 * *m + if m <<= "316227766"[1+:*m] then 0 else 1
end


This page produced by UniDoc on 2021/04/15 @ 23:59:45.