Source file ngrams.icn
############################################################################
#
#	File:     ngrams.icn
#
#	Subject:  Procedures to produce n-grams
#
#	Author:   Ralph E. Griswold
#
#	Date:     March 20, 1998
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
#     The procedure ngrams(s, n, c, t) generates a tabulation of the n-grams
#  in the specified string.  If c is non-null, it is used as the set of
#  characters from which n-grams are taken (other characters break n-grams).
#  The default for c is the upper- and lowercase letters.  If t is null,
#  the tabulation is given in order of decreasing frequency; otherwise in
#  alphabetical order of n-grams.
#
#     For backward compatibility, the first argument may be a file, in
#  which case, it is read to provide the string.
#
############################################################################

#
#  ngrams(s, i, c, t) -- generate every i-gram found in s, with its count.
#
#  s  string to scan, or a file whose entire contents are read first
#  i  length of the n-grams; must convert to a positive integer
#  c  characters from which n-grams are taken (default &lcase || &ucase);
#     any other character ends the current run of characters
#  t  if null, results are generated in order of decreasing count;
#     if non-null, in sorted order of the n-grams themselves
#
#  Each result is the n-gram followed by its count right-justified in a
#  field of 8 columns.  The program is stopped if i or c is invalid.
#
procedure ngrams(s, i, c, t)		#: n-grams with count
   local text, grams, a, count

   if not (integer(i) > 0) then stop("*** invalid ngrams specification")

   /c := &lcase || &ucase
   if not (c := cset(c)) then stop("*** invalid cset specification")

   grams := table(0)

   if type(s) == "file" then {
      # Read from the file that was passed in.  The earlier code read from
      # an unassigned local, so the supplied file was never consulted.
      text := ""
      while text ||:= reads(s, 1000)
      }
   else text := s

   # Outer loop: skip to the next run of characters in c.
   # Inner scan: slide a window of width i over that run, one step at a
   # time (advance i, back up i - 1).  The "\ 1" keeps tab() from being
   # resumed when the inner loop finally fails, so the scanning position
   # stays past the run just processed.
   text ? while tab(upto(c)) do
      (tab(many(c)) \ 1) ? while grams[move(i)] +:= 1 do
         move(-i + 1)

   if /t then {
      # sort(..., 4) gives key, value, key, value ... ordered by value;
      # pulling from the end yields the largest counts first.
      a := sort(grams, 4)
      while count := pull(a) do
         suspend pull(a) || right(count, 8)
      }
   else {
      # sort(..., 3) gives key, value, key, value ... ordered by key.
      a := sort(grams, 3)
      suspend |(get(a) || right(get(a), 8))
      }
end

#
#  ngramset(s, i, c) -- return the set of distinct i-grams found in s.
#
#  s  string to scan, or a file whose entire contents are read first
#  i  length of the n-grams; must convert to a positive integer
#  c  characters from which n-grams are taken (default &lcase || &ucase);
#     any other character ends the current run of characters
#
#  The program is stopped if i or c is invalid.
#
procedure ngramset(s, i, c)		#: n-grams set
   local text, grams

   if not (integer(i) > 0) then stop("*** invalid ngrams specification")

   /c := &lcase || &ucase
   if not (c := cset(c)) then stop("*** invalid cset specification")

   grams := set()

   if type(s) == "file" then {
      # Read from the file that was passed in.  The earlier code read from
      # an unassigned local, so the supplied file was never consulted.
      text := ""
      while text ||:= reads(s, 1000)
      }
   else text := s

   # Outer loop: skip to the next run of characters in c.
   # Inner scan: slide a window of width i over that run, one step at a
   # time (advance i, back up i - 1).  The "\ 1" keeps tab() from being
   # resumed when the inner loop finally fails, so the scanning position
   # stays past the run just processed.
   text ? while tab(upto(c)) do
      (tab(many(c)) \ 1) ? while insert(grams, move(i)) do
         move(-i + 1)

   return grams

end

This page produced by UniDoc on 2021/04/15 @ 23:59:44.