#
# deen.icn - give English equivalents of German words
# example from the Unicon book, chapter 11
$define DEEN "http://ftp.tu-chemnitz.de/pub/Local/urz/ding/de-en/de-en.txt.gz"
#
# main - look up every command-line word in the German/English dictionary.
# An optional leading "-all" argument requests every definition of each
# word rather than only the first one.
#
procedure main(av)
   local showall, dict, term, hit

   # A leading "-all" is consumed so it is not treated as a word to look up.
   if av[1] == "-all" then showall := pop(av)

   dict := DeenDictionary()

   every term := !av do {
      # Guard clause: report unknown words and move on to the next one.
      if not (hit := dict.lookup(term)) then {
         write(term, " is not in the dictionary.")
         next
      }
      if /showall then
         write(term, ": ", hit.definition)
      else
         every write(term, ": ", dict.lookup(term).definition)
   }
end
#
# buffer - the contents of a file held in memory.
#   filename : name (or http:// URL) of the file to load
#   text     : list of the lines read from that file
#
class buffer(filename, text)

   #
   # Read the file named by filename into text, one list element per line.
   # Names starting with "http://" are opened in messaging mode ("m").
   # Names ending in ".gz" are decompressed with gunzip first: a remote
   # file is downloaded to "de-en.txt.gz" in the current directory, a local
   # file is decompressed where it is.  Stops the program if a file cannot
   # be opened.
   #
   method read()
      local mode, f, f2, s, gzname, plainname

      if match("http://", filename) then mode := "m" else mode := "r"
      f := open(filename, mode) | stop("can't open ", image(filename))

      if filename[-3:0] == ".gz" then {
         if mode == "m" then {
            # download to a local file
            gzname := "de-en.txt.gz"
            f2 := open(gzname, "w") | stop("can't write")
            while s := reads(f, 1000000) do writes(f2, s)
            close(f2)
         }
         else
            gzname := filename      # already local; decompress it in place

         # Release the handle in both cases before gunzip replaces the file.
         close(f)

         # gunzip writes its output under the same name minus ".gz".
         plainname := gzname[1:-3]

         # List form: the file name is passed as one argument and is not
         # re-parsed by the command interpreter.
         system(["gunzip", gzname])
         f := open(plainname) | stop("can't read ", plainname)
      }

      writes("Opened ", image(f), ".\nReading")
      text := [ ]
      # Print one progress dot per 1000 lines read.
      every put(text, !f) do
         if *text % 1000 = 0 then writes(".")
      close(f)
      write("\ndone. Read ", *text, " lines")
      return
   end

   # Placeholder; no behavior is implemented yet.
   method erase()
      #... ?
   end

   # ...additional buffer operations

# Load the file immediately when the constructor is given a file name.
initially
   if \filename then read()
end
#
# buftable - a buffer whose text is a table instead of a list.  Each line
# is stored under the run of letters it begins with.
#
class buftable : buffer()

   #
   # Load the lines through buffer's read(), then re-index them by their
   # leading word.  Stops the program on a line that does not start with
   # a letter.  A later line with the same leading word replaces an
   # earlier one.
   #
   method read()
      local index, entry, key

      self.buffer.read()
      index := table()
      every entry := !text do
         entry ? {
            key := tab(many(&letters)) | stop("failed on ", image(entry))
            index[key] := entry
         }
      text := index
      return
   end

   #
   # Generate the elements of the value stored under s; fails when the
   # table has no entry for s.
   #
   method lookup(s)
      suspend !\(text[s])
   end
end
#
# dictionaryentry - one dictionary entry: the word, its part of speech,
# its etymology and its definition.
#
class dictionaryentry(word, part, etymology, definition)

   #
   # Decode a dictionary entry into its components.
   # The assumed format is word;pos;etym;def (the definition is everything
   # after the third semicolon).
   # Succeeds after assigning all four fields.  Fails, leaving every field
   # untouched, when s has fewer than three semicolons.
   #
   method decode(s)
      local w, p, e, d

      s ? {
         if (w := tab(find(";"))) & move(1) &
            (p := tab(find(";"))) & move(1) &
            (e := tab(find(";"))) & move(1) then
            d := tab(0)
      }
      # d is set only when all three separators were found; test it outside
      # the scanning expression so the method is not left from inside it.
      if /d then fail

      word := w
      part := p
      etymology := e
      definition := d
      return
   end

   #
   # Encode a dictionary entry into a string in word;pos;etym;def form.
   # Null fields are written as empty strings instead of raising a
   # run-time error.
   #
   method encode()
      return (\word | "") || ";" || (\part | "") || ";" ||
             (\etymology | "") || ";" || (\definition | "")
   end

# When only the first argument is supplied it is taken to be a complete
# encoded entry and is decoded.  Nothing is decoded when it is null too.
initially
   if /part then
      decode(\word)
end
#
# dictionary - a buftable whose values are dictionaryentry objects rather
# than raw lines.
#
class dictionary : buftable()

   #
   # Load the lines through buffer's read() and index a dictionaryentry
   # for each one under the line's leading word.  Stops the program on a
   # line that does not start with a letter; fails if an entry cannot be
   # constructed.  Succeeds once the table is installed in text.
   #
   method read()
      local tmp, line, word, entry

      self.buffer.read()
      tmp := table()
      every line := !text do {
         line ? {
            word := tab(many(&letters)) | stop("failed on ", image(line))
         }
         # Built outside the scanning expression so a failure does not
         # leave the method from inside it.
         entry := dictionaryentry(line) | fail
         tmp[word] := entry
      }
      text := tmp
      return
   end

   #
   # Write every entry back to filename in encoded form, one per line.
   # Fails if the file cannot be opened for writing.
   #
   method Write()
      local f

      f := open(filename, "w") | fail
      # encode must be invoked; a bare .encode does not produce the string.
      every write(f, (!text).encode())
      close(f)
      return
   end
end
#
# DeenEntry - a German/English entry: a dictionaryentry with an added
# gender field.
#
class DeenEntry : dictionaryentry(gender)

   # Overrides the inherited decode with an empty body.
   method decode(s)
      # write("subentry decode ", image(s))
   end

#
# de is the German text and en the English text of the entry.
# The word is the German text before the first "{" and the gender is the
# text between that "{" and the following "}".  Without a "{" the word is
# the text before the first "[" (or all of de) and the gender is "?".
#
initially(de, en)
   de ? {
      if find("{") then {
         word := trim(tab(find("{")), , 0)
         move(1)                       # step over the "{"
         gender := tab(find("}"))
      }
      else {
         # here is one without gender info
         gender := "?"
         word := trim(tab(find("[") | 0), , 0)
      }
   }
   definition := en
end
#
# Return a list of dictionary entries (DeenEntry objects) for a given line
# of text.
#
# s is split at the first "::" into a German part (left) and an English
# part (right); the program stops if s contains no "::".  Both parts are
# then walked in parallel, one "|"-separated group at a time.  A German
# group containing ";" is split further and each piece is paired with the
# next ";"-separated piece of the matching English group.
#
procedure get_entries(s)
subentries := []
s ? {
deutsch := tab(find("::")) | stop("no :: in ", image(s))
# skip the separator and any blanks after it; the rest of s is the English part
="::"; tab(many(' '))
english := tab(0)
deutsch ? {
# take German groups up to the next "|" (or the end of the text);
# the loop ends at the first empty group
while *(deutschwort := tab(find("|") | 0))>0 do {
# NOTE(review): trim(...,,0) presumably strips blanks at both ends -- confirm
deutschwort := trim(deutschwort,,0)
="|"
tab(many(' '))
# the matching English group is everything up to the next "|" in english
englishword := trim(english[1:find("|",english)|0],,0)
# drop that group and its "|" from english so the next pass sees the next group
english ?:= {
tab(many(' \t'))
=englishword
tab(many(' \t'))
="|"
tab(many(' \t'))
tab(0)
}
# i is assigned but not used; only success or failure of find matters here
if i := find(";", deutschwort) then {
deutschwort ? {
# one entry per ";"-separated German piece
while *(dword := tab(find(";") | 0))>0 do {
=";"
tab(many(' '))
# eword is replaced only by a non-empty English piece; it is not reset
# between iterations, so once the English group is exhausted the
# previously assigned value is paired with the remaining German pieces
if gronk:=englishword[1:upto(';|', englishword)|0] then {
if *gronk>0 then {
eword := gronk
}
}
if /eword then stop("botched eword for dword ",image(dword)," in\n", s,"\n with remaining english of ", image(englishword))
# NOTE(review): eword is only ever assigned a non-empty gronk above, so this
# warning looks unreachable -- confirm
if eword === "" then write("empty eword for ", dword, " in:\n", s)
put(subentries, DeenEntry(dword, eword))
# consume the piece just used, plus its ";" and following blanks
englishword ?:= { =eword; =";"; tab(many(' ')); tab(0)}
}
}
}
else {
# no ";" in the German group: pair the whole group with the whole English group
put(subentries, DeenEntry(deutschwort, englishword))
}
}
}
}
return subentries
end
#
# DeenDictionary - the German/English dictionary.  text maps each German
# word to a list of the DeenEntry objects found for it.
#
class DeenDictionary : dictionary()

   #
   # Load the dictionary file through buffer's read() and index every
   # entry produced by get_entries() under its word.  Empty lines and
   # lines starting with "#" are skipped.  Stops the program if
   # get_entries() fails on a line.  Succeeds once the table is installed
   # in text, in line with buftable's read().
   #
   method read()
      local tmp, line, entries, x

      self.buffer.read()
      tmp := table()
      every line := !text do {
         # Tested without a scanning expression so that the loop is not
         # continued from inside one.
         if line == "" | match("#", line) then next
         entries := get_entries(line) |
            stop("get_entries failing on ", image(line))
         every x := !entries do {
            /tmp[x.word] := []        # first entry seen for this word
            put(tmp[x.word], x)
         }
      }
      text := tmp
      return
   end

# Pick the dictionary source: an uncompressed local copy if present, else a
# compressed local copy, else the download URL; then load it.
initially
   if stat("de-en.txt") then filename := "de-en.txt"
   else if stat("de-en.txt.gz") then filename := "de-en.txt.gz"
   else filename := DEEN
   self.read()
end
# This page produced by UniDoc on 2021/04/15 @ 23:59:43.