############################################################################
#
# File: getmail.icn
#
# Subject: Procedure to parse mail file
#
# Author: Charles Shartsis
#
# Date: August 19, 1996
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# The getmail procedure reads a Unix/Internet type mail folder
# and generates a sequence of records, one per mail message.
# It fails when end-of-file is reached. Each record contains the
# message header and message text components parsed into separate
# record fields. The entire uninterpreted message (header and text)
# are also stored in the record. See the description
# of message_record below.
#
# The argument to getmail is either the name of a mail folder or
# the file handle for a mail folder which has already been opened
# for reading. If getmail is resumed after the last message is
# generated, it closes the mail folder and returns failure.
#
# If getmail generates an incomplete sequence (does not close the
# folder and return failure) and is then restarted (not resumed)
# on the same or a different mail folder, the previous folder file
# handle remains open and inaccessible. This may be a problem if
# done repeatedly since there is usually an OS-imposed limit
# on number of open file handles. Safest way to use getmail
# is using one of the below forms:
#
# message := message_record()
# every message := !getmail("folder_name") do {
#
# process message ...
#
# }
#
# message := message_record()
# coex := create getmail("folder_name")
# while message := @coex do {
#
# process message ...
#
# }
#
# Note that if message_record's are stored in a list, the records
# may be sorted by individual components (like sender, _date, _subject)
# using sortf function in Icon Version 9.0.
#
############################################################################
#
# Requires: Icon Version 9 or greater
#
############################################################################
record message_record(
# components of "From " line
sender, # E-Mail address of sender
dayofweek,
month,
day,
time,
year,
# selected message header fields
# The following record fields hold the contents of common
# message header fields. Each record field contains the
# corresponding message field's body (as a string) or a null indicating
# that no such field was present in the header.
# Note that a list of message_record's
# can be sorted on any of these fields using the sortff function.
# The record field name is related to the message header field name
# in the following way:
#
# record_field_name := "_" ||
# map(message_header_field_name, &ucase || "-", &lcase || "_")
#
# Thus the "Mime-Version" field body is stored in the _mime_version
# record field. Multiline message header fields are "unfolded"
# into a single line according to RFC 822. The message field
# name, the following colon, and any immediately following
# whitespace are stripped from the beginning of the
# record field. E.g., if a header contains
#
# Mime-Version: 1.0
#
# then
#
# message._mime_version := "1.0"
#
# The "Received:" field is handled differently from the other
# fields since there are typically multiple occurrences of it
# in the same header. The _received record field is either null or
# contains a list of "Received:" fields. The message field names
# are NOT stripped off. Thus
#
# Received: from relay4.UU.NET by mail.netcom.com (8.6.12/Netcom)
# id PAA10801; Sun, 28 May 1995 15:24:17 -0700
# Received: from alterdial.UU.NET by relay4.UU.NET with SMTP
# id QQyrsr05731; Sun, 28 May 1995 18:17:45 -0400
#
# get stored as:
# message._received :=
# ["Received: from relay4.UU.NET by mail.netcom.com (8.6.12/Netcom) id etc...",
# "Received: from alterdial.UU.NET by relay4.UU.NET with SMTP id etc..."]
_return_path,
_received,
_date,
_message_id,
_x_sender,
_x_mailer,
_mime_version,
_content_type,
_to,
_from,
_subject,
_status,
_x_status,
_path,
_xref,
_references,
_errors_to,
_x_lines,
_x_vm_attributes,
_reply_to,
_newsgroups,
_content_length,
# The "other" field gets all the message header fields for which we have not set up
# a specific record field. The "other" record field either contains null
# or a list of header fields not stored in the previous fields.
# Message field names are NOT stripped off field bodies before being stored.
# If there are multiple occurrences of the previously selected fields
# (except _received which is assumed to occur multiple times), then
# the first occurrence is stored in the appropriate record field from
# the list above while subsequent occurences in the same header are
# stored as separate list elements in the "other" record field.
# E.g., the following header fields:
#
# ...
# Whatever: Hello
# Status: RO
# Status: XX
# Status: YY
# ...
#
# would be stored as
#
# message._status := "RO"
# message.other :=
# [..., "Whatever: Hello", "Status: XX", "Status: YY", ...]
other,
# The message text
# This field is either null or a list of lines comprising
# the message text.
message_text,
# The entire message - header and text
# This field contains a list of uninterpreted lines (no RFC 822 unfolding)
# comprising the raw message.
all
)
# getmail SEQ
procedure getmail(folder_name)
local folder, line, message, ws, item_tag, first_item_value, tag_field
local time, message_text, unfolded_line
ws := ' \t'
if type(folder_name) == "file" then
folder := folder_name
else
folder := open(folder_name, "r") |
stop("Could not open ", folder_name)
line := read(folder) | &null
# body ITR UNTIL EOF
until /line do {
# message SEQ
message := message_record()
every !message := &null
# header SEQ
# from-line SEQ
message.all := []
put(message.all, line)
line ? (
="From" & tab(many(ws)) &
message.sender <- tab(many(~ws)) & tab(many(ws)) &
message.dayofweek <- tab(many(&letters)) & tab(many(ws)) &
message.month <- tab(many(&letters)) & tab(many(ws)) &
message.day <- tab(many(&digits)) & tab(many(ws)) &
message.time <- match_time() & tab(many(ws)) &
message.year <- match_year()
) |
stop("Invalid first message header line:\n", line)
line := read(folder) | &null
# from-line END
# header-fields ITR UNTIL EOF or blank-line or From line
until /line | line == "" | is_From_line(line) do {
# header-field SEQ
# first-line SEQ
put(message.all, line)
# process quoted EOL character
if line[-1] == "\\" then
line[-1] := "\n"
unfolded_line := line
line := read(folder) | &null
# first-line END
# after-lines ITR UNTIL EOF or line doesn't start with ws or
# blank-line or From line
until /line | not any(ws, line) | line == "" | is_From_line(line) do {
# after-line SEQ
put(message.all, line)
# process quoted EOL character
if line[-1] == "\\" then
line[-1] := "\n"
if unfolded_line[-1] == "\n" then
line[1] := ""
unfolded_line ||:= line
line := read(folder) | &null
# after-line END
# after-lines END
}
process_header_field(message, unfolded_line)
# header-field END
# header-fields END
}
# header END
# post-header ALT if blank line
if line == "" then {
# optional-message-text SEQ
# blank-line SEQ
put(message.all, line)
line := read(folder) | &null
# blank-line END
# message-text ITR UNTIL EOF or From line
until /line | is_From_line(line) do {
# message-text-line SEQ
put(message.all, line)
/message.message_text := []
put(message.message_text, line)
line := read(folder) | &null
# message-text-line END
# message-text END
}
# optional-message-text END
# post-header ALT default
} else {
# post-header END
}
suspend message
# message END
# body END
}
if folder ~=== &input then
close(folder)
# getmail END
end
#############################################################################
# procedure is_From_line
#############################################################################
procedure is_From_line(line)
return line ? ="From "
end
#############################################################################
# procedure match_time
#############################################################################
procedure match_time()
suspend tab(any(&digits)) || tab(any(&digits)) || =":" ||
tab(any(&digits)) || tab(any(&digits)) || =":" ||
tab(any(&digits)) || tab(any(&digits))
end
#############################################################################
# procedure match_year
#############################################################################
procedure match_year()
suspend tab(any(&digits)) || tab(any(&digits)) ||
tab(any(&digits)) || tab(any(&digits))
end
#############################################################################
# procedure mfield_to_rfield_name
#############################################################################
procedure mfield_to_rfield_name(mfield_name)
static mapfrom, mapto
initial {
mapfrom := &ucase || "-"
mapto := &lcase || "_"
}
return "_" || map(mfield_name, mapfrom, mapto)
end
#############################################################################
# procedure process_header_field
#############################################################################
procedure process_header_field(message, field)
local record_field_name, header_field_name, field_body
static field_chars, ws
# header field name can have ASCII 33 through 126 except for colon
initial {
field_chars := cset(string(&ascii)[34:-1]) -- ':'
ws := ' \t'
}
field ? (
header_field_name <- tab(many(field_chars)) & =":" &
(tab(many(ws)) | "") &
field_body <- tab(0)
) |
stop("Invalid header field:\n", field)
record_field_name := mfield_to_rfield_name(header_field_name)
# This is one of the selected fields
if message[record_field_name] then {
# Its a "Received" field
if record_field_name == "_received" then {
# Append whole field to received field list
/message._received := []
put(message._received, field)
# Not a "Received" field
} else {
# First occurrence in header of selected field
if /message[record_field_name] then {
# Assign field body to selected record field
message[record_field_name] := field_body
# Subsequent occurrence in header of selected field
} else {
# Append whole field to other field list
/message.other := []
put(message.other, field)
}
}
# Not a selected field
} else {
# Append whole field to other field list
/message.other := []
put(message.other, field)
}
end
#############################################################################
This page produced by UniDoc on 2021/04/15 @ 23:59:45.