Source file getmail.icn
############################################################################
#
#	File:     getmail.icn
#
#	Subject:  Procedure to parse mail file
#
#	Author:   Charles Shartsis
#
#	Date:     August 19, 1996
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
# The getmail procedure reads a Unix/Internet type mail folder
# and generates a sequence of records, one per mail message.
# It fails when end-of-file is reached.  Each record contains the
# message header and message text components parsed into separate
# record fields.  The entire uninterpreted message (header and text)
# are also stored in the record.  See the description
# of message_record below.
# 
# The argument to getmail is either the name of a mail folder or
# the file handle for a mail folder which has already been opened
# for reading.  If getmail is resumed after the last message is
# generated, it closes the mail folder and returns failure.
# 
# If getmail generates an incomplete sequence (does not close the
# folder and return failure) and is then restarted (not resumed)
# on the same or a different mail folder, the previous folder file
# handle remains open and inaccessible.  This may be a problem if
# done repeatedly since there is usually an OS-imposed limit
# on number of open file handles.  Safest way to use getmail
# is using one of the below forms:
# 
#     message := message_record()
#     every message := !getmail("folder_name") do {
#     
#             process message ...
#             
#     }
# 
#     message := message_record()
#     coex := create getmail("folder_name")
#     while message := @coex do {
#     
#             process message ...
#             
#     }
# 
# Note that if message_record's are stored  in a list, the records
# may be sorted by individual components (like sender, _date, _subject)
# using sortf function in Icon Version 9.0.
#     
############################################################################
#
#  Requires:  Icon Version 9 or greater
#
############################################################################

record message_record(

    # components of "From " line
    sender,         # E-Mail address of sender
    dayofweek,
    month,
    day,
    time,
    year,
    
    # selected message header fields
    
    # The following record fields hold the contents of common
    # message header fields.  Each record field contains the
    # corresponding message field's body (as a string) or a null indicating
    # that no such field was present in the header.
    # Note that a list of message_record's
    # can be sorted on any of these fields using the sortff function.
    # The record field name is related to the message header field name
    # in the following way:
    # 
    # record_field_name := "_" || 
    #     map(message_header_field_name, &ucase || "-", &lcase || "_")
    # 
    # Thus the "Mime-Version" field body is stored in the _mime_version
    # record field.  Multiline message header fields are "unfolded"
    # into a single line according to RFC 822.  The message field
    # name, the following colon, and any immediately following
    # whitespace are stripped from the beginning of the
    # record field.  E.g., if a header contains
    # 
    # Mime-Version:           1.0
    # 
    # then
    # 
    # message._mime_version := "1.0"
    # 
    # The "Received:" field is handled differently from the other
    # fields since there are typically multiple occurrences of it
    # in the same header. The _received record field is either null or
    # contains a list of "Received:" fields.  The message field names
    # are NOT stripped off.  Thus
    # 
    # Received: from relay4.UU.NET by mail.netcom.com (8.6.12/Netcom)
    #     id PAA10801; Sun, 28 May 1995 15:24:17 -0700
    # Received: from alterdial.UU.NET by relay4.UU.NET with SMTP 
    #     id QQyrsr05731; Sun, 28 May 1995 18:17:45 -0400
    # 
    # get stored as:
    # message._received :=
    # ["Received: from relay4.UU.NET by mail.netcom.com (8.6.12/Netcom)    id etc...",
    # "Received: from alterdial.UU.NET by relay4.UU.NET with SMTP     id etc..."]
     
    _return_path,
    _received,
    _date,
    _message_id,
    _x_sender,
    _x_mailer,
    _mime_version,
    _content_type,
    _to,
    _from,
    _subject,
    _status,
    _x_status,
    _path,
    _xref,
    _references,
    _errors_to,
    _x_lines,
    _x_vm_attributes,
    _reply_to,
    _newsgroups,
    _content_length,
    
    # The "other" field gets all the message header fields for which we have not set up
    # a specific record field.  The "other" record field either contains null
    # or a list of header fields not stored in the previous fields.
    # Message field names are NOT stripped off field bodies before being stored.
    # If there are multiple occurrences of the previously selected fields
    # (except _received which is assumed to occur multiple times), then 
    # the first occurrence is stored in the appropriate record field from
    # the list above while subsequent occurences in the same header are
    # stored as separate list elements in the "other" record field.
    # E.g., the following header fields:
    # 
    # ...
    # Whatever: Hello
    # Status: RO
    # Status: XX
    # Status: YY
    # ...
    # 
    # would be stored as
    # 
    # message._status := "RO"
    # message.other :=
    #     [..., "Whatever: Hello", "Status: XX", "Status: YY", ...]

    other,
    
    # The message text
    # This field is either null or a list of lines comprising
    # the message text.
    message_text,
    
    # The entire message - header and text
    # This field contains a list of uninterpreted lines (no RFC 822 unfolding)
    # comprising the raw message.
    
    all
    
)

# getmail SEQ
procedure getmail(folder_name)

    local folder, line, message, ws, item_tag, first_item_value, tag_field
    local time, message_text, unfolded_line
    
    ws := ' \t'
    
    if type(folder_name) == "file" then
        folder := folder_name
    else
        folder := open(folder_name, "r") |
            stop("Could not open ", folder_name)
    line := read(folder) | &null

    # body ITR UNTIL EOF
    until /line do {
        # message SEQ
            message := message_record()
            every !message := &null
            # header SEQ
                # from-line SEQ
                    message.all := []
                    put(message.all, line)
                    line ? (
                        ="From" & tab(many(ws)) &
                        message.sender <- tab(many(~ws)) & tab(many(ws)) &
                        message.dayofweek <- tab(many(&letters)) & tab(many(ws)) &
                        message.month <- tab(many(&letters)) & tab(many(ws)) &
                        message.day <- tab(many(&digits)) & tab(many(ws)) &
                        message.time <- match_time() & tab(many(ws)) &
                        message.year <- match_year()
                    ) |
                    stop("Invalid first message header line:\n", line)
                    line := read(folder) | &null
                # from-line END
                # header-fields ITR UNTIL EOF or blank-line or From line
                until /line | line == "" | is_From_line(line) do {
                    # header-field SEQ
                        # first-line SEQ
                            put(message.all, line)
                            # process quoted EOL character
                            if line[-1] == "\\" then
                                line[-1] := "\n"
                            unfolded_line := line
                            line := read(folder) | &null
                        # first-line END
                        # after-lines ITR UNTIL EOF or line doesn't start with ws or 
                        #               blank-line or From line
                        until /line | not any(ws, line) | line == "" | is_From_line(line) do {
                            # after-line SEQ
                                put(message.all, line)
                                # process quoted EOL character
                                if line[-1] == "\\" then
                                    line[-1] := "\n"
                                if unfolded_line[-1]  == "\n" then
                                    line[1] := ""
                                unfolded_line ||:= line
                                line := read(folder) | &null
                            # after-line END
                        # after-lines END
                        }
                        process_header_field(message, unfolded_line)
                    # header-field END
                # header-fields END
                }
            # header END
            # post-header ALT if blank line
            if line == "" then {
                # optional-message-text SEQ
                    # blank-line SEQ
                        put(message.all, line)
                        line := read(folder) | &null
                    # blank-line END
                    # message-text ITR UNTIL EOF or From line
                    until /line | is_From_line(line) do {
                        # message-text-line SEQ
                            put(message.all, line)
                            /message.message_text := []
                            put(message.message_text, line)
                            line := read(folder) | &null
                        # message-text-line END
                    # message-text END
                    }
                # optional-message-text END
            # post-header ALT default
            } else {
            # post-header END
            }
           suspend message
        # message END
    # body END
    }

    if folder ~=== &input then
        close(folder)
# getmail END
end

#############################################################################
#                   procedure is_From_line
#############################################################################

procedure is_From_line(line)

    return line ? ="From "

end

#############################################################################
#                   procedure match_time
#############################################################################

procedure match_time()

    suspend tab(any(&digits)) || tab(any(&digits)) || =":" ||
            tab(any(&digits)) || tab(any(&digits)) || =":" ||
            tab(any(&digits)) || tab(any(&digits))

end

#############################################################################
#                   procedure match_year
#############################################################################

procedure match_year()

    suspend tab(any(&digits)) || tab(any(&digits)) ||
            tab(any(&digits)) || tab(any(&digits))

end

#############################################################################
#                   procedure mfield_to_rfield_name
#############################################################################

procedure mfield_to_rfield_name(mfield_name)

    static mapfrom, mapto
    
    initial {
        mapfrom := &ucase || "-"
        mapto := &lcase || "_"
    }

    return "_" || map(mfield_name, mapfrom, mapto)

end

#############################################################################
#                   procedure process_header_field
#############################################################################

procedure process_header_field(message, field)

    local record_field_name, header_field_name, field_body
    static field_chars, ws
    
    # header field name can have ASCII 33 through 126 except for colon
    initial {
        field_chars := cset(string(&ascii)[34:-1]) -- ':'
        ws := ' \t'
    }
    
    field ? (
        header_field_name <- tab(many(field_chars)) & =":" &
        (tab(many(ws)) | "") &
        field_body <- tab(0)
    ) |
    stop("Invalid header field:\n", field)
    record_field_name := mfield_to_rfield_name(header_field_name)
    
    # This is one of the selected fields
    if message[record_field_name] then {
    
        # Its a "Received" field
        if record_field_name == "_received" then {
            # Append whole field to received field list
            /message._received := []
            put(message._received, field)
        
        # Not a "Received" field
        } else {
        
            # First occurrence in header of selected field
            if /message[record_field_name] then {
                # Assign field body to selected record field
                message[record_field_name] := field_body
            
            # Subsequent occurrence in header of selected field
            } else {
                # Append whole field to other field list
                /message.other := []
                put(message.other, field)
            }
        }
    
    # Not a selected field
    } else {
                # Append whole field to other field list
                /message.other := []
                put(message.other, field)
    }
    
end

#############################################################################


This page produced by UniDoc on 2021/04/15 @ 23:59:45.