komimportmail

#!/usr/bin/env python
# -*- python -*- -*- coding: iso-8859-1 -*-
# LysKOM email import

# $Id: komimportmail,v 1.55 2008/03/16 19:25:48 kent Exp $
# (C) 1999 Kent Engström. Released under GPL.

# Note: Python 2.2.2+ is needed because of email

import sys
import time
import email, email.Utils, email.Header
from email.Iterators import typed_subpart_iterator
import lockingshelve
import kom
import os
from komimportmail_config import *
import types
import re
import getopt
import traceback

# Get revision number from RCS/CVS
# The expanded keyword has the form "$Revision: N.NN $".  Dropping the
# 11-character "$Revision: " prefix and the trailing " $" leaves just
# the number ("1.55" for the string below).
vc_revision = "$Revision: 1.55 $"
revision = vc_revision[11:-2]

# Error reporting: send message to stderr and logfile, exit with right code
def error_exit(str, exit_code):
    """Report a fatal error and terminate the process.

    The message, prefixed with "FATAL: ", is written to stderr and to
    the log (at LOG_ERROR level, with the exit code appended).  The
    process then exits with exit_code via sys.exit().

    Note: the first parameter shadows the builtin ``str``; the name is
    kept so that existing callers are unaffected.
    """
    fatal_line = "FATAL: " + str
    sys.stderr.write("%s\n" % fatal_line)
    log("%s (exit: %d)" % (fatal_line, exit_code), LOG_ERROR)
    sys.exit(exit_code)

# Exit codes from sendmail-8.9.3/src/sysexits.c:
# These are the process exit statuses used by this script (for example
# as the exit_code argument of error_exit).
EX_OK = 0
EX_USAGE = 64
EX_DATAERR = 65
EX_NOUSER = 67
EX_UNAVAILABLE = 69 # Catchall
EX_TEMPFAIL = 75 # This is the key to making sendmail requeue a mail!

# Log levels.  A message is written by log() when its level is less
# than or equal to the configured log_level, so a lower number means a
# more important message.
LOG_ERROR, LOG_WARNING, LOG_INFO, LOG_DEBUG = 0, 1, 2, 3

# Numeric level -> name (used by find_log_level)
log_levels = {
    LOG_ERROR: "ERROR",
    LOG_WARNING: "WARNING",
    LOG_INFO: "INFO",
    LOG_DEBUG: "DEBUG",
}

def find_log_level(name):
    """Return the numeric log level whose name equals name.

    The comparison is case-insensitive (name is upper-cased and compared
    with the values of log_levels).

    Raises ValueError, naming the offending value, if there is no such
    level.
    """
    wanted = name.upper()
    for (level, level_name) in log_levels.items():
        if level_name == wanted:
            return level
    raise ValueError("unknown log level: %r" % (name,))

# Logging
log_file = None  # log file object; opened on the first call to log()
log_pid = None   # our process id; looked up on the first call to log()

def log(line, level):
    """Append line to the log file if level is enabled.

    The log file (LOG_FILE, opened in append mode) and the process id
    are initialised on the first call, regardless of level.  The line
    is written, prefixed with the pid, and flushed only when level is
    less than or equal to the global log_level.
    """
    global log_file, log_pid

    if log_file is None:
        log_file = open(LOG_FILE, "a")
    if log_pid is None:
        log_pid = os.getpid()

    if level > log_level:
        return

    log_file.write("%d: %s\n" % (log_pid, line))
    log_file.flush()

# Newline removal:
# newline (+ any following space/tab whitespace) becomes a single space
def remove_newlines(s):
    """Return s with every newline, plus any spaces/tabs directly
    following it, replaced by one single space."""
    newline_and_indent = re.compile("\n[ \t]*")
    return newline_and_indent.sub(" ", s)

# Class to handle Message-ID:s

class MessageIDDb:
    """Message-ID database used for threading imported mails.

    Backed by a lockingshelve.LockingShelve keyed on Message-ID.  Each
    stored value is one of:

      (text_no, time_added)  a tuple: the mail with this Message-ID has
                             been imported as article text_no.
      [text_no, ...]         a list: articles that should be linked as
                             comments to this Message-ID if it is seen
                             later.
    """

    def __init__(self, filename):
        """Open the shelve in filename (LockingShelve may raise)."""
        self.filename = filename
        self.db = lockingshelve.LockingShelve(filename) # May raise exc.

    def get_text_no_from_id(self, id):
        """Return the text_no registered for Message-ID id.

        Returns None if the text has not been entered yet, i.e. if the
        lookup fails for any reason or the stored value is not a
        (text_no, time) tuple.
        """
        try:
            data = self.db[id]
            if not isinstance(data, tuple):
                return None
            (text_no, added_time_t) = data
            return text_no
        except Exception:
            # Best effort: a failed lookup means "not imported yet"
            return None

    def get_texts_commenting(self, id):
        """Return the list of text_nos wanting to comment Message-ID id.

        Returns None if no such list is stored (lookup failure or the
        stored value is not a list).
        """
        try:
            data = self.db[id]
        except Exception:
            # Best effort: a failed lookup means "nothing registered"
            return None
        if not isinstance(data, list):
            return None
        return data

    def register_text(self, id, text_no):
        """Record that Message-ID id has been imported as text_no.

        The current time (integer seconds) is stored with the number.
        """
        self.db[id] = (text_no, int(time.time()))

    def register_link(self, id, text_no):
        """Record that text_no should comment Message-ID id if it is
        seen later.

        Nothing is changed if id already maps to something that is not
        a list (for instance an already imported text).
        """
        shelf = self.db.exclusive_open()
        try:
            try:
                text_nos = shelf[id]
            except Exception:
                text_nos = []
            if not isinstance(text_nos, list):
                return
            text_nos.append(text_no)
            shelf[id] = text_nos
        finally:
            self.db.exclusive_close()

    def _text_exists(self, text_no):
        """Return 1 if text_no can be fetched through the module-level
        connection c, 0 if that raises kom.NoSuchText."""
        try:
            c.textstats[text_no] # Use the cache
        except kom.NoSuchText:
            return 0
        return 1

    def gc_message_id_db(self):
        """Garbage collect the Message-ID database.

        Tuple entries are removed when their text no longer exists.
        List entries are removed when none of the listed texts exists.
        Entries of any other type are kept and a warning is logged.
        The exclusive lock is released even if an exception interrupts
        the sweep.
        """
        log("Garbage collection started", LOG_INFO)
        s = self.db.exclusive_open()
        try:
            ctr_left = 0
            ctr_gone = 0
            # Snapshot the keys, since entries are deleted in the loop
            id_list = list(s.keys())

            log("Will check %d articles" % (len(id_list)),
                LOG_INFO)
            for id in id_list:
                data = s[id]
                if isinstance(data, tuple):
                    # Normal Message-ID --> text_no
                    (text_no, created) = data
                    if self._text_exists(text_no):
                        ctr_left = ctr_left + 1
                    else:
                        ctr_gone = ctr_gone + 1
                        log("GC %7d: %s" % (text_no, id), LOG_INFO)
                        del s[id]
                elif isinstance(data, list):
                    # Backwards Message-ID --> [text_no_list]
                    all_texts_gone = 1
                    for text_no in data:
                        if self._text_exists(text_no):
                            all_texts_gone = 0
                            break
                    if all_texts_gone:
                        ctr_gone = ctr_gone + 1
                        log("GC BACK   : %s" % (id), LOG_INFO)
                        del s[id]
                    else:
                        ctr_left = ctr_left + 1
                else:
                    log("%s: unknown type, cannot GC" % (id), LOG_WARNING)
                    ctr_left = ctr_left + 1
        finally:
            self.db.exclusive_close()

        log("Garbage collection finished (left: %d, gone: %d)" % \
                (ctr_left, ctr_gone),
            LOG_INFO)

    def dump(self):
        """Dump the Message-ID database to stdout, one entry per line.

        The shared lock is released even if printing fails.
        """
        s = self.db.shared_open()
        try:
            for id in list(s.keys()):
                data = s[id]
                if isinstance(data, tuple):
                    (text_no, created) = data
                    sys.stdout.write("%s --> %d\n" % (id, text_no))
                elif isinstance(data, list):
                    sys.stdout.write("%s <== %s\n" % (id, data))
                else:
                    sys.stdout.write("%s STRANGE DATA\n" % (id))
        finally:
            self.db.shared_close()

# Create an article or add recipients to an existing one
def create_article_or_add_recipients(mail,
                                     envelope_sender,
                                     envelope_recipients,
                                     with_hdr):
    """Import mail as a new article, or add recipients to the article
    already created for the same Message-ID.

    Exits through error_exit() if the mail has no usable Message-ID
    (EX_DATAERR) or if no recipient name maps to a conference
    (EX_NOUSER).  Returns without doing anything if the Message-ID is
    recognised as coming from an exporter, or if the import
    restrictions leave no recipient.
    """
    # Log start
    log("Starting on %s." % time.ctime(time.time()), LOG_INFO)

    # Get Message-ID from mail (a value without <...> is used as it is)
    message_id = None
    if "Message-ID" in mail:
        message_id = last_message_id_in(mail["Message-ID"],
                                        fail_on_bad_format = False)
    if message_id in (None, "<>"):
        error_exit("No or empty message ID in mail", EX_DATAERR)

    # We do not touch something exported by an exporter if we
    # recognize it!
    if exporter_id_to_article_no(message_id) is not None:
        log("Not touching exported mail with Message-ID: %s" % message_id, LOG_INFO)
        return

    log("Handling mail with Message-ID: %s" % message_id, LOG_INFO)

    # Lookup Message-ID -> Text-No (None if this is a new mail)
    existing_text_no = message_id_db.get_text_no_from_id(message_id)
    is_new_mail = existing_text_no is None
    if is_new_mail:
        log("This mail is new", LOG_INFO)
    else:
        log("This is existing article %d" % existing_text_no, LOG_INFO)

    # Map recipient names to conference numbers
    conf_nos = name_list_to_conf_no_list(envelope_recipients)
    if not conf_nos:
        error_exit("No valid recipients found", EX_NOUSER)

    # Remove recipients based on the import restrictions aux-items
    # mx-refuse-import and mx-allow-envelope-sender-regexp
    conf_nos = filter_recipients(conf_nos, mail, envelope_sender)
    if not conf_nos:
        log("No recipients left after import restrictions handling", LOG_INFO)
        return

    if is_new_mail:
        # Create a new article based on this mail
        create_article(mail, envelope_sender, conf_nos,
                       message_id, with_hdr)
        return

    # Add new recipients to the existing article
    add_recipients(mail, envelope_sender, conf_nos,
                   message_id, existing_text_no)


# Create a new article based on this mail
def create_article(mail, envelope_sender, envelope_recipients,
                   message_id, with_hdr):
    """Create the article(s) for a mail that has not been imported before.

    The first discrete MIME part becomes the main article; every
    further part becomes an appendix commenting the main article.  The
    Message-ID database is then updated so that earlier and later mails
    in the same thread can be linked to this article.
    """

    # Threading: find out which article (if any) we are a comment to.
    parent_message_id = message_id_of_parent(mail)
    parent_text_no = None
    if parent_message_id is not None:
        # If the parent seems to be an exported text, that number is
        # used directly, bypassing the Message-ID database.
        parent_text_no = exporter_id_to_article_no(parent_message_id)
        if parent_text_no is not None:
            log("Comment to %s (exported article %d) " % \
                (parent_message_id, parent_text_no), LOG_INFO)
        else:
            parent_text_no = message_id_db.get_text_no_from_id(parent_message_id)
            if parent_text_no is None:
                log("Comment to %s (not present in KOM?)" % \
                    (parent_message_id), LOG_INFO)
            else:
                log("Comment to %s (article %d)" % \
                    (parent_message_id, parent_text_no), LOG_INFO)

    # MIME multipart handling.  The hierarchical structure of the MIME
    # message is not preserved, as that would give empty articles for
    # the "inner nodes" (e.g. multipart/mixed).  Instead all discrete
    # parts are put in a linear list: the first one is the main article
    # and the remaining ones become comments to it.
    parts = linear_list_of_discrete_parts(mail)
    log("Number of MIME parts: %d" % len(parts), LOG_INFO)

    # The first (perhaps only) part is the main article
    text_no = create_article_part(mail,
                                  envelope_sender,
                                  envelope_recipients,
                                  message_id,
                                  parts[0],
                                  parent_message_id,
                                  parent_text_no,
                                  top = None,
                                  with_hdr = with_hdr)

    # The rest of the parts are comments to the first part
    appendix_text_nos = []
    for appendix in parts[1:]:
        appendix_text_nos.append(create_article_part(mail,
                                                     envelope_sender,
                                                     envelope_recipients,
                                                     message_id,
                                                     appendix,
                                                     parent_message_id,
                                                     parent_text_no,
                                                     top = text_no))

    # The main article gets one AI_MX_MIME_PART_IN per appendix.  This
    # has to be done afterwards, as the appendix text numbers were not
    # known when the main article was created.
    if appendix_text_nos:
        add_mx_mime_part_in_items(text_no, appendix_text_nos)

    # Threading. If our Message-ID has a list of
    # texts-that-want-to-comment-us attached, process it now.
    # Note that we must do this before entering our
    # Message-ID -> text_no mapping below.
    waiting_comments = message_id_db.get_texts_commenting(message_id)
    if waiting_comments is not None:
        add_comment_links(text_no, waiting_comments)

    # Threading. Register Message-ID -> our own text_no.
    try:
        message_id_db.register_text(message_id, text_no)
        log("Message-ID recorded in database.", LOG_INFO)
    except:
        log("Failed to record Message-ID in database.", LOG_WARNING)

    # Threading. If we are trying to comment a Message-ID that
    # is not yet present: register for future threading.
    if parent_text_no is None and parent_message_id is not None:
        log("Registering us as comment to non-present %s" %
            parent_message_id, LOG_INFO)
        message_id_db.register_link(parent_message_id, text_no)


# Create article part
# The "top" argument tells us if this is the main article (None) or
# an appendix (text number of the main article). A lot of actions
# below depend on this.
def _address_aux_items(mail):
    """Return the aux-items built from the From, To, Cc and Reply-To
    headers of mail, in that order."""
    # Aux-items for address header fields come in three formats:
    #
    # 1) Complete address including name (mailbox in the
    #    terminology of draft-ietf-drums-msg-fmt-07.txt)
    #    Example: "Joe Q. Public" <john.q.public@example.com>
    #         or  john.q.public@example.com
    #
    # 2) Just the bare email address (called addr-spec)
    #    Example: john.q.public@example.com
    #
    # 3) Just the name (a display-name in the drums specification):
    #    Example: Joe Q. Public
    #
    # The rationale for using different formats for different aux-items
    # is that the LysKOM clients should not have to be able to
    # parse addresses in order to use Reply-To addresses, etc.
    items = []
    for (header_name, ai_mailbox,   ai_addr_spec,       ai_display_name) in [
        ("From",      None,         kom.AI_MX_FROM,     kom.AI_MX_AUTHOR),
        ("To",        kom.AI_MX_TO, None,               None),
        ("Cc",        kom.AI_MX_CC, None,               None),
        ("Reply-To",  None,         kom.AI_MX_REPLY_TO, None),
        ]:

        for (display_name, addr_spec) in \
            email.Utils.getaddresses(mail.get_all(header_name, [])):
            # Remove outer double quotes and RFC2047 coding if
            # present in the display name.
            if display_name[0:1] == '"' and display_name[-1:] == '"':
                display_name = display_name[1:-1]
            display_name = rfc2047_decode_lossy_to_iso_8859_1(display_name)

            # Recreate mailbox
            if display_name:
                mailbox = "%s <%s>" % (display_name, addr_spec)
            else:
                mailbox = addr_spec

            # Add aux-items of the correct type
            if ai_mailbox:
                items.append(kom.AuxItem(ai_mailbox, mailbox))
            if ai_addr_spec:
                items.append(kom.AuxItem(ai_addr_spec, addr_spec))
            if ai_display_name and display_name != "":
                items.append(kom.AuxItem(ai_display_name, display_name))
    return items


def _mx_date_string(parsed_date):
    """Format a 10-tuple from email.Utils.parsedate_tz as
    "YYYY-MM-DD HH:MM:SS +HHMM"; "-0000" is used when the tuple has no
    timezone offset (element 9 is None)."""
    tz_secs = parsed_date[9]
    if tz_secs is None:
        tz = "-0000" # TZ not known
    else:
        tz_mins = tz_secs // 60
        if tz_mins < 0:
            sign = "-"
            tz_mins = -tz_mins
        else:
            sign = "+"
        tz = "%s%02d%02d" % (sign, tz_mins // 60, tz_mins % 60)

    return "%04d-%02d-%02d %02d:%02d:%02d %s" % \
           (parsed_date[0], parsed_date[1], parsed_date[2],
            parsed_date[3], parsed_date[4], parsed_date[5],
            tz)


def create_article_part(mail,
                        envelope_sender,
                        recipient_list,
                        message_id,
                        part,
                        parent_message_id = None,
                        parent_text_no = None,
                        top = None,
                        with_hdr = 0):
    """Create one LysKOM text from one discrete MIME part of mail.

    mail               the complete mail (source of the mail headers)
    envelope_sender    stored as an aux-item on the main article
    recipient_list     conference numbers that become recipients
    message_id         Message-ID of the mail
    part               the MIME part providing content type and body
    parent_message_id  Message-ID this mail replies to, or None
    parent_text_no     text number of that parent, or None
    top                None for the main article; for an appendix, the
                       text number of the main article
    with_hdr           if true, prepend the output of createBodyHeaders
                       to the body

    Returns the number of the created text; exceptions from
    send_to_kom propagate.
    """

    # Prepare Misc-Info and Aux Item list for later use
    misc_info = kom.CookedMiscInfo()
    aux_items = []
    is_main_article = top is None

    # Add recipients to Misc-Info
    for conf_no in recipient_list:
        mir = kom.MIRecipient(type = kom.MIR_TO, recpt = conf_no)
        misc_info.recipient_list.append(mir)

    if is_main_article:
        # Threading of main article based on In-Reply-To/References
        if parent_text_no is not None:
            mic = kom.MICommentTo(kom.MIC_COMMENT, parent_text_no)
            misc_info.comment_to_list.append(mic)
    else:
        # Threading of appendices as comments (or footnotes) to the
        # main article. We mark the appendix with a special aux-item,
        # mx-mime-belongs-to to designate it as a MIME appendix.
        mic = kom.MICommentTo(APPENDIX_COMMENT_TYPE, top)
        misc_info.comment_to_list.append(mic)
        aux_items.append(kom.AuxItem(kom.AI_MX_MIME_BELONGS_TO, str(top)))

    # Let the world know about this marvelous creating software :-)
    aux_items.append(kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                                 "komimportmail %s" % revision))

    # Envelope information: only on the main article (not on
    # appendices).  The envelope recipients are currently not stored
    # anywhere.
    if is_main_article:
        aux_items.append(kom.AuxItem(kom.AI_MX_ENVELOPE_SENDER,
                                     envelope_sender))

    # Mail headers: these aux-items are put on the main article and on
    # the appendices.

    # Message-ID
    aux_items.append(kom.AuxItem(kom.AI_MX_MESSAGE_ID, message_id))

    # In-Reply-To information
    if parent_message_id is not None:
        aux_items.append(kom.AuxItem(kom.AI_MX_IN_REPLY_TO,
                                     parent_message_id))

    # From, To, Cc and Reply-To
    aux_items.extend(_address_aux_items(mail))

    # Date
    if "Date" in mail and not SKIP_AI_MX_DATE:
        parsed_date = email.Utils.parsedate_tz(mail["Date"])
        if parsed_date is not None:
            aux_items.append(kom.AuxItem(kom.AI_MX_DATE,
                                         _mx_date_string(parsed_date)))

    # The complete set of mail headers (no decoding of these)
    # This one should not be put on the appendices:
    if is_main_article:
        header_str_list = [name + ": " + value
                           for (name, value) in mail.items()]
        aux_items.append(kom.AuxItem(kom.AI_MX_MISC,
                                     "\n".join(header_str_list)))

    # MIME headers: these aux-items are put on the main article and on
    # the appendices.

    # MIME Content type
    # The server rejects bad content-types that do not contain a slash,
    # so we don't try to feed it such bogosities.
    mime_type = part.get("Content-Type", "text/plain").lower()
    if mime_type.find("/") == -1:
        log("Bad MIME type %s not used" % mime_type, LOG_WARNING)
    else:
        aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE,
                                     mime_type))
        log("MIME type of this part: %s" % mime_type, LOG_INFO)

    # Get the charset from the Content-Type (None if not given)
    charset = part.get_param("charset")
    log("Charset of this part: %s" % charset, LOG_INFO)

    # MIME filename (Name parameter of Content-Type header)
    mime_filename = part.get_param("name")
    if mime_filename:
        aux_items.append(kom.AuxItem(kom.AI_MX_MIME_FILE_NAME,
                                     mime_filename))

    # The complete set of MIME part headers (no decoding of these)
    # For the first part, the MIME headers are mixed together
    # with the mail headers. Therefore, we try to remove
    # non-MIME-headers here.
    aux_items.append(kom.AuxItem(kom.AI_MX_MIME_MISC,
                                 "\n".join(only_mime_headers(part))))

    # Subject
    if "Subject" in mail:
        subject = rfc2047_decode_to_unicode(mail["Subject"])
    else:
        subject = ""

    # The subject of an appendix is slightly modified
    if not is_main_article:
        if mime_filename:
            subject = APPENDIX_SUBJECT_PREFIX % mime_filename + subject
        else:
            subject = APPENDIX_SUBJECT_PREFIX_NONAME + subject

    # Encode subject to match the body
    subject = best_effort_encode_subject_to_charset(subject, charset)
    subject = remove_newlines(subject)

    # get_payload sometimes returns None although is_multipart is
    # False (seen for the text/plain parts of message/delivery-status).
    # Workaround: fall back to the string form of the whole part.
    payload = part.get_payload(decode=True)
    if payload is None:
        payload = str(part)

    if with_hdr:
        payload = createBodyHeaders(mail, part, top) + payload
    return send_to_kom(subject, payload, misc_info, aux_items)

def createBodyHeaders(mail, part, top):
    """Return a short header block to be put in front of the body.

    For the main article (top is None) the block has one line for each
    of the From, To, Cc and Message-ID headers present in mail,
    followed by an empty line.  From, To and Cc are RFC2047-decoded to
    ISO-8859-1.  For appendices, or when none of the headers is
    present, the empty string is returned.

    part is not used.
    """
    if top is not None:
        return ""

    lines = []
    for header_name in ("From", "To", "Cc"):
        if header_name in mail:
            decoded = rfc2047_decode_lossy_to_iso_8859_1(mail[header_name])
            lines.append("%s: %s\n" % (header_name, decoded))
    if "Message-ID" in mail:
        lines.append("Message-ID: %s\n" % mail["Message-ID"])

    if not lines:
        return ""
    # An empty line separates the header block from the body
    return "".join(lines) + "\n"
    

# Send a text (subject, body, Misc-Info and Aux-Items) to KOM.
# Destructively prune comment-to/footnote-to Misc-Info items
# if they seem to cause problems.
# Also, replace too large bodies with an error indication.
# Return text_no or throw exception.
def _drop_comment_link(misc_info, error_status):
    """Remove from misc_info.comment_to_list the first entry pointing at
    the text number given by error_status.

    Returns 1 if an entry was removed, 0 if error_status is not a
    number or no entry refers to that text.
    """
    try:
        bad_text_no = int(str(error_status))
    except ValueError:
        return 0
    log("Article creation failed as commented article %d does not exist" % bad_text_no, LOG_WARNING)
    for i in range(len(misc_info.comment_to_list)):
        if misc_info.comment_to_list[i].text_no == bad_text_no:
            del misc_info.comment_to_list[i]
            log("Retrying with offending misc-item removed", LOG_DEBUG)
            return 1
    return 0


def send_to_kom(subject, body, misc_info, aux_items):
    """Create a text in KOM and return its text_no.

    subject and body are joined by a newline to form the text.
    Recovery attempts made before giving up:

      * kom.StringTooLong: the body is replaced, once, by an error
        indication.  If the server still refuses, the exception
        propagates.
      * kom.NoSuchText: taken to mean that a commented text no longer
        exists.  The offending comment-to entry is removed from
        misc_info (destructively) and creation is retried.  If no such
        entry can be found, the exception propagates.

    kom.AuxItemPermission is logged and re-raised.
    """
    log("send_to_kom with aux_items: %s" % aux_items, LOG_DEBUG)
    body_replaced = 0
    while 1:
        try:
            text_no = kom.ReqCreateText(c,
                                        subject + "\n" + body,
                                        misc_info, aux_items).response()
        except kom.StringTooLong:
            if body_replaced:
                # Not even the short replacement was accepted
                raise
            # Replace the body with an error indication
            body = "[komimportmail Error: The LysKOM server thinks %d kB is too big.]" % (len(body) // 1024)
            body_replaced = 1
        except kom.NoSuchText:
            # Each retry removes one entry, so this cannot loop forever
            if not _drop_comment_link(misc_info, sys.exc_info()[1]):
                raise
        except kom.AuxItemPermission:
            # Debugging!
            log("Bad aux-item is: %s" % sys.exc_info()[1], LOG_ERROR)
            raise
        else:
            log("Article %d created" % text_no, LOG_INFO)
            return text_no

# Add an AI_MX_MIME_PART_IN to the first part
# for each appendix.
def add_mx_mime_part_in_items(parent_no, child_nos):
    """Add one AI_MX_MIME_PART_IN aux-item to text parent_no for each
    text number in child_nos.

    The server's answer is awaited, as for the other requests in this
    file, so that a refusal is seen here.  A kom.ServerError is logged
    as a warning and otherwise ignored.
    """
    aux_items = [kom.AuxItem(kom.AI_MX_MIME_PART_IN, str(child_no))
                 for child_no in child_nos]
    try:
        kom.ReqModifyTextInfo(c, parent_no, [], aux_items).response()
    except kom.ServerError:
        log("Failed (%s) to add mx-mime-part-in items" % \
            (sys.exc_info()[0]), LOG_WARNING)
        

# Add more recipients to this article
# We do not add new aux-items, even if some headers may be different
# in this copy (e.g. the "Received:" trace lines).
def add_recipients(mail, envelope_sender, recipient_list,
                   message_id, existing_text_no):
    """Add every conference in recipient_list as a recipient of the
    existing article existing_text_no.

    Each addition is tried on its own.  A kom.ServerError is logged as
    a warning and the loop goes on with the next conference.  mail,
    envelope_sender and message_id are not used.
    """
    for conf_no in recipient_list:
        request = None
        try:
            request = kom.ReqAddRecipient(c,
                                          existing_text_no,
                                          conf_no,
                                          kom.MIR_TO)
            request.response()
        except kom.ServerError:
            log("Failed (%s) to add conference %d as recipient" % \
                (sys.exc_info()[0], conf_no), LOG_WARNING)
            continue
        log("Added conference %d as recipient" % conf_no, LOG_INFO)

# Add comment links to an article
def add_comment_links(text_no, commenting_text_nos):
    """Make each text in commenting_text_nos a comment to text_no.

    Each link is tried on its own.  A kom.ServerError is logged as a
    warning and the loop goes on with the next text.
    """
    for comment_no in commenting_text_nos:
        try:
            kom.ReqAddComment(c, comment_no, text_no).response()
        except kom.ServerError:
            log("Failed (%s) to add %d as comment to %d" % \
                (sys.exc_info()[0], comment_no, text_no),
                LOG_WARNING)
            continue
        log("Added %s as comment to %d" % (comment_no, text_no), LOG_INFO)

# Convert a list of recipient names to a list of conference numbers
# Note that the recipient names should not contain the @ or the domain
# part of the email address, just the local part.
#
# Allowable recipient name formats (in the order they are checked):
# 
# 1) An integer, or an integer prefixed by a single "p" or "P".
#    The conference/letterbox corresponding to this
#    conf_no is the recipient. Note that an invalid integer results
#    in a miss. No further alternatives are checked.
#
# 2) Any other string. Underscores and periods are converted to spaces
#    and the name looked up using lookup-z-name. If there is exactly one
#    match, that conference/letterbox is chosen as recipient.

def _numeric_conf_no(name):
    """Return the integer given by name ("123", "p123" or "P123"), or
    None if name does not have that form."""
    try:
        return int(name)
    except ValueError:
        pass
    if name[0:1] in "pP":
        try:
            return int(name[1:])
        except ValueError:
            pass
    return None


def name_list_to_conf_no_list(name_list):
    """Convert a list of recipient names to a list of conference numbers.

    Each name is the local part of an address and is handled like this:

    1) An integer, optionally prefixed by "p" or "P", is taken as a
       conf_no.  It is accepted if the conference can be fetched from
       the server and otherwise ignored with a warning.  No name lookup
       is made for numeric names (this includes "0").

    2) Any other string has underscores and periods replaced by spaces
       and is looked up by name.  It is accepted only if there is
       exactly one match.

    Names that cannot be resolved are logged and left out.
    """
    conf_no_list = []
    for name in name_list:

        # 1) Integer = conf_no
        conf_no = _numeric_conf_no(name)
        if conf_no is not None:
            # Do a simple existence check and add.
            try:
                c.conferences[conf_no]
            except Exception:
                log("Numeric recipient %d ignored (conference not found)" % \
                    conf_no, LOG_WARNING)
            else:
                conf_no_list.append(conf_no)
                log("Numeric recipient %d seems to be OK" % conf_no, LOG_INFO)
            continue # No other formats will be checked

        # 2) Other string = name to look up
        changed_name = name.replace("_", " ").replace(".", " ")
        matches = c.lookup_name(changed_name,
                                want_pers = 1,
                                want_confs = 1)
        if len(matches) == 0:
            log("Name recipient '%s' gave no match" % name, LOG_WARNING)
        elif len(matches) > 1:
            log("Name recipient '%s' gave %d matches (ambiguous)" % \
                (name, len(matches)),
                LOG_WARNING)
        else:
            (match_no, match_name) = (matches[0][0], matches[0][1])
            log("Name recipient '%s' matches %d: '%s'" % \
                (name, match_no, match_name),
                LOG_INFO)
            conf_no_list.append(match_no)

    return conf_no_list

#
# Given a set of mail headers, guess the Message-ID this message
# is a comment to. If this does not appear to be a comment to
# anything, return None.
# Algorithm: choose last thing in <..> of In-Reply-To. If that
# header does not exist, try using References instead.

def message_id_of_parent(mail):
    """Guess the Message-ID of the message that mail is a reply to.

    In-Reply-To is tried first and then References.  For the first of
    these headers that contains something in <...>, the last such ID is
    returned.  Returns None if neither header gives an ID.
    """
    for field_name in ["In-Reply-To", "References"]:
        if field_name not in mail:
            continue
        field_value = mail[field_name]
        msg_id = last_message_id_in(field_value)
        if msg_id:
            log("Threading on %s: %s)" % \
                (field_name, msg_id),
                LOG_INFO)
            return msg_id
        # Header present but without an ID: try the next header
        log("No ID found in %s: '%s'" % \
            (field_name, field_value),
            LOG_DEBUG)
    log("No threading possible.", LOG_INFO)
    return None

def last_message_id_in(header, fail_on_bad_format = True):
    """Return the text from the last "<" up to and including the last
    ">" in header.

    If there is no "<", or the last "<" does not come before the last
    ">", the result is None when fail_on_bad_format is true and the
    unchanged header otherwise.
    """
    start = header.rfind("<")
    end = header.rfind(">")
    if start != -1 and start < end:
        return header[start:end + 1] # Include "<" and ">" in ID
    if fail_on_bad_format:
        return None
    return header

# Try to convert a Message-ID from an exporter to 
# an article number. Return that number or None
# if there is no match (or the feature is not active)

def exporter_id_to_article_no(id):
    """Return the article number encoded in an exporter Message-ID.

    The number is the first group of EXPORTER_ID_REGEXP, matched at the
    start of id.  Returns None if the regexp does not match or if
    EXPORTER_ID_REGEXP is None (feature not active).
    """
    if EXPORTER_ID_REGEXP is None:
        return None
    match = re.match(EXPORTER_ID_REGEXP, id)
    if not match:
        return None
    return int(match.group(1))

# Filter a list of headers, keeping only the MIME-related ones.
# Currently, we consider "MIME-Version" and any header beginning
# with "Content-" as MIME-related.

# Matched against header names only (no colon, no value): either
# exactly "MIME-Version" or any name beginning with "Content-".
re_good_mime_header = re.compile("^(MIME-Version$|Content-)",
                                 re.IGNORECASE)

def only_mime_headers(mail):
    """Return the MIME-related headers of mail as "Name: value" strings,
    in their original order.

    A header is MIME-related if its name is "MIME-Version" or begins
    with "Content-" (case-insensitive).
    """
    return [name + ": " + value
            for (name, value) in mail.items()
            if re_good_mime_header.match(name)]

#
# Filter recipient list to respect mx-refuse-import and
# mx-allow-envelope-sender-regexp
#

def filter_recipients(recipient_list, mail, envelope_sender):
    """Return the conferences in recipient_list that accept this mail.

    A conference is kept if both check_mx_refuse_import and
    check_mx_allow_envelope_sender_regexp accept the mail.  It is also
    kept if information about it cannot be fetched (kom.ServerError).
    """
    accepted = []
    for conf_no in recipient_list:
        try:
            conf = c.conferences[conf_no]
        except kom.ServerError:
            log("Cannot get information about conf_no %d" % conf_no,
                LOG_WARNING)
            accepted.append(conf_no) # Play it safe...
            continue

        if not check_mx_refuse_import(conf, mail):
            continue
        if not check_mx_allow_envelope_sender_regexp(conf, envelope_sender):
            continue
        accepted.append(conf_no)

    return accepted

#
# Does this recipient allow import of this mail based on
# mx-refuse-import?
#

def check_mx_refuse_import(conf, mail):
    """Return 0 if an mx-refuse-import aux-item on conf refuses mail,
    otherwise 1.

    Recognised item values:
      "all"   refuse every mail
      "spam"  refuse if the X-Spam-Flag header equals "YES"
      "html"  refuse if the top-level content type is text/html
    Any other value is logged as a warning and ignored.
    """
    for item in kom.all_aux_items_with_tag(conf.aux_items,
                                           kom.AI_MX_REFUSE_IMPORT):
        rule = item.data

        if rule == "all":
            log("Unconditional mx-refuse-import on conference '%s'" % \
                (conf.name),
                LOG_DEBUG)
            return 0 # refuse

        if rule == "spam":
            # Check for X-Spam-Flag from SpamAssassin:
            if mail.get("X-Spam-Flag", "NO") == "YES":
                log("Spam mx-refuse-import on conference '%s'" % \
                    (conf.name),
                    LOG_DEBUG)
                return 0 # refuse
            continue

        if rule == "html":
            # Check for text/html content-type on top level
            if mail.get_content_type() == "text/html":
                log("HTML mx-refuse-import on conference '%s'" % \
                    (conf.name),
                    LOG_DEBUG)
                return 0 # refuse
            continue

        log("Unknown mx-refuse-import '%s' on conference '%s'" % \
            (rule, conf.name),
            LOG_WARNING)

    return 1 # accept

#
# Does this recipient allow import of this mail based on
# mx-allow-envelope-sender-regexp?
#

def check_mx_allow_envelope_sender_regexp(conf, envelope_sender):
    """Return 1 if conf accepts mail from envelope_sender, otherwise 0.

    Without any mx-allow-envelope-sender-regexp aux-item every sender
    is accepted.  Otherwise the sender must match (from its start) at
    least one of the regexps.  A regexp that cannot be applied is
    logged as a warning and skipped.
    """
    allow_items = kom.all_aux_items_with_tag(conf.aux_items,
                                             kom.AI_MX_ALLOW_ENVELOPE_SENDER_REGEXP)
    if not allow_items:
        return 1 # accept

    for allow_item in allow_items:
        try:
            matched = re.match(allow_item.data, envelope_sender)
        except:
            log("Bad envelope sender regexp ignored (%s)" % (allow_item.data),
                LOG_WARNING)
            continue
        if matched:
            return 1 # accept

    return 0 # refuse


# RFC2047 handling

def rfc2047_decode_to_unicode(header_value):
    """Decode an RFC 2047 encoded header value into a unicode string.

    The value is split by email.Header.decode_header and each part is
    decoded with its declared charset (us-ascii when none is declared).
    The decoded parts are joined with single spaces.
    """
    def to_unicode(raw, charset):
        if charset is None:
            charset = "us-ascii"
        try:
            return raw.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Assume it is ISO-8859-1. Ugly...
            # This path is taken both when the charset is unknown and
            # when decoding fails (e.g. declared ASCII but charcode >= 128)
            return raw.decode('iso-8859-1')

    return u" ".join([to_unicode(raw, charset)
                      for (raw, charset)
                      in email.Header.decode_header(header_value)])

def rfc2047_decode_lossy_to_iso_8859_1(header_value):
    """Decode an RFC 2047 header value and return it as ISO-8859-1 bytes.

    Characters that ISO-8859-1 cannot represent are substituted by the
    codec's "replace" error handler, so the conversion may lose data.
    """
    return rfc2047_decode_to_unicode(header_value).encode("iso-8859-1",
                                                          "replace")

def best_effort_encode_subject_to_charset(s, charset):
    """Encode the subject s to charset, falling back to ISO-8859-1.

    ISO-8859-1 is used when charset is None, is "us-ascii" (compared
    case-insensitively), or names a codec that cannot be looked up; the
    last case is also logged as a warning. Characters the chosen codec
    cannot represent are substituted by the "replace" error handler.
    """
    fallback = "iso-8859-1"

    if charset is None or charset.lower() == "us-ascii":
        charset = fallback

    try:
        encoded = s.encode(charset, "replace")
    except LookupError:
        log("Cannot recode subject to charset %s (will use iso-8859-1)" % charset, LOG_WARNING)
        encoded = s.encode(fallback, "replace")
    return encoded

# HTML handling

def remove_redundant_html(mail):
    """Remove HTML duplicates of plain text from mail, in place.

    For the moment only the most common case is handled: a
    multipart/alternative part holding exactly two subparts, first
    text/plain and then text/html. The text/html subpart is deleted
    from such a part; everything else is left untouched.
    """
    if not mail.is_multipart():
        return

    for alt in typed_subpart_iterator(mail, 'multipart', 'alternative'):
        # The iterator selects on the declared content type only. A part
        # can be labelled multipart/alternative and still carry a plain
        # string payload instead of a list of subparts; a two-character
        # string would otherwise pass the length test below and then
        # fail on the attribute lookups.
        if not alt.is_multipart():
            continue

        parts = alt.get_payload()
        if len(parts) != 2:
            continue

        (first, second) = parts
        if first.get_content_maintype() == "text" and \
           second.get_content_maintype() == "text" and \
           first.get_content_subtype() == "plain" and \
           second.get_content_subtype() == "html":
            # parts is the payload list itself, so this edits the message
            del parts[1]

def linear_list_of_discrete_parts(mail):
    """Return the non-multipart parts of mail as a flat list.

    The parts appear in the order in which mail.walk() visits them;
    multipart containers themselves are left out.
    """
    return [part for part in mail.walk() if not part.is_multipart()]

# MAIN

# Options totally changing the behaviour
#   --gc	Do a garbage collection
#   --dump	Dump Message-ID database
#
# Normal options:
#   --log-level=LEVEL
#		Set log level to (ERROR, WARNING, INFO, DEBUG)
#		Default is INFO
#   --with-hdr  Add Message-ID:, From: and To: to message body top.
#
# Arguments:
#   envelope-sender [envelope-recipient...]
#
# There should be at least one envelope-recipient.

# Operating modes selectable from the command line
FUNC_IMPORT_MAIL = 0
FUNC_GC = 1
FUNC_DUMP = 2

# Defaults, possibly overridden by the options parsed below
function = FUNC_IMPORT_MAIL
log_level = LOG_INFO
with_hdr = 0

try:
    options, arguments = getopt.getopt(sys.argv[1:],
                                       "",
                                       [
                                           "gc",
                                           "with-hdr",
                                           "dump",
                                           "log-level=",
                                           ])
except getopt.error, reason:
    error_exit("Usage error (%s)" % reason, EX_USAGE)

for (opt, optarg) in options:
    if opt == "--gc":
        function = FUNC_GC
    elif opt == "--dump":
        function = FUNC_DUMP
    elif opt == "--with-hdr":
        with_hdr = 1
    elif opt == "--log-level":
        try:
            new_log_level = find_log_level(optarg)
        except ValueError:
            # find_log_level raises ValueError for an unknown level name.
            # Catching only that keeps other exceptions (e.g. an
            # interrupt) from being reported as a usage error.
            error_exit("Bad --log-level=%s" % optarg, EX_USAGE)
        # Assigned after the try so that the error above is still logged
        # under the previous log level
        log_level = new_log_level
    else:
        error_exit("Bad option '%s'" % opt, EX_USAGE)

# Check for arguments

# Only mail import takes positional arguments: the envelope sender
# followed by at least one envelope recipient.
if function == FUNC_IMPORT_MAIL:
    if len(arguments) < 2:
        error_exit("Too few arguments (%d)" % len(arguments), EX_USAGE)
    else:
        envelope_sender = arguments[0]
        envelope_recipients = arguments[1:]

# Connect and log in

# The connection and the login are guarded by separate try statements.
# error_exit() finishes with sys.exit(), i.e. it raises SystemExit; if the
# login handler ran inside a try guarded by a bare "except:", that
# SystemExit would be caught and the login failure would be reported a
# second time as a connection failure, exiting with EX_TEMPFAIL instead
# of EX_UNAVAILABLE.

try:
    c = kom.CachedConnection(KOMSERVER, KOMPORT)
except:
    error_exit("Failed to connect to LysKOM server", EX_TEMPFAIL)

try:
    kom.ReqLogin(c, KOMPERSON, KOMPASSSWORD, invisible=1).response()
except:
    # Change to EX_TEMPFAIL if you consider this transient...
    error_exit("Failed to login to LysKOM server", EX_UNAVAILABLE)

# Prepare to use the Message-ID database

try:
    message_id_db = MessageIDDb(ID_DB_FILE)
except:
    # It's a bit too tough luck to be unsubscribed from a lot of
    # mailing lists because of this one, so ask for the mail to be
    # retried later (EX_TEMPFAIL) rather than bounced
    error_exit("Cannot access Message-ID database", EX_TEMPFAIL)

# Choose function
# Run the function selected on the command line. Every failure path below
# ends in sys.exit() (directly or through error_exit), so the final
# "Done" log line and EX_OK are only reached on success.
if function == FUNC_GC:
    # Function: Garbage collection
    message_id_db.gc_message_id_db()
elif function == FUNC_DUMP:
    # Function: Dump
    message_id_db.dump()
elif function == FUNC_IMPORT_MAIL:
    # Function: Normal mail handling
    try:
        try:
            # The mail to import is read from standard input
            mail = email.message_from_file(sys.stdin)
        except RuntimeError:
            # error_exit raises SystemExit; none of the handlers of the
            # outer try below is written to catch it
            error_exit("RuntimeError, message probably malformed", EX_UNAVAILABLE)
            
        if REMOVE_REDUNDANT_HTML:
            remove_redundant_html(mail)
        create_article_or_add_recipients(mail,
                                         envelope_sender,
                                         envelope_recipients,
                                         with_hdr)
    # Failures reported with EX_TEMPFAIL, so that the mail is retried
    # later. NOTE(review): the order of these clauses may matter,
    # depending on how the kom exception classes are related -- confirm
    # against the kom module before reordering.
    except kom.ReceiveError:
        error_exit("Receive Error, try later", EX_TEMPFAIL)
    # NOTE(review): the class is spelled OutOfmemory here while the
    # message says OutOfMemory -- confirm the spelling in the kom module.
    except kom.OutOfmemory:
        error_exit("Out of Memory (OutOfMemory), try later", EX_TEMPFAIL)
    except MemoryError:
        error_exit("MemoryError, try later", EX_TEMPFAIL)
    except kom.TemporaryFailure:
        error_exit("Temporary Failure, try later", EX_TEMPFAIL)
    except StandardError:
        # Log exception to file, instead of showing it in the bounce
        (t,v,tb) = sys.exc_info()
        tbs = "".join(traceback.format_exception(t,v,tb))
        log("Traceback: " + tbs, LOG_ERROR)
        # Only a generic notice goes to stderr
        sys.stderr.write("An unexpected error occurred. "
                         "The complete error message has been logged to a file "
                         "where the postmaster may read it.\n")
        sys.exit(EX_UNAVAILABLE)
else:
    # Function: ?
    # Reached only if function holds none of the FUNC_* values above
    error_exit("Internal error: unknown function", EX_USAGE)

# Exit successfully
log("Done on %s." % time.ctime(time.time()), LOG_INFO)
sys.exit(EX_OK)