-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkomimportmail
executable file
·1052 lines (906 loc) · 37.3 KB
/
komimportmail
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- python -*- -*- coding: iso-8859-1 -*-
# LysKOM email import
# $Id: komimportmail,v 1.55 2008/03/16 19:25:48 kent Exp $
# (C) 1999 Kent Engström. Released under GPL.
# Note: Python 2.2.2+ is needed because of email
import sys
import time
import email, email.Utils, email.Header
from email.Iterators import typed_subpart_iterator
import lockingshelve
import kom
import os
from komimportmail_config import *
import types
import re
import getopt
import traceback
# Get revision number from RCS/CVS.
# The slice drops the first 11 characters ("$Revision: ") and the last
# two (" $") of the keyword string, leaving just the number ("1.55").
vc_revision = "$Revision: 1.55 $"
revision = vc_revision[11:-2]
def error_exit(str, exit_code):
    """Report a fatal error and terminate the process.

    The message is prefixed with "FATAL: ", written to stderr, written to
    the log file at LOG_ERROR level (with the exit code appended) and the
    process is then left through sys.exit(exit_code).
    """
    # NOTE: the first parameter shadows the builtin str; the name is kept
    # because it is part of the existing signature.
    message = "FATAL: " + str
    sys.stderr.write("%s\n" % message)
    log("%s (exit: %d)" % (message, exit_code), LOG_ERROR)
    sys.exit(exit_code)
# Exit codes from sendmail-8.9.3/src/sysexits.c:
# (the notes describe how the codes are used in this file)
EX_OK = 0
EX_USAGE = 64 # Bad options or too few arguments
EX_DATAERR = 65 # Mail without usable Message-ID
EX_NOUSER = 67 # No valid recipient found
EX_UNAVAILABLE = 69 # Catchall
EX_TEMPFAIL = 75 # This is the key to making sendmail requeue a mail!
# Log levels. log() writes a line when its level is <= the configured
# log_level, so a lower number means a more important message.
LOG_ERROR = 0
LOG_WARNING = 1
LOG_INFO = 2
LOG_DEBUG = 3

# Numeric log level -> symbolic name
log_levels = {
    LOG_ERROR: "ERROR",
    LOG_WARNING: "WARNING",
    LOG_INFO: "INFO",
    LOG_DEBUG: "DEBUG",
}


def find_log_level(name):
    """Return the numeric log level whose symbolic name is name.

    The comparison ignores case. ValueError is raised when name is not
    one of the names in log_levels.
    """
    wanted = name.upper()
    for level in log_levels.keys():
        if log_levels[level] == wanted:
            return level
    raise ValueError
# Logging state, filled in lazily by log()
log_file = None  # file object for LOG_FILE, opened in append mode
log_pid = None  # our process id, used as line prefix


def log(line, level):
    """Append line to the log file if level passes the log_level filter.

    The log file is opened (and the process id looked up) on the first
    call, even if that particular line is filtered out. Every written
    line has the form "<pid>: <line>" and is flushed immediately.
    """
    global log_file
    global log_pid
    if log_file is None:
        log_file = open(LOG_FILE, "a")
    if log_pid is None:
        log_pid = os.getpid()
    if level > log_level:
        return
    log_file.write("%d: %s\n" % (log_pid, line))
    log_file.flush()
# A newline together with any space/tab characters directly following it
_re_newline_and_indent = re.compile("\n[ \t]*")


def remove_newlines(s):
    """Return s with each newline (+ following spaces/tabs) replaced by
    a single space."""
    return _re_newline_and_indent.sub(" ", s)
# Class to handle Message-ID:s
class MessageIDDb:
    """Message-ID database kept in a locking shelf.

    Two kinds of values are stored, keyed by Message-ID:

    - a tuple (text_no, added_time_t): the mail with this Message-ID has
      been imported as article text_no (see register_text);
    - a list of text_nos: articles that should become comments to the
      mail with this Message-ID if it is seen later (see register_link).
    """

    def __init__(self, filename):
        self.filename = filename
        self.db = lockingshelve.LockingShelve(filename)  # May raise exc.

    def get_text_no_from_id(self, id):
        """Return the text_no corresponding to Message-ID id, or None if
        the text has not been entered yet (or the lookup fails)."""
        try:
            data = self.db[id]
            if not isinstance(data, tuple):
                return None
            (text_no, added_time_t) = data
            return text_no
        except Exception:
            # Best effort: a missing key or an unreadable entry both
            # count as "not imported yet".
            return None

    def get_texts_commenting(self, id):
        """Return the list of text_nos wanting to comment Message-ID id,
        or None if no such data is present."""
        try:
            data = self.db[id]
            if not isinstance(data, list):
                return None
            return data
        except Exception:
            return None

    def register_text(self, id, text_no):
        """Register that Message-ID id has been imported as text_no."""
        self.db[id] = (text_no, int(time.time()))

    def register_link(self, id, text_no):
        """Register that Message-ID id should be commented by text_no if
        it is seen later.

        Nothing is recorded if id already maps to something that is not
        a list (e.g. an imported text).
        """
        shelf = self.db.exclusive_open()
        try:
            try:
                text_nos = shelf[id]
            except Exception:
                text_nos = []
            if not isinstance(text_nos, list):
                return
            text_nos.append(text_no)
            shelf[id] = text_nos
        finally:
            self.db.exclusive_close()

    def gc_message_id_db(self):
        """Garbage collect the Message-ID database.

        Entries whose article (or, for comment lists, all of whose
        articles) no longer exist on the server are deleted. The
        exclusive lock is released even if the scan fails.
        """
        log("Garbage collection started", LOG_INFO)
        ctr_left = 0
        ctr_gone = 0
        s = self.db.exclusive_open()
        try:
            # Take a snapshot of the keys, as entries are deleted
            # during the loop.
            id_list = list(s.keys())
            log("Will check %d articles" % (len(id_list)),
                LOG_INFO)
            for id in id_list:
                data = s[id]
                if isinstance(data, tuple):
                    # Normal Message-ID --> text_no
                    (text_no, created) = data
                    try:
                        c.textstats[text_no]  # Use the cache
                        ctr_left = ctr_left + 1
                    except kom.NoSuchText:
                        ctr_gone = ctr_gone + 1
                        log("GC %7d: %s" % (text_no, id), LOG_INFO)
                        del s[id]
                elif isinstance(data, list):
                    # Backwards Message-ID --> [text_no_list]
                    all_texts_gone = 1
                    for text_no in data:
                        try:
                            c.textstats[text_no]  # Use the cache
                            all_texts_gone = 0
                            break
                        except kom.NoSuchText:
                            pass
                    if all_texts_gone:
                        ctr_gone = ctr_gone + 1
                        log("GC BACK : %s" % (id), LOG_INFO)
                        del s[id]
                    else:
                        ctr_left = ctr_left + 1
                else:
                    log("%s: unknown type, cannot GC" % (id), LOG_WARNING)
                    ctr_left = ctr_left + 1
        finally:
            self.db.exclusive_close()
        log("Garbage collection finished (left: %d, gone: %d)" % \
            (ctr_left, ctr_gone),
            LOG_INFO)

    def dump(self):
        """Dump the Message-ID database to stdout, one entry per line."""
        s = self.db.shared_open()
        try:
            for id in list(s.keys()):
                data = s[id]
                if isinstance(data, tuple):
                    (text_no, created) = data
                    print("%s --> %d" % (id, text_no))
                elif isinstance(data, list):
                    print("%s <== %s" % (id, data))
                else:
                    print("%s STRANGE DATA" % (id))
        finally:
            self.db.shared_close()
def create_article_or_add_recipients(mail,
                                     envelope_sender,
                                     envelope_recipients,
                                     with_hdr):
    """Import one mail: create a new article or, if the Message-ID has
    been imported before, add the recipients to the existing article.

    Exits through error_exit() if the mail lacks a usable Message-ID or
    if none of the envelope recipients can be mapped to a conference.
    Mails whose Message-ID is recognized as coming from an exporter are
    left alone.
    """
    log("Starting on %s." % time.ctime(time.time()), LOG_INFO)

    # Get Message-ID from mail
    message_id = None
    if "Message-ID" in mail:
        message_id = last_message_id_in(mail["Message-ID"],
                                        fail_on_bad_format = False)
    if message_id is None or message_id == "<>":
        error_exit("No or empty message ID in mail", EX_DATAERR)

    # We do not touch something exported by an exporter if we
    # recognize it!
    if exporter_id_to_article_no(message_id) is not None:
        log("Not touching exported mail with Message-ID: %s" % message_id, LOG_INFO)
        return
    log("Handling mail with Message-ID: %s" % message_id, LOG_INFO)

    # Lookup Message-ID -> Text-No (gives None if this is a new mail)
    existing_text_no = message_id_db.get_text_no_from_id(message_id)
    is_new_mail = existing_text_no is None
    if is_new_mail:
        log("This mail is new", LOG_INFO)
    else:
        log("This is existing article %d" % existing_text_no, LOG_INFO)

    # Map recipient names to conference numbers
    recipient_list = name_list_to_conf_no_list(envelope_recipients)
    if not recipient_list:
        error_exit("No valid recipients found", EX_NOUSER)

    # Remove recipients based on the import restrictions aux-items
    # mx-refuse-import and mx-allow-envelope-sender-regexp
    recipient_list = filter_recipients(recipient_list,
                                       mail,
                                       envelope_sender)
    if not recipient_list:
        log("No recipients left after import restrictions handling", LOG_INFO)
        return

    if is_new_mail:
        # We should create a new article based on this mail
        create_article(mail, envelope_sender, recipient_list,
                       message_id, with_hdr)
        return
    # We should add new recipients to the existing article
    add_recipients(mail, envelope_sender, recipient_list,
                   message_id, existing_text_no)
def create_article(mail, envelope_sender, envelope_recipients,
                   message_id, with_hdr):
    """Create a new article (plus appendices) based on this mail.

    envelope_recipients is the list of conference numbers to use as
    recipients. The first discrete MIME part becomes the main article;
    every further part becomes a comment to it. Afterwards the
    Message-ID database is updated for threading.
    """
    # Threading: which Message-ID (and article) are we a comment to?
    parent_message_id = message_id_of_parent(mail)
    parent_text_no = None
    if parent_message_id is not None:
        # If the parent seems to be an exported text, we will
        # bypass "normal" handling!
        parent_text_no = exporter_id_to_article_no(parent_message_id)
        if parent_text_no is not None:
            log("Comment to %s (exported article %d) " % \
                (parent_message_id, parent_text_no), LOG_INFO)
        else:
            parent_text_no = message_id_db.get_text_no_from_id(parent_message_id)
            if parent_text_no is None:
                log("Comment to %s (not present in KOM?)" % \
                    (parent_message_id), LOG_INFO)
            else:
                log("Comment to %s (article %d)" % \
                    (parent_message_id, parent_text_no), LOG_INFO)

    # MIME multipart handling. We choose not to preserve the exact
    # hierarchical structure of the MIME message itself, as that would
    # lead to a situation where the articles corresponding to the
    # "inner nodes", e.g. multipart/mixed, would be empty and serve
    # only as placeholders for comments. Instead, we do something
    # really simple: We get all discrete parts in a linear list, and
    # let the first part be the main article. All remaining parts will
    # become comments to this article, regardless of the original
    # hierarchical structure in the MIME message.
    parts = linear_list_of_discrete_parts(mail)
    log("Number of MIME parts: %d" % len(parts), LOG_INFO)
    main_part = parts[0]
    appendix_parts = parts[1:]

    # Handle the first (perhaps only) part
    text_no = create_article_part(mail,
                                  envelope_sender,
                                  envelope_recipients,
                                  message_id,
                                  main_part,
                                  parent_message_id,
                                  parent_text_no,
                                  top = None,
                                  with_hdr = with_hdr)

    # Handle the rest of the parts as comments to the first part
    sub_text_nos = []
    for appendix in appendix_parts:
        sub_text_nos.append(create_article_part(mail,
                                                envelope_sender,
                                                envelope_recipients,
                                                message_id,
                                                appendix,
                                                parent_message_id,
                                                parent_text_no,
                                                top = text_no))

    # Now, we need to add an AI_MX_MIME_PART_IN to the first part
    # for each appendix, as we did not know the text numbers of the
    # appendices when we created the first part.
    if sub_text_nos:
        add_mx_mime_part_in_items(text_no, sub_text_nos)

    # Threading. If our Message-ID has a list of
    # texts-that-want-to-comment-us attached, process it now.
    # Note that we must do this before entering our
    # Message-ID -> text_no mapping below.
    texts_commenting_us = message_id_db.get_texts_commenting(message_id)
    if texts_commenting_us is not None:
        add_comment_links(text_no, texts_commenting_us)

    # Threading. Register Message-ID -> our own text_no.
    try:
        message_id_db.register_text(message_id, text_no)
        log("Message-ID recorded in database.", LOG_INFO)
    except:
        log("Failed to record Message-ID in database.", LOG_WARNING)

    # Threading. If we are trying to comment a Message-ID that
    # is not yet present: register for future threading.
    if parent_message_id is not None and parent_text_no is None:
        log("Registering us as comment to non-present %s" %
            parent_message_id, LOG_INFO)
        message_id_db.register_link(parent_message_id, text_no)
def create_article_part(mail,
                        envelope_sender,
                        recipient_list,
                        message_id,
                        part,
                        parent_message_id = None,
                        parent_text_no = None,
                        top = None,
                        with_hdr = 0):
    """Create one article from one discrete MIME part of a mail.

    mail              -- the complete mail; mail headers are read from it
    envelope_sender   -- envelope sender (aux-item on the main article)
    recipient_list    -- conference numbers that become recipients
    message_id        -- Message-ID of the mail
    part              -- the MIME part whose payload becomes the body
    parent_message_id -- Message-ID we are a reply to, or None
    parent_text_no    -- article number of that parent, or None
    top               -- None for the main article; for an appendix the
                         article number of the main article. A lot of
                         the actions below depend on this.
    with_hdr          -- if true, createBodyHeaders() output is put in
                         front of the body

    Returns the text_no returned by send_to_kom().
    """
    # Prepare Misc-Info and Aux Item list for later use
    misc_info = kom.CookedMiscInfo()
    aux_items = []

    # Add recipients to Misc-Info
    for conf_no in recipient_list:
        mir = kom.MIRecipient(type = kom.MIR_TO, recpt = conf_no)
        misc_info.recipient_list.append(mir)

    # Threading of main article based on In-Reply-To/References
    if top is None and parent_text_no is not None:
        mic = kom.MICommentTo(kom.MIC_COMMENT, parent_text_no)
        misc_info.comment_to_list.append(mic)

    # Threading of appendices as comments (or footnotes) to the
    # main article. We mark the appendix with a special aux-item,
    # mx-mime-belongs-to to designate it as a MIME appendix.
    if top is not None:
        mic = kom.MICommentTo(APPENDIX_COMMENT_TYPE, top)
        misc_info.comment_to_list.append(mic)
        aux_items.append(kom.AuxItem(kom.AI_MX_MIME_BELONGS_TO, str(top)))

    # Let the world know about this marvelous creating software :-)
    aux_items.append(kom.AuxItem(kom.AI_CREATING_SOFTWARE,
                                 "komimportmail %s" % revision))

    # Handle envelope information (creating aux-items)
    # We only put these aux-items on the main article (not on appendices).
    if top is None:
        aux_items.append(kom.AuxItem(kom.AI_MX_ENVELOPE_SENDER,
                                     envelope_sender))
        # Currently, the envelope recipients are not stored anywhere.
        # Perhaps that would be useful later on if the recognized
        # addresses become more advanced than simple integers.

    # Handle mail headers (creating aux-items)
    # We now put these aux-items on the main article and the appendices.

    # Message-ID
    aux_items.append(kom.AuxItem(kom.AI_MX_MESSAGE_ID, message_id))

    # In-Reply-To information
    if parent_message_id is not None:
        aux_items.append(kom.AuxItem(kom.AI_MX_IN_REPLY_TO,
                                     parent_message_id))

    # Aux-items for address header fields come in three formats:
    #
    # 1) Complete address including name (mailbox in the
    #    terminology of draft-ietf-drums-msg-fmt-07.txt)
    #    Example: "Joe Q. Public" <[email protected]>
    #    or [email protected]
    #
    # 2) Just the bare email address (called addr-spec)
    #    Example: [email protected]
    #
    # 3) Just the name (a display-name in the drums specification):
    #    Example: Joe Q. Public
    #
    # The rationale for using different formats for different aux-items
    # is that the LysKOM clients should not have to be able to
    # parse addresses in order to use Reply-To addresses, etc.
    address_headers = [
        ("From", None, kom.AI_MX_FROM, kom.AI_MX_AUTHOR),
        ("To", kom.AI_MX_TO, None, None),
        ("Cc", kom.AI_MX_CC, None, None),
        ("Reply-To", None, kom.AI_MX_REPLY_TO, None),
        ]
    for (header_name, aux_item_1, aux_item_2, aux_item_3) in address_headers:
        for (display_name, addr_spec) in \
                email.Utils.getaddresses(mail.get_all(header_name, [])):
            # Remove outer double quotes and RFC2047 coding if
            # present in the display name. (The closing quote is the
            # last character, i.e. the slice [-1:]; the length check
            # keeps a lone '"' from matching as both quotes.)
            if len(display_name) >= 2 and \
               display_name[0:1] == '"' and display_name[-1:] == '"':
                display_name = display_name[1:-1]
            display_name = rfc2047_decode_lossy_to_iso_8859_1(display_name)

            # Recreate mailbox
            if display_name:
                mailbox = "%s <%s>" % (display_name, addr_spec)
            else:
                mailbox = addr_spec

            # Add aux-items of the correct type
            if aux_item_1:
                aux_items.append(kom.AuxItem(aux_item_1, mailbox))
            if aux_item_2:
                aux_items.append(kom.AuxItem(aux_item_2, addr_spec))
            if aux_item_3 and display_name != "":
                aux_items.append(kom.AuxItem(aux_item_3, display_name))

    # Date
    if "Date" in mail and not SKIP_AI_MX_DATE:
        parsed_date = email.Utils.parsedate_tz(mail["Date"])
        if parsed_date is not None:
            try:
                # parsed_date[9] is the offset in seconds, or None
                tz_mins = parsed_date[9] // 60
                if tz_mins == 0:
                    tz = "+0000"
                elif tz_mins > 0:
                    tz = "+%02d%02d" % (tz_mins // 60, tz_mins % 60)
                else:
                    tz_mins = -tz_mins
                    tz = "-%02d%02d" % (tz_mins // 60, tz_mins % 60)
            except Exception:
                tz = "-0000" # TZ not known
            date = "%04d-%02d-%02d %02d:%02d:%02d %s" % \
                   (parsed_date[0], parsed_date[1], parsed_date[2],
                    parsed_date[3], parsed_date[4], parsed_date[5],
                    tz)
            aux_items.append(kom.AuxItem(kom.AI_MX_DATE, date))

    # The complete set of mail headers (no decoding of these)
    # This one should not be put on the appendices:
    if top is None:
        header_str_list = [name + ": " + value
                           for (name, value) in mail.items()]
        aux_items.append(kom.AuxItem(kom.AI_MX_MISC,
                                     "\n".join(header_str_list)))

    # Handle MIME headers (creating aux-items)
    # We put these aux-items on the main article and the appendices

    # MIME Content type
    # The server rejects bad content-types that do not contain a slash,
    # so we don't try to feed it such bogosities.
    mime_type = part.get("Content-Type", "text/plain").lower()
    if mime_type.find("/") == -1:
        log("Bad MIME type %s not used" % mime_type, LOG_WARNING)
    else:
        aux_items.append(kom.AuxItem(kom.AI_CONTENT_TYPE,
                                     mime_type))
    log("MIME type of this part: %s" % mime_type, LOG_INFO)

    # Get the charset from the mime_type (None if not given).
    # get_param() returns a (charset, language, value) tuple for
    # RFC 2231 encoded parameters; we only want the value.
    charset = part.get_param("charset")
    if isinstance(charset, tuple):
        charset = charset[2]
    log("Charset of this part: %s" % charset, LOG_INFO)

    # MIME filename (Name parameter of Content-Type header)
    mime_filename = part.get_param("name")
    if isinstance(mime_filename, tuple):
        mime_filename = mime_filename[2]
    if mime_filename:
        aux_items.append(kom.AuxItem(kom.AI_MX_MIME_FILE_NAME,
                                     mime_filename))

    # The complete set of MIME part headers (no decoding of these)
    # For the first part, the MIME headers are mixed together
    # with the mail headers. Therefore, we try to remove
    # non-MIME-headers here.
    aux_items.append(kom.AuxItem(kom.AI_MX_MIME_MISC,
                                 "\n".join(only_mime_headers(part))))

    # Subject
    if "Subject" in mail:
        subject = rfc2047_decode_to_unicode(mail["Subject"])
    else:
        subject = ""

    # The subject of an appendix is slightly modified
    if top is not None:
        if mime_filename:
            subject = APPENDIX_SUBJECT_PREFIX % mime_filename + subject
        else:
            subject = APPENDIX_SUBJECT_PREFIX_NONAME + subject

    # Encode subject to match the body
    subject = best_effort_encode_subject_to_charset(subject, charset)
    subject = remove_newlines(subject)

    # Try to create article part
    payload = part.get_payload(decode=True)
    # Searching for a bug: we sometimes seem to get None from
    # get_payload, although is_multipart is False.
    # This happens for the text/plain parts of message/delivery-status.
    # Workaround: use str(msg) instead of msg.get_payload(decode=True)
    # as a fallback.
    if payload is None:
        payload = str(part)
        if payload is None:
            payload = ""
            log("get_payload workaround failed", LOG_WARNING)

    if with_hdr:
        payload = createBodyHeaders(mail, part, top) + payload

    text_no = send_to_kom(subject, payload, misc_info, aux_items)
    return text_no
def createBodyHeaders(mail, part, top):
    """Return a header block to put in front of the article body.

    For the main article (top is None) the block consists of the From,
    To, Cc and Message-ID headers present in mail, one per line,
    followed by an empty line. An empty string is returned for
    appendices and when none of the headers are present.
    """
    if top is not None:
        return ""
    lines = []
    # (label written to the body, key used for the lookup in mail)
    for (label, key) in (("From", "From"), ("To", "To"), ("Cc", "CC")):
        if label in mail:
            lines.append("%s: %s\n" %
                         (label, rfc2047_decode_lossy_to_iso_8859_1(mail[key])))
    if "Message-ID" in mail:
        lines.append("Message-ID: %s\n" % mail["Message-ID"])
    if not lines:
        return ""
    lines.append("\n")
    return "".join(lines)
def send_to_kom(subject, body, misc_info, aux_items):
    """Send a text (subject, body, Misc-Info and Aux-Items) to KOM.

    Comment-to/footnote-to Misc-Info items are destructively pruned
    from misc_info if the server reports that the text they refer to
    does not exist. A body that the server finds too long is replaced
    (once) with an error indication.

    Returns the text_no of the created article. Raises the server
    error if the article cannot be created: a NoSuchText that does not
    match any comment-to entry, a StringTooLong that persists after the
    body has been replaced, or any other error from the server.
    """
    log("send_to_kom with aux_items: %s" % aux_items, LOG_DEBUG)
    body_replaced = 0
    while 1:
        try:
            text_no = kom.ReqCreateText(c,
                                        subject + "\n" + body,
                                        misc_info, aux_items).response()
        except kom.StringTooLong:
            if body_replaced:
                # Even the short replacement was rejected (so the body
                # is not the problem): give up instead of looping.
                raise
            # Replace the body with an error indication
            body = "[komimportmail Error: The LysKOM server thinks %d kB is too big.]" % (len(body) // 1024)
            body_replaced = 1
        except kom.NoSuchText:
            # We assume this is because of a bad Comment-To link
            # (perhaps the text we are trying to comment has been
            # deleted). Remove the offending Misc-Info entry and
            # try again, if possible.
            error_status = sys.exc_info()[1]
            try:
                bad_text_no = int(str(error_status))
            except ValueError:
                bad_text_no = None
            removed = 0
            if bad_text_no is not None:
                log("Article creation failed as commented article %d does not exist" % bad_text_no, LOG_WARNING)
                for i in range(len(misc_info.comment_to_list)):
                    if misc_info.comment_to_list[i].text_no == bad_text_no:
                        del misc_info.comment_to_list[i]
                        removed = 1
                        break
            if not removed:
                # Nothing we can prune: let the caller see the error
                raise error_status
            log("Retrying with offending misc-item removed", LOG_DEBUG)
        except kom.AuxItemPermission:
            # Debugging!
            log("Bad aux-item is: %s" % sys.exc_info()[1], LOG_ERROR)
            raise
        else:
            log("Article %d created" % text_no, LOG_INFO)
            return text_no
def add_mx_mime_part_in_items(parent_no, child_nos):
    """Add an AI_MX_MIME_PART_IN aux-item to article parent_no for each
    appendix article number in child_nos.

    A failure reported by the server is logged as a warning and
    otherwise ignored.
    """
    aux_items = [kom.AuxItem(kom.AI_MX_MIME_PART_IN, str(child_no))
                 for child_no in child_nos]
    try:
        # Wait for the reply like every other request in this file does;
        # presumably a server error is only raised from response().
        kom.ReqModifyTextInfo(c, parent_no, [], aux_items).response()
    except kom.ServerError:
        log("Failed (%s) to add mx-mime-part-in items" % \
            (sys.exc_info()[0]), LOG_WARNING)
def add_recipients(mail, envelope_sender, recipient_list,
                   message_id, existing_text_no):
    """Add the conferences in recipient_list as recipients of the
    existing article existing_text_no.

    We do not add new aux-items, even if some headers may be different
    in this copy (e.g. the "Received:" trace lines). Errors from the
    server are logged as warnings and otherwise ignored.
    """
    for conf_no in recipient_list:
        try:
            request = kom.ReqAddRecipient(c,
                                          existing_text_no,
                                          conf_no,
                                          kom.MIR_TO)
            request.response()
        except kom.ServerError:
            log("Failed (%s) to add conference %d as recipient" % \
                (sys.exc_info()[0], conf_no), LOG_WARNING)
        else:
            log("Added conference %d as recipient" % conf_no, LOG_INFO)
def add_comment_links(text_no, commenting_text_nos):
    """Make each article in commenting_text_nos a comment to text_no.

    Errors from the server are logged as warnings and otherwise ignored.
    """
    for comment_no in commenting_text_nos:
        try:
            request = kom.ReqAddComment(c, comment_no, text_no)
            request.response()
        except kom.ServerError:
            log("Failed (%s) to add %d as comment to %d" % \
                (sys.exc_info()[0], comment_no, text_no),
                LOG_WARNING)
        else:
            log("Added %s as comment to %d" % (comment_no,
                                               text_no), LOG_INFO)
# Convert a list of recipient names to a list of conference numbers
# Note that the recipient names should not contain the @ or the domain
# part of the email address, just the local part.
#
# Allowable recipient name formats (in the order they are checked):
#
# 1) An integer, or an integer prefixed by a single "p" or "P".
#    The conference/letterbox corresponding to this
#    conf_no is the recipient. Note that an invalid integer results
#    in a miss. No further alternatives are checked.
#
# 2) Any other string. Underscores and dots are converted to spaces
#    and the name is looked up. If there is exactly one match,
#    that conference/letterbox is chosen as recipient.
def name_list_to_conf_no_list(name_list):
    """Return the conference numbers for the recipient names in
    name_list; names that cannot be resolved are logged and skipped."""
    conf_no_list = []
    for name in name_list:
        # 1) Integer = conf_no
        try:
            conf_no = int(name)
        except ValueError:
            conf_no = None
            if name[0:1] in ("p", "P"):
                try:
                    conf_no = int(name[1:])
                except ValueError:
                    conf_no = None

        # Test against None: the integer 0 is still an integer, and an
        # invalid integer must be a miss, not a name to look up.
        if conf_no is not None:
            # We have an integer.
            # Do a simple existence check and add.
            try:
                c.conferences[conf_no]
            except Exception:
                log("Numeric recipient %d ignored (conference not found)" % \
                    conf_no, LOG_WARNING)
            else:
                conf_no_list.append(conf_no)
                log("Numeric recipient %d seems to be OK" % conf_no, LOG_INFO)
            continue # No other formats will be checked

        # 2) Other string = name to look up
        changed_name = name.replace("_", " ").replace(".", " ")
        matches = c.lookup_name(changed_name,
                                want_pers = 1,
                                want_confs = 1)
        if len(matches) == 0:
            log("Name recipient '%s' gave no match" % name, LOG_WARNING)
        elif len(matches) > 1:
            log("Name recipient '%s' gave %d matches (ambiguous)" % \
                (name, len(matches)),
                LOG_WARNING)
        else:
            log("Name recipient '%s' matches %d: '%s'" % \
                (name, matches[0][0], matches[0][1]),
                LOG_INFO)
            conf_no_list.append(matches[0][0])
    return conf_no_list
#
# Given a set of mail headers, guess the Message-ID this message
# is a comment to. If this does not appear to be a comment to
# anything, return None.
# Algorithm: choose last thing in <..> of In-Reply-To. If that
# header does not exist (or holds no ID), try using References instead.
def message_id_of_parent(mail):
    """Return the Message-ID mail is a reply to, or None."""
    for field_name in ["In-Reply-To", "References"]:
        if field_name in mail:
            msg_id = last_message_id_in(mail[field_name])
            if msg_id:
                log("Threading on %s: %s)" % \
                    (field_name, msg_id),
                    LOG_INFO)
                return msg_id
            else:
                # The header value comes from mail; the name "headers"
                # used here before does not exist.
                log("No ID found in %s: '%s'" % \
                    (field_name, mail[field_name]),
                    LOG_DEBUG)
    log("No threading possible.", LOG_INFO)
    return None
def last_message_id_in(header, fail_on_bad_format = True):
    """Return the text from the last "<" to the last ">" in header.

    The angle brackets are included in the result. If there is no such
    span (no "<", or the last ">" does not come after the last "<"),
    None is returned when fail_on_bad_format is true, otherwise the
    unchanged header.
    """
    start = header.rfind("<")
    end = header.rfind(">")
    if start != -1 and start < end:
        return header[start:end + 1] # Include "<" and ">" in ID
    if fail_on_bad_format:
        return None
    return header
def exporter_id_to_article_no(id):
    """Try to convert a Message-ID from an exporter to an article number.

    Returns the first group of EXPORTER_ID_REGEXP as an integer, or
    None if there is no match or the feature is not active
    (EXPORTER_ID_REGEXP is None).
    """
    if EXPORTER_ID_REGEXP is None:
        return None
    found = re.match(EXPORTER_ID_REGEXP, id)
    if not found:
        return None
    return int(found.group(1))
# Filter a list of headers, keeping only the MIME-related ones.
# Currently, we consider "MIME-Version" and any header beginning
# with "Content-" as MIME-related.
# The pattern is matched against bare header names (no colon), so
# "MIME-Version" must be accepted without a trailing ":". The form
# with a colon is still accepted as well.
re_good_mime_header = re.compile("^(MIME-Version(:|$)|Content-)",
                                 re.IGNORECASE)


def only_mime_headers(mail):
    """Return the MIME-related headers of mail as a list of
    "Name: value" strings, in the order given by mail.items()."""
    return [header + ": " + value
            for (header, value) in mail.items()
            if re_good_mime_header.match(header)]
#
# Filter recipient list to respect mx-refuse-import and
# mx-allow-envelope-sender-regexp
#
def filter_recipients(recipient_list, mail, envelope_sender):
    """Return the conference numbers in recipient_list that accept
    import of this mail.

    A conference whose information cannot be fetched is kept.
    """
    accepted = []
    for conf_no in recipient_list:
        try:
            conf = c.conferences[conf_no]
        except kom.ServerError:
            log("Cannot get information about conf_no %d" % conf_no,
                LOG_WARNING)
            accepted.append(conf_no) # Play it safe...
            continue
        if not check_mx_refuse_import(conf, mail):
            continue
        if not check_mx_allow_envelope_sender_regexp(conf, envelope_sender):
            continue
        accepted.append(conf_no)
    return accepted
#
# Does this recipient allow import of this mail based on
# mx-refuse-import?
#
def check_mx_refuse_import(conf, mail):
    """Return 0 (refuse) if an mx-refuse-import aux-item on conf
    rejects mail, otherwise 1 (accept).

    Recognized item values: "all" (always refuse), "spam" (refuse when
    the X-Spam-Flag header is "YES") and "html" (refuse when the top
    level content type is text/html). Other values are logged and
    ignored.
    """
    for item in kom.all_aux_items_with_tag(conf.aux_items,
                                           kom.AI_MX_REFUSE_IMPORT):
        rule = item.data
        if rule == "all":
            log("Unconditional mx-refuse-import on conference '%s'" % \
                (conf.name),
                LOG_DEBUG)
            return 0 # refuse
        if rule == "spam":
            # Check for X-Spam-Flag from SpamAssassin:
            if mail.get("X-Spam-Flag", "NO") == "YES":
                log("Spam mx-refuse-import on conference '%s'" % \
                    (conf.name),
                    LOG_DEBUG)
                return 0 # refuse
            continue
        if rule == "html":
            # Check for text/html content-type on top level
            if mail.get_content_type() == "text/html":
                log("HTML mx-refuse-import on conference '%s'" % \
                    (conf.name),
                    LOG_DEBUG)
                return 0 # refuse
            continue
        log("Unknown mx-refuse-import '%s' on conference '%s'" % \
            (rule, conf.name),
            LOG_WARNING)
    return 1 # accept
#
# Does this recipient allow import of this mail based on
# mx-allow-envelope-sender-regexp?
#
def check_mx_allow_envelope_sender_regexp(conf, envelope_sender):
    """Return 1 (accept) if conf has no mx-allow-envelope-sender-regexp
    aux-items or if envelope_sender matches at least one of them,
    otherwise 0 (refuse). Regexps that cause an error are logged and
    ignored.
    """
    items = kom.all_aux_items_with_tag(conf.aux_items,
                                       kom.AI_MX_ALLOW_ENVELOPE_SENDER_REGEXP)
    if len(items) == 0:
        return 1 # accept
    for item in items:
        try:
            matched = re.match(item.data, envelope_sender)
        except:
            log("Bad envelope sender regexp ignored (%s)" % (item.data),
                LOG_WARNING)
            continue
        if matched:
            return 1 # accept
    return 0 # refuse
# RFC2047 handling
def rfc2047_decode_to_unicode(header_value):
    """Decode an RFC2047 coded header value to a unicode string.

    The parts returned by email.Header.decode_header() are decoded one
    by one and joined with single spaces.
    """
    pieces = []
    for (raw, coding) in email.Header.decode_header(header_value):
        if coding is None:
            coding = "us-ascii"
        try:
            text = raw.decode(coding)
        except (LookupError, UnicodeDecodeError):
            # Assume it is ISO-8859-1. Ugly...
            # We take this path both if the coding is unknown and
            # if the decoding fails (e.g. declared as ASCII but
            # charcode >= 128)
            text = raw.decode('iso-8859-1')
        pieces.append(text)
    return u" ".join(pieces)
def rfc2047_decode_lossy_to_iso_8859_1(header_value):
    """Decode an RFC2047 coded header value and encode the result as
    ISO-8859-1; characters that cannot be encoded are replaced."""
    return rfc2047_decode_to_unicode(header_value).encode("iso-8859-1",
                                                           "replace")
def best_effort_encode_subject_to_charset(s, charset):
    """Encode the unicode subject s to charset, replacing characters
    that cannot be encoded.

    ISO-8859-1 is used when charset is None or us-ascii, and also (with
    a logged warning) when charset is not known to Python.
    """
    fallback = "iso-8859-1"
    if charset is None or charset.lower() == "us-ascii":
        charset = fallback
    try:
        encoded = s.encode(charset, "replace")
    except LookupError:
        log("Cannot recode subject to charset %s (will use iso-8859-1)" % charset, LOG_WARNING)
        encoded = s.encode(fallback, "replace")
    return encoded
# HTML handling
def remove_redundant_html(mail):
    """Drop the text/html alternative where a text/plain one exists.

    For the moment only the most common case is handled: a
    multipart/alternative consisting of exactly two parts, first
    text/plain and then text/html. The html part is deleted in place
    from the payload list of the message.
    """
    if not mail.is_multipart():
        return
    for alt in typed_subpart_iterator(mail, 'multipart', 'alternative'):
        parts = alt.get_payload()
        if len(parts) != 2:
            continue
        plain = parts[0]
        html = parts[1]
        if plain.get_content_maintype() != html.get_content_maintype():
            continue
        if html.get_content_maintype() != "text":
            continue
        if plain.get_content_subtype() != "plain":
            continue
        if html.get_content_subtype() != "html":
            continue
        del parts[1]
def linear_list_of_discrete_parts(mail):
    """Return all non-multipart parts of mail as a flat list, in the
    order mail.walk() visits them."""
    return [part for part in mail.walk() if not part.is_multipart()]
# MAIN
# Options totally changing the behaviour
# --gc Do a garbage collection
# --dump Dump Message-ID database
#
# Normal options:
# --log-level=LEVEL
# Set log level to (ERROR, WARNING, INFO, DEBUG)
# Default is INFO
# --with-hdr Add From:, To:, Cc: and Message-ID: to message body top.
#
# Arguments:
# envelope-sender [envelope-recipient...]
#
# There should be at least one envelope-recipient.
# What to do in this run (selected by --gc / --dump)
FUNC_IMPORT_MAIL = 0
FUNC_GC = 1
FUNC_DUMP = 2

# Defaults, possibly changed by the options below
function = FUNC_IMPORT_MAIL
log_level = LOG_INFO
with_hdr = 0

try:
    options, arguments = getopt.getopt(sys.argv[1:],
                                       "",
                                       ["gc", "with-hdr", "dump", "log-level="])
except getopt.error:
    error_exit("Usage error (%s)" % sys.exc_info()[1], EX_USAGE)

for (opt, optarg) in options:
    if opt == "--gc":
        function = FUNC_GC
    elif opt == "--dump":
        function = FUNC_DUMP
    elif opt == "--with-hdr":
        with_hdr = 1
    elif opt == "--log-level":
        try:
            new_log_level = find_log_level(optarg)
        except:
            error_exit("Bad --log-level=%s" % optarg, EX_USAGE)
        log_level = new_log_level
    else:
        error_exit("Bad option '%s'" % opt, EX_USAGE)

# Check for arguments: importing a mail needs the envelope sender
# followed by at least one envelope recipient.
if function == FUNC_IMPORT_MAIL:
    if len(arguments) < 2:
        error_exit("Too few arguments (%d)" % len(arguments), EX_USAGE)
    envelope_sender = arguments[0]
    envelope_recipients = arguments[1:]
# Connect and log in.
# The two steps are guarded separately. error_exit() leaves through
# sys.exit(), i.e. by raising SystemExit; with the login nested inside
# the connection's try block, its bare except clause caught that and
# reported a login failure as a connection failure with EX_TEMPFAIL.
try:
    c = kom.CachedConnection(KOMSERVER, KOMPORT)
except Exception:
    error_exit("Failed to connect to LysKOM server", EX_TEMPFAIL)
try:
    kom.ReqLogin(c, KOMPERSON, KOMPASSSWORD, invisible=1).response()
except Exception:
    # Change to EX_TEMPFAIL if you consider this transient...
    error_exit("Failed to login to LysKOM server", EX_UNAVAILABLE)
# Prepare to use the Message-ID database