-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathanalyse.py
88 lines (63 loc) · 2.44 KB
/
analyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import json
import sys
import email
import collections
import time
def extract_address(email_string):
    """Return the sender address taken from a raw email's From header.

    Args:
        email_string: The full text of an email message (headers and body).

    Returns:
        The text between the angle brackets when the From header has the
        form ``Name <address>``; otherwise the stripped header value as-is.
        An empty string is returned when the message has no From header.
    """
    message = email.message_from_string(email_string)
    from_field = message['From']
    if from_field is None:
        # A message without a From header would otherwise raise
        # AttributeError on .strip() and abort the whole report.
        return ''
    from_field = from_field.strip()
    if '<' not in from_field:
        return from_field
    # "Display Name <user@host>" -> "user@host"
    return from_field.split('<')[1].strip('>')
def extract_date(email_string):
    """Return the Date header of a raw email formatted as ``DD Mon YYYY``.

    Args:
        email_string: The full text of an email message (headers and body).

    Returns:
        The formatted date, or the placeholder ``'-- --- ----'`` when the
        Date header is missing, cannot be parsed, or contains field values
        that are out of range.
    """
    placeholder = '-- --- ----'
    message = email.message_from_string(email_string)
    date_tuple = email.utils.parsedate(message['Date'])
    if not date_tuple:
        return placeholder
    try:
        return time.strftime('%d %b %Y', date_tuple)
    except (ValueError, OverflowError):
        # parsedate() does not range-check the fields it extracts, so a
        # header such as "99 Jan 2020" parses but cannot be formatted.
        return placeholder
def extract_domain(address):
    """Return the domain part of an email address.

    Args:
        address: An email address string such as ``user@example.com``.

    Returns:
        The text after the last ``@``. When the string contains no ``@``
        it is returned unchanged.
    """
    if '@' not in address:
        return address
    # The domain itself can never contain '@', so the last '@' is always the
    # separator, even when the local part holds a quoted '@'.
    return address.rpartition('@')[2]
def address_uids_mapping(store):
    """Group message UIDs by the sender address of each message.

    Args:
        store: Mapping of UID to raw email text.

    Returns:
        A ``collections.defaultdict(list)`` mapping each sender address to
        the list of UIDs whose message came from that address.
    """
    senders = [(uid, extract_address(raw_email)) for uid, raw_email in store.items()]
    grouped = collections.defaultdict(list)
    for uid, sender in senders:
        grouped[sender].append(uid)
    return grouped
def main(argv):
    """Print per-sender and per-domain summaries of the stored emails.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the path to a JSON file
            mapping message UIDs to raw email text.
    """
    if len(argv) < 2:
        print('Usage: python analyse.py <data_path>')
        sys.exit()

    data_path = argv[1]
    # Close the file deterministically instead of leaking the handle.
    with open(data_path) as data_file:
        store = json.load(data_file)

    # Build real lists: on Python 3 map() yields a one-shot iterator, so
    # counting the addresses would exhaust it and leave the domain report
    # with nothing to count.
    addresses = [extract_address(raw_email) for raw_email in store.values()]
    domains = [extract_domain(address) for address in addresses]
    address_counts = collections.Counter(addresses).most_common()
    domain_counts = collections.Counter(domains).most_common()
    address_to_uids = address_uids_mapping(store)

    # Distinct sender addresses per domain, computed once rather than
    # rescanning every address for every domain.
    addresses_by_domain = collections.defaultdict(set)
    for address in addresses:
        addresses_by_domain[extract_domain(address)].add(address)

    print('\nYou have {} unread emails from {} unique senders:\n'.format(len(store), len(address_counts)))
    print('Last received\tUnread\tAddress')
    for address, count in address_counts:
        # NOTE(review): UIDs are JSON object keys and therefore strings, so
        # max() compares them lexicographically ('9' > '10'). Confirm whether
        # numeric ordering is intended here.
        latest_uid = max(address_to_uids[address])
        date = extract_date(store[latest_uid])
        print(date + '\t' + str(count) + '\t' + address)

    print('\nYou have {} unread emails from {} unique domains:\n'.format(len(store), len(domain_counts)))
    print('Unread\tDomain')
    for domain, count in domain_counts:
        distinct_addresses = len(addresses_by_domain[domain])
        address_spelling = 'addresses' if distinct_addresses > 1 else 'address'
        print('{}\t{} ({} {})'.format(count, domain, distinct_addresses, address_spelling))


if __name__ == "__main__":
    main(sys.argv)