6
6
from credslayer .core .utils import CreditCard
7
7
8
8
# This regex was written to prevent false positives; in theory, it can miss a few addresses.
9
- email_regex = re .compile (r'(?:\t| |^|<|,|:)([^+\x00-\x20@<>/\\{}`^\'*:;=()%\[\],_\-"]'
10
- r'[^\x00-\x20@<>/\\{}`^\'*:;=()%\[\],"]{2,63}@(?:[a-z0-9]{2,63}\.)+[a-z]{2,6})' )
9
+ _email_regex = re .compile (r'(?:\t| |^|<|,|:)([^+\x00-\x20@<>/\\{}`^\'*:;=()%\[\],_\-"]'
10
+ r'[^\x00-\x20@<>/\\{}`^\'*:;=()%\[\],"]{2,63}@(?:[a-z0-9]{2,63}\.)+[a-z]{2,6})' )
11
11
12
12
# Tries to match things that look like a credit card.
13
13
# Things like 11111111-11111111 will also match, which is why there is a second step to validate that data.
14
- first_step_credit_card_regex = re .compile (r"(?:\s|^)(?:\d[ -]*?){13,16}(?:\s|$)" )
14
+ _first_step_credit_card_regex = re .compile (r"(?:\s|^)(?:\d[ -]*?){13,16}(?:\s|$)" )
15
15
16
16
# https://gist.github.com/michaelkeevildown/9096cd3aac9029c4e6e05588448a8841
17
- second_step_credit_card_regex = re .compile (
17
+ _second_step_credit_card_regex = re .compile (
18
18
r"^(?P<AmericanExpress>3[47][0-9]{13})"
19
19
r"|(?P<BCGlobal>(?:6541|6556)[0-9]{12})"
20
20
r"|(?P<CarteBlanche>389[0-9]{11})"
41
41
credit_cards_already_found = set ()
42
42
43
43
44
- def extract_emails (packet_strings : List [str ]) -> Set :
44
+ def extract_emails (packet_strings : List [str ]) -> Set [str ]:
45
+ """
46
+ Parameters
47
+ ----------
48
+ packet_strings
49
+ The list of strings to extract emails from.
50
+
51
+ Returns
52
+ -------
53
+ Set[str]
54
+ A set of emails found.
55
+ """
45
56
emails = set ()
46
57
47
58
for string in packet_strings :
48
- emails_found = email_regex .findall (string )
59
+ emails_found = _email_regex .findall (string )
49
60
50
61
for email_found in emails_found :
51
62
if email_found not in emails_already_found :
@@ -56,17 +67,28 @@ def extract_emails(packet_strings: List[str]) -> Set:
56
67
57
68
58
69
def extract_credit_cards (packet_strings : List [str ]) -> Set [CreditCard ]:
70
+ """
71
+ Parameters
72
+ ----------
73
+ packet_strings
74
+ The list of strings to extract credit cards from.
75
+
76
+ Returns
77
+ -------
78
+ Set[CreditCard]
79
+ A set of `CreditCard` tuples.
80
+ """
59
81
credit_cards = set ()
60
82
61
83
def clean_credit_card (card ):
62
84
return card .replace (" " , "" ).replace ("-" , "" )
63
85
64
86
for string in packet_strings :
65
- credit_cards_found = first_step_credit_card_regex .findall (string )
87
+ credit_cards_found = _first_step_credit_card_regex .findall (string )
66
88
67
89
for credit_card_found in credit_cards_found :
68
90
credit_card_found = credit_card_found .strip () # Remove potential whitespaces
69
- credit_card_match = second_step_credit_card_regex .match (clean_credit_card (credit_card_found ))
91
+ credit_card_match = _second_step_credit_card_regex .match (clean_credit_card (credit_card_found ))
70
92
71
93
if credit_card_match and credit_card_found not in credit_cards_already_found :
72
94
credit_cards .add (CreditCard (credit_card_match .lastgroup , credit_card_found ))
0 commit comments