|
3 | 3 | import ipaddress
|
4 | 4 | from ..exceptions import InvalidIndicator
|
5 | 5 | from ..constants import PYVERSION
|
6 |
| -from .ztime import parse_timestamp |
7 |
| -import sys |
| 6 | + |
8 | 7 |
|
9 | 8 | if PYVERSION == 3:
|
10 | 9 | from urllib.parse import urlparse
|
| 10 | + from urllib.parse import urlsplit |
| 11 | + from urllib.parse import urlunsplit |
11 | 12 | else:
|
12 | 13 | from urlparse import urlparse
|
13 | 14 |
|
14 |
| -from pprint import pprint |
15 | 15 |
|
16 |
| -RE_IPV4 = re.compile('^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(\d{1,3})$') |
17 |
| -RE_IPV4_CIDR = re.compile('^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\/\d{1,2})$') |
| 16 | +RE_IPV4 = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(\d{1,3})$') |
| 17 | +RE_IPV4_CIDR = re.compile(r'^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\/\d{1,2})$') |
18 | 18 |
|
19 | 19 | # http://stackoverflow.com/a/17871737
|
20 |
| -RE_IPV6 = re.compile('(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') |
| 20 | +RE_IPV6 = re.compile(r'(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))') |
21 | 21 |
|
22 | 22 | # http://goo.gl/Cztyn2 -- probably needs more work
|
23 | 23 | # http://stackoverflow.com/a/26987741/7205341
|
24 | 24 | # ^((xn--)?(--)?[a-zA-Z0-9-_@]+(-[a-zA-Z0-9]+)*\.)+[a-zA-Z]{2,}(--p1ai)?$
|
25 | 25 | #RE_FQDN = re.compile('^((?!-))(xn--)?[a-z0-9][a-z0-9-_\.]{0,61}[a-z0-9]{0,1}\.(xn--)?([a-z0-9\-]{1,61}|[a-z0-9-]{1,30}\.[a-z]{2,})$')
|
26 | 26 | # http://stackoverflow.com/questions/14402407/maximum-length-of-a-domain-name-without-the-http-www-com-parts
|
27 |
| -RE_FQDN = re.compile('^((?!-))(xn--)?[a-z0-9][a-z0-9-_\.]{0,245}[a-z0-9]{0,1}\.(xn--)?([a-z0-9\-]{1,61}|[a-z0-9-]{1,30}\.[a-z]{2,})$') |
28 |
| -RE_URI_SCHEMES = re.compile('^(https?|ftp)$') |
29 |
| -RE_EMAIL = re.compile("^[-\w+.!#$%&'*\/=?^_`{|}~;]+@[-.0-9a-zA-Z][-.0-9a-zA-Z]*[a-zA-Z]{2,}$") |
30 |
| -RE_ASN = re.compile('^(AS|as)[0-9]{1,6}$') |
| 27 | +RE_FQDN = re.compile(r'^((?!-))(xn--)?[a-z0-9][a-z0-9-_\.]{0,245}[a-z0-9]{0,1}\.(xn--)?([a-z0-9\-]{1,61}|[a-z0-9-]{1,30}\.[a-z]{2,})\.?$') |
| 28 | +RE_URI_SCHEMES = re.compile(r'^(https?|ftp)$') |
| 29 | +RE_EMAIL = re.compile(r"^[-\w+.!#$%&'*\/=?^_`{|}~;]+@[-.0-9a-zA-Z][-.0-9a-zA-Z]*[a-zA-Z]{2,}$") |
| 30 | +RE_ASN = re.compile(r'^(AS|as)[0-9]{1,6}$') |
31 | 31 |
|
32 | 32 | RE_HASH = {
|
33 |
| - 'uuid': re.compile('^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'), |
34 |
| - 'md5': re.compile('^[a-fA-F0-9]{32}$'), |
35 |
| - 'sha1': re.compile('^[a-fA-F0-9]{40}$'), |
36 |
| - 'sha256': re.compile('^[a-fA-F0-9]{64}$'), |
37 |
| - 'sha512': re.compile('^[a-fA-F0-9]{128}$'), |
| 33 | + 'uuid': re.compile(r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'), |
| 34 | + 'md5': re.compile(r'^[a-fA-F0-9]{32}$'), |
| 35 | + 'sha1': re.compile(r'^[a-fA-F0-9]{40}$'), |
| 36 | + 'sha256': re.compile(r'^[a-fA-F0-9]{64}$'), |
| 37 | + 'sha512': re.compile(r'^[a-fA-F0-9]{128}$'), |
38 | 38 | }
|
39 | 39 |
|
40 |
| -RE_IPV4_PADDING = re.compile(r"(^|\.)0+([^/.])") |
| 40 | +RE_IPV4_PADDING = re.compile(r'(^|\.)0+([^/.])') |
41 | 41 |
|
42 | 42 |
|
43 | 43 | def ipv4_normalize(i):
|
@@ -205,6 +205,25 @@ def normalize_itype(i, itype=None):
|
205 | 205 | return i
|
206 | 206 |
|
207 | 207 |
|
def normalize_indicator(i, itype=None, lowercase=False, lowercase_explicit=False):
    """Return a canonical form of indicator ``i`` for the given ``itype``.

    Only ``'fqdn'`` and ``'url'`` indicators are rewritten; any other
    ``itype`` (including ``None``) returns ``i`` unchanged.

    :param i: the indicator string
    :param itype: indicator type, e.g. ``'fqdn'`` or ``'url'``
    :param lowercase: whether the indicator should be lowercased
    :param lowercase_explicit: True when ``lowercase`` was set by the user
        rather than left at its default
    :return: the normalized indicator string

    fqdn: trailing dots are always stripped; the value is lowercased unless
    the user explicitly passed ``lowercase=False``.

    url: the URL is round-tripped through ``urlparse`` and trailing slashes
    are stripped. Then:

    * explicit ``lowercase=True``  -> the whole URL is lowercased
    * explicit ``lowercase=False`` -> nothing further is changed
    * not explicit (default)       -> only the network location is
      lowercased and stripped of trailing dots; path, query and fragment
      keep their case
    """
    # Imported here rather than relying on module scope: the module-level
    # imports only bring in urlsplit/urlunsplit on Python 3, which would
    # raise NameError in the URL branch on Python 2.
    try:
        from urllib.parse import urlparse, urlsplit, urlunsplit
    except ImportError:  # Python 2
        from urlparse import urlparse, urlsplit, urlunsplit

    if itype == 'fqdn':
        i = i.rstrip('.')
        # only skip lowercasing when the user explicitly asked for it
        if lowercase or not lowercase_explicit:
            i = i.lower()
        return i

    if itype == 'url':
        i = urlparse(i).geturl().rstrip('/')

        if lowercase_explicit:
            # the user made a choice: lowercase everything, or leave as-is
            if lowercase:
                i = i.lower()
            return i

        # default behaviour: hostnames are case-insensitive, so normalize
        # just the network location and preserve the rest of the URL
        scheme, netloc, path, qs, anchor = urlsplit(i)
        netloc = netloc.rstrip('.').lower()
        return urlunsplit((scheme, netloc, path, qs, anchor))

    return i
| 225 | + |
| 226 | + |
208 | 227 | def is_subdomain(i):
|
209 | 228 | itype = resolve_itype(i)
|
210 | 229 | if itype is not 'fqdn':
|
|
0 commit comments